diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-17 00:42:26 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-17 00:42:26 +0300 |
commit | 009bd55dfcc857d8b00a5bbb17a8db060317af6f (patch) | |
tree | 3a623fc690ea03bd76630c5bcc003324136ae0f6 | |
parent | 60f7c503d971a731ee3c4f884a9f2e80d476730d (diff) | |
parent | e246b7c035d74abfb3507fa10082d0c42cc016c3 (diff) | |
download | linux-009bd55dfcc857d8b00a5bbb17a8db060317af6f.tar.xz |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"A smaller set of patches, nothing stands out as being particularly
major this cycle. The biggest item would be the new HIP09 HW support
from HNS, otherwise it was pretty quiet for new work here:
- Driver bug fixes and updates: bnxt_re, cxgb4, rxe, hns, i40iw,
mlx4 and mlx5
- Bug fixes and polishing for the new rtrs ULP
- Cleanup of uverbs checking for allowed driver operations
- Use sysfs_emit all over the place
- Lots of bug fixes and clarity improvements for hns
- hip09 support for hns
- NDR and 50/100Gb signaling rates
- Remove dma_virt_ops and go back to using the IB DMA wrappers
- mlx5 optimizations for contiguous DMA regions"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (147 commits)
RDMA/cma: Don't overwrite sgid_attr after device is released
RDMA/mlx5: Fix MR cache memory leak
RDMA/rxe: Use acquire/release for memory ordering
RDMA/hns: Simplify AEQE process for different types of queue
RDMA/hns: Fix inaccurate prints
RDMA/hns: Fix incorrect symbol types
RDMA/hns: Clear redundant variable initialization
RDMA/hns: Fix coding style issues
RDMA/hns: Remove unnecessary access right set during INIT2INIT
RDMA/hns: WARN_ON if get a reserved sl from users
RDMA/hns: Avoid filling sl in high 3 bits of vlan_id
RDMA/hns: Do shift on traffic class when using RoCEv2
RDMA/hns: Normalization the judgment of some features
RDMA/hns: Limit the length of data copied between kernel and userspace
RDMA/mlx4: Remove bogus dev_base_lock usage
RDMA/uverbs: Fix incorrect variable type
RDMA/core: Do not indicate device ready when device enablement fails
RDMA/core: Clean up cq pool mechanism
RDMA/core: Update kernel documentation for ib_create_named_qp()
MAINTAINERS: SOFT-ROCE: Change Zhu Yanjun's email address
...
175 files changed, 3845 insertions, 3915 deletions
@@ -345,3 +345,4 @@ Wolfram Sang <wsa@kernel.org> <w.sang@pengutronix.de> Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de> Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com> Yusuke Goda <goda.yusuke@renesas.com> +Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com> diff --git a/MAINTAINERS b/MAINTAINERS index a14cad29c031..8c1c5f9830c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16399,7 +16399,7 @@ F: drivers/infiniband/sw/siw/ F: include/uapi/rdma/siw-abi.h SOFT-ROCE DRIVER (rxe) -M: Zhu Yanjun <yanjunz@nvidia.com> +M: Zhu Yanjun <zyjzyj2000@gmail.com> L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/sw/rxe/ diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 5afd142fe8c7..98165589c8ab 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1251,7 +1251,8 @@ out: EXPORT_SYMBOL(ib_cm_listen); /** - * Create a new listening ib_cm_id and listen on the given service ID. + * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on + * the given service ID. * * If there's an existing ID listening on that same device and service ID, * return it. @@ -1765,7 +1766,7 @@ static u16 cm_get_bth_pkey(struct cm_work *work) } /** - * Convert OPA SGID to IB SGID + * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID * ULPs (such as IPoIB) do not understand OPA GIDs and will * reject them as the local_gid will not match the sgid. Therefore, * change the pathrec's SGID to an IB SGID. 
@@ -4273,8 +4274,8 @@ static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, group = container_of(obj, struct cm_counter_group, obj); cm_attr = container_of(attr, struct cm_counter_attribute, attr); - return sprintf(buf, "%ld\n", - atomic_long_read(&group->counter[cm_attr->index])); + return sysfs_emit(buf, "%ld\n", + atomic_long_read(&group->counter[cm_attr->index])); } static const struct sysfs_ops cm_counter_ops = { diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index a77750b8954d..c51b84b2d2f3 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -477,6 +477,10 @@ static void cma_release_dev(struct rdma_id_private *id_priv) list_del(&id_priv->list); cma_dev_put(id_priv->cma_dev); id_priv->cma_dev = NULL; + if (id_priv->id.route.addr.dev_addr.sgid_attr) { + rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); + id_priv->id.route.addr.dev_addr.sgid_attr = NULL; + } mutex_unlock(&lock); } @@ -1861,9 +1865,6 @@ static void _destroy_id(struct rdma_id_private *id_priv, kfree(id_priv->id.route.path_rec); - if (id_priv->id.route.addr.dev_addr.sgid_attr) - rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); - put_net(id_priv->id.route.addr.dev_addr.net); rdma_restrack_del(&id_priv->res); kfree(id_priv); @@ -2495,8 +2496,9 @@ static int cma_listen_handler(struct rdma_cm_id *id, return id_priv->id.event_handler(id, event); } -static void cma_listen_on_dev(struct rdma_id_private *id_priv, - struct cma_device *cma_dev) +static int cma_listen_on_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev, + struct rdma_id_private **to_destroy) { struct rdma_id_private *dev_id_priv; struct net *net = id_priv->id.route.addr.dev_addr.net; @@ -2504,21 +2506,21 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, lockdep_assert_held(&lock); + *to_destroy = NULL; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) - return; + return 0; dev_id_priv 
= __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type, id_priv); if (IS_ERR(dev_id_priv)) - return; + return PTR_ERR(dev_id_priv); dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); _cma_attach_to_dev(dev_id_priv, cma_dev); - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); cma_id_get(id_priv); dev_id_priv->internal_id = 1; dev_id_priv->afonly = id_priv->afonly; @@ -2527,19 +2529,42 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); if (ret) - dev_warn(&cma_dev->device->dev, - "RDMA CMA: cma_listen_on_dev, error %d\n", ret); + goto err_listen; + list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); + return 0; +err_listen: + /* Caller must destroy this after releasing lock */ + *to_destroy = dev_id_priv; + dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret); + return ret; } -static void cma_listen_on_all(struct rdma_id_private *id_priv) +static int cma_listen_on_all(struct rdma_id_private *id_priv) { + struct rdma_id_private *to_destroy; struct cma_device *cma_dev; + int ret; mutex_lock(&lock); list_add_tail(&id_priv->list, &listen_any_list); - list_for_each_entry(cma_dev, &dev_list, list) - cma_listen_on_dev(id_priv, cma_dev); + list_for_each_entry(cma_dev, &dev_list, list) { + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); + if (ret) { + /* Prevent racing with cma_process_remove() */ + if (to_destroy) + list_del_init(&to_destroy->list); + goto err_listen; + } + } mutex_unlock(&lock); + return 0; + +err_listen: + list_del(&id_priv->list); + mutex_unlock(&lock); + if (to_destroy) + rdma_destroy_id(&to_destroy->id); + return ret; } void rdma_set_service_type(struct rdma_cm_id *id, int tos) @@ -3692,8 +3717,11 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) ret = -ENOSYS; goto err; } - } else - 
cma_listen_on_all(id_priv); + } else { + ret = cma_listen_on_all(id_priv); + if (ret) + goto err; + } return 0; err: @@ -4773,69 +4801,6 @@ static struct notifier_block cma_nb = { .notifier_call = cma_netdev_callback }; -static int cma_add_one(struct ib_device *device) -{ - struct cma_device *cma_dev; - struct rdma_id_private *id_priv; - unsigned int i; - unsigned long supported_gids = 0; - int ret; - - cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); - if (!cma_dev) - return -ENOMEM; - - cma_dev->device = device; - cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, - sizeof(*cma_dev->default_gid_type), - GFP_KERNEL); - if (!cma_dev->default_gid_type) { - ret = -ENOMEM; - goto free_cma_dev; - } - - cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, - sizeof(*cma_dev->default_roce_tos), - GFP_KERNEL); - if (!cma_dev->default_roce_tos) { - ret = -ENOMEM; - goto free_gid_type; - } - - rdma_for_each_port (device, i) { - supported_gids = roce_gid_type_mask_support(device, i); - WARN_ON(!supported_gids); - if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) - cma_dev->default_gid_type[i - rdma_start_port(device)] = - CMA_PREFERRED_ROCE_GID_TYPE; - else - cma_dev->default_gid_type[i - rdma_start_port(device)] = - find_first_bit(&supported_gids, BITS_PER_LONG); - cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; - } - - init_completion(&cma_dev->comp); - refcount_set(&cma_dev->refcount, 1); - INIT_LIST_HEAD(&cma_dev->id_list); - ib_set_client_data(device, &cma_client, cma_dev); - - mutex_lock(&lock); - list_add_tail(&cma_dev->list, &dev_list); - list_for_each_entry(id_priv, &listen_any_list, list) - cma_listen_on_dev(id_priv, cma_dev); - mutex_unlock(&lock); - - trace_cm_add_one(device); - return 0; - -free_gid_type: - kfree(cma_dev->default_gid_type); - -free_cma_dev: - kfree(cma_dev); - return ret; -} - static void cma_send_device_removal_put(struct rdma_id_private *id_priv) { struct rdma_cm_event event = { .event = 
RDMA_CM_EVENT_DEVICE_REMOVAL }; @@ -4898,6 +4863,80 @@ static void cma_process_remove(struct cma_device *cma_dev) wait_for_completion(&cma_dev->comp); } +static int cma_add_one(struct ib_device *device) +{ + struct rdma_id_private *to_destroy; + struct cma_device *cma_dev; + struct rdma_id_private *id_priv; + unsigned int i; + unsigned long supported_gids = 0; + int ret; + + cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL); + if (!cma_dev) + return -ENOMEM; + + cma_dev->device = device; + cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, + sizeof(*cma_dev->default_gid_type), + GFP_KERNEL); + if (!cma_dev->default_gid_type) { + ret = -ENOMEM; + goto free_cma_dev; + } + + cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, + sizeof(*cma_dev->default_roce_tos), + GFP_KERNEL); + if (!cma_dev->default_roce_tos) { + ret = -ENOMEM; + goto free_gid_type; + } + + rdma_for_each_port (device, i) { + supported_gids = roce_gid_type_mask_support(device, i); + WARN_ON(!supported_gids); + if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) + cma_dev->default_gid_type[i - rdma_start_port(device)] = + CMA_PREFERRED_ROCE_GID_TYPE; + else + cma_dev->default_gid_type[i - rdma_start_port(device)] = + find_first_bit(&supported_gids, BITS_PER_LONG); + cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; + } + + init_completion(&cma_dev->comp); + refcount_set(&cma_dev->refcount, 1); + INIT_LIST_HEAD(&cma_dev->id_list); + ib_set_client_data(device, &cma_client, cma_dev); + + mutex_lock(&lock); + list_add_tail(&cma_dev->list, &dev_list); + list_for_each_entry(id_priv, &listen_any_list, list) { + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); + if (ret) + goto free_listen; + } + mutex_unlock(&lock); + + trace_cm_add_one(device); + return 0; + +free_listen: + list_del(&cma_dev->list); + mutex_unlock(&lock); + + /* cma_process_remove() will delete to_destroy */ + cma_process_remove(cma_dev); + kfree(cma_dev->default_roce_tos); +free_gid_type: + 
kfree(cma_dev->default_gid_type); + +free_cma_dev: + kfree(cma_dev); + return ret; +} + static void cma_remove_one(struct ib_device *device, void *client_data) { struct cma_device *cma_dev = client_data; diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 7ec4af2ed87a..7f70e5a7de10 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -115,7 +115,7 @@ static ssize_t default_roce_mode_show(struct config_item *item, if (gid_type < 0) return gid_type; - return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type)); + return sysfs_emit(buf, "%s\n", ib_cache_gid_type_str(gid_type)); } static ssize_t default_roce_mode_store(struct config_item *item, @@ -157,7 +157,7 @@ static ssize_t default_roce_tos_show(struct config_item *item, char *buf) tos = cma_get_default_roce_tos(cma_dev, group->port_num); cma_configfs_params_put(cma_dev); - return sprintf(buf, "%u\n", tos); + return sysfs_emit(buf, "%u\n", tos); } static ssize_t default_roce_tos_store(struct config_item *item, diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index e84b0fedaacb..315f7a297eee 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -318,15 +318,12 @@ struct ib_device *ib_device_get_by_index(const struct net *net, u32 index); void nldev_init(void); void nldev_exit(void); -static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, - struct ib_pd *pd, - struct ib_qp_init_attr *attr, - struct ib_udata *udata, - struct ib_uqp_object *uobj) +static inline struct ib_qp * +_ib_create_qp(struct ib_device *dev, struct ib_pd *pd, + struct ib_qp_init_attr *attr, struct ib_udata *udata, + struct ib_uqp_object *uobj, const char *caller) { - enum ib_qp_type qp_type = attr->qp_type; struct ib_qp *qp; - bool is_xrc; if (!dev->ops.create_qp) return ERR_PTR(-EOPNOTSUPP); @@ -347,6 +344,7 @@ static inline struct ib_qp *_ib_create_qp(struct 
ib_device *dev, qp->srq = attr->srq; qp->rwq_ind_tbl = attr->rwq_ind_tbl; qp->event_handler = attr->event_handler; + qp->port = attr->port_num; atomic_set(&qp->usecnt, 0); spin_lock_init(&qp->mr_lock); @@ -354,16 +352,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, INIT_LIST_HEAD(&qp->sig_mrs); rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP); - /* - * We don't track XRC QPs for now, because they don't have PD - * and more importantly they are created internaly by driver, - * see mlx5 create_dev_resources() as an example. - */ - is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT; - if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) { - rdma_restrack_parent_name(&qp->res, &pd->res); - rdma_restrack_add(&qp->res); - } + WARN_ONCE(!udata && !caller, "Missing kernel QP owner"); + rdma_restrack_set_name(&qp->res, udata ? NULL : caller); + rdma_restrack_add(&qp->res); return qp; } @@ -411,7 +402,6 @@ void rdma_umap_priv_init(struct rdma_umap_priv *priv, struct vm_area_struct *vma, struct rdma_user_mmap_entry *entry); -void ib_cq_pool_init(struct ib_device *dev); -void ib_cq_pool_destroy(struct ib_device *dev); +void ib_cq_pool_cleanup(struct ib_device *dev); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index e4ff0d3328b6..92745522250e 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -64,8 +64,40 @@ out: return ret; } -static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, - enum rdma_nl_counter_mode mode) +static void auto_mode_init_counter(struct rdma_counter *counter, + const struct ib_qp *qp, + enum rdma_nl_counter_mask new_mask) +{ + struct auto_mode_param *param = &counter->mode.param; + + counter->mode.mode = RDMA_COUNTER_MODE_AUTO; + counter->mode.mask = new_mask; + + if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) + param->qp_type = qp->qp_type; +} + +static int __rdma_counter_bind_qp(struct 
rdma_counter *counter, + struct ib_qp *qp) +{ + int ret; + + if (qp->counter) + return -EINVAL; + + if (!qp->device->ops.counter_bind_qp) + return -EOPNOTSUPP; + + mutex_lock(&counter->lock); + ret = qp->device->ops.counter_bind_qp(counter, qp); + mutex_unlock(&counter->lock); + + return ret; +} + +static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u8 port, + struct ib_qp *qp, + enum rdma_nl_counter_mode mode) { struct rdma_port_counter *port_counter; struct rdma_counter *counter; @@ -88,11 +120,22 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, port_counter = &dev->port_data[port].port_counter; mutex_lock(&port_counter->lock); - if (mode == RDMA_COUNTER_MODE_MANUAL) { + switch (mode) { + case RDMA_COUNTER_MODE_MANUAL: ret = __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_MANUAL, 0); - if (ret) + if (ret) { + mutex_unlock(&port_counter->lock); goto err_mode; + } + break; + case RDMA_COUNTER_MODE_AUTO: + auto_mode_init_counter(counter, qp, port_counter->mode.mask); + break; + default: + ret = -EOPNOTSUPP; + mutex_unlock(&port_counter->lock); + goto err_mode; } port_counter->num_counters++; @@ -102,10 +145,15 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, kref_init(&counter->kref); mutex_init(&counter->lock); + ret = __rdma_counter_bind_qp(counter, qp); + if (ret) + goto err_mode; + + rdma_restrack_parent_name(&counter->res, &qp->res); + rdma_restrack_add(&counter->res); return counter; err_mode: - mutex_unlock(&port_counter->lock); kfree(counter->stats); err_stats: rdma_restrack_put(&counter->res); @@ -132,19 +180,6 @@ static void rdma_counter_free(struct rdma_counter *counter) kfree(counter); } -static void auto_mode_init_counter(struct rdma_counter *counter, - const struct ib_qp *qp, - enum rdma_nl_counter_mask new_mask) -{ - struct auto_mode_param *param = &counter->mode.param; - - counter->mode.mode = RDMA_COUNTER_MODE_AUTO; - counter->mode.mask = new_mask; - - if 
(new_mask & RDMA_COUNTER_MASK_QP_TYPE) - param->qp_type = qp->qp_type; -} - static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, enum rdma_nl_counter_mask auto_mask) { @@ -161,24 +196,6 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, return match; } -static int __rdma_counter_bind_qp(struct rdma_counter *counter, - struct ib_qp *qp) -{ - int ret; - - if (qp->counter) - return -EINVAL; - - if (!qp->device->ops.counter_bind_qp) - return -EOPNOTSUPP; - - mutex_lock(&counter->lock); - ret = qp->device->ops.counter_bind_qp(counter, qp); - mutex_unlock(&counter->lock); - - return ret; -} - static int __rdma_counter_unbind_qp(struct ib_qp *qp) { struct rdma_counter *counter = qp->counter; @@ -247,13 +264,6 @@ next: return counter; } -static void rdma_counter_res_add(struct rdma_counter *counter, - struct ib_qp *qp) -{ - rdma_restrack_parent_name(&counter->res, &qp->res); - rdma_restrack_add(&counter->res); -} - static void counter_release(struct kref *kref) { struct rdma_counter *counter; @@ -275,7 +285,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) struct rdma_counter *counter; int ret; - if (!qp->res.valid || rdma_is_kernel_res(&qp->res)) + if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res)) return 0; if (!rdma_is_port_valid(dev, port)) @@ -293,19 +303,9 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) return ret; } } else { - counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO); + counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO); if (!counter) return -ENOMEM; - - auto_mode_init_counter(counter, qp, port_counter->mode.mask); - - ret = __rdma_counter_bind_qp(counter, qp); - if (ret) { - rdma_counter_free(counter); - return ret; - } - - rdma_counter_res_add(counter, qp); } return 0; @@ -419,15 +419,6 @@ err: return NULL; } -static int rdma_counter_bind_qp_manual(struct rdma_counter *counter, - struct ib_qp *qp) -{ - if ((counter->device != 
qp->device) || (counter->port != qp->port)) - return -EINVAL; - - return __rdma_counter_bind_qp(counter, qp); -} - static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, u32 counter_id) { @@ -475,7 +466,12 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, goto err_task; } - ret = rdma_counter_bind_qp_manual(counter, qp); + if ((counter->device != qp->device) || (counter->port != qp->port)) { + ret = -EINVAL; + goto err_task; + } + + ret = __rdma_counter_bind_qp(counter, qp); if (ret) goto err_task; @@ -520,26 +516,18 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, goto err; } - counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL); + counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL); if (!counter) { ret = -ENOMEM; goto err; } - ret = rdma_counter_bind_qp_manual(counter, qp); - if (ret) - goto err_bind; - if (counter_id) *counter_id = counter->id; - rdma_counter_res_add(counter, qp); - rdma_restrack_put(&qp->res); - return ret; + return 0; -err_bind: - rdma_counter_free(counter); err: rdma_restrack_put(&qp->res); return ret; diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 12ebacf52958..433b426729d4 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -123,7 +123,7 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs, } /** - * ib_process_direct_cq - process a CQ in caller context + * ib_process_cq_direct - process a CQ in caller context * @cq: CQ to process * @budget: number of CQEs to poll for * @@ -197,7 +197,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) } /** - * __ib_alloc_cq allocate a completion queue + * __ib_alloc_cq - allocate a completion queue * @dev: device to allocate the CQ for * @private: driver private data, accessible from cq->cq_context * @nr_cqe: number of CQEs to allocate @@ -349,16 +349,7 @@ void ib_free_cq(struct ib_cq *cq) } EXPORT_SYMBOL(ib_free_cq); 
-void ib_cq_pool_init(struct ib_device *dev) -{ - unsigned int i; - - spin_lock_init(&dev->cq_pools_lock); - for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) - INIT_LIST_HEAD(&dev->cq_pools[i]); -} - -void ib_cq_pool_destroy(struct ib_device *dev) +void ib_cq_pool_cleanup(struct ib_device *dev) { struct ib_cq *cq, *n; unsigned int i; @@ -367,6 +358,7 @@ void ib_cq_pool_destroy(struct ib_device *dev) list_for_each_entry_safe(cq, n, &dev->cq_pools[i], pool_entry) { WARN_ON(cq->cqe_used); + list_del(&cq->pool_entry); cq->shared = false; ib_free_cq(cq); } diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a3b1fc84cdca..e96f979e6d52 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -284,6 +284,7 @@ static void ib_device_check_mandatory(struct ib_device *device) IB_MANDATORY_FUNC(poll_cq), IB_MANDATORY_FUNC(req_notify_cq), IB_MANDATORY_FUNC(get_dma_mr), + IB_MANDATORY_FUNC(reg_user_mr), IB_MANDATORY_FUNC(dereg_mr), IB_MANDATORY_FUNC(get_port_immutable) }; @@ -569,6 +570,7 @@ static void rdma_init_coredev(struct ib_core_device *coredev, struct ib_device *_ib_alloc_device(size_t size) { struct ib_device *device; + unsigned int i; if (WARN_ON(size < sizeof(struct ib_device))) return NULL; @@ -600,6 +602,41 @@ struct ib_device *_ib_alloc_device(size_t size) init_completion(&device->unreg_completion); INIT_WORK(&device->unregistration_work, ib_unregister_work); + spin_lock_init(&device->cq_pools_lock); + for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++) + INIT_LIST_HEAD(&device->cq_pools[i]); + + device->uverbs_cmd_mask = + BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) | + BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) | + BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) | + BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) | + 
BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) | + BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) | + BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) | + BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) | + BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) | + BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) | + BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ); return device; } EXPORT_SYMBOL(_ib_alloc_device); @@ -1177,25 +1214,6 @@ out: return ret; } -static void setup_dma_device(struct ib_device *device, - struct device *dma_device) -{ - /* - * If the caller does not provide a DMA capable device then the IB - * device will be used. In this case the caller should fully setup the - * ibdev for DMA. This usually means using dma_virt_ops. 
- */ -#ifdef CONFIG_DMA_VIRT_OPS - if (!dma_device) { - device->dev.dma_ops = &dma_virt_ops; - dma_device = &device->dev; - } -#endif - WARN_ON(!dma_device); - device->dma_device = dma_device; - WARN_ON(!device->dma_device->dma_parms); -} - /* * setup_device() allocates memory and sets up data that requires calling the * device ops, this is the only reason these actions are not done during @@ -1249,7 +1267,7 @@ static void disable_device(struct ib_device *device) remove_client_context(device, cid); } - ib_cq_pool_destroy(device); + ib_cq_pool_cleanup(device); /* Pairs with refcount_set in enable_device */ ib_device_put(device); @@ -1294,8 +1312,6 @@ static int enable_device_and_get(struct ib_device *device) goto out; } - ib_cq_pool_init(device); - down_read(&clients_rwsem); xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { ret = add_client_context(device, client); @@ -1341,7 +1357,14 @@ int ib_register_device(struct ib_device *device, const char *name, if (ret) return ret; - setup_dma_device(device, dma_device); + /* + * If the caller does not provide a DMA capable device then the IB core + * will set up ib_sge and scatterlist structures that stash the kernel + * virtual address into the address field. 
+ */ + WARN_ON(dma_device && !dma_device->dma_parms); + device->dma_device = dma_device; + ret = setup_device(device); if (ret) return ret; @@ -1374,9 +1397,6 @@ int ib_register_device(struct ib_device *device, const char *name, } ret = enable_device_and_get(device); - dev_set_uevent_suppress(&device->dev, false); - /* Mark for userspace that device is ready */ - kobject_uevent(&device->dev.kobj, KOBJ_ADD); if (ret) { void (*dealloc_fn)(struct ib_device *); @@ -1396,8 +1416,12 @@ int ib_register_device(struct ib_device *device, const char *name, ib_device_put(device); __ib_unregister_device(device); device->ops.dealloc_driver = dealloc_fn; + dev_set_uevent_suppress(&device->dev, false); return ret; } + dev_set_uevent_suppress(&device->dev, false); + /* Mark for userspace that device is ready */ + kobject_uevent(&device->dev.kobj, KOBJ_ADD); ib_device_put(device); return 0; @@ -2576,6 +2600,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, create_qp); SET_DEVICE_OP(dev_ops, create_rwq_ind_table); SET_DEVICE_OP(dev_ops, create_srq); + SET_DEVICE_OP(dev_ops, create_user_ah); SET_DEVICE_OP(dev_ops, create_wq); SET_DEVICE_OP(dev_ops, dealloc_dm); SET_DEVICE_OP(dev_ops, dealloc_driver); @@ -2675,6 +2700,21 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) } EXPORT_SYMBOL(ib_set_device_ops); +#ifdef CONFIG_INFINIBAND_VIRT_DMA +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + sg_dma_address(s) = (uintptr_t)sg_virt(s); + sg_dma_len(s) = s->length; + } + return nents; +} +EXPORT_SYMBOL(ib_dma_virt_map_sg); +#endif /* CONFIG_INFINIBAND_VIRT_DMA */ + static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { [RDMA_NL_LS_OP_RESOLVE] = { .doit = ib_nl_handle_resolve_resp, diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 
1bf87d9fd0bd..eeb8e6010907 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -141,7 +141,7 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request); int iwpm_get_nlmsg_seq(void); /** - * iwpm_add_reminfo - Add remote address info of the connecting peer + * iwpm_add_remote_info - Add remote address info of the connecting peer * to the remote info hash table * @reminfo: The remote info to be added */ diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index ffe11b03724c..75eafd9208aa 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -137,15 +137,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, } else if (uobj->object) { ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, attrs); - if (ret) { - if (ib_is_destroy_retryable(ret, reason, uobj)) - return ret; - - /* Nothing to be done, dangle the memory and move on */ - WARN(true, - "ib_uverbs: failed to remove uobject id %d, driver err=%d", - uobj->id, ret); - } + if (ret) + /* Nothing to be done, wait till ucontext will clean it */ + return ret; uobj->object = NULL; } @@ -543,12 +537,7 @@ static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, struct uverbs_obj_idr_type, type); int ret = idr_type->destroy_object(uobj, why, attrs); - /* - * We can only fail gracefully if the user requested to destroy the - * object or when a retry may be called upon an error. - * In the rest of the cases, just remove whatever you can. 
- */ - if (ib_is_destroy_retryable(ret, why, uobj)) + if (ret) return ret; if (why == RDMA_REMOVE_ABORT) @@ -581,11 +570,8 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, { const struct uverbs_obj_fd_type *fd_type = container_of( uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); - int ret = fd_type->destroy_object(uobj, why); - - if (ib_is_destroy_retryable(ret, why, uobj)) - return ret; + fd_type->destroy_object(uobj, why); return 0; } @@ -609,6 +595,27 @@ static void alloc_commit_idr_uobject(struct ib_uobject *uobj) WARN_ON(old != NULL); } +static void swap_idr_uobjects(struct ib_uobject *obj_old, + struct ib_uobject *obj_new) +{ + struct ib_uverbs_file *ufile = obj_old->ufile; + void *old; + + /* + * New must be an object that been allocated but not yet committed, this + * moves the pre-committed state to obj_old, new still must be comitted. + */ + old = xa_cmpxchg(&ufile->idr, obj_old->id, obj_old, XA_ZERO_ENTRY, + GFP_KERNEL); + if (WARN_ON(old != obj_old)) + return; + + swap(obj_old->id, obj_new->id); + + old = xa_cmpxchg(&ufile->idr, obj_old->id, NULL, obj_old, GFP_KERNEL); + WARN_ON(old != NULL); +} + static void alloc_commit_fd_uobject(struct ib_uobject *uobj) { int fd = uobj->id; @@ -655,6 +662,35 @@ void rdma_alloc_commit_uobject(struct ib_uobject *uobj, } /* + * new_uobj will be assigned to the handle currently used by to_uobj, and + * to_uobj will be destroyed. + * + * Upon return the caller must do: + * rdma_alloc_commit_uobject(new_uobj) + * uobj_put_destroy(to_uobj) + * + * to_uobj must have a write get but the put mode switches to destroy once + * this is called. 
+ */ +void rdma_assign_uobject(struct ib_uobject *to_uobj, struct ib_uobject *new_uobj, + struct uverbs_attr_bundle *attrs) +{ + assert_uverbs_usecnt(new_uobj, UVERBS_LOOKUP_WRITE); + + if (WARN_ON(to_uobj->uapi_object != new_uobj->uapi_object || + !to_uobj->uapi_object->type_class->swap_uobjects)) + return; + + to_uobj->uapi_object->type_class->swap_uobjects(to_uobj, new_uobj); + + /* + * If this fails then the uobject is still completely valid (though with + * a new ID) and we leak it until context close. + */ + uverbs_destroy_uobject(to_uobj, RDMA_REMOVE_DESTROY, attrs); +} + +/* * This consumes the kref for uobj. It is up to the caller to unwind the HW * object and anything else connected to uobj before calling this. */ @@ -761,6 +797,7 @@ const struct uverbs_obj_type_class uverbs_idr_class = { .lookup_put = lookup_put_idr_uobject, .destroy_hw = destroy_hw_idr_uobject, .remove_handle = remove_handle_idr_uobject, + .swap_uobjects = swap_idr_uobjects, }; EXPORT_SYMBOL(uverbs_idr_class); @@ -863,11 +900,18 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, * racing with a lookup_get. */ WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); + if (reason == RDMA_REMOVE_DRIVER_FAILURE) + obj->object = NULL; if (!uverbs_destroy_uobject(obj, reason, &attrs)) ret = 0; else atomic_set(&obj->usecnt, 0); } + + if (reason == RDMA_REMOVE_DRIVER_FAILURE) { + WARN_ON(!list_empty(&ufile->uobjects)); + return 0; + } return ret; } @@ -889,21 +933,12 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, if (!ufile->ucontext) goto done; - ufile->ucontext->cleanup_retryable = true; - while (!list_empty(&ufile->uobjects)) - if (__uverbs_cleanup_ufile(ufile, reason)) { - /* - * No entry was cleaned-up successfully during this - * iteration. It is a driver bug to fail destruction. 
- */ - WARN_ON(!list_empty(&ufile->uobjects)); - break; - } - - ufile->ucontext->cleanup_retryable = false; - if (!list_empty(&ufile->uobjects)) - __uverbs_cleanup_ufile(ufile, reason); + while (!list_empty(&ufile->uobjects) && + !__uverbs_cleanup_ufile(ufile, reason)) { + } + if (WARN_ON(!list_empty(&ufile->uobjects))) + __uverbs_cleanup_ufile(ufile, RDMA_REMOVE_DRIVER_FAILURE); ufile_destroy_ucontext(ufile, reason); done: diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 4aeeaaed0f17..e0a41c867002 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -221,19 +221,29 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); struct rdma_restrack_root *rt; - int ret; + int ret = 0; if (!dev) return; + if (res->no_track) + goto out; + rt = &dev->res[res->type]; if (res->type == RDMA_RESTRACK_QP) { /* Special case to ensure that LQPN points to right QP */ struct ib_qp *qp = container_of(res, struct ib_qp, res); - ret = xa_insert(&rt->xa, qp->qp_num, res, GFP_KERNEL); - res->id = ret ? 
0 : qp->qp_num; + WARN_ONCE(qp->qp_num >> 24 || qp->port >> 8, + "QP number 0x%0X and port 0x%0X", qp->qp_num, + qp->port); + res->id = qp->qp_num; + if (qp->qp_type == IB_QPT_SMI || qp->qp_type == IB_QPT_GSI) + res->id |= qp->port << 24; + ret = xa_insert(&rt->xa, res->id, res, GFP_KERNEL); + if (ret) + res->id = 0; } else if (res->type == RDMA_RESTRACK_COUNTER) { /* Special case to ensure that cntn points to right counter */ struct rdma_counter *counter; @@ -246,6 +256,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) &rt->next_id, GFP_KERNEL); } +out: if (!ret) res->valid = true; } @@ -318,6 +329,9 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) return; } + if (res->no_track) + goto out; + dev = res_to_dev(res); if (WARN_ON(!dev)) return; @@ -328,8 +342,9 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP) return; WARN_ON(old != res); - res->valid = false; +out: + res->valid = false; rdma_restrack_put(res); wait_for_completion(&res->comp); } diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 13f43ab7220b..a96030b784eb 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -285,8 +285,11 @@ static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir) { - if (is_pci_p2pdma_page(sg_page(sg))) + if (is_pci_p2pdma_page(sg_page(sg))) { + if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) + return 0; return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); + } return ib_dma_map_sg(dev, sg, sg_cnt, dir); } diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8c930bf1df89..89a831fa1885 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1435,7 +1435,8 @@ enum opa_pr_supported { }; /** - * Check if current PR query can be an 
OPA query. + * opa_pr_query_possible - Check if current PR query can be an OPA query. + * * Retuns PR_NOT_SUPPORTED if a path record query is not * possible, PR_OPA_SUPPORTED if an OPA path record query * is possible and PR_IB_SUPPORTED if an IB path record diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 914cddea525d..b8abb30f80df 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -165,9 +165,11 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "%d: %s\n", attr.state, - attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ? - state_name[attr.state] : "UNKNOWN"); + return sysfs_emit(buf, "%d: %s\n", attr.state, + attr.state >= 0 && + attr.state < ARRAY_SIZE(state_name) ? + state_name[attr.state] : + "UNKNOWN"); } static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused, @@ -180,7 +182,7 @@ static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "0x%x\n", attr.lid); + return sysfs_emit(buf, "0x%x\n", attr.lid); } static ssize_t lid_mask_count_show(struct ib_port *p, @@ -194,7 +196,7 @@ static ssize_t lid_mask_count_show(struct ib_port *p, if (ret) return ret; - return sprintf(buf, "%d\n", attr.lmc); + return sysfs_emit(buf, "%d\n", attr.lmc); } static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused, @@ -207,7 +209,7 @@ static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "0x%x\n", attr.sm_lid); + return sysfs_emit(buf, "0x%x\n", attr.sm_lid); } static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused, @@ -220,7 +222,7 @@ static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "%d\n", attr.sm_sl); + return sysfs_emit(buf, "%d\n", attr.sm_sl); } static ssize_t cap_mask_show(struct 
ib_port *p, struct port_attribute *unused, @@ -233,7 +235,7 @@ static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "0x%08x\n", attr.port_cap_flags); + return sysfs_emit(buf, "0x%08x\n", attr.port_cap_flags); } static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, @@ -273,6 +275,10 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, speed = " HDR"; rate = 500; break; + case IB_SPEED_NDR: + speed = " NDR"; + rate = 1000; + break; case IB_SPEED_SDR: default: /* default to SDR for invalid rates */ speed = " SDR"; @@ -284,9 +290,9 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, if (rate < 0) return -EINVAL; - return sprintf(buf, "%d%s Gb/sec (%dX%s)\n", - rate / 10, rate % 10 ? ".5" : "", - ib_width_enum_to_int(attr.active_width), speed); + return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10, + rate % 10 ? ".5" : "", + ib_width_enum_to_int(attr.active_width), speed); } static const char *phys_state_to_str(enum ib_port_phys_state phys_state) @@ -318,21 +324,28 @@ static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "%d: %s\n", attr.phys_state, - phys_state_to_str(attr.phys_state)); + return sysfs_emit(buf, "%d: %s\n", attr.phys_state, + phys_state_to_str(attr.phys_state)); } static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused, char *buf) { + const char *output; + switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) { case IB_LINK_LAYER_INFINIBAND: - return sprintf(buf, "%s\n", "InfiniBand"); + output = "InfiniBand"; + break; case IB_LINK_LAYER_ETHERNET: - return sprintf(buf, "%s\n", "Ethernet"); + output = "Ethernet"; + break; default: - return sprintf(buf, "%s\n", "Unknown"); + output = "Unknown"; + break; } + + return sysfs_emit(buf, "%s\n", output); } static PORT_ATTR_RO(state); @@ -358,27 +371,28 @@ static struct 
attribute *port_default_attrs[] = { NULL }; -static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) +static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) { struct net_device *ndev; - size_t ret = -EINVAL; + int ret = -EINVAL; rcu_read_lock(); ndev = rcu_dereference(gid_attr->ndev); if (ndev) - ret = sprintf(buf, "%s\n", ndev->name); + ret = sysfs_emit(buf, "%s\n", ndev->name); rcu_read_unlock(); return ret; } -static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) +static ssize_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) { - return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type)); + return sysfs_emit(buf, "%s\n", + ib_cache_gid_type_str(gid_attr->gid_type)); } static ssize_t _show_port_gid_attr( struct ib_port *p, struct port_attribute *attr, char *buf, - size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) + ssize_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); @@ -401,7 +415,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); const struct ib_gid_attr *gid_attr; - ssize_t ret; + int len; gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); if (IS_ERR(gid_attr)) { @@ -416,12 +430,12 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, * space throwing such error on fail to read gid, return zero * GID as before. This maintains backward compatibility. 
*/ - return sprintf(buf, "%pI6\n", zgid.raw); + return sysfs_emit(buf, "%pI6\n", zgid.raw); } - ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw); + len = sysfs_emit(buf, "%pI6\n", gid_attr->gid.raw); rdma_put_gid_attr(gid_attr); - return ret; + return len; } static ssize_t show_port_gid_attr_ndev(struct ib_port *p, @@ -443,13 +457,13 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr, struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); u16 pkey; - ssize_t ret; + int ret; ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey); if (ret) return ret; - return sprintf(buf, "0x%04x\n", pkey); + return sysfs_emit(buf, "0x%04x\n", pkey); } #define PORT_PMA_ATTR(_name, _counter, _width, _offset) \ @@ -521,8 +535,9 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, container_of(attr, struct port_table_attribute, attr); int offset = tab_attr->index & 0xffff; int width = (tab_attr->index >> 16) & 0xff; - ssize_t ret; + int ret; u8 data[8]; + int len; ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data, 40 + offset / 8, sizeof(data)); @@ -531,30 +546,27 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, switch (width) { case 4: - ret = sprintf(buf, "%u\n", (*data >> - (4 - (offset % 8))) & 0xf); + len = sysfs_emit(buf, "%u\n", + (*data >> (4 - (offset % 8))) & 0xf); break; case 8: - ret = sprintf(buf, "%u\n", *data); + len = sysfs_emit(buf, "%u\n", *data); break; case 16: - ret = sprintf(buf, "%u\n", - be16_to_cpup((__be16 *)data)); + len = sysfs_emit(buf, "%u\n", be16_to_cpup((__be16 *)data)); break; case 32: - ret = sprintf(buf, "%u\n", - be32_to_cpup((__be32 *)data)); + len = sysfs_emit(buf, "%u\n", be32_to_cpup((__be32 *)data)); break; case 64: - ret = sprintf(buf, "%llu\n", - be64_to_cpup((__be64 *)data)); + len = sysfs_emit(buf, "%llu\n", be64_to_cpup((__be64 *)data)); break; - default: - ret = 0; + len = 0; + 
break; } - return ret; + return len; } static PORT_PMA_ATTR(symbol_error , 0, 16, 32); @@ -815,12 +827,12 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats, return 0; } -static ssize_t print_hw_stat(struct ib_device *dev, int port_num, - struct rdma_hw_stats *stats, int index, char *buf) +static int print_hw_stat(struct ib_device *dev, int port_num, + struct rdma_hw_stats *stats, int index, char *buf) { u64 v = rdma_counter_get_hwstat_value(dev, port_num, index); - return sprintf(buf, "%llu\n", stats->value[index] + v); + return sysfs_emit(buf, "%llu\n", stats->value[index] + v); } static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr, @@ -877,7 +889,7 @@ static ssize_t show_stats_lifespan(struct kobject *kobj, msecs = jiffies_to_msecs(stats->lifespan); mutex_unlock(&stats->lock); - return sprintf(buf, "%d\n", msecs); + return sysfs_emit(buf, "%d\n", msecs); } static ssize_t set_stats_lifespan(struct kobject *kobj, @@ -1224,21 +1236,34 @@ err_put: return ret; } +static const char *node_type_string(int node_type) +{ + switch (node_type) { + case RDMA_NODE_IB_CA: + return "CA"; + case RDMA_NODE_IB_SWITCH: + return "switch"; + case RDMA_NODE_IB_ROUTER: + return "router"; + case RDMA_NODE_RNIC: + return "RNIC"; + case RDMA_NODE_USNIC: + return "usNIC"; + case RDMA_NODE_USNIC_UDP: + return "usNIC UDP"; + case RDMA_NODE_UNSPECIFIED: + return "unspecified"; + } + return "<unknown>"; +} + static ssize_t node_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); - switch (dev->node_type) { - case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); - case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); - case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type); - case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type); - case RDMA_NODE_UNSPECIFIED: return sprintf(buf, "%d: 
unspecified\n", dev->node_type); - case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); - case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); - default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); - } + return sysfs_emit(buf, "%d: %s\n", dev->node_type, + node_type_string(dev->node_type)); } static DEVICE_ATTR_RO(node_type); @@ -1246,12 +1271,13 @@ static ssize_t sys_image_guid_show(struct device *device, struct device_attribute *dev_attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); + __be16 *guid = (__be16 *)&dev->attrs.sys_image_guid; - return sprintf(buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]), - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]), - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]), - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3])); + return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n", + be16_to_cpu(guid[0]), + be16_to_cpu(guid[1]), + be16_to_cpu(guid[2]), + be16_to_cpu(guid[3])); } static DEVICE_ATTR_RO(sys_image_guid); @@ -1259,12 +1285,13 @@ static ssize_t node_guid_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); + __be16 *node_guid = (__be16 *)&dev->node_guid; - return sprintf(buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) &dev->node_guid)[0]), - be16_to_cpu(((__be16 *) &dev->node_guid)[1]), - be16_to_cpu(((__be16 *) &dev->node_guid)[2]), - be16_to_cpu(((__be16 *) &dev->node_guid)[3])); + return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n", + be16_to_cpu(node_guid[0]), + be16_to_cpu(node_guid[1]), + be16_to_cpu(node_guid[2]), + be16_to_cpu(node_guid[3])); } static DEVICE_ATTR_RO(node_guid); @@ -1273,7 +1300,7 @@ static ssize_t node_desc_show(struct device *device, { struct ib_device *dev = rdma_device_to_ibdev(device); - return sprintf(buf, "%.64s\n", dev->node_desc); + return sysfs_emit(buf, "%.64s\n", 
dev->node_desc); } static ssize_t node_desc_store(struct device *device, @@ -1300,10 +1327,11 @@ static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); + char version[IB_FW_VERSION_NAME_MAX] = {}; + + ib_get_device_fw_str(dev, version); - ib_get_device_fw_str(dev, buf); - strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX); - return strlen(buf); + return sysfs_emit(buf, "%s\n", version); } static DEVICE_ATTR_RO(fw_ver); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ffe2563ad345..7dab9a27a145 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1825,7 +1825,7 @@ static ssize_t show_abi_version(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); + return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); } static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index e9fecbdf391b..7ca4112e3e8f 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -84,6 +84,15 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, dma_addr_t mask; int i; + if (umem->is_odp) { + unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift); + + /* ODP must always be self consistent. */ + if (!(pgsz_bitmap & page_size)) + return 0; + return page_size; + } + /* rdma_for_each_block() has a bug if the page size is smaller than the * page size used to build the umem. For now prevent smaller page sizes * from being returned. 
@@ -220,10 +229,10 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, cur_base += ret * PAGE_SIZE; npages -= ret; - sg = __sg_alloc_table_from_pages( - &umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT, - dma_get_max_seg_size(device->dma_device), sg, npages, - GFP_KERNEL); + sg = __sg_alloc_table_from_pages(&umem->sg_head, page_list, ret, + 0, ret << PAGE_SHIFT, + ib_dma_max_seg_size(device), sg, npages, + GFP_KERNEL); umem->sg_nents = umem->sg_head.nents; if (IS_ERR(sg)) { unpin_user_pages_dirty_lock(page_list, ret, 0); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index b0d0b522cc76..19104a675691 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1191,7 +1191,7 @@ static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr, if (!port) return -ENODEV; - return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev)); + return sysfs_emit(buf, "%s\n", dev_name(&port->ib_dev->dev)); } static DEVICE_ATTR_RO(ibdev); @@ -1203,7 +1203,7 @@ static ssize_t port_show(struct device *dev, struct device_attribute *attr, if (!port) return -ENODEV; - return sprintf(buf, "%d\n", port->port_num); + return sysfs_emit(buf, "%d\n", port->port_num); } static DEVICE_ATTR_RO(port); @@ -1222,7 +1222,7 @@ static char *umad_devnode(struct device *dev, umode_t *mode) static ssize_t abi_version_show(struct class *class, struct class_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); + return sysfs_emit(buf, "%d\n", IB_USER_MAD_ABI_VERSION); } static CLASS_ATTR_RO(abi_version); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 418d133a8fb0..98a5d36813ff 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -681,8 +681,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, return 0; ret = ib_dealloc_xrcd_user(xrcd, 
&attrs->driver_udata); - - if (ib_is_destroy_retryable(ret, why, uobject)) { + if (ret) { atomic_inc(&xrcd->usecnt); return ret; } @@ -690,7 +689,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, if (inode) xrcd_table_delete(dev, inode); - return ret; + return 0; } static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) @@ -710,29 +709,20 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; - ret = ib_check_mr_access(cmd.access_flags); - if (ret) - return ret; - uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); + ret = ib_check_mr_access(ib_dev, cmd.access_flags); + if (ret) + goto err_free; + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err_free; } - if (cmd.access_flags & IB_ACCESS_ON_DEMAND) { - if (!(pd->device->attrs.device_cap_flags & - IB_DEVICE_ON_DEMAND_PAGING)) { - pr_debug("ODP support not available\n"); - ret = -EINVAL; - goto err_put; - } - } - mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, cmd.access_flags, &attrs->driver_udata); @@ -774,23 +764,28 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_rereg_mr cmd; struct ib_uverbs_rereg_mr_resp resp; - struct ib_pd *pd = NULL; struct ib_mr *mr; - struct ib_pd *old_pd; int ret; struct ib_uobject *uobj; + struct ib_uobject *new_uobj; + struct ib_device *ib_dev; + struct ib_pd *orig_pd; + struct ib_pd *new_pd; + struct ib_mr *new_mr; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; - if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) + if (!cmd.flags) return -EINVAL; + if (cmd.flags & ~IB_MR_REREG_SUPPORTED) + return -EOPNOTSUPP; + if ((cmd.flags & IB_MR_REREG_TRANS) && - (!cmd.start || !cmd.hca_va || 0 >= cmd.length || - (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) - return -EINVAL; + (cmd.start 
& ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) + return -EINVAL; uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs); if (IS_ERR(uobj)) @@ -804,36 +799,74 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) } if (cmd.flags & IB_MR_REREG_ACCESS) { - ret = ib_check_mr_access(cmd.access_flags); + ret = ib_check_mr_access(mr->device, cmd.access_flags); if (ret) goto put_uobjs; } + orig_pd = mr->pd; if (cmd.flags & IB_MR_REREG_PD) { - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, - attrs); - if (!pd) { + new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, + attrs); + if (!new_pd) { ret = -EINVAL; goto put_uobjs; } + } else { + new_pd = mr->pd; } - old_pd = mr->pd; - ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start, - cmd.length, cmd.hca_va, - cmd.access_flags, pd, - &attrs->driver_udata); - if (ret) + /* + * The driver might create a new HW object as part of the rereg, we need + * to have a uobject ready to hold it. + */ + new_uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev); + if (IS_ERR(new_uobj)) { + ret = PTR_ERR(new_uobj); goto put_uobj_pd; - - if (cmd.flags & IB_MR_REREG_PD) { - atomic_inc(&pd->usecnt); - mr->pd = pd; - atomic_dec(&old_pd->usecnt); } - if (cmd.flags & IB_MR_REREG_TRANS) - mr->iova = cmd.hca_va; + new_mr = ib_dev->ops.rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length, + cmd.hca_va, cmd.access_flags, new_pd, + &attrs->driver_udata); + if (IS_ERR(new_mr)) { + ret = PTR_ERR(new_mr); + goto put_new_uobj; + } + if (new_mr) { + new_mr->device = new_pd->device; + new_mr->pd = new_pd; + new_mr->type = IB_MR_TYPE_USER; + new_mr->dm = NULL; + new_mr->sig_attrs = NULL; + new_mr->uobject = uobj; + atomic_inc(&new_pd->usecnt); + new_mr->iova = cmd.hca_va; + new_uobj->object = new_mr; + + rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR); + rdma_restrack_set_name(&new_mr->res, NULL); + rdma_restrack_add(&new_mr->res); + + /* + * The new uobj for the new HW object is put into the same spot + * in the 
IDR and the old uobj & HW object is deleted. + */ + rdma_assign_uobject(uobj, new_uobj, attrs); + rdma_alloc_commit_uobject(new_uobj, attrs); + uobj_put_destroy(uobj); + new_uobj = NULL; + uobj = NULL; + mr = new_mr; + } else { + if (cmd.flags & IB_MR_REREG_PD) { + atomic_dec(&orig_pd->usecnt); + mr->pd = new_pd; + atomic_inc(&new_pd->usecnt); + } + if (cmd.flags & IB_MR_REREG_TRANS) + mr->iova = cmd.hca_va; + } memset(&resp, 0, sizeof(resp)); resp.lkey = mr->lkey; @@ -841,12 +874,16 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) ret = uverbs_response(attrs, &resp, sizeof(resp)); +put_new_uobj: + if (new_uobj) + uobj_alloc_abort(new_uobj, attrs); put_uobj_pd: if (cmd.flags & IB_MR_REREG_PD) - uobj_put_obj_read(pd); + uobj_put_obj_read(new_pd); put_uobjs: - uobj_put_write(uobj); + if (uobj) + uobj_put_write(uobj); return ret; } @@ -1401,8 +1438,8 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, - obj); + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj, + NULL); if (IS_ERR(qp)) { ret = PTR_ERR(qp); @@ -1906,8 +1943,7 @@ static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs) if (ret) return ret; - if (cmd.base.attr_mask & - ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1)) + if (cmd.base.attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; return modify_qp(attrs, &cmd); @@ -1929,10 +1965,7 @@ static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs) * Last bit is reserved for extending the attr_mask by * using another field. 
*/ - BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1ULL << 31)); - - if (cmd.base.attr_mask & - ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) + if (cmd.base.attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) return -EOPNOTSUPP; ret = modify_qp(attrs, &cmd); @@ -3693,13 +3726,13 @@ const struct uapi_definition uverbs_def_write_intf[] = { ib_uverbs_create_ah, UAPI_DEF_WRITE_UDATA_IO( struct ib_uverbs_create_ah, - struct ib_uverbs_create_ah_resp), - UAPI_DEF_METHOD_NEEDS_FN(create_ah)), + struct ib_uverbs_create_ah_resp)), DECLARE_UVERBS_WRITE( IB_USER_VERBS_CMD_DESTROY_AH, ib_uverbs_destroy_ah, - UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah), - UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))), + UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah)), + UAPI_DEF_OBJ_NEEDS_FN(create_user_ah), + UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)), DECLARE_UVERBS_OBJECT( UVERBS_OBJECT_COMP_CHANNEL, @@ -3753,7 +3786,7 @@ const struct uapi_definition uverbs_def_write_intf[] = { IB_USER_VERBS_EX_CMD_MODIFY_CQ, ib_uverbs_ex_modify_cq, UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq), - UAPI_DEF_METHOD_NEEDS_FN(create_cq))), + UAPI_DEF_METHOD_NEEDS_FN(modify_cq))), DECLARE_UVERBS_OBJECT( UVERBS_OBJECT_DEVICE, @@ -3999,8 +4032,7 @@ const struct uapi_definition uverbs_def_write_intf[] = { DECLARE_UVERBS_WRITE( IB_USER_VERBS_CMD_CLOSE_XRCD, ib_uverbs_close_xrcd, - UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd), - UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)), + UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd)), DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP, ib_uverbs_open_qp, UAPI_DEF_WRITE_UDATA_IO( @@ -4010,8 +4042,9 @@ const struct uapi_definition uverbs_def_write_intf[] = { ib_uverbs_open_xrcd, UAPI_DEF_WRITE_UDATA_IO( struct ib_uverbs_open_xrcd, - struct ib_uverbs_open_xrcd_resp), - UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))), + struct ib_uverbs_open_xrcd_resp)), + UAPI_DEF_OBJ_NEEDS_FN(alloc_xrcd), + UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)), {}, }; diff --git a/drivers/infiniband/core/uverbs_main.c 
b/drivers/infiniband/core/uverbs_main.c index 4bb7c642f80c..f173ecd102dc 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1046,7 +1046,7 @@ static ssize_t ibdev_show(struct device *device, struct device_attribute *attr, srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) - ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev)); + ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev)); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; @@ -1065,7 +1065,7 @@ static ssize_t abi_version_show(struct device *device, srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) - ret = sprintf(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); + ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 0658101fca00..13776a66e2e4 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -88,7 +88,7 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, return -EBUSY; ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; for (i = 0; i < table_size; i++) @@ -96,7 +96,7 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, kfree(rwq_ind_tbl); kfree(ind_tbl); - return ret; + return 0; } static int uverbs_free_xrcd(struct ib_uobject *uobject, @@ -108,9 +108,8 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject, container_of(uobject, struct ib_uxrcd_object, uobject); int ret; - ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&uxrcd->refcnt)) + return -EBUSY; 
mutex_lock(&attrs->ufile->device->xrcd_tree_mutex); ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs); @@ -124,11 +123,9 @@ static int uverbs_free_pd(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_pd *pd = uobject->object; - int ret; - ret = ib_destroy_usecnt(&pd->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&pd->usecnt)) + return -EBUSY; return ib_dealloc_pd_user(pd, &attrs->driver_udata); } @@ -157,7 +154,7 @@ void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue) spin_unlock_irq(&event_queue->lock); } -static int +static void uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, enum rdma_remove_reason why) { @@ -166,7 +163,6 @@ uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, uobj); ib_uverbs_free_event_queue(&file->ev_queue); - return 0; } int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c index 61899eaf1f91..cc24cfdf7aee 100644 --- a/drivers/infiniband/core/uverbs_std_types_async_fd.c +++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c @@ -19,8 +19,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)( return 0; } -static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct ib_uverbs_async_event_file *event_file = container_of(uobj, struct ib_uverbs_async_event_file, uobj); @@ -30,7 +30,6 @@ static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, if (why == RDMA_REMOVE_DRIVER_REMOVE) ib_uverbs_async_handler(event_file, 0, IB_EVENT_DEVICE_FATAL, NULL, NULL); - return 0; } int uverbs_async_event_release(struct inode *inode, struct file *filp) diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c 
b/drivers/infiniband/core/uverbs_std_types_counters.c index b3c6c066b601..999da9c79866 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -42,9 +42,8 @@ static int uverbs_free_counters(struct ib_uobject *uobject, struct ib_counters *counters = uobject->object; int ret; - ret = ib_destroy_usecnt(&counters->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&counters->usecnt)) + return -EBUSY; ret = counters->device->ops.destroy_counters(counters); if (ret) diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 8dabd05988b2..370ad7c83f88 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -46,7 +46,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, int ret; ret = ib_destroy_cq_user(cq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; ib_uverbs_release_ucq( @@ -55,7 +55,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, ev_queue) : NULL, ucq); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 302f898c5833..9ec6971056fa 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -317,8 +317,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( struct ib_device *ib_dev; size_t user_entry_size; ssize_t num_entries; - size_t max_entries; - size_t num_bytes; + int max_entries; u32 flags; int ret; @@ -336,19 +335,16 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, user_entry_size); if (max_entries <= 0) - return -EINVAL; + return max_entries ?: -EINVAL; ucontext = ib_uverbs_get_ucontext(attrs); if (IS_ERR(ucontext)) return 
PTR_ERR(ucontext); ib_dev = ucontext->device; - if (check_mul_overflow(max_entries, sizeof(*entries), &num_bytes)) - return -EINVAL; - - entries = uverbs_zalloc(attrs, num_bytes); - if (!entries) - return -ENOMEM; + entries = uverbs_kcalloc(attrs, max_entries, sizeof(*entries)); + if (IS_ERR(entries)) + return PTR_ERR(entries); num_entries = rdma_query_gid_table(ib_dev, entries, max_entries); if (num_entries < 0) diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index d5a1de33c2c9..98c522cf86d6 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -39,11 +39,9 @@ static int uverbs_free_dm(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_dm *dm = uobject->object; - int ret; - ret = ib_destroy_usecnt(&dm->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&dm->usecnt)) + return -EBUSY; return dm->device->ops.dealloc_dm(dm, attrs); } diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index 459cf165b231..d42ed7ff223e 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -39,11 +39,9 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_flow_action *action = uobject->object; - int ret; - ret = ib_destroy_usecnt(&action->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&action->usecnt)) + return -EBUSY; return action->device->ops.destroy_flow_action(action); } diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 9b22bb553e8b..dd4e76b26c74 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -33,6 +33,7 @@ #include "rdma_core.h" #include "uverbs.h" #include 
<rdma/uverbs_std_types.h> +#include "restrack.h" static int uverbs_free_mr(struct ib_uobject *uobject, enum rdma_remove_reason why, @@ -114,7 +115,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( if (!(attr.access_flags & IB_ZERO_BASED)) return -EINVAL; - ret = ib_check_mr_access(attr.access_flags); + ret = ib_check_mr_access(ib_dev, attr.access_flags); if (ret) return ret; @@ -134,6 +135,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( atomic_inc(&pd->usecnt); atomic_inc(&dm->usecnt); + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_set_name(&mr->res, NULL); + rdma_restrack_add(&mr->res); uobj->object = mr; uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE); diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c index 3bf8dcdfe7eb..c00cfb5ed387 100644 --- a/drivers/infiniband/core/uverbs_std_types_qp.c +++ b/drivers/infiniband/core/uverbs_std_types_qp.c @@ -32,14 +32,14 @@ static int uverbs_free_qp(struct ib_uobject *uobject, } ret = ib_destroy_qp_user(qp, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; if (uqp->uxrcd) atomic_dec(&uqp->uxrcd->refcnt); ib_uverbs_release_uevent(&uqp->uevent); - return ret; + return 0; } static int check_creation_flags(enum ib_qp_type qp_type, @@ -251,8 +251,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)( if (attr.qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, - obj); + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj, + NULL); if (IS_ERR(qp)) { ret = PTR_ERR(qp); diff --git a/drivers/infiniband/core/uverbs_std_types_srq.c b/drivers/infiniband/core/uverbs_std_types_srq.c index c0ecbba26bf4..e5513f828bdc 100644 --- a/drivers/infiniband/core/uverbs_std_types_srq.c +++ b/drivers/infiniband/core/uverbs_std_types_srq.c @@ -18,7 +18,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, int 
ret; ret = ib_destroy_srq_user(srq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; if (srq_type == IB_SRQT_XRC) { @@ -30,7 +30,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, } ib_uverbs_release_uevent(uevent); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_CREATE)( diff --git a/drivers/infiniband/core/uverbs_std_types_wq.c b/drivers/infiniband/core/uverbs_std_types_wq.c index f2e6a625724a..7ded8339346f 100644 --- a/drivers/infiniband/core/uverbs_std_types_wq.c +++ b/drivers/infiniband/core/uverbs_std_types_wq.c @@ -17,11 +17,11 @@ static int uverbs_free_wq(struct ib_uobject *uobject, int ret; ret = ib_destroy_wq_user(wq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; ib_uverbs_release_uevent(&uwq->uevent); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_WQ_CREATE)( diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 5addc8fae3f3..62f5bcb712cf 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -79,10 +79,7 @@ static int uapi_create_write(struct uverbs_api *uapi, method_elm->is_ex = def->write.is_ex; method_elm->handler = def->func_write; - if (def->write.is_ex) - method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask & - BIT_ULL(def->write.command_num)); - else + if (!def->write.is_ex) method_elm->disabled = !(ibdev->uverbs_cmd_mask & BIT_ULL(def->write.command_num)); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 740f8454b6b4..9137a25bb521 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -244,7 +244,7 @@ EXPORT_SYMBOL(rdma_port_get_link_layer); /* Protection domains */ /** - * ib_alloc_pd - Allocates an unused protection domain. + * __ib_alloc_pd - Allocates an unused protection domain. * @device: The device on which to allocate the protection domain. 
* @flags: protection domain flags * @caller: caller's build-time module name @@ -516,7 +516,7 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE); - if (!device->ops.create_ah) + if (!udata && !device->ops.create_ah) return ERR_PTR(-EOPNOTSUPP); ah = rdma_zalloc_drv_obj_gfp( @@ -533,7 +533,10 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, init_attr.flags = flags; init_attr.xmit_slave = xmit_slave; - ret = device->ops.create_ah(ah, &init_attr, udata); + if (udata) + ret = device->ops.create_user_ah(ah, &init_attr, udata); + else + ret = device->ops.create_ah(ah, &init_attr, NULL); if (ret) { kfree(ah); return ERR_PTR(ret); @@ -1188,17 +1191,19 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, } /** - * ib_create_qp - Creates a kernel QP associated with the specified protection + * ib_create_named_qp - Creates a kernel QP associated with the specified protection * domain. * @pd: The protection domain associated with the QP. * @qp_init_attr: A list of initial attributes required to create the * QP. If QP creation succeeds, then the attributes are updated to * the actual capabilities of the created QP. + * @caller: caller's build-time module name * * NOTE: for user qp use ib_create_qp_user with valid udata! */ -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr) +struct ib_qp *ib_create_named_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + const char *caller) { struct ib_device *device = pd ? 
pd->device : qp_init_attr->xrcd->device; struct ib_qp *qp; @@ -1223,7 +1228,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, if (qp_init_attr->cap.max_rdma_ctxs) rdma_rw_init_qp(device, qp_init_attr); - qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL); + qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL, caller); if (IS_ERR(qp)) return qp; @@ -1289,7 +1294,7 @@ err: return ERR_PTR(ret); } -EXPORT_SYMBOL(ib_create_qp); +EXPORT_SYMBOL(ib_create_named_qp); static const struct { int valid; @@ -1662,7 +1667,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp) qp->qp_type == IB_QPT_XRC_TGT); } -/** +/* * IB core internal function to perform QP attributes modification. */ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, @@ -1698,8 +1703,10 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, slave = rdma_lag_get_ah_roce_slave(qp->device, &attr->ah_attr, GFP_KERNEL); - if (IS_ERR(slave)) + if (IS_ERR(slave)) { + ret = PTR_ERR(slave); goto out_av; + } attr->xmit_slave = slave; } } diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index cf3db9628397..401bdc9e931e 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1271,10 +1271,12 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, } qplqp->mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu)); qplqp->dpi = &rdev->dpi_privileged; /* Doorbell page */ - if (init_attr->create_flags) + if (init_attr->create_flags) { ibdev_dbg(&rdev->ibdev, "QP create flags 0x%x not supported", init_attr->create_flags); + return -EOPNOTSUPP; + } /* Setup CQs */ if (init_attr->send_cq) { @@ -1657,8 +1659,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.max_wqe = entries; srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; - srq->qplib_srq.wqe_size = - bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge); + /* 128 byte wqe size for SRQ . 
So use max sges */ + srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges); srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; @@ -1829,6 +1831,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, unsigned int flags; u8 nw_type; + if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp->qplib_qp.modify_flags = 0; if (qp_attr_mask & IB_QP_STATE) { curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state); @@ -2078,6 +2083,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, goto out; } qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state); + qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state); qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0; qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access); qp_attr->pkey_index = qplib_qp->pkey_index; @@ -2827,6 +2833,9 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct bnxt_qplib_nq *nq = NULL; unsigned int nq_alloc_cnt; + if (attr->flags) + return -EOPNOTSUPP; + /* Validate CQ fields */ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) { ibdev_err(&rdev->ibdev, "Failed to create CQ -max exceeded"); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 04621ba8fa76..fdb8c2478258 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -608,7 +608,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct bnxt_re_dev *rdev = rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor); + return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor); } static DEVICE_ATTR_RO(hw_rev); @@ -618,7 +618,7 @@ static ssize_t hca_type_show(struct device *device, struct bnxt_re_dev *rdev = 
rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc); + return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc); } static DEVICE_ATTR_RO(hca_type); @@ -646,6 +646,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .create_cq = bnxt_re_create_cq, .create_qp = bnxt_re_create_qp, .create_srq = bnxt_re_create_srq, + .create_user_ah = bnxt_re_create_ah, .dealloc_driver = bnxt_re_dealloc_driver, .dealloc_pd = bnxt_re_dealloc_pd, .dealloc_ucontext = bnxt_re_dealloc_ucontext, @@ -701,35 +702,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->dev.parent = &rdev->en_dev->pdev->dev; ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; - /* User space */ - ibdev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */ - - rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); ib_set_device_ops(ibdev, &bnxt_re_dev_ops); ret = 
ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 64d44f51db4b..6316179583a6 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -118,7 +118,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, * 128 WQEs needs to be reserved for the HW (8916). Prevent * reporting the max number */ - attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS; + attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ? 6 : sb->max_sge; attr->max_cq = le32_to_cpu(sb->max_cq); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 28349ed50885..44c2416588d4 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -1006,6 +1006,9 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, pr_debug("ib_dev %p entries %d\n", ibdev, entries); if (attr->flags) + return -EOPNOTSUPP; + + if (entries < 1 || entries > ibdev->attrs.max_cqe) return -EINVAL; if (vector >= rhp->rdev.lldi.nciq) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index a27899402f59..f85477f3b037 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -983,9 +983,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -int c4iw_dealloc_mw(struct ib_mw *mw); void c4iw_dealloc(struct uld_ctx *ctx); -int c4iw_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 42234df896fb..a2c71a1d93d5 100644 --- 
a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -365,22 +365,6 @@ static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size, pbl_size, pbl_addr, skb, wr_waitp); } -static int allocate_window(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, - struct c4iw_wr_wait *wr_waitp) -{ - *stag = T4_STAG_UNSET; - return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0, - 0UL, 0, 0, 0, 0, NULL, wr_waitp); -} - -static int deallocate_window(struct c4iw_rdev *rdev, u32 stag, - struct sk_buff *skb, - struct c4iw_wr_wait *wr_waitp) -{ - return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0, - 0, skb, wr_waitp); -} - static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr, struct c4iw_wr_wait *wr_waitp) @@ -611,74 +595,6 @@ err_free_mhp: return ERR_PTR(err); } -int c4iw_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) -{ - struct c4iw_mw *mhp = to_c4iw_mw(ibmw); - struct c4iw_dev *rhp; - struct c4iw_pd *php; - u32 mmid; - u32 stag = 0; - int ret; - - if (ibmw->type != IB_MW_TYPE_1) - return -EINVAL; - - php = to_c4iw_pd(ibmw->pd); - rhp = php->rhp; - mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); - if (!mhp->wr_waitp) - return -ENOMEM; - - mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); - if (!mhp->dereg_skb) { - ret = -ENOMEM; - goto free_wr_wait; - } - - ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp); - if (ret) - goto free_skb; - - mhp->rhp = rhp; - mhp->attr.pdid = php->pdid; - mhp->attr.type = FW_RI_STAG_MW; - mhp->attr.stag = stag; - mmid = (stag) >> 8; - ibmw->rkey = stag; - if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { - ret = -ENOMEM; - goto dealloc_win; - } - pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag); - return 0; - -dealloc_win: - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, - mhp->wr_waitp); -free_skb: - kfree_skb(mhp->dereg_skb); -free_wr_wait: - c4iw_put_wr_wait(mhp->wr_waitp); - return 
ret; -} - -int c4iw_dealloc_mw(struct ib_mw *mw) -{ - struct c4iw_dev *rhp; - struct c4iw_mw *mhp; - u32 mmid; - - mhp = to_c4iw_mw(mw); - rhp = mhp->rhp; - mmid = (mw->rkey) >> 8; - xa_erase_irq(&rhp->mrs, mmid); - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, - mhp->wr_waitp); - kfree_skb(mhp->dereg_skb); - c4iw_put_wr_wait(mhp->wr_waitp); - return 0; -} - struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 8138c57a1e43..1f1f856f8715 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -322,8 +322,9 @@ static ssize_t hw_rev_show(struct device *dev, rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); pr_debug("dev 0x%p\n", dev); - return sprintf(buf, "%d\n", - CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); + return sysfs_emit( + buf, "%d\n", + CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); } static DEVICE_ATTR_RO(hw_rev); @@ -337,7 +338,7 @@ static ssize_t hca_type_show(struct device *dev, pr_debug("dev 0x%p\n", dev); lldev->ethtool_ops->get_drvinfo(lldev, &info); - return sprintf(buf, "%s\n", info.driver); + return sysfs_emit(buf, "%s\n", info.driver); } static DEVICE_ATTR_RO(hca_type); @@ -348,8 +349,8 @@ static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); pr_debug("dev 0x%p\n", dev); - return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, - c4iw_dev->rdev.lldi.pdev->device); + return sysfs_emit(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, + c4iw_dev->rdev.lldi.pdev->device); } static DEVICE_ATTR_RO(board_id); @@ -456,13 +457,11 @@ static const struct ib_device_ops c4iw_dev_ops = { .alloc_hw_stats = c4iw_alloc_stats, .alloc_mr = c4iw_alloc_mr, - .alloc_mw = c4iw_alloc_mw, .alloc_pd = c4iw_allocate_pd, .alloc_ucontext = c4iw_alloc_ucontext, 
.create_cq = c4iw_create_cq, .create_qp = c4iw_create_qp, .create_srq = c4iw_create_srq, - .dealloc_mw = c4iw_dealloc_mw, .dealloc_pd = c4iw_deallocate_pd, .dealloc_ucontext = c4iw_dealloc_ucontext, .dereg_mr = c4iw_dereg_mr, @@ -533,28 +532,6 @@ void c4iw_register_device(struct work_struct *work) if (fastreg_support) dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; dev->ibdev.local_dma_lkey = 0; - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ibdev.node_type = RDMA_NODE_RNIC; BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index f20379e4e2ec..a7401398cb34 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2126,7 +2126,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, pr_debug("ib_pd %p\n", pd); - if (attrs->qp_type != IB_QPT_RC) + if (attrs->qp_type != IB_QPT_RC || attrs->create_flags) return ERR_PTR(-EOPNOTSUPP); php = to_c4iw_pd(pd); @@ -2374,6 +2374,9 
@@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, pr_debug("ib_qp %p\n", ibqp); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* iwarp does not support the RTR state */ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) attr_mask &= ~IB_QP_STATE; @@ -2680,6 +2683,9 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, int ret; int wr_len; + if (attrs->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + pr_debug("%s ib_pd %p\n", __func__, pd); php = to_c4iw_pd(pd); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 6faed3a81e08..0f578734bddb 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -245,9 +245,9 @@ static const struct ib_device_ops efa_dev_ops = { .alloc_hw_stats = efa_alloc_hw_stats, .alloc_pd = efa_alloc_pd, .alloc_ucontext = efa_alloc_ucontext, - .create_ah = efa_create_ah, .create_cq = efa_create_cq, .create_qp = efa_create_qp, + .create_user_ah = efa_create_ah, .dealloc_pd = efa_dealloc_pd, .dealloc_ucontext = efa_dealloc_ucontext, .dereg_mr = efa_dereg_mr, @@ -308,27 +308,6 @@ static int efa_ib_device_add(struct efa_dev *dev) dev->ibdev.num_comp_vectors = 1; dev->ibdev.dev.parent = &pdev->dev; - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << 
IB_USER_VERBS_CMD_DESTROY_AH); - - dev->ibdev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE); - ib_set_device_ops(&dev->ibdev, &efa_dev_ops); err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); @@ -405,19 +384,12 @@ static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev) return err; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_width)); if (err) { - dev_err(&pdev->dev, "pci_set_dma_mask failed %d\n", err); + dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", err); return err; } - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); - if (err) { - dev_err(&pdev->dev, - "err_pci_set_consistent_dma_mask failed %d\n", - err); - return err; - } dma_set_max_seg_size(&pdev->dev, UINT_MAX); return 0; } diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 4e940fc50bba..479b604e533a 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -917,6 +917,9 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, enum ib_qp_state new_state; int err; + if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + if (udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen)) { ibdev_dbg(&dev->ibdev, @@ -1029,6 +1032,9 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, ibdev_dbg(ibdev, "create_cq entries %d\n", entries); + if (attr->flags) + return -EOPNOTSUPP; + if (entries < 1 || entries > dev->dev_attr.max_cq_depth) { ibdev_dbg(ibdev, "cq: requested entries[%u] non-positive or greater than max[%u]\n", diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 356518e17fa6..681bb4e918c9 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -339,6 +339,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send) return -EINVAL; 
if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) return -EINVAL; + break; default: break; } diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 074ec71772d2..5650130e68d4 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -151,7 +151,7 @@ struct hfi1_port_attr { static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf) { - return sprintf(buf, "%s\n", ppd->cc_prescan ? "on" : "off"); + return sysfs_emit(buf, "%s\n", ppd->cc_prescan ? "on" : "off"); } static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf, @@ -296,7 +296,7 @@ static ssize_t sc2vl_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, sc2vl_kobj); struct hfi1_devdata *dd = ppd->dd; - return sprintf(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc)); + return sysfs_emit(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc)); } static const struct sysfs_ops hfi1_sc2vl_ops = { @@ -401,7 +401,7 @@ static ssize_t sl2sc_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, sl2sc_kobj); struct hfi1_ibport *ibp = &ppd->ibport_data; - return sprintf(buf, "%u\n", ibp->sl_to_sc[sattr->sl]); + return sysfs_emit(buf, "%u\n", ibp->sl_to_sc[sattr->sl]); } static const struct sysfs_ops hfi1_sl2sc_ops = { @@ -475,7 +475,7 @@ static ssize_t vl2mtu_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, vl2mtu_kobj); struct hfi1_devdata *dd = ppd->dd; - return sprintf(buf, "%u\n", dd->vld[vlattr->vl].mtu); + return sysfs_emit(buf, "%u\n", dd->vld[vlattr->vl].mtu); } static const struct sysfs_ops hfi1_vl2mtu_ops = { @@ -500,7 +500,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct hfi1_ibdev *dev = rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); - return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); + return sysfs_emit(buf, "%x\n", 
dd_from_dev(dev)->minrev); } static DEVICE_ATTR_RO(hw_rev); @@ -510,13 +510,11 @@ static ssize_t board_id_show(struct device *device, struct hfi1_ibdev *dev = rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); - int ret; if (!dd->boardname) - ret = -EINVAL; - else - ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); - return ret; + return -EINVAL; + + return sysfs_emit(buf, "%s\n", dd->boardname); } static DEVICE_ATTR_RO(board_id); @@ -528,7 +526,7 @@ static ssize_t boardversion_show(struct device *device, struct hfi1_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); + return sysfs_emit(buf, "%s", dd->boardversion); } static DEVICE_ATTR_RO(boardversion); @@ -545,9 +543,9 @@ static ssize_t nctxts_show(struct device *device, * and a receive context, so returning the smaller of the two counts * give a more accurate picture of total contexts available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", - min(dd->num_user_contexts, - (u32)dd->sc_sizes[SC_USER].count)); + return sysfs_emit(buf, "%u\n", + min(dd->num_user_contexts, + (u32)dd->sc_sizes[SC_USER].count)); } static DEVICE_ATTR_RO(nctxts); @@ -559,7 +557,7 @@ static ssize_t nfreectxts_show(struct device *device, struct hfi1_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. 
*/ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); + return sysfs_emit(buf, "%u\n", dd->freectxts); } static DEVICE_ATTR_RO(nfreectxts); @@ -570,7 +568,8 @@ static ssize_t serial_show(struct device *device, rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); - return scnprintf(buf, PAGE_SIZE, "%s", dd->serial); + /* dd->serial is already newline terminated in chip.c */ + return sysfs_emit(buf, "%s", dd->serial); } static DEVICE_ATTR_RO(serial); @@ -598,9 +597,8 @@ static DEVICE_ATTR_WO(chip_reset); * Convert the reported temperature from an integer (reported in * units of 0.25C) to a floating point number. */ -#define temp2str(temp, buf, size, idx) \ - scnprintf((buf) + (idx), (size) - (idx), "%u.%02u ", \ - ((temp) >> 2), ((temp) & 0x3) * 25) +#define temp_d(t) ((t) >> 2) +#define temp_f(t) (((t)&0x3) * 25u) /* * Dump tempsense values, in decimal, to ease shell-scripts. @@ -615,19 +613,17 @@ static ssize_t tempsense_show(struct device *device, int ret; ret = hfi1_tempsense_rd(dd, &temp); - if (!ret) { - int idx = 0; - - idx += temp2str(temp.curr, buf, PAGE_SIZE, idx); - idx += temp2str(temp.lo_lim, buf, PAGE_SIZE, idx); - idx += temp2str(temp.hi_lim, buf, PAGE_SIZE, idx); - idx += temp2str(temp.crit_lim, buf, PAGE_SIZE, idx); - idx += scnprintf(buf + idx, PAGE_SIZE - idx, - "%u %u %u\n", temp.triggers & 0x1, - temp.triggers & 0x2, temp.triggers & 0x4); - ret = idx; - } - return ret; + if (ret) + return ret; + + return sysfs_emit(buf, "%u.%02u %u.%02u %u.%02u %u.%02u %u %u %u\n", + temp_d(temp.curr), temp_f(temp.curr), + temp_d(temp.lo_lim), temp_f(temp.lo_lim), + temp_d(temp.hi_lim), temp_f(temp.hi_lim), + temp_d(temp.crit_lim), temp_f(temp.crit_lim), + temp.triggers & 0x1, + temp.triggers & 0x2, + temp.triggers & 0x4); } static DEVICE_ATTR_RO(tempsense); @@ -817,7 +813,7 @@ static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf) if (vl < 0) return vl; - return snprintf(buf, PAGE_SIZE, 
"%d\n", vl); + return sysfs_emit(buf, "%d\n", vl); } static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO, diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 73d197e21730..92aa2a9b3b5a 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2826,6 +2826,7 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, default: break; } + break; default: break; } @@ -3005,6 +3006,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, default: break; } + break; default: break; } @@ -3221,6 +3223,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) req = wqe_to_tid_req(prev); if (req->ack_seg != req->total_segs) goto interlock; + break; default: break; } @@ -3239,9 +3242,11 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) req = wqe_to_tid_req(prev); if (req->ack_seg != req->total_segs) goto interlock; + break; default: break; } + break; default: break; } diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 75b06db60f7c..cc258edec331 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -31,14 +31,11 @@ */ #include <linux/platform_device.h> +#include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> #include "hns_roce_device.h" -#define HNS_ROCE_PORT_NUM_SHIFT 24 -#define HNS_ROCE_VLAN_SL_BIT_MASK 7 -#define HNS_ROCE_VLAN_SL_SHIFT 13 - static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) { u32 fl = ah_attr->grh.flow_label; @@ -58,47 +55,41 @@ static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); - const struct ib_gid_attr *gid_attr; - struct device *dev = hr_dev->dev; - struct hns_roce_ah *ah = to_hr_ah(ibah); struct 
rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - u16 vlan_id = 0xffff; - bool vlan_en = false; - int ret; - - gid_attr = ah_attr->grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); - if (ret) - return ret; - - /* Get mac address */ - memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); + struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); + struct hns_roce_ah *ah = to_hr_ah(ibah); + int ret = 0; - if (vlan_id < VLAN_N_VID) { - vlan_en = true; - vlan_id |= (rdma_ah_get_sl(ah_attr) & - HNS_ROCE_VLAN_SL_BIT_MASK) << - HNS_ROCE_VLAN_SL_SHIFT; - } + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && udata) + return -EOPNOTSUPP; ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; - ah->av.vlan_id = vlan_id; - ah->av.vlan_en = vlan_en; - dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index, - ah->av.vlan_id); if (rdma_ah_get_static_rate(ah_attr)) ah->av.stat_rate = IB_RATE_10_GBPS; - memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); - ah->av.sl = rdma_ah_get_sl(ah_attr); + ah->av.hop_limit = grh->hop_limit; ah->av.flowlabel = grh->flow_label; ah->av.udp_sport = get_ah_udp_sport(ah_attr); + ah->av.sl = rdma_ah_get_sl(ah_attr); + ah->av.tclass = get_tclass(grh); - return 0; + memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); + memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); + + /* HIP08 needs to record vlan info in Address Vector */ + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) { + ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, + &ah->av.vlan_id, NULL); + if (ret) + return ret; + + ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID; + } + + return ret; } int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index a6b23dec1adc..4bcaaa0524b1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ 
b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -159,76 +159,96 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap) void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf) { - struct device *dev = hr_dev->dev; - u32 size = buf->size; - int i; + struct hns_roce_buf_list *trunks; + u32 i; - if (size == 0) + if (!buf) return; - buf->size = 0; + trunks = buf->trunk_list; + if (trunks) { + buf->trunk_list = NULL; + for (i = 0; i < buf->ntrunks; i++) + dma_free_coherent(hr_dev->dev, 1 << buf->trunk_shift, + trunks[i].buf, trunks[i].map); - if (hns_roce_buf_is_direct(buf)) { - dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); - } else { - for (i = 0; i < buf->npages; ++i) - if (buf->page_list[i].buf) - dma_free_coherent(dev, 1 << buf->page_shift, - buf->page_list[i].buf, - buf->page_list[i].map); - kfree(buf->page_list); - buf->page_list = NULL; + kfree(trunks); } + + kfree(buf); } -int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf, u32 page_shift) +/* + * Allocate the dma buffer for storing ROCEE table entries + * + * @size: required size + * @page_shift: the unit size in a continuous dma address range + * @flags: HNS_ROCE_BUF_ flags to control the allocation flow. + */ +struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, + u32 page_shift, u32 flags) { - struct hns_roce_buf_list *buf_list; - struct device *dev = hr_dev->dev; - u32 page_size; - int i; + u32 trunk_size, page_size, alloced_size; + struct hns_roce_buf_list *trunks; + struct hns_roce_buf *buf; + gfp_t gfp_flags; + u32 ntrunk, i; /* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */ - buf->page_shift = max_t(int, HNS_HW_PAGE_SHIFT, page_shift); + if (WARN_ON(page_shift < HNS_HW_PAGE_SHIFT)) + return ERR_PTR(-EINVAL); + + gfp_flags = (flags & HNS_ROCE_BUF_NOSLEEP) ? 
GFP_ATOMIC : GFP_KERNEL; + buf = kzalloc(sizeof(*buf), gfp_flags); + if (!buf) + return ERR_PTR(-ENOMEM); + buf->page_shift = page_shift; page_size = 1 << buf->page_shift; - buf->npages = DIV_ROUND_UP(size, page_size); - - /* required size is not bigger than one trunk size */ - if (size <= max_direct) { - buf->page_list = NULL; - buf->direct.buf = dma_alloc_coherent(dev, size, - &buf->direct.map, - GFP_KERNEL); - if (!buf->direct.buf) - return -ENOMEM; + + /* Calc the trunk size and num by required size and page_shift */ + if (flags & HNS_ROCE_BUF_DIRECT) { + buf->trunk_shift = ilog2(ALIGN(size, PAGE_SIZE)); + ntrunk = 1; } else { - buf_list = kcalloc(buf->npages, sizeof(*buf_list), GFP_KERNEL); - if (!buf_list) - return -ENOMEM; - - for (i = 0; i < buf->npages; i++) { - buf_list[i].buf = dma_alloc_coherent(dev, page_size, - &buf_list[i].map, - GFP_KERNEL); - if (!buf_list[i].buf) - break; - } + buf->trunk_shift = ilog2(ALIGN(page_size, PAGE_SIZE)); + ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift); + } - if (i != buf->npages && i > 0) { - while (i-- > 0) - dma_free_coherent(dev, page_size, - buf_list[i].buf, - buf_list[i].map); - kfree(buf_list); - return -ENOMEM; - } - buf->page_list = buf_list; + trunks = kcalloc(ntrunk, sizeof(*trunks), gfp_flags); + if (!trunks) { + kfree(buf); + return ERR_PTR(-ENOMEM); } - buf->size = size; - return 0; + trunk_size = 1 << buf->trunk_shift; + alloced_size = 0; + for (i = 0; i < ntrunk; i++) { + trunks[i].buf = dma_alloc_coherent(hr_dev->dev, trunk_size, + &trunks[i].map, gfp_flags); + if (!trunks[i].buf) + break; + + alloced_size += trunk_size; + } + + buf->ntrunks = i; + + /* In nofail mode, it's only failed when the alloced size is 0 */ + if ((flags & HNS_ROCE_BUF_NOFAIL) ? 
i == 0 : i != ntrunk) { + for (i = 0; i < buf->ntrunks; i++) + dma_free_coherent(hr_dev->dev, trunk_size, + trunks[i].buf, trunks[i].map); + + kfree(trunks); + kfree(buf); + return ERR_PTR(-ENOMEM); + } + + buf->npages = DIV_ROUND_UP(alloced_size, page_size); + buf->trunk_list = trunks; + + return buf; } int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, @@ -240,7 +260,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, end = start + buf_cnt; if (end > buf->npages) { dev_err(hr_dev->dev, - "Failed to check kmem bufs, end %d + %d total %d!\n", + "failed to check kmem bufs, end %d + %d total %u!\n", start, buf_cnt, buf->npages); return -EINVAL; } @@ -262,7 +282,7 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, u64 addr; if (page_shift < HNS_HW_PAGE_SHIFT) { - dev_err(hr_dev->dev, "Failed to check umem page shift %d!\n", + dev_err(hr_dev->dev, "failed to check umem page shift %u!\n", page_shift); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 455d533dd7c4..339e3fd98b0b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -36,9 +36,9 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" -#define CMD_POLL_TOKEN 0xffff -#define CMD_MAX_NUM 32 -#define CMD_TOKEN_MASK 0x1f +#define CMD_POLL_TOKEN 0xffff +#define CMD_MAX_NUM 32 +#define CMD_TOKEN_MASK 0x1f static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, @@ -60,7 +60,7 @@ static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned int timeout) { struct device *dev = hr_dev->dev; int ret; @@ -78,7 +78,7 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev 
*hr_dev, u64 in_param, static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, unsigned long timeout) + u8 op_modifier, u16 op, unsigned int timeout) { int ret; @@ -93,8 +93,8 @@ static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, u64 out_param) { - struct hns_roce_cmd_context - *context = &hr_dev->cmd.context[token & hr_dev->cmd.token_mask]; + struct hns_roce_cmd_context *context = + &hr_dev->cmd.context[token % hr_dev->cmd.max_cmds]; if (token != context->token) return; @@ -108,7 +108,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned int timeout) { struct hns_roce_cmdq *cmd = &hr_dev->cmd; struct hns_roce_cmd_context *context; @@ -159,13 +159,13 @@ out: static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, unsigned long timeout) + u8 op_modifier, u16 op, unsigned int timeout) { int ret; down(&hr_dev->cmd.event_sem); - ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, timeout); + ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, in_modifier, + op_modifier, op, timeout); up(&hr_dev->cmd.event_sem); return ret; @@ -173,7 +173,7 @@ static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned int timeout) { int ret; @@ -231,9 +231,8 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd; int i; - 
hr_cmd->context = kmalloc_array(hr_cmd->max_cmds, - sizeof(*hr_cmd->context), - GFP_KERNEL); + hr_cmd->context = + kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL); if (!hr_cmd->context) return -ENOMEM; @@ -262,8 +261,8 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev) hr_cmd->use_events = 0; } -struct hns_roce_cmd_mailbox - *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) +struct hns_roce_cmd_mailbox * +hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) { struct hns_roce_cmd_mailbox *mailbox; @@ -271,8 +270,8 @@ struct hns_roce_cmd_mailbox if (!mailbox) return ERR_PTR(-ENOMEM); - mailbox->buf = dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, - &mailbox->dma); + mailbox->buf = + dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, &mailbox->dma); if (!mailbox->buf) { kfree(mailbox); return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 1915bacaded0..8025e7f657fa 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -141,10 +141,10 @@ enum { int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout); + unsigned int timeout); -struct hns_roce_cmd_mailbox - *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); +struct hns_roce_cmd_mailbox * +hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox); diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index f5669ff8cfeb..5afee04fb02c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -38,21 +38,33 @@ #define roce_raw_write(value, addr) \ __raw_writel((__force u32)cpu_to_le32(value), (addr)) -#define roce_get_field(origin, mask, shift) \ - (((le32_to_cpu(origin)) & 
(mask)) >> (shift)) +#define roce_get_field(origin, mask, shift) \ + ((le32_to_cpu(origin) & (mask)) >> (u32)(shift)) #define roce_get_bit(origin, shift) \ roce_get_field((origin), (1ul << (shift)), (shift)) -#define roce_set_field(origin, mask, shift, val) \ - do { \ - (origin) &= ~cpu_to_le32(mask); \ - (origin) |= cpu_to_le32(((u32)(val) << (shift)) & (mask)); \ +#define roce_set_field(origin, mask, shift, val) \ + do { \ + (origin) &= ~cpu_to_le32(mask); \ + (origin) |= cpu_to_le32(((u32)(val) << (u32)(shift)) & (mask)); \ } while (0) -#define roce_set_bit(origin, shift, val) \ +#define roce_set_bit(origin, shift, val) \ roce_set_field((origin), (1ul << (shift)), (shift), (val)) +#define FIELD_LOC(field_type, field_h, field_l) field_type, field_h, field_l + +#define _hr_reg_enable(ptr, field_type, field_h, field_l) \ + ({ \ + const field_type *_ptr = ptr; \ + *((__le32 *)_ptr + (field_h) / 32) |= \ + cpu_to_le32(BIT((field_l) % 32)) + \ + BUILD_BUG_ON_ZERO((field_h) != (field_l)); \ + }) + +#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field) + #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 809b22aa5056..8533fc2d8df2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -36,43 +36,42 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" -#include <rdma/hns-abi.h> #include "hns_roce_common.h" static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_cq_table *cq_table; - struct ib_device *ibdev = &hr_dev->ib_dev; u64 mtts[MTT_MIN_COUNT] = { 0 }; dma_addr_t dma_handle; int ret; ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts), &dma_handle); - if (ret < 1) { - ibdev_err(ibdev, "Failed to find CQ 
mtr\n"); + if (!ret) { + ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret); return -EINVAL; } cq_table = &hr_dev->cq_table; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ bitmap, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret); return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { - ibdev_err(ibdev, "Failed to get CQ(0x%lx) context, err %d\n", + ibdev_err(ibdev, "failed to get CQ(0x%lx) context, ret = %d.\n", hr_cq->cqn, ret); goto err_out; } ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - ibdev_err(ibdev, "Failed to xa_store CQ\n"); + ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret); goto err_put; } @@ -91,7 +90,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { ibdev_err(ibdev, - "Failed to send create cmd for CQ(0x%lx), err %d\n", + "failed to send create cmd for CQ(0x%lx), ret = %d.\n", hr_cq->cqn, ret); goto err_xa; } @@ -147,7 +146,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - int err; + int ret; buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; @@ -155,13 +154,13 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, buf_attr.region_count = 1; buf_attr.fixed_page = true; - err = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, + ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); - if (err) - ibdev_err(ibdev, "Failed to alloc CQ mtr, err %d\n", err); + if (ret) + ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret); - return 
err; + return ret; } static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) @@ -251,14 +250,17 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, u32 cq_entries = attr->cqe; int ret; + if (attr->flags) + return -EOPNOTSUPP; + if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { - ibdev_err(ibdev, "Failed to check CQ count %d max=%d\n", + ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n", cq_entries, hr_dev->caps.max_cqes); return -EINVAL; } if (vector >= hr_dev->caps.num_comp_vectors) { - ibdev_err(ibdev, "Failed to check CQ vector=%d max=%d\n", + ibdev_err(ibdev, "failed to check CQ vector = %d, max = %d.\n", vector, hr_dev->caps.num_comp_vectors); return -EINVAL; } @@ -274,9 +276,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, if (udata) { ret = ib_copy_from_udata(&ucmd, udata, - min(sizeof(ucmd), udata->inlen)); + min(udata->inlen, sizeof(ucmd))); if (ret) { - ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n", + ibdev_err(ibdev, "failed to copy CQ udata, ret = %d.\n", ret); return ret; } @@ -286,19 +288,20 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret); return ret; } ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ db, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret); goto err_cq_buf; } ret = alloc_cqc(hr_dev, hr_cq); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ context, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc CQ context, ret = %d.\n", ret); goto err_cq_db; } @@ -313,7 +316,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, if (udata) { resp.cqn = hr_cq->cqn; - ret = 
ib_copy_to_udata(udata, &resp, sizeof(resp)); + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); if (ret) goto err_cqc; } diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index bff6abdccfb0..5cb7376ce978 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -95,8 +95,8 @@ static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir( static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir, struct hns_roce_db *db, int order) { - int o; - int i; + unsigned long o; + unsigned long i; for (o = order; o <= 1; ++o) { i = find_first_bit(pgdir->bits[o], HNS_ROCE_DB_PER_PAGE >> o); @@ -154,8 +154,8 @@ out: void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db) { - int o; - int i; + unsigned long o; + unsigned long i; mutex_lock(&hr_dev->pgdir_mutex); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 6d2acff69f98..55d538625e36 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -34,6 +34,7 @@ #define _HNS_ROCE_DEVICE_H #include <rdma/ib_verbs.h> +#include <rdma/hns-abi.h> #define DRV_NAME "hns_roce" @@ -117,6 +118,8 @@ #define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 #define SRQ_DB_REG 0x230 +#define HNS_ROCE_QP_BANK_NUM 8 + /* The chip implementation of the consumer index is calculated * according to twice the actual EQ depth */ @@ -129,15 +132,6 @@ enum { SERV_TYPE_UD, }; -enum { - HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0), - HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1), -}; - -enum hns_roce_cq_flags { - HNS_ROCE_CQ_FLAG_RECORD_DB = BIT(0), -}; - enum hns_roce_qp_state { HNS_ROCE_QP_STATE_RST, HNS_ROCE_QP_STATE_INIT, @@ -166,7 +160,6 @@ enum hns_roce_event { /* 0x10 and 0x11 is unused in currently application case */ HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12, HNS_ROCE_EVENT_TYPE_MB = 0x13, - HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW = 0x14, 
HNS_ROCE_EVENT_TYPE_FLR = 0x15, }; @@ -221,6 +214,8 @@ enum { HNS_ROCE_CAP_FLAG_FRMR = BIT(8), HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), + HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), + HNS_ROCE_CAP_FLAG_STASH = BIT(17), }; #define HNS_ROCE_DB_TYPE_COUNT 2 @@ -265,9 +260,6 @@ enum { #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) -/* The minimum page count for hardware access page directly. */ -#define HNS_HW_DIRECT_PAGE_COUNT 2 - struct hns_roce_uar { u64 pfn; unsigned long index; @@ -318,7 +310,7 @@ struct hns_roce_hem_table { }; struct hns_roce_buf_region { - int offset; /* page offset */ + u32 offset; /* page offset */ u32 count; /* page count */ int hopnum; /* addressing hop num */ }; @@ -338,10 +330,10 @@ struct hns_roce_buf_attr { size_t size; /* region size */ int hopnum; /* multi-hop addressing hop num */ } region[HNS_ROCE_MAX_BT_REGION]; - int region_count; /* valid region count */ + unsigned int region_count; /* valid region count */ unsigned int page_shift; /* buffer page shift */ bool fixed_page; /* decide page shift is fixed-size or maximum size */ - int user_access; /* umem access flag */ + unsigned int user_access; /* umem access flag */ bool mtt_only; /* only alloc buffer-required MTT memory */ }; @@ -352,7 +344,7 @@ struct hns_roce_hem_cfg { unsigned int buf_pg_shift; /* buffer page shift */ unsigned int buf_pg_count; /* buffer page count */ struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; - int region_count; + unsigned int region_count; }; /* memory translate region */ @@ -400,7 +392,7 @@ struct hns_roce_wq { u64 *wrid; /* Work request ID */ spinlock_t lock; u32 wqe_cnt; /* WQE num */ - int max_gs; + u32 max_gs; int offset; int wqe_shift; /* WQE size */ u32 head; @@ -419,11 +411,26 @@ struct hns_roce_buf_list { dma_addr_t map; }; +/* + * %HNS_ROCE_BUF_DIRECT indicates that the all memory must be in a continuous + * dma address range. 
+ * + * %HNS_ROCE_BUF_NOSLEEP indicates that the caller cannot sleep. + * + * %HNS_ROCE_BUF_NOFAIL allocation only failed when allocated size is zero, even + * the allocated size is smaller than the required size. + */ +enum { + HNS_ROCE_BUF_DIRECT = BIT(0), + HNS_ROCE_BUF_NOSLEEP = BIT(1), + HNS_ROCE_BUF_NOFAIL = BIT(2), +}; + struct hns_roce_buf { - struct hns_roce_buf_list direct; - struct hns_roce_buf_list *page_list; + struct hns_roce_buf_list *trunk_list; + u32 ntrunks; u32 npages; - u32 size; + unsigned int trunk_shift; unsigned int page_shift; }; @@ -451,8 +458,8 @@ struct hns_roce_db { } u; dma_addr_t dma; void *virt_addr; - int index; - int order; + unsigned long index; + unsigned long order; }; struct hns_roce_cq { @@ -500,8 +507,8 @@ struct hns_roce_srq { u64 *wrid; struct hns_roce_idx_que idx_que; spinlock_t lock; - int head; - int tail; + u16 head; + u16 tail; struct mutex mutex; void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); }; @@ -510,13 +517,22 @@ struct hns_roce_uar_table { struct hns_roce_bitmap bitmap; }; +struct hns_roce_bank { + struct ida ida; + u32 inuse; /* Number of IDs allocated */ + u32 min; /* Lowest ID to allocate. */ + u32 max; /* Highest ID to allocate. */ + u32 next; /* Next ID to allocate. 
*/ +}; + struct hns_roce_qp_table { - struct hns_roce_bitmap bitmap; struct hns_roce_hem_table qp_table; struct hns_roce_hem_table irrl_table; struct hns_roce_hem_table trrl_table; struct hns_roce_hem_table sccc_table; struct mutex scc_mutex; + struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM]; + spinlock_t bank_lock; }; struct hns_roce_cq_table { @@ -547,7 +563,7 @@ struct hns_roce_av { u8 dgid[HNS_ROCE_GID_SIZE]; u8 mac[ETH_ALEN]; u16 vlan_id; - bool vlan_en; + u8 vlan_en; }; struct hns_roce_ah { @@ -619,10 +635,9 @@ enum { struct hns_roce_work { struct hns_roce_dev *hr_dev; struct work_struct work; - u32 qpn; - u32 cqn; int event_type; int sub_type; + u32 queue_num; }; struct hns_roce_qp { @@ -690,28 +705,10 @@ struct hns_roce_aeqe { __le32 asyn; union { struct { - __le32 qp; - u32 rsv0; - u32 rsv1; - } qp_event; - - struct { - __le32 srq; - u32 rsv0; - u32 rsv1; - } srq_event; - - struct { - __le32 cq; - u32 rsv0; - u32 rsv1; - } cq_event; - - struct { - __le32 ceqe; + __le32 num; u32 rsv0; u32 rsv1; - } ce_event; + } queue_event; struct { __le64 out_param; @@ -730,11 +727,11 @@ struct hns_roce_eq { int type_flag; /* Aeq:1 ceq:0 */ int eqn; u32 entries; - int log_entries; + u32 log_entries; int eqe_size; int irq; int log_page_size; - int cons_index; + u32 cons_index; struct hns_roce_buf_list *buf_list; int over_ignore; int coalesce; @@ -742,7 +739,7 @@ struct hns_roce_eq { int hop_num; struct hns_roce_mtr mtr; u16 eq_max_cnt; - int eq_period; + u32 eq_period; int shift; int event_type; int sub_type; @@ -765,8 +762,8 @@ struct hns_roce_caps { u32 max_sq_inline; u32 max_rq_sg; u32 max_extend_sg; - int num_qps; - int reserved_qps; + u32 num_qps; + u32 reserved_qps; int num_qpc_timer; int num_cqc_timer; int num_srqs; @@ -778,7 +775,7 @@ struct hns_roce_caps { u32 max_srq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; - int num_cqs; + u32 num_cqs; u32 max_cqes; u32 min_cqes; u32 min_wqes; @@ -787,7 +784,7 @@ struct hns_roce_caps { int num_aeq_vectors; int 
num_comp_vectors; int num_other_vectors; - int num_mtpts; + u32 num_mtpts; u32 num_mtt_segs; u32 num_cqe_segs; u32 num_srqwqe_segs; @@ -825,6 +822,7 @@ struct hns_roce_caps { u32 cqc_timer_bt_num; u32 mpt_bt_num; u32 sccc_bt_num; + u32 gmv_bt_num; u32 qpc_ba_pg_sz; u32 qpc_buf_pg_sz; u32 qpc_hop_num; @@ -864,6 +862,11 @@ struct hns_roce_caps { u32 eqe_ba_pg_sz; u32 eqe_buf_pg_sz; u32 eqe_hop_num; + u32 gmv_entry_num; + u32 gmv_entry_sz; + u32 gmv_ba_pg_sz; + u32 gmv_buf_pg_sz; + u32 gmv_hop_num; u32 sl_num; u32 tsq_buf_pg_sz; u32 tpq_buf_pg_sz; @@ -898,7 +901,7 @@ struct hns_roce_hw { int (*post_mbox)(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, u16 token, int event); - int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); + int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned int timeout); int (*rst_prc_mbox)(struct hns_roce_dev *hr_dev); int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr); @@ -999,6 +1002,10 @@ struct hns_roce_dev { struct hns_roce_eq_table eq_table; struct hns_roce_hem_table qpc_timer_table; struct hns_roce_hem_table cqc_timer_table; + /* GMV is the memory area that the driver allocates for the hardware + * to store SGID, SMAC and VLAN information. 
+ */ + struct hns_roce_hem_table gmv_table; int cmd_mod; int loop_idc; @@ -1069,29 +1076,19 @@ static inline struct hns_roce_qp return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1)); } -static inline bool hns_roce_buf_is_direct(struct hns_roce_buf *buf) +static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, + unsigned int offset) { - if (buf->page_list) - return false; - - return true; + return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) + + (offset & ((1 << buf->trunk_shift) - 1)); } -static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) +static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx) { - if (hns_roce_buf_is_direct(buf)) - return (char *)(buf->direct.buf) + (offset & (buf->size - 1)); - - return (char *)(buf->page_list[offset >> buf->page_shift].buf) + - (offset & ((1 << buf->page_shift) - 1)); -} + unsigned int offset = idx << buf->page_shift; -static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) -{ - if (hns_roce_buf_is_direct(buf)) - return buf->direct.map + ((dma_addr_t)idx << buf->page_shift); - else - return buf->page_list[idx].map; + return buf->trunk_list[offset >> buf->trunk_shift].map + + (offset & ((1 << buf->trunk_shift) - 1)); } #define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT) @@ -1132,6 +1129,14 @@ static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift) return ilog2(to_hr_hem_entries_count(count, buf_shift)); } +#define DSCP_SHIFT 2 + +static inline u8 get_tclass(const struct ib_global_route *grh) +{ + return grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP ? 
+ grh->traffic_class >> DSCP_SHIFT : grh->traffic_class; +} + int hns_roce_init_uar_table(struct hns_roce_dev *dev); int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar); @@ -1155,7 +1160,7 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr); int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int page_cnt); + dma_addr_t *pages, unsigned int page_cnt); int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); @@ -1198,9 +1203,10 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); -int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata); +struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, @@ -1215,8 +1221,8 @@ int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int hns_roce_dealloc_mw(struct ib_mw *ibmw); void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf); -int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf, u32 page_shift); +struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, + u32 page_shift, u32 flags); int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, 
struct hns_roce_buf *buf); @@ -1238,10 +1244,10 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); -void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n); -void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n); -void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n); -bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, +void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n); +void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n); +void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n); +bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq, struct ib_cq *ib_cq); enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state); void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, @@ -1271,7 +1277,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); -int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); +u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 7487cf3d2c37..edc9d6b98d95 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -75,6 +75,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) case HEM_TYPE_CQC_TIMER: hop_num = hr_dev->caps.cqc_timer_hop_num; 
break; + case HEM_TYPE_GMV: + hop_num = hr_dev->caps.gmv_hop_num; + break; default: return false; } @@ -183,8 +186,16 @@ static int get_hem_table_config(struct hns_roce_dev *hr_dev, mhop->ba_l0_num = hr_dev->caps.srqc_bt_num; mhop->hop_num = hr_dev->caps.srqc_hop_num; break; + case HEM_TYPE_GMV: + mhop->buf_chunk_size = 1 << (hr_dev->caps.gmv_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.gmv_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = hr_dev->caps.gmv_bt_num; + mhop->hop_num = hr_dev->caps.gmv_hop_num; + break; default: - dev_err(dev, "Table %d not support multi-hop addressing!\n", + dev_err(dev, "table %u not support multi-hop addressing!\n", type); return -EINVAL; } @@ -198,9 +209,9 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, { struct device *dev = hr_dev->dev; u32 chunk_ba_num; + u32 chunk_size; u32 table_idx; u32 bt_num; - u32 chunk_size; if (get_hem_table_config(hr_dev, mhop, table->type)) return -EINVAL; @@ -232,8 +243,8 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, mhop->l0_idx = table_idx; break; default: - dev_err(dev, "Table %d not support hop_num = %d!\n", - table->type, mhop->hop_num); + dev_err(dev, "table %u not support hop_num = %u!\n", + table->type, mhop->hop_num); return -EINVAL; } if (mhop->l0_idx >= mhop->ba_l0_num) @@ -332,15 +343,15 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, { spinlock_t *lock = &hr_dev->bt_cmd_lock; struct device *dev = hr_dev->dev; - long end; - unsigned long flags; struct hns_roce_hem_iter iter; void __iomem *bt_cmd; __le32 bt_cmd_val[2]; __le32 bt_cmd_h = 0; + unsigned long flags; __le32 bt_cmd_l; - u64 bt_ba; int ret = 0; + u64 bt_ba; + long end; /* Find the HEM(Hardware Entry Memory) entry */ unsigned long i = (obj & (table->num_obj - 1)) / @@ -438,13 +449,13 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev, index->buf = l0_idx; break; default: - ibdev_err(ibdev, "Table %d not support mhop.hop_num = %d!\n", + ibdev_err(ibdev, "table %u not 
support mhop.hop_num = %u!\n", table->type, mhop->hop_num); return -EINVAL; } if (unlikely(index->buf >= table->num_hem)) { - ibdev_err(ibdev, "Table %d exceed hem limt idx %llu,max %lu!\n", + ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n", table->type, index->buf, table->num_hem); return -EINVAL; } @@ -640,8 +651,8 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj) { struct device *dev = hr_dev->dev; - int ret = 0; unsigned long i; + int ret = 0; if (hns_roce_check_whether_mhop(hr_dev, table->type)) return hns_roce_table_mhop_get(hr_dev, table, obj); @@ -714,15 +725,15 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev, step_idx = hop_num; if (hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx)) - ibdev_warn(ibdev, "Clear hop%d HEM failed.\n", hop_num); + ibdev_warn(ibdev, "failed to clear hop%u HEM.\n", hop_num); if (index->inited & HEM_INDEX_L1) if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1)) - ibdev_warn(ibdev, "Clear HEM step 1 failed.\n"); + ibdev_warn(ibdev, "failed to clear HEM step 1.\n"); if (index->inited & HEM_INDEX_L0) if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) - ibdev_warn(ibdev, "Clear HEM step 0 failed.\n"); + ibdev_warn(ibdev, "failed to clear HEM step 0.\n"); } } @@ -789,14 +800,14 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, struct hns_roce_hem_chunk *chunk; struct hns_roce_hem_mhop mhop; struct hns_roce_hem *hem; - void *addr = NULL; unsigned long mhop_obj = obj; unsigned long obj_per_chunk; unsigned long idx_offset; int offset, dma_offset; + void *addr = NULL; + u32 hem_idx = 0; int length; int i, j; - u32 hem_idx = 0; if (!table->lowmem) return NULL; @@ -876,7 +887,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, unsigned long buf_chunk_size; unsigned long bt_chunk_size; unsigned long bt_chunk_num; - unsigned long num_bt_l0 = 0; + unsigned long num_bt_l0; u32 hop_num; if (get_hem_table_config(hr_dev, &mhop, type)) @@ -966,8 
+977,8 @@ static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev, { struct hns_roce_hem_mhop mhop; u32 buf_chunk_size; - int i; u64 obj; + int i; if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop)) return; @@ -1017,7 +1028,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { - if (hr_dev->caps.srqc_entry_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); @@ -1027,12 +1038,16 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) if (hr_dev->caps.cqc_timer_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table); - if (hr_dev->caps.sccc_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.trrl_table); + + if (hr_dev->caps.gmv_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->gmv_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); @@ -1234,7 +1249,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, } if (offset < r->offset) { - dev_err(hr_dev->dev, "invalid offset %d,min %d!\n", + dev_err(hr_dev->dev, "invalid offset %d, min %u!\n", offset, r->offset); return -EINVAL; } @@ -1298,8 +1313,8 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, const struct hns_roce_buf_region *regions, int region_cnt) { - struct roce_hem_item *hem, *temp_hem, *root_hem; struct list_head temp_list[HNS_ROCE_MAX_BT_REGION]; + struct roce_hem_item *hem, *temp_hem, *root_hem; const struct hns_roce_buf_region *r; struct list_head temp_root; struct list_head temp_btm; @@ -1404,8 +1419,8 @@ int 
hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, { const struct hns_roce_buf_region *r; int ofs, end; - int ret; int unit; + int ret; int i; if (region_cnt > HNS_ROCE_MAX_BT_REGION) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index b34c940077bb..13fdeb3274e7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -47,6 +47,7 @@ enum { HEM_TYPE_SCCC, HEM_TYPE_QPC_TIMER, HEM_TYPE_CQC_TIMER, + HEM_TYPE_GMV, /* UNMAP HEM */ HEM_TYPE_MTT, @@ -174,4 +175,4 @@ static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter) return sg_dma_address(&iter->chunk->mem[iter->page_idx]); } -#endif /*_HNS_ROCE_HEM_H*/ +#endif /* _HNS_ROCE_HEM_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 5f4d8a32ed6d..f68585ff8e8a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -239,7 +239,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, break; } - /*Ctrl field, ctrl set type: sig, solic, imm, fence */ + /* Ctrl field, ctrl set type: sig, solic, imm, fence */ /* SO wait for conforming application scenarios */ ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ? 
cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) | @@ -288,7 +288,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, ret = -EINVAL; *bad_wr = wr; dev_err(dev, "inline len(1-%d)=%d, illegal", - ctrl->msg_length, + le32_to_cpu(ctrl->msg_length), hr_dev->caps.max_sq_inline); goto out; } @@ -300,7 +300,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, } ctrl->flag |= cpu_to_le32(HNS_ROCE_WQE_INLINE); } else { - /*sqe num is two */ + /* sqe num is two */ for (i = 0; i < wr->num_sge; i++) set_data_seg(dseg + i, wr->sg_list + i); @@ -353,8 +353,8 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, unsigned long flags = 0; unsigned int wqe_idx; int ret = 0; - int nreq = 0; - int i = 0; + int nreq; + int i; u32 reg_val; spin_lock_irqsave(&hr_qp->rq.lock, flags); @@ -1165,7 +1165,7 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) } raq->e_raq_buf->map = addr; - /* Configure raq extended address. 48bit 4K align*/ + /* Configure raq extended address. 48bit 4K align */ roce_write(hr_dev, ROCEE_EXT_RAQ_REG, raq->e_raq_buf->map >> 12); /* Configure raq_shift */ @@ -1639,7 +1639,7 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, } static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, - unsigned long timeout) + unsigned int timeout) { u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG; unsigned long end; @@ -2062,11 +2062,6 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); } -static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) -{ - return -EOPNOTSUPP; -} - static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { @@ -2305,7 +2300,7 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) struct hns_roce_qp *cur_qp = NULL; unsigned long flags; int npolled; - int ret = 0; + int ret; spin_lock_irqsave(&hr_cq->lock, flags); @@ -2765,7 +2760,6 @@ static int hns_roce_v1_m_qp(struct ib_qp 
*ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qpc_bytes_16, QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); - } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { roce_set_field(context->qpc_bytes_4, QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, @@ -3261,6 +3255,8 @@ static int hns_roce_v1_modify_qp(struct ib_qp *ibqp, enum ib_qp_state cur_state, enum ib_qp_state new_state) { + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state, @@ -3604,10 +3600,10 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) return 0; } -static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not) +static void set_eq_cons_index_v1(struct hns_roce_eq *eq, u32 req_not) { roce_raw_write((eq->cons_index & HNS_ROCE_V1_CONS_IDX_M) | - (req_not << eq->log_entries), eq->doorbell); + (req_not << eq->log_entries), eq->doorbell); } static void hns_roce_v1_wq_catas_err_handle(struct hns_roce_dev *hr_dev, @@ -3687,10 +3683,10 @@ static void hns_roce_v1_qp_err_handle(struct hns_roce_dev *hr_dev, int phy_port; int qpn; - qpn = roce_get_field(aeqe->event.qp_event.qp, + qpn = roce_get_field(aeqe->event.queue_event.num, HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M, HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S); - phy_port = roce_get_field(aeqe->event.qp_event.qp, + phy_port = roce_get_field(aeqe->event.queue_event.num, HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M, HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S); if (qpn <= 1) @@ -3721,9 +3717,9 @@ static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev, struct device *dev = &hr_dev->pdev->dev; u32 cqn; - cqn = roce_get_field(aeqe->event.cq_event.cq, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); + cqn = roce_get_field(aeqe->event.queue_event.num, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, + 
HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: @@ -3798,7 +3794,6 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, int event_type; while ((aeqe = next_aeqe_sw_v1(eq))) { - /* Make sure we read the AEQ entry after we have checked the * ownership bit */ @@ -3853,12 +3848,6 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: hns_roce_v1_db_overflow_handle(hr_dev, aeqe); break; - case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: - dev_warn(dev, "CEQ 0x%lx overflow.\n", - roce_get_field(aeqe->event.ce_event.ceqe, - HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M, - HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S)); - break; default: dev_warn(dev, "Unhandled event %d on EQ %d at idx %u.\n", event_type, eq->eqn, eq->cons_index); @@ -3903,7 +3892,6 @@ static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev, u32 cqn; while ((ceqe = next_ceqe_sw_v1(eq))) { - /* Make sure we read CEQ entry after we have checked the * ownership bit */ @@ -4129,7 +4117,7 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn]; struct device *dev = &hr_dev->pdev->dev; dma_addr_t tmp_dma_addr; - u32 eqcuridx_val = 0; + u32 eqcuridx_val; u32 eqconsindx_val; u32 eqshift_val; __le32 tmp2 = 0; @@ -4347,7 +4335,6 @@ static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev) static const struct ib_device_ops hns_roce_v1_dev_ops = { .destroy_qp = hns_roce_v1_destroy_qp, - .modify_cq = hns_roce_v1_modify_cq, .poll_cq = hns_roce_v1_poll_cq, .post_recv = hns_roce_v1_post_recv, .post_send = hns_roce_v1_post_send, @@ -4367,7 +4354,6 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .set_mtu = hns_roce_v1_set_mtu, .write_mtpt = hns_roce_v1_write_mtpt, .write_cqc = hns_roce_v1_write_cqc, - .modify_cq = hns_roce_v1_modify_cq, .clear_hem = hns_roce_v1_clear_hem, .modify_qp = hns_roce_v1_modify_qp, .query_qp = hns_roce_v1_query_qp, diff 
--git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index ffd0156080f5..46ab0a321d21 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -419,7 +419,7 @@ struct hns_roce_wqe_data_seg { struct hns_roce_wqe_raddr_seg { __le32 rkey; - __le32 len;/* reserved */ + __le32 len; /* reserved */ __le64 raddr; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0468028ffe39..833e1f259936 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -214,25 +214,20 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, return 0; } -static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, - unsigned int *sge_ind, unsigned int valid_num_sge) +static void set_extend_sge(struct hns_roce_qp *qp, struct ib_sge *sge, + unsigned int *sge_ind, unsigned int cnt) { struct hns_roce_v2_wqe_data_seg *dseg; - unsigned int cnt = valid_num_sge; - struct ib_sge *sge = wr->sg_list; unsigned int idx = *sge_ind; - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { - cnt -= HNS_ROCE_SGE_IN_WQE; - sge += HNS_ROCE_SGE_IN_WQE; - } - while (cnt > 0) { dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); - set_data_seg_v2(dseg, sge); - idx++; + if (likely(sge->length)) { + set_data_seg_v2(dseg, sge); + idx++; + cnt--; + } sge++; - cnt--; } *sge_ind = idx; @@ -340,7 +335,8 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, } } - set_extend_sge(qp, wr, sge_ind, valid_num_sge); + set_extend_sge(qp, wr->sg_list + i, sge_ind, + valid_num_sge - HNS_ROCE_SGE_IN_WQE); } roce_set_field(rc_sq_wqe->byte_16, @@ -365,7 +361,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, } else if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || hr_qp->state == IB_QPS_RTR)) { - ibdev_err(ibdev, "failed to 
post WQE, QP state %d!\n", + ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n", hr_qp->state); return -EINVAL; } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { @@ -422,19 +418,54 @@ static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, return 0; } +static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, + struct hns_roce_ah *ah) +{ + struct ib_device *ib_dev = ah->ibah.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, + V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); + + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, + V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, + V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, + V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); + + if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL)) + return -EINVAL; + + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, + V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); + + ud_sq_wqe->sgid_index = ah->av.gid_index; + + memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN); + memcpy(ud_sq_wqe->dgid, ah->av.dgid, GID_LEN_V2); + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; + + roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, + ah->av.vlan_en); + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, + V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); + + return 0; +} + static inline int set_ud_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, unsigned int owner_bit) { - struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); struct hns_roce_v2_ud_send_wqe *ud_sq_wqe = wqe; unsigned int curr_idx = *sge_idx; - int valid_num_sge; + unsigned int 
valid_num_sge; u32 msg_len = 0; - bool loopback; - u8 *smac; int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); @@ -444,38 +475,13 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, if (WARN_ON(ret)) return ret; - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M, - V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M, - V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M, - V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M, - V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]); - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_4_M, - V2_UD_SEND_WQE_BYTE_48_DMAC_4_S, ah->av.mac[4]); - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_5_M, - V2_UD_SEND_WQE_BYTE_48_DMAC_5_S, ah->av.mac[5]); - - /* MAC loopback */ - smac = (u8 *)hr_dev->dev_addr[qp->port]; - loopback = ether_addr_equal_unaligned(ah->av.mac, smac) ? 1 : 0; - - roce_set_bit(ud_sq_wqe->byte_40, - V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback); - ud_sq_wqe->msg_len = cpu_to_le32(msg_len); - /* Set sig attr */ roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S, - (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); + !!(wr->send_flags & IB_SEND_SIGNALED)); - /* Set se attr */ roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S, - (wr->send_flags & IB_SEND_SOLICITED) ? 
1 : 0); - - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); + !!(wr->send_flags & IB_SEND_SOLICITED)); roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M, V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn); @@ -488,36 +494,29 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, curr_idx & (qp->sge.sge_cnt - 1)); - roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, - V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? qp->qkey : ud_wr(wr)->remote_qkey); roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, - V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, - V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, - V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, - V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_PORTN_M, - V2_UD_SEND_WQE_BYTE_40_PORTN_S, qp->port); - - roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, - ah->av.vlan_en ? 
1 : 0); - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, - V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ah->av.gid_index); + ret = fill_ud_av(ud_sq_wqe, ah); + if (ret) + return ret; - memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2); + set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge); - set_extend_sge(qp, wr, &curr_idx, valid_num_sge); + /* + * The pipeline can sequentially post all valid WQEs into WQ buffer, + * including new WQEs waiting for the doorbell to update the PI again. + * Therefore, the owner bit of WQE MUST be updated after all fields + * and extSGEs have been written into DDR instead of cache. + */ + if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB) + dma_wmb(); *sge_idx = curr_idx; + roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, + owner_bit); return 0; } @@ -591,9 +590,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); @@ -601,7 +597,18 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, &curr_idx, valid_num_sge); + /* + * The pipeline can sequentially post all valid WQEs into WQ buffer, + * including new WQEs waiting for the doorbell to update the PI again. + * Therefore, the owner bit of WQE MUST be updated after all fields + * and extSGEs have been written into DDR instead of cache. 
+ */ + if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB) + dma_wmb(); + *sge_idx = curr_idx; + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, + owner_bit); return ret; } @@ -649,7 +656,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, unsigned int sge_idx; unsigned int wqe_idx; void *wqe = NULL; - int nreq; + u32 nreq; int ret; spin_lock_irqsave(&qp->sq.lock, flags); @@ -673,7 +680,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); if (unlikely(wr->num_sge > qp->sq.max_gs)) { - ibdev_err(ibdev, "num_sge=%d > qp->sq.max_gs=%d\n", + ibdev_err(ibdev, "num_sge = %d > qp->sq.max_gs = %u.\n", wr->num_sge, qp->sq.max_gs); ret = -EINVAL; *bad_wr = wr; @@ -686,7 +693,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); /* Corresponding to the QP type, wqe process separately */ - if (ibqp->qp_type == IB_QPT_GSI) + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit); else if (ibqp->qp_type == IB_QPT_RC) ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit); @@ -758,7 +765,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { - ibdev_err(ibdev, "rq:num_sge=%d >= qp->sq.max_gs=%d\n", + ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n", wr->num_sge, hr_qp->rq.max_gs); ret = -EINVAL; *bad_wr = wr; @@ -827,7 +834,7 @@ static void *get_srq_wqe(struct hns_roce_srq *srq, int n) return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); } -static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n) +static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n) { return hns_roce_buf_offset(idx_que->mtr.kmem, n << idx_que->entry_shift); @@ -868,12 +875,12 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, struct hns_roce_v2_wqe_data_seg 
*dseg; struct hns_roce_v2_db srq_db; unsigned long flags; + unsigned int ind; __le32 *srq_idx; int ret = 0; int wqe_idx; void *wqe; int nreq; - int ind; int i; spin_lock_irqsave(&srq->lock, flags); @@ -1018,8 +1025,8 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev) struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; const struct hnae3_ae_ops *ops = handle->ae_algo->ops; - unsigned long instance_stage; /* the current instance stage */ - unsigned long reset_stage; /* the current reset stage */ + unsigned long instance_stage; /* the current instance stage */ + unsigned long reset_stage; /* the current reset stage */ unsigned long reset_cnt; bool sw_resetting; bool hw_resetting; @@ -1118,7 +1125,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG, upper_32_bits(dma)); roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG, - ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); + (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0); roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0); } else { @@ -1126,7 +1133,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG, upper_32_bits(dma)); roce_write(hr_dev, ROCEE_RX_CMQ_DEPTH_REG, - ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); + (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); roce_write(hr_dev, ROCEE_RX_CMQ_HEAD_REG, 0); roce_write(hr_dev, ROCEE_RX_CMQ_TAIL_REG, 0); } @@ -1573,6 +1580,10 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) PF_RES_DATA_4_PF_SCCC_BT_NUM_M, PF_RES_DATA_4_PF_SCCC_BT_NUM_S); + hr_dev->caps.gmv_bt_num = roce_get_field(req_b->gmv_idx_num, + PF_RES_DATA_5_PF_GMV_BT_NUM_M, + PF_RES_DATA_5_PF_GMV_BT_NUM_S); + return 0; } @@ -1896,11 +1907,20 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; caps->cqe_sz = 
HNS_ROCE_V3_CQE_SIZE; caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; + caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; + caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; + caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / + caps->gmv_entry_sz); + caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; + caps->gmv_ba_pg_sz = 0; + caps->gmv_buf_pg_sz = 0; + caps->gid_table_len[0] = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE / + caps->gmv_entry_sz); } } -static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, - int *buf_page_size, int *bt_page_size, u32 hem_type) +static void calc_pg_sz(u32 obj_num, u32 obj_size, u32 hop_num, u32 ctx_bt_num, + u32 *buf_page_size, u32 *bt_page_size, u32 hem_type) { u64 obj_per_chunk; u64 bt_chunk_size = PAGE_SIZE; @@ -1930,8 +1950,8 @@ static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, obj_per_chunk = ctx_bt_num * obj_per_chunk_default; break; default: - pr_err("Table %d not support hop_num = %d!\n", hem_type, - hop_num); + pr_err("table %u not support hop_num = %u!\n", hem_type, + hop_num); return; } @@ -2122,6 +2142,14 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; + caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; + caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / + caps->gmv_entry_sz); + caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; + caps->gmv_ba_pg_sz = 0; + caps->gmv_buf_pg_sz = 0; + caps->gid_table_len[0] = caps->gmv_bt_num * + (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); } calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num, @@ -2371,10 +2399,10 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, u32 buf_chk_sz; dma_addr_t t; int func_num = 1; - int pg_num_a; - int pg_num_b; - int pg_num; - int size; + u32 pg_num_a; + u32 pg_num_b; + u32 pg_num; + u32 size; int i; switch (type) { @@ -2423,7 +2451,6 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, if (i < 
(pg_num - 1)) entry[i].blk_ba1_nxt_ptr |= (i + 1) << HNS_ROCE_LINK_TABLE_NXT_PTR_S; - } link_tbl->npages = pg_num; link_tbl->pg_sz = buf_chk_sz; @@ -2465,24 +2492,13 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev, link_tbl->table.map); } -static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +static int get_hem_table(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = hr_dev->priv; - int qpc_count, cqc_count; - int ret, i; - - /* TSQ includes SQ doorbell and ack doorbell */ - ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); - if (ret) { - dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret); - return ret; - } - - ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); - if (ret) { - dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret); - goto err_tpq_init_failed; - } + unsigned int qpc_count; + unsigned int cqc_count; + unsigned int gmv_count; + int ret; + int i; /* Alloc memory for QPC Timer buffer space chunk */ for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num; @@ -2506,8 +2522,23 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) } } + /* Alloc memory for GMV(GID/MAC/VLAN) table buffer space chunk */ + for (gmv_count = 0; gmv_count < hr_dev->caps.gmv_entry_num; + gmv_count++) { + ret = hns_roce_table_get(hr_dev, &hr_dev->gmv_table, gmv_count); + if (ret) { + dev_err(hr_dev->dev, + "failed to get gmv table, ret = %d.\n", ret); + goto err_gmv_failed; + } + } + return 0; +err_gmv_failed: + for (i = 0; i < gmv_count; i++) + hns_roce_table_put(hr_dev, &hr_dev->gmv_table, i); + err_cqc_timer_failed: for (i = 0; i < cqc_count; i++) hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i); @@ -2516,6 +2547,34 @@ err_qpc_timer_failed: for (i = 0; i < qpc_count; i++) hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i); + return ret; +} + +static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + int ret; + + /* TSQ includes SQ doorbell and ack 
doorbell */ + ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "failed to init TSQ, ret = %d.\n", ret); + return ret; + } + + ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "failed to init TPQ, ret = %d.\n", ret); + goto err_tpq_init_failed; + } + + ret = get_hem_table(hr_dev); + if (ret) + goto err_get_hem_table_failed; + + return 0; + +err_get_hem_table_failed: hns_roce_free_link_table(hr_dev, &priv->tpq); err_tpq_init_failed: @@ -2539,7 +2598,7 @@ static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev) struct hns_roce_cmq_desc desc; struct hns_roce_mbox_status *mb_st = (struct hns_roce_mbox_status *)desc.data; - enum hns_roce_cmd_return_status status; + int status; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true); @@ -2610,7 +2669,7 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, } static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, - unsigned long timeout) + unsigned int timeout) { struct device *dev = hr_dev->dev; unsigned long end; @@ -2637,14 +2696,27 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } -static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, - int gid_index, const union ib_gid *gid, - enum hns_roce_sgid_type sgid_type) +static void copy_gid(void *dest, const union ib_gid *gid) +{ +#define GID_SIZE 4 + const union ib_gid *src = gid; + __le32 (*p)[GID_SIZE] = dest; + int i; + + if (!gid) + src = &zgid; + + for (i = 0; i < GID_SIZE; i++) + (*p)[i] = cpu_to_le32(*(u32 *)&src->raw[i * sizeof(u32)]); +} + +static int config_sgid_table(struct hns_roce_dev *hr_dev, + int gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type) { struct hns_roce_cmq_desc desc; struct hns_roce_cfg_sgid_tb *sgid_tb = (struct hns_roce_cfg_sgid_tb *)desc.data; - u32 *p; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); @@ -2653,19 +2725,54 @@ 
static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M, CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); - p = (u32 *)&gid->raw[0]; - sgid_tb->vf_sgid_l = cpu_to_le32(*p); + copy_gid(&sgid_tb->vf_sgid_l, gid); - p = (u32 *)&gid->raw[4]; - sgid_tb->vf_sgid_ml = cpu_to_le32(*p); + return hns_roce_cmq_send(hr_dev, &desc, 1); +} - p = (u32 *)&gid->raw[8]; - sgid_tb->vf_sgid_mh = cpu_to_le32(*p); +static int config_gmv_table(struct hns_roce_dev *hr_dev, + int gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type, + const struct ib_gid_attr *attr) +{ + struct hns_roce_cmq_desc desc[2]; + struct hns_roce_cfg_gmv_tb_a *tb_a = + (struct hns_roce_cfg_gmv_tb_a *)desc[0].data; + struct hns_roce_cfg_gmv_tb_b *tb_b = + (struct hns_roce_cfg_gmv_tb_b *)desc[1].data; - p = (u32 *)&gid->raw[0xc]; - sgid_tb->vf_sgid_h = cpu_to_le32(*p); + u16 vlan_id = VLAN_CFI_MASK; + u8 mac[ETH_ALEN] = {}; + int ret; - return hns_roce_cmq_send(hr_dev, &desc, 1); + if (gid) { + ret = rdma_read_gid_l2_fields(attr, &vlan_id, mac); + if (ret) + return ret; + } + + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_CFG_GMV_TBL, false); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_CFG_GMV_TBL, false); + + copy_gid(&tb_a->vf_sgid_l, gid); + + roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M, + CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type); + roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S, + vlan_id < VLAN_CFI_MASK); + roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M, + CFG_GMV_TB_VF_VLAN_ID_S, vlan_id); + + tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac); + roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M, + CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]); + + roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M, + CFG_GMV_TB_SGID_IDX_S, gid_index); + + return hns_roce_cmq_send(hr_dev, desc, 2); } static int 
hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, @@ -2675,23 +2782,24 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; int ret; - if (!gid || !attr) - return -EINVAL; - - if (attr->gid_type == IB_GID_TYPE_ROCE) - sgid_type = GID_TYPE_FLAG_ROCE_V1; - - if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { - if (ipv6_addr_v4mapped((void *)gid)) - sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4; - else - sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; + if (gid) { + if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + if (ipv6_addr_v4mapped((void *)gid)) + sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4; + else + sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; + } else if (attr->gid_type == IB_GID_TYPE_ROCE) { + sgid_type = GID_TYPE_FLAG_ROCE_V1; + } } - ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + ret = config_gmv_table(hr_dev, gid_index, gid, sgid_type, attr); + else + ret = config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (ret) - ibdev_err(&hr_dev->ib_dev, - "failed to configure sgid table, ret = %d!\n", + ibdev_err(&hr_dev->ib_dev, "failed to set gid, ret = %d!\n", ret); return ret; @@ -2959,7 +3067,7 @@ static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size); } -static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) +static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, unsigned int n) { struct hns_roce_v2_cqe *cqe = get_cqe_v2(hr_cq, n & hr_cq->ib_cq.cqe); @@ -3060,6 +3168,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size == HNS_ROCE_V3_CQE_SIZE ? 
1 : 0); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) + hr_reg_enable(cq_context, CQC_STASH); + cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); roce_set_field(cq_context->byte_16_hop_addr, @@ -3303,7 +3414,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, int is_send; u16 wqe_ctr; u32 opcode; - int qpn; + u32 qpn; int ret; /* Find cqe according to consumer index */ @@ -3572,7 +3683,7 @@ static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type, break; default: dev_warn(hr_dev->dev, - "Table %d not to be written by mailbox!\n", type); + "table %u not to be written by mailbox!\n", type); return -EINVAL; } @@ -3583,9 +3694,25 @@ static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, u64 bt_ba, u32 hem_type, int step_idx) { struct hns_roce_cmd_mailbox *mailbox; + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_gmv_bt *gmv_bt = + (struct hns_roce_cfg_gmv_bt *)desc.data; int ret; int op; + if (hem_type == HEM_TYPE_GMV) { + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, + false); + + gmv_bt->gmv_ba_l = cpu_to_le32(bt_ba >> HNS_HW_PAGE_SHIFT); + gmv_bt->gmv_ba_h = cpu_to_le32(bt_ba >> (HNS_HW_PAGE_SHIFT + + 32)); + gmv_bt->gmv_bt_idx = cpu_to_le32(obj / + (HNS_HW_PAGE_SIZE / hr_dev->caps.gmv_entry_sz)); + + return hns_roce_cmq_send(hr_dev, &desc, 1); + } + op = get_op_for_set_hem(hr_dev, hem_type, step_idx); if (op < 0) return 0; @@ -3683,24 +3810,20 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, case HEM_TYPE_CQC: op = HNS_ROCE_CMD_DESTROY_CQC_BT0; break; - case HEM_TYPE_SCCC: - case HEM_TYPE_QPC_TIMER: - case HEM_TYPE_CQC_TIMER: - break; case HEM_TYPE_SRQC: op = HNS_ROCE_CMD_DESTROY_SRQC_BT0; break; + case HEM_TYPE_SCCC: + case HEM_TYPE_QPC_TIMER: + case HEM_TYPE_CQC_TIMER: + case HEM_TYPE_GMV: + return 0; default: - dev_warn(dev, "Table %d not to be destroyed by mailbox!\n", + dev_warn(dev, "table %u not to be destroyed by mailbox!\n", table->type); return 0; } - if 
(table->type == HEM_TYPE_SCCC || - table->type == HEM_TYPE_QPC_TIMER || - table->type == HEM_TYPE_CQC_TIMER) - return 0; - op += step_idx; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -3851,9 +3974,14 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1); - hr_qp->access_flags = attr->qp_access_flags; roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn); + + if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ) + return; + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) + hr_reg_enable(&context->ext, QPCEX_STASH); } static void modify_qp_init_to_init(struct ib_qp *ibqp, @@ -3874,51 +4002,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, V2_QPC_BYTE_4_TST_S, 0); - if (attr_mask & IB_QP_ACCESS_FLAGS) { - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_WRITE)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - 0); - roce_set_bit(context->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, 0); - } else { - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - 
!!(hr_qp->access_flags & IB_ACCESS_REMOTE_WRITE)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - 0); - roce_set_bit(context->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, 0); - } - roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, @@ -4328,7 +4411,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask); if (ret) { - ibdev_err(ibdev, "failed to config sq buf, ret %d\n", ret); + ibdev_err(ibdev, "failed to config sq buf, ret = %d.\n", ret); return ret; } @@ -4421,7 +4504,9 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, IB_GID_TYPE_ROCE_UDP_ENCAP); } - if (vlan_id < VLAN_N_VID) { + /* Only HIP08 needs to set the vlan_en bits in QPC */ + if (vlan_id < VLAN_N_VID && + hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); roce_set_bit(qpc_mask->byte_76_srqn_op_en, @@ -4468,15 +4553,11 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, V2_QPC_BYTE_24_HOP_LIMIT_S, 0); - if (is_udp) - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2); - else - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, grh->traffic_class); - + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, get_tclass(&attr->ah_attr.grh)); roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, 
V2_QPC_BYTE_24_TC_S, 0); + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, V2_QPC_BYTE_28_FL_S, grh->flow_label); roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, @@ -4758,6 +4839,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, unsigned long rq_flag = 0; int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* * In v2 engine, software pass context and context mask to hardware * when modifying qp. If software need modify some fields in context, @@ -4818,7 +4902,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, /* SW pass context to HW */ ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp); if (ret) { - ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret); + ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret); goto out; } @@ -4911,7 +4995,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context); if (ret) { - ibdev_err(ibdev, "failed to query QPC, ret = %d\n", ret); + ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret); ret = -EINVAL; goto out; } @@ -5026,13 +5110,15 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, unsigned long flags; int ret = 0; - if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { + if ((hr_qp->ibqp.qp_type == IB_QPT_RC || + hr_qp->ibqp.qp_type == IB_QPT_UD) && + hr_qp->state != IB_QPS_RESET) { /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); if (ret) ibdev_err(ibdev, - "failed to modify QP to RST, ret = %d\n", + "failed to modify QP to RST, ret = %d.\n", ret); } @@ -5071,7 +5157,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) ibdev_err(&hr_dev->ib_dev, - "failed to destroy QP 0x%06lx, ret = %d\n", + "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", 
hr_qp->qpn, ret); hns_roce_qp_destroy(hr_dev, hr_qp, udata); @@ -5094,7 +5180,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { - ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d\n", ret); + ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d.\n", ret); goto out; } @@ -5104,7 +5190,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, clr->qpn = cpu_to_le32(hr_qp->qpn); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { - ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d\n", ret); + ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d.\n", ret); goto out; } @@ -5353,7 +5439,7 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) ibdev_err(&hr_dev->ib_dev, - "failed to process cmd when modifying CQ, ret = %d\n", + "failed to process cmd when modifying CQ, ret = %d.\n", ret); return ret; @@ -5364,8 +5450,6 @@ static void hns_roce_irq_work_handle(struct work_struct *work) struct hns_roce_work *irq_work = container_of(work, struct hns_roce_work, work); struct ib_device *ibdev = &irq_work->hr_dev->ib_dev; - u32 qpn = irq_work->qpn; - u32 cqn = irq_work->cqn; switch (irq_work->event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -5381,15 +5465,15 @@ static void hns_roce_irq_work_handle(struct work_struct *work) break; case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: ibdev_err(ibdev, "Local work queue 0x%x catast error, sub_event type is: %d\n", - qpn, irq_work->sub_type); + irq_work->queue_num, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: ibdev_err(ibdev, "Invalid request local work queue 0x%x error.\n", - qpn); + irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: ibdev_err(ibdev, "Local access violation work queue 0x%x error, sub_event type is: %d\n", - qpn, 
irq_work->sub_type); + irq_work->queue_num, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: ibdev_warn(ibdev, "SRQ limit reach.\n"); @@ -5401,10 +5485,10 @@ static void hns_roce_irq_work_handle(struct work_struct *work) ibdev_err(ibdev, "SRQ catas error.\n"); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: - ibdev_err(ibdev, "CQ 0x%x access err.\n", cqn); + ibdev_err(ibdev, "CQ 0x%x access err.\n", irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - ibdev_warn(ibdev, "CQ 0x%x overflow\n", cqn); + ibdev_warn(ibdev, "CQ 0x%x overflow\n", irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: ibdev_warn(ibdev, "DB overflow.\n"); @@ -5420,8 +5504,7 @@ static void hns_roce_irq_work_handle(struct work_struct *work) } static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq, - u32 qpn, u32 cqn) + struct hns_roce_eq *eq, u32 queue_num) { struct hns_roce_work *irq_work; @@ -5431,10 +5514,9 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); irq_work->hr_dev = hr_dev; - irq_work->qpn = qpn; - irq_work->cqn = cqn; irq_work->event_type = eq->event_type; irq_work->sub_type = eq->sub_type; + irq_work->queue_num = queue_num; queue_work(hr_dev->irq_workq, &(irq_work->work)); } @@ -5486,10 +5568,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq); int aeqe_found = 0; int event_type; + u32 queue_num; int sub_type; - u32 srqn; - u32 qpn; - u32 cqn; while (aeqe) { /* Make sure we read AEQ entry after we have checked the @@ -5503,15 +5583,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M, HNS_ROCE_V2_AEQE_SUB_TYPE_S); - qpn = roce_get_field(aeqe->event.qp_event.qp, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); - cqn = roce_get_field(aeqe->event.cq_event.cq, 
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); - srqn = roce_get_field(aeqe->event.srq_event.srq, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); + queue_num = roce_get_field(aeqe->event.queue_event.num, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -5522,17 +5596,15 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - hns_roce_qp_event(hr_dev, qpn, event_type); + hns_roce_qp_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: - hns_roce_srq_event(hr_dev, srqn, event_type); + hns_roce_srq_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - hns_roce_cq_event(hr_dev, cqn, event_type); - break; - case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: + hns_roce_cq_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_MB: hns_roce_cmd_event(hr_dev, @@ -5540,8 +5612,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, aeqe->event.cmd.status, le64_to_cpu(aeqe->event.cmd.out_param)); break; - case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: - break; + case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: case HNS_ROCE_EVENT_TYPE_FLR: break; default: @@ -5558,7 +5629,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, if (eq->cons_index > (2 * eq->entries - 1)) eq->cons_index = 0; - hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn); + hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); aeqe = next_aeqe_sw_v2(eq); } @@ -6193,6 +6264,7 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0}, {PCI_VDEVICE(HUAWEI, 
HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_200G_RDMA), 0}, /* required last entry */ {0, } }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index be7f2fe1e883..bdaccf86460d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -44,6 +44,7 @@ #define HNS_ROCE_VF_SMAC_NUM 32 #define HNS_ROCE_VF_SGID_NUM 32 #define HNS_ROCE_VF_SL_NUM 8 +#define HNS_ROCE_VF_GMV_BT_NUM 256 #define HNS_ROCE_V2_MAX_QP_NUM 0x100000 #define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200 @@ -89,6 +90,7 @@ #define HNS_ROCE_V2_SCCC_SZ 32 #define HNS_ROCE_V3_SCCC_SZ 64 +#define HNS_ROCE_V3_GMV_ENTRY_SZ 32 #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE @@ -241,6 +243,8 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CLR_SCCC = 0x8509, HNS_ROCE_OPC_QUERY_SCCC = 0x850a, HNS_ROCE_OPC_RESET_SCCC = 0x850b, + HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f, + HNS_ROCE_OPC_CFG_GMV_BT = 0x8510, HNS_SWITCH_PARAMETER_CFG = 0x1033, }; @@ -263,23 +267,24 @@ enum hns_roce_sgid_type { }; struct hns_roce_v2_cq_context { - __le32 byte_4_pg_ceqn; - __le32 byte_8_cqn; - __le32 cqe_cur_blk_addr; - __le32 byte_16_hop_addr; - __le32 cqe_nxt_blk_addr; - __le32 byte_24_pgsz_addr; - __le32 byte_28_cq_pi; - __le32 byte_32_cq_ci; - __le32 cqe_ba; - __le32 byte_40_cqe_ba; - __le32 byte_44_db_record; - __le32 db_record_addr; - __le32 byte_52_cqe_cnt; - __le32 byte_56_cqe_period_maxcnt; - __le32 cqe_report_timer; - __le32 byte_64_se_cqe_idx; + __le32 byte_4_pg_ceqn; + __le32 byte_8_cqn; + __le32 cqe_cur_blk_addr; + __le32 byte_16_hop_addr; + __le32 cqe_nxt_blk_addr; + __le32 byte_24_pgsz_addr; + __le32 byte_28_cq_pi; + __le32 byte_32_cq_ci; + __le32 cqe_ba; + __le32 byte_40_cqe_ba; + __le32 byte_44_db_record; + __le32 db_record_addr; + __le32 byte_52_cqe_cnt; + __le32 byte_56_cqe_period_maxcnt; + __le32 cqe_report_timer; + __le32 byte_64_se_cqe_idx; }; + #define 
HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0 @@ -356,6 +361,10 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 #define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) +#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l) + +#define CQC_STASH CQC_FIELD_LOC(63, 63) + struct hns_roce_srq_context { __le32 byte_4_srqn_srqst; __le32 byte_8_limit_wl; @@ -440,7 +449,7 @@ struct hns_roce_srq_context { #define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1 #define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1) -enum{ +enum { V2_MPT_ST_VALID = 0x1, V2_MPT_ST_FREE = 0x2, }; @@ -457,68 +466,72 @@ enum hns_roce_v2_qp_state { HNS_ROCE_QP_NUM_ST }; +struct hns_roce_v2_qp_context_ex { + __le32 data[64]; +}; struct hns_roce_v2_qp_context { - __le32 byte_4_sqpn_tst; - __le32 wqe_sge_ba; - __le32 byte_12_sq_hop; - __le32 byte_16_buf_ba_pg_sz; - __le32 byte_20_smac_sgid_idx; - __le32 byte_24_mtu_tc; - __le32 byte_28_at_fl; - u8 dgid[GID_LEN_V2]; - __le32 dmac; - __le32 byte_52_udpspn_dmac; - __le32 byte_56_dqpn_err; - __le32 byte_60_qpst_tempid; - __le32 qkey_xrcd; - __le32 byte_68_rq_db; - __le32 rq_db_record_addr; - __le32 byte_76_srqn_op_en; - __le32 byte_80_rnr_rx_cqn; - __le32 byte_84_rq_ci_pi; - __le32 rq_cur_blk_addr; - __le32 byte_92_srq_info; - __le32 byte_96_rx_reqmsn; - __le32 rq_nxt_blk_addr; - __le32 byte_104_rq_sge; - __le32 byte_108_rx_reqepsn; - __le32 rq_rnr_timer; - __le32 rx_msg_len; - __le32 rx_rkey_pkt_info; - __le64 rx_va; - __le32 byte_132_trrl; - __le32 trrl_ba; - __le32 byte_140_raq; - __le32 byte_144_raq; - __le32 byte_148_raq; - __le32 byte_152_raq; - __le32 byte_156_raq; - __le32 byte_160_sq_ci_pi; - __le32 sq_cur_blk_addr; - __le32 byte_168_irrl_idx; - __le32 byte_172_sq_psn; - __le32 byte_176_msg_pktn; - __le32 sq_cur_sge_blk_addr; - __le32 byte_184_irrl_idx; - __le32 cur_sge_offset; - __le32 byte_192_ext_sge; - __le32 byte_196_sq_psn; - __le32 byte_200_sq_max; - __le32 irrl_ba; - __le32 
byte_208_irrl; - __le32 byte_212_lsn; - __le32 sq_timer; - __le32 byte_220_retry_psn_msn; - __le32 byte_224_retry_msg; - __le32 rx_sq_cur_blk_addr; - __le32 byte_232_irrl_sge; - __le32 irrl_cur_sge_offset; - __le32 byte_240_irrl_tail; - __le32 byte_244_rnr_rxack; - __le32 byte_248_ack_psn; - __le32 byte_252_err_txcqn; - __le32 byte_256_sqflush_rqcqe; - __le32 ext[64]; + __le32 byte_4_sqpn_tst; + __le32 wqe_sge_ba; + __le32 byte_12_sq_hop; + __le32 byte_16_buf_ba_pg_sz; + __le32 byte_20_smac_sgid_idx; + __le32 byte_24_mtu_tc; + __le32 byte_28_at_fl; + u8 dgid[GID_LEN_V2]; + __le32 dmac; + __le32 byte_52_udpspn_dmac; + __le32 byte_56_dqpn_err; + __le32 byte_60_qpst_tempid; + __le32 qkey_xrcd; + __le32 byte_68_rq_db; + __le32 rq_db_record_addr; + __le32 byte_76_srqn_op_en; + __le32 byte_80_rnr_rx_cqn; + __le32 byte_84_rq_ci_pi; + __le32 rq_cur_blk_addr; + __le32 byte_92_srq_info; + __le32 byte_96_rx_reqmsn; + __le32 rq_nxt_blk_addr; + __le32 byte_104_rq_sge; + __le32 byte_108_rx_reqepsn; + __le32 rq_rnr_timer; + __le32 rx_msg_len; + __le32 rx_rkey_pkt_info; + __le64 rx_va; + __le32 byte_132_trrl; + __le32 trrl_ba; + __le32 byte_140_raq; + __le32 byte_144_raq; + __le32 byte_148_raq; + __le32 byte_152_raq; + __le32 byte_156_raq; + __le32 byte_160_sq_ci_pi; + __le32 sq_cur_blk_addr; + __le32 byte_168_irrl_idx; + __le32 byte_172_sq_psn; + __le32 byte_176_msg_pktn; + __le32 sq_cur_sge_blk_addr; + __le32 byte_184_irrl_idx; + __le32 cur_sge_offset; + __le32 byte_192_ext_sge; + __le32 byte_196_sq_psn; + __le32 byte_200_sq_max; + __le32 irrl_ba; + __le32 byte_208_irrl; + __le32 byte_212_lsn; + __le32 sq_timer; + __le32 byte_220_retry_psn_msn; + __le32 byte_224_retry_msg; + __le32 rx_sq_cur_blk_addr; + __le32 byte_232_irrl_sge; + __le32 irrl_cur_sge_offset; + __le32 byte_240_irrl_tail; + __le32 byte_244_rnr_rxack; + __le32 byte_248_ack_psn; + __le32 byte_252_err_txcqn; + __le32 byte_256_sqflush_rqcqe; + + struct hns_roce_v2_qp_context_ex ext; }; #define V2_QPC_BYTE_4_TST_S 0 @@ 
-887,6 +900,10 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_S 16 #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_M GENMASK(31, 16) +#define QPCEX_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context_ex, h, l) + +#define QPCEX_STASH QPCEX_FIELD_LOC(82, 82) + #define V2_QP_RWE_S 1 /* rdma write enable */ #define V2_QP_RRE_S 2 /* rdma read enable */ #define V2_QP_ATE_S 3 /* rdma atomic enable */ @@ -1073,12 +1090,13 @@ struct hns_roce_v2_ud_send_wqe { __le32 byte_32; __le32 byte_36; __le32 byte_40; - __le32 dmac; - __le32 byte_48; + u8 dmac[ETH_ALEN]; + u8 sgid_index; + u8 smac_index; u8 dgid[GID_LEN_V2]; - }; -#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 + +#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 #define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) #define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7 @@ -1117,37 +1135,10 @@ struct hns_roce_v2_ud_send_wqe { #define V2_UD_SEND_WQE_BYTE_40_SL_S 20 #define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20) -#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24 -#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24) - #define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30 #define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 -#define V2_UD_SEND_WQE_DMAC_0_S 0 -#define V2_UD_SEND_WQE_DMAC_0_M GENMASK(7, 0) - -#define V2_UD_SEND_WQE_DMAC_1_S 8 -#define V2_UD_SEND_WQE_DMAC_1_M GENMASK(15, 8) - -#define V2_UD_SEND_WQE_DMAC_2_S 16 -#define V2_UD_SEND_WQE_DMAC_2_M GENMASK(23, 16) - -#define V2_UD_SEND_WQE_DMAC_3_S 24 -#define V2_UD_SEND_WQE_DMAC_3_M GENMASK(31, 24) - -#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_S 0 -#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_M GENMASK(7, 0) - -#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_S 8 -#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_M GENMASK(15, 8) - -#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S 16 -#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M GENMASK(23, 16) - -#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_S 24 -#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_M GENMASK(31, 24) - struct hns_roce_v2_rc_send_wqe { __le32 byte_4; __le32 msg_len; @@ -1334,7 +1325,7 @@ 
struct hns_roce_pf_res_b { __le32 sgid_idx_num; __le32 qid_idx_sl_num; __le32 sccc_bt_idx_num; - __le32 rsv; + __le32 gmv_idx_num; }; #define PF_RES_DATA_1_PF_SMAC_IDX_S 0 @@ -1361,6 +1352,12 @@ struct hns_roce_pf_res_b { #define PF_RES_DATA_4_PF_SCCC_BT_NUM_S 9 #define PF_RES_DATA_4_PF_SCCC_BT_NUM_M GENMASK(17, 9) +#define PF_RES_DATA_5_PF_GMV_BT_IDX_S 0 +#define PF_RES_DATA_5_PF_GMV_BT_IDX_M GENMASK(7, 0) + +#define PF_RES_DATA_5_PF_GMV_BT_NUM_S 8 +#define PF_RES_DATA_5_PF_GMV_BT_NUM_M GENMASK(16, 8) + struct hns_roce_pf_timer_res_a { __le32 rsv0; __le32 qpc_timer_bt_idx_num; @@ -1425,7 +1422,7 @@ struct hns_roce_vf_res_b { __le32 vf_sgid_idx_num; __le32 vf_qid_idx_sl_num; __le32 vf_sccc_idx_num; - __le32 rsv1; + __le32 vf_gmv_idx_num; }; #define VF_RES_B_DATA_0_VF_ID_S 0 @@ -1455,6 +1452,12 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S 9 #define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M GENMASK(17, 9) +#define VF_RES_B_DATA_5_VF_GMV_BT_IDX_S 0 +#define VF_RES_B_DATA_5_VF_GMV_BT_IDX_M GENMASK(7, 0) + +#define VF_RES_B_DATA_5_VF_GMV_BT_NUM_S 16 +#define VF_RES_B_DATA_5_VF_GMV_BT_NUM_M GENMASK(24, 16) + struct hns_roce_vf_switch { __le32 rocee_sel; __le32 fun_id; @@ -1577,6 +1580,46 @@ struct hns_roce_cfg_smac_tb { #define CFG_SMAC_TB_VF_SMAC_H_S 0 #define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) +struct hns_roce_cfg_gmv_bt { + __le32 gmv_ba_l; + __le32 gmv_ba_h; + __le32 gmv_bt_idx; + __le32 rsv[3]; +}; + +#define CFG_GMV_BA_H_S 0 +#define CFG_GMV_BA_H_M GENMASK(19, 0) + +struct hns_roce_cfg_gmv_tb_a { + __le32 vf_sgid_l; + __le32 vf_sgid_ml; + __le32 vf_sgid_mh; + __le32 vf_sgid_h; + __le32 vf_sgid_type_vlan; + __le32 resv; +}; + +#define CFG_GMV_TB_SGID_IDX_S 0 +#define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0) + +#define CFG_GMV_TB_VF_SGID_TYPE_S 0 +#define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0) + +#define CFG_GMV_TB_VF_VLAN_EN_S 2 + +#define CFG_GMV_TB_VF_VLAN_ID_S 16 +#define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16) + +struct hns_roce_cfg_gmv_tb_b { + 
__le32 vf_smac_l; + __le32 vf_smac_h; + __le32 table_idx_rsv; + __le32 resv[3]; +}; + +#define CFG_GMV_TB_SMAC_H_S 0 +#define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0) + #define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 struct hns_roce_query_pf_caps_a { u8 number_ports; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index afeffafc59f9..d9179bae4989 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -33,13 +33,13 @@ #include <linux/acpi.h> #include <linux/of_platform.h> #include <linux/module.h> +#include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_cache.h> #include "hns_roce_common.h" #include "hns_roce_device.h" -#include <rdma/hns-abi.h> #include "hns_roce_hem.h" /** @@ -53,7 +53,7 @@ * GID[0][0], GID[1][0],.....GID[N - 1][0], * And so on */ -int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) +u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) { return gid_index * hr_dev->caps.num_ports + port; } @@ -61,7 +61,10 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { u8 phy_port; - u32 i = 0; + u32 i; + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; if (!memcmp(hr_dev->dev_addr[port], addr, ETH_ALEN)) return 0; @@ -90,14 +93,13 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context) static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context) { struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); - struct ib_gid_attr zattr = {}; u8 port = attr->port_num - 1; int ret; if (port >= hr_dev->caps.num_ports) return -EINVAL; - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &zgid, &zattr); + ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, NULL, NULL); return ret; } @@ -325,7 +327,8 @@ static int 
hns_roce_alloc_ucontext(struct ib_ucontext *uctx, resp.cqe_size = hr_dev->caps.cqe_sz; - ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); if (ret) goto error_fail_copy_to_udata; @@ -421,6 +424,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .alloc_pd = hns_roce_alloc_pd, .alloc_ucontext = hns_roce_alloc_ucontext, .create_ah = hns_roce_create_ah, + .create_user_ah = hns_roce_create_ah, .create_cq = hns_roce_create_cq, .create_qp = hns_roce_create_qp, .dealloc_pd = hns_roce_dealloc_pd, @@ -491,36 +495,13 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->phys_port_cnt = hr_dev->caps.num_ports; ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey; ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors; - ib_dev->uverbs_cmd_mask = - (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ULL << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ULL << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ULL << IB_USER_VERBS_CMD_REG_MR) | - (1ULL << IB_USER_VERBS_CMD_DEREG_MR) | - (1ULL << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ULL << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ULL << IB_USER_VERBS_CMD_CREATE_QP) | - (1ULL << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); - - ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { - ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops); - } /* MW */ - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) { - ib_dev->uverbs_cmd_mask |= - (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) 
ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops); - } /* FRMR */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) @@ -528,12 +509,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) /* SRQ */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { - ib_dev->uverbs_cmd_mask |= - (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV); ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops); ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops); } @@ -580,8 +555,8 @@ error_failed_setup_mtu_mac: static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table, HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, @@ -631,7 +606,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) goto err_unmap_trrl; } - if (hr_dev->caps.srqc_entry_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table, HEM_TYPE_SRQC, hr_dev->caps.srqc_entry_sz, @@ -643,7 +618,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } - if (hr_dev->caps.sccc_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.sccc_table, HEM_TYPE_SCCC, @@ -680,18 +655,35 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } + if (hr_dev->caps.gmv_entry_sz) { + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table, + HEM_TYPE_GMV, + hr_dev->caps.gmv_entry_sz, + hr_dev->caps.gmv_entry_num, 1); + if (ret) { + dev_err(dev, + "failed to init gmv table memory, ret = %d\n", + ret); + goto err_unmap_cqc_timer; + } + } + return 0; +err_unmap_cqc_timer: + if (hr_dev->caps.cqc_timer_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table); + err_unmap_qpc_timer: if 
(hr_dev->caps.qpc_timer_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table); err_unmap_ctx: - if (hr_dev->caps.sccc_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); err_unmap_srq: - if (hr_dev->caps.srqc_entry_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); err_unmap_cq: @@ -721,8 +713,8 @@ err_unmap_dmpt: */ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; spin_lock_init(&hr_dev->sm_lock); spin_lock_init(&hr_dev->bt_cmd_lock); @@ -846,8 +838,8 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev) int hns_roce_init(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; if (hr_dev->hw->reset) { ret = hr_dev->hw->reset(hr_dev, true); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 7f81a695e9af..1bcffd93ff3e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -167,10 +167,10 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - int ret; unsigned long mtpt_idx = key_to_hw_index(mr->key); - struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; + struct device *dev = hr_dev->dev; + int ret; /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -185,14 +185,14 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, else ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr); if (ret) { - dev_err(dev, "Write mtpt fail!\n"); + dev_err(dev, "failed to write mtpt, ret = %d.\n", ret); goto err_page; } ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { - dev_err(dev, "CREATE_MPT failed (%d)\n", ret); + 
dev_err(dev, "failed to create mpt, ret = %d.\n", ret); goto err_page; } @@ -328,9 +328,10 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags, return ret; } -int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata) +struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct ib_device *ib_dev = &hr_dev->ib_dev; @@ -341,11 +342,11 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, int ret; if (!mr->enabled) - return -EINVAL; + return ERR_PTR(-EINVAL); mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); + return ERR_CAST(mailbox); mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1); ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0, @@ -390,12 +391,12 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, hns_roce_free_cmd_mailbox(hr_dev, mailbox); - return 0; + return NULL; free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); - return ret; + return ERR_PTR(ret); } int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) @@ -495,7 +496,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); if (ret < 1) { - ibdev_err(ibdev, "failed to store sg pages %d %d, cnt = %d.\n", + ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n", mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret); goto err_page_list; } @@ -509,7 +510,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret); ret = 0; } else { - mr->pbl_mtr.hem_cfg.buf_pg_shift = 
ilog2(ibmr->page_size); + mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size); ret = mr->npages; } @@ -695,15 +696,6 @@ static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) return size; } -static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, - unsigned int page_shift) -{ - if (is_direct) - return ALIGN(alloc_size, 1 << page_shift); - else - return HNS_HW_DIRECT_PAGE_COUNT << page_shift; -} - /* * check the given pages in continuous address space * Returns 0 on success, or the error page num. @@ -732,7 +724,6 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) /* release kernel buffers */ if (mtr->kmem) { hns_roce_buf_free(hr_dev, mtr->kmem); - kfree(mtr->kmem); mtr->kmem = NULL; } } @@ -744,13 +735,12 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct ib_device *ibdev = &hr_dev->ib_dev; unsigned int best_pg_shift; int all_pg_count = 0; - size_t direct_size; size_t total_size; int ret; total_size = mtr_bufs_size(buf_attr); if (total_size < 1) { - ibdev_err(ibdev, "Failed to check mtr size\n"); + ibdev_err(ibdev, "failed to check mtr size\n."); return -EINVAL; } @@ -762,7 +752,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); if (IS_ERR_OR_NULL(mtr->umem)) { - ibdev_err(ibdev, "Failed to get umem, ret %ld\n", + ibdev_err(ibdev, "failed to get umem, ret = %ld.\n", PTR_ERR(mtr->umem)); return -ENOMEM; } @@ -780,19 +770,16 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, ret = 0; } else { mtr->umem = NULL; - mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL); - if (!mtr->kmem) { - ibdev_err(ibdev, "Failed to alloc kmem\n"); - return -ENOMEM; - } - direct_size = mtr_kmem_direct_size(is_direct, total_size, - buf_attr->page_shift); - ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size, - mtr->kmem, 
buf_attr->page_shift); - if (ret) { - ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret); - goto err_alloc_mem; + mtr->kmem = + hns_roce_buf_alloc(hr_dev, total_size, + buf_attr->page_shift, + is_direct ? HNS_ROCE_BUF_DIRECT : 0); + if (IS_ERR(mtr->kmem)) { + ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n", + PTR_ERR(mtr->kmem)); + return PTR_ERR(mtr->kmem); } + best_pg_shift = buf_attr->page_shift; all_pg_count = mtr->kmem->npages; } @@ -800,7 +787,8 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, /* must bigger than minimum hardware page shift */ if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) { ret = -EINVAL; - ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n", + ibdev_err(ibdev, + "failed to check mtr, page shift = %u count = %d.\n", best_pg_shift, all_pg_count); goto err_alloc_mem; } @@ -841,12 +829,12 @@ static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, } int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int page_cnt) + dma_addr_t *pages, unsigned int page_cnt) { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_region *r; + unsigned int i; int err; - int i; /* * Only use the first page address as root ba when hopnum is 0, this @@ -862,7 +850,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, if (r->offset + r->count > page_cnt) { err = -EINVAL; ibdev_err(ibdev, - "Failed to check mtr%d end %d + %d, max %d\n", + "failed to check mtr%u end %u + %u, max %u.\n", i, r->offset, r->count, page_cnt); return err; } @@ -870,7 +858,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); if (err) { ibdev_err(ibdev, - "Failed to map mtr%d offset %d, err %d\n", + "failed to map mtr%u offset %u, ret = %d.\n", i, r->offset, err); return err; } @@ -883,13 +871,12 @@ int hns_roce_mtr_find(struct 
hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int mtt_count, left; int start_index; - int mtt_count; int total = 0; __le64 *mtts; - int npage; + u32 npage; u64 addr; - int left; if (!mtt_buf || mtt_max < 1) goto done; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 98f69496adb4..cca818d05a8f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -32,7 +32,6 @@ #include <linux/platform_device.h> #include <linux/pci.h> -#include <uapi/rdma/hns-abi.h> #include "hns_roce_device.h" static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) @@ -65,21 +64,22 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn); if (ret) { - ibdev_err(ib_dev, "failed to alloc pd, ret = %d\n", ret); + ibdev_err(ib_dev, "failed to alloc pd, ret = %d.\n", ret); return ret; } if (udata) { - struct hns_roce_ib_alloc_pd_resp uresp = {.pdn = pd->pdn}; + struct hns_roce_ib_alloc_pd_resp resp = {.pdn = pd->pdn}; - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + if (ret) { hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn); - ibdev_err(ib_dev, "failed to copy to udata\n"); - return -EFAULT; + ibdev_err(ib_dev, "failed to copy to udata, ret = %d\n", ret); } } - return 0; + return ret; } int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6c081dd985fc..d8e2fe5558d2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -39,7 +39,6 @@ #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" -#include <rdma/hns-abi.h> static void 
flush_work_handle(struct work_struct *work) { @@ -114,8 +113,8 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, enum hns_roce_event type) { - struct ib_event event; struct ib_qp *ibqp = &hr_qp->ibqp; + struct ib_event event; if (ibqp->event_handler) { event.device = ibqp->device; @@ -154,9 +153,50 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, } } +static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank) +{ + u32 least_load = bank[0].inuse; + u8 bankid = 0; + u32 bankcnt; + u8 i; + + for (i = 1; i < HNS_ROCE_QP_BANK_NUM; i++) { + bankcnt = bank[i].inuse; + if (bankcnt < least_load) { + least_load = bankcnt; + bankid = i; + } + } + + return bankid; +} + +static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid, + unsigned long *qpn) +{ + int id; + + id = ida_alloc_range(&bank->ida, bank->next, bank->max, GFP_KERNEL); + if (id < 0) { + id = ida_alloc_range(&bank->ida, bank->min, bank->max, + GFP_KERNEL); + if (id < 0) + return id; + } + + /* the QPN should keep increasing until the max value is reached. */ + bank->next = (id + 1) > bank->max ? 
bank->min : id + 1; + + /* the lower 3 bits is bankid */ + *qpn = (id << 3) | bankid; + + return 0; +} static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; unsigned long num = 0; + u8 bankid; int ret; if (hr_qp->ibqp.qp_type == IB_QPT_GSI) { @@ -169,13 +209,21 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hr_qp->doorbell_qpn = 1; } else { - ret = hns_roce_bitmap_alloc_range(&hr_dev->qp_table.bitmap, - 1, 1, &num); + spin_lock(&qp_table->bank_lock); + bankid = get_least_load_bankid_for_qp(qp_table->bank); + + ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid, + &num); if (ret) { - ibdev_err(&hr_dev->ib_dev, "Failed to alloc bitmap\n"); - return -ENOMEM; + ibdev_err(&hr_dev->ib_dev, + "failed to alloc QPN, ret = %d\n", ret); + spin_unlock(&qp_table->bank_lock); + return ret; } + qp_table->bank[bankid].inuse++; + spin_unlock(&qp_table->bank_lock); + hr_qp->doorbell_qpn = (u32)num; } @@ -286,7 +334,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) } } - if (hr_dev->caps.sccc_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { /* Alloc memory for SCC CTX */ ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table, hr_qp->qpn); @@ -340,9 +388,15 @@ static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); } +static inline u8 get_qp_bankid(unsigned long qpn) +{ + /* The lower 3 bits of QPN are used to hash to different banks */ + return (u8)(qpn & GENMASK(2, 0)); +} + static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + u8 bankid; if (hr_qp->ibqp.qp_type == IB_QPT_GSI) return; @@ -350,7 +404,13 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if (hr_qp->qpn < hr_dev->caps.reserved_qps) return; - 
hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR); + bankid = get_qp_bankid(hr_qp->qpn); + + ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3); + + spin_lock(&hr_dev->qp_table.bank_lock); + hr_dev->qp_table.bank[bankid].inuse--; + spin_unlock(&hr_dev->qp_table.bank_lock); } static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, @@ -404,37 +464,43 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return 0; } -static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, - struct hns_roce_qp *hr_qp, - struct ib_qp_cap *cap) +static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp) { - u32 cnt; + /* GSI/UD QP only has extended sge */ + if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) + return qp->sq.max_gs; - cnt = max(1U, cap->max_send_sge); - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { - hr_qp->sq.max_gs = roundup_pow_of_two(cnt); - hr_qp->sge.sge_cnt = 0; + if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) + return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE; - return 0; - } + return 0; +} - hr_qp->sq.max_gs = cnt; +static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, + struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap) +{ + u32 total_sge_cnt; + u32 wqe_sge_cnt; - /* UD sqwqe's sge use extend sge */ - if (hr_qp->ibqp.qp_type == IB_QPT_GSI || - hr_qp->ibqp.qp_type == IB_QPT_UD) { - cnt = roundup_pow_of_two(sq_wqe_cnt * hr_qp->sq.max_gs); - } else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { - cnt = roundup_pow_of_two(sq_wqe_cnt * - (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); - } else { - cnt = 0; + hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; + + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { + hr_qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE; + return; } - hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; - hr_qp->sge.sge_cnt = cnt; + hr_qp->sq.max_gs = max(1U, cap->max_send_sge); - return 0; + wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp); + + /* If the number of extended sge 
is not zero, they MUST use the + * space of HNS_HW_PAGE_SIZE at least. + */ + if (wqe_sge_cnt) { + total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt); + hr_qp->sge.sge_cnt = max(total_sge_cnt, + (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE); + } } static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, @@ -447,12 +513,12 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, /* Sanity check SQ size before proceeding */ if (ucmd->log_sq_stride > max_sq_stride || ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { - ibdev_err(&hr_dev->ib_dev, "Failed to check SQ stride size\n"); + ibdev_err(&hr_dev->ib_dev, "failed to check SQ stride size.\n"); return -EINVAL; } if (cap->max_send_sge > hr_dev->caps.max_sq_sg) { - ibdev_err(&hr_dev->ib_dev, "Failed to check SQ SGE size %d\n", + ibdev_err(&hr_dev->ib_dev, "failed to check SQ SGE size %u.\n", cap->max_send_sge); return -EINVAL; } @@ -479,9 +545,7 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, return ret; } - ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); - if (ret) - return ret; + set_ext_sge_param(hr_dev, cnt, hr_qp, cap); hr_qp->sq.wqe_shift = ucmd->log_sq_stride; hr_qp->sq.wqe_cnt = cnt; @@ -546,7 +610,6 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, { struct ib_device *ibdev = &hr_dev->ib_dev; u32 cnt; - int ret; if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || cap->max_send_sge > hr_dev->caps.max_sq_sg) { @@ -558,7 +621,7 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes)); if (cnt > hr_dev->caps.max_wqes) { - ibdev_err(ibdev, "failed to check WQE num, WQE num = %d.\n", + ibdev_err(ibdev, "failed to check WQE num, WQE num = %u.\n", cnt); return -EINVAL; } @@ -566,9 +629,7 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); hr_qp->sq.wqe_cnt = cnt; - ret = 
set_extend_sge_param(hr_dev, cnt, hr_qp, cap); - if (ret) - return ret; + set_ext_sge_param(hr_dev, cnt, hr_qp, cap); /* sync the parameters of kernel QP to user's configuration */ cap->max_send_wr = cnt; @@ -725,13 +786,17 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_device *ibdev = &hr_dev->ib_dev; int ret; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SDI_MODE) + hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB; + if (udata) { if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) { ret = hns_roce_db_map_user(uctx, udata, ucmd->sdb_addr, &hr_qp->sdb); if (ret) { ibdev_err(ibdev, - "Failed to map user SQ doorbell\n"); + "failed to map user SQ doorbell, ret = %d.\n", + ret); goto err_out; } hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; @@ -743,7 +808,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, &hr_qp->rdb); if (ret) { ibdev_err(ibdev, - "Failed to map user RQ doorbell\n"); + "failed to map user RQ doorbell, ret = %d.\n", + ret); goto err_sdb; } hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB; @@ -760,7 +826,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0); if (ret) { ibdev_err(ibdev, - "Failed to alloc kernel RQ doorbell\n"); + "failed to alloc kernel RQ doorbell, ret = %d.\n", + ret); goto err_out; } *hr_qp->rdb.db_record = 0; @@ -803,14 +870,14 @@ static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev, sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL); if (ZERO_OR_NULL_PTR(sq_wrid)) { - ibdev_err(ibdev, "Failed to alloc SQ wrid\n"); + ibdev_err(ibdev, "failed to alloc SQ wrid.\n"); return -ENOMEM; } if (hr_qp->rq.wqe_cnt) { rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL); if (ZERO_OR_NULL_PTR(rq_wrid)) { - ibdev_err(ibdev, "Failed to alloc RQ wrid\n"); + ibdev_err(ibdev, "failed to alloc RQ wrid.\n"); ret = -ENOMEM; goto err_sq; } @@ -860,29 +927,25 @@ static int 
set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, } if (udata) { - if (ib_copy_from_udata(ucmd, udata, sizeof(*ucmd))) { - ibdev_err(ibdev, "Failed to copy QP ucmd\n"); - return -EFAULT; + ret = ib_copy_from_udata(ucmd, udata, + min(udata->inlen, sizeof(*ucmd))); + if (ret) { + ibdev_err(ibdev, + "failed to copy QP ucmd, ret = %d\n", ret); + return ret; } ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); if (ret) - ibdev_err(ibdev, "Failed to set user SQ size\n"); + ibdev_err(ibdev, + "failed to set user SQ size, ret = %d.\n", + ret); } else { - if (init_attr->create_flags & - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - ibdev_err(ibdev, "Failed to check multicast loopback\n"); - return -EINVAL; - } - - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { - ibdev_err(ibdev, "Failed to check ipoib ud lso\n"); - return -EINVAL; - } - ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) - ibdev_err(ibdev, "Failed to set kernel SQ size\n"); + ibdev_err(ibdev, + "failed to set kernel SQ size, ret = %d.\n", + ret); } return ret; @@ -906,47 +969,53 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->state = IB_QPS_RESET; hr_qp->flush_flag = 0; + if (init_attr->create_flags) + return -EOPNOTSUPP; + ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) { - ibdev_err(ibdev, "Failed to set QP param\n"); + ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret); return ret; } if (!udata) { ret = alloc_kernel_wrid(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc wrid\n"); + ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n", + ret); return ret; } } ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP doorbell\n"); + ibdev_err(ibdev, "failed to alloc QP doorbell, ret = %d.\n", + ret); goto err_wrid; } ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to 
alloc QP buffer\n"); + ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret); goto err_db; } ret = alloc_qpn(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QPN\n"); + ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret); goto err_buf; } ret = alloc_qpc(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP context\n"); + ibdev_err(ibdev, "failed to alloc QP context, ret = %d.\n", + ret); goto err_qpn; } ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr); if (ret) { - ibdev_err(ibdev, "Failed to store QP\n"); + ibdev_err(ibdev, "failed to store QP, ret = %d.\n", ret); goto err_qpc; } @@ -1003,6 +1072,30 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, kfree(hr_qp); } +static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type, + bool is_user) +{ + switch (type) { + case IB_QPT_UD: + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && + is_user) + goto out; + fallthrough; + case IB_QPT_RC: + case IB_QPT_GSI: + break; + default: + goto out; + } + + return 0; + +out: + ibdev_err(&hr_dev->ib_dev, "not support QP type %d\n", type); + + return -EOPNOTSUPP; +} + struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) @@ -1012,15 +1105,9 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, struct hns_roce_qp *hr_qp; int ret; - switch (init_attr->qp_type) { - case IB_QPT_RC: - case IB_QPT_GSI: - break; - default: - ibdev_err(ibdev, "not support QP type %d\n", - init_attr->qp_type); - return ERR_PTR(-EOPNOTSUPP); - } + ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata); + if (ret) + return ERR_PTR(ret); hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); if (!hr_qp) @@ -1035,10 +1122,11 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, if (ret) { ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n", init_attr->qp_type, ret); - ibdev_err(ibdev, "Create GSI QP failed!\n"); + kfree(hr_qp); return ERR_PTR(ret); } + return 
&hr_qp->ibqp; } @@ -1091,9 +1179,8 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) { - ibdev_err(&hr_dev->ib_dev, - "attr port_num invalid.attr->port_num=%d\n", - attr->port_num); + ibdev_err(&hr_dev->ib_dev, "invalid attr, port_num = %u.\n", + attr->port_num); return -EINVAL; } @@ -1101,8 +1188,8 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port; if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) { ibdev_err(&hr_dev->ib_dev, - "attr pkey_index invalid.attr->pkey_index=%d\n", - attr->pkey_index); + "invalid attr, pkey_index = %u.\n", + attr->pkey_index); return -EINVAL; } } @@ -1110,16 +1197,16 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) { ibdev_err(&hr_dev->ib_dev, - "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n", - attr->max_rd_atomic); + "invalid attr, max_rd_atomic = %u.\n", + attr->max_rd_atomic); return -EINVAL; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) { ibdev_err(&hr_dev->ib_dev, - "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n", - attr->max_dest_rd_atomic); + "invalid attr, max_dest_rd_atomic = %u.\n", + attr->max_dest_rd_atomic); return -EINVAL; } @@ -1244,22 +1331,22 @@ static inline void *get_wqe(struct hns_roce_qp *hr_qp, int offset) return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); } -void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n) { return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift)); } -void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, 
unsigned int n) { return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift)); } -void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n) { return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift)); } -bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, +bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq, struct ib_cq *ib_cq) { struct hns_roce_cq *hr_cq; @@ -1280,22 +1367,24 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - int reserved_from_top = 0; - int reserved_from_bot; - int ret; + unsigned int reserved_from_bot; + unsigned int i; mutex_init(&qp_table->scc_mutex); xa_init(&hr_dev->qp_table_xa); reserved_from_bot = hr_dev->caps.reserved_qps; - ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps, - hr_dev->caps.num_qps - 1, reserved_from_bot, - reserved_from_top); - if (ret) { - dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n", - ret); - return ret; + for (i = 0; i < reserved_from_bot; i++) { + hr_dev->qp_table.bank[get_qp_bankid(i)].inuse++; + hr_dev->qp_table.bank[get_qp_bankid(i)].min++; + } + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) { + ida_init(&hr_dev->qp_table.bank[i].ida); + hr_dev->qp_table.bank[i].max = hr_dev->caps.num_qps / + HNS_ROCE_QP_BANK_NUM - 1; + hr_dev->qp_table.bank[i].next = hr_dev->qp_table.bank[i].min; } return 0; @@ -1303,5 +1392,8 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) { - hns_roce_bitmap_cleanup(&hr_dev->qp_table.bitmap); + int i; + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) + ida_destroy(&hr_dev->qp_table.bank[i].ida); } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 8caf74e44efd..c4ae57e4173a 100644 --- 
a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -4,7 +4,6 @@ */ #include <rdma/ib_umem.h> -#include <rdma/hns-abi.h> #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" @@ -93,7 +92,8 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); if (ret < 1) { - ibdev_err(ibdev, "Failed to find mtr for SRQ WQE\n"); + ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", + ret); return -ENOBUFS; } @@ -101,32 +101,34 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, ARRAY_SIZE(mtts_idx), &dma_handle_idx); if (ret < 1) { - ibdev_err(ibdev, "Failed to find mtr for SRQ idx\n"); + ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", + ret); return -ENOBUFS; } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ number, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ number, ret = %d.\n", ret); return -ENOMEM; } ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); if (ret) { - ibdev_err(ibdev, "Failed to get SRQC table, err %d\n", ret); + ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret); goto err_out; } ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); if (ret) { - ibdev_err(ibdev, "Failed to store SRQC, err %d\n", ret); + ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret); goto err_put; } mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR_OR_NULL(mailbox)) { ret = -ENOMEM; - ibdev_err(ibdev, "Failed to alloc mailbox for SRQC\n"); + ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n"); goto err_xa; } @@ -137,7 +139,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_hw_create_srq(hr_dev, mailbox, 
srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { - ibdev_err(ibdev, "Failed to config SRQC, err %d\n", ret); + ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret); goto err_xa; } @@ -198,7 +200,8 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, hr_dev->caps.srqwqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); if (err) - ibdev_err(ibdev, "Failed to alloc SRQ buf mtr, err %d\n", err); + ibdev_err(ibdev, + "failed to alloc SRQ buf mtr, ret = %d.\n", err); return err; } @@ -229,18 +232,18 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); if (err) { - ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, err %d\n", err); + ibdev_err(ibdev, + "failed to alloc SRQ idx mtr, ret = %d.\n", err); return err; } if (!udata) { idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); if (!idx_que->bitmap) { - ibdev_err(ibdev, "Failed to alloc SRQ idx bitmap\n"); + ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n"); err = -ENOMEM; goto err_idx_mtr; } - } return 0; @@ -288,6 +291,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, int ret; u32 cqn; + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + /* Check the actual SRQ wqe and SRQ sge num */ if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) @@ -300,9 +307,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->max_gs = init_attr->attr.max_sge; if (udata) { - ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + ret = ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd))); if (ret) { - ibdev_err(ibdev, "Failed to copy SRQ udata, err %d\n", + ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n", ret); return ret; } @@ -310,20 +318,21 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr); if (ret) 
{ - ibdev_err(ibdev, "Failed to alloc SRQ buffer, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ buffer, ret = %d.\n", ret); return ret; } ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ idx, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret); goto err_buf_alloc; } if (!udata) { ret = alloc_srq_wrid(hr_dev, srq); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ wrid, err %d\n", + ibdev_err(ibdev, "failed to alloc SRQ wrid, ret = %d.\n", ret); goto err_idx_alloc; } @@ -335,7 +344,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ context, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ context, ret = %d.\n", ret); goto err_wrid_alloc; } @@ -343,11 +353,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, resp.srqn = srq->srqn; if (udata) { - if (ib_copy_to_udata(udata, &resp, - min(udata->outlen, sizeof(resp)))) { - ret = -EFAULT; + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + if (ret) goto err_srqc_alloc; - } } return 0; diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 832b80de004f..6a79502c8b53 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -274,7 +274,6 @@ struct i40iw_device { u8 max_sge; u8 iw_status; u8 send_term_ok; - bool push_mode; /* Initialized from parameter passed to driver */ /* x710 specific */ struct mutex pbl_mutex; diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 3053c345a5a3..9acc0ecc9a43 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2426,7 +2426,7 @@ static void i40iw_handle_rst_pkt(struct i40iw_cm_node *cm_node, } break; case I40IW_CM_STATE_MPAREQ_RCVD: - atomic_add_return(1, 
&cm_node->passive_state); + atomic_inc(&cm_node->passive_state); break; case I40IW_CM_STATE_ESTABLISHED: case I40IW_CM_STATE_SYN_RCVD: @@ -3020,7 +3020,7 @@ static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 i40iw_cleanup_retrans_entry(cm_node); if (!loopback) { - passive_state = atomic_add_return(1, &cm_node->passive_state); + passive_state = atomic_inc_return(&cm_node->passive_state); if (passive_state == I40IW_SEND_RESET_EVENT) { cm_node->state = I40IW_CM_STATE_CLOSED; i40iw_rem_ref_cm_node(cm_node); @@ -3678,7 +3678,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -EINVAL; } - passive_state = atomic_add_return(1, &cm_node->passive_state); + passive_state = atomic_inc_return(&cm_node->passive_state); if (passive_state == I40IW_SEND_RESET_EVENT) { i40iw_rem_ref_cm_node(cm_node); return -ECONNRESET; diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 86d3f8aff329..c943d491b72b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -820,46 +820,6 @@ static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done( } /** - * i40iw_sc_manage_push_page - Handle push page - * @cqp: struct for cqp hw - * @info: push page info - * @scratch: u64 saved to be used during cqp completion - * @post_sq: flag for cqp db to ring - */ -static enum i40iw_status_code i40iw_sc_manage_push_page( - struct i40iw_sc_cqp *cqp, - struct i40iw_cqp_manage_push_page_info *info, - u64 scratch, - bool post_sq) -{ - u64 *wqe; - u64 header; - - if (info->push_idx >= I40IW_MAX_PUSH_PAGE_COUNT) - return I40IW_ERR_INVALID_PUSH_PAGE_INDEX; - - wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); - if (!wqe) - return I40IW_ERR_RING_FULL; - - set_64bit_val(wqe, 16, info->qs_handle); - - header = LS_64(info->push_idx, I40IW_CQPSQ_MPP_PPIDX) | - LS_64(I40IW_CQP_OP_MANAGE_PUSH_PAGES, I40IW_CQPSQ_OPCODE) | - LS_64(cqp->polarity, 
I40IW_CQPSQ_WQEVALID) | - LS_64(info->free_page, I40IW_CQPSQ_MPP_FREE_PAGE); - - i40iw_insert_wqe_hdr(wqe, header); - - i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_PUSH_PAGES WQE", - wqe, I40IW_CQP_WQE_SIZE * 8); - - if (post_sq) - i40iw_sc_cqp_post_sq(cqp); - return 0; -} - -/** * i40iw_sc_manage_hmc_pm_func_table - manage of function table * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion @@ -2859,9 +2819,7 @@ static enum i40iw_status_code i40iw_sc_qp_setctx( LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) | LS_64(qp->xmit_tph_en, I40IWQPC_XMITTPHEN) | LS_64(qp->rq_tph_en, I40IWQPC_RQTPHEN) | - LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN) | - LS_64(info->push_idx, I40IWQPC_PPIDX) | - LS_64(info->push_mode_en, I40IWQPC_PMENA); + LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN); set_64bit_val(qp_ctx, 8, qp->sq_pa); set_64bit_val(qp_ctx, 16, qp->rq_pa); @@ -4291,13 +4249,6 @@ static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev, pcmdinfo->in.u.add_arp_cache_entry.scratch, pcmdinfo->post_sq); break; - case OP_MANAGE_PUSH_PAGE: - status = i40iw_sc_manage_push_page( - pcmdinfo->in.u.manage_push_page.cqp, - &pcmdinfo->in.u.manage_push_page.info, - pcmdinfo->in.u.manage_push_page.scratch, - pcmdinfo->post_sq); - break; case OP_UPDATE_PE_SDS: /* case I40IW_CQP_OP_UPDATE_PE_SDS */ status = i40iw_update_pe_sds( @@ -5098,7 +5049,7 @@ void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi) i40iw_hw_stats_stop_timer(vsi); } -static struct i40iw_cqp_ops iw_cqp_ops = { +static const struct i40iw_cqp_ops iw_cqp_ops = { .cqp_init = i40iw_sc_cqp_init, .cqp_create = i40iw_sc_cqp_create, .cqp_post_sq = i40iw_sc_cqp_post_sq, @@ -5107,7 +5058,7 @@ static struct i40iw_cqp_ops iw_cqp_ops = { .poll_for_cqp_op_done = i40iw_sc_poll_for_cqp_op_done }; -static struct i40iw_ccq_ops iw_ccq_ops = { +static const struct i40iw_ccq_ops iw_ccq_ops = { .ccq_init = i40iw_sc_ccq_init, .ccq_create = i40iw_sc_ccq_create, .ccq_destroy = i40iw_sc_ccq_destroy, @@ -5116,7 
+5067,7 @@ static struct i40iw_ccq_ops iw_ccq_ops = { .ccq_arm = i40iw_sc_ccq_arm }; -static struct i40iw_ceq_ops iw_ceq_ops = { +static const struct i40iw_ceq_ops iw_ceq_ops = { .ceq_init = i40iw_sc_ceq_init, .ceq_create = i40iw_sc_ceq_create, .cceq_create_done = i40iw_sc_cceq_create_done, @@ -5126,7 +5077,7 @@ static struct i40iw_ceq_ops iw_ceq_ops = { .process_ceq = i40iw_sc_process_ceq }; -static struct i40iw_aeq_ops iw_aeq_ops = { +static const struct i40iw_aeq_ops iw_aeq_ops = { .aeq_init = i40iw_sc_aeq_init, .aeq_create = i40iw_sc_aeq_create, .aeq_destroy = i40iw_sc_aeq_destroy, @@ -5137,11 +5088,11 @@ static struct i40iw_aeq_ops iw_aeq_ops = { }; /* iwarp pd ops */ -static struct i40iw_pd_ops iw_pd_ops = { +static const struct i40iw_pd_ops iw_pd_ops = { .pd_init = i40iw_sc_pd_init, }; -static struct i40iw_priv_qp_ops iw_priv_qp_ops = { +static const struct i40iw_priv_qp_ops iw_priv_qp_ops = { .qp_init = i40iw_sc_qp_init, .qp_create = i40iw_sc_qp_create, .qp_modify = i40iw_sc_qp_modify, @@ -5156,14 +5107,14 @@ static struct i40iw_priv_qp_ops iw_priv_qp_ops = { .iw_mr_fast_register = i40iw_sc_mr_fast_register }; -static struct i40iw_priv_cq_ops iw_priv_cq_ops = { +static const struct i40iw_priv_cq_ops iw_priv_cq_ops = { .cq_init = i40iw_sc_cq_init, .cq_create = i40iw_sc_cq_create, .cq_destroy = i40iw_sc_cq_destroy, .cq_modify = i40iw_sc_cq_modify, }; -static struct i40iw_mr_ops iw_mr_ops = { +static const struct i40iw_mr_ops iw_mr_ops = { .alloc_stag = i40iw_sc_alloc_stag, .mr_reg_non_shared = i40iw_sc_mr_reg_non_shared, .mr_reg_shared = i40iw_sc_mr_reg_shared, @@ -5172,8 +5123,7 @@ static struct i40iw_mr_ops iw_mr_ops = { .mw_alloc = i40iw_sc_mw_alloc }; -static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { - .manage_push_page = i40iw_sc_manage_push_page, +static const struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { .manage_hmc_pm_func_table = i40iw_sc_manage_hmc_pm_func_table, .set_hmc_resource_profile = i40iw_sc_set_hmc_resource_profile, .commit_fpm_values = 
i40iw_sc_commit_fpm_values, @@ -5195,7 +5145,7 @@ static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { .update_resume_qp = i40iw_sc_resume_qp }; -static struct i40iw_hmc_ops iw_hmc_ops = { +static const struct i40iw_hmc_ops iw_hmc_ops = { .init_iw_hmc = i40iw_sc_init_iw_hmc, .parse_fpm_query_buf = i40iw_sc_parse_fpm_query_buf, .configure_iw_fpm = i40iw_sc_configure_iw_fpm, diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index e8367d67575d..86d5a33c57cc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -40,11 +40,6 @@ #define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024) #define I40IW_VF_DB_ADDR_OFFSET (64 * 1024) -#define I40IW_PUSH_OFFSET (4 * 1024 * 1024) -#define I40IW_PF_FIRST_PUSH_PAGE_INDEX 16 -#define I40IW_VF_PUSH_OFFSET ((8 + 64) * 1024) -#define I40IW_VF_FIRST_PUSH_PAGE_INDEX 2 - #define I40IW_PE_DB_SIZE_4M 1 #define I40IW_PE_DB_SIZE_8M 2 @@ -402,7 +397,6 @@ #define I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE 0x0e #define I40IW_CQP_OP_MANAGE_ARP 0x0f #define I40IW_CQP_OP_MANAGE_VF_PBLE_BP 0x10 -#define I40IW_CQP_OP_MANAGE_PUSH_PAGES 0x11 #define I40IW_CQP_OP_QUERY_RDMA_FEATURES 0x12 #define I40IW_CQP_OP_UPLOAD_CONTEXT 0x13 #define I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY 0x14 @@ -843,7 +837,6 @@ #define I40IW_CQPSQ_MVPBP_PD_PLPBA_MASK \ (0x1fffffffffffffffULL << I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT) -/* Manage Push Page - MPP */ #define I40IW_INVALID_PUSH_PAGE_INDEX 0xffff #define I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT 0 @@ -1352,9 +1345,6 @@ #define I40IWQPSQ_ADDFRAGCNT_SHIFT 38 #define I40IWQPSQ_ADDFRAGCNT_MASK (0x7ULL << I40IWQPSQ_ADDFRAGCNT_SHIFT) -#define I40IWQPSQ_PUSHWQE_SHIFT 56 -#define I40IWQPSQ_PUSHWQE_MASK (1ULL << I40IWQPSQ_PUSHWQE_SHIFT) - #define I40IWQPSQ_STREAMMODE_SHIFT 58 #define I40IWQPSQ_STREAMMODE_MASK (1ULL << I40IWQPSQ_STREAMMODE_SHIFT) @@ -1740,18 +1730,17 @@ enum i40iw_alignment { #define OP_MW_ALLOC 20 #define OP_QP_FLUSH_WQES 21 #define 
OP_ADD_ARP_CACHE_ENTRY 22 -#define OP_MANAGE_PUSH_PAGE 23 -#define OP_UPDATE_PE_SDS 24 -#define OP_MANAGE_HMC_PM_FUNC_TABLE 25 -#define OP_SUSPEND 26 -#define OP_RESUME 27 -#define OP_MANAGE_VF_PBLE_BP 28 -#define OP_QUERY_FPM_VALUES 29 -#define OP_COMMIT_FPM_VALUES 30 -#define OP_REQUESTED_COMMANDS 31 -#define OP_COMPLETED_COMMANDS 32 -#define OP_GEN_AE 33 -#define OP_QUERY_RDMA_FEATURES 34 -#define OP_SIZE_CQP_STAT_ARRAY 35 +#define OP_UPDATE_PE_SDS 23 +#define OP_MANAGE_HMC_PM_FUNC_TABLE 24 +#define OP_SUSPEND 25 +#define OP_RESUME 26 +#define OP_MANAGE_VF_PBLE_BP 27 +#define OP_QUERY_FPM_VALUES 28 +#define OP_COMMIT_FPM_VALUES 29 +#define OP_REQUESTED_COMMANDS 30 +#define OP_COMPLETED_COMMANDS 31 +#define OP_GEN_AE 32 +#define OP_QUERY_RDMA_FEATURES 33 +#define OP_SIZE_CQP_STAT_ARRAY 34 #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h index d1c5855bd8c3..36a19c4e5bba 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_status.h +++ b/drivers/infiniband/hw/i40iw/i40iw_status.h @@ -61,7 +61,6 @@ enum i40iw_status_code { I40IW_ERR_QUEUE_EMPTY = -22, I40IW_ERR_INVALID_ALIGNMENT = -23, I40IW_ERR_FLUSHED_QUEUE = -24, - I40IW_ERR_INVALID_PUSH_PAGE_INDEX = -25, I40IW_ERR_INVALID_INLINE_DATA_SIZE = -26, I40IW_ERR_TIMEOUT = -27, I40IW_ERR_OPCODE_MISMATCH = -28, diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index c3babf3cbb8e..394e182686cf 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -387,7 +387,6 @@ struct i40iw_sc_qp { u8 *q2_buf; u64 qp_compl_ctx; u16 qs_handle; - u16 push_idx; u8 sq_tph_val; u8 rq_tph_val; u8 qp_state; @@ -493,16 +492,16 @@ struct i40iw_sc_dev { struct i40iw_sc_aeq *aeq; struct i40iw_sc_ceq *ceq[I40IW_CEQ_MAX_COUNT]; struct i40iw_sc_cq *ccq; - struct i40iw_cqp_ops *cqp_ops; - struct i40iw_ccq_ops *ccq_ops; - struct i40iw_ceq_ops *ceq_ops; - struct i40iw_aeq_ops *aeq_ops; - struct 
i40iw_pd_ops *iw_pd_ops; - struct i40iw_priv_qp_ops *iw_priv_qp_ops; - struct i40iw_priv_cq_ops *iw_priv_cq_ops; - struct i40iw_mr_ops *mr_ops; - struct i40iw_cqp_misc_ops *cqp_misc_ops; - struct i40iw_hmc_ops *hmc_ops; + const struct i40iw_cqp_ops *cqp_ops; + const struct i40iw_ccq_ops *ccq_ops; + const struct i40iw_ceq_ops *ceq_ops; + const struct i40iw_aeq_ops *aeq_ops; + const struct i40iw_pd_ops *iw_pd_ops; + const struct i40iw_priv_qp_ops *iw_priv_qp_ops; + const struct i40iw_priv_cq_ops *iw_priv_cq_ops; + const struct i40iw_mr_ops *mr_ops; + const struct i40iw_cqp_misc_ops *cqp_misc_ops; + const struct i40iw_hmc_ops *hmc_ops; struct i40iw_vchnl_if vchnl_if; const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; @@ -749,8 +748,6 @@ struct i40iw_qp_host_ctx_info { struct i40iwarp_offload_info *iwarp_info; u32 send_cq_num; u32 rcv_cq_num; - u16 push_idx; - bool push_mode_en; bool tcp_info_valid; bool iwarp_info_valid; bool err_rq_idx_valid; @@ -937,12 +934,6 @@ struct i40iw_local_mac_ipaddr_entry_info { u8 entry_idx; }; -struct i40iw_cqp_manage_push_page_info { - u32 push_idx; - u16 qs_handle; - u8 free_page; -}; - struct i40iw_qp_flush_info { u16 sq_minor_code; u16 sq_major_code; @@ -1114,9 +1105,6 @@ struct i40iw_mr_ops { }; struct i40iw_cqp_misc_ops { - enum i40iw_status_code (*manage_push_page)(struct i40iw_sc_cqp *, - struct i40iw_cqp_manage_push_page_info *, - u64, bool); enum i40iw_status_code (*manage_hmc_pm_func_table)(struct i40iw_sc_cqp *, u64, u8, bool, bool); enum i40iw_status_code (*set_hmc_resource_profile)(struct i40iw_sc_cqp *, @@ -1254,12 +1242,6 @@ struct cqp_info { } manage_vf_pble_bp; struct { - struct i40iw_sc_cqp *cqp; - struct i40iw_cqp_manage_push_page_info info; - u64 scratch; - } manage_push_page; - - struct { struct i40iw_sc_dev *dev; struct i40iw_upload_context_info info; u64 scratch; diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 8afa5a67a86b..c3633c9944db 100644 --- 
a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -115,17 +115,6 @@ void i40iw_qp_post_wr(struct i40iw_qp_uk *qp) } /** - * i40iw_qp_ring_push_db - ring qp doorbell - * @qp: hw qp ptr - * @wqe_idx: wqe index - */ -static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx) -{ - set_32bit_val(qp->push_db, 0, LS_32((wqe_idx >> 2), I40E_PFPE_WQEALLOC_WQE_DESC_INDEX) | qp->qp_id); - qp->initial_ring.head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring); -} - -/** * i40iw_qp_get_next_send_wqe - return next wqe ptr * @qp: hw qp ptr * @wqe_idx: return wqe index @@ -426,7 +415,6 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, u64 *wqe; u8 *dest, *src; struct i40iw_inline_rdma_write *op_info; - u64 *push; u64 header = 0; u32 wqe_idx; enum i40iw_status_code ret_code; @@ -453,7 +441,6 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) | LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) | LS_64(1, I40IWQPSQ_INLINEDATAFLAG) | - LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) | LS_64(read_fence, I40IWQPSQ_READFENCE) | LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) | LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) | @@ -475,14 +462,8 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, set_64bit_val(wqe, 24, header); - if (qp->push_db) { - push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20); - memcpy(push, wqe, (op_info->len > 16) ? 
op_info->len + 16 : 32); - i40iw_qp_ring_push_db(qp, wqe_idx); - } else { - if (post_sq) - i40iw_qp_post_wr(qp); - } + if (post_sq) + i40iw_qp_post_wr(qp); return 0; } @@ -507,7 +488,6 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, enum i40iw_status_code ret_code; bool read_fence = false; u8 wqe_size; - u64 *push; op_info = &info->op.inline_send; if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE) @@ -526,7 +506,6 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, LS_64(info->op_type, I40IWQPSQ_OPCODE) | LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) | LS_64(1, I40IWQPSQ_INLINEDATAFLAG) | - LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) | LS_64(read_fence, I40IWQPSQ_READFENCE) | LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) | LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) | @@ -548,14 +527,8 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, set_64bit_val(wqe, 24, header); - if (qp->push_db) { - push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20); - memcpy(push, wqe, (op_info->len > 16) ? 
op_info->len + 16 : 32); - i40iw_qp_ring_push_db(qp, wqe_idx); - } else { - if (post_sq) - i40iw_qp_post_wr(qp); - } + if (post_sq) + i40iw_qp_post_wr(qp); return 0; } @@ -772,7 +745,6 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, q_type = (u8)RS_64(qword3, I40IW_CQ_SQ); info->error = (bool)RS_64(qword3, I40IW_CQ_ERROR); - info->push_dropped = (bool)RS_64(qword3, I40IWCQ_PSHDROP); if (info->error) { info->comp_status = I40IW_COMPL_STATUS_FLUSHED; info->major_err = (bool)RS_64(qword3, I40IW_CQ_MAJERR); @@ -951,7 +923,6 @@ enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth) static const struct i40iw_qp_uk_ops iw_qp_uk_ops = { .iw_qp_post_wr = i40iw_qp_post_wr, - .iw_qp_ring_push_db = i40iw_qp_ring_push_db, .iw_rdma_write = i40iw_rdma_write, .iw_rdma_read = i40iw_rdma_read, .iw_send = i40iw_send, @@ -1009,11 +980,7 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp, qp->wqe_alloc_reg = info->wqe_alloc_reg; qp->qp_id = info->qp_id; - qp->sq_size = info->sq_size; - qp->push_db = info->push_db; - qp->push_wqe = info->push_wqe; - qp->max_sq_frag_cnt = info->max_sq_frag_cnt; sq_ring_size = qp->sq_size << sqshift; diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h index b125925641e0..93fc3081dd65 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_user.h +++ b/drivers/infiniband/hw/i40iw/i40iw_user.h @@ -64,13 +64,11 @@ enum i40iw_device_capabilities_const { I40IW_MAX_SGE_RD = 1, I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647, I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647, - I40IW_MAX_PUSH_PAGE_COUNT = 4096, I40IW_MAX_PE_ENABLED_VF_COUNT = 32, I40IW_MAX_VF_FPM_ID = 47, I40IW_MAX_VF_PER_PF = 127, I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496, I40IW_MAX_INLINE_DATA_SIZE = 48, - I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48, I40IW_MAX_IRD_SIZE = 64, I40IW_MAX_ORD_SIZE = 127, I40IW_MAX_WQ_ENTRIES = 2048, @@ -272,7 +270,6 @@ struct i40iw_cq_poll_info { u16 minor_err; u8 op_type; 
bool stag_invalid_set; - bool push_dropped; bool error; bool is_srq; bool solicited_event; @@ -280,7 +277,6 @@ struct i40iw_cq_poll_info { struct i40iw_qp_uk_ops { void (*iw_qp_post_wr)(struct i40iw_qp_uk *); - void (*iw_qp_ring_push_db)(struct i40iw_qp_uk *, u32); enum i40iw_status_code (*iw_rdma_write)(struct i40iw_qp_uk *, struct i40iw_post_sq_info *, bool); enum i40iw_status_code (*iw_rdma_read)(struct i40iw_qp_uk *, @@ -340,8 +336,6 @@ struct i40iw_qp_uk { struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array; u64 *rq_wrid_array; u64 *shadow_area; - u32 *push_db; - u64 *push_wqe; struct i40iw_ring sq_ring; struct i40iw_ring rq_ring; struct i40iw_ring initial_ring; @@ -381,8 +375,6 @@ struct i40iw_qp_uk_init_info { u64 *shadow_area; struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array; u64 *rq_wrid_array; - u32 *push_db; - u64 *push_wqe; u32 qp_id; u32 sq_size; u32 rq_size; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 533f3caecb7a..65aedfe57e77 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -180,78 +180,6 @@ static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) } /** - * i40iw_alloc_push_page - allocate a push page for qp - * @iwdev: iwarp device - * @qp: hardware control qp - */ -static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp) -{ - struct i40iw_cqp_request *cqp_request; - struct cqp_commands_info *cqp_info; - enum i40iw_status_code status; - - if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX) - return; - - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true); - if (!cqp_request) - return; - - atomic_inc(&cqp_request->refcount); - - cqp_info = &cqp_request->info; - cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE; - cqp_info->post_sq = 1; - - cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; - cqp_info->in.u.manage_push_page.info.free_page = 0; - cqp_info->in.u.manage_push_page.cqp = 
&iwdev->cqp.sc_cqp; - cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; - - status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (!status) - qp->push_idx = cqp_request->compl_info.op_ret_val; - else - i40iw_pr_err("CQP-OP Push page fail"); - i40iw_put_cqp_request(&iwdev->cqp, cqp_request); -} - -/** - * i40iw_dealloc_push_page - free a push page for qp - * @iwdev: iwarp device - * @qp: hardware control qp - */ -static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp) -{ - struct i40iw_cqp_request *cqp_request; - struct cqp_commands_info *cqp_info; - enum i40iw_status_code status; - - if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) - return; - - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); - if (!cqp_request) - return; - - cqp_info = &cqp_request->info; - cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE; - cqp_info->post_sq = 1; - - cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx; - cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; - cqp_info->in.u.manage_push_page.info.free_page = 1; - cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp; - cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; - - status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (!status) - qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; - else - i40iw_pr_err("CQP-OP Push page fail"); -} - -/** * i40iw_alloc_pd - allocate protection domain * @pd: PD pointer * @udata: user data @@ -348,7 +276,6 @@ void i40iw_free_qp_resources(struct i40iw_qp *iwqp) u32 qp_num = iwqp->ibqp.qp_num; i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp); - i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp); if (qp_num) i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num); if (iwpbl->pbl_allocated) @@ -533,7 +460,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return ERR_PTR(-ENODEV); if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); if (init_attr->cap.max_inline_data > 
I40IW_MAX_INLINE_DATA_SIZE) init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE; @@ -561,8 +488,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, qp = &iwqp->sc_qp; qp->back_qp = (void *)iwqp; - qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; - iwqp->iwdev = iwdev; iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info; @@ -606,8 +531,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, err_code = -EOPNOTSUPP; goto error; } - if (iwdev->push_mode) - i40iw_alloc_push_page(iwdev, qp); if (udata) { err_code = ib_copy_from_udata(&req, udata, sizeof(req)); if (err_code) { @@ -666,13 +589,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, ctx_info->iwarp_info_valid = true; ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; - if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) { - ctx_info->push_mode_en = false; - } else { - ctx_info->push_mode_en = true; - ctx_info->push_idx = qp->push_idx; - } - ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)iwqp->host_ctx.va, ctx_info); @@ -712,7 +628,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, uresp.actual_sq_size = sq_size; uresp.actual_rq_size = rq_size; uresp.qp_id = qp_num; - uresp.push_idx = qp->push_idx; + uresp.push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (err_code) { i40iw_pr_err("copy_to_udata failed\n"); @@ -832,6 +748,9 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, u32 err; unsigned long flags; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + memset(&info, 0, sizeof(info)); ctx_info = &iwqp->ctx_info; iwarp_info = &iwqp->iwarp_info; @@ -1081,6 +1000,9 @@ static int i40iw_create_cq(struct ib_cq *ibcq, int err_code; int entries = attr->cqe; + if (attr->flags) + return -EOPNOTSUPP; + if (iwdev->closing) return -ENODEV; @@ -2033,7 +1955,7 @@ static ssize_t hw_rev_show(struct device *dev, 
rdma_device_to_drv_device(dev, struct i40iw_ib_device, ibdev); u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev; - return sprintf(buf, "%x\n", hw_rev); + return sysfs_emit(buf, "%x\n", hw_rev); } static DEVICE_ATTR_RO(hw_rev); @@ -2043,7 +1965,7 @@ static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "I40IW\n"); + return sysfs_emit(buf, "I40IW\n"); } static DEVICE_ATTR_RO(hca_type); @@ -2053,7 +1975,7 @@ static DEVICE_ATTR_RO(hca_type); static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%.*s\n", 32, "I40IW Board ID"); + return sysfs_emit(buf, "%.*s\n", 32, "I40IW Board ID"); } static DEVICE_ATTR_RO(board_id); @@ -2661,27 +2583,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.node_type = RDMA_NODE_RNIC; ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr); - iwibdev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_POST_SEND); iwibdev->ibdev.phys_port_cnt = 1; iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = 
&pcidev->dev; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 8bd16474708f..f3ace85552f3 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1523,6 +1523,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc return; } else *slave_id = slave; + break; default: /* nothing */; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index cd0fba6b0964..e3cd402c079a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2024,7 +2024,8 @@ static ssize_t hca_type_show(struct device *device, { struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); + + return sysfs_emit(buf, "MT%d\n", dev->dev->persist->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2033,7 +2034,8 @@ static ssize_t hw_rev_show(struct device *device, { struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "%x\n", dev->dev->rev_id); + + return sysfs_emit(buf, "%x\n", dev->dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -2043,8 +2045,7 @@ static ssize_t board_id_show(struct device *device, struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, - dev->dev->board_id); + return sysfs_emit(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -2264,10 +2265,7 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, u64 release_mac = MLX4_IB_INVALID_MAC; struct mlx4_ib_qp *qp; - read_lock(&dev_base_lock); new_smac = mlx4_mac_to_u64(dev->dev_addr); - read_unlock(&dev_base_lock); - atomic64_set(&ibdev->iboe.mac[port - 1], new_smac); /* no need for update QP1 and mac registration in non-SRIOV */ @@ -2657,73 +2655,25 @@ static void *mlx4_ib_add(struct mlx4_dev 
*dev) ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; - ibdev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) || (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == - IB_LINK_LAYER_ETHERNET))) { - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + IB_LINK_LAYER_ETHERNET))) 
ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops); - } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || - dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { - ibdev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops); - } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { - ibdev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops); } if (check_flow_steering_support(dev)) { ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops); } diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 5e4ec9786081..33f525b744f2 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -988,53 +988,63 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, } static ssize_t sysfs_show_group(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { struct mcast_group *group = container_of(attr, struct mcast_group, dentry); struct mcast_req *req = NULL; - char pending_str[40]; char state_str[40]; - ssize_t len = 0; - int f; + char pending_str[40]; + int len; + int i; + u32 hoplimit; if (group->state == MCAST_IDLE) - sprintf(state_str, "%s", get_state_string(group->state)); + scnprintf(state_str, sizeof(state_str), "%s", + get_state_string(group->state)); else - sprintf(state_str, "%s(TID=0x%llx)", - get_state_string(group->state), - be64_to_cpu(group->last_req_tid)); + scnprintf(state_str, sizeof(state_str), "%s(TID=0x%llx)", + get_state_string(group->state), + 
be64_to_cpu(group->last_req_tid)); + if (list_empty(&group->pending_list)) { - sprintf(pending_str, "No"); + scnprintf(pending_str, sizeof(pending_str), "No"); } else { - req = list_first_entry(&group->pending_list, struct mcast_req, group_list); - sprintf(pending_str, "Yes(TID=0x%llx)", - be64_to_cpu(req->sa_mad.mad_hdr.tid)); + req = list_first_entry(&group->pending_list, struct mcast_req, + group_list); + scnprintf(pending_str, sizeof(pending_str), "Yes(TID=0x%llx)", + be64_to_cpu(req->sa_mad.mad_hdr.tid)); } - len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ", - group->rec.scope_join_state & 0xf, - group->members[2], group->members[1], group->members[0], - atomic_read(&group->refcount), - pending_str, - state_str); - for (f = 0; f < MAX_VFS; ++f) - if (group->func[f].state == MCAST_MEMBER) - len += sprintf(buf + len, "%d[%1x] ", - f, group->func[f].join_state); - - len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x " - "%4x %4x %2x %2x)\n", - be16_to_cpu(group->rec.pkey), - be32_to_cpu(group->rec.qkey), - (group->rec.mtusel_mtu & 0xc0) >> 6, - group->rec.mtusel_mtu & 0x3f, - group->rec.tclass, - (group->rec.ratesel_rate & 0xc0) >> 6, - group->rec.ratesel_rate & 0x3f, - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28, - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8, - be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff, - group->rec.proxy_join); + + len = sysfs_emit(buf, "%1d [%02d,%02d,%02d] %4d %4s %5s ", + group->rec.scope_join_state & 0xf, + group->members[2], + group->members[1], + group->members[0], + atomic_read(&group->refcount), + pending_str, + state_str); + + for (i = 0; i < MAX_VFS; i++) { + if (group->func[i].state == MCAST_MEMBER) + len += sysfs_emit_at(buf, len, "%d[%1x] ", i, + group->func[i].join_state); + } + + hoplimit = be32_to_cpu(group->rec.sl_flowlabel_hoplimit); + len += sysfs_emit_at(buf, len, + "\t\t(%4hx %4x %2x %2x %2x %2x %2x %4x %4x %2x %2x)\n", + 
be16_to_cpu(group->rec.pkey), + be32_to_cpu(group->rec.qkey), + (group->rec.mtusel_mtu & 0xc0) >> 6, + (group->rec.mtusel_mtu & 0x3f), + group->rec.tclass, + (group->rec.ratesel_rate & 0xc0) >> 6, + (group->rec.ratesel_rate & 0x3f), + (hoplimit & 0xf0000000) >> 28, + (hoplimit & 0x0fffff00) >> 8, + (hoplimit & 0x000000ff), + group->rec.proxy_join); return len; } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 58df06492d69..78c9bb79ec75 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -908,10 +908,10 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach); -int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, - u64 start, u64 length, u64 virt_addr, - int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata); +struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, const struct ib_gid_attr *attr); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 426fed005d53..50becc0e4b62 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -456,10 +456,10 @@ err_free: return ERR_PTR(err); } -int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, - u64 start, u64 length, u64 virt_addr, - int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata) +struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(mr->device); struct mlx4_ib_mr *mmr = to_mmr(mr); @@ -472,9 +472,8 @@ int mlx4_ib_rereg_user_mr(struct 
ib_mr *mr, int flags, * race exists. */ err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry); - if (err) - return err; + return ERR_PTR(err); if (flags & IB_MR_REREG_PD) { err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry, @@ -542,8 +541,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, release_mpt_entry: mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry); - - return err; + if (err) + return ERR_PTR(err); + return NULL; } static int diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 5cb8e602294c..651785bd57f2 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1493,7 +1493,7 @@ static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp, MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF | MLX4_IB_QP_CREATE_ROCE_V2_GSI)) - return -EINVAL; + return -EOPNOTSUPP; if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { if (init_attr->qp_type != IB_QPT_UD) @@ -1561,6 +1561,11 @@ static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp, if (err) return err; + if (init_attr->create_flags & + (MLX4_IB_SRIOV_SQP | MLX4_IB_SRIOV_TUNNEL_QP)) + /* Internal QP created with ib_create_qp */ + rdma_restrack_no_track(&qp->ibqp.res); + qp->port = init_attr->port_num; qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? 
sqpn : 1; @@ -2787,6 +2792,9 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx4_ib_qp *mqp = to_mqp(ibqp); int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata); if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { @@ -4007,7 +4015,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr qp_attr->qp_access_flags = to_ib_qp_access_flags(be32_to_cpu(context.params2)); - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) { to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index bf618529e734..6a381751c0d8 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -86,6 +86,10 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, int err; int i; + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes || init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index ea1f3a081b05..1b5891130aab 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -56,7 +56,7 @@ static ssize_t show_admin_alias_guid(struct device *dev, mlx4_ib_iov_dentry->entry_num, port->num); - return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val)); + return sysfs_emit(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val)); } /* store_admin_alias_guid stores the (new) administratively assigned value of that 
GUID. @@ -117,22 +117,24 @@ static ssize_t show_port_gid(struct device *dev, struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; struct mlx4_ib_dev *mdev = port->dev; union ib_gid gid; - ssize_t ret; + int ret; + __be16 *raw; ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num, mlx4_ib_iov_dentry->entry_num, &gid, 1); if (ret) return ret; - ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) gid.raw)[0]), - be16_to_cpu(((__be16 *) gid.raw)[1]), - be16_to_cpu(((__be16 *) gid.raw)[2]), - be16_to_cpu(((__be16 *) gid.raw)[3]), - be16_to_cpu(((__be16 *) gid.raw)[4]), - be16_to_cpu(((__be16 *) gid.raw)[5]), - be16_to_cpu(((__be16 *) gid.raw)[6]), - be16_to_cpu(((__be16 *) gid.raw)[7])); - return ret; + + raw = (__be16 *)gid.raw; + return sysfs_emit(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + be16_to_cpu(raw[0]), + be16_to_cpu(raw[1]), + be16_to_cpu(raw[2]), + be16_to_cpu(raw[3]), + be16_to_cpu(raw[4]), + be16_to_cpu(raw[5]), + be16_to_cpu(raw[6]), + be16_to_cpu(raw[7])); } static ssize_t show_phys_port_pkey(struct device *dev, @@ -151,7 +153,7 @@ static ssize_t show_phys_port_pkey(struct device *dev, if (ret) return ret; - return sprintf(buf, "0x%04x\n", pkey); + return sysfs_emit(buf, "0x%04x\n", pkey); } #define DENTRY_REMOVE(_dentry) \ @@ -441,16 +443,12 @@ static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr, { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); - ssize_t ret = -ENODEV; - - if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >= - (p->dev->dev->caps.pkey_table_len[p->port_num])) - ret = sprintf(buf, "none\n"); - else - ret = sprintf(buf, "%d\n", - p->dev->pkeys.virt2phys_pkey[p->slave] - [p->port_num - 1][tab_attr->index]); - return ret; + struct pkey_mgt *m = &p->dev->pkeys; + u8 key = m->virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index]; + + if (key >= p->dev->dev->caps.pkey_table_len[p->port_num]) 
+ return sysfs_emit(buf, "none\n"); + return sysfs_emit(buf, "%d\n", key); } static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, @@ -488,7 +486,7 @@ static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, static ssize_t show_port_gid_idx(struct mlx4_port *p, struct port_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", p->slave); + return sysfs_emit(buf, "%d\n", p->slave); } static struct attribute ** @@ -542,14 +540,10 @@ static ssize_t sysfs_show_smi_enabled(struct device *dev, { struct mlx4_port *p = container_of(attr, struct mlx4_port, smi_enabled); - ssize_t len = 0; - if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num)) - len = sprintf(buf, "%d\n", 1); - else - len = sprintf(buf, "%d\n", 0); - - return len; + return sysfs_emit(buf, "%d\n", + !!mlx4_vf_smi_enabled(p->dev->dev, p->slave, + p->port_num)); } static ssize_t sysfs_show_enable_smi_admin(struct device *dev, @@ -558,14 +552,10 @@ static ssize_t sysfs_show_enable_smi_admin(struct device *dev, { struct mlx4_port *p = container_of(attr, struct mlx4_port, enable_smi_admin); - ssize_t len = 0; - - if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num)) - len = sprintf(buf, "%d\n", 1); - else - len = sprintf(buf, "%d\n", 0); - return len; + return sysfs_emit(buf, "%d\n", + !!mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, + p->port_num)); } static ssize_t sysfs_store_enable_smi_admin(struct device *dev, diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index fb62f1d04afa..eb92cefffd77 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -707,10 +707,10 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd = {}; + unsigned long page_size; + unsigned int page_offset_quantized; size_t ucmdlen; - int page_shift; __be64 *pas; - int npages; int ncont; void *cqc; int err; 
@@ -742,14 +742,24 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, return err; } + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } + err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db); if (err) goto err_umem; - mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift, - &ncont, NULL); - mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", - ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); + ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size); + mlx5_ib_dbg( + dev, + "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n", + ucmd.buf_addr, entries * ucmd.cqe_size, + ib_umem_num_pages(cq->buf.umem), page_size, ncont); *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; @@ -760,11 +770,12 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, } pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); - mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0); + mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0); cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); MLX5_SET(cqc, cqc, log_page_size, - page_shift - MLX5_ADAPTER_PAGE_SHIFT); + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { *index = ucmd.uar_page_index; @@ -1128,13 +1139,12 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) } static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, - int entries, struct ib_udata *udata, int *npas, - int *page_shift, int *cqe_size) + int entries, struct ib_udata *udata, + int *cqe_size) { struct mlx5_ib_resize_cq ucmd; struct ib_umem *umem; int err; - int npages; err = 
ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) @@ -1155,9 +1165,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return err; } - mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift, - npas, NULL); - cq->resize_umem = umem; *cqe_size = ucmd.cqe_size; @@ -1250,7 +1257,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) int err; int npas; __be64 *pas; - int page_shift; + unsigned int page_offset_quantized = 0; + unsigned int page_shift; int inlen; int cqe_size; unsigned long flags; @@ -1277,22 +1285,34 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) mutex_lock(&cq->resize_mutex); if (udata) { - err = resize_user(dev, cq, entries, udata, &npas, &page_shift, - &cqe_size); + unsigned long page_size; + + err = resize_user(dev, cq, entries, udata, &cqe_size); + if (err) + goto ex; + + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->resize_umem, cqc, log_page_size, + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, + &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto ex_resize; + } + npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size); + page_shift = order_base_2(page_size); } else { + struct mlx5_frag_buf *frag_buf; + cqe_size = 64; err = resize_kernel(dev, cq, entries, cqe_size); - if (!err) { - struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf; - - npas = frag_buf->npages; - page_shift = frag_buf->page_shift; - } + if (err) + goto ex; + frag_buf = &cq->resize_buf->frag_buf; + npas = frag_buf->npages; + page_shift = frag_buf->page_shift; } - if (err) - goto ex; - inlen = MLX5_ST_SZ_BYTES(modify_cq_in) + MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas; @@ -1304,8 +1324,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas); if (udata) - mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, - pas, 0); + mlx5_ib_populate_pas(cq->resize_umem, 1UL << 
page_shift, pas, + 0); else mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas); @@ -1319,6 +1339,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size, cq->private_flags & diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9e3d8b826498..819c142857d6 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -93,9 +93,6 @@ struct devx_async_event_file { struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; - u32 page_offset; - int page_shift; - int ncont; u32 dinlen; u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; }; @@ -1311,7 +1308,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, else ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; devx_event_table = &dev->devx_event_table; @@ -2057,9 +2054,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, u64 addr; size_t size; u32 access; - int npages; int err; - u32 page_mask; if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) @@ -2073,57 +2068,62 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - err = ib_check_mr_access(access); + err = ib_check_mr_access(&dev->ib_dev, access); if (err) return err; obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); - - mlx5_ib_cont_pages(obj->umem, obj->umem->address, - MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &obj->page_shift, &obj->ncont, NULL); - - if (!npages) { - ib_umem_release(obj->umem); - return -EINVAL; - } - - page_mask = (1 << 
obj->page_shift) - 1; - obj->page_offset = obj->umem->address & page_mask; - return 0; } -static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs, +static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, + struct uverbs_attr_bundle *attrs, struct devx_umem *obj, struct devx_umem_reg_cmd *cmd) { + unsigned int page_size; + __be64 *mtt; + void *umem; + + /* + * We don't know what the user intends to use this umem for, but the HW + * restrictions must be met. MR, doorbell records, QP, WQ and CQ all + * have different requirements. Since we have no idea how to sort this + * out, only support PAGE_SIZE with the expectation that userspace will + * provide the necessary alignments inside the known PAGE_SIZE and that + * FW will check everything. + */ + page_size = ib_umem_find_best_pgoff( + obj->umem, PAGE_SIZE, + __mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset), + 0)); + if (!page_size) + return -EINVAL; + cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + - (MLX5_ST_SZ_BYTES(mtt) * obj->ncont); + (MLX5_ST_SZ_BYTES(mtt) * + ib_umem_num_dma_blocks(obj->umem, page_size)); cmd->in = uverbs_zalloc(attrs, cmd->inlen); - return PTR_ERR_OR_ZERO(cmd->in); -} - -static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, - struct devx_umem *obj, - struct devx_umem_reg_cmd *cmd) -{ - void *umem; - __be64 *mtt; + if (IS_ERR(cmd->in)) + return PTR_ERR(cmd->in); umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM); - MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); - MLX5_SET(umem, umem, log_page_size, obj->page_shift - - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(umem, umem, page_offset, obj->page_offset); - mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt, + MLX5_SET64(umem, umem, num_of_mtt, + ib_umem_num_dma_blocks(obj->umem, page_size)); + MLX5_SET(umem, umem, log_page_size, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + 
MLX5_SET(umem, umem, page_offset, + ib_umem_dma_offset(obj->umem, page_size)); + + mlx5_ib_populate_pas(obj->umem, page_size, mtt, (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) | - MLX5_IB_MTT_READ); + MLX5_IB_MTT_READ); + return 0; } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( @@ -2150,12 +2150,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( if (err) goto err_obj_free; - err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd); + err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd); if (err) goto err_umem_release; - devx_umem_reg_cmd_build(dev, obj, &cmd); - MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid); err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, sizeof(cmd.out)); @@ -2187,7 +2185,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, int err; err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(err, why, uobject)) + if (err) return err; ib_umem_release(obj->umem); @@ -2600,8 +2598,8 @@ static const struct file_operations devx_async_event_fops = { .llseek = no_llseek, }; -static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_cmd_event_file *comp_ev_file = container_of(uobj, struct devx_async_cmd_event_file, @@ -2623,11 +2621,10 @@ static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, kvfree(entry); } spin_unlock_irq(&comp_ev_file->ev_queue.lock); - return 0; }; -static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_event_file *ev_file = container_of(uobj, struct devx_async_event_file, @@ -2671,7 +2668,6 @@ static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, mutex_unlock(&dev->devx_event_table.event_xa_lock); 
put_device(&dev->ib_dev.dev); - return 0; }; DECLARE_UVERBS_NAMED_METHOD( diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 492cfe063bca..25da0b05b4e2 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -2035,11 +2035,9 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_flow_matcher *obj = uobject->object; - int ret; - ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&obj->usecnt)) + return -EBUSY; kfree(obj); return 0; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 55545f1286e5..3bae9ba0ead8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -75,12 +75,6 @@ static LIST_HEAD(mlx5_ib_dev_list); */ static DEFINE_MUTEX(mlx5_ib_multiport_mutex); -/* We can't use an array for xlt_emergency_page because dma_map_single - * doesn't work on kernel modules memory - */ -static unsigned long xlt_emergency_page; -static struct mutex xlt_emergency_page_mutex; - struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi) { struct mlx5_ib_dev *dev; @@ -425,10 +419,22 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_NDR; + break; case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2): + *active_width = IB_WIDTH_2X; + *active_speed = IB_SPEED_NDR; + break; + case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_NDR; + break; default: return -EINVAL; } @@ -2628,7 +2634,7 @@ static ssize_t fw_pages_show(struct device 
*device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); + return sysfs_emit(buf, "%d\n", dev->mdev->priv.fw_pages); } static DEVICE_ATTR_RO(fw_pages); @@ -2638,7 +2644,7 @@ static ssize_t reg_pages_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); + return sysfs_emit(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } static DEVICE_ATTR_RO(reg_pages); @@ -2648,7 +2654,7 @@ static ssize_t hca_type_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); + return sysfs_emit(buf, "MT%d\n", dev->mdev->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2658,7 +2664,7 @@ static ssize_t hw_rev_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%x\n", dev->mdev->rev_id); + return sysfs_emit(buf, "%x\n", dev->mdev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -2668,8 +2674,8 @@ static ssize_t board_id_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, - dev->mdev->board_id); + return sysfs_emit(buf, "%.*s\n", MLX5_BOARD_ID_LEN, + dev->mdev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -4024,6 +4030,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .create_cq = mlx5_ib_create_cq, .create_qp = mlx5_ib_create_qp, .create_srq = mlx5_ib_create_srq, + .create_user_ah = mlx5_ib_create_ah, .dealloc_pd = mlx5_ib_dealloc_pd, .dealloc_ucontext = mlx5_ib_dealloc_ucontext, .del_gid = mlx5_ib_del_gid, @@ -4141,42 +4148,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = 
dev->mdev; int err; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - dev->ib_dev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); - if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) ib_set_device_ops(&dev->ib_dev, @@ -4187,19 +4158,11 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); - if (MLX5_CAP_GEN(mdev, imaicl)) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + if (MLX5_CAP_GEN(mdev, imaicl)) 
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops); - } - if (MLX5_CAP_GEN(mdev, xrc)) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + if (MLX5_CAP_GEN(mdev, xrc)) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); - } if (MLX5_CAP_DEV_MEM(mdev, memic) || MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & @@ -4278,12 +4241,6 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); port_num = mlx5_core_native_port_num(dev->mdev) - 1; @@ -4878,30 +4835,17 @@ static struct auxiliary_driver mlx5r_driver = { .id_table = mlx5r_id_table, }; -unsigned long mlx5_ib_get_xlt_emergency_page(void) -{ - mutex_lock(&xlt_emergency_page_mutex); - return xlt_emergency_page; -} - -void mlx5_ib_put_xlt_emergency_page(void) -{ - mutex_unlock(&xlt_emergency_page_mutex); -} - static int __init mlx5_ib_init(void) { int ret; - xlt_emergency_page = __get_free_page(GFP_KERNEL); + xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL); if (!xlt_emergency_page) return -ENOMEM; - mutex_init(&xlt_emergency_page_mutex); - mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); if (!mlx5_ib_event_wq) { - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); return -ENOMEM; } @@ -4934,8 +4878,7 @@ static void __exit mlx5_ib_cleanup(void) mlx5r_rep_cleanup(); destroy_workqueue(mlx5_ib_event_wq); - mutex_destroy(&xlt_emergency_page_mutex); - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); } module_init(mlx5_ib_init); diff --git 
a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 13de3d2edd34..844545064c9e 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -36,161 +36,65 @@ #include "mlx5_ib.h" #include <linux/jiffies.h> -/* @umem: umem object to scan - * @addr: ib virtual address requested by the user - * @max_page_shift: high limit for page_shift - 0 means no limit - * @count: number of PAGE_SIZE pages covered by umem - * @shift: page shift for the compound pages found in the region - * @ncont: number of compund pages - * @order: log2 of the number of compound pages +/* + * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be + * filled in the pas array. */ -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *count, int *shift, - int *ncont, int *order) +void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, + u64 access_flags) { - unsigned long tmp; - unsigned long m; - u64 base = ~0, p = 0; - u64 len, pfn; - int i = 0; - struct scatterlist *sg; - int entry; - - addr = addr >> PAGE_SHIFT; - tmp = (unsigned long)addr; - m = find_first_bit(&tmp, BITS_PER_LONG); - if (max_page_shift) - m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); - - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - pfn = sg_dma_address(sg) >> PAGE_SHIFT; - if (base + p != pfn) { - /* If either the offset or the new - * base are unaligned update m - */ - tmp = (unsigned long)(pfn | p); - if (!IS_ALIGNED(tmp, 1 << m)) - m = find_first_bit(&tmp, BITS_PER_LONG); - - base = pfn; - p = 0; - } - - p += len; - i += len; - } - - if (i) { - m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); - - if (order) - *order = ilog2(roundup_pow_of_two(i) >> m); - - *ncont = DIV_ROUND_UP(i, (1 << m)); - } else { - m = 0; + struct ib_block_iter biter; - if (order) - *order = 0; - - *ncont = 0; + rdma_umem_for_each_dma_block (umem, 
&biter, page_size) { + *pas = cpu_to_be64(rdma_block_iter_dma_address(&biter) | + access_flags); + pas++; } - *shift = PAGE_SHIFT + m; - *count = i; } /* - * Populate the given array with bus addresses from the umem. - * - * dev - mlx5_ib device - * umem - umem to use to fill the pages - * page_shift - determines the page size used in the resulting array - * offset - offset into the umem to start from, - * only implemented for ODP umems - * num_pages - total number of pages to fill - * pas - bus addresses array to fill - * access_flags - access flags to set on all present pages. - use enum mlx5_ib_mtt_access_flags for this. + * Compute the page shift and page_offset for mailboxes that use a quantized + * page_offset. The granulatity of the page offset scales according to page + * size. */ -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags) +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized) { - int shift = page_shift - PAGE_SHIFT; - int mask = (1 << shift) - 1; - int i, k, idx; - u64 cur = 0; - u64 base; - int len; - struct scatterlist *sg; - int entry; - - i = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - base = sg_dma_address(sg); - - /* Skip elements below offset */ - if (i + len < offset << shift) { - i += len; - continue; - } - - /* Skip pages below offset */ - if (i < offset << shift) { - k = (offset << shift) - i; - i = offset << shift; - } else { - k = 0; - } - - for (; k < len; k++) { - if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); - cur |= access_flags; - idx = (i >> shift) - offset; - - pas[idx] = cpu_to_be64(cur); - mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", - i >> shift, be64_to_cpu(pas[idx])); - } - i++; - - /* Stop after 
num_pages reached */ - if (i >> shift >= offset + num_pages) - return; - } + const u64 page_offset_mask = (1UL << page_offset_bits) - 1; + unsigned long page_size; + u64 page_offset; + + page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask); + if (!page_size) + return 0; + + /* + * page size is the largest possible page size. + * + * Reduce the page_size, and thus the page_offset and quanta, until the + * page_offset fits into the mailbox field. Once page_size < scale this + * loop is guaranteed to terminate. + */ + page_offset = ib_umem_dma_offset(umem, page_size); + while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) { + page_size /= 2; + page_offset = ib_umem_dma_offset(umem, page_size); } -} -void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int access_flags) -{ - return __mlx5_ib_populate_pas(dev, umem, page_shift, 0, - ib_umem_num_dma_blocks(umem, PAGE_SIZE), - pas, access_flags); -} -int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) -{ - u64 page_size; - u64 page_mask; - u64 off_size; - u64 off_mask; - u64 buf_off; - - page_size = (u64)1 << page_shift; - page_mask = page_size - 1; - buf_off = addr & page_mask; - off_size = page_size >> 6; - off_mask = off_size - 1; - - if (buf_off & off_mask) - return -EINVAL; - - *offset = buf_off >> ilog2(off_size); - return 0; + /* + * The address is not aligned, or otherwise cannot be represented by the + * page_offset. 
+ */ + if (!(pgsz_bitmap & page_size)) + return 0; + + *page_offset_quantized = + (unsigned long)page_offset / (page_size / scale); + if (WARN_ON(*page_offset_quantized > page_offset_mask)) + return 0; + return page_size; } #define WR_ID_BF 0xBF diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index ea5243815cf6..b0fdc1b08e06 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -40,7 +40,73 @@ #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) -#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size) +static __always_inline unsigned long +__mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits, + unsigned int pgsz_shift) +{ + unsigned int largest_pg_shift = + min_t(unsigned long, (1ULL << log_pgsz_bits) - 1 + pgsz_shift, + BITS_PER_LONG - 1); + + /* + * Despite a command allowing it, the device does not support lower than + * 4k page size. + */ + pgsz_shift = max_t(unsigned int, MLX5_ADAPTER_PAGE_SHIFT, pgsz_shift); + return GENMASK(largest_pg_shift, pgsz_shift); +} + +/* + * For mkc users, instead of a page_offset the command has a start_iova which + * specifies both the page_offset and the on-the-wire IOVA + */ +#define mlx5_umem_find_best_pgsz(umem, typ, log_pgsz_fld, pgsz_shift, iova) \ + ib_umem_find_best_pgsz(umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), \ + pgsz_shift), \ + iova) + +static __always_inline unsigned long +__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits, + unsigned int offset_shift) +{ + unsigned int largest_offset_shift = + min_t(unsigned long, page_offset_bits - 1 + offset_shift, + BITS_PER_LONG - 1); + + return GENMASK(largest_offset_shift, offset_shift); +} + +/* + * QP/CQ/WQ/etc type commands take a page offset that satisfies: + * page_offset_quantized * (page_size/scale) = page_offset + * Which restricts allowed page sizes to ones that satisfy the 
above. + */ +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized); +#define mlx5_umem_find_best_quantized_pgoff(umem, typ, log_pgsz_fld, \ + pgsz_shift, page_offset_fld, \ + scale, page_offset_quantized) \ + __mlx5_umem_find_best_quantized_pgoff( \ + umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ + __mlx5_bit_sz(typ, page_offset_fld), \ + GENMASK(31, order_base_2(scale)), scale, \ + page_offset_quantized) + +#define mlx5_umem_find_best_cq_quantized_pgoff(umem, typ, log_pgsz_fld, \ + pgsz_shift, page_offset_fld, \ + scale, page_offset_quantized) \ + __mlx5_umem_find_best_quantized_pgoff( \ + umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ + __mlx5_bit_sz(typ, page_offset_fld), 0, scale, \ + page_offset_quantized) enum { MLX5_IB_MMAP_OFFSET_START = 9, @@ -597,14 +663,12 @@ struct mlx5_ib_mr { int max_descs; int desc_size; int access_mode; + unsigned int page_shift; struct mlx5_core_mkey mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; - unsigned int order; struct mlx5_cache_ent *cache_ent; - int npages; - struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; void *descs_alloc; @@ -1042,6 +1106,11 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } +static inline struct mlx5_ib_dev *mr_to_mdev(struct mlx5_ib_mr *mr) +{ + return to_mdev(mr->ibmr.device); +} + static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata) { struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( @@ -1189,9 +1258,9 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); 
void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); -int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, - u64 length, u64 virt_addr, int access_flags, - struct ib_pd *pd, struct ib_udata *udata); +struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, int access_flags, + struct ib_pd *pd, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); @@ -1210,7 +1279,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, struct ib_smp *out_mad); @@ -1230,15 +1298,8 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *count, int *shift, - int *ncont, int *order); -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags); -void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int access_flags); +void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, + u64 access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); @@ -1283,7 +1344,7 @@ void mlx5_odp_populate_xlt(void *xlt, 
size_t idx, size_t nentries, int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge); -int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable); +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1305,7 +1366,7 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { return -EOPNOTSUPP; } -static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) +static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) { return -EOPNOTSUPP; } @@ -1456,8 +1517,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev, return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages; } -unsigned long mlx5_ib_get_xlt_emergency_page(void); -void mlx5_ib_put_xlt_emergency_page(void); +extern void *xlt_emergency_page; int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b261797b258f..24f8d59a42ea 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -41,6 +41,13 @@ #include <rdma/ib_verbs.h> #include "mlx5_ib.h" +/* + * We can't use an array for xlt_emergency_page because dma_map_single doesn't + * work on kernel modules memory + */ +void *xlt_emergency_page; +static DEFINE_MUTEX(xlt_emergency_page_mutex); + enum { MAX_PENDING_REG_MR = 8, }; @@ -49,6 +56,9 @@ enum { static void create_mkey_callback(int status, struct mlx5_async_work *context); +static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags, + unsigned int page_size, bool populate); static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, struct ib_pd *pd) @@ -123,19 +133,12 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return 
mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start, - u64 length) -{ - return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= - length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); -} - static void create_mkey_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = container_of(context, struct mlx5_ib_mr, cb_work); - struct mlx5_ib_dev *dev = mr->dev; struct mlx5_cache_ent *ent = mr->cache_ent; + struct mlx5_ib_dev *dev = ent->dev; unsigned long flags; if (status) { @@ -172,9 +175,7 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return NULL; - mr->order = ent->order; mr->cache_ent = ent; - mr->dev = ent->dev; set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); @@ -642,6 +643,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) if (mlx5_mr_cache_invalidate(mr)) { detach_mr_from_cache(mr); destroy_mkey(dev, mr); + kfree(mr); return; } @@ -867,57 +869,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev) return MLX5_MAX_UMR_SHIFT; } -static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length, - int access_flags, struct ib_umem **umem, int *npages, - int *page_shift, int *ncont, int *order) -{ - struct ib_umem *u; - - *umem = NULL; - - if (access_flags & IB_ACCESS_ON_DEMAND) { - struct ib_umem_odp *odp; - - odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, - &mlx5_mn_ops); - if (IS_ERR(odp)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", - PTR_ERR(odp)); - return PTR_ERR(odp); - } - - u = &odp->umem; - - *page_shift = odp->page_shift; - *ncont = ib_umem_odp_num_pages(odp); - *npages = *ncont << (*page_shift - PAGE_SHIFT); - if (order) - *order = ilog2(roundup_pow_of_two(*ncont)); - } else { - u = ib_umem_get(&dev->ib_dev, start, length, access_flags); - if (IS_ERR(u)) { - mlx5_ib_dbg(dev, "umem get 
failed (%ld)\n", PTR_ERR(u)); - return PTR_ERR(u); - } - - mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, - page_shift, ncont, order); - } - - if (!*npages) { - mlx5_ib_warn(dev, "avoid zero region\n"); - ib_umem_release(u); - return -EINVAL; - } - - *umem = u; - - mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", - *npages, *ncont, *order, *page_shift); - - return 0; -} - static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) { struct mlx5_ib_umr_context *context = @@ -974,25 +925,49 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, return &cache->ent[order]; } -static struct mlx5_ib_mr * -alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, - u64 len, int npages, int page_shift, unsigned int order, - int access_flags) +static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, + u64 length, int access_flags) +{ + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; + mr->ibmr.length = length; + mr->ibmr.device = &dev->ib_dev; + mr->access_flags = access_flags; +} + +static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, + struct ib_umem *umem, u64 iova, + int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order); + struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; + unsigned int page_size; - if (!ent) - return ERR_PTR(-E2BIG); - - /* Matches access in alloc_cache_mr() */ - if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) - return ERR_PTR(-EOPNOTSUPP); + page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); + if (WARN_ON(!page_size)) + return ERR_PTR(-EINVAL); + ent = mr_cache_ent_from_order( + dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); + /* + * Matches access in alloc_cache_mr(). If the MR can't come from the + * cache then synchronously create an uncached one. 
+ */ + if (!ent || ent->limit == 0 || + !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) { + mutex_lock(&dev->slow_path_mutex); + mr = reg_create(pd, umem, iova, access_flags, page_size, false); + mutex_unlock(&dev->slow_path_mutex); + return mr; + } mr = get_cache_mr(ent); if (!mr) { mr = create_cache_mr(ent); + /* + * The above already tried to do the same stuff as reg_create(), + * no reason to try it again. + */ if (IS_ERR(mr)) return mr; } @@ -1001,9 +976,12 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, mr->umem = umem; mr->access_flags = access_flags; mr->desc_size = sizeof(struct mlx5_mtt); - mr->mmkey.iova = virt_addr; - mr->mmkey.size = len; + mr->mmkey.iova = iova; + mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; + mr->page_shift = order_base_2(page_size); + mr->umem = umem; + set_mr_fields(dev, mr, umem->length, access_flags); return mr; } @@ -1012,14 +990,144 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, MLX5_UMR_MTT_ALIGNMENT) #define MLX5_SPARE_UMR_CHUNK 0x10000 +/* + * Allocate a temporary buffer to hold the per-page information to transfer to + * HW. For efficiency this should be as large as it can be, but buffer + * allocation failure is not allowed, so try smaller sizes. + */ +static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) +{ + const size_t xlt_chunk_align = + MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size); + size_t size; + void *res = NULL; + + static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); + + /* + * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the + * allocation can't trigger any kind of reclaim. + */ + might_sleep(); + + gfp_mask |= __GFP_ZERO; + + /* + * If the system already has a suitable high order page then just use + * that, but don't try hard to create one. This max is about 1M, so a + * free x86 huge page will satisfy it. 
+ */ + size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), + MLX5_MAX_UMR_CHUNK); + *nents = size / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + + if (size > MLX5_SPARE_UMR_CHUNK) { + size = MLX5_SPARE_UMR_CHUNK; + *nents = get_order(size) / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + } + + *nents = PAGE_SIZE / ent_size; + res = (void *)__get_free_page(gfp_mask); + if (res) + return res; + + mutex_lock(&xlt_emergency_page_mutex); + memset(xlt_emergency_page, 0, PAGE_SIZE); + return xlt_emergency_page; +} + +static void mlx5_ib_free_xlt(void *xlt, size_t length) +{ + if (xlt == xlt_emergency_page) { + mutex_unlock(&xlt_emergency_page_mutex); + return; + } + + free_pages((unsigned long)xlt, get_order(length)); +} + +/* + * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for + * submission. + */ +static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, + struct mlx5_umr_wr *wr, struct ib_sge *sg, + size_t nents, size_t ent_size, + unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; + dma_addr_t dma; + void *xlt; + + xlt = mlx5_ib_alloc_xlt(&nents, ent_size, + flags & MLX5_IB_UPD_XLT_ATOMIC ? 
GFP_ATOMIC : + GFP_KERNEL); + sg->length = nents * ent_size; + dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) { + mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); + mlx5_ib_free_xlt(xlt, sg->length); + return NULL; + } + sg->addr = dma; + sg->lkey = dev->umrc.pd->local_dma_lkey; + + memset(wr, 0, sizeof(*wr)); + wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; + if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) + wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; + wr->wr.sg_list = sg; + wr->wr.num_sge = 1; + wr->wr.opcode = MLX5_IB_WR_UMR; + wr->pd = mr->ibmr.pd; + wr->mkey = mr->mmkey.key; + wr->length = mr->mmkey.size; + wr->virt_addr = mr->mmkey.iova; + wr->access_flags = mr->access_flags; + wr->page_shift = mr->page_shift; + wr->xlt_size = sg->length; + return xlt; +} + +static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, + struct ib_sge *sg) +{ + struct device *ddev = &dev->mdev->pdev->dev; + + dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); + mlx5_ib_free_xlt(xlt, sg->length); +} + +static unsigned int xlt_wr_final_send_flags(unsigned int flags) +{ + unsigned int res = 0; + + if (flags & MLX5_IB_UPD_XLT_ENABLE) + res |= MLX5_IB_SEND_UMR_ENABLE_MR | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | + MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS) + res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; + if (flags & MLX5_IB_UPD_XLT_ADDR) + res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + return res; +} + int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags) { - struct mlx5_ib_dev *dev = mr->dev; - struct device *ddev = dev->ib_dev.dev.parent; - int size; + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; void *xlt; - dma_addr_t dma; struct mlx5_umr_wr wr; struct ib_sge sg; int err = 0; @@ -1030,15 +1138,17 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int 
npages, const int page_mask = page_align - 1; size_t pages_mapped = 0; size_t pages_to_map = 0; - size_t pages_iter = 0; + size_t pages_iter; size_t size_to_map = 0; - gfp_t gfp; - bool use_emergency_page = false; + size_t orig_sg_length; if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && !umr_can_use_indirect_mkey(dev)) return -EPERM; + if (WARN_ON(!mr->umem->is_odp)) + return -EINVAL; + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, * so we need to align the offset and length accordingly */ @@ -1046,63 +1156,21 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, npages += idx & page_mask; idx &= ~page_mask; } - - gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL; - gfp |= __GFP_ZERO | __GFP_NOWARN; - pages_to_map = ALIGN(npages, page_align); - size = desc_size * pages_to_map; - size = min_t(int, size, MLX5_MAX_UMR_CHUNK); - - xlt = (void *)__get_free_pages(gfp, get_order(size)); - if (!xlt && size > MLX5_SPARE_UMR_CHUNK) { - mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. 
fallback to spare UMR allocation od %d bytes\n", - size, get_order(size), MLX5_SPARE_UMR_CHUNK); - - size = MLX5_SPARE_UMR_CHUNK; - xlt = (void *)__get_free_pages(gfp, get_order(size)); - } - if (!xlt) { - mlx5_ib_warn(dev, "Using XLT emergency buffer\n"); - xlt = (void *)mlx5_ib_get_xlt_emergency_page(); - size = PAGE_SIZE; - memset(xlt, 0, size); - use_emergency_page = true; - } - pages_iter = size / desc_size; - dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) { - mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); - err = -ENOMEM; - goto free_xlt; - } + xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags); + if (!xlt) + return -ENOMEM; + pages_iter = sg.length / desc_size; + orig_sg_length = sg.length; - if (mr->umem->is_odp) { - if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { - struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - size_t max_pages = ib_umem_odp_num_pages(odp) - idx; + if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + size_t max_pages = ib_umem_odp_num_pages(odp) - idx; - pages_to_map = min_t(size_t, pages_to_map, max_pages); - } + pages_to_map = min_t(size_t, pages_to_map, max_pages); } - sg.addr = dma; - sg.lkey = dev->umrc.pd->local_dma_lkey; - - memset(&wr, 0, sizeof(wr)); - wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; - if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) - wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; - wr.wr.sg_list = &sg; - wr.wr.num_sge = 1; - wr.wr.opcode = MLX5_IB_WR_UMR; - - wr.pd = mr->ibmr.pd; - wr.mkey = mr->mmkey.key; - wr.length = mr->mmkey.size; - wr.virt_addr = mr->mmkey.iova; - wr.access_flags = mr->access_flags; wr.page_shift = page_shift; for (pages_mapped = 0; @@ -1110,50 +1178,87 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, pages_mapped += pages_iter, idx += pages_iter) { npages = min_t(int, pages_iter, pages_to_map - pages_mapped); size_to_map = npages * desc_size; - 
dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); - if (mr->umem->is_odp) { - mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); - } else { - __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx, - npages, xlt, - MLX5_IB_MTT_PRESENT); - /* Clear padding after the pages - * brought from the umem. - */ - memset(xlt + size_to_map, 0, size - size_to_map); - } - dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); - if (pages_mapped + pages_iter >= pages_to_map) { - if (flags & MLX5_IB_UPD_XLT_ENABLE) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_ENABLE_MR | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - if (flags & MLX5_IB_UPD_XLT_PD || - flags & MLX5_IB_UPD_XLT_ACCESS) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - if (flags & MLX5_IB_UPD_XLT_ADDR) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - } + if (pages_mapped + pages_iter >= pages_to_map) + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); wr.offset = idx * desc_size; wr.xlt_size = sg.length; err = mlx5_ib_post_send_wait(dev, &wr); } - dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); + sg.length = orig_sg_length; + mlx5_ib_unmap_free_xlt(dev, xlt, &sg); + return err; +} + +/* + * Send the DMA list to the HW for a normal MR using UMR. 
+ */ +static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; + struct ib_block_iter biter; + struct mlx5_mtt *cur_mtt; + struct mlx5_umr_wr wr; + size_t orig_sg_length; + struct mlx5_mtt *mtt; + size_t final_size; + struct ib_sge sg; + int err = 0; + + if (WARN_ON(mr->umem->is_odp)) + return -EINVAL; + + mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, + ib_umem_num_dma_blocks(mr->umem, + 1 << mr->page_shift), + sizeof(*mtt), flags); + if (!mtt) + return -ENOMEM; + orig_sg_length = sg.length; + + cur_mtt = mtt; + rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap, + BIT(mr->page_shift)) { + if (cur_mtt == (void *)mtt + sg.length) { + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + if (err) + goto err; + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + wr.offset += sg.length; + cur_mtt = mtt; + } + + cur_mtt->ptag = + cpu_to_be64(rdma_block_iter_dma_address(&biter) | + MLX5_IB_MTT_PRESENT); + cur_mtt++; + } -free_xlt: - if (use_emergency_page) - mlx5_ib_put_xlt_emergency_page(); - else - free_pages((unsigned long)xlt, get_order(size)); + final_size = (void *)cur_mtt - (void *)mtt; + sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); + memset(cur_mtt, 0, sg.length - final_size); + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); + wr.xlt_size = sg.length; + dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + +err: + sg.length = orig_sg_length; + mlx5_ib_unmap_free_xlt(dev, mtt, &sg); return err; } @@ -1161,11 +1266,9 @@ free_xlt: * If ibmr is NULL it will be allocated by reg_create. * Else, the given ibmr will be used. 
*/ -static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, - u64 virt_addr, u64 length, - struct ib_umem *umem, int npages, - int page_shift, int access_flags, - bool populate) +static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags, + unsigned int page_size, bool populate) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr; @@ -1176,16 +1279,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); - mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); + if (!page_size) + return ERR_PTR(-EINVAL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); mr->ibmr.pd = pd; mr->access_flags = access_flags; + mr->page_shift = order_base_2(page_size); inlen = MLX5_ST_SZ_BYTES(create_mkey_in); if (populate) - inlen += sizeof(*pas) * roundup(npages, 2); + inlen += sizeof(*pas) * + roundup(ib_umem_num_dma_blocks(umem, page_size), 2); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; @@ -1197,7 +1304,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, err = -EINVAL; goto err_2; } - mlx5_ib_populate_pas(dev, umem, page_shift, pas, + mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); } @@ -1206,20 +1313,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr, + set_mkc_access_pd_addr_fields(mkc, access_flags, iova, populate ? 
pd : dev->umrc.pd); MLX5_SET(mkc, mkc, free, !populate); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET64(mkc, mkc, len, length); + MLX5_SET64(mkc, mkc, len, umem->length); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, - get_octo_len(virt_addr, length, page_shift)); - MLX5_SET(mkc, mkc, log_page_size, page_shift); + get_octo_len(iova, umem->length, mr->page_shift)); + MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, - get_octo_len(virt_addr, length, page_shift)); + get_octo_len(iova, umem->length, mr->page_shift)); } err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); @@ -1229,7 +1336,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, } mr->mmkey.type = MLX5_MKEY_MR; mr->desc_size = sizeof(struct mlx5_mtt); - mr->dev = dev; + mr->umem = umem; + set_mr_fields(dev, mr, umem->length, access_flags); kvfree(in); mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); @@ -1238,25 +1346,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, err_2: kvfree(in); - err_1: - if (!ibmr) - kfree(mr); - + kfree(mr); return ERR_PTR(err); } -static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - int npages, u64 length, int access_flags) -{ - mr->npages = npages; - atomic_add(npages, &dev->mdev->priv.reg_pages); - mr->ibmr.lkey = mr->mmkey.key; - mr->ibmr.rkey = mr->mmkey.key; - mr->ibmr.length = length; - mr->access_flags = access_flags; -} - static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, u64 length, int acc, int mode) { @@ -1290,8 +1384,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, kfree(in); - mr->umem = NULL; - set_mr_fields(dev, mr, 0, length, acc); + set_mr_fields(dev, mr, length, acc); return &mr->ibmr; @@ -1352,116 +1445,128 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct 
ib_pd *pd, struct ib_dm *dm, attr->access_flags, mode); } -struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt_addr, int access_flags, - struct ib_udata *udata) +static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr = NULL; bool xlt_with_umr; - struct ib_umem *umem; - int page_shift; - int npages; - int ncont; - int order; int err; - if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) - return ERR_PTR(-EOPNOTSUPP); - - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", - start, virt_addr, length, access_flags); - - xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length); - /* ODP requires xlt update via umr to work. */ - if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND)) - return ERR_PTR(-EINVAL); - - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && - length == U64_MAX) { - if (virt_addr != start) - return ERR_PTR(-EINVAL); - if (!(access_flags & IB_ACCESS_ON_DEMAND) || - !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) - return ERR_PTR(-EINVAL); - - mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); - if (IS_ERR(mr)) - return ERR_CAST(mr); - return &mr->ibmr; - } - - err = mr_umem_get(dev, start, length, access_flags, &umem, - &npages, &page_shift, &ncont, &order); - - if (err < 0) - return ERR_PTR(err); - + xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length); if (xlt_with_umr) { - mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, - page_shift, order, access_flags); - if (IS_ERR(mr)) - mr = NULL; - } + mr = alloc_cacheable_mr(pd, umem, iova, access_flags); + } else { + unsigned int page_size = mlx5_umem_find_best_pgsz( + umem, mkc, log_page_size, 0, iova); - if (!mr) { mutex_lock(&dev->slow_path_mutex); - mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags, !xlt_with_umr); + mr = 
reg_create(pd, umem, iova, access_flags, page_size, true); mutex_unlock(&dev->slow_path_mutex); } - if (IS_ERR(mr)) { - err = PTR_ERR(mr); - goto error; + ib_umem_release(umem); + return ERR_CAST(mr); } mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); - mr->umem = umem; - set_mr_fields(dev, mr, npages, length, access_flags); + atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); - if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) { + if (xlt_with_umr) { /* * If the MR was created with reg_create then it will be * configured properly but left disabled. It is safe to go ahead * and configure it again via UMR while enabling it. */ - int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; - - err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, - update_xlt_flags); + err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); if (err) { dereg_mr(dev, mr); return ERR_PTR(err); } } + return &mr->ibmr; +} - if (is_odp_mr(mr)) { - to_ib_umem_odp(mr->umem)->private = mr; - init_waitqueue_head(&mr->q_deferred_work); - atomic_set(&mr->num_deferred_work, 0); - err = xa_err(xa_store(&dev->odp_mkeys, - mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, - GFP_KERNEL)); - if (err) { - dereg_mr(dev, mr); - return ERR_PTR(err); - } +static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem_odp *odp; + struct mlx5_ib_mr *mr; + int err; - err = mlx5_ib_init_odp_mr(mr, xlt_with_umr); - if (err) { - dereg_mr(dev, mr); - return ERR_PTR(err); - } + if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + return ERR_PTR(-EOPNOTSUPP); + + if (!start && length == U64_MAX) { + if (iova != 0) + return ERR_PTR(-EINVAL); + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) + return ERR_PTR(-EINVAL); + + mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); + if (IS_ERR(mr)) + return ERR_CAST(mr); + return &mr->ibmr; + } + + /* ODP 
requires xlt update via umr to work. */ + if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + return ERR_PTR(-EINVAL); + + odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, + &mlx5_mn_ops); + if (IS_ERR(odp)) + return ERR_CAST(odp); + + mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags); + if (IS_ERR(mr)) { + ib_umem_release(&odp->umem); + return ERR_CAST(mr); } + odp->private = mr; + init_waitqueue_head(&mr->q_deferred_work); + atomic_set(&mr->num_deferred_work, 0); + err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_KERNEL)); + if (err) + goto err_dereg_mr; + + err = mlx5_ib_init_odp_mr(mr); + if (err) + goto err_dereg_mr; return &mr->ibmr; -error: - ib_umem_release(umem); + +err_dereg_mr: + dereg_mr(dev, mr); return ERR_PTR(err); } +struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem *umem; + + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) + return ERR_PTR(-EOPNOTSUPP); + + mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, iova, length, access_flags); + + if (access_flags & IB_ACCESS_ON_DEMAND) + return create_user_odp_mr(pd, start, length, iova, access_flags, + udata); + umem = ib_umem_get(&dev->ib_dev, start, length, access_flags); + if (IS_ERR(umem)) + return ERR_CAST(umem); + return create_real_mr(pd, umem, iova, access_flags); +} + /** * mlx5_mr_cache_invalidate - Fence all DMA on the MR * @mr: The MR to fence @@ -1474,151 +1579,224 @@ int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr) { struct mlx5_umr_wr umrwr = {}; - if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.pd = mr->dev->umrc.pd; + 
umrwr.pd = mr_to_mdev(mr)->umrc.pd; umrwr.mkey = mr->mmkey.key; umrwr.ignore_free_state = 1; - return mlx5_ib_post_send_wait(mr->dev, &umrwr); + return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr); } -static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, - int access_flags, int flags) -{ - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_umr_wr umrwr = {}; +/* + * True if the change in access flags can be done via UMR, only some access + * flags can be updated. + */ +static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, + unsigned int current_access_flags, + unsigned int target_access_flags) +{ + unsigned int diffs = current_access_flags ^ target_access_flags; + + if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING)) + return false; + return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags, + target_access_flags); +} + +static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + struct mlx5_umr_wr umrwr = { + .wr = { + .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS, + .opcode = MLX5_IB_WR_UMR, + }, + .mkey = mr->mmkey.key, + .pd = pd, + .access_flags = access_flags, + }; int err; - umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; - - umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.mkey = mr->mmkey.key; - - if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) { - umrwr.pd = pd; - umrwr.access_flags = access_flags; - umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - } - err = mlx5_ib_post_send_wait(dev, &umrwr); + if (err) + return err; - return err; + mr->access_flags = access_flags; + mr->mmkey.pd = to_mpd(pd)->pdn; + return 0; } -int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, - u64 length, u64 virt_addr, int new_access_flags, - struct ib_pd *new_pd, struct ib_udata *udata) +static bool 
can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, + struct ib_umem *new_umem, + int new_access_flags, u64 iova, + unsigned long *page_size) { - struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); - struct mlx5_ib_mr *mr = to_mmr(ib_mr); - struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; - int access_flags = flags & IB_MR_REREG_ACCESS ? - new_access_flags : - mr->access_flags; - int page_shift = 0; - int upd_flags = 0; - int npages = 0; - int ncont = 0; - int order = 0; - u64 addr, len; - int err; + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", - start, virt_addr, length, access_flags); + /* We only track the allocated sizes of MRs from the cache */ + if (!mr->cache_ent) + return false; + if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length)) + return false; - atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); + *page_size = + mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova); + if (WARN_ON(!*page_size)) + return false; + return (1ULL << mr->cache_ent->order) >= + ib_umem_num_dma_blocks(new_umem, *page_size); +} - if (!mr->umem) - return -EINVAL; +static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags, int flags, struct ib_umem *new_umem, + u64 iova, unsigned long page_size) +{ + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE; + struct ib_umem *old_umem = mr->umem; + int err; - if (is_odp_mr(mr)) - return -EOPNOTSUPP; + /* + * To keep everything simple the MR is revoked before we start to mess + * with it. This ensure the change is atomic relative to any use of the + * MR. 
+ */ + err = mlx5_mr_cache_invalidate(mr); + if (err) + return err; - if (flags & IB_MR_REREG_TRANS) { - addr = virt_addr; - len = length; - } else { - addr = mr->umem->address; - len = mr->umem->length; + if (flags & IB_MR_REREG_PD) { + mr->ibmr.pd = pd; + mr->mmkey.pd = to_mpd(pd)->pdn; + upd_flags |= MLX5_IB_UPD_XLT_PD; + } + if (flags & IB_MR_REREG_ACCESS) { + mr->access_flags = access_flags; + upd_flags |= MLX5_IB_UPD_XLT_ACCESS; } - if (flags != IB_MR_REREG_PD) { + mr->ibmr.length = new_umem->length; + mr->mmkey.iova = iova; + mr->mmkey.size = new_umem->length; + mr->page_shift = order_base_2(page_size); + mr->umem = new_umem; + err = mlx5_ib_update_mr_pas(mr, upd_flags); + if (err) { /* - * Replace umem. This needs to be done whether or not UMR is - * used. + * The MR is revoked at this point so there is no issue to free + * new_umem. */ - flags |= IB_MR_REREG_TRANS; - ib_umem_release(mr->umem); - mr->umem = NULL; - err = mr_umem_get(dev, addr, len, access_flags, &mr->umem, - &npages, &page_shift, &ncont, &order); - if (err) - goto err; + mr->umem = old_umem; + return err; } - if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags, - access_flags) || - !mlx5_ib_can_load_pas_with_umr(dev, len) || - (flags & IB_MR_REREG_TRANS && - !mlx5_ib_pas_fits_in_mr(mr, addr, len))) { - /* - * UMR can't be used - MKey needs to be replaced. 
- */ - if (mr->cache_ent) - detach_mr_from_cache(mr); - err = destroy_mkey(dev, mr); - if (err) - goto err; + atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages); + ib_umem_release(old_umem); + atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages); + return 0; +} - mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, - page_shift, access_flags, true); +struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 iova, int new_access_flags, + struct ib_pd *new_pd, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); + struct mlx5_ib_mr *mr = to_mmr(ib_mr); + int err; - if (IS_ERR(mr)) { - err = PTR_ERR(mr); - mr = to_mmr(ib_mr); - goto err; - } - } else { - /* - * Send a UMR WQE - */ - mr->ibmr.pd = pd; - mr->access_flags = access_flags; - mr->mmkey.iova = addr; - mr->mmkey.size = len; - mr->mmkey.pd = to_mpd(pd)->pdn; + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) + return ERR_PTR(-EOPNOTSUPP); - if (flags & IB_MR_REREG_TRANS) { - upd_flags = MLX5_IB_UPD_XLT_ADDR; - if (flags & IB_MR_REREG_PD) - upd_flags |= MLX5_IB_UPD_XLT_PD; - if (flags & IB_MR_REREG_ACCESS) - upd_flags |= MLX5_IB_UPD_XLT_ACCESS; - err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, - upd_flags); - } else { - err = rereg_umr(pd, mr, access_flags, flags); + mlx5_ib_dbg( + dev, + "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, iova, length, new_access_flags); + + if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) + return ERR_PTR(-EOPNOTSUPP); + + if (!(flags & IB_MR_REREG_ACCESS)) + new_access_flags = mr->access_flags; + if (!(flags & IB_MR_REREG_PD)) + new_pd = ib_mr->pd; + + if (!(flags & IB_MR_REREG_TRANS)) { + struct ib_umem *umem; + + /* Fast path for PD/access change */ + if (can_use_umr_rereg_access(dev, mr->access_flags, + new_access_flags)) { + err = umr_rereg_pd_access(mr, new_pd, new_access_flags); + if (err) + return ERR_PTR(err); + return 
NULL; } + /* DM or ODP MR's don't have a umem so we can't re-use it */ + if (!mr->umem || is_odp_mr(mr)) + goto recreate; + /* + * Only one active MR can refer to a umem at one time, revoke + * the old MR before assigning the umem to the new one. + */ + err = mlx5_mr_cache_invalidate(mr); if (err) - goto err; - } - - set_mr_fields(dev, mr, npages, len, access_flags); + return ERR_PTR(err); + umem = mr->umem; + mr->umem = NULL; + atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); - return 0; + return create_real_mr(new_pd, umem, mr->mmkey.iova, + new_access_flags); + } -err: - ib_umem_release(mr->umem); - mr->umem = NULL; + /* + * DM doesn't have a PAS list so we can't re-use it, odp does but the + * logic around releasing the umem is different + */ + if (!mr->umem || is_odp_mr(mr)) + goto recreate; + + if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && + can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) { + struct ib_umem *new_umem; + unsigned long page_size; + + new_umem = ib_umem_get(&dev->ib_dev, start, length, + new_access_flags); + if (IS_ERR(new_umem)) + return ERR_CAST(new_umem); + + /* Fast path for PAS change */ + if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova, + &page_size)) { + err = umr_rereg_pas(mr, new_pd, new_access_flags, flags, + new_umem, iova, page_size); + if (err) { + ib_umem_release(new_umem); + return ERR_PTR(err); + } + return NULL; + } + return create_real_mr(new_pd, new_umem, iova, new_access_flags); + } - clean_mr(dev, mr); - return err; + /* + * Everything else has no state we can preserve, just create a new MR + * from scratch + */ +recreate: + return mlx5_ib_reg_user_mr(new_pd, start, length, iova, + new_access_flags, udata); } static int @@ -1627,6 +1805,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, int ndescs, int desc_size) { + struct mlx5_ib_dev *dev = to_mdev(device); + struct device *ddev = &dev->mdev->pdev->dev; int size = ndescs * desc_size; int add_size; int ret; @@ -1639,9 
+1819,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); - mr->desc_map = dma_map_single(device->dev.parent, mr->descs, - size, DMA_TO_DEVICE); - if (dma_mapping_error(device->dev.parent, mr->desc_map)) { + mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, mr->desc_map)) { ret = -ENOMEM; goto err; } @@ -1659,9 +1838,10 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) if (mr->descs) { struct ib_device *device = mr->ibmr.device; int size = mr->max_descs * mr->desc_size; + struct mlx5_ib_dev *dev = to_mdev(device); - dma_unmap_single(device->dev.parent, mr->desc_map, - size, DMA_TO_DEVICE); + dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, + DMA_TO_DEVICE); kfree(mr->descs_alloc); mr->descs = NULL; } @@ -1691,7 +1871,6 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int npages = mr->npages; struct ib_umem *umem = mr->umem; /* Stop all DMA */ @@ -1700,14 +1879,17 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) else clean_mr(dev, mr); + if (umem) { + if (!is_odp_mr(mr)) + atomic_sub(ib_umem_num_pages(umem), + &dev->mdev->priv.reg_pages); + ib_umem_release(umem); + } + if (mr->cache_ent) mlx5_mr_cache_free(dev, mr); else kfree(mr); - - ib_umem_release(umem); - atomic_sub(npages, &dev->mdev->priv.reg_pages); - } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 5c853ec1b0d8..aa2413b50adc 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -102,7 +102,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, if (flags & MLX5_IB_UPD_XLT_ZAP) { for (; pklm != end; pklm++, idx++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - pklm->key = cpu_to_be32(imr->dev->null_mkey); + 
pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); pklm->va = 0; } return; @@ -129,7 +129,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * locking around the xarray. */ lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); - lockdep_assert_held(&imr->dev->odp_srcu); + lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu); for (; pklm != end; pklm++, idx++) { struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); @@ -139,7 +139,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, pklm->key = cpu_to_be32(mtt->ibmr.lkey); pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE); } else { - pklm->key = cpu_to_be32(imr->dev->null_mkey); + pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); pklm->va = 0; } } @@ -199,7 +199,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) mutex_unlock(&odp->umem_mutex); if (!mr->cache_ent) { - mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey); + mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey); WARN_ON(mr->descs); } } @@ -222,19 +222,19 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) WARN_ON(atomic_read(&mr->num_deferred_work)); if (need_imr_xlt) { - srcu_key = srcu_read_lock(&mr->dev->odp_srcu); + srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu); mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); - srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); + srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key); } dma_fence_odp_mr(mr); mr->parent = NULL; - mlx5_mr_cache_free(mr->dev, mr); + mlx5_mr_cache_free(mr_to_mdev(mr), mr); ib_umem_odp_release(odp); if (atomic_dec_and_test(&imr->num_deferred_work)) wake_up(&imr->q_deferred_work); @@ -274,7 +274,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) goto out_unlock; atomic_inc(&imr->num_deferred_work); - call_srcu(&mr->dev->odp_srcu, 
&mr->odp_destroy.rcu, + call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu, free_implicit_child_mr_rcu); out_unlock: @@ -476,12 +476,13 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY, - imr->access_flags); + ret = mr = mlx5_mr_cache_alloc( + mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags); if (IS_ERR(mr)) goto out_umem; mr->ibmr.pd = imr->ibmr.pd; + mr->ibmr.device = &mr_to_mdev(imr)->ib_dev; mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; @@ -517,11 +518,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_mr; } - mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); + mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); return mr; out_mr: - mlx5_mr_cache_free(imr->dev, mr); + mlx5_mr_cache_free(mr_to_mdev(imr), mr); out_umem: ib_umem_odp_release(odp); return ret; @@ -536,6 +537,10 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct mlx5_ib_mr *imr; int err; + if (!mlx5_ib_can_load_pas_with_umr(dev, + MLX5_IMR_MTT_ENTRIES * PAGE_SIZE)) + return ERR_PTR(-EOPNOTSUPP); + umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags); if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); @@ -551,6 +556,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->umem = &umem_odp->umem; imr->ibmr.lkey = imr->mmkey.key; imr->ibmr.rkey = imr->mmkey.key; + imr->ibmr.device = &dev->ib_dev; imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; atomic_set(&imr->num_deferred_work, 0); @@ -584,7 +590,7 @@ out_umem: void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); - struct mlx5_ib_dev *dev = imr->dev; + struct mlx5_ib_dev *dev = mr_to_mdev(imr); struct list_head destroy_list; struct mlx5_ib_mr *mtt; struct mlx5_ib_mr *tmp; @@ 
-654,10 +660,10 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) { /* Prevent new page faults and prefetch requests from succeeding */ - xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&mr->dev->odp_srcu); + synchronize_srcu(&mr_to_mdev(mr)->odp_srcu); wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); @@ -701,7 +707,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, if (ret < 0) { if (ret != -EAGAIN) - mlx5_ib_err(mr->dev, + mlx5_ib_err(mr_to_mdev(mr), "Failed to update mkey page tables\n"); goto out; } @@ -791,7 +797,7 @@ out: MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); if (err) { - mlx5_ib_err(imr->dev, "Failed to update PAS\n"); + mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n"); return err; } return ret; @@ -811,7 +817,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - lockdep_assert_held(&mr->dev->odp_srcu); + lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu); if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; @@ -831,17 +837,13 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, flags); } -int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) { - u32 flags = MLX5_PF_FLAGS_SNAPSHOT; int ret; - if (enable) - flags |= MLX5_PF_FLAGS_ENABLE; - - ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), - mr->umem->address, mr->umem->length, NULL, - flags); + ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address, + mr->umem->length, NULL, + MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE); return ret >= 0 ? 
0 : ret; } @@ -1783,7 +1785,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) /* We rely on IB/core that work is executed if we have num_sge != 0 only. */ WARN_ON(!work->num_sge); - dev = work->frags[0].mr->dev; + dev = mr_to_mdev(work->frags[0].mr); /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < work->num_sge; ++i) { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 600e056798c0..0cb7cc642d87 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -778,39 +778,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, return bfregi->sys_pages[index_of_sys_page] + offset; } -static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, - unsigned long addr, size_t size, - struct ib_umem **umem, int *npages, int *page_shift, - int *ncont, u32 *offset) -{ - int err; - - *umem = ib_umem_get(&dev->ib_dev, addr, size, 0); - if (IS_ERR(*umem)) { - mlx5_ib_dbg(dev, "umem_get failed\n"); - return PTR_ERR(*umem); - } - - mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL); - - err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; - } - - mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n", - addr, size, *npages, *page_shift, *ncont, *offset); - - return 0; - -err_umem: - ib_umem_release(*umem); - *umem = NULL; - - return err; -} - static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_rwq *rwq, struct ib_udata *udata) { @@ -833,10 +800,8 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - int page_shift = 0; - int npages; + unsigned long page_size = 0; u32 offset = 0; - int ncont = 0; int err; if (!ucmd->buf_addr) @@ -849,23 
+814,26 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift, - &ncont, NULL); - err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, - &rwq->rq_page_offset); - if (err) { + page_size = mlx5_umem_find_best_quantized_pgoff( + rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &rwq->rq_page_offset); + if (!page_size) { mlx5_ib_warn(dev, "bad offset\n"); + err = -EINVAL; goto err_umem; } - rwq->rq_num_pas = ncont; - rwq->page_shift = page_shift; - rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size); + rwq->page_shift = order_base_2(page_size); + rwq->log_page_size = rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT; rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE); - mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n", - (unsigned long long)ucmd->buf_addr, rwq->buf_size, - npages, page_shift, ncont, offset); + mlx5_ib_dbg( + dev, + "addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n", + (unsigned long long)ucmd->buf_addr, rwq->buf_size, + ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas, + offset); err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db); if (err) { @@ -896,10 +864,9 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct mlx5_ib_ucontext *context; struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; - int page_shift = 0; + unsigned int page_offset_quantized = 0; + unsigned long page_size = 0; int uar_index = 0; - int npages; - u32 offset = 0; int bfregn; int ncont = 0; __be64 *pas; @@ -950,11 +917,21 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd->buf_addr && ubuffer->buf_size) { ubuffer->buf_addr = ucmd->buf_addr; - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, - ubuffer->buf_size, &ubuffer->umem, - &npages, 
&page_shift, &ncont, &offset); - if (err) + ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, + ubuffer->buf_size, 0); + if (IS_ERR(ubuffer->umem)) { + err = PTR_ERR(ubuffer->umem); goto err_bfreg; + } + page_size = mlx5_umem_find_best_quantized_pgoff( + ubuffer->umem, qpc, log_page_size, + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, + &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } + ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size); } else { ubuffer->umem = NULL; } @@ -969,15 +946,14 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0; MLX5_SET(create_qp_in, *in, uid, uid); - pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); - if (ubuffer->umem) - mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); - qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); - - MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(qpc, qpc, page_offset, offset); - + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); + if (ubuffer->umem) { + mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0); + MLX5_SET(qpc, qpc, log_page_size, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, page_offset, page_offset_quantized); + } MLX5_SET(qpc, qpc, uar_page, uar_index); if (bfregn != MLX5_IB_INVALID_BFREG) resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn); @@ -1209,18 +1185,24 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, void *wq; int inlen; int err; - int page_shift = 0; - int npages; - int ncont = 0; - u32 offset = 0; - - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, - &sq->ubuffer.umem, &npages, &page_shift, &ncont, - &offset); - if (err) - return err; + unsigned int page_offset_quantized; + unsigned long page_size; + + sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, + ubuffer->buf_size, 0); + if (IS_ERR(sq->ubuffer.umem)) 
+ return PTR_ERR(sq->ubuffer.umem); + page_size = mlx5_umem_find_best_quantized_pgoff( + ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } - inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont; + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * + ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; @@ -1248,11 +1230,12 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size)); - MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(wq, wq, page_offset, offset); + MLX5_SET(wq, wq, log_wq_pg_sz, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(wq, wq, page_offset, page_offset_quantized); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0); + mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0); err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp); @@ -1278,40 +1261,31 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, ib_umem_release(sq->ubuffer.umem); } -static size_t get_rq_pas_size(void *qpc) -{ - u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12; - u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride); - u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size); - u32 page_offset = MLX5_GET(qpc, qpc, page_offset); - u32 po_quanta = 1 << (log_page_size - 6); - u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride); - u32 page_size = 1 << log_page_size; - u32 rq_sz_po = rq_sz + (page_offset * po_quanta); - u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size; - - return rq_num_pas * sizeof(u64); -} - static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct 
mlx5_ib_rq *rq, void *qpin, - size_t qpinlen, struct ib_pd *pd) + struct ib_pd *pd) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; - __be64 *qp_pas; void *in; void *rqc; void *wq; void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc); - size_t rq_pas_size = get_rq_pas_size(qpc); + struct ib_umem *umem = rq->base.ubuffer.umem; + unsigned int page_offset_quantized; + unsigned long page_size = 0; size_t inlen; int err; - if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas)) + page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz, + MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, + &page_offset_quantized); + if (!page_size) return -EINVAL; - inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size; + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + + sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size); in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1333,16 +1307,16 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING) MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); - MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset)); + MLX5_SET(wq, wq, page_offset, page_offset_quantized); MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd)); MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4); - MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size)); + MLX5_SET(wq, wq, log_wq_pg_sz, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size)); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas); - memcpy(pas, qp_pas, rq_pas_size); + mlx5_ib_populate_pas(umem, page_size, pas, 0); err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp); @@ -1463,7 +1437,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, 
struct mlx5_ib_qp *qp, rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; - err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd); + err = create_raw_packet_qp_rq(dev, rq, in, pd); if (err) goto err_destroy_sq; @@ -2436,7 +2410,7 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd, } qp->state = IB_QPS_RESET; - + rdma_restrack_no_track(&qp->ibqp.res); return 0; } @@ -2460,6 +2434,7 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, case IB_QPT_GSI: if (dev->profile == &raw_eth_profile) goto out; + fallthrough; case IB_QPT_RAW_PACKET: case IB_QPT_UD: case MLX5_IB_QPT_REG_UMR: @@ -2712,11 +2687,12 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1, true, qp); - if (create_flags) + if (create_flags) { mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n", create_flags); - - return (create_flags) ? 
-EINVAL : 0; + return -EOPNOTSUPP; + } + return 0; } static int process_udata_size(struct mlx5_ib_dev *dev, @@ -3102,7 +3078,7 @@ static int ib_to_mlx5_rate_map(u8 rate) return 5; default: return rate + MLX5_STAT_RATE_OFFSET; - }; + } return 0; } @@ -4247,6 +4223,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int err = -EINVAL; int port; + if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) + return -EOPNOTSUPP; + if (ibqp->rwq_ind_tbl) return -ENOSYS; @@ -4576,7 +4555,9 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path); - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) { to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path); to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path); qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index); @@ -4882,7 +4863,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET(rqc, rqc, delay_drop_en, 1); } rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); + mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0); err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp); if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { err = set_delay_drop(dev); diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 887270dd3ce2..4ac429e72004 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -116,7 +116,7 @@ static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr) { struct mlx5_ib_mr *mr = to_mmr(ibmr); - return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY, + return 
fill_res_raw(msg, mr_to_mdev(mr), MLX5_SGMT_TYPE_PRM_QUERY_MKEY, mlx5_mkey_to_idx(mr->mmkey.key)); } diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index e2f720eec1e1..fab6736e4d6a 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -51,10 +51,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata, struct mlx5_ib_ucontext, ibucontext); size_t ucmdlen; int err; - int npages; - int page_shift; - int ncont; - u32 offset; u32 uidx = MLX5_IB_DEFAULT_UIDX; ucmdlen = min(udata->inlen, sizeof(ucmd)); @@ -86,32 +82,14 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, err = PTR_ERR(srq->umem); return err; } - - mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages, - &page_shift, &ncont, NULL); - err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, - &offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; - } - - in->pas = kvcalloc(ncont, sizeof(*in->pas), GFP_KERNEL); - if (!in->pas) { - err = -ENOMEM; - goto err_umem; - } - - mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0); + in->umem = srq->umem; err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db); if (err) { mlx5_ib_dbg(dev, "map doorbell failed\n"); - goto err_in; + goto err_umem; } - in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - in->page_offset = offset; in->uid = (in->type != IB_SRQT_XRC) ? 
to_mpd(pd)->uid : 0; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type != IB_SRQT_BASIC) @@ -119,9 +97,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return 0; -err_in: - kvfree(in->pas); - err_umem: ib_umem_release(srq->umem); @@ -226,6 +201,11 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, struct mlx5_srq_attr in = {}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC && + init_attr->srq_type != IB_SRQT_TM) + return -EOPNOTSUPP; + /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= max_srq_wqes) { mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index 2c3627b2509d..a7e3dc5564ac 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -28,6 +28,7 @@ struct mlx5_srq_attr { u32 user_index; u64 db_record; __be64 *pas; + struct ib_umem *umem; u32 tm_log_list_size; u32 tm_next_tag; u32 tm_hw_phase_cnt; diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index db889ec3fd48..8b3385396599 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -92,6 +92,25 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) return srq; } +static int __set_srq_page_size(struct mlx5_srq_attr *in, + unsigned long page_size) +{ + if (!page_size) + return -EINVAL; + in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT; + + if (WARN_ON(get_pas_size(in) != + ib_umem_num_dma_blocks(in->umem, page_size) * sizeof(u64))) + return -EINVAL; + return 0; +} + +#define set_srq_page_size(in, typ, log_pgsz_fld) \ + __set_srq_page_size(in, mlx5_umem_find_best_quantized_pgoff( \ + (in)->umem, typ, log_pgsz_fld, \ + MLX5_ADAPTER_PAGE_SHIFT, page_offset, \ + 64, &(in)->page_offset)) + static int 
create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { @@ -103,6 +122,12 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, srqc, log_page_size); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -114,7 +139,13 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); set_srqc(srqc, in); - memcpy(pas, in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_SRQ); @@ -194,6 +225,12 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, xrc_srqc, log_page_size); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -207,7 +244,13 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, set_srqc(xrc_srqc, in); MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); - memcpy(pas, in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_xrc_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); @@ -289,11 +332,18 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, void *create_in = NULL; void *rmpc; void *wq; + void *pas; int pas_size; int outlen; int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, wq, log_wq_pg_sz); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + 
pas_size; outlen = MLX5_ST_SZ_BYTES(create_rmp_out); @@ -309,8 +359,16 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); MLX5_SET(create_rmp_in, create_in, uid, in->uid); + pas = MLX5_ADDR_OF(rmpc, rmpc, wq.pas); + set_wq(wq, in); - memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP); err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen); @@ -421,10 +479,17 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, void *create_in; void *xrqc; void *wq; + void *pas; int pas_size; int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, wq, log_wq_pg_sz); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -433,9 +498,16 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + pas = MLX5_ADDR_OF(xrqc, xrqc, wq.pas); set_wq(wq, in); - memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); if (in->type == IB_SRQT_TM) { MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 119b2573c9a0..26c3408dcaca 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -604,7 +604,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev, entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; break; 
default: - entry->opcode = MTHCA_OPCODE_INVALID; + entry->opcode = 0xFF; break; } } else { diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 9dbbf4d16796..a445160de3e1 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -105,7 +105,6 @@ enum { MTHCA_OPCODE_ATOMIC_CS = 0x11, MTHCA_OPCODE_ATOMIC_FA = 0x12, MTHCA_OPCODE_BIND_MW = 0x18, - MTHCA_OPCODE_INVALID = 0xff }; enum { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index c4d9cdc4ee97..1a3dd07f993b 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -470,7 +470,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, int err; if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); switch (init_attr->qp_type) { case IB_QPT_RC: @@ -612,7 +612,7 @@ static int mthca_create_cq(struct ib_cq *ibcq, udata, struct mthca_ucontext, ibucontext); if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) return -EINVAL; @@ -961,29 +961,34 @@ static ssize_t hw_rev_show(struct device *device, struct mthca_dev *dev = rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - return sprintf(buf, "%x\n", dev->rev_id); + return sysfs_emit(buf, "%x\n", dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); -static ssize_t hca_type_show(struct device *device, - struct device_attribute *attr, char *buf) +static const char *hca_type_string(int hca_type) { - struct mthca_dev *dev = - rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - - switch (dev->pdev->device) { + switch (hca_type) { case PCI_DEVICE_ID_MELLANOX_TAVOR: - return sprintf(buf, "MT23108\n"); + return "MT23108"; case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT: - return sprintf(buf, "MT25208 (MT23108 compat mode)\n"); + return "MT25208 (MT23108 compat mode)"; case 
PCI_DEVICE_ID_MELLANOX_ARBEL: - return sprintf(buf, "MT25208\n"); + return "MT25208"; case PCI_DEVICE_ID_MELLANOX_SINAI: case PCI_DEVICE_ID_MELLANOX_SINAI_OLD: - return sprintf(buf, "MT25204\n"); - default: - return sprintf(buf, "unknown\n"); + return "MT25204"; } + + return "unknown"; +} + +static ssize_t hca_type_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mthca_dev *dev = + rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); + + return sysfs_emit(buf, "%s\n", hca_type_string(dev->pdev->device)); } static DEVICE_ATTR_RO(hca_type); @@ -993,7 +998,7 @@ static ssize_t board_id_show(struct device *device, struct mthca_dev *dev = rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); + return sysfs_emit(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -1158,36 +1163,12 @@ int mthca_register_device(struct mthca_dev *dev) if (ret) return ret; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; if (dev->mthca_flags & MTHCA_FLAG_SRQ) { - 
dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); - if (mthca_is_memfree(dev)) ib_set_device_ops(&dev->ib_dev, &mthca_dev_arbel_srq_ops); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 08a2a7afafd3..07cfc0934b17 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -863,6 +863,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, new_state; int err = -EINVAL; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + mutex_lock(&qp->mutex); if (attr_mask & IB_QP_CUR_STATE) { cur_state = attr->cur_qp_state; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 9b96661a7143..9a834a9cca0e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -119,7 +119,7 @@ static ssize_t hw_rev_show(struct device *device, struct ocrdma_dev *dev = rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); + return sysfs_emit(buf, "0x%x\n", dev->nic_info.pdev->vendor); } static DEVICE_ATTR_RO(hw_rev); @@ -129,7 +129,7 @@ static ssize_t hca_type_show(struct device *device, struct ocrdma_dev *dev = rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]); + return sysfs_emit(buf, "%s\n", &dev->model_number[0]); } static DEVICE_ATTR_RO(hca_type); @@ -154,6 +154,7 @@ static const struct ib_device_ops ocrdma_dev_ops = { .create_ah = ocrdma_create_ah, .create_cq = ocrdma_create_cq, .create_qp = ocrdma_create_qp, + .create_user_ah = ocrdma_create_ah, .dealloc_pd = ocrdma_dealloc_pd, .dealloc_ucontext = ocrdma_dealloc_ucontext, 
.dereg_mr = ocrdma_dereg_mr, @@ -204,32 +205,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask = - OCRDMA_UVERBS(GET_CONTEXT) | - OCRDMA_UVERBS(QUERY_DEVICE) | - OCRDMA_UVERBS(QUERY_PORT) | - OCRDMA_UVERBS(ALLOC_PD) | - OCRDMA_UVERBS(DEALLOC_PD) | - OCRDMA_UVERBS(REG_MR) | - OCRDMA_UVERBS(DEREG_MR) | - OCRDMA_UVERBS(CREATE_COMP_CHANNEL) | - OCRDMA_UVERBS(CREATE_CQ) | - OCRDMA_UVERBS(RESIZE_CQ) | - OCRDMA_UVERBS(DESTROY_CQ) | - OCRDMA_UVERBS(REQ_NOTIFY_CQ) | - OCRDMA_UVERBS(CREATE_QP) | - OCRDMA_UVERBS(MODIFY_QP) | - OCRDMA_UVERBS(QUERY_QP) | - OCRDMA_UVERBS(DESTROY_QP) | - OCRDMA_UVERBS(POLL_CQ) | - OCRDMA_UVERBS(POST_SEND) | - OCRDMA_UVERBS(POST_RECV); - - dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(CREATE_AH) | - OCRDMA_UVERBS(MODIFY_AH) | - OCRDMA_UVERBS(QUERY_AH) | - OCRDMA_UVERBS(DESTROY_AH); dev->ibdev.node_type = RDMA_NODE_IB_CA; dev->ibdev.phys_port_cnt = 1; @@ -240,16 +215,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops); - if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { - dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(CREATE_SRQ) | - OCRDMA_UVERBS(MODIFY_SRQ) | - OCRDMA_UVERBS(QUERY_SRQ) | - OCRDMA_UVERBS(DESTROY_SRQ) | - OCRDMA_UVERBS(POST_SRQ_RECV); - + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops); - } + rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); ret = ib_device_set_netdev(&dev->ibdev, dev->nic_info.netdev, 1); if (ret) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 7350fe16f164..bc98bd950d99 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -974,7 +974,7 @@ int ocrdma_create_cq(struct ib_cq *ibcq, 
const struct ib_cq_init_attr *attr, struct ocrdma_create_cq_ureq ureq; if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (udata) { if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) @@ -1299,6 +1299,9 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, struct ocrdma_create_qp_ureq ureq; u16 dpp_credit_lmt, dpp_offset; + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + status = ocrdma_check_qp_params(ibpd, dev, attrs, udata); if (status) goto gen_err; @@ -1391,6 +1394,9 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct ocrdma_dev *dev; enum ib_qp_state old_qps, new_qps; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp = get_ocrdma_qp(ibqp); dev = get_ocrdma_dev(ibqp->device); @@ -1770,6 +1776,9 @@ int ocrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq); + if (init_attr->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (init_attr->attr.max_sge > dev->attr.max_recv_sge) return -EINVAL; if (init_attr->attr.max_wr > dev->attr.max_rqe) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 967641662b24..8e7c069e1a2d 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -124,7 +124,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct qedr_dev *dev = rdma_device_to_drv_device(device, struct qedr_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->attr.hw_ver); + return sysfs_emit(buf, "0x%x\n", dev->attr.hw_ver); } static DEVICE_ATTR_RO(hw_rev); @@ -134,10 +134,9 @@ static ssize_t hca_type_show(struct device *device, struct qedr_dev *dev = rdma_device_to_drv_device(device, struct qedr_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "FastLinQ QL%x %s\n", - dev->pdev->device, - rdma_protocol_iwarp(&dev->ibdev, 1) ? 
- "iWARP" : "RoCE"); + return sysfs_emit(buf, "FastLinQ QL%x %s\n", dev->pdev->device, + rdma_protocol_iwarp(&dev->ibdev, 1) ? "iWARP" : + "RoCE"); } static DEVICE_ATTR_RO(hca_type); @@ -188,10 +187,6 @@ static void qedr_roce_register_device(struct qedr_dev *dev) dev->ibdev.node_type = RDMA_NODE_IB_CA; ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops); - - dev->ibdev.uverbs_cmd_mask |= QEDR_UVERBS(OPEN_XRCD) | - QEDR_UVERBS(CLOSE_XRCD) | - QEDR_UVERBS(CREATE_XSRQ); } static const struct ib_device_ops qedr_dev_ops = { @@ -249,31 +244,6 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.node_guid = dev->attr.node_guid; memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) | - QEDR_UVERBS(QUERY_DEVICE) | - QEDR_UVERBS(QUERY_PORT) | - QEDR_UVERBS(ALLOC_PD) | - QEDR_UVERBS(DEALLOC_PD) | - QEDR_UVERBS(CREATE_COMP_CHANNEL) | - QEDR_UVERBS(CREATE_CQ) | - QEDR_UVERBS(RESIZE_CQ) | - QEDR_UVERBS(DESTROY_CQ) | - QEDR_UVERBS(REQ_NOTIFY_CQ) | - QEDR_UVERBS(CREATE_QP) | - QEDR_UVERBS(MODIFY_QP) | - QEDR_UVERBS(QUERY_QP) | - QEDR_UVERBS(DESTROY_QP) | - QEDR_UVERBS(CREATE_SRQ) | - QEDR_UVERBS(DESTROY_SRQ) | - QEDR_UVERBS(QUERY_SRQ) | - QEDR_UVERBS(MODIFY_SRQ) | - QEDR_UVERBS(POST_SRQ_RECV) | - QEDR_UVERBS(REG_MR) | - QEDR_UVERBS(DEREG_MR) | - QEDR_UVERBS(POLL_CQ) | - QEDR_UVERBS(POST_SEND) | - QEDR_UVERBS(POST_RECV); - if (IS_IWARP(dev)) { rc = qedr_iw_register_device(dev); if (rc) @@ -796,6 +766,7 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) } xa_unlock_irqrestore(&dev->srqs, flags); DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq); + break; default: break; } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 511c95bb3d01..0eb6a7a618e0 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -928,6 +928,9 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr 
*attr, "create_cq: called from %s. entries=%d, vector=%d\n", udata ? "User Lib" : "Kernel", entries, vector); + if (attr->flags) + return -EOPNOTSUPP; + if (entries > QEDR_MAX_CQES) { DP_ERR(dev, "create cq: the number of entries %d is too high. Must be equal or below %d.\n", @@ -1546,6 +1549,10 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, "create SRQ called from %s (pd %p)\n", (udata) ? "User lib" : "kernel", pd); + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + rc = qedr_check_srq_params(dev, init_attr, udata); if (rc) return -EINVAL; @@ -2241,6 +2248,9 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, struct ib_qp *ibqp; int rc = 0; + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + if (attrs->qp_type == IB_QPT_XRC_TGT) { xrcd = get_qedr_xrcd(attrs->xrcd); dev = get_qedr_dev(xrcd->ibxrcd.device); @@ -2477,6 +2487,9 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask, attr->qp_state); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + old_qp_state = qedr_get_ibqp_state(qp->state); if (attr_mask & IB_QP_STATE) new_qp_state = attr->qp_state; diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 3dc6ce033319..2e07b3749b88 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -90,25 +90,18 @@ int qib_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) goto bail; } - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (ret) { /* * If the 64 bit setup fails, try 32 bit. Some systems * do not setup 64 bit maps on systems with 2GB or less * memory installed. 
*/ - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { qib_devinfo(pdev, "Unable to set DMA mask: %d\n", ret); goto bail; } - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } else - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) { - qib_early_err(&pdev->dev, - "Unable to set DMA consistent mask: %d\n", ret); - goto bail; } pci_set_master(pdev); diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 021df0654ba7..62c179fc764b 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -43,11 +43,8 @@ static ssize_t show_hrtbt_enb(struct qib_pportdata *ppd, char *buf) { struct qib_devdata *dd = ppd->dd; - int ret; - ret = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT); - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; + return sysfs_emit(buf, "%d\n", dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT)); } static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, @@ -106,14 +103,10 @@ static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf, static ssize_t show_status(struct qib_pportdata *ppd, char *buf) { - ssize_t ret; - if (!ppd->statusp) - ret = -EINVAL; - else - ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n", - (unsigned long long) *(ppd->statusp)); - return ret; + return -EINVAL; + + return sysfs_emit(buf, "0x%llx\n", (unsigned long long)*(ppd->statusp)); } /* @@ -392,7 +385,7 @@ static ssize_t sl2vl_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct qib_pportdata, sl2vl_kobj); struct qib_ibport *qibp = &ppd->ibport_data; - return sprintf(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); + return sysfs_emit(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); } static const struct sysfs_ops qib_sl2vl_ops = { @@ -501,17 +494,18 @@ static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr, struct qib_pportdata *ppd = 
container_of(kobj, struct qib_pportdata, diagc_kobj); struct qib_ibport *qibp = &ppd->ibport_data; + u64 val; if (!strncmp(dattr->attr.name, "rc_acks", 7)) - return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_acks)); + val = READ_PER_CPU_CNTR(rc_acks); else if (!strncmp(dattr->attr.name, "rc_qacks", 8)) - return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_qacks)); + val = READ_PER_CPU_CNTR(rc_qacks); else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15)) - return sprintf(buf, "%llu\n", - READ_PER_CPU_CNTR(rc_delayed_comp)); + val = READ_PER_CPU_CNTR(rc_delayed_comp); else - return sprintf(buf, "%u\n", - *(u32 *)((char *)qibp + dattr->counter)); + val = *(u32 *)((char *)qibp + dattr->counter); + + return sysfs_emit(buf, "%llu\n", val); } static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, @@ -565,7 +559,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); - return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); + return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev); } static DEVICE_ATTR_RO(hw_rev); @@ -575,13 +569,10 @@ static ssize_t hca_type_show(struct device *device, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); - int ret; if (!dd->boardname) - ret = -EINVAL; - else - ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); - return ret; + return -EINVAL; + return sysfs_emit(buf, "%s\n", dd->boardname); } static DEVICE_ATTR_RO(hca_type); static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL); @@ -590,7 +581,7 @@ static ssize_t version_show(struct device *device, struct device_attribute *attr, char *buf) { /* The string printed here is already newline-terminated. 
*/ - return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version); + return sysfs_emit(buf, "%s", (char *)ib_qib_version); } static DEVICE_ATTR_RO(version); @@ -602,7 +593,7 @@ static ssize_t boardversion_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); + return sysfs_emit(buf, "%s", dd->boardversion); } static DEVICE_ATTR_RO(boardversion); @@ -614,7 +605,7 @@ static ssize_t localbus_info_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info); + return sysfs_emit(buf, "%s", dd->lbus_info); } static DEVICE_ATTR_RO(localbus_info); @@ -628,9 +619,10 @@ static ssize_t nctxts_show(struct device *device, /* Return the number of user ports (contexts) available. */ /* The calculation below deals with a special case where * cfgctxts is set to 1 on a single-port board. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", - (dd->first_user_ctxt > dd->cfgctxts) ? 0 : - (dd->cfgctxts - dd->first_user_ctxt)); + return sysfs_emit(buf, "%u\n", + (dd->first_user_ctxt > dd->cfgctxts) ? + 0 : + (dd->cfgctxts - dd->first_user_ctxt)); } static DEVICE_ATTR_RO(nctxts); @@ -642,21 +634,20 @@ static ssize_t nfreectxts_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. 
*/ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); + return sysfs_emit(buf, "%u\n", dd->freectxts); } static DEVICE_ATTR_RO(nfreectxts); -static ssize_t serial_show(struct device *device, - struct device_attribute *attr, char *buf) +static ssize_t serial_show(struct device *device, struct device_attribute *attr, + char *buf) { struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); + const u8 *end = memchr(dd->serial, 0, ARRAY_SIZE(dd->serial)); + int size = end ? end - dd->serial : ARRAY_SIZE(dd->serial); - buf[sizeof(dd->serial)] = '\0'; - memcpy(buf, dd->serial, sizeof(dd->serial)); - strcat(buf, "\n"); - return strlen(buf); + return sysfs_emit(buf, ".%*s\n", size, dd->serial); } static DEVICE_ATTR_RO(serial); @@ -689,27 +680,26 @@ static ssize_t tempsense_show(struct device *device, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); - int ret; - int idx; + int i; u8 regvals[8]; - ret = -ENXIO; - for (idx = 0; idx < 8; ++idx) { - if (idx == 6) + for (i = 0; i < 8; i++) { + int ret; + + if (i == 6) continue; - ret = dd->f_tempsense_rd(dd, idx); + ret = dd->f_tempsense_rd(dd, i); if (ret < 0) - break; - regvals[idx] = ret; + return ret; /* return error on bad read */ + regvals[i] = ret; } - if (idx == 8) - ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n", - *(signed char *)(regvals), - *(signed char *)(regvals + 1), - regvals[2], regvals[3], - *(signed char *)(regvals + 5), - *(signed char *)(regvals + 7)); - return ret; + return sysfs_emit(buf, "%d %d %02X %02X %d %d\n", + (signed char)regvals[0], + (signed char)regvals[1], + regvals[2], + regvals[3], + (signed char)regvals[5], + (signed char)regvals[7]); } static DEVICE_ATTR_RO(tempsense); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index aa2e65fc5cd6..1b63a491fa72 100644 --- 
a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -398,25 +398,6 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; us_ibdev->ib_dev.dev.parent = &dev->dev; - us_ibdev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops); rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index c85d48ae7442..e59615a4c9d9 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -57,7 +57,7 @@ static ssize_t board_id_show(struct device *device, subsystem_device_id = us_ibdev->pdev->subsystem_device; mutex_unlock(&us_ibdev->usdev_lock); - return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id); + return sysfs_emit(buf, "%u\n", subsystem_device_id); } static DEVICE_ATTR_RO(board_id); @@ -69,19 +69,13 @@ config_show(struct device *device, struct device_attribute *attr, char *buf) { struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - char *ptr; - unsigned left; - unsigned n; enum usnic_vnic_res_type 
res_type; - - /* Buffer space limit is 1 page */ - ptr = buf; - left = PAGE_SIZE; + int len; mutex_lock(&us_ibdev->usdev_lock); if (kref_read(&us_ibdev->vf_cnt) > 0) { char *busname; - + char *sep = ""; /* * bus name seems to come with annoying prefix. * Remove it if it is predictable @@ -90,39 +84,35 @@ config_show(struct device *device, struct device_attribute *attr, char *buf) if (strncmp(busname, "PCI Bus ", 8) == 0) busname += 8; - n = scnprintf(ptr, left, - "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:", - dev_name(&us_ibdev->ib_dev.dev), - busname, - PCI_SLOT(us_ibdev->pdev->devfn), - PCI_FUNC(us_ibdev->pdev->devfn), - netdev_name(us_ibdev->netdev), - us_ibdev->ufdev->mac, - kref_read(&us_ibdev->vf_cnt)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "%s: %s:%d.%d, %s, %pM, %u VFs\n", + dev_name(&us_ibdev->ib_dev.dev), + busname, + PCI_SLOT(us_ibdev->pdev->devfn), + PCI_FUNC(us_ibdev->pdev->devfn), + netdev_name(us_ibdev->netdev), + us_ibdev->ufdev->mac, + kref_read(&us_ibdev->vf_cnt)); + len += sysfs_emit_at(buf, len, " Per VF:"); for (res_type = USNIC_VNIC_RES_TYPE_EOL; - res_type < USNIC_VNIC_RES_TYPE_MAX; - res_type++) { + res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) { if (us_ibdev->vf_res_cnt[res_type] == 0) continue; - n = scnprintf(ptr, left, " %d %s%s", - us_ibdev->vf_res_cnt[res_type], - usnic_vnic_res_type_to_str(res_type), - (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ? 
- "," : ""); - UPDATE_PTR_LEFT(n, ptr, left); + len += sysfs_emit_at(buf, len, "%s %d %s", + sep, + us_ibdev->vf_res_cnt[res_type], + usnic_vnic_res_type_to_str(res_type)); + sep = ","; } - n = scnprintf(ptr, left, "\n"); - UPDATE_PTR_LEFT(n, ptr, left); + len += sysfs_emit_at(buf, len, "\n"); } else { - n = scnprintf(ptr, left, "%s: no VFs\n", - dev_name(&us_ibdev->ib_dev.dev)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "%s: no VFs\n", + dev_name(&us_ibdev->ib_dev.dev)); } + mutex_unlock(&us_ibdev->usdev_lock); - return ptr - buf; + return len; } static DEVICE_ATTR_RO(config); @@ -132,8 +122,7 @@ iface_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", - netdev_name(us_ibdev->netdev)); + return sysfs_emit(buf, "%s\n", netdev_name(us_ibdev->netdev)); } static DEVICE_ATTR_RO(iface); @@ -143,8 +132,7 @@ max_vf_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", - kref_read(&us_ibdev->vf_cnt)); + return sysfs_emit(buf, "%u\n", kref_read(&us_ibdev->vf_cnt)); } static DEVICE_ATTR_RO(max_vf); @@ -158,8 +146,7 @@ qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); - return scnprintf(buf, PAGE_SIZE, - "%d\n", qp_per_vf); + return sysfs_emit(buf, "%d\n", qp_per_vf); } static DEVICE_ATTR_RO(qp_per_vf); @@ -169,8 +156,8 @@ cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%d\n", - us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); + return sysfs_emit(buf, "%d\n", 
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); } static DEVICE_ATTR_RO(cq_per_vf); @@ -217,43 +204,36 @@ struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME) static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf) { - return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx); + return sysfs_emit(buf, "0x%p\n", qp_grp->ctx); } static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) { - int i, j, n; - int left; - char *ptr; + int i, j; struct usnic_vnic_res_chunk *res_chunk; struct usnic_vnic_res *vnic_res; + int len; - left = PAGE_SIZE; - ptr = buf; - - n = scnprintf(ptr, left, - "QPN: %d State: (%s) PID: %u VF Idx: %hu ", - qp_grp->ibqp.qp_num, - usnic_ib_qp_grp_state_to_string(qp_grp->state), - qp_grp->owner_pid, - usnic_vnic_get_index(qp_grp->vf->vnic)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "QPN: %d State: (%s) PID: %u VF Idx: %hu ", + qp_grp->ibqp.qp_num, + usnic_ib_qp_grp_state_to_string(qp_grp->state), + qp_grp->owner_pid, + usnic_vnic_get_index(qp_grp->vf->vnic)); for (i = 0; qp_grp->res_chunk_list[i]; i++) { res_chunk = qp_grp->res_chunk_list[i]; for (j = 0; j < res_chunk->cnt; j++) { vnic_res = res_chunk->res[j]; - n = scnprintf(ptr, left, "%s[%d] ", + len += sysfs_emit_at( + buf, len, "%s[%d] ", usnic_vnic_res_type_to_str(vnic_res->type), vnic_res->vnic_idx); - UPDATE_PTR_LEFT(n, ptr, left); } } - n = scnprintf(ptr, left, "\n"); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit_at(buf, len, "\n"); - return ptr - buf; + return len; } static QPN_ATTR_RO(context); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 9e961f8ffa10..38a37770c016 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -474,7 +474,7 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, us_ibdev = to_usdev(pd->device); if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); err = 
ib_copy_from_udata(&cmd, udata, sizeof(cmd)); if (err) { @@ -557,6 +557,9 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int status; usnic_dbg("\n"); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp_grp = to_uqp_grp(ibqp); mutex_lock(&qp_grp->vf->pf->usdev_lock); @@ -581,7 +584,7 @@ int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; return 0; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 319546a39a0d..a119ac3e103c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -119,6 +119,9 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64); + if (attr->flags) + return -EOPNOTSUPP; + entries = roundup_pow_of_two(entries); if (entries < 1 || entries > dev->dsr->caps.max_cqe) return -EINVAL; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 6895bac53990..00a330909bb3 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -68,21 +68,21 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); + return sysfs_emit(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); } static DEVICE_ATTR_RO(hca_type); static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", PVRDMA_REV_ID); + return sysfs_emit(buf, "%d\n", PVRDMA_REV_ID); } static DEVICE_ATTR_RO(hw_rev); static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", 
PVRDMA_BOARD_ID); + return sysfs_emit(buf, "%d\n", PVRDMA_BOARD_ID); } static DEVICE_ATTR_RO(board_id); @@ -205,27 +205,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->flags = 0; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; @@ -249,13 +228,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) /* Check if SRQ is supported by backend */ if (dev->dsr->caps.max_srq) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops); dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 428256c55065..1d3bdd7bb51d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ 
b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -209,7 +209,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, dev_warn(&dev->pdev->dev, "invalid create queuepair flags %#x\n", init_attr->create_flags); - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } if (init_attr->qp_type != IB_QPT_RC && @@ -544,6 +544,9 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum ib_qp_state cur_state, next_state; int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* Sanity checking. Should need lock here */ mutex_lock(&qp->mutex); cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state : diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 082208f9aa90..bdc2703532c6 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -121,7 +121,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, dev_warn(&dev->pdev->dev, "shared receive queue type %d not supported\n", init_attr->srq_type); - return -EINVAL; + return -EOPNOTSUPP; } if (init_attr->attr.max_wr > dev->dsr->caps.max_srq_wr || diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig index c8e268082952..0df48b3a6b56 100644 --- a/drivers/infiniband/sw/rdmavt/Kconfig +++ b/drivers/infiniband/sw/rdmavt/Kconfig @@ -4,6 +4,5 @@ config INFINIBAND_RDMAVT depends on INFINIBAND_VIRT_DMA depends on X86_64 depends on PCI - select DMA_VIRT_OPS help This is a common software verbs provider for RDMA networks. 
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index b938c4ffa99a..a3e5b368c5e7 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -126,10 +126,9 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, } /** - * rvt_destory_ah - Destory an address handle + * rvt_destroy_ah - Destroy an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) - * * Return: 0 on success */ int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 19248be14093..20cc0799ac4b 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -211,7 +211,7 @@ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, int err; if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (entries < 1 || entries > rdi->dparms.props.max_cqe) return -EINVAL; diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index dd11c6fcd060..5233a63d99a6 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -54,7 +54,7 @@ #include "mcast.h" /** - * rvt_driver_mcast - init resources for multicast + * rvt_driver_mcast_init - init resources for multicast * @rdi: rvt dev struct * * This is per device that registers with rdmavt @@ -69,7 +69,7 @@ void rvt_driver_mcast_init(struct rvt_dev_info *rdi) } /** - * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct + * rvt_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct * @qp: the QP to link */ static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) @@ -98,7 +98,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp) } /** - * mcast_alloc - allocate the multicast GID structure + * rvt_mcast_alloc - allocate the multicast GID structure * @mgid: the multicast GID * 
@lid: the muilticast LID (host order) * @@ -181,7 +181,7 @@ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, EXPORT_SYMBOL(rvt_mcast_find); /** - * mcast_add - insert mcast GID into table and attach QP struct + * rvt_mcast_add - insert mcast GID into table and attach QP struct * @mcast: the mcast GID table * @mqp: the QP to attach * @@ -426,8 +426,8 @@ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } /** - *rvt_mast_tree_empty - determine if any qps are attached to any mcast group - *@rdi: rvt dev struct + * rvt_mcast_tree_empty - determine if any qps are attached to any mcast group + * @rdi: rvt dev struct * * Return: in use count */ diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 8490fdb9c91e..90fc234f489a 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -324,8 +324,6 @@ static void __rvt_free_mr(struct rvt_mr *mr) * @acc: access flags * * Return: the memory region on success, otherwise returns an errno. - * Note that all DMA addresses should be created via the functions in - * struct dma_virt_ops. */ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) { @@ -766,7 +764,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, /* * We use LKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). */ if (sge->lkey == 0) { struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); @@ -877,7 +875,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, /* * We use RKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). 
*/ rcu_read_lock(); if (rkey == 0) { diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index ee48befc8978..22fa9bde5419 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1083,10 +1083,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (!rdi) return ERR_PTR(-EINVAL); + if (init_attr->create_flags & ~IB_QP_CREATE_NETDEV_USE) + return ERR_PTR(-EOPNOTSUPP); + if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge || - init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || - (init_attr->create_flags && - init_attr->create_flags != IB_QP_CREATE_NETDEV_USE)) + init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr) return ERR_PTR(-EINVAL); /* Check receive queue parameters if no SRQ is specified. */ @@ -1469,6 +1470,9 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int pmtu = 0; /* for gcc warning only */ int opa_ah; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + spin_lock_irq(&qp->r_lock); spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); @@ -1823,7 +1827,7 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } /** - * rvt_post_receive - post a receive on a QP + * rvt_post_recv - post a receive on a QP * @ibqp: the QP to post the receive on * @wr: the WR to post * @bad_wr: the first bad WR is put here @@ -2245,7 +2249,7 @@ bail: } /** - * rvt_post_srq_receive - post a receive on a shared receive queue + * rvt_post_srq_recv - post a receive on a shared receive queue * @ibsrq: the SRQ to post the receive on * @wr: the list of work requests to post * @bad_wr: A pointer to the first WR to cause a problem is put here @@ -2497,7 +2501,7 @@ bail: EXPORT_SYMBOL(rvt_get_rwqe); /** - * qp_comm_est - handle trap with QP established + * rvt_comm_est - handle trap with QP established * @qp: the QP */ void rvt_comm_est(struct rvt_qp *qp) @@ -2943,7 +2947,7 @@ static enum ib_wc_status loopback_qp_drop(struct rvt_ibport *rvp, } /** - * 
ruc_loopback - handle UC and RC loopback requests + * rvt_ruc_loopback - handle UC and RC loopback requests * @sqp: the sending QP * * This is called from rvt_do_send() to forward a WQE addressed to the same HFI diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 670a9623b46e..49cec85a372a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -384,6 +384,7 @@ static const struct ib_device_ops rvt_dev_ops = { .create_cq = rvt_create_cq, .create_qp = rvt_create_qp, .create_srq = rvt_create_srq, + .create_user_ah = rvt_create_ah, .dealloc_pd = rvt_dealloc_pd, .dealloc_ucontext = rvt_dealloc_ucontext, .dereg_mr = rvt_dereg_mr, @@ -524,7 +525,6 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) int rvt_register_device(struct rvt_dev_info *rdi) { int ret = 0, i; - u64 dma_mask; if (!rdi) return -EINVAL; @@ -579,13 +579,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) /* Completion queues */ spin_lock_init(&rdi->n_cqs_lock); - /* DMA Operations */ - rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms; - dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - ret = dma_coerce_mask_and_coherent(&rdi->ibdev.dev, dma_mask); - if (ret) - goto bail_wss; - /* Protection Domain */ spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; @@ -596,36 +589,11 @@ int rvt_register_device(struct rvt_dev_info *rdi) * exactly which functions rdmavt supports, nor do they know the ABI * version, so we do all of this sort of stuff here. 
*/ - rdi->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + rdi->ibdev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); rdi->ibdev.node_type = RDMA_NODE_IB_CA; if (!rdi->ibdev.num_comp_vectors) diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index 8810bfa68049..452149066792 100644 --- a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -5,7 +5,6 @@ config RDMA_RXE depends on INFINIBAND_VIRT_DMA select NET_UDP_TUNNEL select CRYPTO_CRC32 - select DMA_VIRT_OPS help This driver implements the InfiniBand RDMA transport over the Linux network stack. 
It enables a system with a diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 43394c3f29d4..b315ebf041ac 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -123,11 +123,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe)); - /* make sure all changes to the CQ are written before we update the - * producer pointer - */ - smp_wmb(); - advance_producer(cq->queue); spin_unlock_irqrestore(&cq->cq_lock, flags); diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index d2ce852447c1..6e8c41567ba0 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -31,7 +31,6 @@ int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length) return 0; case RXE_MEM_TYPE_MR: - case RXE_MEM_TYPE_FMR: if (iova < mem->iova || length > mem->length || iova > mem->iova + mem->length - length) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 34bef7d8e6b4..c4b06ced30a7 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -8,7 +8,6 @@ #include <linux/if_arp.h> #include <linux/netdevice.h> #include <linux/if.h> -#include <linux/if_vlan.h> #include <net/udp_tunnel.h> #include <net/sch_generic.h> #include <linux/netfilter.h> @@ -20,18 +19,6 @@ static struct rxe_recv_sockets recv_sockets; -struct device *rxe_dma_device(struct rxe_dev *rxe) -{ - struct net_device *ndev; - - ndev = rxe->ndev; - - if (is_vlan_dev(ndev)) - ndev = vlan_dev_real_dev(ndev); - - return ndev->dev.parent; -} - int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) { int err; @@ -166,14 +153,9 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct udphdr *udph; struct net_device *ndev = skb->dev; - struct net_device *rdev = ndev; struct rxe_dev *rxe = rxe_get_dev_from_net(ndev); struct 
rxe_pkt_info *pkt = SKB_TO_PKT(skb); - if (!rxe && is_vlan_dev(rdev)) { - rdev = vlan_dev_real_dev(ndev); - rxe = rxe_get_dev_from_net(rdev); - } if (!rxe) goto drop; diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 7d434a6837a7..2902ca7b288c 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -7,9 +7,11 @@ #ifndef RXE_QUEUE_H #define RXE_QUEUE_H +/* for definition of shared struct rxe_queue_buf */ +#include <uapi/rdma/rdma_user_rxe.h> + /* implements a simple circular buffer that can optionally be * shared between user space and the kernel and can be resized - * the requested element size is rounded up to a power of 2 * and the number of elements in the buffer is also rounded * up to a power of 2. Since the queue is empty when the @@ -17,28 +19,6 @@ * of the queue is one less than the number of element slots */ -/* this data structure is shared between user space and kernel - * space for those cases where the queue is shared. It contains - * the producer and consumer indices. 
Is also contains a copy - * of the queue size parameters for user space to use but the - * kernel must use the parameters in the rxe_queue struct - * this MUST MATCH the corresponding librxe struct - * for performance reasons arrange to have producer and consumer - * pointers in separate cache lines - * the kernel should always mask the indices to avoid accessing - * memory outside of the data area - */ -struct rxe_queue_buf { - __u32 log2_elem_size; - __u32 index_mask; - __u32 pad_1[30]; - __u32 producer_index; - __u32 pad_2[31]; - __u32 consumer_index; - __u32 pad_3[31]; - __u8 data[]; -}; - struct rxe_queue { struct rxe_dev *rxe; struct rxe_queue_buf *buf; @@ -46,7 +26,7 @@ struct rxe_queue { size_t buf_size; size_t elem_size; unsigned int log2_elem_size; - unsigned int index_mask; + u32 index_mask; }; int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, @@ -76,26 +56,56 @@ static inline int next_index(struct rxe_queue *q, int index) static inline int queue_empty(struct rxe_queue *q) { - return ((q->buf->producer_index - q->buf->consumer_index) - & q->index_mask) == 0; + u32 prod; + u32 cons; + + /* make sure all changes to queue complete before + * testing queue empty + */ + prod = smp_load_acquire(&q->buf->producer_index); + /* same */ + cons = smp_load_acquire(&q->buf->consumer_index); + + return ((prod - cons) & q->index_mask) == 0; } static inline int queue_full(struct rxe_queue *q) { - return ((q->buf->producer_index + 1 - q->buf->consumer_index) - & q->index_mask) == 0; + u32 prod; + u32 cons; + + /* make sure all changes to queue complete before + * testing queue full + */ + prod = smp_load_acquire(&q->buf->producer_index); + /* same */ + cons = smp_load_acquire(&q->buf->consumer_index); + + return ((prod + 1 - cons) & q->index_mask) == 0; } static inline void advance_producer(struct rxe_queue *q) { - q->buf->producer_index = (q->buf->producer_index + 1) - & q->index_mask; + u32 prod; + + prod = (q->buf->producer_index + 1) & 
q->index_mask; + + /* make sure all changes to queue complete before + * changing producer index + */ + smp_store_release(&q->buf->producer_index, prod); } static inline void advance_consumer(struct rxe_queue *q) { - q->buf->consumer_index = (q->buf->consumer_index + 1) - & q->index_mask; + u32 cons; + + cons = (q->buf->consumer_index + 1) & q->index_mask; + + /* make sure all changes to queue complete before + * changing consumer index + */ + smp_store_release(&q->buf->consumer_index, cons); } static inline void *producer_addr(struct rxe_queue *q) @@ -112,12 +122,28 @@ static inline void *consumer_addr(struct rxe_queue *q) static inline unsigned int producer_index(struct rxe_queue *q) { - return q->buf->producer_index; + u32 index; + + /* make sure all changes to queue + * complete before getting producer index + */ + index = smp_load_acquire(&q->buf->producer_index); + index &= q->index_mask; + + return index; } static inline unsigned int consumer_index(struct rxe_queue *q) { - return q->buf->consumer_index; + u32 index; + + /* make sure all changes to queue + * complete before getting consumer index + */ + index = smp_load_acquire(&q->buf->consumer_index); + index &= q->index_mask; + + return index; } static inline void *addr_from_index(struct rxe_queue *q, unsigned int index) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index af3923bf0a36..d4917646641a 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -634,7 +634,8 @@ next_wqe: } if (unlikely(qp_type(qp) == IB_QPT_RC && - qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) { + psn_compare(qp->req.psn, (qp->comp.psn + + RXE_MAX_UNACKED_PSNS)) > 0)) { qp->req.wait_psn = 1; goto exit; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c7e3b6a4af38..5a098083a9d2 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -872,11 +872,6 @@ static 
enum resp_states do_complete(struct rxe_qp *qp, else wc->network_hdr_type = RDMA_NETWORK_IPV6; - if (is_vlan_dev(skb->dev)) { - wc->wc_flags |= IB_WC_WITH_VLAN; - wc->vlan_id = vlan_dev_vlan_id(skb->dev); - } - if (pkt->mask & RXE_IMMDT_MASK) { wc->wc_flags |= IB_WC_WITH_IMM; wc->ex.imm_data = immdt_imm(pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index f9c832e82552..a031514e2f41 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -244,11 +244,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) recv_wqe->dma.cur_sge = 0; recv_wqe->dma.sge_offset = 0; - /* make sure all changes to the work queue are written before we - * update the producer pointer - */ - smp_wmb(); - advance_producer(rq->queue); return 0; @@ -265,6 +260,9 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, struct rxe_srq *srq = to_rsrq(ibsrq); struct rxe_create_srq_resp __user *uresp = NULL; + if (init->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (udata) { if (udata->outlen < sizeof(*uresp)) return -EINVAL; @@ -392,6 +390,9 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, uresp = udata->outbuf; } + if (init->create_flags) + return ERR_PTR(-EOPNOTSUPP); + err = rxe_qp_chk_init(rxe, init); if (err) goto err1; @@ -433,6 +434,9 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct rxe_dev *rxe = to_rdev(ibqp->device); struct rxe_qp *qp = to_rqp(ibqp); + if (mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + err = rxe_qp_chk_attr(rxe, qp, attr, mask); if (err) goto err1; @@ -624,12 +628,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, if (unlikely(err)) goto err1; - /* - * make sure all changes to the work queue are - * written before we update the producer pointer - */ - smp_wmb(); - advance_producer(sq->queue); spin_unlock_irqrestore(&qp->sq.sq_lock, flags); @@ 
-765,7 +763,7 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector); if (err) @@ -1033,7 +1031,7 @@ static ssize_t parent_show(struct device *device, struct rxe_dev *rxe = rdma_device_to_drv_device(device, struct rxe_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", rxe_parent_name(rxe, 1)); + return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1)); } static DEVICE_ATTR_RO(parent); @@ -1070,6 +1068,7 @@ static const struct ib_device_ops rxe_dev_ops = { .create_cq = rxe_create_cq, .create_qp = rxe_create_qp, .create_srq = rxe_create_srq, + .create_user_ah = rxe_create_ah, .dealloc_driver = rxe_dealloc, .dealloc_pd = rxe_dealloc_pd, .dealloc_ucontext = rxe_dealloc_ucontext, @@ -1118,56 +1117,18 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) int err; struct ib_device *dev = &rxe->ib_dev; struct crypto_shash *tfm; - u64 dma_mask; strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; dev->num_comp_vectors = num_possible_cpus(); - dev->dev.parent = rxe_dma_device(rxe); dev->local_dma_lkey = 0; addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); - dev->dev.dma_parms = &rxe->dma_parms; - dma_set_max_seg_size(&dev->dev, UINT_MAX); - dma_mask = IS_ENABLED(CONFIG_64BIT) ? 
DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - err = dma_coerce_mask_and_coherent(&dev->dev, dma_mask); - if (err) - return err; - dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) - | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) - | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) - | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_REG_MR) - | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) - | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) - ; + dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) | + BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ); ib_set_device_ops(dev, &rxe_dev_ops); err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 3414b341b709..79e0a5a878da 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -273,7 +273,6 @@ enum rxe_mem_type { RXE_MEM_TYPE_NONE, RXE_MEM_TYPE_DMA, RXE_MEM_TYPE_MR, - RXE_MEM_TYPE_FMR, RXE_MEM_TYPE_MW, }; 
@@ -352,7 +351,6 @@ struct rxe_port { struct rxe_dev { struct ib_device ib_dev; struct ib_device_attr attr; - struct device_dma_parameters dma_parms; int max_ucontext; int max_inline_data; struct mutex usdev_lock; diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig index 3450ba5081df..1b5105cbabae 100644 --- a/drivers/infiniband/sw/siw/Kconfig +++ b/drivers/infiniband/sw/siw/Kconfig @@ -2,7 +2,6 @@ config RDMA_SIW tristate "Software RDMA over TCP/IP (iWARP) driver" depends on INET && INFINIBAND && LIBCRC32C depends on INFINIBAND_VIRT_DMA - select DMA_VIRT_OPS help This driver implements the iWARP RDMA transport over the Linux TCP/IP network stack. It enables a system with a diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index e9753831ac3f..adda78996219 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -69,7 +69,6 @@ struct siw_pd { struct siw_device { struct ib_device base_dev; - struct device_dma_parameters dma_parms; struct net_device *netdev; struct siw_dev_cap attrs; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 66764f7ef072..1f9e15b71504 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1047,7 +1047,7 @@ static void siw_cm_work_handler(struct work_struct *w) cep->state); } } - if (rv && rv != EAGAIN) + if (rv && rv != -EAGAIN) release_cep = 1; break; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 181e06c1c43d..ee95cf29179d 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -305,25 +305,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev) { struct siw_device *sdev = NULL; struct ib_device *base_dev; - struct device *parent = netdev->dev.parent; - u64 dma_mask; int rv; - if (!parent) { - /* - * The loopback device has no parent device, - * so it appears as a 
top-level device. To support - * loopback device connectivity, take this device - * as the parent device. Skip all other devices - * w/o parent device. - */ - if (netdev->type != ARPHRD_LOOPBACK) { - pr_warn("siw: device %s error: no parent device\n", - netdev->name); - return NULL; - } - parent = &netdev->dev; - } sdev = ib_alloc_device(siw_device, base_dev); if (!sdev) return NULL; @@ -347,30 +330,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev) addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, addr); } - base_dev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + + base_dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND); base_dev->node_type = RDMA_NODE_RNIC; memcpy(base_dev->node_desc, SIW_NODE_DESC_COMMON, @@ -382,13 +343,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) * per physical port. 
*/ base_dev->phys_port_cnt = 1; - base_dev->dev.parent = parent; - base_dev->dev.dma_parms = &sdev->dma_parms; - dma_set_max_seg_size(&base_dev->dev, UINT_MAX); - dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - if (dma_coerce_mask_and_coherent(&base_dev->dev, dma_mask)) - goto error; - base_dev->num_comp_vectors = num_possible_cpus(); xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); @@ -430,7 +384,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev) atomic_set(&sdev->num_mr, 0); atomic_set(&sdev->num_pd, 0); - sdev->numa_node = dev_to_node(parent); + sdev->numa_node = dev_to_node(&netdev->dev); spin_lock_init(&sdev->lock); return sdev; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 7cf3242ffb41..68fd053fc774 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -307,6 +307,9 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, siw_dbg(base_dev, "create new QP\n"); + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) { siw_dbg(base_dev, "too many QP's\n"); rv = -ENOMEM; @@ -544,6 +547,9 @@ int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr, if (!attr_mask) return 0; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + memset(&new_attrs, 0, sizeof(new_attrs)); if (attr_mask & IB_QP_ACCESS_FLAGS) { @@ -1094,6 +1100,9 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, struct siw_cq *cq = to_siw_cq(base_cq); int rv, size = attr->cqe; + if (attr->flags) + return -EOPNOTSUPP; + if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) { siw_dbg(base_cq->device, "too many CQ's\n"); rv = -ENOMEM; @@ -1555,6 +1564,9 @@ int siw_create_srq(struct ib_srq *base_srq, base_ucontext); int rv; + if (init_attrs->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) { 
siw_dbg_pd(base_srq->pd, "too many SRQ's\n"); rv = -ENOMEM; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 8f0b598a46ec..d5d592bdab35 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1514,9 +1514,9 @@ static ssize_t show_mode(struct device *d, struct device_attribute *attr, struct ipoib_dev_priv *priv = ipoib_priv(dev); if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) - return sprintf(buf, "connected\n"); + return sysfs_emit(buf, "connected\n"); else - return sprintf(buf, "datagram\n"); + return sysfs_emit(buf, "datagram\n"); } static ssize_t set_mode(struct device *d, struct device_attribute *attr, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 67a21fdf5367..823f6831e7ea 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -166,6 +166,10 @@ static inline int ib_speed_enum_to_int(int speed) return SPEED_14000; case IB_SPEED_EDR: return SPEED_25000; + case IB_SPEED_HDR: + return SPEED_50000; + case IB_SPEED_NDR: + return SPEED_100000; } return SPEED_UNKNOWN; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index abfab89423f4..a6f413491321 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2266,7 +2266,7 @@ static ssize_t show_pkey(struct device *dev, struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); - return sprintf(buf, "0x%04x\n", priv->pkey); + return sysfs_emit(buf, "0x%04x\n", priv->pkey); } static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); @@ -2276,7 +2276,8 @@ static ssize_t show_umcast(struct device *dev, struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); - return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); + return 
sysfs_emit(buf, "%d\n", + test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); } void ipoib_set_umcast(struct net_device *ndev, int umcast_val) @@ -2446,7 +2447,7 @@ static ssize_t dev_id_show(struct device *dev, "\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n", current->comm); - return sprintf(buf, "%#x\n", ndev->dev_id); + return sysfs_emit(buf, "%#x\n", ndev->dev_id); } static DEVICE_ATTR_RO(dev_id); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 587252fd6f57..5a150a080ac2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -158,6 +158,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) int ret, size, req_vec; int i; + static atomic_t counter; size = ipoib_recvq_size + 1; ret = ipoib_cm_dev_init(dev); @@ -171,8 +172,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) if (ret != -EOPNOTSUPP) return ret; - req_vec = (priv->port - 1) * 2; - + req_vec = atomic_inc_return(&counter) * 2; cq_attr.cqe = size; cq_attr.comp_vector = req_vec % priv->ca->num_comp_vectors; priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 4c50a87ed7cc..5958840dbeed 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -46,7 +46,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, struct net_device *dev = to_net_dev(d); struct ipoib_dev_priv *priv = ipoib_priv(dev); - return sprintf(buf, "%s\n", priv->parent->name); + return sysfs_emit(buf, "%s\n", priv->parent->name); } static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 
3690e28cc7ea..4792b9bf400f 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -187,23 +187,14 @@ iser_initialize_task_headers(struct iscsi_task *task, struct iser_device *device = iser_conn->ib_conn.device; struct iscsi_iser_task *iser_task = task->dd_data; u64 dma_addr; - const bool mgmt_task = !task->sc && !in_interrupt(); - int ret = 0; - if (unlikely(mgmt_task)) - mutex_lock(&iser_conn->state_mutex); - - if (unlikely(iser_conn->state != ISER_CONN_UP)) { - ret = -ENODEV; - goto out; - } + if (unlikely(iser_conn->state != ISER_CONN_UP)) + return -ENODEV; dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc, ISER_HEADERS_LEN, DMA_TO_DEVICE); - if (ib_dma_mapping_error(device->ib_device, dma_addr)) { - ret = -ENOMEM; - goto out; - } + if (ib_dma_mapping_error(device->ib_device, dma_addr)) + return -ENOMEM; tx_desc->inv_wr.next = NULL; tx_desc->reg_wr.wr.next = NULL; @@ -214,11 +205,8 @@ iser_initialize_task_headers(struct iscsi_task *task, tx_desc->tx_sg[0].lkey = device->pd->local_dma_lkey; iser_task->iser_conn = iser_conn; -out: - if (unlikely(mgmt_task)) - mutex_unlock(&iser_conn->state_mutex); - return ret; + return 0; } /** @@ -739,7 +727,7 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn, } /** - * iscsi_iser_set_param() - set class connection parameter + * iscsi_iser_conn_get_stats() - get iscsi connection statistics * @cls_conn: iscsi class connection * @stats: iscsi stats to output * diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 67f65dcb15a6..2ba27221ea85 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -28,6 +28,18 @@ static int isert_debug_level; module_param_named(debug_level, isert_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)"); +static int isert_sg_tablesize_set(const char *val, + const struct kernel_param *kp); +static const 
struct kernel_param_ops sg_tablesize_ops = { + .set = isert_sg_tablesize_set, + .get = param_get_int, +}; + +static int isert_sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE; +module_param_cb(sg_tablesize, &sg_tablesize_ops, &isert_sg_tablesize, 0644); +MODULE_PARM_DESC(sg_tablesize, + "Number of gather/scatter entries in a single scsi command, should >= 128 (default: 256, max: 4096)"); + static DEFINE_MUTEX(device_list_mutex); static LIST_HEAD(device_list); static struct workqueue_struct *isert_comp_wq; @@ -47,6 +59,19 @@ static void isert_send_done(struct ib_cq *cq, struct ib_wc *wc); static void isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void isert_login_send_done(struct ib_cq *cq, struct ib_wc *wc); +static int isert_sg_tablesize_set(const char *val, const struct kernel_param *kp) +{ + int n = 0, ret; + + ret = kstrtoint(val, 10, &n); + if (ret != 0 || n < ISCSI_ISER_MIN_SG_TABLESIZE || + n > ISCSI_ISER_MAX_SG_TABLESIZE) + return -EINVAL; + + return param_set_int(val, kp); +} + + static inline bool isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) { @@ -101,7 +126,7 @@ isert_create_qp(struct isert_conn *isert_conn, attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; factor = rdma_rw_mr_factor(device->ib_device, cma_id->port_num, - ISCSI_ISER_MAX_SG_TABLESIZE); + isert_sg_tablesize); attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX * factor; attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge; attr.cap.max_recv_sge = 1; @@ -1076,7 +1101,7 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, sequence_cmd: rc = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn); - if (!rc && dump_payload == false && unsol_data) + if (!rc && !dump_payload && unsol_data) iscsit_set_unsolicited_dataout(cmd); else if (dump_payload && imm_data) target_put_sess_cmd(&cmd->se_cmd); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 
7fee4a65e181..6c5af13db4e0 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -65,6 +65,12 @@ */ #define ISER_RX_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 1024) +/* Default I/O size is 1MB */ +#define ISCSI_ISER_DEF_SG_TABLESIZE 256 + +/* Minimum I/O size is 512KB */ +#define ISCSI_ISER_MIN_SG_TABLESIZE 128 + /* Maximum support is 16MB I/O size */ #define ISCSI_ISER_MAX_SG_TABLESIZE 4096 diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h index f64519872297..012fc27c5c93 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h @@ -437,7 +437,7 @@ struct opa_veswport_trap { } __packed; /** - * struct opa_vnic_iface_macs_entry - single entry in the mac list + * struct opa_vnic_iface_mac_entry - single entry in the mac list * @mac_addr: MAC address */ struct opa_vnic_iface_mac_entry { diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c index 868b5aec1537..292c037aa239 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c @@ -74,7 +74,7 @@ void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event) } /** - * opa_vnic_get_error_counters - get summary counters + * opa_vnic_get_summary_counters - get summary counters * @adapter: vnic port adapter * @cntrs: pointer to destination summary counters structure * diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index ac4c49cbf153..ba00f0de14ca 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -52,7 +52,8 @@ static ssize_t max_reconnect_attempts_show(struct device *dev, { struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev); - return sprintf(page, "%d\n", 
rtrs_clt_get_max_reconnect_attempts(clt)); + return sysfs_emit(page, "%d\n", + rtrs_clt_get_max_reconnect_attempts(clt)); } static ssize_t max_reconnect_attempts_store(struct device *dev, @@ -95,11 +96,13 @@ static ssize_t mpath_policy_show(struct device *dev, switch (clt->mp_policy) { case MP_POLICY_RR: - return sprintf(page, "round-robin (RR: %d)\n", clt->mp_policy); + return sysfs_emit(page, "round-robin (RR: %d)\n", + clt->mp_policy); case MP_POLICY_MIN_INFLIGHT: - return sprintf(page, "min-inflight (MI: %d)\n", clt->mp_policy); + return sysfs_emit(page, "min-inflight (MI: %d)\n", + clt->mp_policy); default: - return sprintf(page, "Unknown (%d)\n", clt->mp_policy); + return sysfs_emit(page, "Unknown (%d)\n", clt->mp_policy); } } @@ -138,9 +141,10 @@ static DEVICE_ATTR_RW(mpath_policy); static ssize_t add_path_show(struct device *dev, struct device_attribute *attr, char *page) { - return scnprintf(page, PAGE_SIZE, - "Usage: echo [<source addr>@]<destination addr> > %s\n\n*addr ::= [ ip:<ipv4|ipv6> | gid:<gid> ]\n", - attr->attr.name); + return sysfs_emit( + page, + "Usage: echo [<source addr>@]<destination addr> > %s\n\n*addr ::= [ ip:<ipv4|ipv6> | gid:<gid> ]\n", + attr->attr.name); } static ssize_t add_path_store(struct device *dev, @@ -184,20 +188,18 @@ static ssize_t rtrs_clt_state_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_clt_sess, kobj); if (sess->state == RTRS_CLT_CONNECTED) - return sprintf(page, "connected\n"); + return sysfs_emit(page, "connected\n"); - return sprintf(page, "disconnected\n"); + return sysfs_emit(page, "disconnected\n"); } static struct kobj_attribute rtrs_clt_state_attr = __ATTR(state, 0444, rtrs_clt_state_show, NULL); static ssize_t rtrs_clt_reconnect_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } 
static ssize_t rtrs_clt_reconnect_store(struct kobject *kobj, @@ -225,11 +227,9 @@ static struct kobj_attribute rtrs_clt_reconnect_attr = rtrs_clt_reconnect_store); static ssize_t rtrs_clt_disconnect_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } static ssize_t rtrs_clt_disconnect_store(struct kobject *kobj, @@ -257,11 +257,9 @@ static struct kobj_attribute rtrs_clt_disconnect_attr = rtrs_clt_disconnect_store); static ssize_t rtrs_clt_remove_path_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } static ssize_t rtrs_clt_remove_path_store(struct kobject *kobj, @@ -324,7 +322,7 @@ static ssize_t rtrs_clt_hca_port_show(struct kobject *kobj, sess = container_of(kobj, typeof(*sess), kobj); - return scnprintf(page, PAGE_SIZE, "%u\n", sess->hca_port); + return sysfs_emit(page, "%u\n", sess->hca_port); } static struct kobj_attribute rtrs_clt_hca_port_attr = @@ -338,7 +336,7 @@ static ssize_t rtrs_clt_hca_name_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_clt_sess, kobj); - return scnprintf(page, PAGE_SIZE, "%s\n", sess->hca_name); + return sysfs_emit(page, "%s\n", sess->hca_name); } static struct kobj_attribute rtrs_clt_hca_name_attr = @@ -349,12 +347,13 @@ static ssize_t rtrs_clt_src_addr_show(struct kobject *kobj, char *page) { struct rtrs_clt_sess *sess; - int cnt; + int len; sess = container_of(kobj, struct rtrs_clt_sess, kobj); - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, - page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + len = sockaddr_to_str((struct sockaddr 
*)&sess->s.src_addr, page, + PAGE_SIZE); + len += sysfs_emit_at(page, len, "\n"); + return len; } static struct kobj_attribute rtrs_clt_src_addr_attr = @@ -365,12 +364,13 @@ static ssize_t rtrs_clt_dst_addr_show(struct kobject *kobj, char *page) { struct rtrs_clt_sess *sess; - int cnt; + int len; sess = container_of(kobj, struct rtrs_clt_sess, kobj); - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, - page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + len = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, page, + PAGE_SIZE); + len += sysfs_emit_at(page, len, "\n"); + return len; } static struct kobj_attribute rtrs_clt_dst_addr_attr = diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index f298adc02acb..560865f65dc4 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1236,8 +1236,7 @@ static void free_sess_reqs(struct rtrs_clt_sess *sess) if (req->mr) ib_dereg_mr(req->mr); kfree(req->sge); - rtrs_iu_free(req->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); + rtrs_iu_free(req->iu, sess->s.dev->ib_dev, 1); } kfree(sess->reqs); sess->reqs = NULL; @@ -1499,6 +1498,7 @@ static int create_con(struct rtrs_clt_sess *sess, unsigned int cid) con->c.cid = cid; con->c.sess = &sess->s; atomic_set(&con->io_cnt, 0); + mutex_init(&con->con_mutex); sess->s.con[cid] = &con->c; @@ -1510,6 +1510,7 @@ static void destroy_con(struct rtrs_clt_con *con) struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); sess->s.con[con->c.cid] = NULL; + mutex_destroy(&con->con_mutex); kfree(con); } @@ -1520,15 +1521,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) int err, cq_vector; struct rtrs_msg_rkey_rsp *rsp; - /* - * This function can fail, but still destroy_con_cq_qp() should - * be called, this is because create_con_cq_qp() is called on cm - * event path, thus caller/waiter never knows: have we failed before - * create_con_cq_qp() or after. 
To solve this dilemma without - * creating any additional flags just allow destroy_con_cq_qp() be - * called many times. - */ - + lockdep_assert_held(&con->con_mutex); if (con->c.cid == 0) { /* * One completion for each receive and two for each send @@ -1602,11 +1595,10 @@ static void destroy_con_cq_qp(struct rtrs_clt_con *con) * Be careful here: destroy_con_cq_qp() can be called even * create_con_cq_qp() failed, see comments there. */ - + lockdep_assert_held(&con->con_mutex); rtrs_cq_qp_destroy(&con->c); if (con->rsp_ius) { - rtrs_iu_free(con->rsp_ius, DMA_FROM_DEVICE, - sess->s.dev->ib_dev, con->queue_size); + rtrs_iu_free(con->rsp_ius, sess->s.dev->ib_dev, con->queue_size); con->rsp_ius = NULL; con->queue_size = 0; } @@ -1634,16 +1626,16 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con) struct rtrs_sess *s = con->c.sess; int err; + mutex_lock(&con->con_mutex); err = create_con_cq_qp(con); + mutex_unlock(&con->con_mutex); if (err) { rtrs_err(s, "create_con_cq_qp(), err: %d\n", err); return err; } err = rdma_resolve_route(con->c.cm_id, RTRS_CONNECT_TIMEOUT_MS); - if (err) { + if (err) rtrs_err(s, "Resolving route failed, err: %d\n", err); - destroy_con_cq_qp(con); - } return err; } @@ -1837,8 +1829,8 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, cm_err = rtrs_rdma_route_resolved(con); break; case RDMA_CM_EVENT_ESTABLISHED: - con->cm_err = rtrs_rdma_conn_established(con, ev); - if (likely(!con->cm_err)) { + cm_err = rtrs_rdma_conn_established(con, ev); + if (likely(!cm_err)) { /* * Report success and wake up. Here we abuse state_wq, * i.e. wake up without state change, but we set cm_err. 
@@ -1851,20 +1843,22 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_REJECTED: cm_err = rtrs_rdma_conn_rejected(con, ev); break; + case RDMA_CM_EVENT_DISCONNECTED: + /* No message for disconnecting */ + cm_err = -ECONNRESET; + break; case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_TIMEWAIT_EXIT: rtrs_wrn(s, "CM error event %d\n", ev->event); cm_err = -ECONNRESET; break; case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: + rtrs_wrn(s, "CM error event %d\n", ev->event); cm_err = -EHOSTUNREACH; break; - case RDMA_CM_EVENT_DISCONNECTED: - case RDMA_CM_EVENT_ADDR_CHANGE: - case RDMA_CM_EVENT_TIMEWAIT_EXIT: - cm_err = -ECONNRESET; - break; case RDMA_CM_EVENT_DEVICE_REMOVAL: /* * Device removal is a special case. Queue close and return 0. @@ -1949,8 +1943,9 @@ static int create_cm(struct rtrs_clt_con *con) errr: stop_cm(con); - /* Is safe to call destroy if cq_qp is not inited */ + mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); destroy_cm: destroy_cm(con); @@ -2057,7 +2052,9 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) if (!sess->s.con[cid]) break; con = to_clt_con(sess->s.con[cid]); + mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); destroy_cm(con); destroy_con(con); } @@ -2164,8 +2161,7 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) mutex_unlock(&clt->paths_mutex); } -static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess, - struct rtrs_addr *addr) +static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess) { struct rtrs_clt *clt = sess->clt; @@ -2224,7 +2220,10 @@ destroy: struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]); stop_cm(con); + + mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); destroy_cm(con); destroy_con(con); } @@ -2245,7 +2244,7 @@ static void 
rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_iu *iu; iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); - rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); if (unlikely(wc->status != IB_WC_SUCCESS)) { rtrs_err(sess->clt, "Sess info request send failed: %s\n", @@ -2264,8 +2263,12 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, int i, sgi; sg_cnt = le16_to_cpu(msg->sg_cnt); - if (unlikely(!sg_cnt)) + if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) { + rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", + sg_cnt); return -EINVAL; + } + /* * Check if IB immediate data size is enough to hold the mem_id and * the offset inside the memory chunk. @@ -2278,11 +2281,6 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, MAX_IMM_PAYL_BITS, sg_cnt, sess->chunk_size); return -EINVAL; } - if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) { - rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", - sg_cnt); - return -EINVAL; - } total_len = 0; for (sgi = 0, i = 0; sgi < sg_cnt && i < sess->queue_depth; sgi++) { const struct rtrs_sg_desc *desc = &msg->desc[sgi]; @@ -2374,7 +2372,7 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) out: rtrs_clt_update_wc_stats(con); - rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); rtrs_clt_change_state(sess, state); } @@ -2436,9 +2434,9 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) out: if (tx_iu) - rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); if (rx_iu) - rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); if (unlikely(err)) /* If we've never taken async path because of malloc problems */ rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); @@ -2938,7 +2936,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt 
*clt, * IO will never grab it. Also it is very important to add * path before init, since init fires LINK_CONNECTED event. */ - rtrs_clt_add_path_to_arr(sess, addr); + rtrs_clt_add_path_to_arr(sess); err = init_sess(sess); if (err) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index 167acd3c90fc..b8dbd701b3cb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -72,6 +72,7 @@ struct rtrs_clt_con { struct rtrs_iu *rsp_ius; u32 queue_size; unsigned int cpu; + struct mutex con_mutex; atomic_t io_cnt; int cm_err; }; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index b8e43dc4d95a..3f2918671dbe 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -287,8 +287,7 @@ struct rtrs_msg_rdma_hdr { struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t t, struct ib_device *dev, enum dma_data_direction, void (*done)(struct ib_cq *cq, struct ib_wc *wc)); -void rtrs_iu_free(struct rtrs_iu *iu, enum dma_data_direction dir, - struct ib_device *dev, u32 queue_size); +void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_size); int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu); int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, struct ib_send_wr *head); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 07fbb063555d..d2edff3b8f0d 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -27,11 +27,9 @@ static struct kobj_type ktype = { }; static ssize_t rtrs_srv_disconnect_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", 
attr->attr.name); } static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj, @@ -72,8 +70,7 @@ static ssize_t rtrs_srv_hca_port_show(struct kobject *kobj, sess = container_of(kobj, typeof(*sess), kobj); usr_con = sess->s.con[0]; - return scnprintf(page, PAGE_SIZE, "%u\n", - usr_con->cm_id->port_num); + return sysfs_emit(page, "%u\n", usr_con->cm_id->port_num); } static struct kobj_attribute rtrs_srv_hca_port_attr = @@ -87,8 +84,7 @@ static ssize_t rtrs_srv_hca_name_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_srv_sess, kobj); - return scnprintf(page, PAGE_SIZE, "%s\n", - sess->s.dev->ib_dev->name); + return sysfs_emit(page, "%s\n", sess->s.dev->ib_dev->name); } static struct kobj_attribute rtrs_srv_hca_name_attr = @@ -115,12 +111,13 @@ static ssize_t rtrs_srv_dst_addr_show(struct kobject *kobj, char *page) { struct rtrs_srv_sess *sess; - int cnt; + int len; sess = container_of(kobj, struct rtrs_srv_sess, kobj); - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, - page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page, + PAGE_SIZE); + len += sysfs_emit_at(page, len, "\n"); + return len; } static struct kobj_attribute rtrs_srv_dst_addr_attr = diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index d6f93601712e..c42fd470c4eb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -113,28 +113,18 @@ static bool __rtrs_srv_change_state(struct rtrs_srv_sess *sess, return changed; } -static bool rtrs_srv_change_state_get_old(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state, - enum rtrs_srv_state *old_state) +static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, + enum rtrs_srv_state new_state) { bool changed; spin_lock_irq(&sess->state_lock); - *old_state = sess->state; changed = __rtrs_srv_change_state(sess, new_state); 
spin_unlock_irq(&sess->state_lock); return changed; } -static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state) -{ - enum rtrs_srv_state old_state; - - return rtrs_srv_change_state_get_old(sess, new_state, &old_state); -} - static void free_id(struct rtrs_srv_op *id) { if (!id) @@ -471,10 +461,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, void close_sess(struct rtrs_srv_sess *sess) { - enum rtrs_srv_state old_state; - - if (rtrs_srv_change_state_get_old(sess, RTRS_SRV_CLOSING, - &old_state)) + if (rtrs_srv_change_state(sess, RTRS_SRV_CLOSING)) queue_work(rtrs_wq, &sess->close_work); WARN_ON(sess->state != RTRS_SRV_CLOSING); } @@ -577,8 +564,7 @@ static void unmap_cont_bufs(struct rtrs_srv_sess *sess) struct rtrs_srv_mr *srv_mr; srv_mr = &sess->mrs[i]; - rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); + rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); ib_dereg_mr(srv_mr->mr); ib_dma_unmap_sg(sess->s.dev->ib_dev, srv_mr->sgt.sgl, srv_mr->sgt.nents, DMA_BIDIRECTIONAL); @@ -682,8 +668,7 @@ err: sgt = &srv_mr->sgt; mr = srv_mr->mr; free_iu: - rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); + rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); dereg_mr: ib_dereg_mr(mr); unmap_sg: @@ -735,7 +720,7 @@ static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_iu *iu; iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); - rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); if (unlikely(wc->status != IB_WC_SUCCESS)) { rtrs_err(s, "Sess info response send failed: %s\n", @@ -861,7 +846,7 @@ static int process_info_req(struct rtrs_srv_con *con, if (unlikely(err)) { rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err); iu_free: - rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); } rwr_free: kfree(rwr); @@ -906,7 +891,7 @@ static void 
rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc) goto close; out: - rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); return; close: close_sess(sess); @@ -929,7 +914,7 @@ static int post_recv_info_req(struct rtrs_srv_con *con) err = rtrs_iu_post_recv(&con->c, rx_iu); if (unlikely(err)) { rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err); - rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); return err; } @@ -1328,17 +1313,42 @@ static void rtrs_srv_dev_release(struct device *dev) kfree(srv); } -static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) +static void free_srv(struct rtrs_srv *srv) +{ + int i; + + WARN_ON(refcount_read(&srv->refcount)); + for (i = 0; i < srv->queue_depth; i++) + mempool_free(srv->chunks[i], chunk_pool); + kfree(srv->chunks); + mutex_destroy(&srv->paths_mutex); + mutex_destroy(&srv->paths_ev_mutex); + /* last put to release the srv structure */ + put_device(&srv->dev); +} + +static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, + const uuid_t *paths_uuid) { struct rtrs_srv *srv; int i; + mutex_lock(&ctx->srv_mutex); + list_for_each_entry(srv, &ctx->srv_list, ctx_list) { + if (uuid_equal(&srv->paths_uuid, paths_uuid) && + refcount_inc_not_zero(&srv->refcount)) { + mutex_unlock(&ctx->srv_mutex); + return srv; + } + } + + /* need to allocate a new srv */ srv = kzalloc(sizeof(*srv), GFP_KERNEL); - if (!srv) + if (!srv) { + mutex_unlock(&ctx->srv_mutex); return NULL; + } - refcount_set(&srv->refcount, 1); INIT_LIST_HEAD(&srv->paths_list); mutex_init(&srv->paths_mutex); mutex_init(&srv->paths_ev_mutex); @@ -1347,6 +1357,8 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, srv->ctx = ctx; device_initialize(&srv->dev); srv->dev.release = rtrs_srv_dev_release; + list_add(&srv->ctx_list, &ctx->srv_list); + mutex_unlock(&ctx->srv_mutex); srv->chunks = 
kcalloc(srv->queue_depth, sizeof(*srv->chunks), GFP_KERNEL); @@ -1358,7 +1370,7 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, if (!srv->chunks[i]) goto err_free_chunks; } - list_add(&srv->ctx_list, &ctx->srv_list); + refcount_set(&srv->refcount, 1); return srv; @@ -1369,52 +1381,9 @@ err_free_chunks: err_free_srv: kfree(srv); - - return NULL; -} - -static void free_srv(struct rtrs_srv *srv) -{ - int i; - - WARN_ON(refcount_read(&srv->refcount)); - for (i = 0; i < srv->queue_depth; i++) - mempool_free(srv->chunks[i], chunk_pool); - kfree(srv->chunks); - mutex_destroy(&srv->paths_mutex); - mutex_destroy(&srv->paths_ev_mutex); - /* last put to release the srv structure */ - put_device(&srv->dev); -} - -static inline struct rtrs_srv *__find_srv_and_get(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) -{ - struct rtrs_srv *srv; - - list_for_each_entry(srv, &ctx->srv_list, ctx_list) { - if (uuid_equal(&srv->paths_uuid, paths_uuid) && - refcount_inc_not_zero(&srv->refcount)) - return srv; - } - return NULL; } -static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) -{ - struct rtrs_srv *srv; - - mutex_lock(&ctx->srv_mutex); - srv = __find_srv_and_get(ctx, paths_uuid); - if (!srv) - srv = __alloc_srv(ctx, paths_uuid); - mutex_unlock(&ctx->srv_mutex); - - return srv; -} - static void put_srv(struct rtrs_srv *srv) { if (refcount_dec_and_test(&srv->refcount)) { @@ -1813,7 +1782,11 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, } recon_cnt = le16_to_cpu(msg->recon_cnt); srv = get_or_create_srv(ctx, &msg->paths_uuid); - if (!srv) { + /* + * "refcount == 0" happens if a previous thread calls get_or_create_srv + * allocate srv, but chunks of srv are not allocated yet. 
+ */ + if (!srv || refcount_read(&srv->refcount) == 0) { err = -ENOMEM; goto reject_w_err; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index 08b0b8a6eebe..9543ae19996c 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -62,7 +62,7 @@ struct rtrs_srv_op { /* * server side memory region context, when always_invalidate=Y, we need - * queue_depth of memory regrion to invalidate each memory region. + * queue_depth of memory region to invalidate each memory region. */ struct rtrs_srv_mr { struct ib_mr *mr; diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index ff1093d6e4bc..2e3a849e0a77 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -31,6 +31,7 @@ struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t gfp_mask, return NULL; for (i = 0; i < queue_size; i++) { iu = &ius[i]; + iu->direction = dir; iu->buf = kzalloc(size, gfp_mask); if (!iu->buf) goto err; @@ -41,17 +42,15 @@ struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t gfp_mask, iu->cqe.done = done; iu->size = size; - iu->direction = dir; } return ius; err: - rtrs_iu_free(ius, dir, dma_dev, i); + rtrs_iu_free(ius, dma_dev, i); return NULL; } EXPORT_SYMBOL_GPL(rtrs_iu_alloc); -void rtrs_iu_free(struct rtrs_iu *ius, enum dma_data_direction dir, - struct ib_device *ibdev, u32 queue_size) +void rtrs_iu_free(struct rtrs_iu *ius, struct ib_device *ibdev, u32 queue_size) { struct rtrs_iu *iu; int i; @@ -61,7 +60,7 @@ void rtrs_iu_free(struct rtrs_iu *ius, enum dma_data_direction dir, for (i = 0; i < queue_size; i++) { iu = &ius[i]; - ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, dir); + ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, iu->direction); kfree(iu->buf); } kfree(ius); @@ -105,6 +104,22 @@ int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe) } EXPORT_SYMBOL_GPL(rtrs_post_recv_empty); +static 
int rtrs_post_send(struct ib_qp *qp, struct ib_send_wr *head, + struct ib_send_wr *wr) +{ + if (head) { + struct ib_send_wr *tail = head; + + while (tail->next) + tail = tail->next; + tail->next = wr; + } else { + head = wr; + } + + return ib_post_send(qp, head, NULL); +} + int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, struct ib_send_wr *head) { @@ -127,17 +142,7 @@ int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, .send_flags = IB_SEND_SIGNALED, }; - if (head) { - struct ib_send_wr *tail = head; - - while (tail->next) - tail = tail->next; - tail->next = &wr; - } else { - head = &wr; - } - - return ib_post_send(con->qp, head, NULL); + return rtrs_post_send(con->qp, head, &wr); } EXPORT_SYMBOL_GPL(rtrs_iu_post_send); @@ -169,17 +174,7 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, if (WARN_ON(sge[i].length == 0)) return -EINVAL; - if (head) { - struct ib_send_wr *tail = head; - - while (tail->next) - tail = tail->next; - tail->next = &wr.wr; - } else { - head = &wr.wr; - } - - return ib_post_send(con->qp, head, NULL); + return rtrs_post_send(con->qp, head, &wr.wr); } EXPORT_SYMBOL_GPL(rtrs_iu_post_rdma_write_imm); @@ -196,17 +191,7 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, .ex.imm_data = cpu_to_be32(imm_data), }; - if (head) { - struct ib_send_wr *tail = head; - - while (tail->next) - tail = tail->next; - tail->next = &wr; - } else { - head = &wr; - } - - return ib_post_send(con->qp, head, NULL); + return rtrs_post_send(con->qp, head, &wr); } EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d8fcd21ab472..5492b66a8153 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -169,9 +169,9 @@ static int srp_tmo_get(char *buffer, const struct kernel_param *kp) int tmo = *(int *)kp->arg; if (tmo >= 0) - return sprintf(buffer, 
"%d\n", tmo); + return sysfs_emit(buffer, "%d\n", tmo); else - return sprintf(buffer, "off\n"); + return sysfs_emit(buffer, "off\n"); } static int srp_tmo_set(const char *val, const struct kernel_param *kp) @@ -2896,7 +2896,7 @@ static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); + return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); } static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, @@ -2904,7 +2904,7 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); + return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); } static ssize_t show_service_id(struct device *dev, @@ -2914,8 +2914,8 @@ static ssize_t show_service_id(struct device *dev, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "0x%016llx\n", - be64_to_cpu(target->ib_cm.service_id)); + return sysfs_emit(buf, "0x%016llx\n", + be64_to_cpu(target->ib_cm.service_id)); } static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, @@ -2925,7 +2925,8 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); + + return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); } static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, @@ -2933,7 +2934,7 @@ static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%pI6\n", target->sgid.raw); + return sysfs_emit(buf, "%pI6\n", target->sgid.raw); } static ssize_t show_dgid(struct device 
*dev, struct device_attribute *attr, @@ -2944,7 +2945,8 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); + + return sysfs_emit(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); } static ssize_t show_orig_dgid(struct device *dev, @@ -2954,7 +2956,8 @@ static ssize_t show_orig_dgid(struct device *dev, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); + + return sysfs_emit(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); } static ssize_t show_req_lim(struct device *dev, @@ -2968,7 +2971,8 @@ static ssize_t show_req_lim(struct device *dev, ch = &target->ch[i]; req_lim = min(req_lim, ch->req_lim); } - return sprintf(buf, "%d\n", req_lim); + + return sysfs_emit(buf, "%d\n", req_lim); } static ssize_t show_zero_req_lim(struct device *dev, @@ -2976,7 +2980,7 @@ static ssize_t show_zero_req_lim(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->zero_req_lim); + return sysfs_emit(buf, "%d\n", target->zero_req_lim); } static ssize_t show_local_ib_port(struct device *dev, @@ -2984,7 +2988,7 @@ static ssize_t show_local_ib_port(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->srp_host->port); + return sysfs_emit(buf, "%d\n", target->srp_host->port); } static ssize_t show_local_ib_device(struct device *dev, @@ -2992,8 +2996,8 @@ static ssize_t show_local_ib_device(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%s\n", - dev_name(&target->srp_host->srp_dev->dev->dev)); + return sysfs_emit(buf, "%s\n", + dev_name(&target->srp_host->srp_dev->dev->dev)); } static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, @@ -3001,7 +3005,7 @@ static ssize_t 
show_ch_count(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->ch_count); + return sysfs_emit(buf, "%d\n", target->ch_count); } static ssize_t show_comp_vector(struct device *dev, @@ -3009,7 +3013,7 @@ static ssize_t show_comp_vector(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->comp_vector); + return sysfs_emit(buf, "%d\n", target->comp_vector); } static ssize_t show_tl_retry_count(struct device *dev, @@ -3017,7 +3021,7 @@ static ssize_t show_tl_retry_count(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->tl_retry_count); + return sysfs_emit(buf, "%d\n", target->tl_retry_count); } static ssize_t show_cmd_sg_entries(struct device *dev, @@ -3025,7 +3029,7 @@ static ssize_t show_cmd_sg_entries(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%u\n", target->cmd_sg_cnt); + return sysfs_emit(buf, "%u\n", target->cmd_sg_cnt); } static ssize_t show_allow_ext_sg(struct device *dev, @@ -3033,7 +3037,7 @@ static ssize_t show_allow_ext_sg(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false"); + return sysfs_emit(buf, "%s\n", target->allow_ext_sg ? 
"true" : "false"); } static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); @@ -3893,7 +3897,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, { struct srp_host *host = container_of(dev, struct srp_host, dev); - return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev)); + return sysfs_emit(buf, "%s\n", dev_name(&host->srp_dev->dev->dev)); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); @@ -3903,7 +3907,7 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr, { struct srp_host *host = container_of(dev, struct srp_host, dev); - return sprintf(buf, "%d\n", host->port); + return sysfs_emit(buf, "%d\n", host->port); } static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index a17c56cd8312..6be60aa5ffe2 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3448,7 +3448,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rdma_size_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size); + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rdma_size); } static ssize_t srpt_tpg_attrib_srp_max_rdma_size_store(struct config_item *item, @@ -3485,7 +3485,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rsp_size_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size); + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rsp_size); } static ssize_t srpt_tpg_attrib_srp_max_rsp_size_store(struct config_item *item, @@ -3522,7 +3522,7 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port 
*sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size); + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_sq_size); } static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item, @@ -3559,7 +3559,7 @@ static ssize_t srpt_tpg_attrib_use_srq_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%d\n", sport->port_attrib.use_srq); + return sysfs_emit(page, "%d\n", sport->port_attrib.use_srq); } static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item, @@ -3649,7 +3649,7 @@ out: static ssize_t srpt_rdma_cm_port_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", rdma_cm_port); + return sysfs_emit(page, "%d\n", rdma_cm_port); } static ssize_t srpt_rdma_cm_port_store(struct config_item *item, @@ -3705,7 +3705,7 @@ static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page) struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return snprintf(page, PAGE_SIZE, "%d\n", sport->enabled); + return sysfs_emit(page, "%d\n", sport->enabled); } static ssize_t srpt_tpg_enable_store(struct config_item *item, @@ -3812,7 +3812,7 @@ static void srpt_drop_tport(struct se_wwn *wwn) static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf) { - return scnprintf(buf, PAGE_SIZE, "\n"); + return sysfs_emit(buf, "\n"); } CONFIGFS_ATTR_RO(srpt_wwn_, version); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index bdeb010efee6..76e66f630c17 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -347,7 +347,7 @@ struct srpt_nexus { }; /** - * struct srpt_port_attib - attributes for SRPT port + * struct srpt_port_attrib - attributes for SRPT port * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections. 
* @srp_max_rsp_size: Maximum size of SRP response messages in bytes. * @srp_sq_size: Shared receive queue (SRQ) size. diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index df9f6f4549f1..cf6c49d09c82 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -853,7 +853,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, return error; ctrl->device = ctrl->queues[0].device; - ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device); + ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev); /* T10-PI support */ if (ctrl->device->dev->attrs.device_cap_flags & diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index ae6620489457..5c1e7cb7fe0d 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -414,7 +414,8 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) goto out_free_rsp; - r->req.p2p_client = &ndev->device->dev; + if (!ib_uses_virt_dma(ndev->device)) + r->req.p2p_client = &ndev->device->dev; r->send_sge.length = sizeof(*r->req.cqe); r->send_sge.lkey = ndev->pd->local_dma_lkey; diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index bace04145c5f..196382630363 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -556,15 +556,6 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, return -1; for (i = 0; i < num_clients; i++) { -#ifdef CONFIG_DMA_VIRT_OPS - if (clients[i]->dma_ops == &dma_virt_ops) { - if (verbose) - dev_warn(clients[i], - "cannot be used for peer-to-peer DMA because the driver makes use of dma_virt_ops\n"); - return -1; - } -#endif - pci_client = find_parent_pci_dev(clients[i]); if (!pci_client) { if (verbose) @@ -834,24 +825,10 @@ static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap, struct device *dev, struct scatterlist *sg, int nents) { struct scatterlist *s; - phys_addr_t paddr; int i; - /* - * p2pdma 
mappings are not compatible with devices that use - * dma_virt_ops. If the upper layers do the right thing - * this should never happen because it will be prevented - * by the check in pci_p2pdma_distance_many() - */ -#ifdef CONFIG_DMA_VIRT_OPS - if (WARN_ON_ONCE(dev->dma_ops == &dma_virt_ops)) - return 0; -#endif - for_each_sg(sg, s, nents, i) { - paddr = sg_phys(s); - - s->dma_address = paddr - p2p_pgmap->bus_offset; + s->dma_address = sg_phys(s) - p2p_pgmap->bus_offset; sg_dma_len(s) = s->length; } diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 956151052d45..2aaed35b556d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -565,6 +565,4 @@ static inline int dma_mmap_wc(struct device *dev, int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, dma_addr_t dma_start, u64 size); -extern const struct dma_map_ops dma_virt_ops; - #endif /* _LINUX_DMA_MAPPING_H */ diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 70597508c765..7752211c9638 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -34,6 +34,13 @@ static inline int ib_umem_offset(struct ib_umem *umem) return umem->address & ~PAGE_MASK; } +static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem, + unsigned long pgsz) +{ + return (sg_dma_address(umem->sg_head.sgl) + ib_umem_offset(umem)) & + (pgsz - 1); +} + static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem, unsigned long pgsz) { @@ -79,6 +86,35 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, unsigned long virt); +/** + * ib_umem_find_best_pgoff - Find best HW page size + * + * @umem: umem struct + * @pgsz_bitmap bitmap of HW supported page sizes + * @pgoff_bitmask: Mask of bits that can be represented with an offset + * + * This is very similar to ib_umem_find_best_pgsz() except instead of accepting + * an IOVA it accepts a 
bitmask specifying what address bits can be represented + * with a page offset. + * + * For instance if the HW has multiple page sizes, requires 64 byte alignemnt, + * and can support aligned offsets up to 4032 then pgoff_bitmask would be + * "111111000000". + * + * If the pgoff_bitmask requires either alignment in the low bit or an + * unavailable page size for the high bits, this function returns 0. + */ +static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, + unsigned long pgsz_bitmap, + u64 pgoff_bitmask) +{ + struct scatterlist *sg = umem->sg_head.sgl; + dma_addr_t dma_addr; + + dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK); + return ib_umem_find_best_pgsz(umem, pgsz_bitmap, + dma_addr & pgoff_bitmask); +} #else /* CONFIG_INFINIBAND_USER_MEM */ @@ -101,6 +137,12 @@ static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, { return 0; } +static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, + unsigned long pgsz_bitmap, + u64 pgoff_bitmask) +{ + return 0; +} #endif /* CONFIG_INFINIBAND_USER_MEM */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3883efd588aa..9fed65bf9279 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1235,6 +1235,8 @@ enum ib_qp_attr_mask { IB_QP_RESERVED3 = (1<<23), IB_QP_RESERVED4 = (1<<24), IB_QP_RATE_LIMIT = (1<<25), + + IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0), }; enum ib_qp_state { @@ -1470,6 +1472,8 @@ enum rdma_remove_reason { RDMA_REMOVE_DRIVER_REMOVE, /* uobj is being cleaned-up before being committed */ RDMA_REMOVE_ABORT, + /* The driver failed to destroy the uobject and is being disconnected */ + RDMA_REMOVE_DRIVER_FAILURE, }; struct ib_rdmacg_object { @@ -1482,8 +1486,6 @@ struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; - bool cleanup_retryable; - struct ib_rdmacg_object cg_obj; /* * Implementation details of the RDMA core, don't use in drivers: @@ -2402,6 +2404,8 @@ struct ib_device_ops { 
int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, struct ib_udata *udata); + int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah, u32 flags); @@ -2430,9 +2434,10 @@ struct ib_device_ops { struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); - int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_pd *pd, struct ib_udata *udata); + struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); @@ -2666,7 +2671,6 @@ struct ib_device { const struct attribute_group *groups[3]; u64 uverbs_cmd_mask; - u64 uverbs_ex_cmd_mask; char node_desc[IB_DEVICE_NODE_DESC_MAX]; __be64 node_guid; @@ -2902,46 +2906,6 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, } /** - * ib_is_destroy_retryable - Check whether the uobject destruction - * is retryable. - * @ret: The initial destruction return code - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * This function is a helper function that IB layer and low-level drivers - * can use to consider whether the destruction of the given uobject is - * retry-able. - * It checks the original return code, if it wasn't success the destruction - * is retryable according to the ucontext state (i.e. cleanup_retryable) and - * the remove reason. (i.e. why). - * Must be called with the object locked for destroy. 
- */ -static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - return ret && (why == RDMA_REMOVE_DESTROY || - uobj->context->cleanup_retryable); -} - -/** - * ib_destroy_usecnt - Called during destruction to check the usecnt - * @usecnt: The usecnt atomic - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * Non-zero usecnts will block destruction unless destruction was triggered by - * a ucontext cleanup. - */ -static inline int ib_destroy_usecnt(atomic_t *usecnt, - enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj)) - return -EBUSY; - return 0; -} - -/** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for * the given QP state transition. @@ -3431,6 +3395,17 @@ enum ib_pd_flags { struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const char *caller); +/** + * ib_alloc_pd - Allocates an unused protection domain. + * @device: The device on which to allocate the protection domain. + * @flags: protection domain flags + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + * + * Every PD has a local_dma_lkey which can be used as the lkey value for local + * memory operations. + */ #define ib_alloc_pd(device, flags) \ __ib_alloc_pd((device), (flags), KBUILD_MODNAME) @@ -3656,8 +3631,14 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, bad_recv_wr ? 
: &dummy); } -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); +struct ib_qp *ib_create_named_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + const char *caller); +static inline struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr) +{ + return ib_create_named_qp(pd, init_attr, KBUILD_MODNAME); +} /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. @@ -3944,6 +3925,16 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) -ENOSYS; } +/* + * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to + * NULL. This causes the ib_dma* helpers to just stash the kernel virtual + * address into the dma address. + */ +static inline bool ib_uses_virt_dma(struct ib_device *dev) +{ + return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device; +} + /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created @@ -3951,6 +3942,8 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { + if (ib_uses_virt_dma(dev)) + return 0; return dma_mapping_error(dev->dma_device, dma_addr); } @@ -3965,6 +3958,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)cpu_addr; return dma_map_single(dev->dma_device, cpu_addr, size, direction); } @@ -3979,7 +3974,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_single(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_single(dev->dma_device, addr, size, direction); } /** @@ -3996,6 +3992,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) 
+ return (uintptr_t)(page_address(page) + offset); return dma_map_page(dev->dma_device, page, offset, size, direction); } @@ -4010,7 +4008,30 @@ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_page(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_page(dev->dma_device, addr, size, direction); +} + +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents); +static inline int ib_dma_map_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (ib_uses_virt_dma(dev)) + return ib_dma_virt_map_sg(dev, sg, nents); + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); +} + +static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (!ib_uses_virt_dma(dev)) + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } /** @@ -4024,7 +4045,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - return dma_map_sg(dev->dma_device, sg, nents, direction); + return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4038,24 +4059,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - dma_unmap_sg(dev->dma_device, sg, nents, direction); -} - -static inline int ib_dma_map_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, - dma_attrs); -} - -static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long 
dma_attrs) -{ - dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); + ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4066,6 +4070,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, */ static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) { + if (ib_uses_virt_dma(dev)) + return UINT_MAX; return dma_get_max_seg_size(dev->dma_device); } @@ -4081,7 +4087,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** @@ -4096,36 +4103,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_device(dev->dma_device, addr, size, dir); -} - -/** - * ib_dma_alloc_coherent - Allocate memory and map it for DMA - * @dev: The device for which the DMA address is requested - * @size: The size of the region to allocate in bytes - * @dma_handle: A pointer for returning the DMA address of the region - * @flag: memory allocator flags - */ -static inline void *ib_dma_alloc_coherent(struct ib_device *dev, - size_t size, - dma_addr_t *dma_handle, - gfp_t flag) -{ - return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag); -} - -/** - * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent() - * @dev: The device for which the DMA addresses were allocated - * @size: The size of the region - * @cpu_addr: the address returned by ib_dma_alloc_coherent() - * @dma_handle: the DMA address returned by ib_dma_alloc_coherent() - */ -static inline void ib_dma_free_coherent(struct ib_device *dev, - size_t size, void *cpu_addr, - dma_addr_t dma_handle) -{ - dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_device(dev->dma_device, addr, size, dir); } 
/* ib_reg_user_mr - register a memory region for virtual addresses from kernel @@ -4217,7 +4196,8 @@ struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, struct inode *inode, struct ib_udata *udata); int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata); -static inline int ib_check_mr_access(int flags) +static inline int ib_check_mr_access(struct ib_device *ib_dev, + unsigned int flags) { /* * Local write permission is required if remote write or @@ -4230,6 +4210,9 @@ static inline int ib_check_mr_access(int flags) if (flags & ~IB_ACCESS_SUPPORTED) return -EINVAL; + if (flags & IB_ACCESS_ON_DEMAND && + !(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) + return -EINVAL; return 0; } @@ -4617,6 +4600,19 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) } /** + * ibdev_to_node - return the NUMA node for a given ib_device + * @dev: device to get the NUMA node for. + */ +static inline int ibdev_to_node(struct ib_device *ibdev) +{ + struct device *parent = ibdev->dev.parent; + + if (!parent) + return NUMA_NO_NODE; + return dev_to_node(parent); +} + +/** * rdma_device_to_drv_device - Helper macro to reach back to driver's * ib_device holder structure from device pointer. * diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index d3a1cc5be7bc..05e18839eaff 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -68,6 +68,14 @@ struct rdma_restrack_entry { * As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI */ bool valid; + /** + * @no_track: don't add this entry to restrack DB + * + * This field is used to mark an entry that doesn't need to be added to + * internal restrack DB and presented later to the users at the nldev + * query stage. 
+ */ + u8 no_track : 1; /* * @kref: Protect destroy of the resource */ @@ -145,4 +153,20 @@ int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id); + +/** + * rdma_restrack_no_track() - don't add resource to the DB + * @res: resource entry + * + * Every user of thie API should be cross examined. + * Probaby you don't need to use this function. + */ +static inline void rdma_restrack_no_track(struct rdma_restrack_entry *res) +{ + res->no_track = true; +} +static inline bool rdma_restrack_is_tracked(struct rdma_restrack_entry *res) +{ + return !res->no_track; +} #endif /* _RDMA_RESTRACK_H_ */ diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index b00270c72740..39ef204753ec 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -647,12 +647,15 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b * 'ucontext'. * */ -#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ - (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ - driver_udata) \ - ->context, \ - drv_dev_struct, member) : \ - (drv_dev_struct *)NULL) +static inline struct uverbs_attr_bundle * +rdma_udata_to_uverbs_attr_bundle(struct ib_udata *udata) +{ + return container_of(udata, struct uverbs_attr_bundle, driver_udata); +} + +#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ + (udata ? 
container_of(rdma_udata_to_uverbs_attr_bundle(udata)->context, \ + drv_dev_struct, member) : (drv_dev_struct *)NULL) #define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT) @@ -862,6 +865,16 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, { return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); } + +static inline __malloc void *uverbs_kcalloc(struct uverbs_attr_bundle *bundle, + size_t n, size_t size) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return ERR_PTR(-EOVERFLOW); + return uverbs_zalloc(bundle, bytes); +} int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val); diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 06db27e35f40..ccd11631c167 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -71,6 +71,8 @@ struct uverbs_obj_type_class { enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs); void (*remove_handle)(struct ib_uobject *uobj); + void (*swap_uobjects)(struct ib_uobject *obj_old, + struct ib_uobject *obj_new); }; struct uverbs_obj_type { @@ -116,6 +118,9 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj, bool hw_obj_valid); void rdma_alloc_commit_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); +void rdma_assign_uobject(struct ib_uobject *to_uobj, + struct ib_uobject *new_uobj, + struct uverbs_attr_bundle *attrs); /* * uverbs_uobject_get is called in order to increase the reference count on @@ -138,8 +143,8 @@ struct uverbs_obj_fd_type { * because the driver is removed or the FD is closed. 
*/ struct uverbs_obj_type type; - int (*destroy_object)(struct ib_uobject *uobj, - enum rdma_remove_reason why); + void (*destroy_object)(struct ib_uobject *uobj, + enum rdma_remove_reason why); const struct file_operations *fops; const char *name; int flags; diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 9ec85f76e9ac..90b739d05adf 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -43,6 +43,10 @@ struct hns_roce_ib_create_cq { __u32 reserved; }; +enum hns_roce_cq_cap_flags { + HNS_ROCE_CQ_FLAG_RECORD_DB = 1 << 0, +}; + struct hns_roce_ib_create_cq_resp { __aligned_u64 cqn; /* Only 32 bits used, 64 for compat */ __aligned_u64 cap_flags; @@ -69,6 +73,12 @@ struct hns_roce_ib_create_qp { __aligned_u64 sdb_addr; }; +enum hns_roce_qp_cap_flags { + HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, + HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, + HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, +}; + struct hns_roce_ib_create_qp_resp { __aligned_u64 cap_flags; }; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 456438c18c2c..7ee73a0652f1 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -596,20 +596,6 @@ enum { IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, }; -enum { - /* - * This value is equal to IB_QP_DEST_QPN. - */ - IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20, -}; - -enum { - /* - * This value is equal to IB_QP_RATE_LIMIT. 
- */ - IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25, -}; - struct ib_uverbs_ex_create_qp { __aligned_u64 user_handle; __u32 pd_handle; diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index e591d8c1f3cf..068433e2229d 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -181,4 +181,25 @@ struct rxe_modify_srq_cmd { __aligned_u64 mmap_info_addr; }; +/* This data structure is stored at the base of work and + * completion queues shared between user space and kernel space. + * It contains the producer and consumer indices. Is also + * contains a copy of the queue size parameters for user space + * to use but the kernel must use the parameters in the + * rxe_queue struct. For performance reasons arrange to have + * producer and consumer indices in separate cache lines + * the kernel should always mask the indices to avoid accessing + * memory outside of the data area + */ +struct rxe_queue_buf { + __u32 log2_elem_size; + __u32 index_mask; + __u32 pad_1[30]; + __u32 producer_index; + __u32 pad_2[31]; + __u32 consumer_index; + __u32 pad_3[31]; + __u8 data[]; +}; + #endif /* RDMA_USER_RXE_H */ diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index c99de4a21458..fd2db2665fc6 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -75,11 +75,6 @@ config ARCH_HAS_DMA_PREP_COHERENT config ARCH_HAS_FORCE_DMA_UNENCRYPTED bool -config DMA_VIRT_OPS - bool - depends on HAS_DMA - select DMA_OPS - config SWIOTLB bool select NEED_DMA_MAP_STATE diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index dc755ab68aab..cd1d86358a7a 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -5,7 +5,6 @@ obj-$(CONFIG_DMA_OPS) += ops_helpers.o obj-$(CONFIG_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o -obj-$(CONFIG_DMA_VIRT_OPS) += virt.o obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o 
diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c deleted file mode 100644 index 59d32317dd57..000000000000 --- a/kernel/dma/virt.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DMA operations that map to virtual addresses without flushing memory. - */ -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-map-ops.h> -#include <linux/scatterlist.h> - -static void *dma_virt_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - ret = (void *)__get_free_pages(gfp | __GFP_ZERO, get_order(size)); - if (ret) - *dma_handle = (uintptr_t)ret; - return ret; -} - -static void dma_virt_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - free_pages((unsigned long)cpu_addr, get_order(size)); -} - -static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return (uintptr_t)(page_address(page) + offset); -} - -static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - sg_dma_address(sg) = (uintptr_t)sg_virt(sg); - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -const struct dma_map_ops dma_virt_ops = { - .alloc = dma_virt_alloc, - .free = dma_virt_free, - .map_page = dma_virt_map_page, - .map_sg = dma_virt_map_sg, - .alloc_pages = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, -}; -EXPORT_SYMBOL(dma_virt_ops); diff --git a/net/rds/ib.c b/net/rds/ib.c index deecbdcdae84..24c9a9005a6f 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -30,7 +30,6 @@ * SOFTWARE. 
* */ -#include <linux/dmapool.h> #include <linux/kernel.h> #include <linux/in.h> #include <linux/if.h> @@ -108,7 +107,6 @@ static void rds_ib_dev_free(struct work_struct *work) rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); if (rds_ibdev->pd) ib_dealloc_pd(rds_ibdev->pd); - dma_pool_destroy(rds_ibdev->rid_hdrs_pool); list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { list_del(&i_ipaddr->list); @@ -191,14 +189,6 @@ static int rds_ib_add_one(struct ib_device *device) rds_ibdev->pd = NULL; goto put_dev; } - rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name, - device->dma_device, - sizeof(struct rds_header), - L1_CACHE_BYTES, 0); - if (!rds_ibdev->rid_hdrs_pool) { - ret = -ENOMEM; - goto put_dev; - } rds_ibdev->mr_1m_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); diff --git a/net/rds/ib.h b/net/rds/ib.h index 8dfff43cf07f..2ba71102b1f1 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -246,7 +246,6 @@ struct rds_ib_device { struct list_head conn_list; struct ib_device *dev; struct ib_pd *pd; - struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */ u8 odp_capable:1; unsigned int max_mrs; @@ -264,13 +263,6 @@ struct rds_ib_device { int *vector_load; }; -static inline int ibdev_to_node(struct ib_device *ibdev) -{ - struct device *parent; - - parent = ibdev->dev.parent; - return parent ? dev_to_node(parent) : NUMA_NO_NODE; -} #define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev) /* bits for i_ack_flags */ @@ -387,11 +379,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6); void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event); -struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, - struct dma_pool *pool, - dma_addr_t **dma_addrs, u32 num_hdrs); -void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, - dma_addr_t *dma_addrs, u32 num_hdrs); #define rds_ib_conn_error(conn, fmt...) 
\ __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index b36b60668b1d..f5cbe963cd8f 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -30,7 +30,6 @@ * SOFTWARE. * */ -#include <linux/dmapool.h> #include <linux/kernel.h> #include <linux/in.h> #include <linux/slab.h> @@ -441,42 +440,87 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index) rds_ibdev->vector_load[index]--; } +static void rds_dma_hdr_free(struct ib_device *dev, struct rds_header *hdr, + dma_addr_t dma_addr, enum dma_data_direction dir) +{ + ib_dma_unmap_single(dev, dma_addr, sizeof(*hdr), dir); + kfree(hdr); +} + +static struct rds_header *rds_dma_hdr_alloc(struct ib_device *dev, + dma_addr_t *dma_addr, enum dma_data_direction dir) +{ + struct rds_header *hdr; + + hdr = kzalloc_node(sizeof(*hdr), GFP_KERNEL, ibdev_to_node(dev)); + if (!hdr) + return NULL; + + *dma_addr = ib_dma_map_single(dev, hdr, sizeof(*hdr), + DMA_BIDIRECTIONAL); + if (ib_dma_mapping_error(dev, *dma_addr)) { + kfree(hdr); + return NULL; + } + + return hdr; +} + +/* Free the DMA memory used to store struct rds_header. + * + * @dev: the RDS IB device + * @hdrs: pointer to the array storing DMA memory pointers + * @dma_addrs: pointer to the array storing DMA addresses + * @num_hdars: number of headers to free. + */ +static void rds_dma_hdrs_free(struct rds_ib_device *dev, + struct rds_header **hdrs, dma_addr_t *dma_addrs, u32 num_hdrs, + enum dma_data_direction dir) +{ + u32 i; + + for (i = 0; i < num_hdrs; i++) + rds_dma_hdr_free(dev->dev, hdrs[i], dma_addrs[i], dir); + kvfree(hdrs); + kvfree(dma_addrs); +} + + /* Allocate DMA coherent memory to be used to store struct rds_header for * sending/receiving packets. The pointers to the DMA memory and the * associated DMA addresses are stored in two arrays. 
* - * @ibdev: the IB device - * @pool: the DMA memory pool + * @dev: the RDS IB device * @dma_addrs: pointer to the array for storing DMA addresses * @num_hdrs: number of headers to allocate * * It returns the pointer to the array storing the DMA memory pointers. On * error, NULL pointer is returned. */ -struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, - struct dma_pool *pool, - dma_addr_t **dma_addrs, u32 num_hdrs) +static struct rds_header **rds_dma_hdrs_alloc(struct rds_ib_device *dev, + dma_addr_t **dma_addrs, u32 num_hdrs, + enum dma_data_direction dir) { struct rds_header **hdrs; dma_addr_t *hdr_daddrs; u32 i; hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL, - ibdev_to_node(ibdev)); + ibdev_to_node(dev->dev)); if (!hdrs) return NULL; hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL, - ibdev_to_node(ibdev)); + ibdev_to_node(dev->dev)); if (!hdr_daddrs) { kvfree(hdrs); return NULL; } for (i = 0; i < num_hdrs; i++) { - hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]); + hdrs[i] = rds_dma_hdr_alloc(dev->dev, &hdr_daddrs[i], dir); if (!hdrs[i]) { - rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i); + rds_dma_hdrs_free(dev, hdrs, hdr_daddrs, i, dir); return NULL; } } @@ -485,24 +529,6 @@ struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, return hdrs; } -/* Free the DMA memory used to store struct rds_header. - * - * @pool: the DMA memory pool - * @hdrs: pointer to the array storing DMA memory pointers - * @dma_addrs: pointer to the array storing DMA addresses - * @num_hdars: number of headers to free. - */ -void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, - dma_addr_t *dma_addrs, u32 num_hdrs) -{ - u32 i; - - for (i = 0; i < num_hdrs; i++) - dma_pool_free(pool, hdrs[i], dma_addrs[i]); - kvfree(hdrs); - kvfree(dma_addrs); -} - /* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. 
@@ -516,7 +542,6 @@ static int rds_ib_setup_qp(struct rds_connection *conn) struct rds_ib_device *rds_ibdev; unsigned long max_wrs; int ret, fr_queue_space; - struct dma_pool *pool; /* * It's normal to see a null device if an incoming connection races @@ -612,25 +637,26 @@ static int rds_ib_setup_qp(struct rds_connection *conn) goto recv_cq_out; } - pool = rds_ibdev->rid_hdrs_pool; - ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + ic->i_send_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr, + DMA_TO_DEVICE); if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("DMA send hdrs alloc failed\n"); goto qp_out; } - ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + ic->i_recv_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr, + DMA_FROM_DEVICE); if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("DMA recv hdrs alloc failed\n"); goto send_hdrs_dma_out; } - ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL, - &ic->i_ack_dma); + ic->i_ack = rds_dma_hdr_alloc(rds_ibdev->dev, &ic->i_ack_dma, + DMA_TO_DEVICE); if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("DMA ack header alloc failed\n"); @@ -666,18 +692,19 @@ sends_out: vfree(ic->i_sends); ack_dma_out: - dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma, + DMA_TO_DEVICE); ic->i_ack = NULL; recv_hdrs_dma_out: - rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + rds_dma_hdrs_free(rds_ibdev, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr, DMA_FROM_DEVICE); ic->i_recv_hdrs = NULL; ic->i_recv_hdrs_dma = NULL; send_hdrs_dma_out: - rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + rds_dma_hdrs_free(rds_ibdev, ic->i_send_hdrs, ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr, DMA_TO_DEVICE); ic->i_send_hdrs = NULL; ic->i_send_hdrs_dma = NULL; @@ 
-1110,29 +1137,30 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) } if (ic->rds_ibdev) { - struct dma_pool *pool; - - pool = ic->rds_ibdev->rid_hdrs_pool; - /* then free the resources that ib callbacks use */ if (ic->i_send_hdrs) { - rds_dma_hdrs_free(pool, ic->i_send_hdrs, + rds_dma_hdrs_free(ic->rds_ibdev, + ic->i_send_hdrs, ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + ic->i_send_ring.w_nr, + DMA_TO_DEVICE); ic->i_send_hdrs = NULL; ic->i_send_hdrs_dma = NULL; } if (ic->i_recv_hdrs) { - rds_dma_hdrs_free(pool, ic->i_recv_hdrs, + rds_dma_hdrs_free(ic->rds_ibdev, + ic->i_recv_hdrs, ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + ic->i_recv_ring.w_nr, + DMA_FROM_DEVICE); ic->i_recv_hdrs = NULL; ic->i_recv_hdrs_dma = NULL; } if (ic->i_ack) { - dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + rds_dma_hdr_free(ic->rds_ibdev->dev, ic->i_ack, + ic->i_ack_dma, DMA_TO_DEVICE); ic->i_ack = NULL; } } else { diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 3cffcec5fb37..6fdedd9dbbc2 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -662,10 +662,16 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi seq = rds_ib_get_ack(ic); rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq); + + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, ic->i_ack_dma, + sizeof(*hdr), DMA_TO_DEVICE); rds_message_populate_header(hdr, 0, 0, 0); hdr->h_ack = cpu_to_be64(seq); hdr->h_credit = adv_credits; rds_message_make_checksum(hdr); + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, ic->i_ack_dma, + sizeof(*hdr), DMA_TO_DEVICE); + ic->i_ack_queued = jiffies; ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL); @@ -845,6 +851,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, struct rds_ib_connection *ic = conn->c_transport_data; struct rds_ib_incoming *ibinc = ic->i_ibinc; struct rds_header *ihdr, *hdr; + dma_addr_t dma_addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs]; /* XXX shut down the connection if port 
0,0 are seen? */ @@ -863,6 +870,8 @@ static void rds_ib_process_recv(struct rds_connection *conn, ihdr = ic->i_recv_hdrs[recv - ic->i_recvs]; + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, dma_addr, + sizeof(*ihdr), DMA_FROM_DEVICE); /* Validate the checksum. */ if (!rds_message_verify_checksum(ihdr)) { rds_ib_conn_error(conn, "incoming message " @@ -870,7 +879,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, "forcing a reconnect\n", &conn->c_faddr); rds_stats_inc(s_recv_drop_bad_checksum); - return; + goto done; } /* Process the ACK sequence which comes with every packet */ @@ -899,7 +908,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, */ rds_ib_frag_free(ic, recv->r_frag); recv->r_frag = NULL; - return; + goto done; } /* @@ -933,7 +942,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, hdr->h_dport != ihdr->h_dport) { rds_ib_conn_error(conn, "fragment header mismatch; forcing reconnect\n"); - return; + goto done; } } @@ -965,6 +974,9 @@ static void rds_ib_process_recv(struct rds_connection *conn, rds_inc_put(&ibinc->ii_inc); } +done: + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, dma_addr, + sizeof(*ihdr), DMA_FROM_DEVICE); } void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index dfe778220657..92b4a8689aae 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -638,6 +638,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, send->s_sge[0].length = sizeof(struct rds_header); send->s_sge[0].lkey = ic->i_pd->local_dma_lkey; + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, + ic->i_send_hdrs_dma[pos], + sizeof(struct rds_header), + DMA_TO_DEVICE); memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); @@ -688,6 +692,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, adv_credits = 0; rds_ib_stats_inc(s_ib_tx_credit_updates); } + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, + 
ic->i_send_hdrs_dma[pos], + sizeof(struct rds_header), + DMA_TO_DEVICE); if (prev) prev->s_wr.next = &send->s_wr; diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index 8a577c95496e..71c960dcd8a4 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -9,6 +9,7 @@ struct test { int alloc_ret; unsigned num_pages; unsigned *pfn; + unsigned *pfn_app; unsigned size; unsigned int max_seg; unsigned int expected_segments; @@ -52,31 +53,39 @@ int main(void) { const unsigned int sgmax = UINT_MAX; struct test *test, tests[] = { - { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 }, - { 0, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, - { 0, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, - { 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 }, - { 0, 1, pfn(0), 1, sgmax, 1 }, - { 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 }, - { 0, 2, pfn(1, 0), 2 * PAGE_SIZE, sgmax, 2 }, - { 0, 3, pfn(0, 1, 2), 3 * PAGE_SIZE, sgmax, 1 }, - { 0, 3, pfn(0, 2, 1), 3 * PAGE_SIZE, sgmax, 3 }, - { 0, 3, pfn(0, 1, 3), 3 * PAGE_SIZE, sgmax, 2 }, - { 0, 3, pfn(1, 2, 4), 3 * PAGE_SIZE, sgmax, 2 }, - { 0, 3, pfn(1, 3, 4), 3 * PAGE_SIZE, sgmax, 2 }, - { 0, 4, pfn(0, 1, 3, 4), 4 * PAGE_SIZE, sgmax, 2 }, - { 0, 5, pfn(0, 1, 3, 4, 5), 5 * PAGE_SIZE, sgmax, 2 }, - { 0, 5, pfn(0, 1, 3, 4, 6), 5 * PAGE_SIZE, sgmax, 3 }, - { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, sgmax, 1 }, - { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, - { 0, 6, pfn(0, 1, 2, 3, 4, 5), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, - { 0, 6, pfn(0, 2, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 4 }, - { 0, 6, pfn(0, 1, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, - { 0, 0, NULL, 0, 0, 0 }, + { -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 }, + { 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 }, + { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 }, + { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 }, + { 0, 1, pfn(0), NULL, 1, sgmax, 1 }, + { 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 }, + { 0, 2, pfn(1, 0), NULL, 2 * 
PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 }, + { 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 }, + { 0, 3, pfn(0, 1, 2), pfn(3, 4, 5), 3 * PAGE_SIZE, sgmax, 1 }, + { 0, 3, pfn(0, 1, 2), pfn(4, 5, 6), 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(0, 2, 1), NULL, 3 * PAGE_SIZE, sgmax, 3 }, + { 0, 3, pfn(0, 1, 3), NULL, 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(1, 2, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(1, 3, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 4, pfn(0, 1, 3, 4), NULL, 4 * PAGE_SIZE, sgmax, 2 }, + { 0, 5, pfn(0, 1, 3, 4, 5), NULL, 5 * PAGE_SIZE, sgmax, 2 }, + { 0, 5, pfn(0, 1, 3, 4, 6), NULL, 5 * PAGE_SIZE, sgmax, 3 }, + { 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, sgmax, 1 }, + { 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, 2 * PAGE_SIZE, + 3 }, + { 0, 6, pfn(0, 1, 2, 3, 4, 5), NULL, 6 * PAGE_SIZE, + 2 * PAGE_SIZE, 3 }, + { 0, 6, pfn(0, 2, 3, 4, 5, 6), NULL, 6 * PAGE_SIZE, + 2 * PAGE_SIZE, 4 }, + { 0, 6, pfn(0, 1, 3, 4, 5, 6), pfn(7, 8, 9, 10, 11, 12), + 6 * PAGE_SIZE, 12 * PAGE_SIZE, 2 }, + { 0, 0, NULL, NULL, 0, 0, 0 }, }; unsigned int i; for (i = 0, test = tests; test->expected_segments; test++, i++) { + int left_pages = test->pfn_app ? 
test->num_pages : 0; struct page *pages[MAX_PAGES]; struct sg_table st; struct scatterlist *sg; @@ -84,14 +93,23 @@ int main(void) set_pages(pages, test->pfn, test->num_pages); sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, - test->size, test->max_seg, NULL, 0, GFP_KERNEL); + test->size, test->max_seg, NULL, left_pages, GFP_KERNEL); assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); if (test->alloc_ret) continue; + if (test->pfn_app) { + set_pages(pages, test->pfn_app, test->num_pages); + sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, + test->size, test->max_seg, sg, 0, GFP_KERNEL); + + assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); + } + VALIDATE(st.nents == test->expected_segments, &st, test); - VALIDATE(st.orig_nents == test->expected_segments, &st, test); + if (!test->pfn_app) + VALIDATE(st.orig_nents == test->expected_segments, &st, test); sg_free_table(&st); } |