diff options
Diffstat (limited to 'drivers/infiniband')
183 files changed, 8333 insertions, 4198 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 867cee5e27b2..69dee36e0e89 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -38,4 +38,4 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ uverbs_std_types_cq.o \ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ uverbs_std_types_mr.o uverbs_std_types_counters.o \ - uverbs_uapi.o + uverbs_uapi.o uverbs_std_types_device.o diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 324ef85a13b6..f82b4260de42 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -137,13 +137,13 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh * err2: ib_free_send_mad(send_buf); err1: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); } static void agent_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { - rdma_destroy_ah(mad_send_wc->send_buf->ah); + rdma_destroy_ah(mad_send_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(mad_send_wc->send_buf); } diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 5b2fce4a7091..7b04590f307f 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -215,10 +215,6 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry) dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__, port_num, entry->attr.index, entry->attr.gid.raw); - if (rdma_cap_roce_gid_table(device, port_num) && - entry->state != GID_TABLE_ENTRY_INVALID) - device->del_gid(&entry->attr, &entry->context); - write_lock_irq(&table->rwlock); /* @@ -324,7 +320,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry) return -EINVAL; } if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { - ret = attr->device->add_gid(attr, &entry->context); + ret = attr->device->ops.add_gid(attr, &entry->context); if 
(ret) { dev_err(&attr->device->dev, "%s GID add failed port=%d index=%d\n", @@ -364,6 +360,9 @@ static void del_gid(struct ib_device *ib_dev, u8 port, table->data_vec[ix] = NULL; write_unlock_irq(&table->rwlock); + if (rdma_cap_roce_gid_table(ib_dev, port)) + ib_dev->ops.del_gid(&entry->attr, &entry->context); + put_gid_entry_locked(entry); } @@ -548,8 +547,8 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, unsigned long mask; int ret; - if (ib_dev->get_netdev) { - idev = ib_dev->get_netdev(ib_dev, port); + if (ib_dev->ops.get_netdev) { + idev = ib_dev->ops.get_netdev(ib_dev, port); if (idev && attr->ndev != idev) { union ib_gid default_gid; @@ -1296,9 +1295,9 @@ static int config_non_roce_gid_cache(struct ib_device *device, mutex_lock(&table->lock); for (i = 0; i < gid_tbl_len; ++i) { - if (!device->query_gid) + if (!device->ops.query_gid) continue; - ret = device->query_gid(device, port, i, &gid_attr.gid); + ret = device->ops.query_gid(device, port, i, &gid_attr.gid); if (ret) { dev_warn(&device->dev, "query_gid failed (%d) for index %d\n", ret, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index edb2cb758be7..37980c7564c0 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -343,7 +343,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, ret = -ENODEV; goto out; } - ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr); + ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto out; @@ -355,7 +355,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, 0); ret = PTR_ERR(m); goto out; } @@ -400,7 +400,7 @@ static int cm_create_response_msg_ah(struct cm_port *port, static void cm_free_msg(struct ib_mad_send_buf *msg) { if (msg->ah) - rdma_destroy_ah(msg->ah); + rdma_destroy_ah(msg->ah, 0); if (msg->context[0]) cm_deref_id(msg->context[0]); 
ib_free_send_mad(msg); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 15d5bb7bf6bb..63a7cc00bae0 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -494,7 +494,7 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); - rdma_restrack_add(&id_priv->res); + rdma_restrack_kadd(&id_priv->res); } static void cma_attach_to_dev(struct rdma_id_private *id_priv, diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 8c2dfb3e294e..3ec2c415bb70 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -33,7 +33,10 @@ #include <linux/module.h> #include <linux/configfs.h> #include <rdma/ib_verbs.h> +#include <rdma/rdma_cm.h> + #include "core_priv.h" +#include "cma_priv.h" struct cma_device; diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index 194cfe78c447..cf47c69436a7 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -94,4 +94,32 @@ struct rdma_id_private { */ struct rdma_restrack_entry res; }; + +#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) +int cma_configfs_init(void); +void cma_configfs_exit(void); +#else +static inline int cma_configfs_init(void) +{ + return 0; +} + +static inline void cma_configfs_exit(void) +{ +} +#endif + +void cma_ref_dev(struct cma_device *dev); +void cma_deref_dev(struct cma_device *dev); +typedef bool (*cma_device_filter)(struct ib_device *, void *); +struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, + void *cookie); +int cma_get_default_gid_type(struct cma_device *dev, unsigned int port); +int cma_set_default_gid_type(struct cma_device *dev, unsigned int port, + enum ib_gid_type default_gid_type); +int cma_get_default_roce_tos(struct 
cma_device *dev, unsigned int port); +int cma_set_default_roce_tos(struct cma_device *dev, unsigned int port, + u8 default_roce_tos); +struct ib_device *cma_get_ib_dev(struct cma_device *dev); + #endif /* _CMA_PRIV_H */ diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index bb9007a0cca7..3cd830d52967 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -54,35 +54,6 @@ struct pkey_index_qp_list { struct list_head qp_list; }; -#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) -int cma_configfs_init(void); -void cma_configfs_exit(void); -#else -static inline int cma_configfs_init(void) -{ - return 0; -} - -static inline void cma_configfs_exit(void) -{ -} -#endif -struct cma_device; -void cma_ref_dev(struct cma_device *cma_dev); -void cma_deref_dev(struct cma_device *cma_dev); -typedef bool (*cma_device_filter)(struct ib_device *, void *); -struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, - void *cookie); -int cma_get_default_gid_type(struct cma_device *cma_dev, - unsigned int port); -int cma_set_default_gid_type(struct cma_device *cma_dev, - unsigned int port, - enum ib_gid_type default_gid_type); -int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port); -int cma_set_default_roce_tos(struct cma_device *a_dev, unsigned int port, - u8 default_roce_tos); -struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev); - int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)); @@ -244,10 +215,10 @@ static inline int ib_security_modify_qp(struct ib_qp *qp, int qp_attr_mask, struct ib_udata *udata) { - return qp->device->modify_qp(qp->real_qp, - qp_attr, - qp_attr_mask, - udata); + return qp->device->ops.modify_qp(qp->real_qp, + qp_attr, + qp_attr_mask, + udata); } static inline int ib_create_qp_security(struct ib_qp *qp, @@ -296,6 +267,7 @@ static inline int ib_mad_enforce_security(struct 
ib_mad_agent_private *map, #endif struct ib_device *ib_device_get_by_index(u32 ifindex); +void ib_device_put(struct ib_device *device); /* RDMA device netlink */ void nldev_init(void); void nldev_exit(void); @@ -308,10 +280,10 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, { struct ib_qp *qp; - if (!dev->create_qp) + if (!dev->ops.create_qp) return ERR_PTR(-EOPNOTSUPP); - qp = dev->create_qp(pd, attr, udata); + qp = dev->ops.create_qp(pd, attr, udata); if (IS_ERR(qp)) return qp; @@ -325,7 +297,10 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, */ if (attr->qp_type < IB_QPT_XRC_INI) { qp->res.type = RDMA_RESTRACK_QP; - rdma_restrack_add(&qp->res); + if (uobj) + rdma_restrack_uadd(&qp->res); + else + rdma_restrack_kadd(&qp->res); } else qp->res.valid = false; diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index b1e5365ddafa..d61e5e1427c2 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -145,7 +145,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, struct ib_cq *cq; int ret = -ENOMEM; - cq = dev->create_cq(dev, &cq_attr, NULL, NULL); + cq = dev->ops.create_cq(dev, &cq_attr, NULL, NULL); if (IS_ERR(cq)) return cq; @@ -162,7 +162,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, cq->res.type = RDMA_RESTRACK_CQ; rdma_restrack_set_task(&cq->res, caller); - rdma_restrack_add(&cq->res); + rdma_restrack_kadd(&cq->res); switch (cq->poll_ctx) { case IB_POLL_DIRECT: @@ -193,7 +193,7 @@ out_free_wc: kfree(cq->wc); rdma_restrack_del(&cq->res); out_destroy_cq: - cq->device->destroy_cq(cq); + cq->device->ops.destroy_cq(cq); return ERR_PTR(ret); } EXPORT_SYMBOL(__ib_alloc_cq); @@ -225,7 +225,7 @@ void ib_free_cq(struct ib_cq *cq) kfree(cq->wc); rdma_restrack_del(&cq->res); - ret = cq->device->destroy_cq(cq); + ret = cq->device->ops.destroy_cq(cq); WARN_ON_ONCE(ret); } EXPORT_SYMBOL(ib_free_cq); diff --git a/drivers/infiniband/core/device.c 
b/drivers/infiniband/core/device.c index 87eb4f2cdd7d..47ab34ee1a9d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -96,7 +96,7 @@ static struct notifier_block ibdev_lsm_nb = { static int ib_device_check_mandatory(struct ib_device *device) { -#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x } +#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x } static const struct { size_t offset; char *name; @@ -122,7 +122,8 @@ static int ib_device_check_mandatory(struct ib_device *device) int i; for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { - if (!*(void **) ((void *) device + mandatory_table[i].offset)) { + if (!*(void **) ((void *) &device->ops + + mandatory_table[i].offset)) { dev_warn(&device->dev, "Device is missing mandatory function %s\n", mandatory_table[i].name); @@ -145,7 +146,8 @@ static struct ib_device *__ib_device_get_by_index(u32 index) } /* - * Caller is responsible to return refrerence count by calling put_device() + * Caller must perform ib_device_put() to return the device reference count + * when ib_device_get_by_index() returns valid device pointer. */ struct ib_device *ib_device_get_by_index(u32 index) { @@ -153,13 +155,21 @@ struct ib_device *ib_device_get_by_index(u32 index) down_read(&lists_rwsem); device = __ib_device_get_by_index(index); - if (device) - get_device(&device->dev); - + if (device) { + /* Do not return a device if unregistration has started. 
*/ + if (!refcount_inc_not_zero(&device->refcount)) + device = NULL; + } up_read(&lists_rwsem); return device; } +void ib_device_put(struct ib_device *device) +{ + if (refcount_dec_and_test(&device->refcount)) + complete(&device->unreg_completion); +} + static struct ib_device *__ib_device_get_by_name(const char *name) { struct ib_device *device; @@ -293,6 +303,8 @@ struct ib_device *ib_alloc_device(size_t size) rwlock_init(&device->client_data_lock); INIT_LIST_HEAD(&device->client_data_list); INIT_LIST_HEAD(&device->port_list); + refcount_set(&device->refcount, 1); + init_completion(&device->unreg_completion); return device; } @@ -362,8 +374,8 @@ static int read_port_immutable(struct ib_device *device) return -ENOMEM; for (port = start_port; port <= end_port; ++port) { - ret = device->get_port_immutable(device, port, - &device->port_immutable[port]); + ret = device->ops.get_port_immutable( + device, port, &device->port_immutable[port]); if (ret) return ret; @@ -375,8 +387,8 @@ static int read_port_immutable(struct ib_device *device) void ib_get_device_fw_str(struct ib_device *dev, char *str) { - if (dev->get_dev_fw_str) - dev->get_dev_fw_str(dev, str); + if (dev->ops.get_dev_fw_str) + dev->ops.get_dev_fw_str(dev, str); else str[0] = '\0'; } @@ -525,7 +537,7 @@ static int setup_device(struct ib_device *device) } memset(&device->attrs, 0, sizeof(device->attrs)); - ret = device->query_device(device, &device->attrs, &uhw); + ret = device->ops.query_device(device, &device->attrs, &uhw); if (ret) { dev_warn(&device->dev, "Couldn't query the device attributes\n"); @@ -641,6 +653,13 @@ void ib_unregister_device(struct ib_device *device) struct ib_client_data *context, *tmp; unsigned long flags; + /* + * Wait for all netlink command callers to finish working on the + * device. 
+ */ + ib_device_put(device); + wait_for_completion(&device->unreg_completion); + mutex_lock(&device_mutex); down_write(&lists_rwsem); @@ -905,14 +924,14 @@ int ib_query_port(struct ib_device *device, return -EINVAL; memset(port_attr, 0, sizeof(*port_attr)); - err = device->query_port(device, port_num, port_attr); + err = device->ops.query_port(device, port_num, port_attr); if (err || port_attr->subnet_prefix) return err; if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND) return 0; - err = device->query_gid(device, port_num, 0, &gid); + err = device->ops.query_gid(device, port_num, 0, &gid); if (err) return err; @@ -946,8 +965,8 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev, if (rdma_protocol_roce(ib_dev, port)) { struct net_device *idev = NULL; - if (ib_dev->get_netdev) - idev = ib_dev->get_netdev(ib_dev, port); + if (ib_dev->ops.get_netdev) + idev = ib_dev->ops.get_netdev(ib_dev, port); if (idev && idev->reg_state >= NETREG_UNREGISTERED) { @@ -1024,7 +1043,10 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, int ib_query_pkey(struct ib_device *device, u8 port_num, u16 index, u16 *pkey) { - return device->query_pkey(device, port_num, index, pkey); + if (!rdma_is_port_valid(device, port_num)) + return -EINVAL; + + return device->ops.query_pkey(device, port_num, index, pkey); } EXPORT_SYMBOL(ib_query_pkey); @@ -1041,11 +1063,11 @@ int ib_modify_device(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify) { - if (!device->modify_device) + if (!device->ops.modify_device) return -ENOSYS; - return device->modify_device(device, device_modify_mask, - device_modify); + return device->ops.modify_device(device, device_modify_mask, + device_modify); } EXPORT_SYMBOL(ib_modify_device); @@ -1069,9 +1091,10 @@ int ib_modify_port(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - if (device->modify_port) - rc = device->modify_port(device, port_num, 
port_modify_mask, - port_modify); + if (device->ops.modify_port) + rc = device->ops.modify_port(device, port_num, + port_modify_mask, + port_modify); else rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS; return rc; @@ -1198,6 +1221,105 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, } EXPORT_SYMBOL(ib_get_net_dev_by_params); +void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) +{ + struct ib_device_ops *dev_ops = &dev->ops; +#define SET_DEVICE_OP(ptr, name) \ + do { \ + if (ops->name) \ + if (!((ptr)->name)) \ + (ptr)->name = ops->name; \ + } while (0) + + SET_DEVICE_OP(dev_ops, add_gid); + SET_DEVICE_OP(dev_ops, alloc_dm); + SET_DEVICE_OP(dev_ops, alloc_fmr); + SET_DEVICE_OP(dev_ops, alloc_hw_stats); + SET_DEVICE_OP(dev_ops, alloc_mr); + SET_DEVICE_OP(dev_ops, alloc_mw); + SET_DEVICE_OP(dev_ops, alloc_pd); + SET_DEVICE_OP(dev_ops, alloc_rdma_netdev); + SET_DEVICE_OP(dev_ops, alloc_ucontext); + SET_DEVICE_OP(dev_ops, alloc_xrcd); + SET_DEVICE_OP(dev_ops, attach_mcast); + SET_DEVICE_OP(dev_ops, check_mr_status); + SET_DEVICE_OP(dev_ops, create_ah); + SET_DEVICE_OP(dev_ops, create_counters); + SET_DEVICE_OP(dev_ops, create_cq); + SET_DEVICE_OP(dev_ops, create_flow); + SET_DEVICE_OP(dev_ops, create_flow_action_esp); + SET_DEVICE_OP(dev_ops, create_qp); + SET_DEVICE_OP(dev_ops, create_rwq_ind_table); + SET_DEVICE_OP(dev_ops, create_srq); + SET_DEVICE_OP(dev_ops, create_wq); + SET_DEVICE_OP(dev_ops, dealloc_dm); + SET_DEVICE_OP(dev_ops, dealloc_fmr); + SET_DEVICE_OP(dev_ops, dealloc_mw); + SET_DEVICE_OP(dev_ops, dealloc_pd); + SET_DEVICE_OP(dev_ops, dealloc_ucontext); + SET_DEVICE_OP(dev_ops, dealloc_xrcd); + SET_DEVICE_OP(dev_ops, del_gid); + SET_DEVICE_OP(dev_ops, dereg_mr); + SET_DEVICE_OP(dev_ops, destroy_ah); + SET_DEVICE_OP(dev_ops, destroy_counters); + SET_DEVICE_OP(dev_ops, destroy_cq); + SET_DEVICE_OP(dev_ops, destroy_flow); + SET_DEVICE_OP(dev_ops, destroy_flow_action); + SET_DEVICE_OP(dev_ops, 
destroy_qp); + SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table); + SET_DEVICE_OP(dev_ops, destroy_srq); + SET_DEVICE_OP(dev_ops, destroy_wq); + SET_DEVICE_OP(dev_ops, detach_mcast); + SET_DEVICE_OP(dev_ops, disassociate_ucontext); + SET_DEVICE_OP(dev_ops, drain_rq); + SET_DEVICE_OP(dev_ops, drain_sq); + SET_DEVICE_OP(dev_ops, get_dev_fw_str); + SET_DEVICE_OP(dev_ops, get_dma_mr); + SET_DEVICE_OP(dev_ops, get_hw_stats); + SET_DEVICE_OP(dev_ops, get_link_layer); + SET_DEVICE_OP(dev_ops, get_netdev); + SET_DEVICE_OP(dev_ops, get_port_immutable); + SET_DEVICE_OP(dev_ops, get_vector_affinity); + SET_DEVICE_OP(dev_ops, get_vf_config); + SET_DEVICE_OP(dev_ops, get_vf_stats); + SET_DEVICE_OP(dev_ops, map_mr_sg); + SET_DEVICE_OP(dev_ops, map_phys_fmr); + SET_DEVICE_OP(dev_ops, mmap); + SET_DEVICE_OP(dev_ops, modify_ah); + SET_DEVICE_OP(dev_ops, modify_cq); + SET_DEVICE_OP(dev_ops, modify_device); + SET_DEVICE_OP(dev_ops, modify_flow_action_esp); + SET_DEVICE_OP(dev_ops, modify_port); + SET_DEVICE_OP(dev_ops, modify_qp); + SET_DEVICE_OP(dev_ops, modify_srq); + SET_DEVICE_OP(dev_ops, modify_wq); + SET_DEVICE_OP(dev_ops, peek_cq); + SET_DEVICE_OP(dev_ops, poll_cq); + SET_DEVICE_OP(dev_ops, post_recv); + SET_DEVICE_OP(dev_ops, post_send); + SET_DEVICE_OP(dev_ops, post_srq_recv); + SET_DEVICE_OP(dev_ops, process_mad); + SET_DEVICE_OP(dev_ops, query_ah); + SET_DEVICE_OP(dev_ops, query_device); + SET_DEVICE_OP(dev_ops, query_gid); + SET_DEVICE_OP(dev_ops, query_pkey); + SET_DEVICE_OP(dev_ops, query_port); + SET_DEVICE_OP(dev_ops, query_qp); + SET_DEVICE_OP(dev_ops, query_srq); + SET_DEVICE_OP(dev_ops, rdma_netdev_get_params); + SET_DEVICE_OP(dev_ops, read_counters); + SET_DEVICE_OP(dev_ops, reg_dm_mr); + SET_DEVICE_OP(dev_ops, reg_user_mr); + SET_DEVICE_OP(dev_ops, req_ncomp_notif); + SET_DEVICE_OP(dev_ops, req_notify_cq); + SET_DEVICE_OP(dev_ops, rereg_user_mr); + SET_DEVICE_OP(dev_ops, resize_cq); + SET_DEVICE_OP(dev_ops, set_vf_guid); + SET_DEVICE_OP(dev_ops, set_vf_link_state); + 
SET_DEVICE_OP(dev_ops, unmap_fmr); +} +EXPORT_SYMBOL(ib_set_device_ops); + static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { [RDMA_NL_LS_OP_RESOLVE] = { .doit = ib_nl_handle_resolve_resp, diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 83ba0068e8bb..7d841b689a1e 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -211,8 +211,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, return ERR_PTR(-EINVAL); device = pd->device; - if (!device->alloc_fmr || !device->dealloc_fmr || - !device->map_phys_fmr || !device->unmap_fmr) { + if (!device->ops.alloc_fmr || !device->ops.dealloc_fmr || + !device->ops.map_phys_fmr || !device->ops.unmap_fmr) { dev_info(&device->dev, "Device does not support FMRs\n"); return ERR_PTR(-ENOSYS); } @@ -474,7 +474,7 @@ EXPORT_SYMBOL(ib_fmr_pool_map_phys); * Unmap an FMR. The FMR mapping may remain valid until the FMR is * reused (or until ib_flush_fmr_pool() is called). 
*/ -int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) +void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) { struct ib_fmr_pool *pool; unsigned long flags; @@ -503,7 +503,5 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) #endif spin_unlock_irqrestore(&pool->pool_lock, flags); - - return 0; } EXPORT_SYMBOL(ib_fmr_pool_unmap); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index ba668d49c751..476abc74178e 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -502,17 +502,21 @@ static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, */ static int iw_cm_map(struct iw_cm_id *cm_id, bool active) { + const char *devname = dev_name(&cm_id->device->dev); + const char *ifname = cm_id->device->iwcm->ifname; struct iwpm_dev_data pm_reg_msg; struct iwpm_sa_data pm_msg; int status; + if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) || + strlen(ifname) >= sizeof(pm_reg_msg.if_name)) + return -EINVAL; + cm_id->m_local_addr = cm_id->local_addr; cm_id->m_remote_addr = cm_id->remote_addr; - memcpy(pm_reg_msg.dev_name, dev_name(&cm_id->device->dev), - sizeof(pm_reg_msg.dev_name)); - memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname, - sizeof(pm_reg_msg.if_name)); + strncpy(pm_reg_msg.dev_name, devname, sizeof(pm_reg_msg.dev_name)); + strncpy(pm_reg_msg.if_name, ifname, sizeof(pm_reg_msg.if_name)); if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) || !iwpm_valid_pid()) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index d7025cd5be28..7870823bac47 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -888,10 +888,10 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } /* No GRH for DR SMP */ - ret = device->process_mad(device, 0, port_num, &mad_wc, NULL, - (const struct ib_mad_hdr *)smp, mad_size, - (struct ib_mad_hdr *)mad_priv->mad, - &mad_size, &out_mad_pkey_index); + ret = device->ops.process_mad(device, 0, port_num, &mad_wc, 
NULL, + (const struct ib_mad_hdr *)smp, mad_size, + (struct ib_mad_hdr *)mad_priv->mad, + &mad_size, &out_mad_pkey_index); switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: @@ -2305,14 +2305,12 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) } /* Give driver "right of first refusal" on incoming MAD */ - if (port_priv->device->process_mad) { - ret = port_priv->device->process_mad(port_priv->device, 0, - port_priv->port_num, - wc, &recv->grh, - (const struct ib_mad_hdr *)recv->mad, - recv->mad_size, - (struct ib_mad_hdr *)response->mad, - &mad_size, &resp_mad_pkey_index); + if (port_priv->device->ops.process_mad) { + ret = port_priv->device->ops.process_mad( + port_priv->device, 0, port_priv->port_num, wc, + &recv->grh, (const struct ib_mad_hdr *)recv->mad, + recv->mad_size, (struct ib_mad_hdr *)response->mad, + &mad_size, &resp_mad_pkey_index); if (opa) wc->pkey_index = resp_mad_pkey_index; diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index e5cf09c66fe6..5ec57abc0849 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -81,7 +81,7 @@ static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) { deref_rmpp_recv(rmpp_recv); wait_for_completion(&rmpp_recv->comp); - rdma_destroy_ah(rmpp_recv->ah); + rdma_destroy_ah(rmpp_recv->ah, RDMA_DESTROY_AH_SLEEPABLE); kfree(rmpp_recv); } @@ -171,7 +171,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, hdr_len, 0, GFP_KERNEL, IB_MGMT_BASE_VERSION); if (IS_ERR(msg)) - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); else { msg->ah = ah; msg->context[0] = ah; @@ -201,7 +201,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent, ret = ib_post_send_mad(msg, NULL); if (ret) { - rdma_destroy_ah(msg->ah); + rdma_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(msg); } } @@ -209,7 +209,8 @@ static void ack_ds_ack(struct ib_mad_agent_private 
*agent, void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) { if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah) - rdma_destroy_ah(mad_send_wc->send_buf->ah); + rdma_destroy_ah(mad_send_wc->send_buf->ah, + RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(mad_send_wc->send_buf); } @@ -237,7 +238,7 @@ static void nack_recv(struct ib_mad_agent_private *agent, ret = ib_post_send_mad(msg, NULL); if (ret) { - rdma_destroy_ah(msg->ah); + rdma_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(msg); } } diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 573399e3ccc1..e600fc23ae62 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -227,6 +227,7 @@ static int fill_port_info(struct sk_buff *msg, struct net_device *netdev = NULL; struct ib_port_attr attr; int ret; + u64 cap_flags = 0; if (fill_nldev_handle(msg, device)) return -EMSGSIZE; @@ -239,10 +240,12 @@ static int fill_port_info(struct sk_buff *msg, return ret; if (rdma_protocol_ib(device, port)) { - BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64)); + BUILD_BUG_ON((sizeof(attr.port_cap_flags) + + sizeof(attr.port_cap_flags2)) > sizeof(u64)); + cap_flags = attr.port_cap_flags | + ((u64)attr.port_cap_flags2 << 32); if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, - (u64)attr.port_cap_flags, - RDMA_NLDEV_ATTR_PAD)) + cap_flags, RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) @@ -259,8 +262,8 @@ static int fill_port_info(struct sk_buff *msg, if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state)) return -EMSGSIZE; - if (device->get_netdev) - netdev = device->get_netdev(device, port); + if (device->ops.get_netdev) + netdev = device->ops.get_netdev(device, port); if (netdev && net_eq(dev_net(netdev), net)) { ret = nla_put_u32(msg, @@ -308,6 +311,7 @@ static int fill_res_info(struct sk_buff *msg, struct 
ib_device *device) [RDMA_RESTRACK_QP] = "qp", [RDMA_RESTRACK_CM_ID] = "cm_id", [RDMA_RESTRACK_MR] = "mr", + [RDMA_RESTRACK_CTX] = "ctx", }; struct rdma_restrack_root *res = &device->res; @@ -636,13 +640,13 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); - put_device(&device->dev); + ib_device_put(device); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); err: - put_device(&device->dev); + ib_device_put(device); return err; } @@ -672,7 +676,7 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, err = ib_device_rename(device, name); } - put_device(&device->dev); + ib_device_put(device); return err; } @@ -756,14 +760,14 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_free; nlmsg_end(msg, nlh); - put_device(&device->dev); + ib_device_put(device); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); err: - put_device(&device->dev); + ib_device_put(device); return err; } @@ -820,7 +824,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, } out: - put_device(&device->dev); + ib_device_put(device); cb->args[0] = idx; return skb->len; } @@ -859,13 +863,13 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_free; nlmsg_end(msg, nlh); - put_device(&device->dev); + ib_device_put(device); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); err: - put_device(&device->dev); + ib_device_put(device); return ret; } @@ -1058,7 +1062,7 @@ next: idx++; if (!filled) goto err; - put_device(&device->dev); + ib_device_put(device); return skb->len; res_err: @@ -1069,7 +1073,7 @@ err: nlmsg_cancel(skb, nlh); err_index: - put_device(&device->dev); + ib_device_put(device); return ret; } diff --git a/drivers/infiniband/core/opa_smi.h b/drivers/infiniband/core/opa_smi.h index 3bfab3505a29..af4879bdf3d6 100644 --- a/drivers/infiniband/core/opa_smi.h +++ 
b/drivers/infiniband/core/opa_smi.h @@ -55,7 +55,7 @@ static inline enum smi_action opa_smi_check_local_smp(struct opa_smp *smp, { /* C14-9:3 -- We're at the end of the DR segment of path */ /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ - return (device->process_mad && + return (device->ops.process_mad && !opa_get_smp_direction(smp) && (smp->hop_ptr == smp->hop_cnt + 1)) ? IB_SMI_HANDLE : IB_SMI_DISCARD; @@ -70,7 +70,7 @@ static inline enum smi_action opa_smi_check_local_returning_smp(struct opa_smp * { /* C14-13:3 -- We're at the end of the DR segment of path */ /* C14-13:4 -- Hop Pointer == 0 -> give to SM */ - return (device->process_mad && + return (device->ops.process_mad && opa_get_smp_direction(smp) && !smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD; } diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 752a55c6bdce..6c4747e61d2b 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -224,12 +224,14 @@ out_unlock: * uverbs_put_destroy. */ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, - u32 id, struct ib_uverbs_file *ufile) + u32 id, + const struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj; int ret; - uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY); + uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id, + UVERBS_LOOKUP_DESTROY); if (IS_ERR(uobj)) return uobj; @@ -243,21 +245,20 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, } /* - * Does both uobj_get_destroy() and uobj_put_destroy(). Returns success_res - * on success (negative errno on failure). For use by callers that do not need - * the uobj. + * Does both uobj_get_destroy() and uobj_put_destroy(). Returns 0 on success + * (negative errno on failure). For use by callers that do not need the uobj. 
*/ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, - struct ib_uverbs_file *ufile, int success_res) + const struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj; - uobj = __uobj_get_destroy(obj, id, ufile); + uobj = __uobj_get_destroy(obj, id, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); - return success_res; + return 0; } /* alloc_uobj must be undone by uverbs_destroy_uobject() */ @@ -267,7 +268,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, struct ib_uobject *uobj; struct ib_ucontext *ucontext; - ucontext = ib_uverbs_get_ucontext(ufile); + ucontext = ib_uverbs_get_ucontext_file(ufile); if (IS_ERR(ucontext)) return ERR_CAST(ucontext); @@ -397,16 +398,23 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, struct ib_uobject *uobj; int ret; - if (!obj) - return ERR_PTR(-EINVAL); + if (IS_ERR(obj) && PTR_ERR(obj) == -ENOMSG) { + /* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */ + uobj = lookup_get_idr_uobject(NULL, ufile, id, mode); + if (IS_ERR(uobj)) + return uobj; + } else { + if (IS_ERR(obj)) + return ERR_PTR(-EINVAL); - uobj = obj->type_class->lookup_get(obj, ufile, id, mode); - if (IS_ERR(uobj)) - return uobj; + uobj = obj->type_class->lookup_get(obj, ufile, id, mode); + if (IS_ERR(uobj)) + return uobj; - if (uobj->uapi_object != obj) { - ret = -EINVAL; - goto free; + if (uobj->uapi_object != obj) { + ret = -EINVAL; + goto free; + } } /* @@ -426,7 +434,7 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, return uobj; free: - obj->type_class->lookup_put(uobj, mode); + uobj->uapi_object->type_class->lookup_put(uobj, mode); uverbs_uobject_put(uobj); return ERR_PTR(ret); } @@ -490,7 +498,7 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, { struct ib_uobject *ret; - if (!obj) + if (IS_ERR(obj)) return ERR_PTR(-EINVAL); /* @@ -812,18 +820,20 @@ static 
void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, */ if (reason == RDMA_REMOVE_DRIVER_REMOVE) { uverbs_user_mmap_disassociate(ufile); - if (ib_dev->disassociate_ucontext) - ib_dev->disassociate_ucontext(ucontext); + if (ib_dev->ops.disassociate_ucontext) + ib_dev->ops.disassociate_ucontext(ucontext); } ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); + rdma_restrack_del(&ucontext->res); + /* * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove * the error return. */ - ret = ib_dev->dealloc_ucontext(ucontext); + ret = ib_dev->ops.dealloc_ucontext(ucontext); WARN_ON(ret); ufile->ucontext = NULL; diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 4886d2bba7c7..be6b8e1257d0 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -118,43 +118,67 @@ void release_ufile_idr_uobject(struct ib_uverbs_file *ufile); * Depending on ID the slot pointer in the radix tree points at one of these * structs. 
*/ -struct uverbs_api_object { - const struct uverbs_obj_type *type_attrs; - const struct uverbs_obj_type_class *type_class; -}; struct uverbs_api_ioctl_method { - int (__rcu *handler)(struct ib_uverbs_file *ufile, - struct uverbs_attr_bundle *ctx); + int(__rcu *handler)(struct uverbs_attr_bundle *attrs); DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN); u16 bundle_size; u8 use_stack:1; u8 driver_method:1; + u8 disabled:1; + u8 has_udata:1; u8 key_bitmap_len; u8 destroy_bkey; }; +struct uverbs_api_write_method { + int (*handler)(struct uverbs_attr_bundle *attrs); + u8 disabled:1; + u8 is_ex:1; + u8 has_udata:1; + u8 has_resp:1; + u8 req_size; + u8 resp_size; +}; + struct uverbs_api_attr { struct uverbs_attr_spec spec; }; -struct uverbs_api_object; struct uverbs_api { /* radix tree contains struct uverbs_api_* pointers */ struct radix_tree_root radix; enum rdma_driver_id driver_id; + + unsigned int num_write; + unsigned int num_write_ex; + struct uverbs_api_write_method notsupp_method; + const struct uverbs_api_write_method **write_methods; + const struct uverbs_api_write_method **write_ex_methods; }; +/* + * Get an uverbs_api_object that corresponds to the given object_id. + * Note: + * -ENOMSG means that any object is allowed to match during lookup. 
+ */ static inline const struct uverbs_api_object * uapi_get_object(struct uverbs_api *uapi, u16 object_id) { - return radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id)); + const struct uverbs_api_object *res; + + if (object_id == UVERBS_IDR_ANY_OBJECT) + return ERR_PTR(-ENOMSG); + + res = radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id)); + if (!res) + return ERR_PTR(-ENOENT); + + return res; } char *uapi_key_format(char *S, unsigned int key); -struct uverbs_api *uverbs_alloc_api( - const struct uverbs_object_tree_def *const *driver_specs, - enum rdma_driver_id driver_id); +struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev); void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev); void uverbs_disassociate_api(struct uverbs_api *uapi); void uverbs_destroy_api(struct uverbs_api *uapi); @@ -162,4 +186,37 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, unsigned int num_attrs); void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile); +extern const struct uapi_definition uverbs_def_obj_counters[]; +extern const struct uapi_definition uverbs_def_obj_cq[]; +extern const struct uapi_definition uverbs_def_obj_device[]; +extern const struct uapi_definition uverbs_def_obj_dm[]; +extern const struct uapi_definition uverbs_def_obj_flow_action[]; +extern const struct uapi_definition uverbs_def_obj_intf[]; +extern const struct uapi_definition uverbs_def_obj_mr[]; +extern const struct uapi_definition uverbs_def_write_intf[]; + +static inline const struct uverbs_api_write_method * +uapi_get_method(const struct uverbs_api *uapi, u32 command) +{ + u32 cmd_idx = command & IB_USER_VERBS_CMD_COMMAND_MASK; + + if (command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED | + IB_USER_VERBS_CMD_COMMAND_MASK)) + return ERR_PTR(-EINVAL); + + if (command & IB_USER_VERBS_CMD_FLAG_EXTENDED) { + if (cmd_idx >= uapi->num_write_ex) + return ERR_PTR(-EOPNOTSUPP); + return uapi->write_ex_methods[cmd_idx]; + } + + if (cmd_idx >= 
uapi->num_write) + return ERR_PTR(-EOPNOTSUPP); + return uapi->write_methods[cmd_idx]; +} + +void uverbs_fill_udata(struct uverbs_attr_bundle *bundle, + struct ib_udata *udata, unsigned int attr_in, + unsigned int attr_out); + #endif /* RDMA_CORE_H */ diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 06d8657ce583..46a5c553c624 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -32,6 +32,7 @@ static const char *type2str(enum rdma_restrack_type type) [RDMA_RESTRACK_QP] = "QP", [RDMA_RESTRACK_CM_ID] = "CM_ID", [RDMA_RESTRACK_MR] = "MR", + [RDMA_RESTRACK_CTX] = "CTX", }; return names[type]; @@ -130,31 +131,14 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) res)->id.device; case RDMA_RESTRACK_MR: return container_of(res, struct ib_mr, res)->device; + case RDMA_RESTRACK_CTX: + return container_of(res, struct ib_ucontext, res)->device; default: WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type); return NULL; } } -static bool res_is_user(struct rdma_restrack_entry *res) -{ - switch (res->type) { - case RDMA_RESTRACK_PD: - return container_of(res, struct ib_pd, res)->uobject; - case RDMA_RESTRACK_CQ: - return container_of(res, struct ib_cq, res)->uobject; - case RDMA_RESTRACK_QP: - return container_of(res, struct ib_qp, res)->uobject; - case RDMA_RESTRACK_CM_ID: - return !res->kern_name; - case RDMA_RESTRACK_MR: - return container_of(res, struct ib_mr, res)->pd->uobject; - default: - WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type); - return false; - } -} - void rdma_restrack_set_task(struct rdma_restrack_entry *res, const char *caller) { @@ -170,17 +154,17 @@ void rdma_restrack_set_task(struct rdma_restrack_entry *res, } EXPORT_SYMBOL(rdma_restrack_set_task); -void rdma_restrack_add(struct rdma_restrack_entry *res) +static void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); if (!dev) return; - 
if (res->type != RDMA_RESTRACK_CM_ID || !res_is_user(res)) + if (res->type != RDMA_RESTRACK_CM_ID || rdma_is_kernel_res(res)) res->task = NULL; - if (res_is_user(res)) { + if (!rdma_is_kernel_res(res)) { if (!res->task) rdma_restrack_set_task(res, NULL); res->kern_name = NULL; @@ -196,7 +180,28 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) hash_add(dev->res.hash, &res->node, res->type); up_write(&dev->res.rwsem); } -EXPORT_SYMBOL(rdma_restrack_add); + +/** + * rdma_restrack_kadd() - add kernel object to the resource tracking database + * @res: resource entry + */ +void rdma_restrack_kadd(struct rdma_restrack_entry *res) +{ + res->user = false; + rdma_restrack_add(res); +} +EXPORT_SYMBOL(rdma_restrack_kadd); + +/** + * rdma_restrack_uadd() - add user object to the resource tracking database + * @res: resource entry + */ +void rdma_restrack_uadd(struct rdma_restrack_entry *res) +{ + res->user = true; + rdma_restrack_add(res); +} +EXPORT_SYMBOL(rdma_restrack_uadd); int __must_check rdma_restrack_get(struct rdma_restrack_entry *res) { diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index be5ba5e15496..97e6d7b69abf 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1147,7 +1147,7 @@ static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); - rdma_destroy_ah(sm_ah->ah); + rdma_destroy_ah(sm_ah->ah, 0); kfree(sm_ah); } @@ -2276,7 +2276,8 @@ static void update_sm_ah(struct work_struct *work) cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); } - new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr); + new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr, + RDMA_CREATE_AH_SLEEPABLE); if (IS_ERR(new_ah->ah)) { pr_warn("Couldn't create new SM AH\n"); kfree(new_ah); diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 1143c0448666..1efadbccf394 100644 --- a/drivers/infiniband/core/security.c +++ 
b/drivers/infiniband/core/security.c @@ -626,10 +626,10 @@ int ib_security_modify_qp(struct ib_qp *qp, } if (!ret) - ret = real_qp->device->modify_qp(real_qp, - qp_attr, - qp_attr_mask, - udata); + ret = real_qp->device->ops.modify_qp(real_qp, + qp_attr, + qp_attr_mask, + udata); if (new_pps) { /* Clean up the lists and free the appropriate diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index 33c91c8a16e9..91d9b353ab85 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -67,7 +67,7 @@ static inline enum smi_action smi_check_local_smp(struct ib_smp *smp, { /* C14-9:3 -- We're at the end of the DR segment of path */ /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ - return ((device->process_mad && + return ((device->ops.process_mad && !ib_get_smp_direction(smp) && (smp->hop_ptr == smp->hop_cnt + 1)) ? IB_SMI_HANDLE : IB_SMI_DISCARD); @@ -82,7 +82,7 @@ static inline enum smi_action smi_check_local_returning_smp(struct ib_smp *smp, { /* C14-13:3 -- We're at the end of the DR segment of path */ /* C14-13:4 -- Hop Pointer == 0 -> give to SM */ - return ((device->process_mad && + return ((device->ops.process_mad && ib_get_smp_direction(smp) && !smp->hop_ptr) ? 
IB_SMI_HANDLE : IB_SMI_DISCARD); } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 6fcce2c206c6..80f68eb0ba5c 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -462,7 +462,7 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr, u16 out_mad_pkey_index = 0; ssize_t ret; - if (!dev->process_mad) + if (!dev->ops.process_mad) return -ENOSYS; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -481,11 +481,11 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr, if (attr != IB_PMA_CLASS_PORT_INFO) in_mad->data[41] = port_num; /* PortSelect field */ - if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY, - port_num, NULL, NULL, - (const struct ib_mad_hdr *)in_mad, mad_size, - (struct ib_mad_hdr *)out_mad, &mad_size, - &out_mad_pkey_index) & + if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, + port_num, NULL, NULL, + (const struct ib_mad_hdr *)in_mad, mad_size, + (struct ib_mad_hdr *)out_mad, &mad_size, + &out_mad_pkey_index) & (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) != (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) { ret = -EINVAL; @@ -786,7 +786,7 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats, if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan)) return 0; - ret = dev->get_hw_stats(dev, stats, port_num, index); + ret = dev->ops.get_hw_stats(dev, stats, port_num, index); if (ret < 0) return ret; if (ret == stats->num_counters) @@ -946,7 +946,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, struct rdma_hw_stats *stats; int i, ret; - stats = device->alloc_hw_stats(device, port_num); + stats = device->ops.alloc_hw_stats(device, port_num); if (!stats) return; @@ -964,8 +964,8 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, if (!hsag) goto err_free_stats; - ret = device->get_hw_stats(device, stats, port_num, - stats->num_counters); + ret = 
device->ops.get_hw_stats(device, stats, port_num, + stats->num_counters); if (ret != stats->num_counters) goto err_free_hsag; @@ -1057,7 +1057,7 @@ static int add_port(struct ib_device *device, int port_num, goto err_put; } - if (device->process_mad) { + if (device->ops.process_mad) { p->pma_table = get_counter_table(device, port_num); ret = sysfs_create_group(&p->kobj, p->pma_table); if (ret) @@ -1124,7 +1124,7 @@ static int add_port(struct ib_device *device, int port_num, * port, so holder should be device. Therefore skip per port conunter * initialization. */ - if (device->alloc_hw_stats && port_num) + if (device->ops.alloc_hw_stats && port_num) setup_hw_stats(device, p, port_num); list_add_tail(&p->kobj.entry, &device->port_list); @@ -1245,7 +1245,7 @@ static ssize_t node_desc_store(struct device *device, struct ib_device_modify desc = {}; int ret; - if (!dev->modify_device) + if (!dev->ops.modify_device) return -EIO; memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX)); @@ -1341,7 +1341,7 @@ int ib_device_register_sysfs(struct ib_device *device, } } - if (device->alloc_hw_stats) + if (device->ops.alloc_hw_stats) setup_hw_stats(device, NULL, 0); return 0; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 73332b9a25b5..7541fbaf58a3 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1242,7 +1242,7 @@ static void ib_ucm_add_one(struct ib_device *device) dev_t base; struct ib_ucm_device *ucm_dev; - if (!device->alloc_ucontext || !rdma_cap_ib_cm(device, 1)) + if (!device->ops.alloc_ucontext || !rdma_cap_ib_cm(device, 1)) return; ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 676c1fd1119d..a4ec43093cb3 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -146,15 +146,12 @@ static int invalidate_range_start_trampoline(struct ib_umem_odp *item, } static int 
ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end, - bool blockable) + const struct mmu_notifier_range *range) { struct ib_ucontext_per_mm *per_mm = container_of(mn, struct ib_ucontext_per_mm, mn); - if (blockable) + if (range->blockable) down_read(&per_mm->umem_rwsem); else if (!down_read_trylock(&per_mm->umem_rwsem)) return -EAGAIN; @@ -169,9 +166,10 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, return 0; } - return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, end, + return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start, + range->end, invalidate_range_start_trampoline, - blockable, NULL); + range->blockable, NULL); } static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start, @@ -182,9 +180,7 @@ static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start, } static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end) + const struct mmu_notifier_range *range) { struct ib_ucontext_per_mm *per_mm = container_of(mn, struct ib_ucontext_per_mm, mn); @@ -192,8 +188,8 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn, if (unlikely(!per_mm->active)) return; - rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, - end, + rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start, + range->end, invalidate_range_end_trampoline, true, NULL); up_read(&per_mm->umem_rwsem); } @@ -647,8 +643,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, flags, local_page_list, NULL, NULL); up_read(&owning_mm->mmap_sem); - if (npages < 0) + if (npages < 0) { + if (npages != -EAGAIN) + pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages); + else + pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages); break; + } bcnt -= 
min_t(size_t, npages << PAGE_SHIFT, bcnt); mutex_lock(&umem_odp->umem_mutex); @@ -666,8 +667,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, ret = ib_umem_odp_map_dma_single_page( umem_odp, k, local_page_list[j], access_mask, current_seq); - if (ret < 0) + if (ret < 0) { + if (ret != -EAGAIN) + pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret); + else + pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret); break; + } p = page_to_phys(local_page_list[j]); k++; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index f55f48f6b272..de8d31ab8945 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -88,10 +88,9 @@ enum { struct ib_umad_port { struct cdev cdev; - struct device *dev; - + struct device dev; struct cdev sm_cdev; - struct device *sm_dev; + struct device sm_dev; struct semaphore sm_sem; struct mutex file_mutex; @@ -104,8 +103,8 @@ struct ib_umad_port { }; struct ib_umad_device { - struct kobject kobj; - struct ib_umad_port port[0]; + struct kref kref; + struct ib_umad_port ports[]; }; struct ib_umad_file { @@ -130,8 +129,6 @@ struct ib_umad_packet { struct ib_user_mad mad; }; -static struct class *umad_class; - static const dev_t base_umad_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) + IB_UMAD_NUM_FIXED_MINOR; @@ -143,17 +140,23 @@ static DEFINE_IDA(umad_ida); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device, void *client_data); -static void ib_umad_release_dev(struct kobject *kobj) +static void ib_umad_dev_free(struct kref *kref) { struct ib_umad_device *dev = - container_of(kobj, struct ib_umad_device, kobj); + container_of(kref, struct ib_umad_device, kref); kfree(dev); } -static struct kobj_type ib_umad_dev_ktype = { - .release = ib_umad_release_dev, -}; +static void 
ib_umad_dev_get(struct ib_umad_device *dev) +{ + kref_get(&dev->kref); +} + +static void ib_umad_dev_put(struct ib_umad_device *dev) +{ + kref_put(&dev->kref, ib_umad_dev_free); +} static int hdr_size(struct ib_umad_file *file) { @@ -205,7 +208,7 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_umad_packet *packet = send_wc->send_buf->context[0]; dequeue_send(file, packet); - rdma_destroy_ah(packet->msg->ah); + rdma_destroy_ah(packet->msg->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { @@ -621,7 +624,7 @@ err_send: err_msg: ib_free_send_mad(packet->msg); err_ah: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); err_up: mutex_unlock(&file->mutex); err: @@ -657,7 +660,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, mutex_lock(&file->mutex); if (!file->port->ib_dev) { - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent: invalid device\n"); ret = -EPIPE; goto out; @@ -669,7 +672,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, } if (ureq.qpn != 0 && ureq.qpn != 1) { - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent: invalid QPN %d specified\n", ureq.qpn); ret = -EINVAL; @@ -680,7 +683,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, if (!__get_agent(file, agent_id)) goto found; - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent: Max Agents (%u) reached\n", IB_UMAD_MAX_AGENTS); ret = -ENOMEM; @@ -725,10 +728,10 @@ found: if (!file->already_used) { file->already_used = 1; if (!file->use_pkey_index) { - dev_warn(file->port->dev, + dev_warn(&file->port->dev, "process %s did not enable P_Key index support.\n", current->comm); - dev_warn(file->port->dev, + dev_warn(&file->port->dev, " Documentation/infiniband/user_mad.txt has info on the new ABI.\n"); } } @@ -759,7 +762,7 @@ static int 
ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg) mutex_lock(&file->mutex); if (!file->port->ib_dev) { - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent2: invalid device\n"); ret = -EPIPE; goto out; @@ -771,7 +774,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg) } if (ureq.qpn != 0 && ureq.qpn != 1) { - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent2: invalid QPN %d specified\n", ureq.qpn); ret = -EINVAL; @@ -779,7 +782,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg) } if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) { - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n", ureq.flags, IB_USER_MAD_REG_FLAGS_CAP); ret = -EINVAL; @@ -796,7 +799,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg) if (!__get_agent(file, agent_id)) goto found; - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent2: Max Agents (%u) reached\n", IB_UMAD_MAX_AGENTS); ret = -ENOMEM; @@ -808,7 +811,7 @@ found: req.mgmt_class = ureq.mgmt_class; req.mgmt_class_version = ureq.mgmt_class_version; if (ureq.oui & 0xff000000) { - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n", ureq.oui); ret = -EINVAL; @@ -986,8 +989,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp) goto out; } - kobject_get(&port->umad_dev->kobj); - + ib_umad_dev_get(port->umad_dev); out: mutex_unlock(&port->file_mutex); return ret; @@ -1025,8 +1027,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp) mutex_unlock(&file->port->file_mutex); kfree(file); - kobject_put(&dev->kobj); - + ib_umad_dev_put(dev); return 0; } @@ -1076,8 +1077,7 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) if (ret) goto err_clr_sm_cap; - 
kobject_get(&port->umad_dev->kobj); - + ib_umad_dev_get(port->umad_dev); return 0; err_clr_sm_cap: @@ -1106,8 +1106,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) up(&port->sm_sem); - kobject_put(&port->umad_dev->kobj); - + ib_umad_dev_put(port->umad_dev); return ret; } @@ -1124,7 +1123,7 @@ static struct ib_client umad_client = { .remove = ib_umad_remove_one }; -static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, +static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_umad_port *port = dev_get_drvdata(dev); @@ -1134,9 +1133,9 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev)); } -static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); +static DEVICE_ATTR_RO(ibdev); -static ssize_t show_port(struct device *dev, struct device_attribute *attr, +static ssize_t port_show(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_umad_port *port = dev_get_drvdata(dev); @@ -1146,10 +1145,59 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%d\n", port->port_num); } -static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); +static DEVICE_ATTR_RO(port); -static CLASS_ATTR_STRING(abi_version, S_IRUGO, - __stringify(IB_USER_MAD_ABI_VERSION)); +static struct attribute *umad_class_dev_attrs[] = { + &dev_attr_ibdev.attr, + &dev_attr_port.attr, + NULL, +}; +ATTRIBUTE_GROUPS(umad_class_dev); + +static char *umad_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); +} + +static ssize_t abi_version_show(struct class *class, + struct class_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); +} +static CLASS_ATTR_RO(abi_version); + +static struct attribute *umad_class_attrs[] = { + &class_attr_abi_version.attr, + NULL, +}; +ATTRIBUTE_GROUPS(umad_class); + 
+static struct class umad_class = { + .name = "infiniband_mad", + .devnode = umad_devnode, + .class_groups = umad_class_groups, + .dev_groups = umad_class_dev_groups, +}; + +static void ib_umad_release_port(struct device *device) +{ + struct ib_umad_port *port = dev_get_drvdata(device); + struct ib_umad_device *umad_dev = port->umad_dev; + + ib_umad_dev_put(umad_dev); +} + +static void ib_umad_init_port_dev(struct device *dev, + struct ib_umad_port *port, + const struct ib_device *device) +{ + device_initialize(dev); + ib_umad_dev_get(port->umad_dev); + dev->class = &umad_class; + dev->parent = device->dev.parent; + dev_set_drvdata(dev, port); + dev->release = ib_umad_release_port; +} static int ib_umad_init_port(struct ib_device *device, int port_num, struct ib_umad_device *umad_dev, @@ -1158,6 +1206,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, int devnum; dev_t base_umad; dev_t base_issm; + int ret; devnum = ida_alloc_max(&umad_ida, IB_UMAD_MAX_PORTS - 1, GFP_KERNEL); if (devnum < 0) @@ -1172,63 +1221,41 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, } port->ib_dev = device; + port->umad_dev = umad_dev; port->port_num = port_num; sema_init(&port->sm_sem, 1); mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); + ib_umad_init_port_dev(&port->dev, port, device); + port->dev.devt = base_umad; + dev_set_name(&port->dev, "umad%d", port->dev_num); cdev_init(&port->cdev, &umad_fops); port->cdev.owner = THIS_MODULE; - cdev_set_parent(&port->cdev, &umad_dev->kobj); - kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); - if (cdev_add(&port->cdev, base_umad, 1)) - goto err_cdev; - port->dev = device_create(umad_class, device->dev.parent, - port->cdev.dev, port, - "umad%d", port->dev_num); - if (IS_ERR(port->dev)) + ret = cdev_device_add(&port->cdev, &port->dev); + if (ret) goto err_cdev; - if (device_create_file(port->dev, &dev_attr_ibdev)) - goto err_dev; - if (device_create_file(port->dev, 
&dev_attr_port)) - goto err_dev; - + ib_umad_init_port_dev(&port->sm_dev, port, device); + port->sm_dev.devt = base_issm; + dev_set_name(&port->sm_dev, "issm%d", port->dev_num); cdev_init(&port->sm_cdev, &umad_sm_fops); port->sm_cdev.owner = THIS_MODULE; - cdev_set_parent(&port->sm_cdev, &umad_dev->kobj); - kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); - if (cdev_add(&port->sm_cdev, base_issm, 1)) - goto err_sm_cdev; - - port->sm_dev = device_create(umad_class, device->dev.parent, - port->sm_cdev.dev, port, - "issm%d", port->dev_num); - if (IS_ERR(port->sm_dev)) - goto err_sm_cdev; - - if (device_create_file(port->sm_dev, &dev_attr_ibdev)) - goto err_sm_dev; - if (device_create_file(port->sm_dev, &dev_attr_port)) - goto err_sm_dev; - - return 0; -err_sm_dev: - device_destroy(umad_class, port->sm_cdev.dev); + ret = cdev_device_add(&port->sm_cdev, &port->sm_dev); + if (ret) + goto err_dev; -err_sm_cdev: - cdev_del(&port->sm_cdev); + return 0; err_dev: - device_destroy(umad_class, port->cdev.dev); - + put_device(&port->sm_dev); + cdev_device_del(&port->cdev, &port->dev); err_cdev: - cdev_del(&port->cdev); + put_device(&port->dev); ida_free(&umad_ida, devnum); - - return -1; + return ret; } static void ib_umad_kill_port(struct ib_umad_port *port) @@ -1236,17 +1263,11 @@ static void ib_umad_kill_port(struct ib_umad_port *port) struct ib_umad_file *file; int id; - dev_set_drvdata(port->dev, NULL); - dev_set_drvdata(port->sm_dev, NULL); - - device_destroy(umad_class, port->cdev.dev); - device_destroy(umad_class, port->sm_cdev.dev); - - cdev_del(&port->cdev); - cdev_del(&port->sm_cdev); - mutex_lock(&port->file_mutex); + /* Mark ib_dev NULL and block ioctl or other file ops to progress + * further. 
+ */ port->ib_dev = NULL; list_for_each_entry(file, &port->file_list, port_list) { @@ -1260,6 +1281,11 @@ static void ib_umad_kill_port(struct ib_umad_port *port) } mutex_unlock(&port->file_mutex); + + cdev_device_del(&port->sm_cdev, &port->sm_dev); + put_device(&port->sm_dev); + cdev_device_del(&port->cdev, &port->dev); + put_device(&port->dev); ida_free(&umad_ida, port->dev_num); } @@ -1272,22 +1298,17 @@ static void ib_umad_add_one(struct ib_device *device) s = rdma_start_port(device); e = rdma_end_port(device); - umad_dev = kzalloc(sizeof *umad_dev + - (e - s + 1) * sizeof (struct ib_umad_port), - GFP_KERNEL); + umad_dev = kzalloc(struct_size(umad_dev, ports, e - s + 1), GFP_KERNEL); if (!umad_dev) return; - kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype); - + kref_init(&umad_dev->kref); for (i = s; i <= e; ++i) { if (!rdma_cap_ib_mad(device, i)) continue; - umad_dev->port[i - s].umad_dev = umad_dev; - if (ib_umad_init_port(device, i, umad_dev, - &umad_dev->port[i - s])) + &umad_dev->ports[i - s])) goto err; count++; @@ -1305,10 +1326,10 @@ err: if (!rdma_cap_ib_mad(device, i)) continue; - ib_umad_kill_port(&umad_dev->port[i - s]); + ib_umad_kill_port(&umad_dev->ports[i - s]); } free: - kobject_put(&umad_dev->kobj); + ib_umad_dev_put(umad_dev); } static void ib_umad_remove_one(struct ib_device *device, void *client_data) @@ -1321,15 +1342,9 @@ static void ib_umad_remove_one(struct ib_device *device, void *client_data) for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) { if (rdma_cap_ib_mad(device, i + rdma_start_port(device))) - ib_umad_kill_port(&umad_dev->port[i]); + ib_umad_kill_port(&umad_dev->ports[i]); } - - kobject_put(&umad_dev->kobj); -} - -static char *umad_devnode(struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); + ib_umad_dev_put(umad_dev); } static int __init ib_umad_init(void) @@ -1338,7 +1353,7 @@ static int __init ib_umad_init(void) ret = 
register_chrdev_region(base_umad_dev, IB_UMAD_NUM_FIXED_MINOR * 2, - "infiniband_mad"); + umad_class.name); if (ret) { pr_err("couldn't register device number\n"); goto out; @@ -1346,28 +1361,19 @@ static int __init ib_umad_init(void) ret = alloc_chrdev_region(&dynamic_umad_dev, 0, IB_UMAD_NUM_DYNAMIC_MINOR * 2, - "infiniband_mad"); + umad_class.name); if (ret) { pr_err("couldn't register dynamic device number\n"); goto out_alloc; } dynamic_issm_dev = dynamic_umad_dev + IB_UMAD_NUM_DYNAMIC_MINOR; - umad_class = class_create(THIS_MODULE, "infiniband_mad"); - if (IS_ERR(umad_class)) { - ret = PTR_ERR(umad_class); + ret = class_register(&umad_class); + if (ret) { pr_err("couldn't create class infiniband_mad\n"); goto out_chrdev; } - umad_class->devnode = umad_devnode; - - ret = class_create_file(umad_class, &class_attr_abi_version.attr); - if (ret) { - pr_err("couldn't create abi_version attribute\n"); - goto out_class; - } - ret = ib_register_client(&umad_client); if (ret) { pr_err("couldn't register ib_umad client\n"); @@ -1377,7 +1383,7 @@ static int __init ib_umad_init(void) return 0; out_class: - class_destroy(umad_class); + class_unregister(&umad_class); out_chrdev: unregister_chrdev_region(dynamic_umad_dev, @@ -1394,7 +1400,7 @@ out: static void __exit ib_umad_cleanup(void) { ib_unregister_client(&umad_client); - class_destroy(umad_class); + class_unregister(&umad_class); unregister_chrdev_region(base_umad_dev, IB_UMAD_NUM_FIXED_MINOR * 2); unregister_chrdev_region(dynamic_umad_dev, diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index c97935a0c7c6..ea0bc6885517 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -161,9 +161,6 @@ struct ib_uverbs_file { struct mutex umap_lock; struct list_head umaps; - u64 uverbs_cmd_mask; - u64 uverbs_ex_cmd_mask; - struct idr idr; /* spinlock protects write access to idr */ spinlock_t idr_lock; @@ -249,7 +246,6 @@ int uverbs_dealloc_mw(struct ib_mw *mw); 
void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj); -void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata); long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); struct ib_uverbs_flow_spec { @@ -297,63 +293,29 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION); extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM); extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS); -#define IB_UVERBS_DECLARE_CMD(name) \ - ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ - const char __user *buf, int in_len, \ - int out_len) - -IB_UVERBS_DECLARE_CMD(get_context); -IB_UVERBS_DECLARE_CMD(query_device); -IB_UVERBS_DECLARE_CMD(query_port); -IB_UVERBS_DECLARE_CMD(alloc_pd); -IB_UVERBS_DECLARE_CMD(dealloc_pd); -IB_UVERBS_DECLARE_CMD(reg_mr); -IB_UVERBS_DECLARE_CMD(rereg_mr); -IB_UVERBS_DECLARE_CMD(dereg_mr); -IB_UVERBS_DECLARE_CMD(alloc_mw); -IB_UVERBS_DECLARE_CMD(dealloc_mw); -IB_UVERBS_DECLARE_CMD(create_comp_channel); -IB_UVERBS_DECLARE_CMD(create_cq); -IB_UVERBS_DECLARE_CMD(resize_cq); -IB_UVERBS_DECLARE_CMD(poll_cq); -IB_UVERBS_DECLARE_CMD(req_notify_cq); -IB_UVERBS_DECLARE_CMD(destroy_cq); -IB_UVERBS_DECLARE_CMD(create_qp); -IB_UVERBS_DECLARE_CMD(open_qp); -IB_UVERBS_DECLARE_CMD(query_qp); -IB_UVERBS_DECLARE_CMD(modify_qp); -IB_UVERBS_DECLARE_CMD(destroy_qp); -IB_UVERBS_DECLARE_CMD(post_send); -IB_UVERBS_DECLARE_CMD(post_recv); -IB_UVERBS_DECLARE_CMD(post_srq_recv); -IB_UVERBS_DECLARE_CMD(create_ah); -IB_UVERBS_DECLARE_CMD(destroy_ah); -IB_UVERBS_DECLARE_CMD(attach_mcast); -IB_UVERBS_DECLARE_CMD(detach_mcast); -IB_UVERBS_DECLARE_CMD(create_srq); -IB_UVERBS_DECLARE_CMD(modify_srq); -IB_UVERBS_DECLARE_CMD(query_srq); -IB_UVERBS_DECLARE_CMD(destroy_srq); -IB_UVERBS_DECLARE_CMD(create_xsrq); -IB_UVERBS_DECLARE_CMD(open_xrcd); -IB_UVERBS_DECLARE_CMD(close_xrcd); - -#define IB_UVERBS_DECLARE_EX_CMD(name) \ - int ib_uverbs_ex_##name(struct 
ib_uverbs_file *file, \ - struct ib_udata *ucore, \ - struct ib_udata *uhw) - -IB_UVERBS_DECLARE_EX_CMD(create_flow); -IB_UVERBS_DECLARE_EX_CMD(destroy_flow); -IB_UVERBS_DECLARE_EX_CMD(query_device); -IB_UVERBS_DECLARE_EX_CMD(create_cq); -IB_UVERBS_DECLARE_EX_CMD(create_qp); -IB_UVERBS_DECLARE_EX_CMD(create_wq); -IB_UVERBS_DECLARE_EX_CMD(modify_wq); -IB_UVERBS_DECLARE_EX_CMD(destroy_wq); -IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); -IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); -IB_UVERBS_DECLARE_EX_CMD(modify_qp); -IB_UVERBS_DECLARE_EX_CMD(modify_cq); +/* + * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the + * PortInfo CapabilityMask, but was extended with unique bits. + */ +static inline u32 make_port_cap_flags(const struct ib_port_attr *attr) +{ + u32 res; + + /* All IBA CapabilityMask bits are passed through here, except bit 26, + * which is overridden with IP_BASED_GIDS. This is due to a historical + * mistake in the implementation of IP_BASED_GIDS. Otherwise all other + * bits match the IBA definition across all kernel versions. + */ + res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS; + + if (attr->ip_gids) + res |= IB_UVERBS_PCF_IP_BASED_GIDS; + + return res; +} + +void copy_port_attr_to_resp(struct ib_port_attr *attr, + struct ib_uverbs_query_port_resp *resp, + struct ib_device *ib_dev, u8 port_num); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a93853770e3c..6b12cc5f97b2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -47,11 +47,134 @@ #include "uverbs.h" #include "core_priv.h" +/* + * Copy a response to userspace. If the provided 'resp' is larger than the + * user buffer it is silently truncated. If the user provided a larger buffer + * then the trailing portion is zero filled. + * + * These semantics are intended to support future extension of the output + * structures. 
+ */ +static int uverbs_response(struct uverbs_attr_bundle *attrs, const void *resp, + size_t resp_len) +{ + int ret; + + if (copy_to_user(attrs->ucore.outbuf, resp, + min(attrs->ucore.outlen, resp_len))) + return -EFAULT; + + if (resp_len < attrs->ucore.outlen) { + /* + * Zero fill any extra memory that user + * space might have provided. + */ + ret = clear_user(attrs->ucore.outbuf + resp_len, + attrs->ucore.outlen - resp_len); + if (ret) + return -EFAULT; + } + + return 0; +} + +/* + * Copy a request from userspace. If the provided 'req' is larger than the + * user buffer then the user buffer is zero extended into the 'req'. If 'req' + * is smaller than the user buffer then the uncopied bytes in the user buffer + * must be zero. + */ +static int uverbs_request(struct uverbs_attr_bundle *attrs, void *req, + size_t req_len) +{ + if (copy_from_user(req, attrs->ucore.inbuf, + min(attrs->ucore.inlen, req_len))) + return -EFAULT; + + if (attrs->ucore.inlen < req_len) { + memset(req + attrs->ucore.inlen, 0, + req_len - attrs->ucore.inlen); + } else if (attrs->ucore.inlen > req_len) { + if (!ib_is_buffer_cleared(attrs->ucore.inbuf + req_len, + attrs->ucore.inlen - req_len)) + return -EOPNOTSUPP; + } + return 0; +} + +/* + * Generate the value for the 'response_length' protocol used by write_ex. + * This is the number of bytes the kernel actually wrote. Userspace can use + * this to detect what structure members in the response the kernel + * understood. + */ +static u32 uverbs_response_length(struct uverbs_attr_bundle *attrs, + size_t resp_len) +{ + return min_t(size_t, attrs->ucore.outlen, resp_len); +} + +/* + * The iterator version of the request interface is for handlers that need to + * step over a flex array at the end of a command header. 
+ */ +struct uverbs_req_iter { + const void __user *cur; + const void __user *end; +}; + +static int uverbs_request_start(struct uverbs_attr_bundle *attrs, + struct uverbs_req_iter *iter, + void *req, + size_t req_len) +{ + if (attrs->ucore.inlen < req_len) + return -ENOSPC; + + if (copy_from_user(req, attrs->ucore.inbuf, req_len)) + return -EFAULT; + + iter->cur = attrs->ucore.inbuf + req_len; + iter->end = attrs->ucore.inbuf + attrs->ucore.inlen; + return 0; +} + +static int uverbs_request_next(struct uverbs_req_iter *iter, void *val, + size_t len) +{ + if (iter->cur + len > iter->end) + return -ENOSPC; + + if (copy_from_user(val, iter->cur, len)) + return -EFAULT; + + iter->cur += len; + return 0; +} + +static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter, + size_t len) +{ + const void __user *res = iter->cur; + + if (iter->cur + len > iter->end) + return ERR_PTR(-ENOSPC); + iter->cur += len; + return res; +} + +static int uverbs_request_finish(struct uverbs_req_iter *iter) +{ + if (!ib_is_buffer_cleared(iter->cur, iter->end - iter->cur)) + return -EOPNOTSUPP; + return 0; +} + static struct ib_uverbs_completion_event_file * -_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile) +_ib_uverbs_lookup_comp_file(s32 fd, const struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL, - fd, ufile); + fd, attrs); if (IS_ERR(uobj)) return (void *)uobj; @@ -65,24 +188,20 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile) #define ib_uverbs_lookup_comp_file(_fd, _ufile) \ _ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile) -ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_file *file = attrs->ufile; struct ib_uverbs_get_context cmd; struct ib_uverbs_get_context_resp resp; - struct ib_udata udata; struct ib_ucontext 
*ucontext; struct file *filp; struct ib_rdmacg_object cg_obj; struct ib_device *ib_dev; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; mutex_lock(&file->ucontext_lock); ib_dev = srcu_dereference(file->device->ib_dev, @@ -97,16 +216,11 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err; } - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); - ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); if (ret) goto err; - ucontext = ib_dev->alloc_ucontext(ib_dev, &udata); + ucontext = ib_dev->ops.alloc_ucontext(ib_dev, &attrs->driver_udata); if (IS_ERR(ucontext)) { ret = PTR_ERR(ucontext); goto err_alloc; @@ -141,13 +255,15 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err_fd; } - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_file; - } fd_install(resp.async_fd, filp); + ucontext->res.type = RDMA_RESTRACK_CTX; + rdma_restrack_uadd(&ucontext->res); + /* * Make sure that ib_uverbs_get_ucontext() sees the pointer update * only after all writes to setup the ucontext have completed @@ -156,7 +272,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, mutex_unlock(&file->ucontext_lock); - return in_len; + return 0; err_file: ib_uverbs_free_async_event_file(file); @@ -166,7 +282,7 @@ err_fd: put_unused_fd(resp.async_fd); err_free: - ib_dev->dealloc_ucontext(ucontext); + ib_dev->ops.dealloc_ucontext(ucontext); err_alloc: ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); @@ -224,57 +340,28 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext, resp->phys_port_cnt = ib_dev->phys_port_cnt; } -ssize_t 
ib_uverbs_query_device(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_query_device(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_query_device cmd; struct ib_uverbs_query_device_resp resp; struct ib_ucontext *ucontext; + int ret; - ucontext = ib_uverbs_get_ucontext(file); + ucontext = ib_uverbs_get_ucontext(attrs); if (IS_ERR(ucontext)) return PTR_ERR(ucontext); - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; memset(&resp, 0, sizeof resp); copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs); - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - -/* - * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the - * PortInfo CapabilityMask, but was extended with unique bits. - */ -static u32 make_port_cap_flags(const struct ib_port_attr *attr) -{ - u32 res; - - /* All IBA CapabilityMask bits are passed through here, except bit 26, - * which is overridden with IP_BASED_GIDS. This is due to a historical - * mistake in the implementation of IP_BASED_GIDS. Otherwise all other - * bits match the IBA definition across all kernel versions. 
- */ - res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS; - - if (attr->ip_gids) - res |= IB_UVERBS_PCF_IP_BASED_GIDS; - - return res; + return uverbs_response(attrs, &resp, sizeof(resp)); } -ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_query_port cmd; struct ib_uverbs_query_port_resp resp; @@ -283,88 +370,43 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, struct ib_ucontext *ucontext; struct ib_device *ib_dev; - ucontext = ib_uverbs_get_ucontext(file); + ucontext = ib_uverbs_get_ucontext(attrs); if (IS_ERR(ucontext)) return PTR_ERR(ucontext); ib_dev = ucontext->device; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; ret = ib_query_port(ib_dev, cmd.port_num, &attr); if (ret) return ret; memset(&resp, 0, sizeof resp); + copy_port_attr_to_resp(&attr, &resp, ib_dev, cmd.port_num); - resp.state = attr.state; - resp.max_mtu = attr.max_mtu; - resp.active_mtu = attr.active_mtu; - resp.gid_tbl_len = attr.gid_tbl_len; - resp.port_cap_flags = make_port_cap_flags(&attr); - resp.max_msg_sz = attr.max_msg_sz; - resp.bad_pkey_cntr = attr.bad_pkey_cntr; - resp.qkey_viol_cntr = attr.qkey_viol_cntr; - resp.pkey_tbl_len = attr.pkey_tbl_len; - - if (rdma_is_grh_required(ib_dev, cmd.port_num)) - resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED; - - if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) { - resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); - resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid); - } else { - resp.lid = ib_lid_cpu16(attr.lid); - resp.sm_lid = ib_lid_cpu16(attr.sm_lid); - } - resp.lmc = attr.lmc; - resp.max_vl_num = attr.max_vl_num; - resp.sm_sl = attr.sm_sl; - resp.subnet_timeout = attr.subnet_timeout; - resp.init_type_reply = attr.init_type_reply; - resp.active_width = 
attr.active_width; - resp.active_speed = attr.active_speed; - resp.phys_state = attr.phys_state; - resp.link_layer = rdma_port_get_link_layer(ib_dev, - cmd.port_num); - - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } -ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_alloc_pd cmd; struct ib_uverbs_alloc_pd_resp resp; - struct ib_udata udata; struct ib_uobject *uobj; struct ib_pd *pd; int ret; struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev); + uobj = uobj_alloc(UVERBS_OBJECT_PD, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata); + pd = ib_dev->ops.alloc_pd(ib_dev, uobj->context, &attrs->driver_udata); if (IS_ERR(pd)) { ret = PTR_ERR(pd); goto err; @@ -379,14 +421,13 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; pd->res.type = RDMA_RESTRACK_PD; - rdma_restrack_add(&pd->res); + rdma_restrack_uadd(&pd->res); - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } - return uobj_alloc_commit(uobj, in_len); + return uobj_alloc_commit(uobj); err_copy: ib_dealloc_pd(pd); @@ -396,17 +437,16 @@ err: return ret; } -ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, - const 
char __user *buf, - int in_len, int out_len) +static int ib_uverbs_dealloc_pd(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_dealloc_pd cmd; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file, - in_len); + return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, attrs); } struct xrcd_table_entry { @@ -494,13 +534,11 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev, } } -ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_device *ibudev = attrs->ufile->device; struct ib_uverbs_open_xrcd cmd; struct ib_uverbs_open_xrcd_resp resp; - struct ib_udata udata; struct ib_uxrcd_object *obj; struct ib_xrcd *xrcd = NULL; struct fd f = {NULL, 0}; @@ -509,18 +547,11 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, int new_xrcd = 0; struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - mutex_lock(&file->device->xrcd_tree_mutex); + mutex_lock(&ibudev->xrcd_tree_mutex); if (cmd.fd != -1) { /* search for file descriptor */ @@ -531,7 +562,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } inode = file_inode(f.file); - xrcd = find_xrcd(file->device, inode); + xrcd = find_xrcd(ibudev, inode); if (!xrcd && !(cmd.oflags & O_CREAT)) { /* no file descriptor. 
Need CREATE flag */ ret = -EAGAIN; @@ -544,7 +575,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } } - obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file, + obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, attrs, &ib_dev); if (IS_ERR(obj)) { ret = PTR_ERR(obj); @@ -552,7 +583,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } if (!xrcd) { - xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata); + xrcd = ib_dev->ops.alloc_xrcd(ib_dev, obj->uobject.context, + &attrs->driver_udata); if (IS_ERR(xrcd)) { ret = PTR_ERR(xrcd); goto err; @@ -574,29 +606,28 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, if (inode) { if (new_xrcd) { /* create new inode/xrcd table entry */ - ret = xrcd_table_insert(file->device, inode, xrcd); + ret = xrcd_table_insert(ibudev, inode, xrcd); if (ret) goto err_dealloc_xrcd; } atomic_inc(&xrcd->usecnt); } - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } if (f.file) fdput(f); - mutex_unlock(&file->device->xrcd_tree_mutex); + mutex_unlock(&ibudev->xrcd_tree_mutex); - return uobj_alloc_commit(&obj->uobject, in_len); + return uobj_alloc_commit(&obj->uobject); err_copy: if (inode) { if (new_xrcd) - xrcd_table_delete(file->device, inode); + xrcd_table_delete(ibudev, inode); atomic_dec(&xrcd->usecnt); } @@ -610,22 +641,21 @@ err_tree_mutex_unlock: if (f.file) fdput(f); - mutex_unlock(&file->device->xrcd_tree_mutex); + mutex_unlock(&ibudev->xrcd_tree_mutex); return ret; } -ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_close_xrcd(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_close_xrcd cmd; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - return 
uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file, - in_len); + return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, attrs); } int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, @@ -653,29 +683,19 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, return ret; } -ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_reg_mr cmd; struct ib_uverbs_reg_mr_resp resp; - struct ib_udata udata; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mr *mr; int ret; struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; @@ -684,11 +704,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (ret) return ret; - uobj = uobj_alloc(UVERBS_OBJECT_MR, file, &ib_dev); + uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err_free; @@ -703,8 +723,9 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, } } - mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, - cmd.access_flags, &udata); + mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, + cmd.access_flags, + &attrs->driver_udata); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto err_put; @@ -716,7 +737,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, mr->uobject = uobj; atomic_inc(&pd->usecnt); 
mr->res.type = RDMA_RESTRACK_MR; - rdma_restrack_add(&mr->res); + rdma_restrack_uadd(&mr->res); uobj->object = mr; @@ -725,14 +746,13 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, resp.rkey = mr->rkey; resp.mr_handle = uobj->id; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, in_len); + return uobj_alloc_commit(uobj); err_copy: ib_dereg_mr(mr); @@ -745,29 +765,19 @@ err_free: return ret; } -ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_rereg_mr cmd; struct ib_uverbs_rereg_mr_resp resp; - struct ib_udata udata; struct ib_pd *pd = NULL; struct ib_mr *mr; struct ib_pd *old_pd; int ret; struct ib_uobject *uobj; - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) return -EINVAL; @@ -777,7 +787,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) return -EINVAL; - uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file); + uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -796,7 +806,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, if (cmd.flags & IB_MR_REREG_PD) { pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, - file); + attrs); if (!pd) { ret = -EINVAL; goto put_uobjs; @@ -804,9 +814,10 @@ ssize_t 
ib_uverbs_rereg_mr(struct ib_uverbs_file *file, } old_pd = mr->pd; - ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start, - cmd.length, cmd.hca_va, - cmd.access_flags, pd, &udata); + ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start, + cmd.length, cmd.hca_va, + cmd.access_flags, pd, + &attrs->driver_udata); if (!ret) { if (cmd.flags & IB_MR_REREG_PD) { atomic_inc(&pd->usecnt); @@ -821,10 +832,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, resp.lkey = mr->lkey; resp.rkey = mr->rkey; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) - ret = -EFAULT; - else - ret = in_len; + ret = uverbs_response(attrs, &resp, sizeof(resp)); put_uobj_pd: if (cmd.flags & IB_MR_REREG_PD) @@ -836,54 +844,43 @@ put_uobjs: return ret; } -ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_dereg_mr cmd; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file, - in_len); + return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, attrs); } -ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_alloc_mw cmd; struct ib_uverbs_alloc_mw_resp resp; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mw *mw; - struct ib_udata udata; int ret; struct ib_device *ib_dev; - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev); + uobj = uobj_alloc(UVERBS_OBJECT_MW, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = 
uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err_free; } - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); - - mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata); + mw = pd->device->ops.alloc_mw(pd, cmd.mw_type, &attrs->driver_udata); if (IS_ERR(mw)) { ret = PTR_ERR(mw); goto err_put; @@ -900,13 +897,12 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, resp.rkey = mw->rkey; resp.mw_handle = uobj->id; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, in_len); + return uobj_alloc_commit(uobj); err_copy: uverbs_dealloc_mw(mw); @@ -917,36 +913,32 @@ err_free: return ret; } -ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_dealloc_mw(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_dealloc_mw cmd; + int ret; - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file, - in_len); + return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, attrs); } -ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_comp_channel cmd; struct ib_uverbs_create_comp_channel_resp resp; struct ib_uobject *uobj; struct ib_uverbs_completion_event_file *ev_file; struct ib_device *ib_dev; + int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if 
(copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev); + uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -956,25 +948,17 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, uobj); ib_uverbs_init_event_queue(&ev_file->ev_queue); - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) { uobj_alloc_abort(uobj); - return -EFAULT; + return ret; } - return uobj_alloc_commit(uobj, in_len); + return uobj_alloc_commit(uobj); } -static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw, - struct ib_uverbs_ex_create_cq *cmd, - size_t cmd_sz, - int (*cb)(struct ib_uverbs_file *file, - struct ib_ucq_object *obj, - struct ib_uverbs_ex_create_cq_resp *resp, - struct ib_udata *udata, - void *context), - void *context) +static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_ex_create_cq *cmd) { struct ib_ucq_object *obj; struct ib_uverbs_completion_event_file *ev_file = NULL; @@ -984,21 +968,16 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, struct ib_cq_init_attr attr = {}; struct ib_device *ib_dev; - if (cmd->comp_vector >= file->device->num_comp_vectors) + if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors) return ERR_PTR(-EINVAL); - obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file, + obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs, &ib_dev); if (IS_ERR(obj)) return obj; - if (!ib_dev->create_cq) { - ret = -EOPNOTSUPP; - goto err; - } - if (cmd->comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file); + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs); if (IS_ERR(ev_file)) { ret = 
PTR_ERR(ev_file); goto err; @@ -1013,11 +992,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, attr.cqe = cmd->cqe; attr.comp_vector = cmd->comp_vector; + attr.flags = cmd->flags; - if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) - attr.flags = cmd->flags; - - cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw); + cq = ib_dev->ops.create_cq(ib_dev, &attr, obj->uobject.context, + &attrs->driver_udata); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; @@ -1034,18 +1012,16 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); resp.base.cq_handle = obj->uobject.id; resp.base.cqe = cq->cqe; - - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_add(&cq->res); + rdma_restrack_uadd(&cq->res); - ret = cb(file, obj, &resp, ucore, context); + ret = uverbs_response(attrs, &resp, sizeof(resp)); if (ret) goto err_cb; - ret = uobj_alloc_commit(&obj->uobject, 0); + ret = uobj_alloc_commit(&obj->uobject); if (ret) return ERR_PTR(ret); return obj; @@ -1055,7 +1031,7 @@ err_cb: err_file: if (ev_file) - ib_uverbs_release_ucq(file, ev_file, obj); + ib_uverbs_release_ucq(attrs->ufile, ev_file, obj); err: uobj_alloc_abort(&obj->uobject); @@ -1063,41 +1039,16 @@ err: return ERR_PTR(ret); } -static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file, - struct ib_ucq_object *obj, - struct ib_uverbs_ex_create_cq_resp *resp, - struct ib_udata *ucore, void *context) -{ - if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) - return -EFAULT; - - return 0; -} - -ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_cq cmd; struct ib_uverbs_ex_create_cq cmd_ex; - struct 
ib_uverbs_create_cq_resp resp; - struct ib_udata ucore; - struct ib_udata uhw; struct ib_ucq_object *obj; + int ret; - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; - - ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response), - sizeof(cmd), sizeof(resp)); - - ib_uverbs_init_udata(&uhw, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; memset(&cmd_ex, 0, sizeof(cmd_ex)); cmd_ex.user_handle = cmd.user_handle; @@ -1105,43 +1056,19 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, cmd_ex.comp_vector = cmd.comp_vector; cmd_ex.comp_channel = cmd.comp_channel; - obj = create_cq(file, &ucore, &uhw, &cmd_ex, - offsetof(typeof(cmd_ex), comp_channel) + - sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb, - NULL); - - if (IS_ERR(obj)) - return PTR_ERR(obj); - - return in_len; -} - -static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file, - struct ib_ucq_object *obj, - struct ib_uverbs_ex_create_cq_resp *resp, - struct ib_udata *ucore, void *context) -{ - if (ib_copy_to_udata(ucore, resp, resp->response_length)) - return -EFAULT; - - return 0; + obj = create_cq(attrs, &cmd_ex); + return PTR_ERR_OR_ZERO(obj); } -int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_create_cq_resp resp; struct ib_uverbs_ex_create_cq cmd; struct ib_ucq_object *obj; - int err; - - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; + int ret; - err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); - if (err) - return err; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; if (cmd.comp_mask) return -EINVAL; @@ -1149,52 +1076,36 @@ int ib_uverbs_ex_create_cq(struct 
ib_uverbs_file *file, if (cmd.reserved) return -EINVAL; - if (ucore->outlen < (offsetof(typeof(resp), response_length) + - sizeof(resp.response_length))) - return -ENOSPC; - - obj = create_cq(file, ucore, uhw, &cmd, - min(ucore->inlen, sizeof(cmd)), - ib_uverbs_ex_create_cq_cb, NULL); - + obj = create_cq(attrs, &cmd); return PTR_ERR_OR_ZERO(obj); } -ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_resize_cq cmd; struct ib_uverbs_resize_cq_resp resp = {}; - struct ib_udata udata; struct ib_cq *cq; int ret = -EINVAL; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) return -EINVAL; - ret = cq->device->resize_cq(cq, cmd.cqe, &udata); + ret = cq->device->ops.resize_cq(cq, cmd.cqe, &attrs->driver_udata); if (ret) goto out; resp.cqe = cq->cqe; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp.cqe)) - ret = -EFAULT; - + ret = uverbs_response(attrs, &resp, sizeof(resp)); out: uobj_put_obj_read(cq); - return ret ? 
ret : in_len; + return ret; } static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest, @@ -1227,9 +1138,7 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest, return 0; } -ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_poll_cq cmd; struct ib_uverbs_poll_cq_resp resp; @@ -1239,15 +1148,16 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, struct ib_wc wc; int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) return -EINVAL; /* we copy a struct ib_uverbs_poll_cq_resp to user space */ - header_ptr = u64_to_user_ptr(cmd.response); + header_ptr = attrs->ucore.outbuf; data_ptr = header_ptr + sizeof resp; memset(&resp, 0, sizeof resp); @@ -1271,24 +1181,24 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, goto out_put; } - ret = in_len; + ret = 0; out_put: uobj_put_obj_read(cq); return ret; } -ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_req_notify_cq cmd; struct ib_cq *cq; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) return -EINVAL; @@ -1297,22 +1207,22 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, uobj_put_obj_read(cq); - return in_len; + return 0; } -ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, - const char 
__user *buf, int in_len, - int out_len) +static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_cq cmd; struct ib_uverbs_destroy_cq_resp resp; struct ib_uobject *uobj; struct ib_ucq_object *obj; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file); + uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -1323,21 +1233,11 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, uobj_put_destroy(uobj); - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } -static int create_qp(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw, - struct ib_uverbs_ex_create_qp *cmd, - size_t cmd_sz, - int (*cb)(struct ib_uverbs_file *file, - struct ib_uverbs_ex_create_qp_resp *resp, - struct ib_udata *udata), - void *context) +static int create_qp(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_ex_create_qp *cmd) { struct ib_uqp_object *obj; struct ib_device *device; @@ -1347,7 +1247,6 @@ static int create_qp(struct ib_uverbs_file *file, struct ib_cq *scq = NULL, *rcq = NULL; struct ib_srq *srq = NULL; struct ib_qp *qp; - char *buf; struct ib_qp_init_attr attr = {}; struct ib_uverbs_ex_create_qp_resp resp; int ret; @@ -1358,7 +1257,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file, + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs, &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -1366,12 +1265,10 @@ static int create_qp(struct ib_uverbs_file *file, obj->uevent.uobject.user_handle = cmd->user_handle; 
mutex_init(&obj->mcast_lock); - if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + - sizeof(cmd->rwq_ind_tbl_handle) && - (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { + if (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE) { ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL, - cmd->rwq_ind_tbl_handle, file); + cmd->rwq_ind_tbl_handle, attrs); if (!ind_tbl) { ret = -EINVAL; goto err_put; @@ -1380,13 +1277,6 @@ static int create_qp(struct ib_uverbs_file *file, attr.rwq_ind_tbl = ind_tbl; } - if (cmd_sz > sizeof(*cmd) && - !ib_is_udata_cleared(ucore, sizeof(*cmd), - cmd_sz - sizeof(*cmd))) { - ret = -EOPNOTSUPP; - goto err_put; - } - if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) { ret = -EINVAL; goto err_put; @@ -1397,7 +1287,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_XRC_TGT) { xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle, - file); + attrs); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; @@ -1417,7 +1307,7 @@ static int create_qp(struct ib_uverbs_file *file, } else { if (cmd->is_srq) { srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, - cmd->srq_handle, file); + cmd->srq_handle, attrs); if (!srq || srq->srq_type == IB_SRQT_XRC) { ret = -EINVAL; goto err_put; @@ -1428,7 +1318,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->recv_cq_handle != cmd->send_cq_handle) { rcq = uobj_get_obj_read( cq, UVERBS_OBJECT_CQ, - cmd->recv_cq_handle, file); + cmd->recv_cq_handle, attrs); if (!rcq) { ret = -EINVAL; goto err_put; @@ -1439,11 +1329,11 @@ static int create_qp(struct ib_uverbs_file *file, if (has_sq) scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, - cmd->send_cq_handle, file); + cmd->send_cq_handle, attrs); if (!ind_tbl) rcq = rcq ?: scq; pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, - file); + attrs); if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; @@ -1453,7 +1343,7 @@ static int create_qp(struct ib_uverbs_file *file, } 
attr.event_handler = ib_uverbs_qp_event_handler; - attr.qp_context = file; + attr.qp_context = attrs->ufile; attr.send_cq = scq; attr.recv_cq = rcq; attr.srq = srq; @@ -1473,10 +1363,7 @@ static int create_qp(struct ib_uverbs_file *file, INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); - if (cmd_sz >= offsetof(typeof(*cmd), create_flags) + - sizeof(cmd->create_flags)) - attr.create_flags = cmd->create_flags; - + attr.create_flags = cmd->create_flags; if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_CROSS_CHANNEL | IB_QP_CREATE_MANAGED_SEND | @@ -1498,18 +1385,10 @@ static int create_qp(struct ib_uverbs_file *file, attr.source_qpn = cmd->source_qpn; } - buf = (void *)cmd + sizeof(*cmd); - if (cmd_sz > sizeof(*cmd)) - if (!(buf[0] == 0 && !memcmp(buf, buf + 1, - cmd_sz - sizeof(*cmd) - 1))) { - ret = -EINVAL; - goto err_put; - } - if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, uhw, + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, &obj->uevent.uobject); if (IS_ERR(qp)) { @@ -1557,11 +1436,9 @@ static int create_qp(struct ib_uverbs_file *file, resp.base.max_recv_wr = attr.cap.max_recv_wr; resp.base.max_send_wr = attr.cap.max_send_wr; resp.base.max_inline_data = attr.cap.max_inline_data; + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); - - ret = cb(file, &resp, ucore); + ret = uverbs_response(attrs, &resp, sizeof(resp)); if (ret) goto err_cb; @@ -1583,7 +1460,7 @@ static int create_qp(struct ib_uverbs_file *file, if (ind_tbl) uobj_put_obj_read(ind_tbl); - return uobj_alloc_commit(&obj->uevent.uobject, 0); + return uobj_alloc_commit(&obj->uevent.uobject); err_cb: ib_destroy_qp(qp); @@ -1605,39 +1482,15 @@ err_put: return ret; } -static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file, - struct 
ib_uverbs_ex_create_qp_resp *resp, - struct ib_udata *ucore) -{ - if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) - return -EFAULT; - - return 0; -} - -ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_qp(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_qp cmd; struct ib_uverbs_ex_create_qp cmd_ex; - struct ib_udata ucore; - struct ib_udata uhw; - ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp); - int err; - - if (out_len < resp_size) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; + int ret; - ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response), - sizeof(cmd), resp_size); - ib_uverbs_init_udata(&uhw, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + resp_size, - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - resp_size); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; memset(&cmd_ex, 0, sizeof(cmd_ex)); cmd_ex.user_handle = cmd.user_handle; @@ -1654,42 +1507,17 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, cmd_ex.qp_type = cmd.qp_type; cmd_ex.is_srq = cmd.is_srq; - err = create_qp(file, &ucore, &uhw, &cmd_ex, - offsetof(typeof(cmd_ex), is_srq) + - sizeof(cmd.is_srq), ib_uverbs_create_qp_cb, - NULL); - - if (err) - return err; - - return in_len; + return create_qp(attrs, &cmd_ex); } -static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file, - struct ib_uverbs_ex_create_qp_resp *resp, - struct ib_udata *ucore) +static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs) { - if (ib_copy_to_udata(ucore, resp, resp->response_length)) - return -EFAULT; - - return 0; -} - -int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) -{ - struct ib_uverbs_ex_create_qp_resp resp; - struct ib_uverbs_ex_create_qp cmd = {0}; - int err; - - if (ucore->inlen < (offsetof(typeof(cmd), 
comp_mask) + - sizeof(cmd.comp_mask))) - return -EINVAL; + struct ib_uverbs_ex_create_qp cmd; + int ret; - err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); - if (err) - return err; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK) return -EINVAL; @@ -1697,26 +1525,13 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, if (cmd.reserved) return -EINVAL; - if (ucore->outlen < (offsetof(typeof(resp), response_length) + - sizeof(resp.response_length))) - return -ENOSPC; - - err = create_qp(file, ucore, uhw, &cmd, - min(ucore->inlen, sizeof(cmd)), - ib_uverbs_ex_create_qp_cb, NULL); - - if (err) - return err; - - return 0; + return create_qp(attrs, &cmd); } -ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, int out_len) +static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_open_qp cmd; struct ib_uverbs_create_qp_resp resp; - struct ib_udata udata; struct ib_uqp_object *obj; struct ib_xrcd *xrcd; struct ib_uobject *uninitialized_var(xrcd_uobj); @@ -1725,23 +1540,16 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, int ret; struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file, + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs, &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); - xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file); + xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, attrs); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err_put; 
@@ -1754,7 +1562,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, } attr.event_handler = ib_uverbs_qp_event_handler; - attr.qp_context = file; + attr.qp_context = attrs->ufile; attr.qp_num = cmd.qpn; attr.qp_type = cmd.qp_type; @@ -1775,17 +1583,16 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, resp.qpn = qp->qp_num; resp.qp_handle = obj->uevent.uobject.id; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_destroy; - } obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); qp->uobject = &obj->uevent.uobject; uobj_put_read(xrcd_uobj); - return uobj_alloc_commit(&obj->uevent.uobject, in_len); + return uobj_alloc_commit(&obj->uevent.uobject); err_destroy: ib_destroy_qp(qp); @@ -1818,9 +1625,7 @@ static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr, uverb_attr->port_num = rdma_ah_get_port_num(rdma_attr); } -ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_query_qp cmd; struct ib_uverbs_query_qp_resp resp; @@ -1829,8 +1634,9 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, struct ib_qp_init_attr *init_attr; int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; attr = kmalloc(sizeof *attr, GFP_KERNEL); init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); @@ -1839,7 +1645,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, goto out; } - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) { ret = -EINVAL; goto out; @@ -1886,14 +1692,13 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, resp.max_inline_data = 
init_attr->cap.max_inline_data; resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); out: kfree(attr); kfree(init_attr); - return ret ? ret : in_len; + return ret; } /* Remove ignored fields set in the attribute mask */ @@ -1933,8 +1738,8 @@ static void copy_ah_attr_from_uverbs(struct ib_device *dev, rdma_ah_set_make_grd(rdma_attr, false); } -static int modify_qp(struct ib_uverbs_file *file, - struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata) +static int modify_qp(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_ex_modify_qp *cmd) { struct ib_qp_attr *attr; struct ib_qp *qp; @@ -1944,7 +1749,8 @@ static int modify_qp(struct ib_uverbs_file *file, if (!attr) return -ENOMEM; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, + attrs); if (!qp) { ret = -EINVAL; goto out; @@ -2081,7 +1887,7 @@ static int modify_qp(struct ib_uverbs_file *file, ret = ib_modify_qp_with_udata(qp, attr, modify_qp_mask(qp->qp_type, cmd->base.attr_mask), - udata); + &attrs->driver_udata); release_qp: uobj_put_obj_read(qp); @@ -2091,80 +1897,64 @@ out: return ret; } -ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_modify_qp cmd = {}; - struct ib_udata udata; + struct ib_uverbs_ex_modify_qp cmd; int ret; - if (copy_from_user(&cmd.base, buf, sizeof(cmd.base))) - return -EFAULT; + ret = uverbs_request(attrs, &cmd.base, sizeof(cmd.base)); + if (ret) + return ret; if (cmd.base.attr_mask & ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1)) return -EOPNOTSUPP; - ib_uverbs_init_udata(&udata, buf + sizeof(cmd.base), NULL, - in_len - sizeof(cmd.base) - sizeof(struct ib_uverbs_cmd_hdr), - out_len); - - ret = 
modify_qp(file, &cmd, &udata); - if (ret) - return ret; - - return in_len; + return modify_qp(attrs, &cmd); } -int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_modify_qp cmd = {}; + struct ib_uverbs_ex_modify_qp cmd; + struct ib_uverbs_ex_modify_qp_resp resp = { + .response_length = uverbs_response_length(attrs, sizeof(resp)) + }; int ret; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; + /* * Last bit is reserved for extending the attr_mask by * using another field. */ BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31)); - if (ucore->inlen < sizeof(cmd.base)) - return -EINVAL; - - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); - if (ret) - return ret; - if (cmd.base.attr_mask & ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) return -EOPNOTSUPP; - if (ucore->inlen > sizeof(cmd)) { - if (!ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - } - - ret = modify_qp(file, &cmd, uhw); + ret = modify_qp(attrs, &cmd); + if (ret) + return ret; - return ret; + return uverbs_response(attrs, &resp, sizeof(resp)); } -ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_destroy_qp(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_qp cmd; struct ib_uverbs_destroy_qp_resp resp; struct ib_uobject *uobj; struct ib_uqp_object *obj; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file); + uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -2174,10 +1964,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, uobj_put_destroy(uobj); - if 
(copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } static void *alloc_wr(size_t wr_size, __u32 num_sge) @@ -2190,9 +1977,7 @@ static void *alloc_wr(size_t wr_size, __u32 num_sge) num_sge * sizeof (struct ib_sge), GFP_KERNEL); } -ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_post_send cmd; struct ib_uverbs_post_send_resp resp; @@ -2202,24 +1987,31 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, struct ib_qp *qp; int i, sg_ind; int is_ud; - ssize_t ret = -EINVAL; + int ret, ret2; size_t next_size; + const struct ib_sge __user *sgls; + const void __user *wqes; + struct uverbs_req_iter iter; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + - cmd.sge_count * sizeof (struct ib_uverbs_sge)) - return -EINVAL; - - if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) - return -EINVAL; + ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); + if (ret) + return ret; + wqes = uverbs_request_next_ptr(&iter, cmd.wqe_size * cmd.wr_count); + if (IS_ERR(wqes)) + return PTR_ERR(wqes); + sgls = uverbs_request_next_ptr( + &iter, cmd.sge_count * sizeof(struct ib_uverbs_sge)); + if (IS_ERR(sgls)) + return PTR_ERR(sgls); + ret = uverbs_request_finish(&iter); + if (ret) + return ret; user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); if (!user_wr) return -ENOMEM; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) goto out; @@ -2227,8 +2019,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, sg_ind = 0; last = NULL; for (i = 0; i < cmd.wr_count; ++i) { - if (copy_from_user(user_wr, - buf + sizeof cmd + i * cmd.wqe_size, + if 
(copy_from_user(user_wr, wqes + i * cmd.wqe_size, cmd.wqe_size)) { ret = -EFAULT; goto out_put; @@ -2256,7 +2047,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, } ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, - user_wr->wr.ud.ah, file); + user_wr->wr.ud.ah, attrs); if (!ud->ah) { kfree(ud); ret = -EINVAL; @@ -2336,11 +2127,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, if (next->num_sge) { next->sg_list = (void *) next + ALIGN(next_size, sizeof(struct ib_sge)); - if (copy_from_user(next->sg_list, - buf + sizeof cmd + - cmd.wr_count * cmd.wqe_size + - sg_ind * sizeof (struct ib_sge), - next->num_sge * sizeof (struct ib_sge))) { + if (copy_from_user(next->sg_list, sgls + sg_ind, + next->num_sge * + sizeof(struct ib_sge))) { ret = -EFAULT; goto out_put; } @@ -2350,7 +2139,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, } resp.bad_wr = 0; - ret = qp->device->post_send(qp->real_qp, wr, &bad_wr); + ret = qp->device->ops.post_send(qp->real_qp, wr, &bad_wr); if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; @@ -2358,8 +2147,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, break; } - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - ret = -EFAULT; + ret2 = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret2) + ret = ret2; out_put: uobj_put_obj_read(qp); @@ -2375,28 +2165,35 @@ out_put: out: kfree(user_wr); - return ret ? 
ret : in_len; + return ret; } -static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, - int in_len, - u32 wr_count, - u32 sge_count, - u32 wqe_size) +static struct ib_recv_wr * +ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count, + u32 wqe_size, u32 sge_count) { struct ib_uverbs_recv_wr *user_wr; struct ib_recv_wr *wr = NULL, *last, *next; int sg_ind; int i; int ret; - - if (in_len < wqe_size * wr_count + - sge_count * sizeof (struct ib_uverbs_sge)) - return ERR_PTR(-EINVAL); + const struct ib_sge __user *sgls; + const void __user *wqes; if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) return ERR_PTR(-EINVAL); + wqes = uverbs_request_next_ptr(iter, wqe_size * wr_count); + if (IS_ERR(wqes)) + return ERR_CAST(wqes); + sgls = uverbs_request_next_ptr( + iter, sge_count * sizeof(struct ib_uverbs_sge)); + if (IS_ERR(sgls)) + return ERR_CAST(sgls); + ret = uverbs_request_finish(iter); + if (ret) + return ERR_PTR(ret); + user_wr = kmalloc(wqe_size, GFP_KERNEL); if (!user_wr) return ERR_PTR(-ENOMEM); @@ -2404,7 +2201,7 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, sg_ind = 0; last = NULL; for (i = 0; i < wr_count; ++i) { - if (copy_from_user(user_wr, buf + i * wqe_size, + if (copy_from_user(user_wr, wqes + i * wqe_size, wqe_size)) { ret = -EFAULT; goto err; @@ -2443,10 +2240,9 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, if (next->num_sge) { next->sg_list = (void *) next + ALIGN(sizeof *next, sizeof (struct ib_sge)); - if (copy_from_user(next->sg_list, - buf + wr_count * wqe_size + - sg_ind * sizeof (struct ib_sge), - next->num_sge * sizeof (struct ib_sge))) { + if (copy_from_user(next->sg_list, sgls + sg_ind, + next->num_sge * + sizeof(struct ib_sge))) { ret = -EFAULT; goto err; } @@ -2470,32 +2266,33 @@ err: return ERR_PTR(ret); } -ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int 
ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_post_recv cmd; struct ib_uverbs_post_recv_resp resp; struct ib_recv_wr *wr, *next; const struct ib_recv_wr *bad_wr; struct ib_qp *qp; - ssize_t ret = -EINVAL; + int ret, ret2; + struct uverbs_req_iter iter; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); + if (ret) + return ret; - wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, - in_len - sizeof cmd, cmd.wr_count, - cmd.sge_count, cmd.wqe_size); + wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size, + cmd.sge_count); if (IS_ERR(wr)) return PTR_ERR(wr); - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); - if (!qp) + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); + if (!qp) { + ret = -EINVAL; goto out; + } resp.bad_wr = 0; - ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); + ret = qp->device->ops.post_recv(qp->real_qp, wr, &bad_wr); uobj_put_obj_read(qp); if (ret) { @@ -2506,9 +2303,9 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, } } - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - ret = -EFAULT; - + ret2 = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret2) + ret = ret2; out: while (wr) { next = wr->next; @@ -2516,36 +2313,36 @@ out: wr = next; } - return ret ? 
ret : in_len; + return ret; } -ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_post_srq_recv cmd; struct ib_uverbs_post_srq_recv_resp resp; struct ib_recv_wr *wr, *next; const struct ib_recv_wr *bad_wr; struct ib_srq *srq; - ssize_t ret = -EINVAL; + int ret, ret2; + struct uverbs_req_iter iter; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); + if (ret) + return ret; - wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, - in_len - sizeof cmd, cmd.wr_count, - cmd.sge_count, cmd.wqe_size); + wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size, + cmd.sge_count); if (IS_ERR(wr)) return PTR_ERR(wr); - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); - if (!srq) + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); + if (!srq) { + ret = -EINVAL; goto out; + } resp.bad_wr = 0; - ret = srq->device->post_srq_recv ? - srq->device->post_srq_recv(srq, wr, &bad_wr) : -EOPNOTSUPP; + ret = srq->device->ops.post_srq_recv(srq, wr, &bad_wr); uobj_put_obj_read(srq); @@ -2556,8 +2353,9 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, break; } - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - ret = -EFAULT; + ret2 = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret2) + ret = ret2; out: while (wr) { @@ -2566,12 +2364,10 @@ out: wr = next; } - return ret ? 
ret : in_len; + return ret; } -ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_ah cmd; struct ib_uverbs_create_ah_resp resp; @@ -2580,21 +2376,13 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct ib_ah *ah; struct rdma_ah_attr attr = {}; int ret; - struct ib_udata udata; struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev); + uobj = uobj_alloc(UVERBS_OBJECT_AH, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -2603,7 +2391,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, goto err; } - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err; @@ -2627,7 +2415,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, rdma_ah_set_ah_flags(&attr, 0); } - ah = rdma_create_user_ah(pd, &attr, &udata); + ah = rdma_create_user_ah(pd, &attr, &attrs->driver_udata); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_put; @@ -2639,16 +2427,15 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, resp.ah_handle = uobj->id; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, in_len); + return uobj_alloc_commit(uobj); err_copy: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); 
err_put: uobj_put_obj_read(pd); @@ -2658,21 +2445,19 @@ err: return ret; } -ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, - const char __user *buf, int in_len, int out_len) +static int ib_uverbs_destroy_ah(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_ah cmd; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file, - in_len); + return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, attrs); } -ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_attach_mcast cmd; struct ib_qp *qp; @@ -2680,10 +2465,11 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, struct ib_uverbs_mcast_entry *mcast; int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) return -EINVAL; @@ -2716,12 +2502,10 @@ out_put: mutex_unlock(&obj->mcast_lock); uobj_put_obj_read(qp); - return ret ? 
ret : in_len; + return ret; } -ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_detach_mcast cmd; struct ib_uqp_object *obj; @@ -2730,10 +2514,11 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, int ret = -EINVAL; bool found = false; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) return -EINVAL; @@ -2759,7 +2544,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, out_put: mutex_unlock(&obj->mcast_lock); uobj_put_obj_read(qp); - return ret ? ret : in_len; + return ret; } struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) @@ -2838,7 +2623,7 @@ void flow_resources_add(struct ib_uflow_resources *uflow_res, } EXPORT_SYMBOL(flow_resources_add); -static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, +static int kern_spec_to_ib_spec_action(const struct uverbs_attr_bundle *attrs, struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec, struct ib_uflow_resources *uflow_res) @@ -2867,7 +2652,7 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, ib_spec->action.act = uobj_get_obj_read(flow_action, UVERBS_OBJECT_FLOW_ACTION, kern_spec->action.handle, - ufile); + attrs); if (!ib_spec->action.act) return -EINVAL; ib_spec->action.size = @@ -2885,7 +2670,7 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, uobj_get_obj_read(counters, UVERBS_OBJECT_COUNTERS, kern_spec->flow_count.handle, - ufile); + attrs); if (!ib_spec->flow_count.counters) return -EINVAL; ib_spec->flow_count.size = @@ -3066,7 +2851,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, 
kern_filter_sz, ib_spec); } -static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile, +static int kern_spec_to_ib_spec(struct uverbs_attr_bundle *attrs, struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec, struct ib_uflow_resources *uflow_res) @@ -3075,17 +2860,15 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile, return -EINVAL; if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG) - return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec, + return kern_spec_to_ib_spec_action(attrs, kern_spec, ib_spec, uflow_res); else return kern_spec_to_ib_spec_filter(kern_spec, ib_spec); } -int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_create_wq cmd = {}; + struct ib_uverbs_ex_create_wq cmd; struct ib_uverbs_ex_create_wq_resp resp = {}; struct ib_uwq_object *obj; int err = 0; @@ -3093,43 +2876,27 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, struct ib_pd *pd; struct ib_wq *wq; struct ib_wq_init_attr wq_init_attr = {}; - size_t required_cmd_sz; - size_t required_resp_len; struct ib_device *ib_dev; - required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge); - required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn); - - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->outlen < required_resp_len) - return -ENOSPC; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - - err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + err = uverbs_request(attrs, &cmd, sizeof(cmd)); if (err) return err; if (cmd.comp_mask) return -EOPNOTSUPP; - obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file, + obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, attrs, &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); - pd = uobj_get_obj_read(pd, 
UVERBS_OBJECT_PD, cmd.pd_handle, file); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { err = -EINVAL; goto err_uobj; } - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) { err = -EINVAL; goto err_put_pd; @@ -3138,20 +2905,14 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, wq_init_attr.cq = cq; wq_init_attr.max_sge = cmd.max_sge; wq_init_attr.max_wr = cmd.max_wr; - wq_init_attr.wq_context = file; + wq_init_attr.wq_context = attrs->ufile; wq_init_attr.wq_type = cmd.wq_type; wq_init_attr.event_handler = ib_uverbs_wq_event_handler; - if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) + - sizeof(cmd.create_flags))) - wq_init_attr.create_flags = cmd.create_flags; + wq_init_attr.create_flags = cmd.create_flags; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); - if (!pd->device->create_wq) { - err = -EOPNOTSUPP; - goto err_put_cq; - } - wq = pd->device->create_wq(pd, &wq_init_attr, uhw); + wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata); if (IS_ERR(wq)) { err = PTR_ERR(wq); goto err_put_cq; @@ -3175,15 +2936,14 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, resp.max_sge = wq_init_attr.max_sge; resp.max_wr = wq_init_attr.max_wr; resp.wqn = wq->wq_num; - resp.response_length = required_resp_len; - err = ib_copy_to_udata(ucore, - &resp, resp.response_length); + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + err = uverbs_response(attrs, &resp, sizeof(resp)); if (err) goto err_copy; uobj_put_obj_read(pd); uobj_put_obj_read(cq); - return uobj_alloc_commit(&obj->uevent.uobject, 0); + return uobj_alloc_commit(&obj->uevent.uobject); err_copy: ib_destroy_wq(wq); @@ -3197,41 +2957,23 @@ err_uobj: return err; } -int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int 
ib_uverbs_ex_destroy_wq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_destroy_wq cmd = {}; + struct ib_uverbs_ex_destroy_wq cmd; struct ib_uverbs_ex_destroy_wq_resp resp = {}; struct ib_uobject *uobj; struct ib_uwq_object *obj; - size_t required_cmd_sz; - size_t required_resp_len; int ret; - required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle); - required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); - - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->outlen < required_resp_len) - return -ENOSPC; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; if (cmd.comp_mask) return -EOPNOTSUPP; - resp.response_length = required_resp_len; - uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file); + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -3240,29 +2982,17 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, uobj_put_destroy(uobj); - return ib_copy_to_udata(ucore, &resp, resp.response_length); + return uverbs_response(attrs, &resp, sizeof(resp)); } -int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_modify_wq cmd = {}; + struct ib_uverbs_ex_modify_wq cmd; struct ib_wq *wq; struct ib_wq_attr wq_attr = {}; - size_t required_cmd_sz; int ret; - required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state); - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - 
sizeof(cmd))) - return -EOPNOTSUPP; - - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; @@ -3272,7 +3002,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS)) return -EINVAL; - wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file); + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, attrs); if (!wq) return -EINVAL; @@ -3282,24 +3012,18 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, wq_attr.flags = cmd.flags; wq_attr.flags_mask = cmd.flags_mask; } - if (!wq->device->modify_wq) { - ret = -EOPNOTSUPP; - goto out; - } - ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); -out: + ret = wq->device->ops.modify_wq(wq, &wq_attr, cmd.attr_mask, + &attrs->driver_udata); uobj_put_obj_read(wq); return ret; } -int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_create_rwq_ind_table cmd = {}; + struct ib_uverbs_ex_create_rwq_ind_table cmd; struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {}; struct ib_uobject *uobj; - int err = 0; + int err; struct ib_rwq_ind_table_init_attr init_attr = {}; struct ib_rwq_ind_table *rwq_ind_tbl; struct ib_wq **wqs = NULL; @@ -3307,27 +3031,13 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, struct ib_wq *wq = NULL; int i, j, num_read_wqs; u32 num_wq_handles; - u32 expected_in_size; - size_t required_cmd_sz_header; - size_t required_resp_len; + struct uverbs_req_iter iter; struct ib_device *ib_dev; - required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); - required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num); - - if (ucore->inlen < required_cmd_sz_header) - return -EINVAL; - - 
if (ucore->outlen < required_resp_len) - return -ENOSPC; - - err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header); + err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); if (err) return err; - ucore->inbuf += required_cmd_sz_header; - ucore->inlen -= required_cmd_sz_header; - if (cmd.comp_mask) return -EOPNOTSUPP; @@ -3335,26 +3045,17 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, return -EINVAL; num_wq_handles = 1 << cmd.log_ind_tbl_size; - expected_in_size = num_wq_handles * sizeof(__u32); - if (num_wq_handles == 1) - /* input size for wq handles is u64 aligned */ - expected_in_size += sizeof(__u32); - - if (ucore->inlen < expected_in_size) - return -EINVAL; - - if (ucore->inlen > expected_in_size && - !ib_is_udata_cleared(ucore, expected_in_size, - ucore->inlen - expected_in_size)) - return -EOPNOTSUPP; - wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles), GFP_KERNEL); if (!wqs_handles) return -ENOMEM; - err = ib_copy_from_udata(wqs_handles, ucore, - num_wq_handles * sizeof(__u32)); + err = uverbs_request_next(&iter, wqs_handles, + num_wq_handles * sizeof(__u32)); + if (err) + goto err_free; + + err = uverbs_request_finish(&iter); if (err) goto err_free; @@ -3367,7 +3068,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (num_read_wqs = 0; num_read_wqs < num_wq_handles; num_read_wqs++) { wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, - wqs_handles[num_read_wqs], file); + wqs_handles[num_read_wqs], attrs); if (!wq) { err = -EINVAL; goto put_wqs; @@ -3376,7 +3077,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, wqs[num_read_wqs] = wq; } - uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev); + uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto put_wqs; @@ -3385,11 +3086,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; init_attr.ind_tbl 
= wqs; - if (!ib_dev->create_rwq_ind_table) { - err = -EOPNOTSUPP; - goto err_uobj; - } - rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); + rwq_ind_tbl = ib_dev->ops.create_rwq_ind_table(ib_dev, &init_attr, + &attrs->driver_udata); if (IS_ERR(rwq_ind_tbl)) { err = PTR_ERR(rwq_ind_tbl); @@ -3408,10 +3106,9 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, resp.ind_tbl_handle = uobj->id; resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; - resp.response_length = required_resp_len; + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); - err = ib_copy_to_udata(ucore, - &resp, resp.response_length); + err = uverbs_response(attrs, &resp, sizeof(resp)); if (err) goto err_copy; @@ -3420,7 +3117,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (j = 0; j < num_read_wqs; j++) uobj_put_obj_read(wqs[j]); - return uobj_alloc_commit(uobj, 0); + return uobj_alloc_commit(uobj); err_copy: ib_destroy_rwq_ind_table(rwq_ind_tbl); @@ -3435,25 +3132,12 @@ err_free: return err; } -int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; - int ret; - size_t required_cmd_sz; - - required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle); - - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; + struct ib_uverbs_ex_destroy_rwq_ind_table cmd; + int ret; - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; @@ -3461,12 +3145,10 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, return -EOPNOTSUPP; return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL, - 
cmd.ind_tbl_handle, file, 0); + cmd.ind_tbl_handle, attrs); } -int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_flow cmd; struct ib_uverbs_create_flow_resp resp; @@ -3477,24 +3159,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, struct ib_qp *qp; struct ib_uflow_resources *uflow_res; struct ib_uverbs_flow_spec_hdr *kern_spec; - int err = 0; + struct uverbs_req_iter iter; + int err; void *ib_spec; int i; struct ib_device *ib_dev; - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; - - if (ucore->outlen < sizeof(resp)) - return -ENOSPC; - - err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); if (err) return err; - ucore->inbuf += sizeof(cmd); - ucore->inlen -= sizeof(cmd); - if (cmd.comp_mask) return -EINVAL; @@ -3512,8 +3186,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) return -EINVAL; - if (cmd.flow_attr.size > ucore->inlen || - cmd.flow_attr.size > + if (cmd.flow_attr.size > (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec))) return -EINVAL; @@ -3528,21 +3201,25 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, return -ENOMEM; *kern_flow_attr = cmd.flow_attr; - err = ib_copy_from_udata(&kern_flow_attr->flow_specs, ucore, - cmd.flow_attr.size); + err = uverbs_request_next(&iter, &kern_flow_attr->flow_specs, + cmd.flow_attr.size); if (err) goto err_free_attr; } else { kern_flow_attr = &cmd.flow_attr; } - uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev); + err = uverbs_request_finish(&iter); + if (err) + goto err_free_attr; + + uobj = uobj_alloc(UVERBS_OBJECT_FLOW, attrs, &ib_dev); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto err_free_attr; } - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); + qp = 
uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) { err = -EINVAL; goto err_uobj; @@ -3553,11 +3230,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_put; } - if (!qp->device->create_flow) { - err = -EOPNOTSUPP; - goto err_put; - } - flow_attr = kzalloc(struct_size(flow_attr, flows, cmd.flow_attr.num_of_specs), GFP_KERNEL); if (!flow_attr) { @@ -3584,7 +3256,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, cmd.flow_attr.size >= kern_spec->size; i++) { err = kern_spec_to_ib_spec( - file, (struct ib_uverbs_flow_spec *)kern_spec, + attrs, (struct ib_uverbs_flow_spec *)kern_spec, ib_spec, uflow_res); if (err) goto err_free; @@ -3602,8 +3274,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_free; } - flow_id = qp->device->create_flow(qp, flow_attr, - IB_FLOW_DOMAIN_USER, uhw); + flow_id = qp->device->ops.create_flow( + qp, flow_attr, IB_FLOW_DOMAIN_USER, &attrs->driver_udata); if (IS_ERR(flow_id)) { err = PTR_ERR(flow_id); @@ -3615,8 +3287,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, memset(&resp, 0, sizeof(resp)); resp.flow_handle = uobj->id; - err = ib_copy_to_udata(ucore, - &resp, sizeof(resp)); + err = uverbs_response(attrs, &resp, sizeof(resp)); if (err) goto err_copy; @@ -3624,9 +3295,9 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); - return uobj_alloc_commit(uobj, 0); + return uobj_alloc_commit(uobj); err_copy: - if (!qp->device->destroy_flow(flow_id)) + if (!qp->device->ops.destroy_flow(flow_id)) atomic_dec(&qp->usecnt); err_free: ib_uverbs_flow_resources_free(uflow_res); @@ -3642,28 +3313,22 @@ err_free_attr: return err; } -int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_destroy_flow(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_flow cmd; int ret; - if (ucore->inlen < sizeof(cmd)) 
- return -EINVAL; - - ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; if (cmd.comp_mask) return -EINVAL; - return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file, - 0); + return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, attrs); } -static int __uverbs_create_xsrq(struct ib_uverbs_file *file, +static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) { @@ -3676,7 +3341,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, int ret; struct ib_device *ib_dev; - obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file, + obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs, &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -3686,7 +3351,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (cmd->srq_type == IB_SRQT_XRC) { xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle, - file); + attrs); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err; @@ -3704,21 +3369,21 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (ib_srq_has_cq(cmd->srq_type)) { attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, - cmd->cq_handle, file); + cmd->cq_handle, attrs); if (!attr.ext.cq) { ret = -EINVAL; goto err_put_xrcd; } } - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err_put_cq; } attr.event_handler = ib_uverbs_srq_event_handler; - attr.srq_context = file; + attr.srq_context = attrs->ufile; attr.srq_type = cmd->srq_type; attr.attr.max_wr = cmd->max_wr; attr.attr.max_sge = cmd->max_sge; @@ -3727,7 +3392,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); - srq = pd->device->create_srq(pd, &attr, udata); + srq = 
pd->device->ops.create_srq(pd, &attr, udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); goto err_put; @@ -3763,11 +3428,9 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (cmd->srq_type == IB_SRQT_XRC) resp.srqn = srq->ext.xrc.srq_num; - if (copy_to_user(u64_to_user_ptr(cmd->response), - &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } if (cmd->srq_type == IB_SRQT_XRC) uobj_put_read(xrcd_uobj); @@ -3776,7 +3439,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, uobj_put_obj_read(attr.ext.cq); uobj_put_obj_read(pd); - return uobj_alloc_commit(&obj->uevent.uobject, 0); + return uobj_alloc_commit(&obj->uevent.uobject); err_copy: ib_destroy_srq(srq); @@ -3799,21 +3462,15 @@ err: return ret; } -ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_srq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_srq cmd; struct ib_uverbs_create_xsrq xcmd; - struct ib_uverbs_create_srq_resp resp; - struct ib_udata udata; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; memset(&xcmd, 0, sizeof(xcmd)); xcmd.response = cmd.response; @@ -3824,77 +3481,48 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, xcmd.max_sge = cmd.max_sge; xcmd.srq_limit = cmd.srq_limit; - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); - - ret = __uverbs_create_xsrq(file, &xcmd, &udata); - if (ret) - return ret; - - return in_len; + return __uverbs_create_xsrq(attrs, &xcmd, &attrs->driver_udata); } -ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, int out_len) +static int 
ib_uverbs_create_xsrq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_xsrq cmd; - struct ib_uverbs_create_srq_resp resp; - struct ib_udata udata; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); - - ret = __uverbs_create_xsrq(file, &cmd, &udata); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; - return in_len; + return __uverbs_create_xsrq(attrs, &cmd, &attrs->driver_udata); } -ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_modify_srq cmd; - struct ib_udata udata; struct ib_srq *srq; struct ib_srq_attr attr; int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, - out_len); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); if (!srq) return -EINVAL; attr.max_wr = cmd.max_wr; attr.srq_limit = cmd.srq_limit; - ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata); + ret = srq->device->ops.modify_srq(srq, &attr, cmd.attr_mask, + &attrs->driver_udata); uobj_put_obj_read(srq); - return ret ? 
ret : in_len; + return ret; } -ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_query_srq cmd; struct ib_uverbs_query_srq_resp resp; @@ -3902,13 +3530,11 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, struct ib_srq *srq; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); if (!srq) return -EINVAL; @@ -3925,25 +3551,22 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, resp.max_sge = attr.max_sge; resp.srq_limit = attr.srq_limit; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } -ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_destroy_srq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_srq cmd; struct ib_uverbs_destroy_srq_resp resp; struct ib_uobject *uobj; struct ib_uevent_object *obj; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file); + uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -3953,35 +3576,24 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, uobj_put_destroy(uobj); - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } -int 
ib_uverbs_ex_query_device(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_query_device_resp resp = { {0} }; + struct ib_uverbs_ex_query_device_resp resp = {}; struct ib_uverbs_ex_query_device cmd; struct ib_device_attr attr = {0}; struct ib_ucontext *ucontext; struct ib_device *ib_dev; int err; - ucontext = ib_uverbs_get_ucontext(file); + ucontext = ib_uverbs_get_ucontext(attrs); if (IS_ERR(ucontext)) return PTR_ERR(ucontext); ib_dev = ucontext->device; - if (!ib_dev->query_device) - return -EOPNOTSUPP; - - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; - - err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + err = uverbs_request(attrs, &cmd, sizeof(cmd)); if (err) return err; @@ -3991,20 +3603,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, if (cmd.reserved) return -EINVAL; - resp.response_length = offsetof(typeof(resp), odp_caps); - - if (ucore->outlen < resp.response_length) - return -ENOSPC; - - err = ib_dev->query_device(ib_dev, &attr, uhw); + err = ib_dev->ops.query_device(ib_dev, &attr, &attrs->driver_udata); if (err) return err; copy_query_dev_fields(ucontext, &resp.base, &attr); - if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) - goto end; - #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING resp.odp_caps.general_caps = attr.odp_caps.general_caps; resp.odp_caps.per_transport_caps.rc_odp_caps = @@ -4014,99 +3618,39 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, resp.odp_caps.per_transport_caps.ud_odp_caps = attr.odp_caps.per_transport_caps.ud_odp_caps; #endif - resp.response_length += sizeof(resp.odp_caps); - - if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask)) - goto end; resp.timestamp_mask = attr.timestamp_mask; - resp.response_length += sizeof(resp.timestamp_mask); - - if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock)) - goto end; - 
resp.hca_core_clock = attr.hca_core_clock; - resp.response_length += sizeof(resp.hca_core_clock); - - if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex)) - goto end; - resp.device_cap_flags_ex = attr.device_cap_flags; - resp.response_length += sizeof(resp.device_cap_flags_ex); - - if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps)) - goto end; - resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts; resp.rss_caps.max_rwq_indirection_tables = attr.rss_caps.max_rwq_indirection_tables; resp.rss_caps.max_rwq_indirection_table_size = attr.rss_caps.max_rwq_indirection_table_size; - - resp.response_length += sizeof(resp.rss_caps); - - if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq)) - goto end; - resp.max_wq_type_rq = attr.max_wq_type_rq; - resp.response_length += sizeof(resp.max_wq_type_rq); - - if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps)) - goto end; - resp.raw_packet_caps = attr.raw_packet_caps; - resp.response_length += sizeof(resp.raw_packet_caps); - - if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps)) - goto end; - resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size; resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags; resp.tm_caps.max_ops = attr.tm_caps.max_ops; resp.tm_caps.max_sge = attr.tm_caps.max_sge; resp.tm_caps.flags = attr.tm_caps.flags; - resp.response_length += sizeof(resp.tm_caps); - - if (ucore->outlen < resp.response_length + sizeof(resp.cq_moderation_caps)) - goto end; - resp.cq_moderation_caps.max_cq_moderation_count = attr.cq_caps.max_cq_moderation_count; resp.cq_moderation_caps.max_cq_moderation_period = attr.cq_caps.max_cq_moderation_period; - resp.response_length += sizeof(resp.cq_moderation_caps); - - if (ucore->outlen < resp.response_length + sizeof(resp.max_dm_size)) - goto end; - resp.max_dm_size = attr.max_dm_size; - resp.response_length += sizeof(resp.max_dm_size); -end: - err = ib_copy_to_udata(ucore, &resp, 
resp.response_length); - return err; + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + + return uverbs_response(attrs, &resp, sizeof(resp)); } -int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_modify_cq cmd = {}; + struct ib_uverbs_ex_modify_cq cmd; struct ib_cq *cq; - size_t required_cmd_sz; int ret; - required_cmd_sz = offsetof(typeof(cmd), reserved) + - sizeof(cmd.reserved); - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - /* sanity checks */ - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; @@ -4116,7 +3660,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, if (cmd.attr_mask > IB_CQ_MODERATE) return -EOPNOTSUPP; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) return -EINVAL; @@ -4126,3 +3670,381 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, return ret; } + +/* + * Describe the input structs for write(). Some write methods have an input + * only struct, most have an input and output. If the struct has an output then + * the 'response' u64 must be the first field in the request structure. + * + * If udata is present then both the request and response structs have a + * trailing driver_data flex array. In this case the size of the base struct + * cannot be changed. 
+ */ +#define offsetof_after(_struct, _member) \ + (offsetof(_struct, _member) + sizeof(((_struct *)NULL)->_member)) + +#define UAPI_DEF_WRITE_IO(req, resp) \ + .write.has_resp = 1 + \ + BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \ + BUILD_BUG_ON_ZERO(sizeof(((req *)0)->response) != \ + sizeof(u64)), \ + .write.req_size = sizeof(req), .write.resp_size = sizeof(resp) + +#define UAPI_DEF_WRITE_I(req) .write.req_size = sizeof(req) + +#define UAPI_DEF_WRITE_UDATA_IO(req, resp) \ + UAPI_DEF_WRITE_IO(req, resp), \ + .write.has_udata = \ + 1 + \ + BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \ + sizeof(req)) + \ + BUILD_BUG_ON_ZERO(offsetof(resp, driver_data) != \ + sizeof(resp)) + +#define UAPI_DEF_WRITE_UDATA_I(req) \ + UAPI_DEF_WRITE_I(req), \ + .write.has_udata = \ + 1 + BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \ + sizeof(req)) + +/* + * The _EX versions are for use with WRITE_EX and allow the last struct member + * to be specified. Buffers that do not include that member will be rejected. 
+ */ +#define UAPI_DEF_WRITE_IO_EX(req, req_last_member, resp, resp_last_member) \ + .write.has_resp = 1, \ + .write.req_size = offsetof_after(req, req_last_member), \ + .write.resp_size = offsetof_after(resp, resp_last_member) + +#define UAPI_DEF_WRITE_I_EX(req, req_last_member) \ + .write.req_size = offsetof_after(req, req_last_member) + +const struct uapi_definition uverbs_def_write_intf[] = { + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_AH, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_AH, + ib_uverbs_create_ah, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_ah, + struct ib_uverbs_create_ah_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_ah)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DESTROY_AH, + ib_uverbs_destroy_ah, + UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah), + UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_COMP_CHANNEL, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, + ib_uverbs_create_comp_channel, + UAPI_DEF_WRITE_IO( + struct ib_uverbs_create_comp_channel, + struct ib_uverbs_create_comp_channel_resp))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_CQ, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_CQ, + ib_uverbs_create_cq, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_cq, + struct ib_uverbs_create_cq_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_cq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DESTROY_CQ, + ib_uverbs_destroy_cq, + UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_cq, + struct ib_uverbs_destroy_cq_resp), + UAPI_DEF_METHOD_NEEDS_FN(destroy_cq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_POLL_CQ, + ib_uverbs_poll_cq, + UAPI_DEF_WRITE_IO(struct ib_uverbs_poll_cq, + struct ib_uverbs_poll_cq_resp), + UAPI_DEF_METHOD_NEEDS_FN(poll_cq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_REQ_NOTIFY_CQ, + ib_uverbs_req_notify_cq, + UAPI_DEF_WRITE_I(struct ib_uverbs_req_notify_cq), + UAPI_DEF_METHOD_NEEDS_FN(req_notify_cq)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_RESIZE_CQ, + ib_uverbs_resize_cq, 
+ UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_resize_cq, + struct ib_uverbs_resize_cq_resp), + UAPI_DEF_METHOD_NEEDS_FN(resize_cq)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_CQ, + ib_uverbs_ex_create_cq, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_cq, + reserved, + struct ib_uverbs_ex_create_cq_resp, + response_length), + UAPI_DEF_METHOD_NEEDS_FN(create_cq)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_MODIFY_CQ, + ib_uverbs_ex_modify_cq, + UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq), + UAPI_DEF_METHOD_NEEDS_FN(create_cq))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_DEVICE, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_GET_CONTEXT, + ib_uverbs_get_context, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_get_context, + struct ib_uverbs_get_context_resp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_QUERY_DEVICE, + ib_uverbs_query_device, + UAPI_DEF_WRITE_IO(struct ib_uverbs_query_device, + struct ib_uverbs_query_device_resp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_QUERY_PORT, + ib_uverbs_query_port, + UAPI_DEF_WRITE_IO(struct ib_uverbs_query_port, + struct ib_uverbs_query_port_resp), + UAPI_DEF_METHOD_NEEDS_FN(query_port)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_QUERY_DEVICE, + ib_uverbs_ex_query_device, + UAPI_DEF_WRITE_IO_EX( + struct ib_uverbs_ex_query_device, + reserved, + struct ib_uverbs_ex_query_device_resp, + response_length), + UAPI_DEF_METHOD_NEEDS_FN(query_device)), + UAPI_DEF_OBJ_NEEDS_FN(alloc_ucontext), + UAPI_DEF_OBJ_NEEDS_FN(dealloc_ucontext)), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_FLOW, + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_FLOW, + ib_uverbs_ex_create_flow, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_create_flow, + flow_attr, + struct ib_uverbs_create_flow_resp, + flow_handle), + UAPI_DEF_METHOD_NEEDS_FN(create_flow)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_DESTROY_FLOW, + ib_uverbs_ex_destroy_flow, + UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_flow), + 
UAPI_DEF_METHOD_NEEDS_FN(destroy_flow))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_MR, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_DEREG_MR, + ib_uverbs_dereg_mr, + UAPI_DEF_WRITE_I(struct ib_uverbs_dereg_mr), + UAPI_DEF_METHOD_NEEDS_FN(dereg_mr)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_REG_MR, + ib_uverbs_reg_mr, + UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_reg_mr, + struct ib_uverbs_reg_mr_resp), + UAPI_DEF_METHOD_NEEDS_FN(reg_user_mr)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_REREG_MR, + ib_uverbs_rereg_mr, + UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr, + struct ib_uverbs_rereg_mr_resp), + UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_MW, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_ALLOC_MW, + ib_uverbs_alloc_mw, + UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_mw, + struct ib_uverbs_alloc_mw_resp), + UAPI_DEF_METHOD_NEEDS_FN(alloc_mw)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DEALLOC_MW, + ib_uverbs_dealloc_mw, + UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_mw), + UAPI_DEF_METHOD_NEEDS_FN(dealloc_mw))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_PD, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_ALLOC_PD, + ib_uverbs_alloc_pd, + UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_pd, + struct ib_uverbs_alloc_pd_resp), + UAPI_DEF_METHOD_NEEDS_FN(alloc_pd)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DEALLOC_PD, + ib_uverbs_dealloc_pd, + UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_pd), + UAPI_DEF_METHOD_NEEDS_FN(dealloc_pd))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_QP, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_ATTACH_MCAST, + ib_uverbs_attach_mcast, + UAPI_DEF_WRITE_I(struct ib_uverbs_attach_mcast), + UAPI_DEF_METHOD_NEEDS_FN(attach_mcast), + UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_QP, + ib_uverbs_create_qp, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_qp, + struct ib_uverbs_create_qp_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_qp)), + DECLARE_UVERBS_WRITE( + 
IB_USER_VERBS_CMD_DESTROY_QP, + ib_uverbs_destroy_qp, + UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_qp, + struct ib_uverbs_destroy_qp_resp), + UAPI_DEF_METHOD_NEEDS_FN(destroy_qp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DETACH_MCAST, + ib_uverbs_detach_mcast, + UAPI_DEF_WRITE_I(struct ib_uverbs_detach_mcast), + UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_MODIFY_QP, + ib_uverbs_modify_qp, + UAPI_DEF_WRITE_I(struct ib_uverbs_modify_qp), + UAPI_DEF_METHOD_NEEDS_FN(modify_qp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_POST_RECV, + ib_uverbs_post_recv, + UAPI_DEF_WRITE_IO(struct ib_uverbs_post_recv, + struct ib_uverbs_post_recv_resp), + UAPI_DEF_METHOD_NEEDS_FN(post_recv)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_POST_SEND, + ib_uverbs_post_send, + UAPI_DEF_WRITE_IO(struct ib_uverbs_post_send, + struct ib_uverbs_post_send_resp), + UAPI_DEF_METHOD_NEEDS_FN(post_send)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_QUERY_QP, + ib_uverbs_query_qp, + UAPI_DEF_WRITE_IO(struct ib_uverbs_query_qp, + struct ib_uverbs_query_qp_resp), + UAPI_DEF_METHOD_NEEDS_FN(query_qp)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_QP, + ib_uverbs_ex_create_qp, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_qp, + comp_mask, + struct ib_uverbs_ex_create_qp_resp, + response_length), + UAPI_DEF_METHOD_NEEDS_FN(create_qp)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_MODIFY_QP, + ib_uverbs_ex_modify_qp, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_modify_qp, + base, + struct ib_uverbs_ex_modify_qp_resp, + response_length), + UAPI_DEF_METHOD_NEEDS_FN(modify_qp))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_RWQ_IND_TBL, + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, + ib_uverbs_ex_create_rwq_ind_table, + UAPI_DEF_WRITE_IO_EX( + struct ib_uverbs_ex_create_rwq_ind_table, + log_ind_tbl_size, + struct ib_uverbs_ex_create_rwq_ind_table_resp, + ind_tbl_num), + UAPI_DEF_METHOD_NEEDS_FN(create_rwq_ind_table)), + 
DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL, + ib_uverbs_ex_destroy_rwq_ind_table, + UAPI_DEF_WRITE_I( + struct ib_uverbs_ex_destroy_rwq_ind_table), + UAPI_DEF_METHOD_NEEDS_FN(destroy_rwq_ind_table))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_WQ, + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_WQ, + ib_uverbs_ex_create_wq, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_wq, + max_sge, + struct ib_uverbs_ex_create_wq_resp, + wqn), + UAPI_DEF_METHOD_NEEDS_FN(create_wq)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_DESTROY_WQ, + ib_uverbs_ex_destroy_wq, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_destroy_wq, + wq_handle, + struct ib_uverbs_ex_destroy_wq_resp, + reserved), + UAPI_DEF_METHOD_NEEDS_FN(destroy_wq)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_MODIFY_WQ, + ib_uverbs_ex_modify_wq, + UAPI_DEF_WRITE_I_EX(struct ib_uverbs_ex_modify_wq, + curr_wq_state), + UAPI_DEF_METHOD_NEEDS_FN(modify_wq))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_SRQ, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_SRQ, + ib_uverbs_create_srq, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_srq, + struct ib_uverbs_create_srq_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_srq)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_XSRQ, + ib_uverbs_create_xsrq, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_xsrq, + struct ib_uverbs_create_srq_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_srq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DESTROY_SRQ, + ib_uverbs_destroy_srq, + UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_srq, + struct ib_uverbs_destroy_srq_resp), + UAPI_DEF_METHOD_NEEDS_FN(destroy_srq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_MODIFY_SRQ, + ib_uverbs_modify_srq, + UAPI_DEF_WRITE_UDATA_I(struct ib_uverbs_modify_srq), + UAPI_DEF_METHOD_NEEDS_FN(modify_srq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_POST_SRQ_RECV, + ib_uverbs_post_srq_recv, + UAPI_DEF_WRITE_IO(struct ib_uverbs_post_srq_recv, + struct 
ib_uverbs_post_srq_recv_resp), + UAPI_DEF_METHOD_NEEDS_FN(post_srq_recv)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_QUERY_SRQ, + ib_uverbs_query_srq, + UAPI_DEF_WRITE_IO(struct ib_uverbs_query_srq, + struct ib_uverbs_query_srq_resp), + UAPI_DEF_METHOD_NEEDS_FN(query_srq))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_XRCD, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_CLOSE_XRCD, + ib_uverbs_close_xrcd, + UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd), + UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP, + ib_uverbs_open_qp, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_open_qp, + struct ib_uverbs_create_qp_resp)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_XRCD, + ib_uverbs_open_xrcd, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_open_xrcd, + struct ib_uverbs_open_xrcd_resp), + UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))), + + {}, +}; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index b0e493e8d860..8c81ff698052 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -404,8 +404,7 @@ static int uverbs_set_attr(struct bundle_priv *pbundle, static int ib_uverbs_run_method(struct bundle_priv *pbundle, unsigned int num_attrs) { - int (*handler)(struct ib_uverbs_file *ufile, - struct uverbs_attr_bundle *ctx); + int (*handler)(struct uverbs_attr_bundle *attrs); size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs); unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey; unsigned int i; @@ -436,6 +435,11 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, pbundle->method_elm->key_bitmap_len))) return -EINVAL; + if (pbundle->method_elm->has_udata) + uverbs_fill_udata(&pbundle->bundle, + &pbundle->bundle.driver_udata, + UVERBS_ATTR_UHW_IN, UVERBS_ATTR_UHW_OUT); + if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) { struct uverbs_obj_attr *destroy_attr = &pbundle->bundle.attrs[destroy_bkey].obj_attr; @@ -445,10 +449,10 
@@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, return ret; __clear_bit(destroy_bkey, pbundle->uobj_finalize); - ret = handler(pbundle->bundle.ufile, &pbundle->bundle); + ret = handler(&pbundle->bundle); uobj_put_destroy(destroy_attr->uobject); } else { - ret = handler(pbundle->bundle.ufile, &pbundle->bundle); + ret = handler(&pbundle->bundle); } /* @@ -662,35 +666,37 @@ int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, EXPORT_SYMBOL(uverbs_get_flags32); /* - * This is for ease of conversion. The purpose is to convert all drivers to - * use uverbs_attr_bundle instead of ib_udata. Assume attr == 0 is input and - * attr == 1 is output. + * Fill a ib_udata struct (core or uhw) using the given attribute IDs. + * This is primarily used to convert the UVERBS_ATTR_UHW() into the + * ib_udata format used by the drivers. */ -void create_udata(struct uverbs_attr_bundle *bundle, struct ib_udata *udata) +void uverbs_fill_udata(struct uverbs_attr_bundle *bundle, + struct ib_udata *udata, unsigned int attr_in, + unsigned int attr_out) { struct bundle_priv *pbundle = container_of(bundle, struct bundle_priv, bundle); - const struct uverbs_attr *uhw_in = - uverbs_attr_get(bundle, UVERBS_ATTR_UHW_IN); - const struct uverbs_attr *uhw_out = - uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT); - - if (!IS_ERR(uhw_in)) { - udata->inlen = uhw_in->ptr_attr.len; - if (uverbs_attr_ptr_is_inline(uhw_in)) + const struct uverbs_attr *in = + uverbs_attr_get(&pbundle->bundle, attr_in); + const struct uverbs_attr *out = + uverbs_attr_get(&pbundle->bundle, attr_out); + + if (!IS_ERR(in)) { + udata->inlen = in->ptr_attr.len; + if (uverbs_attr_ptr_is_inline(in)) udata->inbuf = - &pbundle->user_attrs[uhw_in->ptr_attr.uattr_idx] + &pbundle->user_attrs[in->ptr_attr.uattr_idx] .data; else - udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data); + udata->inbuf = u64_to_user_ptr(in->ptr_attr.data); } else { udata->inbuf = NULL; udata->inlen = 0; } - if 
(!IS_ERR(uhw_out)) { - udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data); - udata->outlen = uhw_out->ptr_attr.len; + if (!IS_ERR(out)) { + udata->outbuf = u64_to_user_ptr(out->ptr_attr.data); + udata->outlen = out->ptr_attr.len; } else { udata->outbuf = NULL; udata->outlen = 0; @@ -745,3 +751,14 @@ int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, return 0; } EXPORT_SYMBOL(_uverbs_get_const); + +int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, + size_t idx, const void *from, size_t size) +{ + const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + + if (clear_user(u64_to_user_ptr(attr->ptr_attr.data), + attr->ptr_attr.len)) + return -EFAULT; + return uverbs_copy_to(bundle, idx, from, size); +} diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 6d373f5515b7..9f9172eb1512 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -74,64 +74,6 @@ static dev_t dynamic_uverbs_dev; static struct class *uverbs_class; static DEFINE_IDA(uverbs_ida); - -static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) = { - [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, - [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, - [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, - [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, - [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, - [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, - [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr, - [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, - [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, - [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, - [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, - [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, - [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, - 
[IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, - [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, - [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, - [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, - [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, - [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, - [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, - [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, - [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, - [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, - [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, - [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, - [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, - [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, - [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, - [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, - [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, - [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, - [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, - [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, - [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, - [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, -}; - -static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) = { - [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, - [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, - [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, - [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, - [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, - [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, - [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, - [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, - [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, - 
[IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, - [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp, - [IB_USER_VERBS_EX_CMD_MODIFY_CQ] = ib_uverbs_ex_modify_cq, -}; - static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); @@ -139,7 +81,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); * Must be called with the ufile->device->disassociate_srcu held, and the lock * must be held until use of the ucontext is finished. */ -struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile) +struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile) { /* * We do not hold the hw_destroy_rwsem lock for this flow, instead @@ -157,14 +99,14 @@ struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile) return ucontext; } -EXPORT_SYMBOL(ib_uverbs_get_ucontext); +EXPORT_SYMBOL(ib_uverbs_get_ucontext_file); int uverbs_dealloc_mw(struct ib_mw *mw) { struct ib_pd *pd = mw->pd; int ret; - ret = mw->device->dealloc_mw(mw); + ret = mw->device->ops.dealloc_mw(mw); if (!ret) atomic_dec(&pd->usecnt); return ret; @@ -255,7 +197,7 @@ void ib_uverbs_release_file(struct kref *ref) srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); - if (ib_dev && !ib_dev->disassociate_ucontext) + if (ib_dev && !ib_dev->ops.disassociate_ucontext) module_put(ib_dev->owner); srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); @@ -646,51 +588,19 @@ err_put_refs: return filp; } -static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command, - bool extended) -{ - if (!extended) - return ufile->uverbs_cmd_mask & BIT_ULL(command); - - return ufile->uverbs_ex_cmd_mask & BIT_ULL(command); -} - -static bool verify_command_idx(u32 command, bool extended) -{ - if (extended) - return command < 
ARRAY_SIZE(uverbs_ex_cmd_table) && - uverbs_ex_cmd_table[command]; - - return command < ARRAY_SIZE(uverbs_cmd_table) && - uverbs_cmd_table[command]; -} - -static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr, - u32 *command, bool *extended) -{ - if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED | - IB_USER_VERBS_CMD_COMMAND_MASK)) - return -EINVAL; - - *command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK; - *extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED; - - if (!verify_command_idx(*command, *extended)) - return -EOPNOTSUPP; - - return 0; -} - static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, - struct ib_uverbs_ex_cmd_hdr *ex_hdr, - size_t count, bool extended) + struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count, + const struct uverbs_api_write_method *method_elm) { - if (extended) { + if (method_elm->is_ex) { count -= sizeof(*hdr) + sizeof(*ex_hdr); if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count) return -EINVAL; + if (hdr->in_words * 8 < method_elm->req_size) + return -ENOSPC; + if (ex_hdr->cmd_hdr_reserved) return -EINVAL; @@ -698,6 +608,9 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, if (!hdr->out_words && !ex_hdr->provider_out_words) return -EINVAL; + if (hdr->out_words * 8 < method_elm->resp_size) + return -ENOSPC; + if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(ex_hdr->response), (hdr->out_words + ex_hdr->provider_out_words) * 8)) @@ -714,6 +627,24 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, if (hdr->in_words * 4 != count) return -EINVAL; + if (count < method_elm->req_size + sizeof(hdr)) { + /* + * rdma-core v18 and v19 have a bug where they send DESTROY_CQ + * with a 16 byte write instead of 24. Old kernels didn't + * check the size so they allowed this. Now that the size is + * checked provide a compatibility work around to not break + * those userspaces. 
+ */ + if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ && + count == 16) { + hdr->in_words = 6; + return 0; + } + return -ENOSPC; + } + if (hdr->out_words * 4 < method_elm->resp_size) + return -ENOSPC; + return 0; } @@ -721,11 +652,12 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; + const struct uverbs_api_write_method *method_elm; + struct uverbs_api *uapi = file->device->uapi; struct ib_uverbs_ex_cmd_hdr ex_hdr; struct ib_uverbs_cmd_hdr hdr; - bool extended; + struct uverbs_attr_bundle bundle; int srcu_key; - u32 command; ssize_t ret; if (!ib_safe_file_access(filp)) { @@ -740,57 +672,92 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - ret = process_hdr(&hdr, &command, &extended); - if (ret) - return ret; + method_elm = uapi_get_method(uapi, hdr.command); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); - if (extended) { + if (method_elm->is_ex) { if (count < (sizeof(hdr) + sizeof(ex_hdr))) return -EINVAL; if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) return -EFAULT; } - ret = verify_hdr(&hdr, &ex_hdr, count, extended); + ret = verify_hdr(&hdr, &ex_hdr, count, method_elm); if (ret) return ret; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - if (!verify_command_mask(file, command, extended)) { - ret = -EOPNOTSUPP; - goto out; - } - buf += sizeof(hdr); - if (!extended) { - ret = uverbs_cmd_table[command](file, buf, - hdr.in_words * 4, - hdr.out_words * 4); - } else { - struct ib_udata ucore; - struct ib_udata uhw; + bundle.ufile = file; + if (!method_elm->is_ex) { + size_t in_len = hdr.in_words * 4 - sizeof(hdr); + size_t out_len = hdr.out_words * 4; + u64 response = 0; + + if (method_elm->has_udata) { + bundle.driver_udata.inlen = + in_len - method_elm->req_size; + in_len = method_elm->req_size; + if (bundle.driver_udata.inlen) + 
bundle.driver_udata.inbuf = buf + in_len; + else + bundle.driver_udata.inbuf = NULL; + } else { + memset(&bundle.driver_udata, 0, + sizeof(bundle.driver_udata)); + } + + if (method_elm->has_resp) { + /* + * The macros check that if has_resp is set + * then the command request structure starts + * with a '__aligned u64 response' member. + */ + ret = get_user(response, (const u64 *)buf); + if (ret) + goto out_unlock; + + if (method_elm->has_udata) { + bundle.driver_udata.outlen = + out_len - method_elm->resp_size; + out_len = method_elm->resp_size; + if (bundle.driver_udata.outlen) + bundle.driver_udata.outbuf = + u64_to_user_ptr(response + + out_len); + else + bundle.driver_udata.outbuf = NULL; + } + } else { + bundle.driver_udata.outlen = 0; + bundle.driver_udata.outbuf = NULL; + } + ib_uverbs_init_udata_buf_or_null( + &bundle.ucore, buf, u64_to_user_ptr(response), + in_len, out_len); + } else { buf += sizeof(ex_hdr); - ib_uverbs_init_udata_buf_or_null(&ucore, buf, + ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf, u64_to_user_ptr(ex_hdr.response), hdr.in_words * 8, hdr.out_words * 8); - ib_uverbs_init_udata_buf_or_null(&uhw, - buf + ucore.inlen, - u64_to_user_ptr(ex_hdr.response) + ucore.outlen, - ex_hdr.provider_in_words * 8, - ex_hdr.provider_out_words * 8); + ib_uverbs_init_udata_buf_or_null( + &bundle.driver_udata, buf + bundle.ucore.inlen, + u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen, + ex_hdr.provider_in_words * 8, + ex_hdr.provider_out_words * 8); - ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw); - ret = (ret) ? : count; } -out: + ret = method_elm->handler(&bundle); +out_unlock: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); - return ret; + return (ret) ? 
: count; } static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) @@ -801,13 +768,13 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) int srcu_key; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ucontext = ib_uverbs_get_ucontext(file); + ucontext = ib_uverbs_get_ucontext_file(file); if (IS_ERR(ucontext)) { ret = PTR_ERR(ucontext); goto out; } - ret = ucontext->device->mmap(ucontext, vma); + ret = ucontext->device->ops.mmap(ucontext, vma); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; @@ -1069,7 +1036,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) /* In case IB device supports disassociate ucontext, there is no hard * dependency between uverbs device and its low level device. */ - module_dependent = !(ib_dev->disassociate_ucontext); + module_dependent = !(ib_dev->ops.disassociate_ucontext); if (module_dependent) { if (!try_module_get(ib_dev->owner)) { @@ -1102,9 +1069,6 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); - file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask; - file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask; - setup_ufile_idr_uobject(file); return nonseekable_open(inode, filp); @@ -1224,7 +1188,7 @@ static int ib_uverbs_create_uapi(struct ib_device *device, { struct uverbs_api *uapi; - uapi = uverbs_alloc_api(device->driver_specs, device->driver_id); + uapi = uverbs_alloc_api(device); if (IS_ERR(uapi)) return PTR_ERR(uapi); @@ -1239,7 +1203,7 @@ static void ib_uverbs_add_one(struct ib_device *device) struct ib_uverbs_device *uverbs_dev; int ret; - if (!device->alloc_ucontext) + if (!device->ops.alloc_ucontext) return; uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); @@ -1285,7 +1249,7 @@ static void ib_uverbs_add_one(struct ib_device *device) dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum); 
cdev_init(&uverbs_dev->cdev, - device->mmap ? &uverbs_mmap_fops : &uverbs_fops); + device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops); uverbs_dev->cdev.owner = THIS_MODULE; ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev); @@ -1373,7 +1337,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev); ida_free(&uverbs_ida, uverbs_dev->devnum); - if (device->disassociate_ucontext) { + if (device->ops.disassociate_ucontext) { /* We disassociate HW resources and immediately return. * Userspace will see a EIO errno for all future access. * Upon returning, ib_device may be freed internally and is not diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 203cc96ac6f5..cbc72312eb41 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -42,7 +42,8 @@ static int uverbs_free_ah(struct ib_uobject *uobject, enum rdma_remove_reason why) { - return rdma_destroy_ah((struct ib_ah *)uobject->object); + return rdma_destroy_ah((struct ib_ah *)uobject->object, + RDMA_DESTROY_AH_SLEEPABLE); } static int uverbs_free_flow(struct ib_uobject *uobject, @@ -54,7 +55,7 @@ static int uverbs_free_flow(struct ib_uobject *uobject, struct ib_qp *qp = flow->qp; int ret; - ret = flow->device->destroy_flow(flow); + ret = flow->device->ops.destroy_flow(flow); if (!ret) { if (qp) atomic_dec(&qp->usecnt); @@ -210,8 +211,7 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj, return 0; }; -int uverbs_destroy_def_handler(struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) { return 0; } @@ -229,58 +229,106 @@ DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_QP, UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp)); +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_MW_DESTROY, + 
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MW_HANDLE, + UVERBS_OBJECT_MW, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW, - UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw), + &UVERBS_METHOD(UVERBS_METHOD_MW_DESTROY)); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_SRQ, UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), uverbs_free_srq)); +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_AH_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_AH_HANDLE, + UVERBS_OBJECT_AH, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH, - UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah), + &UVERBS_METHOD(UVERBS_METHOD_AH_DESTROY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_FLOW_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_FLOW, UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), - uverbs_free_flow)); + uverbs_free_flow), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_DESTROY)); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_WQ, UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq)); +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_RWQ_IND_TBL_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE, + UVERBS_OBJECT_RWQ_IND_TBL, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL, - UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl), + &UVERBS_METHOD(UVERBS_METHOD_RWQ_IND_TBL_DESTROY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_XRCD_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_XRCD_HANDLE, + UVERBS_OBJECT_XRCD, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_XRCD, UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), - uverbs_free_xrcd)); + 
uverbs_free_xrcd), + &UVERBS_METHOD(UVERBS_METHOD_XRCD_DESTROY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_PD_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD, - UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd)); - -DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE); - -DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, - &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE), - &UVERBS_OBJECT(UVERBS_OBJECT_PD), - &UVERBS_OBJECT(UVERBS_OBJECT_MR), - &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL), - &UVERBS_OBJECT(UVERBS_OBJECT_CQ), - &UVERBS_OBJECT(UVERBS_OBJECT_QP), - &UVERBS_OBJECT(UVERBS_OBJECT_AH), - &UVERBS_OBJECT(UVERBS_OBJECT_MW), - &UVERBS_OBJECT(UVERBS_OBJECT_SRQ), - &UVERBS_OBJECT(UVERBS_OBJECT_FLOW), - &UVERBS_OBJECT(UVERBS_OBJECT_WQ), - &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), - &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), - &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), - &UVERBS_OBJECT(UVERBS_OBJECT_DM), - &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS)); - -const struct uverbs_object_tree_def *uverbs_default_get_objects(void) -{ - return &uverbs_default_objects; -} + UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd), + &UVERBS_METHOD(UVERBS_METHOD_PD_DESTROY)); + +const struct uapi_definition uverbs_def_obj_intf[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_PD, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CHANNEL, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP, + UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_AH, + UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ, + UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW, + UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)), + 
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ, + UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + UVERBS_OBJECT_RWQ_IND_TBL, + UAPI_DEF_OBJ_NEEDS_FN(destroy_rwq_ind_table)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_XRCD, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index a0ffdcf9a51c..309c5e80988d 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -44,11 +44,11 @@ static int uverbs_free_counters(struct ib_uobject *uobject, if (ret) return ret; - return counters->device->destroy_counters(counters); + return counters->device->ops.destroy_counters(counters); } static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE); @@ -61,10 +61,10 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( * have the ability to remove methods from parse tree once * such condition is met. 
*/ - if (!ib_dev->create_counters) + if (!ib_dev->ops.create_counters) return -EOPNOTSUPP; - counters = ib_dev->create_counters(ib_dev, attrs); + counters = ib_dev->ops.create_counters(ib_dev, attrs); if (IS_ERR(counters)) { ret = PTR_ERR(counters); goto err_create_counters; @@ -82,7 +82,7 @@ err_create_counters: } static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_counters_read_attr read_attr = {}; const struct uverbs_attr *uattr; @@ -90,7 +90,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)( uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE); int ret; - if (!counters->device->read_counters) + if (!counters->device->ops.read_counters) return -EOPNOTSUPP; if (!atomic_read(&counters->usecnt)) @@ -109,7 +109,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)( if (IS_ERR(read_attr.counters_buff)) return PTR_ERR(read_attr.counters_buff); - ret = counters->device->read_counters(counters, &read_attr, attrs); + ret = counters->device->ops.read_counters(counters, &read_attr, attrs); if (ret) return ret; @@ -149,3 +149,9 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ)); + +const struct uapi_definition uverbs_def_obj_counters[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COUNTERS, + UAPI_DEF_OBJ_NEEDS_FN(destroy_counters)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 5b5f2052cd52..a59ea89e3f2b 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -58,13 +58,12 @@ static int uverbs_free_cq(struct ib_uobject *uobject, } static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct 
uverbs_attr_bundle *attrs) { struct ib_ucq_object *obj = container_of( uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE), typeof(*obj), uobject); struct ib_device *ib_dev = obj->uobject.context->device; - struct ib_udata uhw; int ret; u64 user_handle; struct ib_cq_init_attr attr = {}; @@ -72,7 +71,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( struct ib_uverbs_completion_event_file *ev_file = NULL; struct ib_uobject *ev_file_uobj; - if (!ib_dev->create_cq || !ib_dev->destroy_cq) + if (!ib_dev->ops.create_cq || !ib_dev->ops.destroy_cq) return -EOPNOTSUPP; ret = uverbs_copy_from(&attr.comp_vector, attrs, @@ -101,7 +100,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( uverbs_uobject_get(ev_file_uobj); } - if (attr.comp_vector >= file->device->num_comp_vectors) { + if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) { ret = -EINVAL; goto err_event_file; } @@ -111,10 +110,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( INIT_LIST_HEAD(&obj->comp_list); INIT_LIST_HEAD(&obj->async_list); - /* Temporary, only until drivers get the new uverbs_attr_bundle */ - create_udata(attrs, &uhw); - - cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, &uhw); + cq = ib_dev->ops.create_cq(ib_dev, &attr, obj->uobject.context, + &attrs->driver_udata); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_event_file; @@ -129,7 +126,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( obj->uobject.user_handle = user_handle; atomic_set(&cq->usecnt, 0); cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_add(&cq->res); + rdma_restrack_uadd(&cq->res); ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe, sizeof(cq->cqe)); @@ -173,7 +170,7 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_UHW()); static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, 
UVERBS_ATTR_DESTROY_CQ_HANDLE); @@ -207,3 +204,9 @@ DECLARE_UVERBS_NAMED_OBJECT( &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) #endif ); + +const struct uapi_definition uverbs_def_obj_cq[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_CQ, + UAPI_DEF_OBJ_NEEDS_FN(destroy_cq)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c new file mode 100644 index 000000000000..5030ec480370 --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/uverbs_std_types.h> +#include "rdma_core.h" +#include "uverbs.h" +#include <rdma/uverbs_ioctl.h> +#include <rdma/opa_addr.h> + +/* + * This ioctl method allows calling any defined write or write_ex + * handler. This essentially replaces the hdr/ex_hdr system with the ioctl + * marshalling, and brings the non-ex path into the same marshalling as the ex + * path. 
+ */ +static int UVERBS_HANDLER(UVERBS_METHOD_INVOKE_WRITE)( + struct uverbs_attr_bundle *attrs) +{ + struct uverbs_api *uapi = attrs->ufile->device->uapi; + const struct uverbs_api_write_method *method_elm; + u32 cmd; + int rc; + + rc = uverbs_get_const(&cmd, attrs, UVERBS_ATTR_WRITE_CMD); + if (rc) + return rc; + + method_elm = uapi_get_method(uapi, cmd); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); + + uverbs_fill_udata(attrs, &attrs->ucore, UVERBS_ATTR_CORE_IN, + UVERBS_ATTR_CORE_OUT); + + if (attrs->ucore.inlen < method_elm->req_size || + attrs->ucore.outlen < method_elm->resp_size) + return -ENOSPC; + + return method_elm->handler(attrs); +} + +DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_INVOKE_WRITE, + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_WRITE_CMD, + enum ib_uverbs_write_cmds, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CORE_IN, + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), + UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CORE_OUT, + UVERBS_ATTR_MIN_SIZE(0), + UA_OPTIONAL), + UVERBS_ATTR_UHW()); + +static uint32_t * +gather_objects_handle(struct ib_uverbs_file *ufile, + const struct uverbs_api_object *uapi_object, + struct uverbs_attr_bundle *attrs, + ssize_t out_len, + u64 *total) +{ + u64 max_count = out_len / sizeof(u32); + struct ib_uobject *obj; + u64 count = 0; + u32 *handles; + + /* Allocated memory that cannot page out where we gather + * all object ids under a spin_lock. 
+ */ + handles = uverbs_zalloc(attrs, out_len); + if (IS_ERR(handles)) + return handles; + + spin_lock_irq(&ufile->uobjects_lock); + list_for_each_entry(obj, &ufile->uobjects, list) { + u32 obj_id = obj->id; + + if (obj->uapi_object != uapi_object) + continue; + + if (count >= max_count) + break; + + handles[count] = obj_id; + count++; + } + spin_unlock_irq(&ufile->uobjects_lock); + + *total = count; + return handles; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_INFO_HANDLES)( + struct uverbs_attr_bundle *attrs) +{ + const struct uverbs_api_object *uapi_object; + ssize_t out_len; + u64 total = 0; + u16 object_id; + u32 *handles; + int ret; + + out_len = uverbs_attr_get_len(attrs, UVERBS_ATTR_INFO_HANDLES_LIST); + if (out_len <= 0 || (out_len % sizeof(u32) != 0)) + return -EINVAL; + + ret = uverbs_get_const(&object_id, attrs, UVERBS_ATTR_INFO_OBJECT_ID); + if (ret) + return ret; + + uapi_object = uapi_get_object(attrs->ufile->device->uapi, object_id); + if (!uapi_object) + return -EINVAL; + + handles = gather_objects_handle(attrs->ufile, uapi_object, attrs, + out_len, &total); + if (IS_ERR(handles)) + return PTR_ERR(handles); + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_HANDLES_LIST, handles, + sizeof(u32) * total); + if (ret) + goto err; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_TOTAL_HANDLES, &total, + sizeof(total)); +err: + return ret; +} + +void copy_port_attr_to_resp(struct ib_port_attr *attr, + struct ib_uverbs_query_port_resp *resp, + struct ib_device *ib_dev, u8 port_num) +{ + resp->state = attr->state; + resp->max_mtu = attr->max_mtu; + resp->active_mtu = attr->active_mtu; + resp->gid_tbl_len = attr->gid_tbl_len; + resp->port_cap_flags = make_port_cap_flags(attr); + resp->max_msg_sz = attr->max_msg_sz; + resp->bad_pkey_cntr = attr->bad_pkey_cntr; + resp->qkey_viol_cntr = attr->qkey_viol_cntr; + resp->pkey_tbl_len = attr->pkey_tbl_len; + + if (rdma_is_grh_required(ib_dev, port_num)) + resp->flags |= IB_UVERBS_QPF_GRH_REQUIRED; + + if 
(rdma_cap_opa_ah(ib_dev, port_num)) { + resp->lid = OPA_TO_IB_UCAST_LID(attr->lid); + resp->sm_lid = OPA_TO_IB_UCAST_LID(attr->sm_lid); + } else { + resp->lid = ib_lid_cpu16(attr->lid); + resp->sm_lid = ib_lid_cpu16(attr->sm_lid); + } + + resp->lmc = attr->lmc; + resp->max_vl_num = attr->max_vl_num; + resp->sm_sl = attr->sm_sl; + resp->subnet_timeout = attr->subnet_timeout; + resp->init_type_reply = attr->init_type_reply; + resp->active_width = attr->active_width; + resp->active_speed = attr->active_speed; + resp->phys_state = attr->phys_state; + resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num); +} + +static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_device *ib_dev = attrs->ufile->device->ib_dev; + struct ib_port_attr attr = {}; + struct ib_uverbs_query_port_resp_ex resp = {}; + int ret; + u8 port_num; + + /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */ + if (!ib_dev->ops.query_port) + return -EOPNOTSUPP; + + ret = uverbs_get_const(&port_num, attrs, + UVERBS_ATTR_QUERY_PORT_PORT_NUM); + if (ret) + return ret; + + ret = ib_query_port(ib_dev, port_num, &attr); + if (ret) + return ret; + + copy_port_attr_to_resp(&attr, &resp.legacy_resp, ib_dev, port_num); + resp.port_cap_flags2 = attr.port_cap_flags2; + + return uverbs_copy_to_struct_or_zero(attrs, UVERBS_ATTR_QUERY_PORT_RESP, + &resp, sizeof(resp)); +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_INFO_HANDLES, + /* Also includes any device specific object ids */ + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_INFO_OBJECT_ID, + enum uverbs_default_objects, UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_TOTAL_HANDLES, + UVERBS_ATTR_TYPE(u32), UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_HANDLES_LIST, + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_QUERY_PORT, + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_PORT_PORT_NUM, u8, UA_MANDATORY), + UVERBS_ATTR_PTR_OUT( + UVERBS_ATTR_QUERY_PORT_RESP, + 
UVERBS_ATTR_STRUCT(struct ib_uverbs_query_port_resp_ex, + reserved), + UA_MANDATORY)); + +DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE, + &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE), + &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES), + &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT)); + +const struct uapi_definition uverbs_def_obj_device[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE), + {}, +}; diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index edc3ff7733d4..2ef70637bee1 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -43,12 +43,11 @@ static int uverbs_free_dm(struct ib_uobject *uobject, if (ret) return ret; - return dm->device->dealloc_dm(dm); + return dm->device->ops.dealloc_dm(dm); } -static int -UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)( + struct uverbs_attr_bundle *attrs) { struct ib_dm_alloc_attr attr = {}; struct ib_uobject *uobj = @@ -58,7 +57,7 @@ UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file, struct ib_dm *dm; int ret; - if (!ib_dev->alloc_dm) + if (!ib_dev->ops.alloc_dm) return -EOPNOTSUPP; ret = uverbs_copy_from(&attr.length, attrs, @@ -71,7 +70,7 @@ UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file, if (ret) return ret; - dm = ib_dev->alloc_dm(ib_dev, uobj->context, &attr, attrs); + dm = ib_dev->ops.alloc_dm(ib_dev, uobj->context, &attr, attrs); if (IS_ERR(dm)) return PTR_ERR(dm); @@ -109,3 +108,9 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM, UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm), &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC), &UVERBS_METHOD(UVERBS_METHOD_DM_FREE)); + +const struct uapi_definition uverbs_def_obj_dm[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_dm)), + {} +}; diff --git 
a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index cb9486ad5c67..4962b87fa600 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -43,7 +43,7 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, if (ret) return ret; - return action->device->destroy_flow_action(action); + return action->device->ops.destroy_flow_action(action); } static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs, @@ -223,7 +223,6 @@ struct ib_flow_action_esp_attr { #define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW static int parse_flow_action_esp(struct ib_device *ib_dev, - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs, struct ib_flow_action_esp_attr *esp_attr, bool is_modify) @@ -305,7 +304,7 @@ static int parse_flow_action_esp(struct ib_device *ib_dev, } static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); @@ -314,15 +313,16 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( struct ib_flow_action *action; struct ib_flow_action_esp_attr esp_attr = {}; - if (!ib_dev->create_flow_action_esp) + if (!ib_dev->ops.create_flow_action_esp) return -EOPNOTSUPP; - ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, false); + ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false); if (ret) return ret; /* No need to check as this attribute is marked as MANDATORY */ - action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs); + action = ib_dev->ops.create_flow_action_esp(ib_dev, &esp_attr.hdr, + attrs); if (IS_ERR(action)) return PTR_ERR(action); @@ -333,7 +333,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( } 
static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE); @@ -341,19 +341,19 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)( int ret; struct ib_flow_action_esp_attr esp_attr = {}; - if (!action->device->modify_flow_action_esp) + if (!action->device->ops.modify_flow_action_esp) return -EOPNOTSUPP; - ret = parse_flow_action_esp(action->device, file, attrs, &esp_attr, - true); + ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true); if (ret) return ret; if (action->type != IB_FLOW_ACTION_ESP) return -EINVAL; - return action->device->modify_flow_action_esp(action, &esp_attr.hdr, - attrs); + return action->device->ops.modify_flow_action_esp(action, + &esp_attr.hdr, + attrs); } static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { @@ -438,3 +438,10 @@ DECLARE_UVERBS_NAMED_OBJECT( &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); + +const struct uapi_definition uverbs_def_obj_flow_action[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + UVERBS_OBJECT_FLOW_ACTION, + UAPI_DEF_OBJ_NEEDS_FN(destroy_flow_action)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index cf02e774303e..4d4be0c2b752 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -39,8 +39,44 @@ static int uverbs_free_mr(struct ib_uobject *uobject, return ib_dereg_mr((struct ib_mr *)uobject->object); } +static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_ADVISE_MR_PD_HANDLE); + enum ib_uverbs_advise_mr_advice advice; + struct 
ib_device *ib_dev = pd->device; + struct ib_sge *sg_list; + int num_sge; + u32 flags; + int ret; + + /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */ + if (!ib_dev->ops.advise_mr) + return -EOPNOTSUPP; + + ret = uverbs_get_const(&advice, attrs, UVERBS_ATTR_ADVISE_MR_ADVICE); + if (ret) + return ret; + + ret = uverbs_get_flags32(&flags, attrs, UVERBS_ATTR_ADVISE_MR_FLAGS, + IB_UVERBS_ADVISE_MR_FLAG_FLUSH); + if (ret) + return ret; + + num_sge = uverbs_attr_ptr_get_array_size( + attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge)); + if (num_sge < 0) + return num_sge; + + sg_list = uverbs_attr_get_alloced_ptr(attrs, + UVERBS_ATTR_ADVISE_MR_SGE_LIST); + return ib_dev->ops.advise_mr(pd, advice, flags, sg_list, num_sge, + attrs); +} + static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_dm_mr_attr attr = {}; struct ib_uobject *uobj = @@ -54,7 +90,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( struct ib_mr *mr; int ret; - if (!ib_dev->reg_dm_mr) + if (!ib_dev->ops.reg_dm_mr) return -EOPNOTSUPP; ret = uverbs_copy_from(&attr.offset, attrs, UVERBS_ATTR_REG_DM_MR_OFFSET); @@ -83,7 +119,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( attr.length > dm->length - attr.offset) return -EINVAL; - mr = pd->device->reg_dm_mr(pd, dm, &attr, attrs); + mr = pd->device->ops.reg_dm_mr(pd, dm, &attr, attrs); if (IS_ERR(mr)) return PTR_ERR(mr); @@ -115,6 +151,23 @@ err_dereg: } DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_ADVISE_MR, + UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_ADVISE_MR_ADVICE, + enum ib_uverbs_advise_mr_advice, + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_ADVISE_MR_FLAGS, + enum ib_uverbs_advise_mr_flag, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ADVISE_MR_SGE_LIST, + UVERBS_ATTR_MIN_SIZE(sizeof(struct ib_uverbs_sge)), 
+ UA_MANDATORY, + UA_ALLOC_AND_COPY)); + +DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_DM_MR_REG, UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR, @@ -143,7 +196,22 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_MR_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_MR, UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), - &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG)); + &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG), + &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY), + &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR)); + +const struct uapi_definition uverbs_def_obj_mr[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR, + UAPI_DEF_OBJ_NEEDS_FN(dereg_mr)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 86f3fc5e04b4..9ae08e4b78a3 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -8,6 +8,11 @@ #include "rdma_core.h" #include "uverbs.h" +static int ib_uverbs_notsupp(struct uverbs_attr_bundle *attrs) +{ + return -EOPNOTSUPP; +} + static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size) { void *elm; @@ -26,6 +31,70 @@ static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size) return elm; } +static void *uapi_add_get_elm(struct uverbs_api *uapi, u32 key, + size_t alloc_size, bool *exists) +{ + void *elm; + + elm = uapi_add_elm(uapi, key, alloc_size); + if (!IS_ERR(elm)) { + *exists = false; + return elm; + } + + if (elm != ERR_PTR(-EEXIST)) + return elm; + + elm = radix_tree_lookup(&uapi->radix, key); + if (WARN_ON(!elm)) + return ERR_PTR(-EINVAL); + *exists = true; + return elm; +} + +static int uapi_create_write(struct uverbs_api *uapi, + struct ib_device *ibdev, + const struct uapi_definition *def, + u32 obj_key, + u32 *cur_method_key) +{ + struct 
uverbs_api_write_method *method_elm; + u32 method_key = obj_key; + bool exists; + + if (def->write.is_ex) + method_key |= uapi_key_write_ex_method(def->write.command_num); + else + method_key |= uapi_key_write_method(def->write.command_num); + + method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm), + &exists); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); + + if (WARN_ON(exists && (def->write.is_ex != method_elm->is_ex))) + return -EINVAL; + + method_elm->is_ex = def->write.is_ex; + method_elm->handler = def->func_write; + if (def->write.is_ex) + method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask & + BIT_ULL(def->write.command_num)); + else + method_elm->disabled = !(ibdev->uverbs_cmd_mask & + BIT_ULL(def->write.command_num)); + + if (!def->write.is_ex && def->func_write) { + method_elm->has_udata = def->write.has_udata; + method_elm->has_resp = def->write.has_resp; + method_elm->req_size = def->write.req_size; + method_elm->resp_size = def->write.resp_size; + } + + *cur_method_key = method_key; + return 0; +} + static int uapi_merge_method(struct uverbs_api *uapi, struct uverbs_api_object *obj_elm, u32 obj_key, const struct uverbs_method_def *method, @@ -34,23 +103,21 @@ static int uapi_merge_method(struct uverbs_api *uapi, u32 method_key = obj_key | uapi_key_ioctl_method(method->id); struct uverbs_api_ioctl_method *method_elm; unsigned int i; + bool exists; if (!method->attrs) return 0; - method_elm = uapi_add_elm(uapi, method_key, sizeof(*method_elm)); - if (IS_ERR(method_elm)) { - if (method_elm != ERR_PTR(-EEXIST)) - return PTR_ERR(method_elm); - + method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm), + &exists); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); + if (exists) { /* * This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE */ if (WARN_ON(method->handler)) return -EINVAL; - method_elm = radix_tree_lookup(&uapi->radix, method_key); - if (WARN_ON(!method_elm)) - return -EINVAL; } else { 
WARN_ON(!method->handler); rcu_assign_pointer(method_elm->handler, method->handler); @@ -98,72 +165,183 @@ static int uapi_merge_method(struct uverbs_api *uapi, return 0; } -static int uapi_merge_tree(struct uverbs_api *uapi, - const struct uverbs_object_tree_def *tree, - bool is_driver) +static int uapi_merge_obj_tree(struct uverbs_api *uapi, + const struct uverbs_object_def *obj, + bool is_driver) { - unsigned int i, j; + struct uverbs_api_object *obj_elm; + unsigned int i; + u32 obj_key; + bool exists; int rc; - if (!tree->objects) + obj_key = uapi_key_obj(obj->id); + obj_elm = uapi_add_get_elm(uapi, obj_key, sizeof(*obj_elm), &exists); + if (IS_ERR(obj_elm)) + return PTR_ERR(obj_elm); + + if (obj->type_attrs) { + if (WARN_ON(obj_elm->type_attrs)) + return -EINVAL; + + obj_elm->id = obj->id; + obj_elm->type_attrs = obj->type_attrs; + obj_elm->type_class = obj->type_attrs->type_class; + /* + * Today drivers are only permitted to use idr_class + * types. They cannot use FD types because we currently have + * no way to revoke the fops pointer after device + * disassociation. 
+ */ + if (WARN_ON(is_driver && + obj->type_attrs->type_class != &uverbs_idr_class)) + return -EINVAL; + } + + if (!obj->methods) return 0; - for (i = 0; i != tree->num_objects; i++) { - const struct uverbs_object_def *obj = (*tree->objects)[i]; - struct uverbs_api_object *obj_elm; - u32 obj_key; + for (i = 0; i != obj->num_methods; i++) { + const struct uverbs_method_def *method = (*obj->methods)[i]; - if (!obj) + if (!method) continue; - obj_key = uapi_key_obj(obj->id); - obj_elm = uapi_add_elm(uapi, obj_key, sizeof(*obj_elm)); - if (IS_ERR(obj_elm)) { - if (obj_elm != ERR_PTR(-EEXIST)) - return PTR_ERR(obj_elm); + rc = uapi_merge_method(uapi, obj_elm, obj_key, method, + is_driver); + if (rc) + return rc; + } - /* This occurs when a driver uses ADD_UVERBS_METHODS */ - if (WARN_ON(obj->type_attrs)) - return -EINVAL; - obj_elm = radix_tree_lookup(&uapi->radix, obj_key); - if (WARN_ON(!obj_elm)) + return 0; +} + +static int uapi_disable_elm(struct uverbs_api *uapi, + const struct uapi_definition *def, + u32 obj_key, + u32 method_key) +{ + bool exists; + + if (def->scope == UAPI_SCOPE_OBJECT) { + struct uverbs_api_object *obj_elm; + + obj_elm = uapi_add_get_elm( + uapi, obj_key, sizeof(*obj_elm), &exists); + if (IS_ERR(obj_elm)) + return PTR_ERR(obj_elm); + obj_elm->disabled = 1; + return 0; + } + + if (def->scope == UAPI_SCOPE_METHOD && + uapi_key_is_ioctl_method(method_key)) { + struct uverbs_api_ioctl_method *method_elm; + + method_elm = uapi_add_get_elm(uapi, method_key, + sizeof(*method_elm), &exists); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); + method_elm->disabled = 1; + return 0; + } + + if (def->scope == UAPI_SCOPE_METHOD && + (uapi_key_is_write_method(method_key) || + uapi_key_is_write_ex_method(method_key))) { + struct uverbs_api_write_method *write_elm; + + write_elm = uapi_add_get_elm(uapi, method_key, + sizeof(*write_elm), &exists); + if (IS_ERR(write_elm)) + return PTR_ERR(write_elm); + write_elm->disabled = 1; + return 0; + } + + 
WARN_ON(true); + return -EINVAL; +} + +static int uapi_merge_def(struct uverbs_api *uapi, struct ib_device *ibdev, + const struct uapi_definition *def_list, + bool is_driver) +{ + const struct uapi_definition *def = def_list; + u32 cur_obj_key = UVERBS_API_KEY_ERR; + u32 cur_method_key = UVERBS_API_KEY_ERR; + bool exists; + int rc; + + if (!def_list) + return 0; + + for (;; def++) { + switch ((enum uapi_definition_kind)def->kind) { + case UAPI_DEF_CHAIN: + rc = uapi_merge_def(uapi, ibdev, def->chain, is_driver); + if (rc) + return rc; + continue; + + case UAPI_DEF_CHAIN_OBJ_TREE: + if (WARN_ON(def->object_start.object_id != + def->chain_obj_tree->id)) return -EINVAL; - } else { - obj_elm->type_attrs = obj->type_attrs; - if (obj->type_attrs) { - obj_elm->type_class = - obj->type_attrs->type_class; - /* - * Today drivers are only permitted to use - * idr_class types. They cannot use FD types - * because we currently have no way to revoke - * the fops pointer after device - * disassociation. 
- */ - if (WARN_ON(is_driver && - obj->type_attrs->type_class != - &uverbs_idr_class)) - return -EINVAL; - } - } - if (!obj->methods) + cur_obj_key = uapi_key_obj(def->object_start.object_id); + rc = uapi_merge_obj_tree(uapi, def->chain_obj_tree, + is_driver); + if (rc) + return rc; continue; - for (j = 0; j != obj->num_methods; j++) { - const struct uverbs_method_def *method = - (*obj->methods)[j]; - if (!method) + case UAPI_DEF_END: + return 0; + + case UAPI_DEF_IS_SUPPORTED_DEV_FN: { + void **ibdev_fn = + (void *)(&ibdev->ops) + def->needs_fn_offset; + + if (*ibdev_fn) continue; + rc = uapi_disable_elm( + uapi, def, cur_obj_key, cur_method_key); + if (rc) + return rc; + continue; + } - rc = uapi_merge_method(uapi, obj_elm, obj_key, method, - is_driver); + case UAPI_DEF_IS_SUPPORTED_FUNC: + if (def->func_is_supported(ibdev)) + continue; + rc = uapi_disable_elm( + uapi, def, cur_obj_key, cur_method_key); if (rc) return rc; + continue; + + case UAPI_DEF_OBJECT_START: { + struct uverbs_api_object *obj_elm; + + cur_obj_key = uapi_key_obj(def->object_start.object_id); + obj_elm = uapi_add_get_elm(uapi, cur_obj_key, + sizeof(*obj_elm), &exists); + if (IS_ERR(obj_elm)) + return PTR_ERR(obj_elm); + continue; } - } - return 0; + case UAPI_DEF_WRITE: + rc = uapi_create_write( + uapi, ibdev, def, cur_obj_key, &cur_method_key); + if (rc) + return rc; + continue; + } + WARN_ON(true); + return -EINVAL; + } } static int @@ -186,13 +364,16 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi, u32 attr_bkey = uapi_bkey_attr(attr_key); u8 type = elm->spec.type; - if (uapi_key_attr_to_method(iter.index) != - uapi_key_attr_to_method(method_key)) + if (uapi_key_attr_to_ioctl_method(iter.index) != + uapi_key_attr_to_ioctl_method(method_key)) break; if (elm->spec.mandatory) __set_bit(attr_bkey, method_elm->attr_mandatory); + if (elm->spec.is_udata) + method_elm->has_udata = true; + if (type == UVERBS_ATTR_TYPE_IDR || type == UVERBS_ATTR_TYPE_FD) { u8 access = elm->spec.u.obj.access; 
@@ -229,9 +410,13 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi, static int uapi_finalize(struct uverbs_api *uapi) { + const struct uverbs_api_write_method **data; + unsigned long max_write_ex = 0; + unsigned long max_write = 0; struct radix_tree_iter iter; void __rcu **slot; int rc; + int i; radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { struct uverbs_api_ioctl_method *method_elm = @@ -243,29 +428,209 @@ static int uapi_finalize(struct uverbs_api *uapi) if (rc) return rc; } + + if (uapi_key_is_write_method(iter.index)) + max_write = max(max_write, + iter.index & UVERBS_API_ATTR_KEY_MASK); + if (uapi_key_is_write_ex_method(iter.index)) + max_write_ex = + max(max_write_ex, + iter.index & UVERBS_API_ATTR_KEY_MASK); + } + + uapi->notsupp_method.handler = ib_uverbs_notsupp; + uapi->num_write = max_write + 1; + uapi->num_write_ex = max_write_ex + 1; + data = kmalloc_array(uapi->num_write + uapi->num_write_ex, + sizeof(*uapi->write_methods), GFP_KERNEL); + for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++) + data[i] = &uapi->notsupp_method; + uapi->write_methods = data; + uapi->write_ex_methods = data + uapi->num_write; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + if (uapi_key_is_write_method(iter.index)) + uapi->write_methods[iter.index & + UVERBS_API_ATTR_KEY_MASK] = + rcu_dereference_protected(*slot, true); + if (uapi_key_is_write_ex_method(iter.index)) + uapi->write_ex_methods[iter.index & + UVERBS_API_ATTR_KEY_MASK] = + rcu_dereference_protected(*slot, true); } return 0; } -void uverbs_destroy_api(struct uverbs_api *uapi) +static void uapi_remove_range(struct uverbs_api *uapi, u32 start, u32 last) { struct radix_tree_iter iter; void __rcu **slot; - if (!uapi) - return; - - radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + radix_tree_for_each_slot (slot, &uapi->radix, &iter, start) { + if (iter.index > last) + return; kfree(rcu_dereference_protected(*slot, true)); radix_tree_iter_delete(&uapi->radix, &iter, 
slot); } +} + +static void uapi_remove_object(struct uverbs_api *uapi, u32 obj_key) +{ + uapi_remove_range(uapi, obj_key, + obj_key | UVERBS_API_METHOD_KEY_MASK | + UVERBS_API_ATTR_KEY_MASK); +} + +static void uapi_remove_method(struct uverbs_api *uapi, u32 method_key) +{ + uapi_remove_range(uapi, method_key, + method_key | UVERBS_API_ATTR_KEY_MASK); +} + + +static u32 uapi_get_obj_id(struct uverbs_attr_spec *spec) +{ + if (spec->type == UVERBS_ATTR_TYPE_IDR || + spec->type == UVERBS_ATTR_TYPE_FD) + return spec->u.obj.obj_type; + if (spec->type == UVERBS_ATTR_TYPE_IDRS_ARRAY) + return spec->u2.objs_arr.obj_type; + return UVERBS_API_KEY_ERR; +} + +static void uapi_key_okay(u32 key) +{ + unsigned int count = 0; + + if (uapi_key_is_object(key)) + count++; + if (uapi_key_is_ioctl_method(key)) + count++; + if (uapi_key_is_write_method(key)) + count++; + if (uapi_key_is_write_ex_method(key)) + count++; + if (uapi_key_is_attr(key)) + count++; + WARN(count != 1, "Bad count %d key=%x", count, key); +} + +static void uapi_finalize_disable(struct uverbs_api *uapi) +{ + struct radix_tree_iter iter; + u32 starting_key = 0; + bool scan_again = false; + void __rcu **slot; + +again: + radix_tree_for_each_slot (slot, &uapi->radix, &iter, starting_key) { + uapi_key_okay(iter.index); + + if (uapi_key_is_object(iter.index)) { + struct uverbs_api_object *obj_elm = + rcu_dereference_protected(*slot, true); + + if (obj_elm->disabled) { + /* Have to check all the attrs again */ + scan_again = true; + starting_key = iter.index; + uapi_remove_object(uapi, iter.index); + goto again; + } + continue; + } + + if (uapi_key_is_ioctl_method(iter.index)) { + struct uverbs_api_ioctl_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (method_elm->disabled) { + starting_key = iter.index; + uapi_remove_method(uapi, iter.index); + goto again; + } + continue; + } + + if (uapi_key_is_write_method(iter.index) || + uapi_key_is_write_ex_method(iter.index)) { + struct 
uverbs_api_write_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (method_elm->disabled) { + kfree(method_elm); + radix_tree_iter_delete(&uapi->radix, &iter, slot); + } + continue; + } + + if (uapi_key_is_attr(iter.index)) { + struct uverbs_api_attr *attr_elm = + rcu_dereference_protected(*slot, true); + const struct uverbs_api_object *tmp_obj; + u32 obj_key; + + /* + * If the method has a mandatory object handle + * attribute which relies on an object which is not + * present then the entire method is uncallable. + */ + if (!attr_elm->spec.mandatory) + continue; + obj_key = uapi_get_obj_id(&attr_elm->spec); + if (obj_key == UVERBS_API_KEY_ERR) + continue; + tmp_obj = uapi_get_object(uapi, obj_key); + if (IS_ERR(tmp_obj)) { + if (PTR_ERR(tmp_obj) == -ENOMSG) + continue; + } else { + if (!tmp_obj->disabled) + continue; + } + + starting_key = iter.index; + uapi_remove_method( + uapi, + iter.index & (UVERBS_API_OBJ_KEY_MASK | + UVERBS_API_METHOD_KEY_MASK)); + goto again; + } + + WARN_ON(false); + } + + if (!scan_again) + return; + scan_again = false; + starting_key = 0; + goto again; +} + +void uverbs_destroy_api(struct uverbs_api *uapi) +{ + if (!uapi) + return; + + uapi_remove_range(uapi, 0, U32_MAX); + kfree(uapi->write_methods); kfree(uapi); } -struct uverbs_api *uverbs_alloc_api( - const struct uverbs_object_tree_def *const *driver_specs, - enum rdma_driver_id driver_id) +static const struct uapi_definition uverbs_core_api[] = { + UAPI_DEF_CHAIN(uverbs_def_obj_counters), + UAPI_DEF_CHAIN(uverbs_def_obj_cq), + UAPI_DEF_CHAIN(uverbs_def_obj_device), + UAPI_DEF_CHAIN(uverbs_def_obj_dm), + UAPI_DEF_CHAIN(uverbs_def_obj_flow_action), + UAPI_DEF_CHAIN(uverbs_def_obj_intf), + UAPI_DEF_CHAIN(uverbs_def_obj_mr), + UAPI_DEF_CHAIN(uverbs_def_write_intf), + {}, +}; + +struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev) { struct uverbs_api *uapi; int rc; @@ -275,18 +640,16 @@ struct uverbs_api *uverbs_alloc_api( return ERR_PTR(-ENOMEM); 
INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL); - uapi->driver_id = driver_id; + uapi->driver_id = ibdev->driver_id; - rc = uapi_merge_tree(uapi, uverbs_default_get_objects(), false); + rc = uapi_merge_def(uapi, ibdev, uverbs_core_api, false); + if (rc) + goto err; + rc = uapi_merge_def(uapi, ibdev, ibdev->driver_def, true); if (rc) goto err; - for (; driver_specs && *driver_specs; driver_specs++) { - rc = uapi_merge_tree(uapi, *driver_specs, true); - if (rc) - goto err; - } - + uapi_finalize_disable(uapi); rc = uapi_finalize(uapi); if (rc) goto err; @@ -294,8 +657,9 @@ struct uverbs_api *uverbs_alloc_api( return uapi; err: if (rc != -ENOMEM) - pr_err("Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n", - rc); + dev_err(&ibdev->dev, + "Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n", + rc); uverbs_destroy_api(uapi); return ERR_PTR(rc); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 178899e3ce73..ac011836bb54 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -141,6 +141,10 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate) case IB_RATE_100_GBPS: return 40; case IB_RATE_200_GBPS: return 80; case IB_RATE_300_GBPS: return 120; + case IB_RATE_28_GBPS: return 11; + case IB_RATE_50_GBPS: return 20; + case IB_RATE_400_GBPS: return 160; + case IB_RATE_600_GBPS: return 240; default: return -1; } } @@ -166,6 +170,10 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult) case 40: return IB_RATE_100_GBPS; case 80: return IB_RATE_200_GBPS; case 120: return IB_RATE_300_GBPS; + case 11: return IB_RATE_28_GBPS; + case 20: return IB_RATE_50_GBPS; + case 160: return IB_RATE_400_GBPS; + case 240: return IB_RATE_600_GBPS; default: return IB_RATE_PORT_CURRENT; } } @@ -191,6 +199,10 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate) case IB_RATE_100_GBPS: return 103125; case IB_RATE_200_GBPS: return 206250; case 
IB_RATE_300_GBPS: return 309375; + case IB_RATE_28_GBPS: return 28125; + case IB_RATE_50_GBPS: return 53125; + case IB_RATE_400_GBPS: return 425000; + case IB_RATE_600_GBPS: return 637500; default: return -1; } } @@ -214,8 +226,8 @@ EXPORT_SYMBOL(rdma_node_get_transport); enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num) { enum rdma_transport_type lt; - if (device->get_link_layer) - return device->get_link_layer(device, port_num); + if (device->ops.get_link_layer) + return device->ops.get_link_layer(device, port_num); lt = rdma_node_get_transport(device->node_type); if (lt == RDMA_TRANSPORT_IB) @@ -243,7 +255,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, struct ib_pd *pd; int mr_access_flags = 0; - pd = device->alloc_pd(device, NULL, NULL); + pd = device->ops.alloc_pd(device, NULL, NULL); if (IS_ERR(pd)) return pd; @@ -265,12 +277,12 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, pd->res.type = RDMA_RESTRACK_PD; rdma_restrack_set_task(&pd->res, caller); - rdma_restrack_add(&pd->res); + rdma_restrack_kadd(&pd->res); if (mr_access_flags) { struct ib_mr *mr; - mr = pd->device->get_dma_mr(pd, mr_access_flags); + mr = pd->device->ops.get_dma_mr(pd, mr_access_flags); if (IS_ERR(mr)) { ib_dealloc_pd(pd); return ERR_CAST(mr); @@ -307,7 +319,7 @@ void ib_dealloc_pd(struct ib_pd *pd) int ret; if (pd->__internal_mr) { - ret = pd->device->dereg_mr(pd->__internal_mr); + ret = pd->device->ops.dereg_mr(pd->__internal_mr); WARN_ON(ret); pd->__internal_mr = NULL; } @@ -319,7 +331,7 @@ void ib_dealloc_pd(struct ib_pd *pd) rdma_restrack_del(&pd->res); /* Making delalloc_pd a void return is a WIP, no driver should return an error here. 
*/ - ret = pd->device->dealloc_pd(pd); + ret = pd->device->ops.dealloc_pd(pd); WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); } EXPORT_SYMBOL(ib_dealloc_pd); @@ -475,14 +487,17 @@ rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr, static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { struct ib_ah *ah; - if (!pd->device->create_ah) + might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE); + + if (!pd->device->ops.create_ah) return ERR_PTR(-EOPNOTSUPP); - ah = pd->device->create_ah(pd, ah_attr, udata); + ah = pd->device->ops.create_ah(pd, ah_attr, flags, udata); if (!IS_ERR(ah)) { ah->device = pd->device; @@ -502,12 +517,14 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, * given address vector. * @pd: The protection domain associated with the address handle. * @ah_attr: The attributes of the address vector. + * @flags: Create address handle flags (see enum rdma_create_ah_flags). * * It returns 0 on success and returns appropriate error code on error. * The address handle is used to reference a local or global destination * in all UD QP post sends. 
*/ -struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) +struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags) { const struct ib_gid_attr *old_sgid_attr; struct ib_ah *ah; @@ -517,7 +534,7 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) if (ret) return ERR_PTR(ret); - ah = _rdma_create_ah(pd, ah_attr, NULL); + ah = _rdma_create_ah(pd, ah_attr, flags, NULL); rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); return ah; @@ -557,7 +574,7 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, } } - ah = _rdma_create_ah(pd, ah_attr, udata); + ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata); out: rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); @@ -869,7 +886,7 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, if (ret) return ERR_PTR(ret); - ah = rdma_create_ah(pd, &ah_attr); + ah = rdma_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE); rdma_destroy_ah_attr(&ah_attr); return ah; @@ -888,8 +905,8 @@ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) if (ret) return ret; - ret = ah->device->modify_ah ? - ah->device->modify_ah(ah, ah_attr) : + ret = ah->device->ops.modify_ah ? + ah->device->ops.modify_ah(ah, ah_attr) : -EOPNOTSUPP; ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr); @@ -902,20 +919,22 @@ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { ah_attr->grh.sgid_attr = NULL; - return ah->device->query_ah ? - ah->device->query_ah(ah, ah_attr) : + return ah->device->ops.query_ah ? 
+ ah->device->ops.query_ah(ah, ah_attr) : -EOPNOTSUPP; } EXPORT_SYMBOL(rdma_query_ah); -int rdma_destroy_ah(struct ib_ah *ah) +int rdma_destroy_ah(struct ib_ah *ah, u32 flags) { const struct ib_gid_attr *sgid_attr = ah->sgid_attr; struct ib_pd *pd; int ret; + might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE); + pd = ah->pd; - ret = ah->device->destroy_ah(ah); + ret = ah->device->ops.destroy_ah(ah, flags); if (!ret) { atomic_dec(&pd->usecnt); if (sgid_attr) @@ -933,10 +952,10 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, { struct ib_srq *srq; - if (!pd->device->create_srq) + if (!pd->device->ops.create_srq) return ERR_PTR(-EOPNOTSUPP); - srq = pd->device->create_srq(pd, srq_init_attr, NULL); + srq = pd->device->ops.create_srq(pd, srq_init_attr, NULL); if (!IS_ERR(srq)) { srq->device = pd->device; @@ -965,17 +984,17 @@ int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask) { - return srq->device->modify_srq ? - srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) : - -EOPNOTSUPP; + return srq->device->ops.modify_srq ? + srq->device->ops.modify_srq(srq, srq_attr, srq_attr_mask, + NULL) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_modify_srq); int ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) { - return srq->device->query_srq ? - srq->device->query_srq(srq, srq_attr) : -EOPNOTSUPP; + return srq->device->ops.query_srq ? 
+ srq->device->ops.query_srq(srq, srq_attr) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_query_srq); @@ -997,7 +1016,7 @@ int ib_destroy_srq(struct ib_srq *srq) if (srq_type == IB_SRQT_XRC) xrcd = srq->ext.xrc.xrcd; - ret = srq->device->destroy_srq(srq); + ret = srq->device->ops.destroy_srq(srq); if (!ret) { atomic_dec(&pd->usecnt); if (srq_type == IB_SRQT_XRC) @@ -1106,7 +1125,7 @@ static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp, if (!IS_ERR(qp)) __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); else - real_qp->device->destroy_qp(real_qp); + real_qp->device->ops.destroy_qp(real_qp); return qp; } @@ -1692,10 +1711,10 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width) if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) return -EINVAL; - if (!dev->get_netdev) + if (!dev->ops.get_netdev) return -EOPNOTSUPP; - netdev = dev->get_netdev(dev, port_num); + netdev = dev->ops.get_netdev(dev, port_num); if (!netdev) return -ENODEV; @@ -1753,9 +1772,9 @@ int ib_query_qp(struct ib_qp *qp, qp_attr->ah_attr.grh.sgid_attr = NULL; qp_attr->alt_ah_attr.grh.sgid_attr = NULL; - return qp->device->query_qp ? - qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : - -EOPNOTSUPP; + return qp->device->ops.query_qp ? 
+ qp->device->ops.query_qp(qp->real_qp, qp_attr, qp_attr_mask, + qp_init_attr) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_query_qp); @@ -1841,7 +1860,7 @@ int ib_destroy_qp(struct ib_qp *qp) rdma_rw_cleanup_mrs(qp); rdma_restrack_del(&qp->res); - ret = qp->device->destroy_qp(qp); + ret = qp->device->ops.destroy_qp(qp); if (!ret) { if (alt_path_sgid_attr) rdma_put_gid_attr(alt_path_sgid_attr); @@ -1879,7 +1898,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device, { struct ib_cq *cq; - cq = device->create_cq(device, cq_attr, NULL, NULL); + cq = device->ops.create_cq(device, cq_attr, NULL, NULL); if (!IS_ERR(cq)) { cq->device = device; @@ -1890,7 +1909,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device, atomic_set(&cq->usecnt, 0); cq->res.type = RDMA_RESTRACK_CQ; rdma_restrack_set_task(&cq->res, caller); - rdma_restrack_add(&cq->res); + rdma_restrack_kadd(&cq->res); } return cq; @@ -1899,8 +1918,9 @@ EXPORT_SYMBOL(__ib_create_cq); int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) { - return cq->device->modify_cq ? - cq->device->modify_cq(cq, cq_count, cq_period) : -EOPNOTSUPP; + return cq->device->ops.modify_cq ? + cq->device->ops.modify_cq(cq, cq_count, + cq_period) : -EOPNOTSUPP; } EXPORT_SYMBOL(rdma_set_cq_moderation); @@ -1910,14 +1930,14 @@ int ib_destroy_cq(struct ib_cq *cq) return -EBUSY; rdma_restrack_del(&cq->res); - return cq->device->destroy_cq(cq); + return cq->device->ops.destroy_cq(cq); } EXPORT_SYMBOL(ib_destroy_cq); int ib_resize_cq(struct ib_cq *cq, int cqe) { - return cq->device->resize_cq ? - cq->device->resize_cq(cq, cqe, NULL) : -EOPNOTSUPP; + return cq->device->ops.resize_cq ? 
+ cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_resize_cq); @@ -1930,7 +1950,7 @@ int ib_dereg_mr(struct ib_mr *mr) int ret; rdma_restrack_del(&mr->res); - ret = mr->device->dereg_mr(mr); + ret = mr->device->ops.dereg_mr(mr); if (!ret) { atomic_dec(&pd->usecnt); if (dm) @@ -1959,10 +1979,10 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, { struct ib_mr *mr; - if (!pd->device->alloc_mr) + if (!pd->device->ops.alloc_mr) return ERR_PTR(-EOPNOTSUPP); - mr = pd->device->alloc_mr(pd, mr_type, max_num_sg); + mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg); if (!IS_ERR(mr)) { mr->device = pd->device; mr->pd = pd; @@ -1971,7 +1991,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, atomic_inc(&pd->usecnt); mr->need_inval = false; mr->res.type = RDMA_RESTRACK_MR; - rdma_restrack_add(&mr->res); + rdma_restrack_kadd(&mr->res); } return mr; @@ -1986,10 +2006,10 @@ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, { struct ib_fmr *fmr; - if (!pd->device->alloc_fmr) + if (!pd->device->ops.alloc_fmr) return ERR_PTR(-EOPNOTSUPP); - fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr); + fmr = pd->device->ops.alloc_fmr(pd, mr_access_flags, fmr_attr); if (!IS_ERR(fmr)) { fmr->device = pd->device; fmr->pd = pd; @@ -2008,7 +2028,7 @@ int ib_unmap_fmr(struct list_head *fmr_list) return 0; fmr = list_entry(fmr_list->next, struct ib_fmr, list); - return fmr->device->unmap_fmr(fmr_list); + return fmr->device->ops.unmap_fmr(fmr_list); } EXPORT_SYMBOL(ib_unmap_fmr); @@ -2018,7 +2038,7 @@ int ib_dealloc_fmr(struct ib_fmr *fmr) int ret; pd = fmr->pd; - ret = fmr->device->dealloc_fmr(fmr); + ret = fmr->device->ops.dealloc_fmr(fmr); if (!ret) atomic_dec(&pd->usecnt); @@ -2070,14 +2090,14 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { int ret; - if (!qp->device->attach_mcast) + if (!qp->device->ops.attach_mcast) return -EOPNOTSUPP; if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || qp->qp_type != IB_QPT_UD || 
!is_valid_mcast_lid(qp, lid)) return -EINVAL; - ret = qp->device->attach_mcast(qp, gid, lid); + ret = qp->device->ops.attach_mcast(qp, gid, lid); if (!ret) atomic_inc(&qp->usecnt); return ret; @@ -2088,14 +2108,14 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { int ret; - if (!qp->device->detach_mcast) + if (!qp->device->ops.detach_mcast) return -EOPNOTSUPP; if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid)) return -EINVAL; - ret = qp->device->detach_mcast(qp, gid, lid); + ret = qp->device->ops.detach_mcast(qp, gid, lid); if (!ret) atomic_dec(&qp->usecnt); return ret; @@ -2106,10 +2126,10 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) { struct ib_xrcd *xrcd; - if (!device->alloc_xrcd) + if (!device->ops.alloc_xrcd) return ERR_PTR(-EOPNOTSUPP); - xrcd = device->alloc_xrcd(device, NULL, NULL); + xrcd = device->ops.alloc_xrcd(device, NULL, NULL); if (!IS_ERR(xrcd)) { xrcd->device = device; xrcd->inode = NULL; @@ -2137,7 +2157,7 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd) return ret; } - return xrcd->device->dealloc_xrcd(xrcd); + return xrcd->device->ops.dealloc_xrcd(xrcd); } EXPORT_SYMBOL(ib_dealloc_xrcd); @@ -2160,10 +2180,10 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd, { struct ib_wq *wq; - if (!pd->device->create_wq) + if (!pd->device->ops.create_wq) return ERR_PTR(-EOPNOTSUPP); - wq = pd->device->create_wq(pd, wq_attr, NULL); + wq = pd->device->ops.create_wq(pd, wq_attr, NULL); if (!IS_ERR(wq)) { wq->event_handler = wq_attr->event_handler; wq->wq_context = wq_attr->wq_context; @@ -2193,7 +2213,7 @@ int ib_destroy_wq(struct ib_wq *wq) if (atomic_read(&wq->usecnt)) return -EBUSY; - err = wq->device->destroy_wq(wq); + err = wq->device->ops.destroy_wq(wq); if (!err) { atomic_dec(&pd->usecnt); atomic_dec(&cq->usecnt); @@ -2215,10 +2235,10 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, { int err; - if (!wq->device->modify_wq) 
+ if (!wq->device->ops.modify_wq) return -EOPNOTSUPP; - err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL); + err = wq->device->ops.modify_wq(wq, wq_attr, wq_attr_mask, NULL); return err; } EXPORT_SYMBOL(ib_modify_wq); @@ -2240,12 +2260,12 @@ struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, int i; u32 table_size; - if (!device->create_rwq_ind_table) + if (!device->ops.create_rwq_ind_table) return ERR_PTR(-EOPNOTSUPP); table_size = (1 << init_attr->log_ind_tbl_size); - rwq_ind_table = device->create_rwq_ind_table(device, - init_attr, NULL); + rwq_ind_table = device->ops.create_rwq_ind_table(device, + init_attr, NULL); if (IS_ERR(rwq_ind_table)) return rwq_ind_table; @@ -2275,7 +2295,7 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) if (atomic_read(&rwq_ind_table->usecnt)) return -EBUSY; - err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table); + err = rwq_ind_table->device->ops.destroy_rwq_ind_table(rwq_ind_table); if (!err) { for (i = 0; i < table_size; i++) atomic_dec(&ind_tbl[i]->usecnt); @@ -2288,48 +2308,50 @@ EXPORT_SYMBOL(ib_destroy_rwq_ind_table); int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status) { - return mr->device->check_mr_status ? 
- mr->device->check_mr_status(mr, check_mask, mr_status) : -EOPNOTSUPP; + if (!mr->device->ops.check_mr_status) + return -EOPNOTSUPP; + + return mr->device->ops.check_mr_status(mr, check_mask, mr_status); } EXPORT_SYMBOL(ib_check_mr_status); int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state) { - if (!device->set_vf_link_state) + if (!device->ops.set_vf_link_state) return -EOPNOTSUPP; - return device->set_vf_link_state(device, vf, port, state); + return device->ops.set_vf_link_state(device, vf, port, state); } EXPORT_SYMBOL(ib_set_vf_link_state); int ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info) { - if (!device->get_vf_config) + if (!device->ops.get_vf_config) return -EOPNOTSUPP; - return device->get_vf_config(device, vf, port, info); + return device->ops.get_vf_config(device, vf, port, info); } EXPORT_SYMBOL(ib_get_vf_config); int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats) { - if (!device->get_vf_stats) + if (!device->ops.get_vf_stats) return -EOPNOTSUPP; - return device->get_vf_stats(device, vf, port, stats); + return device->ops.get_vf_stats(device, vf, port, stats); } EXPORT_SYMBOL(ib_get_vf_stats); int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type) { - if (!device->set_vf_guid) + if (!device->ops.set_vf_guid) return -EOPNOTSUPP; - return device->set_vf_guid(device, vf, port, guid, type); + return device->ops.set_vf_guid(device, vf, port, guid, type); } EXPORT_SYMBOL(ib_set_vf_guid); @@ -2361,12 +2383,12 @@ EXPORT_SYMBOL(ib_set_vf_guid); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size) { - if (unlikely(!mr->device->map_mr_sg)) + if (unlikely(!mr->device->ops.map_mr_sg)) return -EOPNOTSUPP; mr->page_size = page_size; - return mr->device->map_mr_sg(mr, sg, sg_nents, sg_offset); + return mr->device->ops.map_mr_sg(mr, sg, sg_nents, sg_offset); } 
EXPORT_SYMBOL(ib_map_mr_sg); @@ -2565,8 +2587,8 @@ static void __ib_drain_rq(struct ib_qp *qp) */ void ib_drain_sq(struct ib_qp *qp) { - if (qp->device->drain_sq) - qp->device->drain_sq(qp); + if (qp->device->ops.drain_sq) + qp->device->ops.drain_sq(qp); else __ib_drain_sq(qp); } @@ -2593,8 +2615,8 @@ EXPORT_SYMBOL(ib_drain_sq); */ void ib_drain_rq(struct ib_qp *qp) { - if (qp->device->drain_rq) - qp->device->drain_rq(qp); + if (qp->device->ops.drain_rq) + qp->device->ops.drain_rq(qp); else __ib_drain_rq(qp); } @@ -2632,10 +2654,11 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num, struct net_device *netdev; int rc; - if (!device->rdma_netdev_get_params) + if (!device->ops.rdma_netdev_get_params) return ERR_PTR(-EOPNOTSUPP); - rc = device->rdma_netdev_get_params(device, port_num, type, &params); + rc = device->ops.rdma_netdev_get_params(device, port_num, type, + &params); if (rc) return ERR_PTR(rc); @@ -2657,10 +2680,11 @@ int rdma_init_netdev(struct ib_device *device, u8 port_num, struct rdma_netdev_alloc_params params; int rc; - if (!device->rdma_netdev_get_params) + if (!device->ops.rdma_netdev_get_params) return -EOPNOTSUPP; - rc = device->rdma_netdev_get_params(device, port_num, type, &params); + rc = device->ops.rdma_netdev_get_params(device, port_num, type, + &params); if (rc) return rc; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 54fdd4cf5288..1e2515e2eb62 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -647,13 +647,14 @@ fail: } /* Address Handles */ -int bnxt_re_destroy_ah(struct ib_ah *ib_ah) +int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) { struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); struct bnxt_re_dev *rdev = ah->rdev; int rc; - rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah); + rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, + !(flags & RDMA_DESTROY_AH_SLEEPABLE)); if 
(rc) { dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH"); return rc; @@ -664,6 +665,7 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah) struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); @@ -698,7 +700,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, ah->qplib_ah.flow_label = grh->flow_label; ah->qplib_ah.hop_limit = grh->hop_limit; ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr); - if (ib_pd->uobject && + if (udata && !rdma_is_multicast_addr((struct in6_addr *) grh->dgid.raw) && !rdma_link_local_addr((struct in6_addr *) @@ -722,14 +724,15 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, } memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN); - rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah); + rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, + !(flags & RDMA_CREATE_AH_SLEEPABLE)); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH"); goto fail; } /* Write AVID to shared page. 
*/ - if (ib_pd->uobject) { + if (udata) { struct ib_ucontext *ib_uctx = ib_pd->uobject->context; struct bnxt_re_ucontext *uctx; unsigned long flag; @@ -818,7 +821,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) { rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, - &rdev->sqp_ah->qplib_ah); + &rdev->sqp_ah->qplib_ah, false); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH for shadow QP"); @@ -958,7 +961,7 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah /* Have DMAC same as SMAC */ ether_addr_copy(ah->qplib_ah.dmac, rdev->netdev->dev_addr); - rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah); + rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, false); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH for Shadow QP"); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index aa33e7b82c84..c4af72604b4f 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -169,10 +169,11 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev, int bnxt_re_dealloc_pd(struct ib_pd *pd); struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int bnxt_re_destroy_ah(struct ib_ah *ah); +int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *bnxt_re_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 77f095e5fbe3..e7a997f2a537 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -568,6 +568,50 @@ static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev) ib_unregister_device(&rdev->ibdev); } +static const 
struct ib_device_ops bnxt_re_dev_ops = { + .add_gid = bnxt_re_add_gid, + .alloc_hw_stats = bnxt_re_ib_alloc_hw_stats, + .alloc_mr = bnxt_re_alloc_mr, + .alloc_pd = bnxt_re_alloc_pd, + .alloc_ucontext = bnxt_re_alloc_ucontext, + .create_ah = bnxt_re_create_ah, + .create_cq = bnxt_re_create_cq, + .create_qp = bnxt_re_create_qp, + .create_srq = bnxt_re_create_srq, + .dealloc_pd = bnxt_re_dealloc_pd, + .dealloc_ucontext = bnxt_re_dealloc_ucontext, + .del_gid = bnxt_re_del_gid, + .dereg_mr = bnxt_re_dereg_mr, + .destroy_ah = bnxt_re_destroy_ah, + .destroy_cq = bnxt_re_destroy_cq, + .destroy_qp = bnxt_re_destroy_qp, + .destroy_srq = bnxt_re_destroy_srq, + .get_dev_fw_str = bnxt_re_query_fw_str, + .get_dma_mr = bnxt_re_get_dma_mr, + .get_hw_stats = bnxt_re_ib_get_hw_stats, + .get_link_layer = bnxt_re_get_link_layer, + .get_netdev = bnxt_re_get_netdev, + .get_port_immutable = bnxt_re_get_port_immutable, + .map_mr_sg = bnxt_re_map_mr_sg, + .mmap = bnxt_re_mmap, + .modify_ah = bnxt_re_modify_ah, + .modify_device = bnxt_re_modify_device, + .modify_qp = bnxt_re_modify_qp, + .modify_srq = bnxt_re_modify_srq, + .poll_cq = bnxt_re_poll_cq, + .post_recv = bnxt_re_post_recv, + .post_send = bnxt_re_post_send, + .post_srq_recv = bnxt_re_post_srq_recv, + .query_ah = bnxt_re_query_ah, + .query_device = bnxt_re_query_device, + .query_pkey = bnxt_re_query_pkey, + .query_port = bnxt_re_query_port, + .query_qp = bnxt_re_query_qp, + .query_srq = bnxt_re_query_srq, + .reg_user_mr = bnxt_re_reg_user_mr, + .req_notify_cq = bnxt_re_req_notify_cq, +}; + static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) { struct ib_device *ibdev = &rdev->ibdev; @@ -614,60 +658,10 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) (1ull << IB_USER_VERBS_CMD_DESTROY_AH); /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */ - /* Kernel verbs */ - ibdev->query_device = bnxt_re_query_device; - ibdev->modify_device = bnxt_re_modify_device; - - ibdev->query_port = bnxt_re_query_port; - 
ibdev->get_port_immutable = bnxt_re_get_port_immutable; - ibdev->get_dev_fw_str = bnxt_re_query_fw_str; - ibdev->query_pkey = bnxt_re_query_pkey; - ibdev->get_netdev = bnxt_re_get_netdev; - ibdev->add_gid = bnxt_re_add_gid; - ibdev->del_gid = bnxt_re_del_gid; - ibdev->get_link_layer = bnxt_re_get_link_layer; - - ibdev->alloc_pd = bnxt_re_alloc_pd; - ibdev->dealloc_pd = bnxt_re_dealloc_pd; - - ibdev->create_ah = bnxt_re_create_ah; - ibdev->modify_ah = bnxt_re_modify_ah; - ibdev->query_ah = bnxt_re_query_ah; - ibdev->destroy_ah = bnxt_re_destroy_ah; - - ibdev->create_srq = bnxt_re_create_srq; - ibdev->modify_srq = bnxt_re_modify_srq; - ibdev->query_srq = bnxt_re_query_srq; - ibdev->destroy_srq = bnxt_re_destroy_srq; - ibdev->post_srq_recv = bnxt_re_post_srq_recv; - - ibdev->create_qp = bnxt_re_create_qp; - ibdev->modify_qp = bnxt_re_modify_qp; - ibdev->query_qp = bnxt_re_query_qp; - ibdev->destroy_qp = bnxt_re_destroy_qp; - - ibdev->post_send = bnxt_re_post_send; - ibdev->post_recv = bnxt_re_post_recv; - - ibdev->create_cq = bnxt_re_create_cq; - ibdev->destroy_cq = bnxt_re_destroy_cq; - ibdev->poll_cq = bnxt_re_poll_cq; - ibdev->req_notify_cq = bnxt_re_req_notify_cq; - - ibdev->get_dma_mr = bnxt_re_get_dma_mr; - ibdev->dereg_mr = bnxt_re_dereg_mr; - ibdev->alloc_mr = bnxt_re_alloc_mr; - ibdev->map_mr_sg = bnxt_re_map_mr_sg; - - ibdev->reg_user_mr = bnxt_re_reg_user_mr; - ibdev->alloc_ucontext = bnxt_re_alloc_ucontext; - ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext; - ibdev->mmap = bnxt_re_mmap; - ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats; - ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats; rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); ibdev->driver_id = RDMA_DRIVER_BNXT_RE; + ib_set_device_ops(ibdev, &bnxt_re_dev_ops); return ib_register_device(ibdev, "bnxt_re%d", NULL); } @@ -1203,6 +1197,35 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) return 0; } +static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) +{ + 
struct bnxt_en_dev *en_dev = rdev->en_dev; + struct hwrm_ver_get_output resp = {0}; + struct hwrm_ver_get_input req = {0}; + struct bnxt_fw_msg fw_msg; + int rc = 0; + + memset(&fw_msg, 0, sizeof(fw_msg)); + bnxt_re_init_hwrm_hdr(rdev, (void *)&req, + HWRM_VER_GET, -1, -1); + req.hwrm_intf_maj = HWRM_VERSION_MAJOR; + req.hwrm_intf_min = HWRM_VERSION_MINOR; + req.hwrm_intf_upd = HWRM_VERSION_UPDATE; + bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, + sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); + if (rc) { + dev_err(rdev_to_dev(rdev), + "Failed to query HW version, rc = 0x%x", rc); + return; + } + rdev->qplib_ctx.hwrm_intf_ver = + (u64)resp.hwrm_intf_major << 48 | + (u64)resp.hwrm_intf_minor << 32 | + (u64)resp.hwrm_intf_build << 16 | + resp.hwrm_intf_patch; +} + static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) { int rc; @@ -1285,10 +1308,13 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) } set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags); + bnxt_re_query_hwrm_intf_version(rdev); + /* Establish RCFW Communication Channel to initialize the context * memory for the function and all child VFs */ rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw, + &rdev->qplib_ctx, BNXT_RE_MAX_QPC_COUNT); if (rc) { pr_err("Failed to allocate RCFW Channel: %#x\n", rc); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index be4e33e9f962..326805461265 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -58,7 +58,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) u16 cbit; int rc; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; rc = wait_event_timeout(rcfw->waitq, !test_bit(cbit, rcfw->cmdq_bitmap), msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS)); @@ -70,7 +70,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, 
u16 cookie) u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT; u16 cbit; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; if (!test_bit(cbit, rcfw->cmdq_bitmap)) goto done; do { @@ -86,6 +86,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, { struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr; struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq; + u32 cmdq_depth = rcfw->cmdq_depth; struct bnxt_qplib_crsq *crsqe; u32 sw_prod, cmdq_prod; unsigned long flags; @@ -124,7 +125,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; if (is_block) cookie |= RCFW_CMD_IS_BLOCKING; @@ -153,7 +154,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, do { /* Locate the next cmdq slot */ sw_prod = HWQ_CMP(cmdq->prod, cmdq); - cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)]; + cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)] + [get_cmdq_idx(sw_prod, cmdq_depth)]; if (!cmdqe) { dev_err(&rcfw->pdev->dev, "RCFW request failed with no cmdqe!\n"); @@ -326,7 +328,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, mcookie = qp_event->cookie; blocked = cookie & RCFW_CMD_IS_BLOCKING; cookie &= RCFW_MAX_COOKIE_VALUE; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; crsqe = &rcfw->crsqe_tbl[cbit]; if (crsqe->resp && crsqe->resp->cookie == mcookie) { @@ -555,6 +557,7 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_ctx *ctx, int qp_tbl_sz) { rcfw->pdev = pdev; @@ -567,11 +570,18 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, "HW channel CREQ allocation failed\n"); goto fail; } - rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT; - if 
(bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->cmdq, NULL, 0, - &rcfw->cmdq.max_elements, - BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE, - HWQ_TYPE_CTX)) { + if (ctx->hwrm_intf_ver < HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK) + rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_256; + else + rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192; + + rcfw->cmdq.max_elements = rcfw->cmdq_depth; + if (bnxt_qplib_alloc_init_hwq + (rcfw->pdev, &rcfw->cmdq, NULL, 0, + &rcfw->cmdq.max_elements, + BNXT_QPLIB_CMDQE_UNITS, 0, + bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth), + HWQ_TYPE_CTX)) { dev_err(&rcfw->pdev->dev, "HW channel CMDQ allocation failed\n"); goto fail; @@ -674,7 +684,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, /* General */ rcfw->seq_num = 0; set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags); - bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD * + bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth * sizeof(unsigned long)); rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL); if (!rcfw->cmdq_bitmap) @@ -734,7 +744,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]); init.cmdq_size_cmdq_lvl = cpu_to_le16( - ((BNXT_QPLIB_CMDQE_MAX_CNT << CMDQ_INIT_CMDQ_SIZE_SFT) & + ((rcfw->cmdq_depth << CMDQ_INIT_CMDQ_SIZE_SFT) & CMDQ_INIT_CMDQ_SIZE_MASK) | ((rcfw->cmdq.level << CMDQ_INIT_CMDQ_LVL_SFT) & CMDQ_INIT_CMDQ_LVL_MASK)); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 9a8687dc0a79..be0ef0e8c53e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -63,32 +63,60 @@ #define RCFW_CMD_WAIT_TIME_MS 20000 /* 20 Seconds timeout */ +/* Cmdq contains a fix number of a 16-Byte slots */ +struct bnxt_qplib_cmdqe { + u8 data[16]; +}; + /* CMDQ elements */ -#define BNXT_QPLIB_CMDQE_MAX_CNT 256 +#define BNXT_QPLIB_CMDQE_MAX_CNT_256 256 +#define BNXT_QPLIB_CMDQE_MAX_CNT_8192 8192 #define BNXT_QPLIB_CMDQE_UNITS 
sizeof(struct bnxt_qplib_cmdqe) -#define BNXT_QPLIB_CMDQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CMDQE_UNITS) +#define BNXT_QPLIB_CMDQE_BYTES(depth) ((depth) * BNXT_QPLIB_CMDQE_UNITS) + +static inline u32 bnxt_qplib_cmdqe_npages(u32 depth) +{ + u32 npages; + + npages = BNXT_QPLIB_CMDQE_BYTES(depth) / PAGE_SIZE; + if (BNXT_QPLIB_CMDQE_BYTES(depth) % PAGE_SIZE) + npages++; + return npages; +} + +static inline u32 bnxt_qplib_cmdqe_page_size(u32 depth) +{ + return (bnxt_qplib_cmdqe_npages(depth) * PAGE_SIZE); +} + +static inline u32 bnxt_qplib_cmdqe_cnt_per_pg(u32 depth) +{ + return (bnxt_qplib_cmdqe_page_size(depth) / + BNXT_QPLIB_CMDQE_UNITS); +} -#define MAX_CMDQ_IDX (BNXT_QPLIB_CMDQE_MAX_CNT - 1) -#define MAX_CMDQ_IDX_PER_PG (BNXT_QPLIB_CMDQE_CNT_PER_PG - 1) +#define MAX_CMDQ_IDX(depth) ((depth) - 1) + +static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth) +{ + return (bnxt_qplib_cmdqe_cnt_per_pg(depth) - 1); +} -#define RCFW_MAX_OUTSTANDING_CMD BNXT_QPLIB_CMDQE_MAX_CNT #define RCFW_MAX_COOKIE_VALUE 0x7FFF #define RCFW_CMD_IS_BLOCKING 0x8000 #define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20 -/* Cmdq contains a fix number of a 16-Byte slots */ -struct bnxt_qplib_cmdqe { - u8 data[16]; -}; +#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL -static inline u32 get_cmdq_pg(u32 val) +static inline u32 get_cmdq_pg(u32 val, u32 depth) { - return (val & ~MAX_CMDQ_IDX_PER_PG) / BNXT_QPLIB_CMDQE_CNT_PER_PG; + return (val & ~(bnxt_qplib_max_cmdq_idx_per_pg(depth))) / + (bnxt_qplib_cmdqe_cnt_per_pg(depth)); } -static inline u32 get_cmdq_idx(u32 val) +static inline u32 get_cmdq_idx(u32 val, u32 depth) { - return val & MAX_CMDQ_IDX_PER_PG; + return val & (bnxt_qplib_max_cmdq_idx_per_pg(depth)); } /* Crsq buf is 1024-Byte */ @@ -194,11 +222,14 @@ struct bnxt_qplib_rcfw { struct bnxt_qplib_qp_node *qp_tbl; u64 oos_prev; u32 init_oos_stats; + u32 cmdq_depth; }; void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_alloc_rcfw_channel(struct pci_dev 
*pdev, - struct bnxt_qplib_rcfw *rcfw, int qp_tbl_sz); + struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_ctx *ctx, + int qp_tbl_sz); void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill); void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 2e5c052da5a9..1e80aa7bbcce 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -177,6 +177,7 @@ struct bnxt_qplib_ctx { struct bnxt_qplib_hwq tqm_tbl[MAX_TQM_ALLOC_REQ]; struct bnxt_qplib_stats stats; struct bnxt_qplib_vf_res vf_res; + u64 hwrm_intf_ver; }; struct bnxt_qplib_res { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 5216b5f844cc..be03b5738f71 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -488,7 +488,8 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res, } /* AH */ -int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) +int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_create_ah req; @@ -522,7 +523,7 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) req.dest_mac[2] = cpu_to_le16(temp16[2]); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, - NULL, 1); + NULL, block); if (rc) return rc; @@ -530,7 +531,8 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) return 0; } -int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) +int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_destroy_ah req; @@ -544,7 +546,7 @@ int 
bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) req.ah_cid = cpu_to_le32(ah->id); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, - NULL, 1); + NULL, block); if (rc) return rc; return 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 8079d7f5a008..39454b3f738d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -241,8 +241,10 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx); -int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah); -int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah); +int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block); +int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block); int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw); int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index dcb4bba522ba..df4f7a3f043d 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -291,13 +291,12 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, if (!wq->sq) goto err3; - wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), + wq->queue = dma_zalloc_coherent(&(rdev_p->rnic_info.pdev->dev), depth * sizeof(union t3_wr), &(wq->dma_addr), GFP_KERNEL); if (!wq->queue) goto err4; - memset(wq->queue, 0, depth * sizeof(union t3_wr)); dma_unmap_addr_set(wq, mapping, wq->dma_addr); wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; if (!kernel_domain) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c 
b/drivers/infiniband/hw/cxgb3/iwch_provider.c index ebbec02cebe0..b34b1a1bd94b 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -836,7 +836,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, * Kernel users need more wq space for fastreg WRs which can take * 2 WR fragments. */ - ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; + ucontext = udata ? to_iwch_ucontext(pd->uobject->context) : NULL; if (!ucontext && wqsize < (rqsize + (2 * sqsize))) wqsize = roundup_pow_of_two(rqsize + roundup_pow_of_two(attrs->cap.max_send_wr * 2)); @@ -1317,6 +1317,39 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str) snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); } +static const struct ib_device_ops iwch_dev_ops = { + .alloc_hw_stats = iwch_alloc_stats, + .alloc_mr = iwch_alloc_mr, + .alloc_mw = iwch_alloc_mw, + .alloc_pd = iwch_allocate_pd, + .alloc_ucontext = iwch_alloc_ucontext, + .create_cq = iwch_create_cq, + .create_qp = iwch_create_qp, + .dealloc_mw = iwch_dealloc_mw, + .dealloc_pd = iwch_deallocate_pd, + .dealloc_ucontext = iwch_dealloc_ucontext, + .dereg_mr = iwch_dereg_mr, + .destroy_cq = iwch_destroy_cq, + .destroy_qp = iwch_destroy_qp, + .get_dev_fw_str = get_dev_fw_ver_str, + .get_dma_mr = iwch_get_dma_mr, + .get_hw_stats = iwch_get_mib, + .get_port_immutable = iwch_port_immutable, + .map_mr_sg = iwch_map_mr_sg, + .mmap = iwch_mmap, + .modify_qp = iwch_ib_modify_qp, + .poll_cq = iwch_poll_cq, + .post_recv = iwch_post_receive, + .post_send = iwch_post_send, + .query_device = iwch_query_device, + .query_gid = iwch_query_gid, + .query_pkey = iwch_query_pkey, + .query_port = iwch_query_port, + .reg_user_mr = iwch_reg_user_mr, + .req_notify_cq = iwch_arm_cq, + .resize_cq = iwch_resize_cq, +}; + int iwch_register_device(struct iwch_dev *dev) { int ret; @@ -1356,37 +1389,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.phys_port_cnt = 
dev->rdev.port_info.nports; dev->ibdev.num_comp_vectors = 1; dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev; - dev->ibdev.query_device = iwch_query_device; - dev->ibdev.query_port = iwch_query_port; - dev->ibdev.query_pkey = iwch_query_pkey; - dev->ibdev.query_gid = iwch_query_gid; - dev->ibdev.alloc_ucontext = iwch_alloc_ucontext; - dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext; - dev->ibdev.mmap = iwch_mmap; - dev->ibdev.alloc_pd = iwch_allocate_pd; - dev->ibdev.dealloc_pd = iwch_deallocate_pd; - dev->ibdev.create_qp = iwch_create_qp; - dev->ibdev.modify_qp = iwch_ib_modify_qp; - dev->ibdev.destroy_qp = iwch_destroy_qp; - dev->ibdev.create_cq = iwch_create_cq; - dev->ibdev.destroy_cq = iwch_destroy_cq; - dev->ibdev.resize_cq = iwch_resize_cq; - dev->ibdev.poll_cq = iwch_poll_cq; - dev->ibdev.get_dma_mr = iwch_get_dma_mr; - dev->ibdev.reg_user_mr = iwch_reg_user_mr; - dev->ibdev.dereg_mr = iwch_dereg_mr; - dev->ibdev.alloc_mw = iwch_alloc_mw; - dev->ibdev.dealloc_mw = iwch_dealloc_mw; - dev->ibdev.alloc_mr = iwch_alloc_mr; - dev->ibdev.map_mr_sg = iwch_map_mr_sg; - dev->ibdev.req_notify_cq = iwch_arm_cq; - dev->ibdev.post_send = iwch_post_send; - dev->ibdev.post_recv = iwch_post_receive; - dev->ibdev.alloc_hw_stats = iwch_alloc_stats; - dev->ibdev.get_hw_stats = iwch_get_mib; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; - dev->ibdev.get_port_immutable = iwch_port_immutable; - dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) @@ -1405,6 +1408,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.driver_id = RDMA_DRIVER_CXGB3; rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); + ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL); if (ret) kfree(dev->ibdev.iwcm); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 615413bd3e8d..8221813219e5 100644 --- 
a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2058,8 +2058,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, } ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, - cxgb4_port_viid(pdev)); + ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx; step = cdev->rdev.lldi.ntxq / cdev->rdev.lldi.nchan; ep->txq_idx = cxgb4_port_idx(pdev) * step; @@ -2078,8 +2077,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, goto out; ep->mtu = dst_mtu(dst); ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, - cxgb4_port_viid(pdev)); + ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx; step = cdev->rdev.lldi.ntxq / cdev->rdev.lldi.nchan; ep->txq_idx = cxgb4_port_idx(pdev) * step; @@ -2795,7 +2793,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) break; case MPA_REQ_SENT: (void)stop_ep_timer(ep); - if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1)) + if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 || + (mpa_rev == 2 && ep->tried_with_mpa_v1)) connect_reply_upcall(ep, -ECONNRESET); else { /* @@ -3944,7 +3943,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) } else { vlan_eh = (struct vlan_ethhdr *)(req + 1); iph = (struct iphdr *)(vlan_eh + 1); - skb->vlan_tci = ntohs(cpl->vlan); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan)); } if (iph->version != 0x4) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index cbb3c0ddd990..586b0c37481f 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -531,6 +531,44 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) c4iw_restrack_funcs[res->type](msg, res) : 0; } +static const struct ib_device_ops c4iw_dev_ops = { + .alloc_hw_stats = c4iw_alloc_stats, + .alloc_mr = 
c4iw_alloc_mr, + .alloc_mw = c4iw_alloc_mw, + .alloc_pd = c4iw_allocate_pd, + .alloc_ucontext = c4iw_alloc_ucontext, + .create_cq = c4iw_create_cq, + .create_qp = c4iw_create_qp, + .create_srq = c4iw_create_srq, + .dealloc_mw = c4iw_dealloc_mw, + .dealloc_pd = c4iw_deallocate_pd, + .dealloc_ucontext = c4iw_dealloc_ucontext, + .dereg_mr = c4iw_dereg_mr, + .destroy_cq = c4iw_destroy_cq, + .destroy_qp = c4iw_destroy_qp, + .destroy_srq = c4iw_destroy_srq, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = c4iw_get_dma_mr, + .get_hw_stats = c4iw_get_mib, + .get_netdev = get_netdev, + .get_port_immutable = c4iw_port_immutable, + .map_mr_sg = c4iw_map_mr_sg, + .mmap = c4iw_mmap, + .modify_qp = c4iw_ib_modify_qp, + .modify_srq = c4iw_modify_srq, + .poll_cq = c4iw_poll_cq, + .post_recv = c4iw_post_receive, + .post_send = c4iw_post_send, + .post_srq_recv = c4iw_post_srq_recv, + .query_device = c4iw_query_device, + .query_gid = c4iw_query_gid, + .query_pkey = c4iw_query_pkey, + .query_port = c4iw_query_port, + .query_qp = c4iw_ib_query_qp, + .reg_user_mr = c4iw_reg_user_mr, + .req_notify_cq = c4iw_arm_cq, +}; + void c4iw_register_device(struct work_struct *work) { int ret; @@ -573,42 +611,7 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq; dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev; - dev->ibdev.query_device = c4iw_query_device; - dev->ibdev.query_port = c4iw_query_port; - dev->ibdev.query_pkey = c4iw_query_pkey; - dev->ibdev.query_gid = c4iw_query_gid; - dev->ibdev.alloc_ucontext = c4iw_alloc_ucontext; - dev->ibdev.dealloc_ucontext = c4iw_dealloc_ucontext; - dev->ibdev.mmap = c4iw_mmap; - dev->ibdev.alloc_pd = c4iw_allocate_pd; - dev->ibdev.dealloc_pd = c4iw_deallocate_pd; - dev->ibdev.create_qp = c4iw_create_qp; - dev->ibdev.modify_qp = c4iw_ib_modify_qp; - dev->ibdev.query_qp = c4iw_ib_query_qp; - dev->ibdev.destroy_qp = c4iw_destroy_qp; - dev->ibdev.create_srq 
= c4iw_create_srq; - dev->ibdev.modify_srq = c4iw_modify_srq; - dev->ibdev.destroy_srq = c4iw_destroy_srq; - dev->ibdev.create_cq = c4iw_create_cq; - dev->ibdev.destroy_cq = c4iw_destroy_cq; - dev->ibdev.poll_cq = c4iw_poll_cq; - dev->ibdev.get_dma_mr = c4iw_get_dma_mr; - dev->ibdev.reg_user_mr = c4iw_reg_user_mr; - dev->ibdev.dereg_mr = c4iw_dereg_mr; - dev->ibdev.alloc_mw = c4iw_alloc_mw; - dev->ibdev.dealloc_mw = c4iw_dealloc_mw; - dev->ibdev.alloc_mr = c4iw_alloc_mr; - dev->ibdev.map_mr_sg = c4iw_map_mr_sg; - dev->ibdev.req_notify_cq = c4iw_arm_cq; - dev->ibdev.post_send = c4iw_post_send; - dev->ibdev.post_recv = c4iw_post_receive; - dev->ibdev.post_srq_recv = c4iw_post_srq_recv; - dev->ibdev.alloc_hw_stats = c4iw_alloc_stats; - dev->ibdev.get_hw_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; - dev->ibdev.get_port_immutable = c4iw_port_immutable; - dev->ibdev.get_dev_fw_str = get_dev_fw_str; - dev->ibdev.get_netdev = get_netdev; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) { @@ -630,6 +633,7 @@ void c4iw_register_device(struct work_struct *work) rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_CXGB4; + ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops); ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL); if (ret) goto err_kfree_iwcm; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 13478f3b7057..981ff5cfb5d1 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2163,7 +2163,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (sqsize < 8) sqsize = 8; - ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; + ucontext = udata ? 
to_c4iw_ucontext(pd->uobject->context) : NULL; qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); if (!qhp) @@ -2564,13 +2564,12 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >> T4_RQT_ENTRY_SHIFT; - wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, + wq->queue = dma_zalloc_coherent(&rdev->lldi.pdev->dev, wq->memsize, &wq->dma_addr, GFP_KERNEL); if (!wq->queue) goto err_free_rqtpool; - memset(wq->queue, 0, wq->memsize); dma_unmap_addr_set(wq, mapping, wq->dma_addr); wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS, @@ -2713,7 +2712,7 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, rqsize = attrs->attr.max_wr + 1; rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16)); - ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; + ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL; srq = kzalloc(sizeof(*srq), GFP_KERNEL); if (!srq) diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index ff790390c91a..3ce9dc8c3463 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -34,6 +34,7 @@ hfi1-y := \ ruc.o \ sdma.o \ sysfs.o \ + tid_rdma.o \ trace.o \ uc.o \ ud.o \ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 7e6d70936c63..b443642eac02 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1072,6 +1072,8 @@ static void log_state_transition(struct hfi1_pportdata *ppd, u32 state); static void log_physical_state(struct hfi1_pportdata *ppd, u32 state); static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void read_link_down_reason(struct hfi1_devdata *dd, 
u8 *ldr); static void handle_temp_err(struct hfi1_devdata *dd); @@ -10770,13 +10772,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) break; ppd->port_error_action = 0; - ppd->host_link_state = HLS_DN_POLL; if (quick_linkup) { /* quick linkup does not go into polling */ ret = do_quick_linkup(dd); } else { ret1 = set_physical_link_state(dd, PLS_POLLING); + if (!ret1) + ret1 = wait_phys_link_out_of_offline(ppd, + 3000); if (ret1 != HCMD_SUCCESS) { dd_dev_err(dd, "Failed to transition to Polling link state, return 0x%x\n", @@ -10784,6 +10788,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret = -EINVAL; } } + + /* + * Change the host link state after requesting DC8051 to + * change its physical state so that we can ignore any + * interrupt with stale LNI(XX) error, which will not be + * cleared until DC8051 transitions to Polling state. + */ + ppd->host_link_state = HLS_DN_POLL; ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE); /* @@ -12928,6 +12940,39 @@ static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd, return read_state; } +/* + * wait_phys_link_out_of_offline - wait for any out of offline state + * @ppd: port device + * @msecs: the number of milliseconds to wait + * + * Wait up to msecs milliseconds for any out of offline physical link + * state change to occur. + * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT. + */ +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs) +{ + u32 read_state; + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(msecs); + while (1) { + read_state = read_physical_state(ppd->dd); + if ((read_state & 0xF0) != PLS_OFFLINE) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(ppd->dd, + "timeout waiting for phy link out of offline. 
Read state 0x%x, %dms\n", + read_state, msecs); + return -ETIMEDOUT; + } + usleep_range(1950, 2050); /* sleep 2ms-ish */ + } + + log_state_transition(ppd, read_state); + return read_state; +} + #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index c6163a347e93..c0800ea5a3f8 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -935,6 +935,10 @@ #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_MASK 0x7FFull #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SHIFT 0 #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SMASK 0x7FFull +#define SEND_CTXT_CREDIT_STATUS (TXE + 0x000000100018) +#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK 0x7FFull +#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT 32 +#define SEND_CTXT_CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK 0x7FFull #define SEND_CTXT_CREDIT_FORCE (TXE + 0x000000100028) #define SEND_CTXT_CREDIT_FORCE_FORCE_RETURN_SMASK 0x1ull #define SEND_CTXT_CREDIT_RETURN_ADDR (TXE + 0x000000100020) diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 7108d4d92259..40d3cfb58bd1 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -136,18 +136,21 @@ HFI1_CAP_ALLOW_PERM_JKEY | \ HFI1_CAP_STATIC_RATE_CTRL | \ HFI1_CAP_PRINT_UNIMPL | \ - HFI1_CAP_TID_UNMAP) + HFI1_CAP_TID_UNMAP | \ + HFI1_CAP_OPFN) /* * A set of capability bits that are "global" and are not allowed to be * set in the user bitmask. 
*/ #define HFI1_CAP_RESERVED_MASK ((HFI1_CAP_SDMA | \ - HFI1_CAP_USE_SDMA_HEAD | \ - HFI1_CAP_EXTENDED_PSN | \ - HFI1_CAP_PRINT_UNIMPL | \ - HFI1_CAP_NO_INTEGRITY | \ - HFI1_CAP_PKEY_CHECK) << \ - HFI1_CAP_USER_SHIFT) + HFI1_CAP_USE_SDMA_HEAD | \ + HFI1_CAP_EXTENDED_PSN | \ + HFI1_CAP_PRINT_UNIMPL | \ + HFI1_CAP_NO_INTEGRITY | \ + HFI1_CAP_PKEY_CHECK | \ + HFI1_CAP_TID_RDMA | \ + HFI1_CAP_OPFN) << \ + HFI1_CAP_USER_SHIFT) /* * Set of capabilities that need to be enabled for kernel context in * order to be allowed for user contexts, as well. diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 9f992ae36c89..0a557795563c 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -407,6 +407,54 @@ DEBUGFS_SEQ_FILE_OPS(rcds); DEBUGFS_SEQ_FILE_OPEN(rcds) DEBUGFS_FILE_OPS(rcds); +static void *_pios_seq_start(struct seq_file *s, loff_t *pos) +{ + struct hfi1_ibdev *ibd; + struct hfi1_devdata *dd; + + ibd = (struct hfi1_ibdev *)s->private; + dd = dd_from_dev(ibd); + if (!dd->send_contexts || *pos >= dd->num_send_contexts) + return NULL; + return pos; +} + +static void *_pios_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + + ++*pos; + if (!dd->send_contexts || *pos >= dd->num_send_contexts) + return NULL; + return pos; +} + +static void _pios_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _pios_seq_show(struct seq_file *s, void *v) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + struct send_context_info *sci; + loff_t *spos = v; + loff_t i = *spos; + unsigned long flags; + + spin_lock_irqsave(&dd->sc_lock, flags); + sci = &dd->send_contexts[i]; + if (sci && sci->type != SC_USER && sci->allocated && sci->sc) + seqfile_dump_sci(s, i, sci); + spin_unlock_irqrestore(&dd->sc_lock, flags); + return 0; +} + 
+DEBUGFS_SEQ_FILE_OPS(pios); +DEBUGFS_SEQ_FILE_OPEN(pios) +DEBUGFS_FILE_OPS(pios); + /* read the per-device counters */ static ssize_t dev_counters_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -1143,6 +1191,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd); + DEBUGFS_SEQ_FILE_CREATE(pios, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd); /* dev counter files */ for (i = 0; i < ARRAY_SIZE(cntr_ops); i++) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a41f85558312..a8ad70730203 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -430,40 +430,60 @@ static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = { [HFI1_PKT_TYPE_16B] = &return_cnp_16B }; -void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp) +/** + * hfi1_process_ecn_slowpath - Process FECN or BECN bits + * @qp: The packet's destination QP + * @pkt: The packet itself. + * @prescan: Is the caller the RXQ prescan + * + * Process the packet's FECN or BECN bits. By now, the packet + * has already been evaluated whether processing of those bit should + * be done. + * The significance of the @prescan argument is that if the caller + * is the RXQ prescan, a CNP will be send out instead of waiting for the + * normal packet processing to send an ACK with BECN set (or a CNP). 
+ */ +bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool prescan) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = pkt->grh; - u32 rqpn = 0, bth1; + u32 rqpn = 0; u16 pkey; u32 rlid, slid, dlid = 0; - u8 hdr_type, sc, svc_type; - bool is_mcast = false; + u8 hdr_type, sc, svc_type, opcode; + bool is_mcast = false, ignore_fecn = false, do_cnp = false, + fecn, becn; /* can be called from prescan */ if (pkt->etype == RHF_RCV_TYPE_BYPASS) { - is_mcast = hfi1_is_16B_mcast(dlid); pkey = hfi1_16B_get_pkey(pkt->hdr); sc = hfi1_16B_get_sc(pkt->hdr); dlid = hfi1_16B_get_dlid(pkt->hdr); slid = hfi1_16B_get_slid(pkt->hdr); + is_mcast = hfi1_is_16B_mcast(dlid); + opcode = ib_bth_get_opcode(ohdr); hdr_type = HFI1_PKT_TYPE_16B; + fecn = hfi1_16B_get_fecn(pkt->hdr); + becn = hfi1_16B_get_becn(pkt->hdr); } else { - is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); pkey = ib_bth_get_pkey(ohdr); sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf); - dlid = ib_get_dlid(pkt->hdr); + dlid = qp->ibqp.qp_type != IB_QPT_UD ? 
ib_get_dlid(pkt->hdr) : + ppd->lid; slid = ib_get_slid(pkt->hdr); + is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); + opcode = ib_bth_get_opcode(ohdr); hdr_type = HFI1_PKT_TYPE_9B; + fecn = ib_bth_get_fecn(ohdr); + becn = ib_bth_get_becn(ohdr); } switch (qp->ibqp.qp_type) { case IB_QPT_UD: - dlid = ppd->lid; rlid = slid; rqpn = ib_get_sqpn(pkt->ohdr); svc_type = IB_CC_SVCTYPE_UD; @@ -485,22 +505,31 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, svc_type = IB_CC_SVCTYPE_RC; break; default: - return; + return false; } - bth1 = be32_to_cpu(ohdr->bth[1]); + ignore_fecn = is_mcast || (opcode == IB_OPCODE_CNP) || + (opcode == IB_OPCODE_RC_ACKNOWLEDGE); + /* + * ACKNOWLEDGE packets do not get a CNP but this will be + * guarded by ignore_fecn above. + */ + do_cnp = prescan || + (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST && + opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE); + /* Call appropriate CNP handler */ - if (do_cnp && (bth1 & IB_FECN_SMASK)) + if (!ignore_fecn && do_cnp && fecn) hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); - if (!is_mcast && (bth1 & IB_BECN_SMASK)) { - u32 lqpn = bth1 & RVT_QPN_MASK; + if (becn) { + u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; u8 sl = ibp->sc_to_sl[sc]; process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); } - + return !ignore_fecn && fecn; } struct ps_mdata { @@ -599,7 +628,6 @@ static void __prescan_rxq(struct hfi1_packet *packet) struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; - int is_ecn = 0; u8 lnh; if (ps_done(&mdata, rhf, rcd)) @@ -625,12 +653,10 @@ static void __prescan_rxq(struct hfi1_packet *packet) goto next; /* just in case */ } - bth1 = be32_to_cpu(packet->ohdr->bth[1]); - is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK)); - - if (!is_ecn) + if (!hfi1_may_ecn(packet)) goto next; + bth1 = be32_to_cpu(packet->ohdr->bth[1]); 
qpn = bth1 & RVT_QPN_MASK; rcu_read_lock(); qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn); @@ -640,7 +666,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) goto next; } - process_ecn(qp, packet, true); + hfi1_process_ecn_slowpath(qp, packet, true); rcu_read_unlock(); /* turn off BECN, FECN */ @@ -1400,7 +1426,7 @@ static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet) if ((!(hfi1_is_16B_mcast(packet->dlid))) && (packet->dlid != opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) { - if (packet->dlid != ppd->lid) + if ((packet->dlid & ~((1 << ppd->lmc) - 1)) != ppd->lid) return -EINVAL; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2b882347d0c2..6db2276f5c13 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1804,13 +1804,20 @@ static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd) return &rcd->ppd->ibport_data; } -void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp); -static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp) +/** + * hfi1_may_ecn - Check whether FECN or BECN processing should be done + * @pkt: the packet to be evaluated + * + * Check whether the FECN or BECN bits in the packet's header are + * enabled, depending on packet type. + * + * This function only checks for FECN and BECN bits. Additional checks + * are done in the slowpath (hfi1_process_ecn_slowpath()) in order to + * ensure correct handling. 
+ */ +static inline bool hfi1_may_ecn(struct hfi1_packet *pkt) { - bool becn; - bool fecn; + bool fecn, becn; if (pkt->etype == RHF_RCV_TYPE_BYPASS) { fecn = hfi1_16B_get_fecn(pkt->hdr); @@ -1819,10 +1826,18 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, fecn = ib_bth_get_fecn(pkt->ohdr); becn = ib_bth_get_becn(pkt->ohdr); } - if (unlikely(fecn || becn)) { - hfi1_process_ecn_slowpath(qp, pkt, do_cnp); - return fecn; - } + return fecn || becn; +} + +bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool prescan); +static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt) +{ + bool do_work; + + do_work = hfi1_may_ecn(pkt); + if (unlikely(do_work)) + return hfi1_process_ecn_slowpath(qp, pkt, false); return false; } diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 88a0cf930136..4228393e6c4c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -305,7 +305,7 @@ static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid) rcu_read_lock(); qp0 = rcu_dereference(ibp->rvp.qp[0]); if (qp0) - ah = rdma_create_ah(qp0->ibqp.pd, &attr); + ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0); rcu_read_unlock(); return ah; } diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 475b769e120c..14d2a90964c3 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -68,8 +68,7 @@ struct mmu_rb_handler { static unsigned long mmu_node_start(struct mmu_rb_node *); static unsigned long mmu_node_last(struct mmu_rb_node *); static int mmu_notifier_range_start(struct mmu_notifier *, - struct mm_struct *, - unsigned long, unsigned long, bool); + const struct mmu_notifier_range *); static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, unsigned long, unsigned long); static void do_remove(struct mmu_rb_handler *handler, @@ -284,10 +283,7 @@ void 
hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, } static int mmu_notifier_range_start(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end, - bool blockable) + const struct mmu_notifier_range *range) { struct mmu_rb_handler *handler = container_of(mn, struct mmu_rb_handler, mn); @@ -297,10 +293,11 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn, bool added = false; spin_lock_irqsave(&handler->lock, flags); - for (node = __mmu_int_rb_iter_first(root, start, end - 1); + for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1); node; node = ptr) { /* Guard against node removal. */ - ptr = __mmu_int_rb_iter_next(node, start, end - 1); + ptr = __mmu_int_rb_iter_next(node, range->start, + range->end - 1); trace_hfi1_mmu_mem_invalidate(node->addr, node->len); if (handler->ops->invalidate(handler->ops_arg, node)) { __mmu_int_rb_remove(node, root); diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 9ab50d2308dc..dd5a5c030066 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -742,6 +742,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, spin_lock_init(&sc->alloc_lock); spin_lock_init(&sc->release_lock); spin_lock_init(&sc->credit_ctrl_lock); + seqlock_init(&sc->waitlock); INIT_LIST_HEAD(&sc->piowait); INIT_WORK(&sc->halt_work, sc_halted); init_waitqueue_head(&sc->halt_wait); @@ -1593,7 +1594,6 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) static void sc_piobufavail(struct send_context *sc) { struct hfi1_devdata *dd = sc->dd; - struct hfi1_ibdev *dev = &dd->verbs_dev; struct list_head *list; struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE]; struct rvt_qp *qp; @@ -1612,7 +1612,7 @@ static void sc_piobufavail(struct send_context *sc) * could end up with QPs on the wait list with the interrupt * disabled. 
*/ - write_seqlock_irqsave(&dev->iowait_lock, flags); + write_seqlock_irqsave(&sc->waitlock, flags); while (!list_empty(list)) { struct iowait *wait; @@ -1636,7 +1636,7 @@ static void sc_piobufavail(struct send_context *sc) if (!list_empty(list)) hfi1_sc_wantpiobuf_intr(sc, 1); } - write_sequnlock_irqrestore(&dev->iowait_lock, flags); + write_sequnlock_irqrestore(&sc->waitlock, flags); /* Wake up the most starved one first */ if (n) @@ -2137,3 +2137,28 @@ void free_credit_return(struct hfi1_devdata *dd) kfree(dd->cr_base); dd->cr_base = NULL; } + +void seqfile_dump_sci(struct seq_file *s, u32 i, + struct send_context_info *sci) +{ + struct send_context *sc = sci->sc; + u64 reg; + + seq_printf(s, "SCI %u: type %u base %u credits %u\n", + i, sci->type, sci->base, sci->credits); + seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n", + sc->flags, sc->sw_index, sc->hw_context, sc->group); + seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n", + sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail); + seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n", + sc->fill, sc->free, sc->fill_wrap, sc->alloc_free); + seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n", + sc->credit_intr_count, sc->credit_ctrl); + reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS)); + seq_printf(s, " *hw_free %llu CurrentFree %llu LastReturned %llu\n", + (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >> + CR_COUNTER_SHIFT, + (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) & + SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK), + reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK)); +} diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index aaf372c3e5d6..c9a58b642bdd 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -127,6 +127,8 @@ struct send_context { volatile __le64 *hw_free; /* HW free counter */ /* list for PIO waiters */ struct list_head piowait ____cacheline_aligned_in_smp; + 
seqlock_t waitlock; + spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp; u32 credit_intr_count; /* count of credit intr users */ u64 credit_ctrl; /* cache for credit control */ @@ -329,4 +331,7 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes); void seg_pio_copy_end(struct pio_buf *pbuf); +void seqfile_dump_sci(struct seq_file *s, u32 i, + struct send_context_info *sci); + #endif /* _PIO_H */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1a016248039f..5344e8993b28 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -375,20 +375,18 @@ bool _hfi1_schedule_send(struct rvt_qp *qp) static void qp_pio_drain(struct rvt_qp *qp) { - struct hfi1_ibdev *dev; struct hfi1_qp_priv *priv = qp->priv; if (!priv->s_sendcontext) return; - dev = to_idev(qp->ibqp.device); while (iowait_pio_pending(&priv->s_iowait)) { - write_seqlock_irq(&dev->iowait_lock); + write_seqlock_irq(&priv->s_sendcontext->waitlock); hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1); - write_sequnlock_irq(&dev->iowait_lock); + write_sequnlock_irq(&priv->s_sendcontext->waitlock); iowait_pio_drain(&priv->s_iowait); - write_seqlock_irq(&dev->iowait_lock); + write_seqlock_irq(&priv->s_sendcontext->waitlock); hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0); - write_sequnlock_irq(&dev->iowait_lock); + write_sequnlock_irq(&priv->s_sendcontext->waitlock); } } @@ -459,7 +457,6 @@ static int iowait_sleep( struct hfi1_qp_priv *priv; unsigned long flags; int ret = 0; - struct hfi1_ibdev *dev; qp = tx->qp; priv = qp->priv; @@ -472,9 +469,8 @@ static int iowait_sleep( * buffer and undoing the side effects of the copy. */ /* Make a common routine? 
*/ - dev = &sde->dd->verbs_dev; list_add_tail(&stx->list, &wait->tx_head); - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); if (sdma_progress(sde, seq, stx)) goto eagain; if (list_empty(&priv->s_iowait.list)) { @@ -485,11 +481,11 @@ static int iowait_sleep( qp->s_flags |= RVT_S_WAIT_DMA_DESC; iowait_queue(pkts_sent, &priv->s_iowait, &sde->dmawait); - priv->s_iowait.lock = &dev->iowait_lock; + priv->s_iowait.lock = &sde->waitlock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); rvt_get_qp(qp); } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); hfi1_qp_unbusy(qp, wait); spin_unlock_irqrestore(&qp->s_lock, flags); ret = -EBUSY; @@ -499,7 +495,7 @@ static int iowait_sleep( } return ret; eagain: - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); spin_unlock_irqrestore(&qp->s_lock, flags); list_del_init(&stx->list); return -EAGAIN; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 188aa4f686a0..be603f35d7e4 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1157,6 +1157,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 && cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; + rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) @@ -1209,6 +1210,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, u32 s_last; rvt_put_swqe(wqe); + rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) @@ -2049,8 +2051,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) struct ib_reth *reth; unsigned long flags; int ret; - bool is_fecn = false; - bool copy_last = false; + bool copy_last = false, fecn; u32 rkey; u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2); @@ -2059,7 +2060,7 @@ void hfi1_rc_rcv(struct 
hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - is_fecn = process_ecn(qp, packet, false); + fecn = process_ecn(qp, packet); /* * Process responses (ACKs) before anything else. Note that the @@ -2070,8 +2071,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && opcode <= OP(ATOMIC_ACKNOWLEDGE)) { rc_rcv_resp(packet); - if (is_fecn) - goto send_ack; return; } @@ -2347,11 +2346,11 @@ send_last: /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; } @@ -2413,11 +2412,11 @@ send_last: /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; } @@ -2430,16 +2429,9 @@ send_last: qp->r_ack_psn = psn; qp->r_nak_state = 0; /* Send an ACK if requested or required. 
*/ - if (psn & IB_BTH_REQ_ACK) { - if (packet->numpkt == 0) { - rc_cancel_ack(qp); - goto send_ack; - } - if (qp->r_adefered >= HFI1_PSN_CREDIT) { - rc_cancel_ack(qp); - goto send_ack; - } - if (unlikely(is_fecn)) { + if (psn & IB_BTH_REQ_ACK || fecn) { + if (packet->numpkt == 0 || fecn || + qp->r_adefered >= HFI1_PSN_CREDIT) { rc_cancel_ack(qp); goto send_ack; } @@ -2480,7 +2472,7 @@ nack_acc: qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; send_ack: - hfi1_send_rc_ack(packet, is_fecn); + hfi1_send_rc_ack(packet, fecn); } void hfi1_rc_hdrerr( diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 891d2386d1ca..b84356e1a4c1 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1424,6 +1424,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) seqlock_init(&sde->head_lock); spin_lock_init(&sde->senddmactrl_lock); spin_lock_init(&sde->flushlist_lock); + seqlock_init(&sde->waitlock); /* insure there is always a zero bit */ sde->ahg_bits = 0xfffffffe00000000ULL; @@ -1758,7 +1759,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) struct iowait *wait, *nw; struct iowait *waits[SDMA_WAIT_BATCH_SIZE]; uint i, n = 0, seq, max_idx = 0; - struct hfi1_ibdev *dev = &sde->dd->verbs_dev; u8 max_starved_cnt = 0; #ifdef CONFIG_SDMA_VERBOSITY @@ -1768,10 +1768,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) #endif do { - seq = read_seqbegin(&dev->iowait_lock); + seq = read_seqbegin(&sde->waitlock); if (!list_empty(&sde->dmawait)) { /* at least one item */ - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); /* Harvest waiters wanting DMA descriptors */ list_for_each_entry_safe( wait, @@ -1794,10 +1794,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) list_del_init(&wait->list); waits[n++] = wait; } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); break; } - } while 
(read_seqretry(&dev->iowait_lock, seq)); + } while (read_seqretry(&sde->waitlock, seq)); /* Schedule the most starved one first */ if (n) diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 6dc63d7c5685..1e2e40f79cb2 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -382,6 +382,7 @@ struct sdma_engine { u64 progress_int_cnt; /* private: */ + seqlock_t waitlock; struct list_head dmawait; /* CONFIG SDMA for now, just blindly duplicate */ diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c new file mode 100644 index 000000000000..da1ecb68a928 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2018 Intel Corporation. + * + */ + +#include "hfi.h" +#include "verbs.h" +#include "tid_rdma.h" + +/** + * qp_to_rcd - determine the receive context used by a qp + * @qp: the qp + * + * This routine returns the receive context associated + * with a qp's qpn. + * + * Returns the context. 
+ */ +static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi, + struct rvt_qp *qp) +{ + struct hfi1_ibdev *verbs_dev = container_of(rdi, + struct hfi1_ibdev, + rdi); + struct hfi1_devdata *dd = container_of(verbs_dev, + struct hfi1_devdata, + verbs_dev); + unsigned int ctxt; + + if (qp->ibqp.qp_num == 0) + ctxt = 0; + else + ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) % + (dd->n_krcv_queues - 1)) + 1; + + return dd->rcd[ctxt]; +} + +int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_init_attr *init_attr) +{ + struct hfi1_qp_priv *qpriv = qp->priv; + + qpriv->rcd = qp_to_rcd(rdi, qp); + + return 0; +} diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h new file mode 100644 index 000000000000..6fcd3adcdcc3 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Copyright(c) 2018 Intel Corporation. + * + */ +#ifndef HFI1_TID_RDMA_H +#define HFI1_TID_RDMA_H + +int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_init_attr *init_attr); + +#endif /* HFI1_TID_RDMA_H */ + diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 6aca0c5a7f97..6ba47037c424 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -321,7 +321,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - process_ecn(qp, packet, true); + process_ecn(qp, packet); psn = ib_bth_get_psn(ohdr); /* Compare the PSN verses the expected PSN. 
*/ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 4baa8f4d49de..88242fe95eaa 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -51,6 +51,7 @@ #include "hfi.h" #include "mad.h" #include "verbs_txreq.h" +#include "trace_ibhdrs.h" #include "qp.h" /* We support only two types - 9B and 16B for now */ @@ -656,18 +657,19 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 bth0, plen, vl, hwords = 7; u16 len; u8 l4; - struct hfi1_16b_header hdr; + struct hfi1_opa_header hdr; struct ib_other_headers *ohdr; struct pio_buf *pbuf; struct send_context *ctxt = qp_to_send_context(qp, sc5); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 nwords; + hdr.hdr_type = HFI1_PKT_TYPE_16B; /* Populate length */ nwords = ((hfi1_get_16b_padding(hwords << 2, 0) + SIZE_OF_LT) >> 2) + SIZE_OF_CRC; if (old_grh) { - struct ib_grh *grh = &hdr.u.l.grh; + struct ib_grh *grh = &hdr.opah.u.l.grh; grh->version_tclass_flow = old_grh->version_tclass_flow; grh->paylen = cpu_to_be16( @@ -675,11 +677,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, grh->hop_limit = 0xff; grh->sgid = old_grh->dgid; grh->dgid = old_grh->sgid; - ohdr = &hdr.u.l.oth; + ohdr = &hdr.opah.u.l.oth; l4 = OPA_16B_L4_IB_GLOBAL; hwords += sizeof(struct ib_grh) / sizeof(u32); } else { - ohdr = &hdr.u.oth; + ohdr = &hdr.opah.u.oth; l4 = OPA_16B_L4_IB_LOCAL; } @@ -693,7 +695,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, /* Convert dwords to flits */ len = (hwords + nwords) >> 1; - hfi1_make_16b_hdr(&hdr, slid, dlid, len, pkey, 1, 0, l4, sc5); + hfi1_make_16b_hdr(&hdr.opah, slid, dlid, len, pkey, 1, 0, l4, sc5); plen = 2 /* PBC */ + hwords + nwords; pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; @@ -701,9 +703,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { pbuf = sc_buffer_alloc(ctxt, plen, NULL, 
NULL); - if (pbuf) + if (pbuf) { + trace_pio_output_ibhdr(ppd->dd, &hdr, sc5); ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); + } } } @@ -715,14 +719,15 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 bth0, plen, vl, hwords = 5; u16 lrh0; u8 sl = ibp->sc_to_sl[sc5]; - struct ib_header hdr; + struct hfi1_opa_header hdr; struct ib_other_headers *ohdr; struct pio_buf *pbuf; struct send_context *ctxt = qp_to_send_context(qp, sc5); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + hdr.hdr_type = HFI1_PKT_TYPE_9B; if (old_grh) { - struct ib_grh *grh = &hdr.u.l.grh; + struct ib_grh *grh = &hdr.ibh.u.l.grh; grh->version_tclass_flow = old_grh->version_tclass_flow; grh->paylen = cpu_to_be16( @@ -730,11 +735,11 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, grh->hop_limit = 0xff; grh->sgid = old_grh->dgid; grh->dgid = old_grh->sgid; - ohdr = &hdr.u.l.oth; + ohdr = &hdr.ibh.u.l.oth; lrh0 = HFI1_LRH_GRH; hwords += sizeof(struct ib_grh) / sizeof(u32); } else { - ohdr = &hdr.u.oth; + ohdr = &hdr.ibh.u.oth; lrh0 = HFI1_LRH_BTH; } @@ -746,16 +751,18 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT)); ohdr->bth[2] = 0; /* PSN 0 */ - hfi1_make_ib_hdr(&hdr, lrh0, hwords + SIZE_OF_CRC, dlid, slid); + hfi1_make_ib_hdr(&hdr.ibh, lrh0, hwords + SIZE_OF_CRC, dlid, slid); plen = 2 /* PBC */ + hwords; pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); vl = sc_to_vlt(ppd->dd, sc5); pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL); - if (pbuf) + if (pbuf) { + trace_pio_output_ibhdr(ppd->dd, &hdr, sc5); ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); + } } } @@ -912,7 +919,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) src_qp = hfi1_16B_get_src_qpn(packet->mgmt); } - process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); + process_ecn(qp, 
packet); /* * Get the number of bytes the message was padded by * and drop incomplete packets. diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 3f0aadccd9f6..e5e7fad09f32 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -130,7 +130,6 @@ static int defer_packet_queue( { struct hfi1_user_sdma_pkt_q *pq = container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy); - struct hfi1_ibdev *dev = &pq->dd->verbs_dev; struct user_sdma_txreq *tx = container_of(txreq, struct user_sdma_txreq, txreq); @@ -144,10 +143,10 @@ static int defer_packet_queue( * it is supposed to be enqueued. */ xchg(&pq->state, SDMA_PKT_Q_DEFERRED); - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); if (list_empty(&pq->busy.list)) iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); return -EBUSY; eagain: return -EAGAIN; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index a365089a9305..ec582d86025f 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -765,7 +765,6 @@ static int pio_wait(struct rvt_qp *qp, { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_devdata *dd = sc->dd; - struct hfi1_ibdev *dev = &dd->verbs_dev; unsigned long flags; int ret = 0; @@ -777,7 +776,7 @@ static int pio_wait(struct rvt_qp *qp, */ spin_lock_irqsave(&qp->s_lock, flags); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { - write_seqlock(&dev->iowait_lock); + write_seqlock(&sc->waitlock); list_add_tail(&ps->s_txreq->txreq.list, &ps->wait->tx_head); if (list_empty(&priv->s_iowait.list)) { @@ -790,14 +789,14 @@ static int pio_wait(struct rvt_qp *qp, was_empty = list_empty(&sc->piowait); iowait_queue(ps->pkts_sent, &priv->s_iowait, &sc->piowait); - priv->s_iowait.lock = &dev->iowait_lock; + priv->s_iowait.lock = &sc->waitlock; trace_hfi1_qpsleep(qp, 
RVT_S_WAIT_PIO); rvt_get_qp(qp); /* counting: only call wantpiobuf_intr if first user */ if (was_empty) hfi1_sc_wantpiobuf_intr(sc, 1); } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sc->waitlock); hfi1_qp_unbusy(qp, ps->wait); ret = -EBUSY; } @@ -919,6 +918,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (slen > len) slen = len; + if (slen > ss->sge.sge_length) + slen = ss->sge.sge_length; rvt_update_sge(ss, slen, false); seg_pio_copy_mid(pbuf, addr, slen); len -= slen; @@ -1616,6 +1617,16 @@ static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, return count; } +static const struct ib_device_ops hfi1_dev_ops = { + .alloc_hw_stats = alloc_hw_stats, + .alloc_rdma_netdev = hfi1_vnic_alloc_rn, + .get_dev_fw_str = hfi1_get_dev_fw_str, + .get_hw_stats = get_hw_stats, + .modify_device = modify_device, + /* keep process mad in the driver */ + .process_mad = hfi1_process_mad, +}; + /** * hfi1_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1659,14 +1670,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->owner = THIS_MODULE; ibdev->phys_port_cnt = dd->num_pports; ibdev->dev.parent = &dd->pcidev->dev; - ibdev->modify_device = modify_device; - ibdev->alloc_hw_stats = alloc_hw_stats; - ibdev->get_hw_stats = get_hw_stats; - ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn; - /* keep process mad in the driver */ - ibdev->process_mad = hfi1_process_mad; - ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; + ib_set_device_ops(ibdev, &hfi1_dev_ops); strlcpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); @@ -1704,6 +1709,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE; dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; + dd->verbs_dev.rdi.driver_f.qp_priv_init = hfi1_qp_priv_init; dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; 
dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 64c9054db5f3..1ad0b14bdb3c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -71,6 +71,7 @@ struct hfi1_devdata; struct hfi1_packet; #include "iowait.h" +#include "tid_rdma.h" #define HFI1_MAX_RDMA_ATOMIC 16 @@ -156,6 +157,7 @@ struct hfi1_qp_priv { struct hfi1_ahg_info *s_ahg; /* ahg info for next header */ struct sdma_engine *s_sde; /* current sde */ struct send_context *s_sendcontext; /* current sendcontext */ + struct hfi1_ctxtdata *rcd; /* QP's receive context */ u8 s_sc; /* SC[0..4] for next packet */ struct iowait s_iowait; struct rvt_qp *owner; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index c9876d9e3cb9..a922db58be14 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -816,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, - chip_sdma_engines(dd), dd->num_vnic_contexts); + dd->num_sdma, dd->num_vnic_contexts); if (!netdev) return ERR_PTR(-ENOMEM); rn = netdev_priv(netdev); vinfo = opa_vnic_dev_priv(netdev); vinfo->dd = dd; - vinfo->num_tx_q = chip_sdma_engines(dd); + vinfo->num_tx_q = dd->num_sdma; vinfo->num_rx_q = dd->num_vnic_contexts; vinfo->netdev = netdev; rn->free_rdma_netdev = hfi1_vnic_free_rn; diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index 97bd940a056a..1f81c480e028 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -57,7 +57,6 @@ #define HFI1_VNIC_TXREQ_NAME_LEN 32 #define HFI1_VNIC_SDMA_DESC_WTRMRK 64 -#define HFI1_VNIC_SDMA_RETRY_COUNT 1 /* * struct 
vnic_txreq - VNIC transmit descriptor @@ -67,7 +66,6 @@ * @pad: pad buffer * @plen: pad length * @pbc_val: pbc value - * @retry_count: tx retry count */ struct vnic_txreq { struct sdma_txreq txreq; @@ -77,8 +75,6 @@ struct vnic_txreq { unsigned char pad[HFI1_VNIC_MAX_PAD]; u16 plen; __le64 pbc_val; - - u32 retry_count; }; static void vnic_sdma_complete(struct sdma_txreq *txreq, @@ -196,7 +192,6 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, ret = build_vnic_tx_desc(sde, tx, pbc); if (unlikely(ret)) goto free_desc; - tx->retry_count = 0; ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait), &tx->txreq, vnic_sdma->pkts_sent); @@ -237,18 +232,17 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, { struct hfi1_vnic_sdma *vnic_sdma = container_of(wait->iow, struct hfi1_vnic_sdma, wait); - struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev; - struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); - if (sdma_progress(sde, seq, txreq)) - if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT) - return -EAGAIN; + write_seqlock(&sde->waitlock); + if (sdma_progress(sde, seq, txreq)) { + write_sequnlock(&sde->waitlock); + return -EAGAIN; + } vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; - write_seqlock(&dev->iowait_lock); if (list_empty(&vnic_sdma->wait.list)) iowait_queue(pkts_sent, wait->iow, &sde->dmawait); - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); return -EBUSY; } diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index cf03404b9d58..004c88b32e13 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -7,7 +7,7 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ - hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o + hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o 
hns_roce_srq.o obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o hns-roce-hw-v1-objs := hns_roce_hw_v1.o obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 9990dc9eb96a..b3c8c45ec1e3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -41,6 +41,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); @@ -110,7 +111,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int hns_roce_destroy_ah(struct ib_ah *ah) +int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) { kfree(to_hr_ah(ah)); diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 46f65f9f59d0..6300033a448f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -239,6 +239,8 @@ err_free: void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) + hns_roce_cleanup_srq_table(hr_dev); hns_roce_cleanup_qp_table(hr_dev); hns_roce_cleanup_cq_table(hr_dev); hns_roce_cleanup_mr_table(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 9549ae51a0dd..927701df5eff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -120,6 +120,10 @@ enum { HNS_ROCE_CMD_SQD2RTS_QP = 0x20, HNS_ROCE_CMD_2RST_QP = 0x21, HNS_ROCE_CMD_QUERY_QP = 0x22, + HNS_ROCE_CMD_SW2HW_SRQ = 0x70, + HNS_ROCE_CMD_MODIFY_SRQC = 0x72, + HNS_ROCE_CMD_QUERY_SRQC = 0x73, + HNS_ROCE_CMD_HW2SW_SRQ = 0x74, }; int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h 
b/drivers/infiniband/hw/hns/hns_roce_common.h index 93d4b4ec002d..f4c92a7ac1ce 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -376,9 +376,6 @@ #define ROCEE_RX_CMQ_TAIL_REG 0x07024 #define ROCEE_RX_CMQ_HEAD_REG 0x07028 -#define ROCEE_VF_MB_CFG0_REG 0x40 -#define ROCEE_VF_MB_STATUS_REG 0x58 - #define ROCEE_VF_EQ_DB_CFG0_REG 0x238 #define ROCEE_VF_EQ_DB_CFG1_REG 0x23C diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index d39bdfdb5de9..509e467843f6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -111,6 +111,9 @@ #define PAGES_SHIFT_24 24 #define PAGES_SHIFT_32 32 +#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 +#define SRQ_DB_REG 0x230 + enum { HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1, @@ -196,6 +199,7 @@ enum { HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), + HNS_ROCE_CAP_FLAG_SRQ = BIT(5), HNS_ROCE_CAP_FLAG_MW = BIT(7), HNS_ROCE_CAP_FLAG_FRMR = BIT(8), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), @@ -204,6 +208,8 @@ enum { enum hns_roce_mtt_type { MTT_TYPE_WQE, MTT_TYPE_CQE, + MTT_TYPE_SRQWQE, + MTT_TYPE_IDX }; enum { @@ -339,6 +345,10 @@ struct hns_roce_mr_table { struct hns_roce_hem_table mtpt_table; struct hns_roce_buddy mtt_cqe_buddy; struct hns_roce_hem_table mtt_cqe_table; + struct hns_roce_buddy mtt_srqwqe_buddy; + struct hns_roce_hem_table mtt_srqwqe_table; + struct hns_roce_buddy mtt_idx_buddy; + struct hns_roce_hem_table mtt_idx_table; }; struct hns_roce_wq { @@ -429,9 +439,37 @@ struct hns_roce_cq { struct completion free; }; +struct hns_roce_idx_que { + struct hns_roce_buf idx_buf; + int entry_sz; + u32 buf_size; + struct ib_umem *umem; + struct hns_roce_mtt mtt; + u64 *bitmap; +}; + struct hns_roce_srq { struct ib_srq ibsrq; - int srqn; + void (*event)(struct hns_roce_srq *srq, enum 
hns_roce_event event); + unsigned long srqn; + int max; + int max_gs; + int wqe_shift; + void __iomem *db_reg_l; + + atomic_t refcount; + struct completion free; + + struct hns_roce_buf buf; + u64 *wrid; + struct ib_umem *umem; + struct hns_roce_mtt mtt; + struct hns_roce_idx_que idx_que; + spinlock_t lock; + int head; + int tail; + u16 wqe_ctr; + struct mutex mutex; }; struct hns_roce_uar_table { @@ -453,6 +491,12 @@ struct hns_roce_cq_table { struct hns_roce_hem_table table; }; +struct hns_roce_srq_table { + struct hns_roce_bitmap bitmap; + struct xarray xa; + struct hns_roce_hem_table table; +}; + struct hns_roce_raq_table { struct hns_roce_buf_list *e_raq_buf; }; @@ -603,6 +647,12 @@ struct hns_roce_aeqe { } qp_event; struct { + __le32 srq; + u32 rsv0; + u32 rsv1; + } srq_event; + + struct { __le32 cq; u32 rsv0; u32 rsv1; @@ -679,7 +729,12 @@ struct hns_roce_caps { u32 max_extend_sg; int num_qps; /* 256k */ int reserved_qps; + u32 max_srq_sg; + int num_srqs; u32 max_wqes; /* 16k */ + u32 max_srqs; + u32 max_srq_wrs; + u32 max_srq_sges; u32 max_sq_desc_sz; /* 64 */ u32 max_rq_desc_sz; /* 64 */ u32 max_srq_desc_sz; @@ -690,12 +745,16 @@ struct hns_roce_caps { int min_cqes; u32 min_wqes; int reserved_cqs; + int reserved_srqs; + u32 max_srqwqes; int num_aeq_vectors; /* 1 */ int num_comp_vectors; int num_other_vectors; int num_mtpts; u32 num_mtt_segs; u32 num_cqe_segs; + u32 num_srqwqe_segs; + u32 num_idx_segs; int reserved_mrws; int reserved_uars; int num_pds; @@ -709,6 +768,8 @@ struct hns_roce_caps { int irrl_entry_sz; int trrl_entry_sz; int cqc_entry_sz; + int srqc_entry_sz; + int idx_entry_sz; u32 pbl_ba_pg_sz; u32 pbl_buf_pg_sz; u32 pbl_hop_num; @@ -737,6 +798,12 @@ struct hns_roce_caps { u32 cqe_ba_pg_sz; u32 cqe_buf_pg_sz; u32 cqe_hop_num; + u32 srqwqe_ba_pg_sz; + u32 srqwqe_buf_pg_sz; + u32 srqwqe_hop_num; + u32 idx_ba_pg_sz; + u32 idx_buf_pg_sz; + u32 idx_hop_num; u32 eqe_ba_pg_sz; u32 eqe_buf_pg_sz; u32 eqe_hop_num; @@ -805,6 +872,19 @@ struct hns_roce_hw 
{ int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); + void (*write_srqc)(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn, + void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx, + dma_addr_t dma_handle_wqe, + dma_addr_t dma_handle_idx); + int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); + int (*query_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *attr); + int (*post_srq_recv)(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); + const struct ib_device_ops *hns_roce_dev_ops; + const struct ib_device_ops *hns_roce_dev_srq_ops; }; struct hns_roce_dev { @@ -839,6 +919,7 @@ struct hns_roce_dev { struct hns_roce_uar_table uar_table; struct hns_roce_mr_table mr_table; struct hns_roce_cq_table cq_table; + struct hns_roce_srq_table srq_table; struct hns_roce_qp_table qp_table; struct hns_roce_eq_table eq_table; @@ -951,12 +1032,14 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev); +int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev); int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj); void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj, @@ -973,9 +1056,10 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, struct 
ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int hns_roce_destroy_ah(struct ib_ah *ah); +int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, struct ib_ucontext *context, @@ -1011,6 +1095,14 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem); +struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); +int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); +int hns_roce_destroy_srq(struct ib_srq *ibsrq); + struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); @@ -1052,6 +1144,7 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db); void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); +void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index f6faefed96e8..4cdbcafa5915 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -46,7 +46,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) || (hr_dev->caps.srqc_hop_num && type == 
HEM_TYPE_SRQC) || (hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) || - (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT)) + (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) || + (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) || + (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX)) return true; return false; @@ -147,6 +149,22 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, mhop->ba_l0_num = mhop->bt_chunk_size / 8; mhop->hop_num = hr_dev->caps.cqe_hop_num; break; + case HEM_TYPE_SRQWQE: + mhop->buf_chunk_size = 1 << (hr_dev->caps.srqwqe_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->hop_num = hr_dev->caps.srqwqe_hop_num; + break; + case HEM_TYPE_IDX: + mhop->buf_chunk_size = 1 << (hr_dev->caps.idx_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->hop_num = hr_dev->caps.idx_hop_num; + break; default: dev_err(dev, "Table %d not support multi-hop addressing!\n", table->type); @@ -906,6 +924,18 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, bt_chunk_size = buf_chunk_size; hop_num = hr_dev->caps.cqe_hop_num; break; + case HEM_TYPE_SRQWQE: + buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_SHIFT); + bt_chunk_size = buf_chunk_size; + hop_num = hr_dev->caps.srqwqe_hop_num; + break; + case HEM_TYPE_IDX: + buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz + + PAGE_SHIFT); + bt_chunk_size = buf_chunk_size; + hop_num = hr_dev->caps.idx_hop_num; + break; default: dev_err(dev, "Table %d not support to init hem table here!\n", @@ -1041,6 +1071,15 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { + if ((hr_dev->caps.num_idx_segs)) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_idx_table); + if (hr_dev->caps.num_srqwqe_segs) + 
hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_srqwqe_table); + if (hr_dev->caps.srqc_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->srq_table.table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index e8850d59e780..a650278c6fbd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -48,6 +48,8 @@ enum { /* UNMAP HEM */ HEM_TYPE_MTT, HEM_TYPE_CQE, + HEM_TYPE_SRQWQE, + HEM_TYPE_IDX, HEM_TYPE_IRRL, HEM_TYPE_TRRL, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index ca05810c92dc..b74c742b000c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3926,7 +3926,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) struct hns_roce_qp_work *qp_work; struct hns_roce_v1_priv *priv; struct hns_roce_cq *send_cq, *recv_cq; - int is_user = !!ibqp->pd->uobject; + bool is_user = ibqp->uobject; int is_timeout = 0; int ret; @@ -4793,6 +4793,16 @@ static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev) kfree(eq_table->eq); } +static const struct ib_device_ops hns_roce_v1_dev_ops = { + .destroy_qp = hns_roce_v1_destroy_qp, + .modify_cq = hns_roce_v1_modify_cq, + .poll_cq = hns_roce_v1_poll_cq, + .post_recv = hns_roce_v1_post_recv, + .post_send = hns_roce_v1_post_send, + .query_qp = hns_roce_v1_query_qp, + .req_notify_cq = hns_roce_v1_req_notify_cq, +}; + static const struct hns_roce_hw hns_roce_hw_v1 = { .reset = hns_roce_v1_reset, .hw_profile = hns_roce_v1_profile, @@ -4818,6 +4828,7 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .destroy_cq = hns_roce_v1_destroy_cq, .init_eq = hns_roce_v1_init_eq_table, .cleanup_eq = hns_roce_v1_cleanup_eq_table, + .hns_roce_dev_ops = &hns_roce_v1_dev_ops, }; static const 
struct of_device_id hns_roce_of_match[] = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 3beb1523e17c..3a669451cf86 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1082,6 +1082,33 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) return 0; } +static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, + int vf_id) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_vf_switch *swt; + int ret; + + swt = (struct hns_roce_vf_switch *)desc.data; + hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true); + swt->rocee_sel |= cpu_to_le16(HNS_ICL_SWITCH_CMD_ROCEE_SEL); + roce_set_field(swt->fun_id, + VF_SWITCH_DATA_FUN_ID_VF_ID_M, + VF_SWITCH_DATA_FUN_ID_VF_ID_S, + vf_id); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) + return ret; + desc.flag = + cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN); + desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR); + roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1); + roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 1); + roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc[2]; @@ -1269,6 +1296,15 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } + if (hr_dev->pci_dev->revision == 0x21) { + ret = hns_roce_set_vf_switch_param(hr_dev, 0); + if (ret) { + dev_err(hr_dev->dev, + "Set function switch param fail, ret = %d.\n", + ret); + return ret; + } + } hr_dev->vendor_part_id = hr_dev->pci_dev->device; hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid); @@ -1276,11 +1312,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM; caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM; caps->num_cqs = 
HNS_ROCE_V2_MAX_CQ_NUM; + caps->num_srqs = HNS_ROCE_V2_MAX_SRQ_NUM; caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM; + caps->max_srqwqes = HNS_ROCE_V2_MAX_SRQWQE_NUM; caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM; caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM; caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM; caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; + caps->max_srq_sg = HNS_ROCE_V2_MAX_SRQ_SGE_NUM; caps->num_uars = HNS_ROCE_V2_UAR_NUM; caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM; caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM; @@ -1289,6 +1328,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM; caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; + caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; + caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM; caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA; caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA; @@ -1299,8 +1340,10 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ; caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; + caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ; caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; + caps->idx_entry_sz = 4; caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE; caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED; caps->reserved_lkey = 0; @@ -1308,6 +1351,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->reserved_mrws = 1; caps->reserved_uars = 0; caps->reserved_cqs = 0; + caps->reserved_srqs = 0; caps->reserved_qps = HNS_ROCE_V2_RSV_QPS; caps->qpc_ba_pg_sz = 0; @@ -1331,6 +1375,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->cqe_ba_pg_sz = 0; caps->cqe_buf_pg_sz = 0; caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM; + caps->srqwqe_ba_pg_sz = 0; + 
caps->srqwqe_buf_pg_sz = 0; + caps->srqwqe_hop_num = HNS_ROCE_SRQWQE_HOP_NUM; + caps->idx_ba_pg_sz = 0; + caps->idx_buf_pg_sz = 0; + caps->idx_hop_num = HNS_ROCE_IDX_HOP_NUM; caps->eqe_ba_pg_sz = 0; caps->eqe_buf_pg_sz = 0; caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; @@ -1354,8 +1404,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->local_ca_ack_delay = 0; caps->max_mtu = IB_MTU_4096; + caps->max_srqs = HNS_ROCE_V2_MAX_SRQ; + caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR; + caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE; + if (hr_dev->pci_dev->revision == 0x21) - caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC; + caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | + HNS_ROCE_CAP_FLAG_SRQ; ret = hns_roce_v2_set_bt(hr_dev); if (ret) @@ -1587,30 +1642,62 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) hns_roce_free_link_table(hr_dev, &priv->tsq); } +static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_mbox_status *mb_st = + (struct hns_roce_mbox_status *)desc.data; + enum hns_roce_cmd_return_status status; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true); + + status = hns_roce_cmq_send(hr_dev, &desc, 1); + if (status) + return status; + + return cpu_to_le32(mb_st->mb_status_hw_run); +} + static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev) { - u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); + u32 status = hns_roce_query_mbox_status(hr_dev); return status >> HNS_ROCE_HW_RUN_BIT_SHIFT; } static int hns_roce_v2_cmd_complete(struct hns_roce_dev *hr_dev) { - u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); + u32 status = hns_roce_query_mbox_status(hr_dev); return status & HNS_ROCE_HW_MB_STATUS_MASK; } +static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, u32 in_modifier, u8 op_modifier, + u16 op, u16 token, int event) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_post_mbox *mb = (struct 
hns_roce_post_mbox *)desc.data; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false); + + mb->in_param_l = cpu_to_le64(in_param); + mb->in_param_h = cpu_to_le64(in_param) >> 32; + mb->out_param_l = cpu_to_le64(out_param); + mb->out_param_h = cpu_to_le64(out_param) >> 32; + mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op); + mb->token_event_en = cpu_to_le32(event << 16 | token); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, u16 token, int event) { struct device *dev = hr_dev->dev; - u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + - ROCEE_VF_MB_CFG0_REG); unsigned long end; - u32 val0 = 0; - u32 val1 = 0; + int ret; end = msecs_to_jiffies(HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS) + jiffies; while (hns_roce_v2_cmd_pending(hr_dev)) { @@ -1622,27 +1709,12 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, cond_resched(); } - roce_set_field(val0, HNS_ROCE_VF_MB4_TAG_MASK, - HNS_ROCE_VF_MB4_TAG_SHIFT, in_modifier); - roce_set_field(val0, HNS_ROCE_VF_MB4_CMD_MASK, - HNS_ROCE_VF_MB4_CMD_SHIFT, op); - roce_set_field(val1, HNS_ROCE_VF_MB5_EVENT_MASK, - HNS_ROCE_VF_MB5_EVENT_SHIFT, event); - roce_set_field(val1, HNS_ROCE_VF_MB5_TOKEN_MASK, - HNS_ROCE_VF_MB5_TOKEN_SHIFT, token); - - writeq(in_param, hcr + 0); - writeq(out_param, hcr + 2); - - /* Memory barrier */ - wmb(); - - writel(val0, hcr + 4); - writel(val1, hcr + 5); - - mmiowb(); + ret = hns_roce_mbox_post(hr_dev, in_param, out_param, in_modifier, + op_modifier, op, token, event); + if (ret) + dev_err(dev, "Post mailbox fail(%d)\n", ret); - return 0; + return ret; } static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, @@ -2007,6 +2079,27 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) return get_sw_cqe_v2(hr_cq, hr_cq->cons_index); } +static void *get_srq_wqe(struct hns_roce_srq *srq, int n) +{ + return 
hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift); +} + +static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) +{ + u32 bitmap_num; + int bit_num; + + /* always called with interrupts disabled. */ + spin_lock(&srq->lock); + + bitmap_num = wqe_index / (sizeof(u64) * 8); + bit_num = wqe_index % (sizeof(u64) * 8); + srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num); + srq->tail++; + + spin_unlock(&srq->lock); +} + static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) { *hr_cq->set_ci_db = cons_index & 0xffffff; @@ -2018,6 +2111,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, struct hns_roce_v2_cqe *cqe, *dest; u32 prod_index; int nfreed = 0; + int wqe_index; u8 owner_bit; for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index); @@ -2035,7 +2129,13 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if ((roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M, V2_CQE_BYTE_16_LCL_QPN_S) & HNS_ROCE_V2_CQE_QPN_MASK) == qpn) { - /* In v1 engine, not support SRQ */ + if (srq && + roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_S_R_S)) { + wqe_index = roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S); + hns_roce_free_srq_wqe(srq, wqe_index); + } ++nfreed; } else if (nfreed) { dest = get_cqe_v2(hr_cq, (prod_index + nfreed) & @@ -2212,6 +2312,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, struct hns_roce_qp **cur_qp, struct ib_wc *wc) { + struct hns_roce_srq *srq = NULL; struct hns_roce_dev *hr_dev; struct hns_roce_v2_cqe *cqe; struct hns_roce_qp *hr_qp; @@ -2254,6 +2355,37 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, wc->qp = &(*cur_qp)->ibqp; wc->vendor_err = 0; + if (is_send) { + wq = &(*cur_qp)->sq; + if ((*cur_qp)->sq_signal_bits) { + /* + * If sg_signal_bit is 1, + * firstly tail pointer updated to wqe + * which current cqe correspond 
to + */ + wqe_ctr = (u16)roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S); + wq->tail += (wqe_ctr - (u16)wq->tail) & + (wq->wqe_cnt - 1); + } + + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } else if ((*cur_qp)->ibqp.srq) { + srq = to_hr_srq((*cur_qp)->ibqp.srq); + wqe_ctr = le16_to_cpu(roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S)); + wc->wr_id = srq->wrid[wqe_ctr]; + hns_roce_free_srq_wqe(srq, wqe_ctr); + } else { + /* Update tail pointer, record wr_id */ + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } + status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, V2_CQE_BYTE_4_STATUS_S); switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) { @@ -2373,23 +2505,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, wc->status = IB_WC_GENERAL_ERR; break; } - - wq = &(*cur_qp)->sq; - if ((*cur_qp)->sq_signal_bits) { - /* - * If sg_signal_bit is 1, - * firstly tail pointer updated to wqe - * which current cqe correspond to - */ - wqe_ctr = (u16)roce_get_field(cqe->byte_4, - V2_CQE_BYTE_4_WQE_INDX_M, - V2_CQE_BYTE_4_WQE_INDX_S); - wq->tail += (wqe_ctr - (u16)wq->tail) & - (wq->wqe_cnt - 1); - } - - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; - ++wq->tail; } else { /* RQ correspond to CQE */ wc->byte_len = le32_to_cpu(cqe->byte_cnt); @@ -2434,11 +2549,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, return -EAGAIN; } - /* Update tail pointer, record wr_id */ - wq = &(*cur_qp)->rq; - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; - ++wq->tail; - wc->sl = (u8)roce_get_field(cqe->byte_32, V2_CQE_BYTE_32_SL_M, V2_CQE_BYTE_32_SL_S); wc->src_qp = (u8)roce_get_field(cqe->byte_32, @@ -2747,6 +2857,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, + (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || + 
hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 : ilog2((unsigned int)hr_qp->rq.wqe_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); @@ -3088,6 +3200,8 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, + (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || + hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 : ilog2((unsigned int)hr_qp->rq.wqe_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); @@ -3601,6 +3715,21 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return 0; } +static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + + if ((cur_state != IB_QPS_RESET && + (new_state == IB_QPS_ERR || new_state == IB_QPS_RESET)) || + ((cur_state == IB_QPS_RTS || cur_state == IB_QPS_SQD) && + (new_state == IB_QPS_RTS || new_state == IB_QPS_SQD)) || + (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS)) + return true; + + return false; + +} + static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, @@ -3626,6 +3755,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, */ memset(qpc_mask, 0xff, sizeof(*qpc_mask)); if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + memset(qpc_mask, 0, sizeof(*qpc_mask)); modify_qp_reset_to_init(ibqp, attr, attr_mask, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { @@ -3641,21 +3771,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, qpc_mask); if (ret) goto out; - } else if ((cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) || - (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD) || - (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD) || - (cur_state == 
IB_QPS_SQD && new_state == IB_QPS_RTS) || - (cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) { + } else if (hns_roce_v2_check_qp_stat(cur_state, new_state)) { /* Nothing */ ; } else { @@ -3789,6 +3905,11 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); + roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S, + ibqp->srq ? 
1 : 0); + roce_set_bit(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_INV_CREDIT_S, 0); + /* Every status migrate must change state */ roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S, new_state); @@ -4012,7 +4133,7 @@ out: static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - int is_user) + bool is_user) { struct hns_roce_cq *send_cq, *recv_cq; struct device *dev = hr_dev->dev; @@ -4074,7 +4195,8 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_free_db(hr_dev, &hr_qp->rdb); } - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && + hr_qp->rq.wqe_cnt) { kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); kfree(hr_qp->rq_inl_buf.wqe_list); } @@ -4088,7 +4210,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp) struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); int ret; - ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject); + ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, ibqp->uobject); if (ret) { dev_err(hr_dev->dev, "Destroy qp failed(%d)\n", ret); return ret; @@ -4384,6 +4506,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, int aeqe_found = 0; int event_type; int sub_type; + u32 srqn; u32 qpn; u32 cqn; @@ -4406,6 +4529,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, cqn = roce_get_field(aeqe->event.cq_event.cq, HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); + srqn = roce_get_field(aeqe->event.srq_event.srq, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -4413,13 +4539,14 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_COMM_EST: case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case 
HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: hns_roce_qp_event(hr_dev, qpn, event_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: - case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: + hns_roce_srq_event(hr_dev, srqn, event_type); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: @@ -4964,13 +5091,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, eqe_alloc = i * (buf_chk_sz / eq->eqe_size); size = (eq->entries - eqe_alloc) * eq->eqe_size; } - eq->buf[i] = dma_alloc_coherent(dev, size, + eq->buf[i] = dma_zalloc_coherent(dev, size, &(eq->buf_dma[i]), GFP_KERNEL); if (!eq->buf[i]) goto err_dma_alloc_buf; - memset(eq->buf[i], 0, size); *(eq->bt_l0 + i) = eq->buf_dma[i]; eq_buf_cnt++; @@ -5000,13 +5126,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, size = (eq->entries - eqe_alloc) * eq->eqe_size; } - eq->buf[idx] = dma_alloc_coherent(dev, size, + eq->buf[idx] = dma_zalloc_coherent(dev, size, &(eq->buf_dma[idx]), GFP_KERNEL); if (!eq->buf[idx]) goto err_dma_alloc_buf; - memset(eq->buf[idx], 0, size); *(eq->bt_l1[i] + j) = eq->buf_dma[idx]; eq_buf_cnt++; @@ -5116,7 +5241,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, goto free_cmd_mbox; } - eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz, + eq->buf_list->buf = dma_zalloc_coherent(dev, buf_chk_sz, &(eq->buf_list->map), GFP_KERNEL); if (!eq->buf_list->buf) { @@ -5124,7 +5249,6 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, goto err_alloc_buf; } - memset(eq->buf_list->buf, 0, buf_chk_sz); } else { ret = hns_roce_mhop_alloc_eq(hr_dev, eq); if (ret) { @@ -5332,6 +5456,300 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) destroy_workqueue(hr_dev->irq_workq); } +static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, u32 pdn, u16 xrcd, + u32 cqn, void *mb_buf, 
u64 *mtts_wqe, + u64 *mtts_idx, dma_addr_t dma_handle_wqe, + dma_addr_t dma_handle_idx) +{ + struct hns_roce_srq_context *srq_context; + + srq_context = mb_buf; + memset(srq_context, 0, sizeof(*srq_context)); + + roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M, + SRQC_BYTE_4_SRQ_ST_S, 1); + + roce_set_field(srq_context->byte_4_srqn_srqst, + SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M, + SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S, + (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : + hr_dev->caps.srqwqe_hop_num)); + roce_set_field(srq_context->byte_4_srqn_srqst, + SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, + ilog2(srq->max)); + + roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, + SRQC_BYTE_4_SRQN_S, srq->srqn); + + roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); + + roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M, + SRQC_BYTE_12_SRQ_XRCD_S, xrcd); + + srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3)); + + roce_set_field(srq_context->byte_24_wqe_bt_ba, + SRQC_BYTE_24_SRQ_WQE_BT_BA_M, + SRQC_BYTE_24_SRQ_WQE_BT_BA_S, + cpu_to_le32(dma_handle_wqe >> 35)); + + roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, + SRQC_BYTE_28_PD_S, pdn); + roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M, + SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 
0 : + fls(srq->max_gs - 1)); + + srq_context->idx_bt_ba = (u32)(dma_handle_idx >> 3); + srq_context->idx_bt_ba = cpu_to_le32(srq_context->idx_bt_ba); + roce_set_field(srq_context->rsv_idx_bt_ba, + SRQC_BYTE_36_SRQ_IDX_BT_BA_M, + SRQC_BYTE_36_SRQ_IDX_BT_BA_S, + cpu_to_le32(dma_handle_idx >> 35)); + + srq_context->idx_cur_blk_addr = (u32)(mtts_idx[0] >> PAGE_ADDR_SHIFT); + srq_context->idx_cur_blk_addr = + cpu_to_le32(srq_context->idx_cur_blk_addr); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, + SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, + cpu_to_le32((mtts_idx[0]) >> (32 + PAGE_ADDR_SHIFT))); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, + SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, + hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : + hr_dev->caps.idx_hop_num); + + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, + hr_dev->caps.idx_ba_pg_sz); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, + hr_dev->caps.idx_buf_pg_sz); + + srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT); + srq_context->idx_nxt_blk_addr = + cpu_to_le32(srq_context->idx_nxt_blk_addr); + roce_set_field(srq_context->rsv_idxnxtblkaddr, + SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, + SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, + cpu_to_le32((mtts_idx[1]) >> (32 + PAGE_ADDR_SHIFT))); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, + cqn); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, + SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, + hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, + SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, + hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET); + + 
roce_set_bit(srq_context->db_record_addr_record_en, + SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); +} + +static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_srq_context *srq_context; + struct hns_roce_srq_context *srqc_mask; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + if (srq_attr_mask & IB_SRQ_LIMIT) { + if (srq_attr->srq_limit >= srq->max) + return -EINVAL; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1; + + memset(srqc_mask, 0xff, sizeof(*srqc_mask)); + + roce_set_field(srq_context->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit); + roce_set_field(srqc_mask->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); + + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0, + HNS_ROCE_CMD_MODIFY_SRQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + if (ret) { + dev_err(hr_dev->dev, + "MODIFY SRQ Failed to cmd mailbox.\n"); + return ret; + } + } + + return 0; +} + +int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_srq_context *srq_context; + struct hns_roce_cmd_mailbox *mailbox; + int limit_wl; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0, + HNS_ROCE_CMD_QUERY_SRQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + if (ret) { + dev_err(hr_dev->dev, "QUERY SRQ cmd process error\n"); + goto out; + } 
+ + limit_wl = roce_get_field(srq_context->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S); + + attr->srq_limit = limit_wl; + attr->max_wr = srq->max - 1; + attr->max_sge = srq->max_gs; + + memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); + +out: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + +static int find_empty_entry(struct hns_roce_idx_que *idx_que) +{ + int bit_num; + int i; + + /* bitmap[i] is set zero if all bits are allocated */ + for (i = 0; idx_que->bitmap[i] == 0; ++i) + ; + bit_num = ffs(idx_que->bitmap[i]); + idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1)); + + return i * sizeof(u64) * 8 + (bit_num - 1); +} + +static void fill_idx_queue(struct hns_roce_idx_que *idx_que, + int cur_idx, int wqe_idx) +{ + unsigned int *addr; + + addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf, + cur_idx * idx_que->entry_sz); + *addr = wqe_idx; +} + +static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_v2_wqe_data_seg *dseg; + struct hns_roce_v2_db srq_db; + unsigned long flags; + int ret = 0; + int wqe_idx; + void *wqe; + int nreq; + int ind; + int i; + + spin_lock_irqsave(&srq->lock, flags); + + ind = srq->head & (srq->max - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(wr->num_sge > srq->max_gs)) { + ret = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely(srq->head == srq->tail)) { + ret = -ENOMEM; + *bad_wr = wr; + break; + } + + wqe_idx = find_empty_entry(&srq->idx_que); + fill_idx_queue(&srq->idx_que, ind, wqe_idx); + wqe = get_srq_wqe(srq, wqe_idx); + dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; + + for (i = 0; i < wr->num_sge; ++i) { + dseg[i].len = cpu_to_le32(wr->sg_list[i].length); + dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey); + dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); + } + + if (i < srq->max_gs) { + dseg->len = 0; 
+ dseg->lkey = cpu_to_le32(0x100); + dseg->addr = 0; + } + + srq->wrid[wqe_idx] = wr->wr_id; + ind = (ind + 1) & (srq->max - 1); + } + + if (likely(nreq)) { + srq->head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn; + srq_db.parameter = srq->head; + + hns_roce_write64_k((__le32 *)&srq_db, srq->db_reg_l); + + } + + spin_unlock_irqrestore(&srq->lock, flags); + + return ret; +} + +static const struct ib_device_ops hns_roce_v2_dev_ops = { + .destroy_qp = hns_roce_v2_destroy_qp, + .modify_cq = hns_roce_v2_modify_cq, + .poll_cq = hns_roce_v2_poll_cq, + .post_recv = hns_roce_v2_post_recv, + .post_send = hns_roce_v2_post_send, + .query_qp = hns_roce_v2_query_qp, + .req_notify_cq = hns_roce_v2_req_notify_cq, +}; + +static const struct ib_device_ops hns_roce_v2_dev_srq_ops = { + .modify_srq = hns_roce_v2_modify_srq, + .post_srq_recv = hns_roce_v2_post_srq_recv, + .query_srq = hns_roce_v2_query_srq, +}; + static const struct hns_roce_hw hns_roce_hw_v2 = { .cmq_init = hns_roce_v2_cmq_init, .cmq_exit = hns_roce_v2_cmq_exit, @@ -5359,6 +5777,12 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .poll_cq = hns_roce_v2_poll_cq, .init_eq = hns_roce_v2_init_eq_table, .cleanup_eq = hns_roce_v2_cleanup_eq_table, + .write_srqc = hns_roce_v2_write_srqc, + .modify_srq = hns_roce_v2_modify_srq, + .query_srq = hns_roce_v2_query_srq, + .post_srq_recv = hns_roce_v2_post_srq_recv, + .hns_roce_dev_ops = &hns_roce_v2_dev_ops, + .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, }; static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 8bc820635bbd..b72d0443c835 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -46,10 +46,16 @@ #define HNS_ROCE_V2_MAX_QP_NUM 0x2000 #define HNS_ROCE_V2_MAX_WQE_NUM 0x8000 +#define 
HNS_ROCE_V2_MAX_SRQ 0x100000 +#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000 +#define HNS_ROCE_V2_MAX_SRQ_SGE 0x100 #define HNS_ROCE_V2_MAX_CQ_NUM 0x8000 +#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000 #define HNS_ROCE_V2_MAX_CQE_NUM 0x10000 +#define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff +#define HNS_ROCE_V2_MAX_SRQ_SGE_NUM 0x100 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 #define HNS_ROCE_V2_UAR_NUM 256 @@ -61,6 +67,8 @@ #define HNS_ROCE_V2_MAX_MTPT_NUM 0x8000 #define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000 +#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000 +#define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_PD_NUM 0x1000000 #define HNS_ROCE_V2_MAX_QP_INIT_RDMA 128 #define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128 @@ -71,6 +79,7 @@ #define HNS_ROCE_V2_IRRL_ENTRY_SZ 64 #define HNS_ROCE_V2_TRRL_ENTRY_SZ 48 #define HNS_ROCE_V2_CQC_ENTRY_SZ 64 +#define HNS_ROCE_V2_SRQC_ENTRY_SZ 64 #define HNS_ROCE_V2_MTPT_ENTRY_SZ 64 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 @@ -84,8 +93,10 @@ #define HNS_ROCE_CONTEXT_HOP_NUM 1 #define HNS_ROCE_MTT_HOP_NUM 1 #define HNS_ROCE_CQE_HOP_NUM 1 +#define HNS_ROCE_SRQWQE_HOP_NUM 1 #define HNS_ROCE_PBL_HOP_NUM 2 #define HNS_ROCE_EQE_HOP_NUM 2 +#define HNS_ROCE_IDX_HOP_NUM 1 #define HNS_ROCE_V2_GID_INDEX_NUM 256 @@ -113,6 +124,8 @@ ((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \ (step_idx == 1 && hop_num == 1) || \ (step_idx == 2 && hop_num == 2)) +#define HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT 0 +#define HNS_ICL_SWITCH_CMD_ROCEE_SEL BIT(HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT) #define CMD_CSQ_DESC_NUM 1024 #define CMD_CRQ_DESC_NUM 1024 @@ -213,7 +226,10 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501, + HNS_ROCE_OPC_POST_MB = 0x8504, + HNS_ROCE_OPC_QUERY_MB_ST = 0x8505, 
HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, + HNS_SWITCH_PARAMETER_CFG = 0x1033, }; enum { @@ -325,6 +341,90 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 #define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) +struct hns_roce_srq_context { + __le32 byte_4_srqn_srqst; + __le32 byte_8_limit_wl; + __le32 byte_12_xrcd; + __le32 byte_16_pi_ci; + __le32 wqe_bt_ba; + __le32 byte_24_wqe_bt_ba; + __le32 byte_28_rqws_pd; + __le32 idx_bt_ba; + __le32 rsv_idx_bt_ba; + __le32 idx_cur_blk_addr; + __le32 byte_44_idxbufpgsz_addr; + __le32 idx_nxt_blk_addr; + __le32 rsv_idxnxtblkaddr; + __le32 byte_56_xrc_cqn; + __le32 db_record_addr_record_en; + __le32 db_record_addr; +}; + +#define SRQC_BYTE_4_SRQ_ST_S 0 +#define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0) + +#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S 2 +#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M GENMASK(3, 2) + +#define SRQC_BYTE_4_SRQ_SHIFT_S 4 +#define SRQC_BYTE_4_SRQ_SHIFT_M GENMASK(7, 4) + +#define SRQC_BYTE_4_SRQN_S 8 +#define SRQC_BYTE_4_SRQN_M GENMASK(31, 8) + +#define SRQC_BYTE_8_SRQ_LIMIT_WL_S 0 +#define SRQC_BYTE_8_SRQ_LIMIT_WL_M GENMASK(15, 0) + +#define SRQC_BYTE_12_SRQ_XRCD_S 0 +#define SRQC_BYTE_12_SRQ_XRCD_M GENMASK(23, 0) + +#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_S 0 +#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_M GENMASK(15, 0) + +#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_S 0 +#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_M GENMASK(31, 16) + +#define SRQC_BYTE_24_SRQ_WQE_BT_BA_S 0 +#define SRQC_BYTE_24_SRQ_WQE_BT_BA_M GENMASK(28, 0) + +#define SRQC_BYTE_28_PD_S 0 +#define SRQC_BYTE_28_PD_M GENMASK(23, 0) + +#define SRQC_BYTE_28_RQWS_S 24 +#define SRQC_BYTE_28_RQWS_M GENMASK(27, 24) + +#define SRQC_BYTE_36_SRQ_IDX_BT_BA_S 0 +#define SRQC_BYTE_36_SRQ_IDX_BT_BA_M GENMASK(28, 0) + +#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S 0 +#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M GENMASK(19, 0) + +#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S 22 +#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M GENMASK(23, 22) + +#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S 24 +#define 
SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M GENMASK(27, 24) + +#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S 28 +#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M GENMASK(31, 28) + +#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S 0 +#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M GENMASK(19, 0) + +#define SRQC_BYTE_56_SRQ_XRC_CQN_S 0 +#define SRQC_BYTE_56_SRQ_XRC_CQN_M GENMASK(23, 0) + +#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S 24 +#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M GENMASK(27, 24) + +#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S 28 +#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M GENMASK(31, 28) + +#define SRQC_BYTE_60_SRQ_RECORD_EN_S 0 + +#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1 +#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1) + enum{ V2_MPT_ST_VALID = 0x1, V2_MPT_ST_FREE = 0x2, @@ -1289,6 +1389,36 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_3_VF_SL_NUM_S 16 #define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16) +struct hns_roce_vf_switch { + __le32 rocee_sel; + __le32 fun_id; + __le32 cfg; + __le32 resv1; + __le32 resv2; + __le32 resv3; +}; + +#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3 +#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3) + +#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1 +#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2 +#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3 + +struct hns_roce_post_mbox { + __le32 in_param_l; + __le32 in_param_h; + __le32 out_param_l; + __le32 out_param_h; + __le32 cmd_tag; + __le32 token_event_en; +}; + +struct hns_roce_mbox_status { + __le32 mb_status_hw_run; + __le32 rsv[5]; +}; + struct hns_roce_cfg_bt_attr { __le32 vf_qpc_cfg; __le32 vf_srqc_cfg; @@ -1372,18 +1502,6 @@ struct hns_roce_cmq_desc { #define HNS_ROCE_HW_RUN_BIT_SHIFT 31 #define HNS_ROCE_HW_MB_STATUS_MASK 0xFF -#define HNS_ROCE_VF_MB4_TAG_MASK 0xFFFFFF00 -#define HNS_ROCE_VF_MB4_TAG_SHIFT 8 - -#define HNS_ROCE_VF_MB4_CMD_MASK 0xFF -#define HNS_ROCE_VF_MB4_CMD_SHIFT 0 - -#define HNS_ROCE_VF_MB5_EVENT_MASK 0x10000 -#define HNS_ROCE_VF_MB5_EVENT_SHIFT 16 - -#define HNS_ROCE_VF_MB5_TOKEN_MASK 0xFFFF 
-#define HNS_ROCE_VF_MB5_TOKEN_SHIFT 0 - struct hns_roce_v2_cmq_ring { dma_addr_t desc_dma_addr; struct hns_roce_cmq_desc *desc; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 1b3ee514f2ef..c79054ba9495 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -220,6 +220,11 @@ static int hns_roce_query_device(struct ib_device *ib_dev, IB_ATOMIC_HCA : IB_ATOMIC_NONE; props->max_pkeys = 1; props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { + props->max_srq = hr_dev->caps.max_srqs; + props->max_srq_wr = hr_dev->caps.max_srq_wrs; + props->max_srq_sge = hr_dev->caps.max_srq_sges; + } return 0; } @@ -440,6 +445,54 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) ib_unregister_device(&hr_dev->ib_dev); } +static const struct ib_device_ops hns_roce_dev_ops = { + .add_gid = hns_roce_add_gid, + .alloc_pd = hns_roce_alloc_pd, + .alloc_ucontext = hns_roce_alloc_ucontext, + .create_ah = hns_roce_create_ah, + .create_cq = hns_roce_ib_create_cq, + .create_qp = hns_roce_create_qp, + .dealloc_pd = hns_roce_dealloc_pd, + .dealloc_ucontext = hns_roce_dealloc_ucontext, + .del_gid = hns_roce_del_gid, + .dereg_mr = hns_roce_dereg_mr, + .destroy_ah = hns_roce_destroy_ah, + .destroy_cq = hns_roce_ib_destroy_cq, + .disassociate_ucontext = hns_roce_disassociate_ucontext, + .get_dma_mr = hns_roce_get_dma_mr, + .get_link_layer = hns_roce_get_link_layer, + .get_netdev = hns_roce_get_netdev, + .get_port_immutable = hns_roce_port_immutable, + .mmap = hns_roce_mmap, + .modify_device = hns_roce_modify_device, + .modify_port = hns_roce_modify_port, + .modify_qp = hns_roce_modify_qp, + .query_ah = hns_roce_query_ah, + .query_device = hns_roce_query_device, + .query_pkey = hns_roce_query_pkey, + .query_port = hns_roce_query_port, + .reg_user_mr = hns_roce_reg_user_mr, +}; + +static const struct ib_device_ops 
hns_roce_dev_mr_ops = { + .rereg_user_mr = hns_roce_rereg_user_mr, +}; + +static const struct ib_device_ops hns_roce_dev_mw_ops = { + .alloc_mw = hns_roce_alloc_mw, + .dealloc_mw = hns_roce_dealloc_mw, +}; + +static const struct ib_device_ops hns_roce_dev_frmr_ops = { + .alloc_mr = hns_roce_alloc_mr, + .map_mr_sg = hns_roce_map_mr_sg, +}; + +static const struct ib_device_ops hns_roce_dev_srq_ops = { + .create_srq = hns_roce_create_srq, + .destroy_srq = hns_roce_destroy_srq, +}; + static int hns_roce_register_device(struct hns_roce_dev *hr_dev) { int ret; @@ -479,73 +532,38 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - /* HCA||device||port */ - ib_dev->modify_device = hns_roce_modify_device; - ib_dev->query_device = hns_roce_query_device; - ib_dev->query_port = hns_roce_query_port; - ib_dev->modify_port = hns_roce_modify_port; - ib_dev->get_link_layer = hns_roce_get_link_layer; - ib_dev->get_netdev = hns_roce_get_netdev; - ib_dev->add_gid = hns_roce_add_gid; - ib_dev->del_gid = hns_roce_del_gid; - ib_dev->query_pkey = hns_roce_query_pkey; - ib_dev->alloc_ucontext = hns_roce_alloc_ucontext; - ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext; - ib_dev->mmap = hns_roce_mmap; - - /* PD */ - ib_dev->alloc_pd = hns_roce_alloc_pd; - ib_dev->dealloc_pd = hns_roce_dealloc_pd; - - /* AH */ - ib_dev->create_ah = hns_roce_create_ah; - ib_dev->query_ah = hns_roce_query_ah; - ib_dev->destroy_ah = hns_roce_destroy_ah; - - /* QP */ - ib_dev->create_qp = hns_roce_create_qp; - ib_dev->modify_qp = hns_roce_modify_qp; - ib_dev->query_qp = hr_dev->hw->query_qp; - ib_dev->destroy_qp = hr_dev->hw->destroy_qp; - ib_dev->post_send = hr_dev->hw->post_send; - ib_dev->post_recv = hr_dev->hw->post_recv; - - /* CQ */ - ib_dev->create_cq = hns_roce_ib_create_cq; - ib_dev->modify_cq = hr_dev->hw->modify_cq; - ib_dev->destroy_cq = hns_roce_ib_destroy_cq; - ib_dev->req_notify_cq = 
hr_dev->hw->req_notify_cq; - ib_dev->poll_cq = hr_dev->hw->poll_cq; - - /* MR */ - ib_dev->get_dma_mr = hns_roce_get_dma_mr; - ib_dev->reg_user_mr = hns_roce_reg_user_mr; - ib_dev->dereg_mr = hns_roce_dereg_mr; if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { - ib_dev->rereg_user_mr = hns_roce_rereg_user_mr; ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); + ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops); } /* MW */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) { - ib_dev->alloc_mw = hns_roce_alloc_mw; - ib_dev->dealloc_mw = hns_roce_dealloc_mw; ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) | (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW); + ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops); } /* FRMR */ - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) { - ib_dev->alloc_mr = hns_roce_alloc_mr; - ib_dev->map_mr_sg = hns_roce_map_mr_sg; - } + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) + ib_set_device_ops(ib_dev, &hns_roce_dev_frmr_ops); - /* OTHERS */ - ib_dev->get_port_immutable = hns_roce_port_immutable; - ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext; + /* SRQ */ + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { + ib_dev->uverbs_cmd_mask |= + (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV); + ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops); + ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops); + } ib_dev->driver_id = RDMA_DRIVER_HNS; + ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops); + ib_set_device_ops(ib_dev, &hns_roce_dev_ops); ret = ib_register_device(ib_dev, "hns_%d", NULL); if (ret) { dev_err(dev, "ib_register_device failed!\n"); @@ -646,8 +664,58 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) goto err_unmap_trrl; } + if (hr_dev->caps.srqc_entry_sz) { + ret = hns_roce_init_hem_table(hr_dev, 
&hr_dev->srq_table.table, + HEM_TYPE_SRQC, + hr_dev->caps.srqc_entry_sz, + hr_dev->caps.num_srqs, 1); + if (ret) { + dev_err(dev, + "Failed to init SRQ context memory, aborting.\n"); + goto err_unmap_cq; + } + } + + if (hr_dev->caps.num_srqwqe_segs) { + ret = hns_roce_init_hem_table(hr_dev, + &hr_dev->mr_table.mtt_srqwqe_table, + HEM_TYPE_SRQWQE, + hr_dev->caps.mtt_entry_sz, + hr_dev->caps.num_srqwqe_segs, 1); + if (ret) { + dev_err(dev, + "Failed to init MTT srqwqe memory, aborting.\n"); + goto err_unmap_srq; + } + } + + if (hr_dev->caps.num_idx_segs) { + ret = hns_roce_init_hem_table(hr_dev, + &hr_dev->mr_table.mtt_idx_table, + HEM_TYPE_IDX, + hr_dev->caps.idx_entry_sz, + hr_dev->caps.num_idx_segs, 1); + if (ret) { + dev_err(dev, + "Failed to init MTT idx memory, aborting.\n"); + goto err_unmap_srqwqe; + } + } + return 0; +err_unmap_srqwqe: + if (hr_dev->caps.num_srqwqe_segs) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_srqwqe_table); + +err_unmap_srq: + if (hr_dev->caps.srqc_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); + +err_unmap_cq: + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); + err_unmap_trrl: if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, @@ -727,8 +795,21 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) goto err_cq_table_free; } + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { + ret = hns_roce_init_srq_table(hr_dev); + if (ret) { + dev_err(dev, + "Failed to init share receive queue table.\n"); + goto err_qp_table_free; + } + } + return 0; +err_qp_table_free: + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) + hns_roce_cleanup_qp_table(hr_dev); + err_cq_table_free: hns_roce_cleanup_cq_table(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 521ad2aa3a4e..ee5991bd4171 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -184,12 +184,27 @@ static int 
hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, struct hns_roce_buddy *buddy; int ret; - if (mtt_type == MTT_TYPE_WQE) { + switch (mtt_type) { + case MTT_TYPE_WQE: buddy = &mr_table->mtt_buddy; table = &mr_table->mtt_table; - } else { + break; + case MTT_TYPE_CQE: buddy = &mr_table->mtt_cqe_buddy; table = &mr_table->mtt_cqe_table; + break; + case MTT_TYPE_SRQWQE: + buddy = &mr_table->mtt_srqwqe_buddy; + table = &mr_table->mtt_srqwqe_table; + break; + case MTT_TYPE_IDX: + buddy = &mr_table->mtt_idx_buddy; + table = &mr_table->mtt_idx_table; + break; + default: + dev_err(hr_dev->dev, "Unsupport MTT table type: %d\n", + mtt_type); + return -EINVAL; } ret = hns_roce_buddy_alloc(buddy, order, seg); @@ -242,18 +257,40 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) if (mtt->order < 0) return; - if (mtt->mtt_type == MTT_TYPE_WQE) { + switch (mtt->mtt_type) { + case MTT_TYPE_WQE: hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order); hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, mtt->first_seg, mtt->first_seg + (1 << mtt->order) - 1); - } else { + break; + case MTT_TYPE_CQE: hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg, mtt->order); hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table, mtt->first_seg, mtt->first_seg + (1 << mtt->order) - 1); + break; + case MTT_TYPE_SRQWQE: + hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg, + mtt->order); + hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table, + mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); + break; + case MTT_TYPE_IDX: + hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg, + mtt->order); + hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table, + mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); + break; + default: + dev_err(hr_dev->dev, + "Unsupport mtt type %d, clean mtt failed\n", + mtt->mtt_type); + break; } } EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup); @@ -713,10 
+750,26 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, u32 bt_page_size; u32 i; - if (mtt->mtt_type == MTT_TYPE_WQE) + switch (mtt->mtt_type) { + case MTT_TYPE_WQE: + table = &hr_dev->mr_table.mtt_table; bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); - else + break; + case MTT_TYPE_CQE: + table = &hr_dev->mr_table.mtt_cqe_table; bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); + break; + case MTT_TYPE_SRQWQE: + table = &hr_dev->mr_table.mtt_srqwqe_table; + bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); + break; + case MTT_TYPE_IDX: + table = &hr_dev->mr_table.mtt_idx_table; + bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); + break; + default: + return -EINVAL; + } /* All MTTs must fit in the same page */ if (start_index / (bt_page_size / sizeof(u64)) != @@ -726,11 +779,6 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1)) return -EINVAL; - if (mtt->mtt_type == MTT_TYPE_WQE) - table = &hr_dev->mr_table.mtt_table; - else - table = &hr_dev->mr_table.mtt_cqe_table; - mtts = hns_roce_table_find(hr_dev, table, mtt->first_seg + s / hr_dev->caps.mtt_entry_sz, &dma_handle); @@ -759,10 +807,25 @@ static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev, if (mtt->order < 0) return -EINVAL; - if (mtt->mtt_type == MTT_TYPE_WQE) + switch (mtt->mtt_type) { + case MTT_TYPE_WQE: bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); - else + break; + case MTT_TYPE_CQE: bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); + break; + case MTT_TYPE_SRQWQE: + bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); + break; + case MTT_TYPE_IDX: + bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); + break; + default: + dev_err(hr_dev->dev, + "Unsupport mtt type %d, write mtt failed\n", + mtt->mtt_type); + return -EINVAL; + } while (npages > 0) { chunk = min_t(int, bt_page_size / sizeof(u64), npages); @@ 
-828,8 +891,31 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev) if (ret) goto err_buddy_cqe; } + + if (hr_dev->caps.num_srqwqe_segs) { + ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy, + ilog2(hr_dev->caps.num_srqwqe_segs)); + if (ret) + goto err_buddy_srqwqe; + } + + if (hr_dev->caps.num_idx_segs) { + ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy, + ilog2(hr_dev->caps.num_idx_segs)); + if (ret) + goto err_buddy_idx; + } + return 0; +err_buddy_idx: + if (hr_dev->caps.num_srqwqe_segs) + hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); + +err_buddy_srqwqe: + if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) + hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); + err_buddy_cqe: hns_roce_buddy_cleanup(&mr_table->mtt_buddy); @@ -842,6 +928,10 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; + if (hr_dev->caps.num_idx_segs) + hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy); + if (hr_dev->caps.num_srqwqe_segs) + hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); hns_roce_buddy_cleanup(&mr_table->mtt_buddy); if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); @@ -897,8 +987,25 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, u32 bt_page_size; u32 n; - order = mtt->mtt_type == MTT_TYPE_WQE ? 
hr_dev->caps.mtt_ba_pg_sz : - hr_dev->caps.cqe_ba_pg_sz; + switch (mtt->mtt_type) { + case MTT_TYPE_WQE: + order = hr_dev->caps.mtt_ba_pg_sz; + break; + case MTT_TYPE_CQE: + order = hr_dev->caps.cqe_ba_pg_sz; + break; + case MTT_TYPE_SRQWQE: + order = hr_dev->caps.srqwqe_ba_pg_sz; + break; + case MTT_TYPE_IDX: + order = hr_dev->caps.idx_ba_pg_sz; + break; + default: + dev_err(dev, "Unsupport mtt type %d, write mtt failed\n", + mtt->mtt_type); + return -EINVAL; + } + bt_page_size = 1 << (order + PAGE_SHIFT); pages = (u64 *) __get_free_pages(GFP_KERNEL, order); @@ -1021,14 +1128,14 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_umem; } } else { - int pbl_size = 1; + u64 pbl_size = 1; bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8; for (i = 0; i < hr_dev->caps.pbl_hop_num; i++) pbl_size *= bt_size; if (n > pbl_size) { dev_err(dev, - " MR len %lld err. MR page num is limited to %d!\n", + " MR len %lld err. MR page num is limited to %lld!\n", length, pbl_size); ret = -EINVAL; goto err_umem; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 5ebf481a39d9..54031c5b53fa 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -280,7 +280,7 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, EXPORT_SYMBOL_GPL(hns_roce_release_range_qp); static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, - struct ib_qp_cap *cap, int is_user, int has_srq, + struct ib_qp_cap *cap, bool is_user, int has_rq, struct hns_roce_qp *hr_qp) { struct device *dev = hr_dev->dev; @@ -294,14 +294,12 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, return -EINVAL; } - /* If srq exit, set zero for relative number of rq */ - if (has_srq) { - if (cap->max_recv_wr) { - dev_dbg(dev, "srq no need config max_recv_wr\n"); - return -EINVAL; - } - - hr_qp->rq.wqe_cnt = hr_qp->rq.max_gs = 0; + /* If srq exist, set zero 
for relative number of rq */ + if (!has_rq) { + hr_qp->rq.wqe_cnt = 0; + hr_qp->rq.max_gs = 0; + cap->max_recv_wr = 0; + cap->max_recv_sge = 0; } else { if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) { dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n"); @@ -562,14 +560,15 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, else hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_REQ_WR); - ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, !!ib_pd->uobject, - !!init_attr->srq, hr_qp); + ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, udata, + hns_roce_qp_has_rq(init_attr), hr_qp); if (ret) { dev_err(dev, "hns_roce_set_rq_size failed\n"); goto err_out; } - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && + hns_roce_qp_has_rq(init_attr)) { /* allocate recv inline buf */ hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt, sizeof(struct hns_roce_rinl_wqe), @@ -599,7 +598,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, init_attr->cap.max_recv_sge]; } - if (ib_pd->uobject) { + if (udata) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { dev_err(dev, "ib_copy_from_udata error for create qp\n"); ret = -EFAULT; @@ -784,7 +783,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, else hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn); - if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) && + if (udata && (udata->outlen >= sizeof(resp)) && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) { /* indicate kernel supports rq record db */ @@ -811,7 +810,7 @@ err_qpn: hns_roce_release_range_qp(hr_dev, qpn, 1); err_wrid: - if (ib_pd->uobject) { + if (udata) { if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && (udata->outlen >= sizeof(resp)) && hns_roce_qp_has_rq(init_attr)) @@ -824,7 +823,7 @@ err_wrid: } err_sq_dbmap: - if (ib_pd->uobject) + if (udata) if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && 
(udata->inlen >= sizeof(ucmd)) && (udata->outlen >= sizeof(resp)) && @@ -837,13 +836,13 @@ err_mtt: hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); err_buf: - if (ib_pd->uobject) + if (hr_qp->umem) ib_umem_release(hr_qp->umem); else hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); err_db: - if (!ib_pd->uobject && hns_roce_qp_has_rq(init_attr) && + if (!udata && hns_roce_qp_has_rq(init_attr) && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) hns_roce_free_db(hr_dev, &hr_qp->rdb); @@ -889,7 +888,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, } case IB_QPT_GSI: { /* Userspace is not allowed to create special QPs: */ - if (pd->uobject) { + if (udata) { dev_err(dev, "not support usr space GSI\n"); return ERR_PTR(-EINVAL); } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c new file mode 100644 index 000000000000..960b1946c365 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018 Hisilicon Limited. 
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/hns-abi.h>
+#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_hem.h"
+
+/*
+ * Deliver an asynchronous event to the SRQ identified by @srqn.
+ *
+ * The SRQ is looked up under the xarray lock and a reference is taken so
+ * that it cannot be torn down while its ->event callback runs; dropping
+ * the last reference wakes the waiter in hns_roce_srq_free().
+ */
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
+{
+	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+	struct hns_roce_srq *srq;
+
+	xa_lock(&srq_table->xa);
+	srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1));
+	if (srq)
+		atomic_inc(&srq->refcount);
+	xa_unlock(&srq_table->xa);
+
+	if (!srq) {
+		dev_warn(hr_dev->dev, "Async event for bogus SRQ %08x\n", srqn);
+		return;
+	}
+
+	srq->event(srq, event_type);
+
+	if (atomic_dec_and_test(&srq->refcount))
+		complete(&srq->free);
+}
+EXPORT_SYMBOL_GPL(hns_roce_srq_event);
+
+/*
+ * Translate a hns_roce SRQ event into the matching IB event and hand it to
+ * the consumer's event handler, if one is registered. Unknown event types
+ * are logged and dropped.
+ */
+static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
+				  enum hns_roce_event event_type)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+	struct ib_srq *ibsrq = &srq->ibsrq;
+	struct ib_event event;
+
+	if (ibsrq->event_handler) {
+		event.device = ibsrq->device;
+		event.element.srq = ibsrq;
+		switch (event_type) {
+		case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+			event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+			break;
+		case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+			event.event = IB_EVENT_SRQ_ERR;
+			break;
+		default:
+			dev_err(hr_dev->dev,
+			   "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n",
+			   event_type, srq->srqn);
+			return;
+		}
+
+		ibsrq->event_handler(&event, ibsrq->srq_context);
+	}
+}
+
+/* Issue the SW2HW_SRQ mailbox command for @srq_num, passing @mailbox's DMA
+ * address as the first command parameter.
+ */
+static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev,
+			      struct hns_roce_cmd_mailbox *mailbox,
+			      unsigned long srq_num)
+{
+	return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0,
+				 HNS_ROCE_CMD_SW2HW_SRQ,
+				 HNS_ROCE_CMD_TIMEOUT_MSECS);
+}
+
+/* Issue the HW2SW_SRQ mailbox command for @srq_num. @mailbox may be NULL,
+ * in which case no DMA address is supplied and the op modifier is 1.
+ */
+static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev,
+			     struct hns_roce_cmd_mailbox *mailbox,
+			     unsigned long srq_num)
+{
+	return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
+				 mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ,
+				 HNS_ROCE_CMD_TIMEOUT_MSECS);
+}
+
+/*
+ * Allocate an SRQ number, publish @srq in the SRQ table and hand its
+ * context to hardware.
+ *
+ * Returns 0 on success or a negative errno; on failure everything acquired
+ * here has been released again.
+ */
+int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, u16 xrcd,
+		       struct hns_roce_mtt *hr_mtt, u64 db_rec_addr,
+		       struct hns_roce_srq *srq)
+{
+	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+	struct hns_roce_cmd_mailbox *mailbox;
+	dma_addr_t dma_handle_wqe;
+	dma_addr_t dma_handle_idx;
+	u64 *mtts_wqe;
+	u64 *mtts_idx;
+	int ret;
+
+	/* Get the physical address of srq buf */
+	mtts_wqe = hns_roce_table_find(hr_dev,
+				       &hr_dev->mr_table.mtt_srqwqe_table,
+				       srq->mtt.first_seg,
+				       &dma_handle_wqe);
+	if (!mtts_wqe) {
+		dev_err(hr_dev->dev,
+			"SRQ alloc.Failed to find srq buf addr.\n");
+		return -EINVAL;
+	}
+
+	/* Get physical address of idx que buf */
+	mtts_idx = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_idx_table,
+				       srq->idx_que.mtt.first_seg,
+				       &dma_handle_idx);
+	if (!mtts_idx) {
+		dev_err(hr_dev->dev,
+			"SRQ alloc.Failed to find idx que buf addr.\n");
+		return -EINVAL;
+	}
+
+	ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
+	if (ret == -1) {
+		dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n");
+		return -ENOMEM;
+	}
+
+	ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
+	if (ret)
+		goto err_out;
+
+	/*
+	 * Initialise the lifetime tracking before the SRQ becomes visible
+	 * through the xarray: hns_roce_srq_event() bumps ->refcount on any
+	 * entry it finds there.
+	 */
+	atomic_set(&srq->refcount, 1);
+	init_completion(&srq->free);
+
+	ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
+	if (ret)
+		goto err_put;
+
+	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+	if (IS_ERR(mailbox)) {
+		ret = PTR_ERR(mailbox);
+		goto err_xa;
+	}
+
+	hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf,
+			       mtts_wqe, mtts_idx, dma_handle_wqe,
+			       dma_handle_idx);
+
+	ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn);
+	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+	if (ret)
+		goto err_xa;
+
+	return 0;
+
+err_xa:
+	xa_erase(&srq_table->xa, srq->srqn);
+
+err_put:
+	hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+
+err_out:
+	hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+	return ret;
+}
+
+/*
+ * Undo hns_roce_srq_alloc(): take the SRQ back from hardware, unpublish it,
+ * wait until no event handler still holds a reference, then release the
+ * table entry and the SRQ number.
+ */
+void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+	int ret;
+
+	ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn);
+	if (ret)
+		dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for SRQN %06lx\n",
+			ret, srq->srqn);
+
+	xa_erase(&srq_table->xa, srq->srqn);
+
+	/* Drop the initial reference and wait for in-flight event handlers. */
+	if (atomic_dec_and_test(&srq->refcount))
+		complete(&srq->free);
+	wait_for_completion(&srq->free);
+
+	hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+	hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+}
+
+/*
+ * Allocate the kernel-side index queue of @srq: a bitmap with one bit per
+ * SRQ entry (rounded up to whole u64 words, all bits initially set) and the
+ * index buffer of srq->idx_que.buf_size bytes.
+ *
+ * Returns 0 or -ENOMEM; nothing stays allocated on failure.
+ */
+static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
+				   u32 page_shift)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+	struct hns_roce_idx_que *idx_que = &srq->idx_que;
+	u32 bitmap_num;
+	int i;
+
+	/* Number of bits, rounded up to a multiple of 64. */
+	bitmap_num = HNS_ROCE_ALOGN_UP(srq->max, 8 * sizeof(u64));
+
+	idx_que->bitmap = kzalloc(bitmap_num / 8, GFP_KERNEL);
+	if (!idx_que->bitmap)
+		return -ENOMEM;
+
+	/* From here on bitmap_num counts u64 words rather than bits. */
+	bitmap_num = bitmap_num / (8 * sizeof(u64));
+
+	if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2,
+			       &idx_que->idx_buf, page_shift)) {
+		kfree(idx_que->bitmap);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < bitmap_num; i++)
+		idx_que->bitmap[i] = ~(0UL);
+
+	return 0;
+}
+
+/*
+ * Create a shared receive queue.
+ *
+ * With @udata set, the WQE buffer and index queue live in user memory
+ * described by struct hns_roce_ib_create_srq and the SRQ number is copied
+ * back to user space; otherwise both buffers and the wrid array are
+ * allocated in the kernel.
+ *
+ * Returns the new ib_srq or an ERR_PTR().
+ */
+struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
+				   struct ib_srq_init_attr *srq_init_attr,
+				   struct ib_udata *udata)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+	struct hns_roce_srq *srq;
+	int srq_desc_size;
+	int srq_buf_size;
+	u32 page_shift;
+	int ret = 0;
+	u32 npages;
+	u32 cqn;
+
+	/* Check the actual SRQ wqe and SRQ sge num */
+	if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
+	    srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
+		return ERR_PTR(-EINVAL);
+
+	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
+	if (!srq)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&srq->mutex);
+	spin_lock_init(&srq->lock);
+
+	srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
+	srq->max_gs = srq_init_attr->attr.max_sge;
+
+	/*
+	 * The descriptor size must be a power of two: entries are addressed
+	 * through wqe_shift, and hns_roce_destroy_srq() frees the buffer as
+	 * max << wqe_shift, which has to equal srq_buf_size.
+	 */
+	srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs));
+
+	srq->wqe_shift = ilog2(srq_desc_size);
+
+	srq_buf_size = srq->max * srq_desc_size;
+
+	srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ;
+	srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz;
+	srq->mtt.mtt_type = MTT_TYPE_SRQWQE;
+	srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX;
+
+	if (udata) {
+		struct hns_roce_ib_create_srq ucmd;
+
+		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+			ret = -EFAULT;
+			goto err_srq;
+		}
+
+		srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+					srq_buf_size, 0, 0);
+		if (IS_ERR(srq->umem)) {
+			ret = PTR_ERR(srq->umem);
+			goto err_srq;
+		}
+
+		if (hr_dev->caps.srqwqe_buf_pg_sz) {
+			npages = (ib_umem_page_count(srq->umem) +
+				  (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
+				  (1 << hr_dev->caps.srqwqe_buf_pg_sz);
+			page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+			ret = hns_roce_mtt_init(hr_dev, npages,
+						page_shift,
+						&srq->mtt);
+		} else
+			ret = hns_roce_mtt_init(hr_dev,
+						ib_umem_page_count(srq->umem),
+						srq->umem->page_shift,
+						&srq->mtt);
+		if (ret)
+			goto err_buf;
+
+		ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem);
+		if (ret)
+			goto err_srq_mtt;
+
+		/* config index queue BA */
+		srq->idx_que.umem = ib_umem_get(pd->uobject->context,
+						ucmd.que_addr,
+						srq->idx_que.buf_size, 0, 0);
+		if (IS_ERR(srq->idx_que.umem)) {
+			dev_err(hr_dev->dev,
+				"ib_umem_get error for index queue\n");
+			ret = PTR_ERR(srq->idx_que.umem);
+			goto err_srq_mtt;
+		}
+
+		if (hr_dev->caps.idx_buf_pg_sz) {
+			npages = (ib_umem_page_count(srq->idx_que.umem) +
+				  (1 << hr_dev->caps.idx_buf_pg_sz) - 1) /
+				  (1 << hr_dev->caps.idx_buf_pg_sz);
+			page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+			ret = hns_roce_mtt_init(hr_dev, npages,
+						page_shift, &srq->idx_que.mtt);
+		} else {
+			ret = hns_roce_mtt_init(hr_dev,
+				       ib_umem_page_count(srq->idx_que.umem),
+				       srq->idx_que.umem->page_shift,
+				       &srq->idx_que.mtt);
+		}
+
+		if (ret) {
+			dev_err(hr_dev->dev,
+				"hns_roce_mtt_init error for idx que\n");
+			goto err_idx_mtt;
+		}
+
+		ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt,
+						 srq->idx_que.umem);
+		if (ret) {
+			dev_err(hr_dev->dev,
+			      "hns_roce_ib_umem_write_mtt error for idx que\n");
+			goto err_idx_buf;
+		}
+	} else {
+		page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+		if (hns_roce_buf_alloc(hr_dev, srq_buf_size,
+				       (1 << page_shift) * 2,
+				       &srq->buf, page_shift)) {
+			ret = -ENOMEM;
+			goto err_srq;
+		}
+
+		srq->head = 0;
+		srq->tail = srq->max - 1;
+
+		ret = hns_roce_mtt_init(hr_dev, srq->buf.npages,
+					srq->buf.page_shift, &srq->mtt);
+		if (ret)
+			goto err_buf;
+
+		ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
+		if (ret)
+			goto err_srq_mtt;
+
+		page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+		ret = hns_roce_create_idx_que(pd, srq, page_shift);
+		if (ret) {
+			dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n",
+				ret);
+			goto err_srq_mtt;
+		}
+
+		/* Init mtt table for idx_que */
+		ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
+					srq->idx_que.idx_buf.page_shift,
+					&srq->idx_que.mtt);
+		if (ret)
+			goto err_create_idx;
+
+		/* Write buffer address into the mtt table */
+		ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
+					     &srq->idx_que.idx_buf);
+		if (ret)
+			goto err_idx_buf;
+
+		srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
+		if (!srq->wrid) {
+			ret = -ENOMEM;
+			goto err_idx_buf;
+		}
+	}
+
+	cqn = ib_srq_has_cq(srq_init_attr->srq_type) ?
+	      to_hr_cq(srq_init_attr->ext.cq)->cqn : 0;
+
+	srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
+
+	/*
+	 * hns_roce_srq_alloc() publishes the SRQ to hns_roce_srq_event(),
+	 * which calls ->event unconditionally, so install it first.
+	 */
+	srq->event = hns_roce_ib_srq_event;
+
+	ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(pd)->pdn, cqn, 0,
+				 &srq->mtt, 0, srq);
+	if (ret)
+		goto err_wrid;
+
+	srq->ibsrq.ext.xrc.srq_num = srq->srqn;
+
+	if (udata) {
+		if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {
+			ret = -EFAULT;
+			goto err_srqc;
+		}
+	}
+
+	return &srq->ibsrq;
+
+err_srqc:
+	/* The SRQ is live in hardware and in the table: tear that down too. */
+	hns_roce_srq_free(hr_dev, srq);
+
+err_wrid:
+	kvfree(srq->wrid);
+
+err_idx_buf:
+	hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+
+err_idx_mtt:
+	if (udata)
+		ib_umem_release(srq->idx_que.umem);
+
+err_create_idx:
+	/* Only the kernel path allocated an index buffer and bitmap. */
+	if (!udata) {
+		hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
+				  &srq->idx_que.idx_buf);
+		kfree(srq->idx_que.bitmap);
+	}
+
+err_srq_mtt:
+	hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+err_buf:
+	if (udata)
+		ib_umem_release(srq->umem);
+	else
+		hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+
+err_srq:
+	kfree(srq);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Destroy an SRQ created by hns_roce_create_srq() and release everything
+ * that function set up, for both the userspace and the kernel variant.
+ * Always returns 0.
+ */
+int hns_roce_destroy_srq(struct ib_srq *ibsrq)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+	struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+
+	hns_roce_srq_free(hr_dev, srq);
+	hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+	/* Both variants initialise an MTT for the index queue. */
+	hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+
+	if (ibsrq->uobject) {
+		ib_umem_release(srq->idx_que.umem);
+		ib_umem_release(srq->umem);
+	} else {
+		kvfree(srq->wrid);
+		hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
+				  &srq->idx_que.idx_buf);
+		kfree(srq->idx_que.bitmap);
+		hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift,
+				  &srq->buf);
+	}
+
+	kfree(srq);
+
+	return 0;
+}
+
+/* Set up the SRQ lookup xarray and the SRQ number allocator. */
+int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+
+	xa_init(&srq_table->xa);
+
+	return hns_roce_bitmap_init(&srq_table->bitmap, hr_dev->caps.num_srqs,
+				    hr_dev->caps.num_srqs - 1,
+				    hr_dev->caps.reserved_srqs, 0);
+}
+
+/* Release the SRQ number allocator set up by hns_roce_init_srq_table(). */
+void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev)
+{
+	hns_roce_bitmap_cleanup(&hr_dev->srq_table.bitmap);
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index
771eb6bd0785..206cfb0016f8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -404,7 +404,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, if (pdata) pd_len = pdata->size; - if (cm_node->vlan_id < VLAN_TAG_PRESENT) + if (cm_node->vlan_id <= VLAN_VID_MASK) eth_hlen += 4; if (cm_node->ipv4) @@ -433,7 +433,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, ether_addr_copy(ethh->h_dest, cm_node->rem_mac); ether_addr_copy(ethh->h_source, cm_node->loc_mac); - if (cm_node->vlan_id < VLAN_TAG_PRESENT) { + if (cm_node->vlan_id <= VLAN_VID_MASK) { ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q); vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id; ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag); @@ -463,7 +463,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, ether_addr_copy(ethh->h_dest, cm_node->rem_mac); ether_addr_copy(ethh->h_source, cm_node->loc_mac); - if (cm_node->vlan_id < VLAN_TAG_PRESENT) { + if (cm_node->vlan_id <= VLAN_VID_MASK) { ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q); vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id; ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag); @@ -3323,7 +3323,7 @@ static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node, tcp_info->flow_label = 0; tcp_info->snd_mss = cpu_to_le32(((u32)cm_node->tcp_cntxt.mss)); - if (cm_node->vlan_id < VLAN_TAG_PRESENT) { + if (cm_node->vlan_id <= VLAN_VID_MASK) { tcp_info->insert_vlan_tag = true; tcp_info->vlan_tag = cpu_to_le16(((u16)cm_node->user_pri << I40IW_VLAN_PRIO_SHIFT) | cm_node->vlan_id); @@ -3478,7 +3478,7 @@ static void i40iw_qp_disconnect(struct i40iw_qp *iwqp) /* Need to free the Last Streaming Mode Message */ if (iwqp->ietf_mem.va) { if (iwqp->lsmm_mr) - iwibdev->ibdev.dereg_mr(iwqp->lsmm_mr); + iwibdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr); 
i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->ietf_mem); } } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 102875872bea..0b675b0742c2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -673,28 +673,26 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; - if (ibpd->uobject && ibpd->uobject->context) { - iwqp->user_mode = 1; - ucontext = to_ucontext(ibpd->uobject->context); - - if (req.user_wqe_buffers) { - struct i40iw_pbl *iwpbl; - - spin_lock_irqsave( - &ucontext->qp_reg_mem_list_lock, flags); - iwpbl = i40iw_get_pbl( - (unsigned long)req.user_wqe_buffers, - &ucontext->qp_reg_mem_list); - spin_unlock_irqrestore( - &ucontext->qp_reg_mem_list_lock, flags); - - if (!iwpbl) { - err_code = -ENODATA; - i40iw_pr_err("no pbl info\n"); - goto error; - } - memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl)); + iwqp->user_mode = 1; + ucontext = to_ucontext(ibpd->uobject->context); + + if (req.user_wqe_buffers) { + struct i40iw_pbl *iwpbl; + + spin_lock_irqsave( + &ucontext->qp_reg_mem_list_lock, flags); + iwpbl = i40iw_get_pbl( + (unsigned long)req.user_wqe_buffers, + &ucontext->qp_reg_mem_list); + spin_unlock_irqrestore( + &ucontext->qp_reg_mem_list_lock, flags); + + if (!iwpbl) { + err_code = -ENODATA; + i40iw_pr_err("no pbl info\n"); + goto error; } + memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl)); } err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info); } else { @@ -768,7 +766,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, iwdev->qp_table[qp_num] = iwqp; i40iw_add_pdusecount(iwqp->iwpd); i40iw_add_devusecount(iwdev); - if (ibpd->uobject && udata) { + if (udata) { memset(&uresp, 0, sizeof(uresp)); uresp.actual_sq_size = sq_size; uresp.actual_rq_size = rq_size; @@ -2092,7 +2090,8 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr) ib_umem_release(iwmr->region); if (iwmr->type != 
IW_MEMREG_TYPE_MEM) { - if (ibpd->uobject) { + /* region is released. only test for userness. */ + if (iwmr->region) { struct i40iw_ucontext *ucontext; ucontext = to_ucontext(ibpd->uobject->context); @@ -2721,24 +2720,38 @@ static int i40iw_query_pkey(struct ib_device *ibdev, return 0; } -/** - * i40iw_get_vector_affinity - report IRQ affinity mask - * @ibdev: IB device - * @comp_vector: completion vector index - */ -static const struct cpumask *i40iw_get_vector_affinity(struct ib_device *ibdev, - int comp_vector) -{ - struct i40iw_device *iwdev = to_iwdev(ibdev); - struct i40iw_msix_vector *msix_vec; - - if (iwdev->msix_shared) - msix_vec = &iwdev->iw_msixtbl[comp_vector]; - else - msix_vec = &iwdev->iw_msixtbl[comp_vector + 1]; - - return irq_get_affinity_mask(msix_vec->irq); -} +static const struct ib_device_ops i40iw_dev_ops = { + .alloc_hw_stats = i40iw_alloc_hw_stats, + .alloc_mr = i40iw_alloc_mr, + .alloc_pd = i40iw_alloc_pd, + .alloc_ucontext = i40iw_alloc_ucontext, + .create_cq = i40iw_create_cq, + .create_qp = i40iw_create_qp, + .dealloc_pd = i40iw_dealloc_pd, + .dealloc_ucontext = i40iw_dealloc_ucontext, + .dereg_mr = i40iw_dereg_mr, + .destroy_cq = i40iw_destroy_cq, + .destroy_qp = i40iw_destroy_qp, + .drain_rq = i40iw_drain_rq, + .drain_sq = i40iw_drain_sq, + .get_dev_fw_str = i40iw_get_dev_fw_str, + .get_dma_mr = i40iw_get_dma_mr, + .get_hw_stats = i40iw_get_hw_stats, + .get_port_immutable = i40iw_port_immutable, + .map_mr_sg = i40iw_map_mr_sg, + .mmap = i40iw_mmap, + .modify_qp = i40iw_modify_qp, + .poll_cq = i40iw_poll_cq, + .post_recv = i40iw_post_recv, + .post_send = i40iw_post_send, + .query_device = i40iw_query_device, + .query_gid = i40iw_query_gid, + .query_pkey = i40iw_query_pkey, + .query_port = i40iw_query_port, + .query_qp = i40iw_query_qp, + .reg_user_mr = i40iw_reg_user_mr, + .req_notify_cq = i40iw_req_notify_cq, +}; /** * i40iw_init_rdma_device - initialization of iwarp device @@ -2786,30 +2799,6 @@ static struct i40iw_ib_device 
*i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.phys_port_cnt = 1; iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = &pcidev->dev; - iwibdev->ibdev.query_port = i40iw_query_port; - iwibdev->ibdev.query_pkey = i40iw_query_pkey; - iwibdev->ibdev.query_gid = i40iw_query_gid; - iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext; - iwibdev->ibdev.dealloc_ucontext = i40iw_dealloc_ucontext; - iwibdev->ibdev.mmap = i40iw_mmap; - iwibdev->ibdev.alloc_pd = i40iw_alloc_pd; - iwibdev->ibdev.dealloc_pd = i40iw_dealloc_pd; - iwibdev->ibdev.create_qp = i40iw_create_qp; - iwibdev->ibdev.modify_qp = i40iw_modify_qp; - iwibdev->ibdev.query_qp = i40iw_query_qp; - iwibdev->ibdev.destroy_qp = i40iw_destroy_qp; - iwibdev->ibdev.create_cq = i40iw_create_cq; - iwibdev->ibdev.destroy_cq = i40iw_destroy_cq; - iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr; - iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr; - iwibdev->ibdev.dereg_mr = i40iw_dereg_mr; - iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats; - iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats; - iwibdev->ibdev.query_device = i40iw_query_device; - iwibdev->ibdev.drain_sq = i40iw_drain_sq; - iwibdev->ibdev.drain_rq = i40iw_drain_rq; - iwibdev->ibdev.alloc_mr = i40iw_alloc_mr; - iwibdev->ibdev.map_mr_sg = i40iw_map_mr_sg; iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL); if (!iwibdev->ibdev.iwcm) { ib_dealloc_device(&iwibdev->ibdev); @@ -2826,13 +2815,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.iwcm->destroy_listen = i40iw_destroy_listen; memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name, sizeof(iwibdev->ibdev.iwcm->ifname)); - iwibdev->ibdev.get_port_immutable = i40iw_port_immutable; - iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str; - iwibdev->ibdev.poll_cq = i40iw_poll_cq; - iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq; - iwibdev->ibdev.post_send = i40iw_post_send; - iwibdev->ibdev.post_recv = 
i40iw_post_recv; - iwibdev->ibdev.get_vector_affinity = i40iw_get_vector_affinity; + ib_set_device_ops(&iwibdev->ibdev, &i40iw_dev_ops); return iwibdev; } diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index e9e3a6f390db..1672808262ba 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -144,7 +144,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, } struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct mlx4_ib_ah *ah; @@ -189,7 +189,7 @@ struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, slave_attr.grh.sgid_attr = NULL; slave_attr.grh.sgid_index = slave_sgid_index; - ah = mlx4_ib_create_ah(pd, &slave_attr, NULL); + ah = mlx4_ib_create_ah(pd, &slave_attr, 0, NULL); if (IS_ERR(ah)) return ah; @@ -250,7 +250,7 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx4_ib_destroy_ah(struct ib_ah *ah) +int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) { kfree(to_mah(ah)); return 0; diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 155b4dfc0ae8..782499abcd98 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -849,7 +849,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev) spin_lock_init(&dev->sriov.alias_guid.ag_work_lock); for (i = 1; i <= dev->num_ports; ++i) { - if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) { + if (dev->ib_dev.ops.query_gid(&dev->ib_dev, i, 0, &gid)) { ret = -EFAULT; goto err_unregister; } diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 82adc0d1d30e..43512347b4f0 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -181,6 +181,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_cq 
*cq; struct mlx4_uar *uar; + void *buf_addr; int err; if (entries < 1 || entries > dev->dev->caps.max_cqes) @@ -211,6 +212,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, goto err_cq; } + buf_addr = (void *)(unsigned long)ucmd.buf_addr; + err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem, ucmd.buf_addr, entries); if (err) @@ -237,6 +240,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, if (err) goto err_db; + buf_addr = &cq->buf.buf; + uar = &dev->priv_uar; cq->mcq.usage = MLX4_RES_USAGE_DRIVER; } @@ -246,7 +251,9 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, cq->db.dma, &cq->mcq, vector, 0, - !!(cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION)); + !!(cq->create_flags & + IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION), + buf_addr, !!context); if (err) goto err_dbmap; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 8942f5f7f04d..25439da8976c 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -202,13 +202,13 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) rdma_ah_set_port_num(&ah_attr, port_num); new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, - &ah_attr); + &ah_attr, 0); if (IS_ERR(new_ah)) return; spin_lock_irqsave(&dev->sm_lock, flags); if (dev->sm_ah[port_num - 1]) - rdma_destroy_ah(dev->sm_ah[port_num - 1]); + rdma_destroy_ah(dev->sm_ah[port_num - 1], 0); dev->sm_ah[port_num - 1] = new_ah; spin_unlock_irqrestore(&dev->sm_lock, flags); } @@ -567,7 +567,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, return -EINVAL; rdma_ah_set_grh(&attr, &dgid, 0, 0, 0, 0); } - ah = rdma_create_ah(tun_ctx->pd, &attr); + ah = rdma_create_ah(tun_ctx->pd, &attr, 0); if (IS_ERR(ah)) return -ENOMEM; @@ -584,7 +584,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, tun_mad = (struct 
mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr); if (tun_qp->tx_ring[tun_tx_ix].ah) - rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah); + rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah, 0); tun_qp->tx_ring[tun_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, tun_qp->tx_ring[tun_tx_ix].buf.map, @@ -657,7 +657,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&tun_qp->tx_lock); tun_qp->tx_ring[tun_tx_ix].ah = NULL; end: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, 0); return ret; } @@ -1024,7 +1024,7 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { if (mad_send_wc->send_buf->context[0]) - rdma_destroy_ah(mad_send_wc->send_buf->context[0]); + rdma_destroy_ah(mad_send_wc->send_buf->context[0], 0); ib_free_send_mad(mad_send_wc->send_buf); } @@ -1079,7 +1079,7 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) } if (dev->sm_ah[p]) - rdma_destroy_ah(dev->sm_ah[p]); + rdma_destroy_ah(dev->sm_ah[p], 0); } } @@ -1411,7 +1411,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); if (sqp->tx_ring[wire_tx_ix].ah) - rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah); + rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); sqp->tx_ring[wire_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, @@ -1450,7 +1450,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&sqp->tx_lock); sqp->tx_ring[wire_tx_ix].ah = NULL; out: - mlx4_ib_destroy_ah(ah); + mlx4_ib_destroy_ah(ah, 0); return ret; } @@ -1716,7 +1716,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); if (tun_qp->tx_ring[i].ah) - rdma_destroy_ah(tun_qp->tx_ring[i].ah); + rdma_destroy_ah(tun_qp->tx_ring[i].ah, 0); } kfree(tun_qp->tx_ring); kfree(tun_qp->ring); @@ -1749,7 +1749,7 @@ static 
void mlx4_ib_tunnel_comp_worker(struct work_struct *work) "wrid=0x%llx, status=0x%x\n", wc.wr_id, wc.status); rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&tun_qp->tx_lock); @@ -1766,7 +1766,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&tun_qp->tx_lock); @@ -1903,7 +1903,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) switch (wc.opcode) { case IB_WC_SEND: rdma_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); @@ -1932,7 +1932,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { rdma_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0def2323459c..1f15ec3e2b83 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2220,6 +2220,11 @@ static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev, } } +static const struct ib_device_ops mlx4_ib_hw_stats_ops = { + .alloc_hw_stats = mlx4_ib_alloc_hw_stats, + .get_hw_stats = mlx4_ib_get_hw_stats, +}; + static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) { struct mlx4_ib_diag_counters *diag = ibdev->diag_counters; @@ -2246,8 +2251,7 @@ static int 
mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) diag[i].offset, i); } - ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats; - ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats; + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_hw_stats_ops); return 0; @@ -2352,6 +2356,32 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, event == NETDEV_UP || event == NETDEV_CHANGE)) update_qps_port = port; + if (dev == iboe->netdevs[port - 1] && + (event == NETDEV_UP || event == NETDEV_DOWN)) { + enum ib_port_state port_state; + struct ib_event ibev = { }; + + if (ib_get_cached_port_state(&ibdev->ib_dev, port, + &port_state)) + continue; + + if (event == NETDEV_UP && + (port_state != IB_PORT_ACTIVE || + iboe->last_port_state[port - 1] != IB_PORT_DOWN)) + continue; + if (event == NETDEV_DOWN && + (port_state != IB_PORT_DOWN || + iboe->last_port_state[port - 1] != IB_PORT_ACTIVE)) + continue; + iboe->last_port_state[port - 1] = port_state; + + ibev.device = &ibdev->ib_dev; + ibev.element.port_num = port; + ibev.event = event == NETDEV_UP ? 
IB_EVENT_PORT_ACTIVE : + IB_EVENT_PORT_ERR; + ib_dispatch_event(&ibev); + } + } spin_unlock_bh(&iboe->lock); @@ -2499,6 +2529,88 @@ static void get_fw_ver_str(struct ib_device *device, char *str) (int) dev->dev->caps.fw_ver & 0xffff); } +static const struct ib_device_ops mlx4_ib_dev_ops = { + .add_gid = mlx4_ib_add_gid, + .alloc_mr = mlx4_ib_alloc_mr, + .alloc_pd = mlx4_ib_alloc_pd, + .alloc_ucontext = mlx4_ib_alloc_ucontext, + .attach_mcast = mlx4_ib_mcg_attach, + .create_ah = mlx4_ib_create_ah, + .create_cq = mlx4_ib_create_cq, + .create_qp = mlx4_ib_create_qp, + .create_srq = mlx4_ib_create_srq, + .dealloc_pd = mlx4_ib_dealloc_pd, + .dealloc_ucontext = mlx4_ib_dealloc_ucontext, + .del_gid = mlx4_ib_del_gid, + .dereg_mr = mlx4_ib_dereg_mr, + .destroy_ah = mlx4_ib_destroy_ah, + .destroy_cq = mlx4_ib_destroy_cq, + .destroy_qp = mlx4_ib_destroy_qp, + .destroy_srq = mlx4_ib_destroy_srq, + .detach_mcast = mlx4_ib_mcg_detach, + .disassociate_ucontext = mlx4_ib_disassociate_ucontext, + .drain_rq = mlx4_ib_drain_rq, + .drain_sq = mlx4_ib_drain_sq, + .get_dev_fw_str = get_fw_ver_str, + .get_dma_mr = mlx4_ib_get_dma_mr, + .get_link_layer = mlx4_ib_port_link_layer, + .get_netdev = mlx4_ib_get_netdev, + .get_port_immutable = mlx4_port_immutable, + .map_mr_sg = mlx4_ib_map_mr_sg, + .mmap = mlx4_ib_mmap, + .modify_cq = mlx4_ib_modify_cq, + .modify_device = mlx4_ib_modify_device, + .modify_port = mlx4_ib_modify_port, + .modify_qp = mlx4_ib_modify_qp, + .modify_srq = mlx4_ib_modify_srq, + .poll_cq = mlx4_ib_poll_cq, + .post_recv = mlx4_ib_post_recv, + .post_send = mlx4_ib_post_send, + .post_srq_recv = mlx4_ib_post_srq_recv, + .process_mad = mlx4_ib_process_mad, + .query_ah = mlx4_ib_query_ah, + .query_device = mlx4_ib_query_device, + .query_gid = mlx4_ib_query_gid, + .query_pkey = mlx4_ib_query_pkey, + .query_port = mlx4_ib_query_port, + .query_qp = mlx4_ib_query_qp, + .query_srq = mlx4_ib_query_srq, + .reg_user_mr = mlx4_ib_reg_user_mr, + .req_notify_cq = mlx4_ib_arm_cq, + 
.rereg_user_mr = mlx4_ib_rereg_user_mr, + .resize_cq = mlx4_ib_resize_cq, +}; + +static const struct ib_device_ops mlx4_ib_dev_wq_ops = { + .create_rwq_ind_table = mlx4_ib_create_rwq_ind_table, + .create_wq = mlx4_ib_create_wq, + .destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table, + .destroy_wq = mlx4_ib_destroy_wq, + .modify_wq = mlx4_ib_modify_wq, +}; + +static const struct ib_device_ops mlx4_ib_dev_fmr_ops = { + .alloc_fmr = mlx4_ib_fmr_alloc, + .dealloc_fmr = mlx4_ib_fmr_dealloc, + .map_phys_fmr = mlx4_ib_map_phys_fmr, + .unmap_fmr = mlx4_ib_unmap_fmr, +}; + +static const struct ib_device_ops mlx4_ib_dev_mw_ops = { + .alloc_mw = mlx4_ib_alloc_mw, + .dealloc_mw = mlx4_ib_dealloc_mw, +}; + +static const struct ib_device_ops mlx4_ib_dev_xrc_ops = { + .alloc_xrcd = mlx4_ib_alloc_xrcd, + .dealloc_xrcd = mlx4_ib_dealloc_xrcd, +}; + +static const struct ib_device_ops mlx4_ib_dev_fs_ops = { + .create_flow = mlx4_ib_create_flow, + .destroy_flow = mlx4_ib_destroy_flow, +}; + static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; @@ -2554,9 +2666,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) 1 : ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; - ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev; - ibdev->ib_dev.add_gid = mlx4_ib_add_gid; - ibdev->ib_dev.del_gid = mlx4_ib_del_gid; if (dev->caps.userspace_caps) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; @@ -2589,116 +2698,53 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); - ibdev->ib_dev.query_device = mlx4_ib_query_device; - ibdev->ib_dev.query_port = mlx4_ib_query_port; - ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer; - ibdev->ib_dev.query_gid = mlx4_ib_query_gid; - ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; - ibdev->ib_dev.modify_device = mlx4_ib_modify_device; - ibdev->ib_dev.modify_port = 
mlx4_ib_modify_port; - ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext; - ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext; - ibdev->ib_dev.mmap = mlx4_ib_mmap; - ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd; - ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd; - ibdev->ib_dev.create_ah = mlx4_ib_create_ah; - ibdev->ib_dev.query_ah = mlx4_ib_query_ah; - ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; - ibdev->ib_dev.create_srq = mlx4_ib_create_srq; - ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; - ibdev->ib_dev.query_srq = mlx4_ib_query_srq; - ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; - ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; - ibdev->ib_dev.create_qp = mlx4_ib_create_qp; - ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; - ibdev->ib_dev.query_qp = mlx4_ib_query_qp; - ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; - ibdev->ib_dev.drain_sq = mlx4_ib_drain_sq; - ibdev->ib_dev.drain_rq = mlx4_ib_drain_rq; - ibdev->ib_dev.post_send = mlx4_ib_post_send; - ibdev->ib_dev.post_recv = mlx4_ib_post_recv; - ibdev->ib_dev.create_cq = mlx4_ib_create_cq; - ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq; - ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq; - ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq; - ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq; - ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; - ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; - ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; - ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr; - ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; - ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr; - ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg; - ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; - ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; - ibdev->ib_dev.process_mad = mlx4_ib_process_mad; - ibdev->ib_dev.get_port_immutable = mlx4_port_immutable; - ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str; - ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; - + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); 
ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) || (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == IB_LINK_LAYER_ETHERNET))) { - ibdev->ib_dev.create_wq = mlx4_ib_create_wq; - ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq; - ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq; - ibdev->ib_dev.create_rwq_ind_table = - mlx4_ib_create_rwq_ind_table; - ibdev->ib_dev.destroy_rwq_ind_table = - mlx4_ib_destroy_rwq_ind_table; ibdev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops); } - if (!mlx4_is_slave(ibdev->dev)) { - ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; - ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; - ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; - ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; - } + if (!mlx4_is_slave(ibdev->dev)) + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fmr_ops); if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { - ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw; - ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw; - ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops); } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { - ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; - ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; ibdev->ib_dev.uverbs_cmd_mask |= (1ull << 
IB_USER_VERBS_CMD_OPEN_XRCD) | (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops); } if (check_flow_steering_support(dev)) { ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; - ibdev->ib_dev.create_flow = mlx4_ib_create_flow; - ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow; - ibdev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops); } - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); - mlx4_ib_alloc_eqs(dev, ibdev); spin_lock_init(&iboe->lock); @@ -2710,6 +2756,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) for (i = 0; i < ibdev->num_ports; ++i) { mutex_init(&ibdev->counters_table[i].mutex); INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list); + iboe->last_port_state[i] = IB_PORT_DOWN; } num_req_counters = mlx4_is_bonded(dev) ? 
1 : ibdev->num_ports; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 8850dfc3826d..e491f3eda6e7 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -519,6 +519,7 @@ struct mlx4_ib_iboe { atomic64_t mac[MLX4_MAX_PORTS]; struct notifier_block nb; struct mlx4_port_gid_table gids[MLX4_MAX_PORTS]; + enum ib_port_state last_port_state[MLX4_MAX_PORTS]; }; struct pkey_mgt { @@ -753,13 +754,13 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); + u32 flags, struct ib_udata *udata); struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx4_ib_destroy_ah(struct ib_ah *ah); +int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 0711ca1dfb8f..971e9a9ebdaf 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -323,7 +323,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - int is_user, int has_rq, struct mlx4_ib_qp *qp, + bool is_user, int has_rq, struct mlx4_ib_qp *qp, u32 inl_recv_sz) { /* Sanity check RQ size before proceeding */ @@ -401,7 +401,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, * We need to leave 2 KB + 1 WR of headroom in the SQ to * allow HW to prefetch. 
*/ - qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1; + qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift); qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes); @@ -942,7 +942,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - if (pd->uobject) { + if (udata) { union { struct mlx4_ib_create_qp qp; struct mlx4_ib_create_wq wq; @@ -991,7 +991,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->flags |= MLX4_IB_QP_SCATTER_FCS; } - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + err = set_rq_size(dev, &init_attr->cap, udata, qp_has_rq(init_attr), qp, qp->inl_recv_sz); if (err) goto err; @@ -1043,7 +1043,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS; } else { - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + err = set_rq_size(dev, &init_attr->cap, udata, qp_has_rq(init_attr), qp, 0); if (err) goto err; @@ -1189,7 +1189,7 @@ err_proxy: if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) free_proxy_bufs(pd->device, qp); err_wrid: - if (pd->uobject) { + if (udata) { if (qp_has_rq(init_attr)) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); } else { @@ -1201,20 +1201,20 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &qp->mtt); err_buf: - if (pd->uobject) + if (qp->umem) ib_umem_release(qp->umem); else mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); err_db: - if (!pd->uobject && qp_has_rq(init_attr)) + if (!udata && qp_has_rq(init_attr)) mlx4_db_free(dev->dev, &qp->db); err: - if (sqp) - kfree(sqp); - else if (!*caller_qp) + if (!sqp && !*caller_qp) kfree(qp); + kfree(sqp); + return err; } @@ -1332,7 +1332,7 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, - enum mlx4_ib_source_type src, int is_user) + enum 
mlx4_ib_source_type src, bool is_user) { struct mlx4_ib_cq *send_cq, *recv_cq; unsigned long flags; @@ -1609,10 +1609,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (qp->rwq_ind_tbl) { destroy_qp_rss(dev, mqp); } else { - struct mlx4_ib_pd *pd; - - pd = get_pd(mqp); - destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject); + destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, qp->uobject); } if (is_sqp(dev, mqp)) @@ -4044,7 +4041,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, struct mlx4_ib_create_wq ucmd; int err, required_cmd_sz; - if (!(udata && pd->uobject)) + if (!udata) return ERR_PTR(-EINVAL); required_cmd_sz = offsetof(typeof(ucmd), comp_mask) + diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 3731b31c3653..4456f1b8921d 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -105,7 +105,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, buf_size = srq->msrq.max * desc_size; - if (pd->uobject) { + if (udata) { struct mlx4_ib_create_srq ucmd; if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -191,7 +191,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, srq->msrq.event = mlx4_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (pd->uobject) + if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { err = -EFAULT; goto err_wrid; @@ -202,7 +202,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, return &srq->ibsrq; err_wrid: - if (pd->uobject) + if (udata) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); else kvfree(srq->wrid); @@ -211,13 +211,13 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &srq->mtt); err_buf: - if (pd->uobject) + if (srq->umem) ib_umem_release(srq->umem); else mlx4_buf_free(dev->dev, buf_size, &srq->buf); err_db: - if (!pd->uobject) + if (!udata) mlx4_db_free(dev->dev, &srq->db); err_srq: diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 
752bdd536130..ea1f3a081b05 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -353,16 +353,12 @@ err: static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) { - char base_name[9]; - - /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ - strlcpy(name, pci_name(dev->dev->persist->pdev), max); - strncpy(base_name, name, 8); /*till xxxx:yy:*/ - base_name[8] = '\0'; - /* with no ARI only 3 last bits are used so when the fn is higher than 8 + /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n + * with no ARI only 3 last bits are used so when the fn is higher than 8 * need to add it to the dev num, so count in the last number will be * modulo 8 */ - sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8)); + snprintf(name, max, "%.8s%.2d.%d", pci_name(dev->dev->persist->pdev), + i / 8, i % 8); } struct mlx4_port { diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index b8e4b15e2674..33f5adb14e4e 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,6 +1,8 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \ + srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \ + cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index ffd03bf1a71e..420ae0897333 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -72,7 +72,7 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, } struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct mlx5_ib_ah *ah; @@ -131,7 +131,7 @@ int 
mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx5_ib_destroy_ah(struct ib_ah *ah) +int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) { kfree(to_mah(ah)); return 0; diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index ca060a2e2b36..356bccc715ee 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -240,6 +240,7 @@ int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, MLX5_SET(alloc_transport_domain_in, in, opcode, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); + MLX5_SET(alloc_transport_domain_in, in, uid, uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) @@ -257,6 +258,7 @@ void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, MLX5_SET(dealloc_transport_domain_in, in, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, in, uid, uid); MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -326,3 +328,20 @@ int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid) MLX5_SET(dealloc_xrcd_in, in, uid, uid); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } + +int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, + u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0}; + int err; + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + MLX5_SET(alloc_q_counter_in, in, uid, uid); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *counter_id = MLX5_GET(alloc_q_counter_out, out, + counter_set_id); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index c03c56455534..1e76dc67a369 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -61,4 +61,6 @@ int 
mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn, u16 uid); int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); +int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, + u16 uid); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 7d769b5538b4..90f1b0bae5b5 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -35,6 +35,7 @@ #include <rdma/ib_user_verbs.h> #include <rdma/ib_cache.h> #include "mlx5_ib.h" +#include "srq.h" static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq) { @@ -81,7 +82,7 @@ static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; - if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) && + if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) { return cqe; } else { @@ -177,8 +178,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_core_srq *msrq = NULL; if (qp->ibqp.xrcd) { - msrq = mlx5_core_get_srq(dev->mdev, - be32_to_cpu(cqe->srqn)); + msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn)); srq = to_mibsrq(msrq); } else { srq = to_msrq(qp->ibqp.srq); @@ -197,7 +197,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, } wc->byte_len = be32_to_cpu(cqe->byte_cnt); - switch (cqe->op_own >> 4) { + switch (get_cqe_opcode(cqe)) { case MLX5_CQE_RESP_WR_IMM: wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; wc->wc_flags = IB_WC_WITH_IMM; @@ -330,67 +330,6 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, dump_cqe(dev, cqe); } -static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx) -{ - /* TBD: waiting decision - */ - return 0; -} - -static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx) -{ - struct mlx5_wqe_data_seg 
*dpseg; - void *addr; - - dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_atomic_seg); - addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr); - return addr; -} - -static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, - uint16_t idx) -{ - void *addr; - int byte_count; - int i; - - if (!is_atomic_response(qp, idx)) - return; - - byte_count = be32_to_cpu(cqe64->byte_cnt); - addr = mlx5_get_atomic_laddr(qp, idx); - - if (byte_count == 4) { - *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr)); - } else { - for (i = 0; i < byte_count; i += 8) { - *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr)); - addr += 8; - } - } - - return; -} - -static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, - u16 tail, u16 head) -{ - u16 idx; - - do { - idx = tail & (qp->sq.wqe_cnt - 1); - handle_atomic(qp, cqe64, idx); - if (idx == head) - break; - - tail = qp->sq.w_list[idx].next; - } while (1); - tail = qp->sq.w_list[idx].next; - qp->sq.last_poll = tail; -} - static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) { mlx5_frag_buf_free(dev->mdev, &buf->frag_buf); @@ -428,45 +367,15 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, item->key = be32_to_cpu(cqe->mkey); } -static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries, - struct ib_wc *wc, int *npolled) -{ - struct mlx5_ib_wq *wq; - unsigned int cur; - unsigned int idx; - int np; - int i; - - wq = &qp->sq; - cur = wq->head - wq->tail; - np = *npolled; - - if (cur == 0) - return; - - for (i = 0; i < cur && np < num_entries; i++) { - idx = wq->last_poll & (wq->wqe_cnt - 1); - wc->wr_id = wq->wrid[idx]; - wc->status = IB_WC_WR_FLUSH_ERR; - wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; - wq->tail++; - np++; - wc->qp = &qp->ibqp; - wc++; - wq->last_poll = wq->w_list[idx].next; - } - *npolled = np; -} - -static void sw_recv_comp(struct mlx5_ib_qp *qp, int 
num_entries, - struct ib_wc *wc, int *npolled) +static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, + int *npolled, int is_send) { struct mlx5_ib_wq *wq; unsigned int cur; int np; int i; - wq = &qp->rq; + wq = (is_send) ? &qp->sq : &qp->rq; cur = wq->head - wq->tail; np = *npolled; @@ -493,13 +402,13 @@ static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries, *npolled = 0; /* Find uncompleted WQEs belonging to that cq and return mmics ones */ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { - sw_send_comp(qp, num_entries, wc + *npolled, npolled); + sw_comp(qp, num_entries, wc + *npolled, npolled, true); if (*npolled >= num_entries) return; } list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) { - sw_recv_comp(qp, num_entries, wc + *npolled, npolled); + sw_comp(qp, num_entries, wc + *npolled, npolled, false); if (*npolled >= num_entries) return; } @@ -537,7 +446,7 @@ repoll: */ rmb(); - opcode = cqe64->op_own >> 4; + opcode = get_cqe_opcode(cqe64); if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) { if (likely(cq->resize_buf)) { free_cq_buf(dev, &cq->buf); @@ -567,7 +476,6 @@ repoll: wqe_ctr = be16_to_cpu(cqe64->wqe_counter); idx = wqe_ctr & (wq->wqe_cnt - 1); handle_good_req(wc, cqe64, wq, idx); - handle_atomics(*cur_qp, cqe64, wq->last_poll, idx); wc->wr_id = wq->wrid[idx]; wq->tail = wq->wqe_head[idx] + 1; wc->status = IB_WC_SUCCESS; @@ -1295,7 +1203,7 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq) return -EINVAL; } - while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) { + while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) { dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc, (i + 1) & cq->resize_buf->nent); dcqe64 = dsize == 64 ? 
dcqe : dcqe + 64; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 45c421c87100..5a588f3cfb1b 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -9,6 +9,7 @@ #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_cmds.h> #include <rdma/ib_umem.h> +#include <rdma/uverbs_std_types.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include "mlx5_ib.h" @@ -40,29 +41,32 @@ struct devx_umem_reg_cmd { u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; }; -static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file) +static struct mlx5_ib_ucontext * +devx_ufile2uctx(const struct uverbs_attr_bundle *attrs) { - return to_mucontext(ib_uverbs_get_ucontext(file)); + return to_mucontext(ib_uverbs_get_ucontext(attrs)); } -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev) +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) { u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; - u64 general_obj_types; - void *hdr; + void *uctx; int err; u16 uid; + u32 cap = 0; - hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr); - - general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types); - if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) || - !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM)) + /* 0 means not supported */ + if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx)) return -EINVAL; - MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX); + uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx); + if (is_user && capable(CAP_NET_RAW) && + (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX)) + cap |= MLX5_UCTX_CAP_RAW_TX; + + MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); + MLX5_SET(uctx, uctx, cap, cap); err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); if (err) @@ -74,12 +78,11 @@ int 
mlx5_ib_devx_create(struct mlx5_ib_dev *dev) void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) { - u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, uid); + MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); + MLX5_SET(destroy_uctx_in, in, uid, uid); mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } @@ -106,6 +109,21 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) } } +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { + *counter_id = MLX5_GET(dealloc_flow_counter_in, + devx_obj->dinbox, + flow_counter_id); + return true; + } + + return false; +} + /* * As the obj_id in the firmware is not globally unique the object type * must be considered upon checking for a valid object id. 
@@ -116,7 +134,7 @@ static u64 get_enc_obj_id(u16 opcode, u32 obj_id) return ((u64)opcode << 32) | obj_id; } -static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) +static u64 devx_get_obj_id(const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); u64 obj_id; @@ -290,6 +308,8 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) MLX5_GET(query_dct_in, in, dctn)); break; case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, MLX5_GET(query_xrq_in, in, xrqn)); break; @@ -316,17 +336,107 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) MLX5_GET(drain_dct_in, in, dctn)); break; case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, MLX5_GET(arm_xrq_in, in, xrqn)); break; + case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT: + obj_id = get_enc_obj_id + (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT, + MLX5_GET(query_packet_reformat_context_in, + in, packet_reformat_id)); + break; default: + obj_id = 0; + } + + return obj_id; +} + +static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in) +{ + u64 obj_id = devx_get_obj_id(in); + + if (!obj_id) return false; + + switch (uobj_get_object_id(uobj)) { + case UVERBS_OBJECT_CQ: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ, + to_mcq(uobj->object)->mcq.cqn) == + obj_id; + + case UVERBS_OBJECT_SRQ: + { + struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq); + struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + u16 opcode; + + switch (srq->common.res) { + case MLX5_RES_XSRQ: + opcode = MLX5_CMD_OP_CREATE_XRC_SRQ; + break; + case MLX5_RES_XRQ: + opcode = MLX5_CMD_OP_CREATE_XRQ; + break; + default: + if (!dev->mdev->issi) + opcode = MLX5_CMD_OP_CREATE_SRQ; + else + opcode = MLX5_CMD_OP_CREATE_RMP; + } + + return get_enc_obj_id(opcode, + 
to_msrq(uobj->object)->msrq.srqn) == + obj_id; } - if (obj_id == obj->obj_id) - return true; + case UVERBS_OBJECT_QP: + { + struct mlx5_ib_qp *qp = to_mqp(uobj->object); + enum ib_qp_type qp_type = qp->ibqp.qp_type; + + if (qp_type == IB_QPT_RAW_PACKET || + (qp->flags & MLX5_IB_QP_UNDERLAY)) { + struct mlx5_ib_raw_packet_qp *raw_packet_qp = + &qp->raw_packet_qp; + struct mlx5_ib_rq *rq = &raw_packet_qp->rq; + struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + + return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + rq->base.mqp.qpn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ, + sq->base.mqp.qpn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR, + rq->tirn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS, + sq->tisn) == obj_id); + } + + if (qp_type == MLX5_IB_QPT_DCT) + return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, + qp->dct.mdct.mqp.qpn) == obj_id; + + return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + qp->ibqp.qp_num) == obj_id; + } - return false; + case UVERBS_OBJECT_WQ: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + to_mrwq(uobj->object)->core_qp.qpn) == + obj_id; + + case UVERBS_OBJECT_RWQ_IND_TBL: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT, + to_mrwq_ind_table(uobj->object)->rqtn) == + obj_id; + + case MLX5_IB_OBJECT_DEVX_OBJ: + return ((struct devx_obj *)uobj->object)->obj_id == obj_id; + + default: + return false; + } } static void devx_set_umem_valid(const void *in) @@ -494,6 +604,7 @@ static bool devx_is_obj_modify_cmd(const void *in) case MLX5_CMD_OP_DRAIN_DCT: case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: return true; case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: { @@ -535,6 +646,9 @@ static bool devx_is_obj_query_cmd(const void *in) case MLX5_CMD_OP_QUERY_XRC_SRQ: case MLX5_CMD_OP_QUERY_DCT: case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS: + case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT: return true; default: return 
false; @@ -572,15 +686,16 @@ static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in) if (!c->devx_uid) return -EINVAL; - if (!capable(CAP_NET_RAW)) - return -EPERM; - return c->devx_uid; } static bool devx_is_general_cmd(void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + if (opcode >= MLX5_CMD_OP_GENERAL_START && + opcode < MLX5_CMD_OP_GENERAL_END) + return true; + switch (opcode) { case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: @@ -603,7 +718,7 @@ static bool devx_is_general_cmd(void *in) } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; @@ -616,7 +731,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC)) return -EFAULT; - c = devx_ufile2uctx(file); + c = devx_ufile2uctx(attrs); if (IS_ERR(c)) return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); @@ -653,14 +768,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( * queue or arm its CQ for event generation), no further harm is expected. 
*/ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; u32 user_idx; s32 dev_idx; - c = devx_ufile2uctx(file); + c = devx_ufile2uctx(attrs); if (IS_ERR(c)) return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); @@ -681,7 +796,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)( } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; @@ -693,7 +808,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( int err; int uid; - c = devx_ufile2uctx(file); + c = devx_ufile2uctx(attrs); if (IS_ERR(c)) return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); @@ -740,6 +855,10 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); break; + case MLX5_CMD_OP_CREATE_UMEM: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_UMEM); + break; case MLX5_CMD_OP_CREATE_MKEY: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); break; @@ -908,7 +1027,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -970,7 +1089,7 @@ obj_free: } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -978,7 +1097,7 @@ 
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE); struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); - struct devx_obj *obj = uobj->object; + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); void *cmd_out; int err; int uid; @@ -990,7 +1109,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( if (!devx_is_obj_modify_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(obj, cmd_in)) + if (!devx_is_valid_obj_id(uobj, cmd_in)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1000,7 +1119,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); devx_set_umem_valid(cmd_in); - err = mlx5_cmd_exec(obj->mdev, cmd_in, + err = mlx5_cmd_exec(mdev->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), cmd_out, cmd_out_len); if (err) @@ -1011,7 +1130,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -1019,10 +1138,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE); struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); - struct devx_obj *obj = uobj->object; void *cmd_out; int err; int uid; + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); uid = devx_get_uid(c, cmd_in); if (uid < 0) @@ -1031,7 +1150,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( if (!devx_is_obj_query_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(obj, cmd_in)) + if (!devx_is_valid_obj_id(uobj, cmd_in)) return -EINVAL; cmd_out = 
uverbs_zalloc(attrs, cmd_out_len); @@ -1039,7 +1158,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( return PTR_ERR(cmd_out); MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); - err = mlx5_cmd_exec(obj->mdev, cmd_in, + err = mlx5_cmd_exec(mdev->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), cmd_out, cmd_out_len); if (err) @@ -1115,8 +1234,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); - MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM); + MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM); MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); MLX5_SET(umem, umem, log_page_size, obj->page_shift - MLX5_ADAPTER_PAGE_SHIFT); @@ -1127,7 +1245,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct devx_umem_reg_cmd cmd; struct devx_umem *obj; @@ -1141,9 +1259,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( if (!c->devx_uid) return -EINVAL; - if (!capable(CAP_NET_RAW)) - return -EPERM; - obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); if (!obj) return -ENOMEM; @@ -1158,7 +1273,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( devx_umem_reg_cmd_build(dev, obj, &cmd); - MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid); + MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid); err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, sizeof(cmd.out)); if (err) @@ -1279,7 +1394,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_MODIFY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, - MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_IDR_ANY_OBJECT, 
UVERBS_ACCESS_WRITE, UA_MANDATORY), UVERBS_ATTR_PTR_IN( @@ -1295,7 +1410,7 @@ DECLARE_UVERBS_NAMED_METHOD( DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_QUERY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, - MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_IDR_ANY_OBJECT, UVERBS_ACCESS_READ, UA_MANDATORY), UVERBS_ATTR_PTR_IN( @@ -1325,12 +1440,22 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); -DECLARE_UVERBS_OBJECT_TREE(devx_objects, - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ), - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM)); - -const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void) +static bool devx_is_supported(struct ib_device *device) { - return &devx_objects; + struct mlx5_ib_dev *dev = to_mdev(device); + + return !dev->rep && MLX5_CAP_GEN(dev->mdev, log_max_uctx); } + +const struct uapi_definition mlx5_ib_devx_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_OBJ, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_UMEM, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + {}, +}; diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index f86cdcafdafc..e8a1e4498e3f 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -60,7 +60,7 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { #define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; struct mlx5_ib_flow_handler *flow_handler; @@ -77,6 +77,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( 
uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); int len, ret, i; + u32 counter_id = 0; if (!capable(CAP_NET_RAW)) return -EPERM; @@ -92,10 +93,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))) return -EINVAL; - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS && - (dest_devx || dest_qp)) - return -EINVAL; - if (dest_devx) { devx_obj = uverbs_attr_get_obj( attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); @@ -128,8 +125,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; } - if (dev->rep) - return -ENOTSUPP; + len = uverbs_attr_get_uobjs_arr(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); + if (len) { + devx_obj = arr_flow_actions[0]->object; + + if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id)) + return -EINVAL; + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + } + + if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) + return -EINVAL; cmd_in = uverbs_attr_get_alloced_ptr( attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); @@ -164,6 +172,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( } flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act, + counter_id, cmd_in, inlen, dest_id, dest_type); if (IS_ERR(flow_handler)) { @@ -194,7 +203,7 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, } static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); @@ -313,7 +322,6 @@ static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) } static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( - struct ib_uverbs_file *file, struct 
uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( @@ -321,9 +329,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); enum mlx5_ib_uapi_flow_table_type ft_type; struct ib_flow_action *action; - size_t num_actions; + int num_actions; void *in; - int len; int ret; if (!mlx5_ib_modify_header_supported(mdev)) @@ -331,18 +338,17 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); - len = uverbs_attr_get_len(attrs, - MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); - if (len % MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)) - return -EINVAL; + num_actions = uverbs_attr_ptr_get_array_size( + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)); + if (num_actions < 0) + return num_actions; ret = uverbs_get_const(&ft_type, attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); if (ret) return ret; - - num_actions = len / MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto), action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); if (IS_ERR(action)) return PTR_ERR(action); @@ -435,7 +441,6 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx( } static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, @@ -526,7 +531,11 @@ DECLARE_UVERBS_NAMED_METHOD( UA_OPTIONAL), UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, UVERBS_ATTR_TYPE(u32), - UA_OPTIONAL)); + UA_OPTIONAL), + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, 1, 1, + UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_DESTROY_FLOW, @@ -610,16 +619,20 @@ 
DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); -DECLARE_UVERBS_OBJECT_TREE(flow_objects, - &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER)); - -int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) +static bool flow_is_supported(struct ib_device *device) { - int i = 0; - - root[i++] = &flow_objects; - root[i++] = &mlx5_ib_fs; - root[i++] = &mlx5_ib_flow_actions; - - return i; + return !to_mdev(device)->rep; } + +const struct uapi_definition mlx5_ib_flow_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_FLOW_MATCHER, + UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE( + UVERBS_OBJECT_FLOW, + &mlx5_ib_fs, + UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, + &mlx5_ib_flow_actions), + {}, +}; diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 584ff2ea7810..46a9ddc8ca56 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -4,6 +4,7 @@ */ #include "ib_rep.h" +#include "srq.h" static const struct mlx5_ib_profile rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, @@ -21,6 +22,9 @@ static const struct mlx5_ib_profile rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_rep_roce_init, mlx5_ib_stage_rep_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), @@ -44,13 +48,21 @@ static const struct mlx5_ib_profile rep_profile = { static int mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { + struct mlx5_ib_dev *ibdev; + + ibdev = mlx5_ib_rep_to_dev(rep); + if (!__mlx5_ib_add(ibdev, ibdev->profile)) + return -EINVAL; return 0; } static void mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep) { - 
rep->rep_if[REP_IB].priv = NULL; + struct mlx5_ib_dev *ibdev; + + ibdev = mlx5_ib_rep_to_dev(rep); + __mlx5_ib_remove(ibdev, ibdev->profile, MLX5_IB_STAGE_MAX); } static int @@ -85,6 +97,7 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) dev = mlx5_ib_rep_to_dev(rep); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); rep->rep_if[REP_IB].priv = NULL; + ib_dealloc_device(&dev->ib_dev); } static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 32a9e9228b13..558638468edb 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -526,11 +526,6 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, int ext_active_speed; int err = -ENOMEM; - if (port < 1 || port > dev->num_ports) { - mlx5_ib_warn(dev, "invalid port number %d\n", port); - return -EINVAL; - } - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!in_mad || !out_mad) @@ -568,6 +563,14 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; + if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) { + props->port_cap_flags2 = + be16_to_cpup((__be16 *)(out_mad->data + 60)); + + if (props->port_cap_flags2 & IB_PORT_LINK_WIDTH_2X_SUP) + props->active_width = out_mad->data[31] & 0x1f; + } + /* Check if extended speeds (EDR/FDR/...) 
are supported */ if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { ext_active_speed = out_mad->data[62] >> 4; @@ -579,6 +582,11 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, case 2: props->active_speed = 32; /* EDR */ break; + case 4: + if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP && + props->port_cap_flags2 & IB_PORT_LINK_SPEED_HDR_SUP) + props->active_speed = IB_SPEED_HDR; + break; } } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3569fda07e07..94fe253d4956 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -60,6 +60,7 @@ #include "mlx5_ib.h" #include "ib_rep.h" #include "cmd.h" +#include "srq.h" #include <linux/mlx5/fs_helpers.h> #include <linux/mlx5/accel.h> #include <rdma/uverbs_std_types.h> @@ -82,10 +83,13 @@ static char mlx5_version[] = struct mlx5_ib_event_work { struct work_struct work; - struct mlx5_core_dev *dev; - void *context; - enum mlx5_dev_event event; - unsigned long param; + union { + struct mlx5_ib_dev *dev; + struct mlx5_ib_multiport_info *mpi; + }; + bool is_slave; + unsigned int event; + void *param; }; enum { @@ -146,7 +150,7 @@ static int get_port_state(struct ib_device *ibdev, int ret; memset(&attr, 0, sizeof(attr)); - ret = ibdev->query_port(ibdev, port_num, &attr); + ret = ibdev->ops.query_port(ibdev, port_num, &attr); if (!ret) *state = attr.state; return ret; @@ -168,7 +172,6 @@ static int mlx5_netdev_event(struct notifier_block *this, switch (event) { case NETDEV_REGISTER: - case NETDEV_UNREGISTER: write_lock(&roce->netdev_lock); if (ibdev->rep) { struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch; @@ -177,15 +180,20 @@ static int mlx5_netdev_event(struct notifier_block *this, rep_ndev = mlx5_ib_get_rep_netdev(esw, ibdev->rep->vport); if (rep_ndev == ndev) - roce->netdev = (event == NETDEV_UNREGISTER) ? 
- NULL : ndev; + roce->netdev = ndev; } else if (ndev->dev.parent == &mdev->pdev->dev) { - roce->netdev = (event == NETDEV_UNREGISTER) ? - NULL : ndev; + roce->netdev = ndev; } write_unlock(&roce->netdev_lock); break; + case NETDEV_UNREGISTER: + write_lock(&roce->netdev_lock); + if (roce->netdev == ndev) + roce->netdev = NULL; + write_unlock(&roce->netdev_lock); + break; + case NETDEV_CHANGE: case NETDEV_UP: case NETDEV_DOWN: { @@ -441,7 +449,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, if (!ndev) goto out; - if (mlx5_lag_is_active(dev->mdev)) { + if (dev->lag_active) { rcu_read_lock(); upper = netdev_master_upper_dev_get_rcu(ndev); if (upper) { @@ -1014,6 +1022,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, cqe_128_always)) resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD; + if (MLX5_CAP_GEN(mdev, qp_packet_based)) + resp.flags |= + MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE; } if (field_avail(typeof(resp), sw_parsing_caps, @@ -1101,6 +1112,8 @@ static void translate_active_width(struct ib_device *ibdev, u8 active_width, if (active_width & MLX5_IB_WIDTH_1X) *ib_width = IB_WIDTH_1X; + else if (active_width & MLX5_IB_WIDTH_2X) + *ib_width = IB_WIDTH_2X; else if (active_width & MLX5_IB_WIDTH_4X) *ib_width = IB_WIDTH_4X; else if (active_width & MLX5_IB_WIDTH_8X) @@ -1216,6 +1229,9 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, props->subnet_timeout = rep->subnet_timeout; props->init_type_reply = rep->init_type_reply; + if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) + props->port_cap_flags2 = rep->cap_mask2; + err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); if (err) goto out; @@ -1752,7 +1768,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, #endif if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { - err = mlx5_ib_devx_create(dev); + err = mlx5_ib_devx_create(dev, true); if (err < 0) goto out_uars; context->devx_uid = 
err; @@ -1844,7 +1860,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->lib_caps = req.lib_caps; print_lib_caps(dev, context->lib_caps); - if (mlx5_lag_is_active(dev->mdev)) { + if (dev->lag_active) { u8 port = mlx5_core_native_port_num(dev->mdev); atomic_set(&context->tx_port_affinity, @@ -2669,11 +2685,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, ntohs(ib_spec->gre.val.protocol)); memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, - gre_key_h), + gre_key.nvgre.hi), &ib_spec->gre.mask.key, sizeof(ib_spec->gre.mask.key)); memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, - gre_key_h), + gre_key.nvgre.hi), &ib_spec->gre.val.key, sizeof(ib_spec->gre.val.key)); break; @@ -3706,7 +3722,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_flow_destination *dst, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_act *flow_act, - void *cmd_in, int inlen) + void *cmd_in, int inlen, + int dst_num) { struct mlx5_ib_flow_handler *handler; struct mlx5_flow_spec *spec; @@ -3728,7 +3745,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, spec->match_criteria_enable = fs_matcher->match_criteria_enable; handler->rule = mlx5_add_flow_rules(ft, spec, - flow_act, dst, 1); + flow_act, dst, dst_num); if (IS_ERR(handler->rule)) { err = PTR_ERR(handler->rule); @@ -3791,12 +3808,14 @@ struct mlx5_ib_flow_handler * mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_act *flow_act, + u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type) { struct mlx5_flow_destination *dst; struct mlx5_ib_flow_prio *ft_prio; struct mlx5_ib_flow_handler *handler; + int dst_num = 0; bool mcast; int err; @@ -3806,7 +3825,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); - dst = kzalloc(sizeof(*dst), GFP_KERNEL); + dst = kzalloc(sizeof(*dst) * 2, GFP_KERNEL); if (!dst) 
return ERR_PTR(-ENOMEM); @@ -3820,20 +3839,28 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, } if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { - dst->type = dest_type; - dst->tir_num = dest_id; + dst[dst_num].type = dest_type; + dst[dst_num].tir_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { - dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; - dst->ft_num = dest_id; + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; + dst[dst_num].ft_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else { - dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_PORT; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; } + dst_num++; + + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst[dst_num].counter_id = counter_id; + dst_num++; + } + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act, - cmd_in, inlen); + cmd_in, inlen, dst_num); if (IS_ERR(handler)) { err = PTR_ERR(handler); @@ -4226,6 +4253,63 @@ static void delay_drop_handler(struct work_struct *work) mutex_unlock(&delay_drop->lock); } +static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, + struct ib_event *ibev) +{ + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: + schedule_work(&ibdev->delay_drop.delay_drop_work); + break; + default: /* do nothing */ + return; + } +} + +static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, + struct ib_event *ibev) +{ + u8 port = (eqe->data.port.port >> 4) & 0xf; + + ibev->element.port_num = port; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + /* In RoCE, port up/down events are handled in + * mlx5_netdev_event(). 
+ */ + if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == + IB_LINK_LAYER_ETHERNET) + return -EINVAL; + + ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ? + IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_LID: + ibev->event = IB_EVENT_LID_CHANGE; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + ibev->event = IB_EVENT_PKEY_CHANGE; + schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); + break; + + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + ibev->event = IB_EVENT_GID_CHANGE; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + ibev->event = IB_EVENT_CLIENT_REREGISTER; + break; + default: + return -EINVAL; + } + + return 0; +} + static void mlx5_ib_handle_event(struct work_struct *_work) { struct mlx5_ib_event_work *work = @@ -4233,65 +4317,37 @@ static void mlx5_ib_handle_event(struct work_struct *_work) struct mlx5_ib_dev *ibdev; struct ib_event ibev; bool fatal = false; - u8 port = (u8)work->param; - if (mlx5_core_is_mp_slave(work->dev)) { - ibdev = mlx5_ib_get_ibdev_from_mpi(work->context); + if (work->is_slave) { + ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi); if (!ibdev) goto out; } else { - ibdev = work->context; + ibdev = work->dev; } switch (work->event) { case MLX5_DEV_EVENT_SYS_ERROR: ibev.event = IB_EVENT_DEVICE_FATAL; mlx5_ib_handle_internal_error(ibdev); + ibev.element.port_num = (u8)(unsigned long)work->param; fatal = true; break; - - case MLX5_DEV_EVENT_PORT_UP: - case MLX5_DEV_EVENT_PORT_DOWN: - case MLX5_DEV_EVENT_PORT_INITIALIZED: - /* In RoCE, port up/down events are handled in - * mlx5_netdev_event(). - */ - if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == - IB_LINK_LAYER_ETHERNET) + case MLX5_EVENT_TYPE_PORT_CHANGE: + if (handle_port_change(ibdev, work->param, &ibev)) goto out; - - ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ? 
- IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; - break; - - case MLX5_DEV_EVENT_LID_CHANGE: - ibev.event = IB_EVENT_LID_CHANGE; - break; - - case MLX5_DEV_EVENT_PKEY_CHANGE: - ibev.event = IB_EVENT_PKEY_CHANGE; - schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); break; - - case MLX5_DEV_EVENT_GUID_CHANGE: - ibev.event = IB_EVENT_GID_CHANGE; - break; - - case MLX5_DEV_EVENT_CLIENT_REREG: - ibev.event = IB_EVENT_CLIENT_REREGISTER; - break; - case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: - schedule_work(&ibdev->delay_drop.delay_drop_work); - goto out; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + handle_general_event(ibdev, work->param, &ibev); + /* fall through */ default: goto out; } - ibev.device = &ibdev->ib_dev; - ibev.element.port_num = port; + ibev.device = &ibdev->ib_dev; - if (!rdma_is_port_valid(&ibdev->ib_dev, port)) { - mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); + if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) { + mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num); goto out; } @@ -4304,22 +4360,43 @@ out: kfree(work); } -static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, - enum mlx5_dev_event event, unsigned long param) +static int mlx5_ib_event(struct notifier_block *nb, + unsigned long event, void *param) { struct mlx5_ib_event_work *work; work = kmalloc(sizeof(*work), GFP_ATOMIC); if (!work) - return; + return NOTIFY_DONE; INIT_WORK(&work->work, mlx5_ib_handle_event); - work->dev = dev; + work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events); + work->is_slave = false; work->param = param; - work->context = context; work->event = event; queue_work(mlx5_ib_event_wq, &work->work); + + return NOTIFY_OK; +} + +static int mlx5_ib_event_slave_port(struct notifier_block *nb, + unsigned long event, void *param) +{ + struct mlx5_ib_event_work *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return NOTIFY_DONE; + + INIT_WORK(&work->work, mlx5_ib_handle_event); + 
work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events); + work->is_slave = true; + work->param = param; + work->event = event; + queue_work(mlx5_ib_event_wq, &work->work); + + return NOTIFY_OK; } static int set_has_smi_cap(struct mlx5_ib_dev *dev) @@ -4787,7 +4864,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) struct mlx5_flow_table *ft; int err; - if (!ns || !mlx5_lag_is_active(mdev)) + if (!ns || !mlx5_lag_is_roce(mdev)) return 0; err = mlx5_cmd_create_vport_lag(mdev); @@ -4801,6 +4878,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) } dev->flow_db->lag_demux_ft = ft; + dev->lag_active = true; return 0; err_destroy_vport_lag: @@ -4812,7 +4890,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; - if (dev->flow_db->lag_demux_ft) { + if (dev->lag_active) { + dev->lag_active = false; + mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft); dev->flow_db->lag_demux_ft = NULL; @@ -5038,6 +5118,9 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) { int err = 0; int i; + bool is_shared; + + is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; for (i = 0; i < dev->num_ports; i++) { err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); @@ -5047,8 +5130,10 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, dev->port[i].cnts.offsets); - err = mlx5_core_alloc_q_counter(dev->mdev, - &dev->port[i].cnts.set_id); + err = mlx5_cmd_alloc_q_counter(dev->mdev, + &dev->port[i].cnts.set_id, + is_shared ? 
+ MLX5_SHARED_RESOURCE_UID : 0); if (err) { mlx5_ib_warn(dev, "couldn't allocate queue counter for port %d, err %d\n", @@ -5325,14 +5410,6 @@ static void init_delay_drop(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n"); } -static const struct cpumask * -mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) -{ - struct mlx5_ib_dev *dev = to_mdev(ibdev); - - return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector); -} - /* The mlx5_ib_multiport_mutex should be held when calling this function */ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) @@ -5350,6 +5427,11 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, spin_unlock(&port->mp.mpi_lock); return; } + + if (mpi->mdev_events.notifier_call) + mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events); + mpi->mdev_events.notifier_call = NULL; + mpi->ibdev = NULL; spin_unlock(&port->mp.mpi_lock); @@ -5405,6 +5487,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, ibdev->port[port_num].mp.mpi = mpi; mpi->ibdev = ibdev; + mpi->mdev_events.notifier_call = NULL; spin_unlock(&ibdev->port[port_num].mp.mpi_lock); err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev); @@ -5422,6 +5505,9 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, goto unbind; } + mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port; + mlx5_notifier_register(mpi->mdev, &mpi->mdev_events); + err = mlx5_ib_init_cong_debugfs(ibdev, port_num); if (err) goto unbind; @@ -5551,30 +5637,17 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, enum mlx5_ib_uapi_flow_action_flags)); -static int populate_specs_root(struct mlx5_ib_dev *dev) -{ - const struct uverbs_object_tree_def **trees = dev->driver_trees; - size_t num_trees = 0; - - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - trees[num_trees++] = &mlx5_ib_flow_action; - - 
if (MLX5_CAP_DEV_MEM(dev->mdev, memic)) - trees[num_trees++] = &mlx5_ib_dm; - - if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & - MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) - trees[num_trees++] = mlx5_ib_get_devx_tree(); - - num_trees += mlx5_ib_get_flow_trees(trees + num_trees); - - WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees)); - trees[num_trees] = NULL; - dev->ib_dev.driver_specs = trees; +static const struct uapi_definition mlx5_ib_defs[] = { +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) + UAPI_DEF_CHAIN(mlx5_ib_devx_defs), + UAPI_DEF_CHAIN(mlx5_ib_flow_defs), +#endif - return 0; -} + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, + &mlx5_ib_flow_action), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm), + {} +}; static int mlx5_ib_read_counters(struct ib_counters *counters, struct ib_counters_read_attr *read_attr, @@ -5651,6 +5724,8 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) mlx5_ib_cleanup_multiport_master(dev); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING cleanup_srcu_struct(&dev->mr_srcu); + drain_workqueue(dev->advise_mr_wq); + destroy_workqueue(dev->advise_mr_wq); #endif kfree(dev->port); } @@ -5694,8 +5769,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; dev->ib_dev.phys_port_cnt = dev->num_ports; - dev->ib_dev.num_comp_vectors = - dev->mdev->priv.eq_table.num_comp_vectors; + dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); dev->ib_dev.dev.parent = &mdev->pdev->dev; mutex_init(&dev->cap_mask_mutex); @@ -5706,9 +5780,17 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->memic.dev = mdev; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + dev->advise_mr_wq = alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0); + if (!dev->advise_mr_wq) { + err = -ENOMEM; + goto err_mp; + } + err = init_srcu_struct(&dev->mr_srcu); - if (err) - goto err_free_port; + if (err) { + destroy_workqueue(dev->advise_mr_wq); + goto 
err_mp; + } #endif return 0; @@ -5752,6 +5834,94 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) kfree(dev->flow_db); } +static const struct ib_device_ops mlx5_ib_dev_ops = { + .add_gid = mlx5_ib_add_gid, + .alloc_mr = mlx5_ib_alloc_mr, + .alloc_pd = mlx5_ib_alloc_pd, + .alloc_ucontext = mlx5_ib_alloc_ucontext, + .attach_mcast = mlx5_ib_mcg_attach, + .check_mr_status = mlx5_ib_check_mr_status, + .create_ah = mlx5_ib_create_ah, + .create_counters = mlx5_ib_create_counters, + .create_cq = mlx5_ib_create_cq, + .create_flow = mlx5_ib_create_flow, + .create_qp = mlx5_ib_create_qp, + .create_srq = mlx5_ib_create_srq, + .dealloc_pd = mlx5_ib_dealloc_pd, + .dealloc_ucontext = mlx5_ib_dealloc_ucontext, + .del_gid = mlx5_ib_del_gid, + .dereg_mr = mlx5_ib_dereg_mr, + .destroy_ah = mlx5_ib_destroy_ah, + .destroy_counters = mlx5_ib_destroy_counters, + .destroy_cq = mlx5_ib_destroy_cq, + .destroy_flow = mlx5_ib_destroy_flow, + .destroy_flow_action = mlx5_ib_destroy_flow_action, + .destroy_qp = mlx5_ib_destroy_qp, + .destroy_srq = mlx5_ib_destroy_srq, + .detach_mcast = mlx5_ib_mcg_detach, + .disassociate_ucontext = mlx5_ib_disassociate_ucontext, + .drain_rq = mlx5_ib_drain_rq, + .drain_sq = mlx5_ib_drain_sq, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = mlx5_ib_get_dma_mr, + .get_link_layer = mlx5_ib_port_link_layer, + .map_mr_sg = mlx5_ib_map_mr_sg, + .mmap = mlx5_ib_mmap, + .modify_cq = mlx5_ib_modify_cq, + .modify_device = mlx5_ib_modify_device, + .modify_port = mlx5_ib_modify_port, + .modify_qp = mlx5_ib_modify_qp, + .modify_srq = mlx5_ib_modify_srq, + .poll_cq = mlx5_ib_poll_cq, + .post_recv = mlx5_ib_post_recv, + .post_send = mlx5_ib_post_send, + .post_srq_recv = mlx5_ib_post_srq_recv, + .process_mad = mlx5_ib_process_mad, + .query_ah = mlx5_ib_query_ah, + .query_device = mlx5_ib_query_device, + .query_gid = mlx5_ib_query_gid, + .query_pkey = mlx5_ib_query_pkey, + .query_qp = mlx5_ib_query_qp, + .query_srq = mlx5_ib_query_srq, + .read_counters = 
mlx5_ib_read_counters, + .reg_user_mr = mlx5_ib_reg_user_mr, + .req_notify_cq = mlx5_ib_arm_cq, + .rereg_user_mr = mlx5_ib_rereg_user_mr, + .resize_cq = mlx5_ib_resize_cq, +}; + +static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = { + .create_flow_action_esp = mlx5_ib_create_flow_action_esp, + .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, +}; + +static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { + .rdma_netdev_get_params = mlx5_ib_rn_get_params, +}; + +static const struct ib_device_ops mlx5_ib_dev_sriov_ops = { + .get_vf_config = mlx5_ib_get_vf_config, + .get_vf_stats = mlx5_ib_get_vf_stats, + .set_vf_guid = mlx5_ib_set_vf_guid, + .set_vf_link_state = mlx5_ib_set_vf_link_state, +}; + +static const struct ib_device_ops mlx5_ib_dev_mw_ops = { + .alloc_mw = mlx5_ib_alloc_mw, + .dealloc_mw = mlx5_ib_dealloc_mw, +}; + +static const struct ib_device_ops mlx5_ib_dev_xrc_ops = { + .alloc_xrcd = mlx5_ib_alloc_xrcd, + .dealloc_xrcd = mlx5_ib_dealloc_xrcd, +}; + +static const struct ib_device_ops mlx5_ib_dev_dm_ops = { + .alloc_dm = mlx5_ib_alloc_dm, + .dealloc_dm = mlx5_ib_dealloc_dm, + .reg_dm_mr = mlx5_ib_reg_dm_mr, +}; + int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; @@ -5790,104 +5960,45 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - - dev->ib_dev.query_device = mlx5_ib_query_device; - dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer; - dev->ib_dev.query_gid = mlx5_ib_query_gid; - dev->ib_dev.add_gid = mlx5_ib_add_gid; - dev->ib_dev.del_gid = mlx5_ib_del_gid; - dev->ib_dev.query_pkey = mlx5_ib_query_pkey; - dev->ib_dev.modify_device = mlx5_ib_modify_device; - dev->ib_dev.modify_port = mlx5_ib_modify_port; - dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext; - dev->ib_dev.dealloc_ucontext = 
mlx5_ib_dealloc_ucontext; - dev->ib_dev.mmap = mlx5_ib_mmap; - dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd; - dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd; - dev->ib_dev.create_ah = mlx5_ib_create_ah; - dev->ib_dev.query_ah = mlx5_ib_query_ah; - dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah; - dev->ib_dev.create_srq = mlx5_ib_create_srq; - dev->ib_dev.modify_srq = mlx5_ib_modify_srq; - dev->ib_dev.query_srq = mlx5_ib_query_srq; - dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq; - dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv; - dev->ib_dev.create_qp = mlx5_ib_create_qp; - dev->ib_dev.modify_qp = mlx5_ib_modify_qp; - dev->ib_dev.query_qp = mlx5_ib_query_qp; - dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; - dev->ib_dev.drain_sq = mlx5_ib_drain_sq; - dev->ib_dev.drain_rq = mlx5_ib_drain_rq; - dev->ib_dev.post_send = mlx5_ib_post_send; - dev->ib_dev.post_recv = mlx5_ib_post_recv; - dev->ib_dev.create_cq = mlx5_ib_create_cq; - dev->ib_dev.modify_cq = mlx5_ib_modify_cq; - dev->ib_dev.resize_cq = mlx5_ib_resize_cq; - dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq; - dev->ib_dev.poll_cq = mlx5_ib_poll_cq; - dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; - dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; - dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; - dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr; - dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; - dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; - dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; - dev->ib_dev.process_mad = mlx5_ib_process_mad; - dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr; - dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; - dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; - dev->ib_dev.get_dev_fw_str = get_dev_fw_str; - dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity; + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) - 
dev->ib_dev.rdma_netdev_get_params = mlx5_ib_rn_get_params; - - if (mlx5_core_is_pf(mdev)) { - dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; - dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; - dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats; - dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid; - } + ib_set_device_ops(&dev->ib_dev, + &mlx5_ib_dev_ipoib_enhanced_ops); - dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext; + if (mlx5_core_is_pf(mdev)) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_sriov_ops); dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); if (MLX5_CAP_GEN(mdev, imaicl)) { - dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; - dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops); } if (MLX5_CAP_GEN(mdev, xrc)) { - dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; - dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); } - if (MLX5_CAP_DEV_MEM(mdev, memic)) { - dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm; - dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm; - dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr; - } + if (MLX5_CAP_DEV_MEM(mdev, memic)) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops); - dev->ib_dev.create_flow = mlx5_ib_create_flow; - dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); - dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp; - dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action; - dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) + 
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops); dev->ib_dev.driver_id = RDMA_DRIVER_MLX5; - dev->ib_dev.create_counters = mlx5_ib_create_counters; - dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters; - dev->ib_dev.read_counters = mlx5_ib_read_counters; + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops); + + if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) + dev->ib_dev.driver_def = mlx5_ib_defs; err = init_node_data(dev); if (err) @@ -5901,22 +6012,37 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) return 0; } +static const struct ib_device_ops mlx5_ib_dev_port_ops = { + .get_port_immutable = mlx5_port_immutable, + .query_port = mlx5_ib_query_port, +}; + static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev) { - dev->ib_dev.get_port_immutable = mlx5_port_immutable; - dev->ib_dev.query_port = mlx5_ib_query_port; - + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_ops); return 0; } +static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { + .get_port_immutable = mlx5_port_rep_immutable, + .query_port = mlx5_ib_rep_query_port, +}; + int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) { - dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable; - dev->ib_dev.query_port = mlx5_ib_rep_query_port; - + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops); return 0; } +static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { + .create_rwq_ind_table = mlx5_ib_create_rwq_ind_table, + .create_wq = mlx5_ib_create_wq, + .destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table, + .destroy_wq = mlx5_ib_destroy_wq, + .get_netdev = mlx5_ib_get_netdev, + .modify_wq = mlx5_ib_modify_wq, +}; + static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) { u8 port_num; @@ -5928,19 +6054,13 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) dev->roce[i].last_port_state = IB_PORT_DOWN; } - dev->ib_dev.get_netdev = mlx5_ib_get_netdev; - dev->ib_dev.create_wq = mlx5_ib_create_wq; - 
dev->ib_dev.modify_wq = mlx5_ib_modify_wq; - dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; - dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; - dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; - dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); port_num = mlx5_core_native_port_num(dev->mdev) - 1; @@ -6034,11 +6154,20 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) return mlx5_ib_odp_init_one(dev); } +void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) +{ + mlx5_ib_odp_cleanup_one(dev); +} + +static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = { + .alloc_hw_stats = mlx5_ib_alloc_hw_stats, + .get_hw_stats = mlx5_ib_get_hw_stats, +}; + int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { - dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats; - dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats; + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops); return mlx5_ib_alloc_counters(dev); } @@ -6096,17 +6225,12 @@ void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) mlx5_free_bfreg(dev->mdev, &dev->bfreg); } -static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev) -{ - return populate_specs_root(dev); -} - int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { const char *name; rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group); - if (!mlx5_lag_is_active(dev->mdev)) + if (!mlx5_lag_is_roce(dev->mdev)) name = "mlx5_%d"; else name = "mlx5_bond_%d"; @@ -6140,16 +6264,32 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev) cancel_delay_drop(dev); } -static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev) +static int 
mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev) { - mlx5_ib_register_vport_reps(dev); - + dev->mdev_events.notifier_call = mlx5_ib_event; + mlx5_notifier_register(dev->mdev, &dev->mdev_events); return 0; } -static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev) +{ + mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); +} + +static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) +{ + int uid; + + uid = mlx5_ib_devx_create(dev, false); + if (uid > 0) + dev->devx_whitelist_uid = uid; + + return 0; +} +static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) { - mlx5_ib_unregister_vport_reps(dev); + if (dev->devx_whitelist_uid) + mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); } void __mlx5_ib_remove(struct mlx5_ib_dev *dev, @@ -6162,10 +6302,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, if (profile->stage[stage].cleanup) profile->stage[stage].cleanup(dev); } - - if (dev->devx_whitelist_uid) - mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); - ib_dealloc_device((struct ib_device *)dev); } void *__mlx5_ib_add(struct mlx5_ib_dev *dev, @@ -6173,7 +6309,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, { int err; int i; - int uid; for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { if (profile->stage[i].init) { @@ -6183,10 +6318,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, } } - uid = mlx5_ib_devx_create(dev); - if (uid > 0) - dev->devx_whitelist_uid = uid; - dev->profile = profile; dev->ib_active = true; @@ -6214,12 +6345,18 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_roce_init, mlx5_ib_stage_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, + mlx5_ib_stage_dev_notifier_init, + 
mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_ODP, mlx5_ib_stage_odp_init, - NULL), + mlx5_ib_stage_odp_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, mlx5_ib_stage_counters_init, mlx5_ib_stage_counters_cleanup), @@ -6235,9 +6372,9 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_SPECS, - mlx5_ib_stage_populate_specs, - NULL), + STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, + mlx5_ib_stage_devx_init, + mlx5_ib_stage_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -6265,9 +6402,15 @@ static const struct mlx5_ib_profile nic_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_rep_roce_init, mlx5_ib_stage_rep_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, + mlx5_ib_stage_dev_notifier_init, + mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, mlx5_ib_stage_counters_init, mlx5_ib_stage_counters_cleanup), @@ -6280,18 +6423,12 @@ static const struct mlx5_ib_profile nic_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_SPECS, - mlx5_ib_stage_populate_specs, - NULL), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, mlx5_ib_stage_post_ib_reg_umr_init, NULL), - STAGE_CREATE(MLX5_IB_STAGE_REP_REG, - mlx5_ib_stage_rep_reg_init, - mlx5_ib_stage_rep_reg_cleanup), }; static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) @@ -6359,8 +6496,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (MLX5_ESWITCH_MANAGER(mdev) && mlx5_ib_eswitch_mode(mdev->priv.eswitch) == 
SRIOV_OFFLOADS) { dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0); - - return __mlx5_ib_add(dev, &nic_rep_profile); + dev->profile = &nic_rep_profile; + mlx5_ib_register_vport_reps(dev); + return dev; } return __mlx5_ib_add(dev, &pf_profile); @@ -6382,16 +6520,17 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) } dev = context; - __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); + if (dev->profile == &nic_rep_profile) + mlx5_ib_unregister_vport_reps(dev); + else + __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); + + ib_dealloc_device((struct ib_device *)dev); } static struct mlx5_interface mlx5_ib_interface = { .add = mlx5_ib_add, .remove = mlx5_ib_remove, - .event = mlx5_ib_event, -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - .pfault = mlx5_ib_pfault, -#endif .protocol = MLX5_INTERFACE_PROTOCOL_IB, }; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b651a7a6fde9..b06d3b1efea8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -41,8 +41,6 @@ #include <linux/mlx5/cq.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/qp.h> -#include <linux/mlx5/srq.h> -#include <linux/mlx5/fs.h> #include <linux/types.h> #include <linux/mlx5/transobj.h> #include <rdma/ib_user_verbs.h> @@ -50,6 +48,8 @@ #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include "srq.h" + #define mlx5_ib_dbg(_dev, format, arg...) 
\ dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ __LINE__, current->pid, ##arg) @@ -257,6 +257,7 @@ enum mlx5_ib_rq_flags { }; struct mlx5_ib_wq { + struct mlx5_frag_buf_ctrl fbc; u64 *wrid; u32 *wr_data; struct wr_list *w_list; @@ -274,8 +275,7 @@ struct mlx5_ib_wq { unsigned head; unsigned tail; u16 cur_post; - u16 last_poll; - void *qend; + void *cur_edge; }; enum mlx5_ib_wq_flags { @@ -460,6 +460,7 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_UNDERLAY = 1 << 10, MLX5_IB_QP_PCI_WRITE_END_PADDING = 1 << 11, MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12, + MLX5_IB_QP_PACKET_BASED_CREDIT = 1 << 13, }; struct mlx5_umr_wr { @@ -523,6 +524,7 @@ struct mlx5_ib_srq { struct mlx5_core_srq msrq; struct mlx5_frag_buf buf; struct mlx5_db db; + struct mlx5_frag_buf_ctrl fbc; u64 *wrid; /* protect SRQ hanlding */ @@ -540,7 +542,6 @@ struct mlx5_ib_srq { struct mlx5_ib_xrcd { struct ib_xrcd ibxrcd; u32 xrcdn; - u16 uid; }; enum mlx5_ib_mtt_access_flags { @@ -774,19 +775,20 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_CAPS, MLX5_IB_STAGE_NON_DEFAULT_CB, MLX5_IB_STAGE_ROCE, + MLX5_IB_STAGE_SRQ, MLX5_IB_STAGE_DEVICE_RESOURCES, + MLX5_IB_STAGE_DEVICE_NOTIFIER, MLX5_IB_STAGE_ODP, MLX5_IB_STAGE_COUNTERS, MLX5_IB_STAGE_CONG_DEBUGFS, MLX5_IB_STAGE_UAR, MLX5_IB_STAGE_BFREG, MLX5_IB_STAGE_PRE_IB_REG_UMR, - MLX5_IB_STAGE_SPECS, + MLX5_IB_STAGE_WHITELIST_UID, MLX5_IB_STAGE_IB_REG, MLX5_IB_STAGE_POST_IB_REG_UMR, MLX5_IB_STAGE_DELAY_DROP, MLX5_IB_STAGE_CLASS_ATTR, - MLX5_IB_STAGE_REP_REG, MLX5_IB_STAGE_MAX, }; @@ -806,6 +808,7 @@ struct mlx5_ib_multiport_info { struct list_head list; struct mlx5_ib_dev *ibdev; struct mlx5_core_dev *mdev; + struct notifier_block mdev_events; struct completion unref_comp; u64 sys_image_guid; u32 mdev_refcnt; @@ -880,10 +883,19 @@ struct mlx5_ib_lb_state { bool enabled; }; +struct mlx5_ib_pf_eq { + struct mlx5_ib_dev *dev; + struct mlx5_eq *core; + struct work_struct work; + spinlock_t lock; /* Pagefaults spinlock */ + struct workqueue_struct *wq; + mempool_t 
*pool; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; - const struct uverbs_object_tree_def *driver_trees[7]; struct mlx5_core_dev *mdev; + struct notifier_block mdev_events; struct mlx5_roce roce[MLX5_MAX_PORTS]; int num_ports; /* serialize update of capability mask @@ -902,12 +914,15 @@ struct mlx5_ib_dev { #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_odp_caps odp_caps; u64 odp_max_size; + struct mlx5_ib_pf_eq odp_pf_eq; + /* * Sleepable RCU that prevents destruction of MRs while they are still * being used by a page fault handler. */ struct srcu_struct mr_srcu; u32 null_mkey; + struct workqueue_struct *advise_mr_wq; #endif struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ @@ -920,6 +935,7 @@ struct mlx5_ib_dev { struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; struct mlx5_eswitch_rep *rep; + int lag_active; struct mlx5_ib_lb_state lb; u8 umr_fence; @@ -927,6 +943,7 @@ struct mlx5_ib_dev { u64 sys_image_guid; struct mlx5_memic memic; u16 devx_whitelist_uid; + struct mlx5_srq_table srq_table; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1025,9 +1042,9 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); + u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx5_ib_destroy_ah(struct ib_ah *ah); +int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); @@ -1053,7 +1070,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct 
ib_recv_wr **bad_wr); -void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length, struct mlx5_ib_qp_base *base); @@ -1070,6 +1086,12 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +int mlx5_ib_advise_mr(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, + struct ib_sge *sg_list, + u32 num_sge, + struct uverbs_attr_bundle *attrs); struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); @@ -1158,9 +1180,8 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); -void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, - struct mlx5_pagefault *pfault); int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); +void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, @@ -1168,6 +1189,10 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, size_t nentries, struct mlx5_ib_mr *mr, int flags); + +int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, struct ib_sge *sg_list, u32 num_sge); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1175,6 +1200,7 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) } static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev 
*ibdev) { return 0; } +static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } static inline void mlx5_ib_odp_cleanup(void) {} static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {} @@ -1182,6 +1208,13 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, size_t nentries, struct mlx5_ib_mr *mr, int flags) {} +static inline int +mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, u32 flags, + struct ib_sge *sg_list, u32 num_sge) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ /* Needed for rep profile */ @@ -1250,32 +1283,29 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev); +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); +extern const struct uapi_definition mlx5_ib_devx_defs[]; +extern const struct uapi_definition mlx5_ib_flow_defs[]; struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_act *flow_act, void *cmd_in, int inlen, - int dest_id, int dest_type); + struct mlx5_flow_act *flow_act, u32 counter_id, + void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id); int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else static inline int -mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; }; +mlx5_ib_devx_create(struct mlx5_ib_dev *dev, + bool is_user) { return -EOPNOTSUPP; } static inline void 
mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} -static inline const struct uverbs_object_tree_def * -mlx5_ib_get_devx_tree(void) { return NULL; } static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) { return false; } -static inline int -mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) -{ - return 0; -} static inline void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9b195d65a13e..1bd8c1b1dba1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -73,7 +73,8 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* Wait until all page fault handlers using the mr complete. */ - synchronize_srcu(&dev->mr_srcu); + if (mr->umem && mr->umem->is_odp) + synchronize_srcu(&dev->mr_srcu); #endif return err; @@ -237,6 +238,9 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + bool odp_mkey_exist = false; +#endif struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; LIST_HEAD(del_list); @@ -249,6 +253,10 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) break; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (mr->umem && mr->umem->is_odp) + odp_mkey_exist = true; +#endif list_move(&mr->list, &del_list); ent->cur--; ent->size--; @@ -257,7 +265,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); + if (odp_mkey_exist) + synchronize_srcu(&dev->mr_srcu); #endif list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { @@ -572,6 +581,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) { struct 
mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; + bool odp_mkey_exist = false; struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; LIST_HEAD(del_list); @@ -584,6 +594,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) break; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + if (mr->umem && mr->umem->is_odp) + odp_mkey_exist = true; list_move(&mr->list, &del_list); ent->cur--; ent->size--; @@ -592,7 +604,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); + if (odp_mkey_exist) + synchronize_srcu(&dev->mr_srcu); #endif list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { @@ -1211,7 +1224,7 @@ err_1: return ERR_PTR(err); } -static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, +static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, int npages, u64 length, int access_flags) { mr->npages = npages; @@ -1267,7 +1280,7 @@ static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, kfree(in); mr->umem = NULL; - set_mr_fileds(dev, mr, 0, length, acc); + set_mr_fields(dev, mr, 0, length, acc); return &mr->ibmr; @@ -1280,6 +1293,21 @@ err_free: return ERR_PTR(err); } +int mlx5_ib_advise_mr(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, + struct ib_sge *sg_list, + u32 num_sge, + struct uverbs_attr_bundle *attrs) +{ + if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && + advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE) + return -EOPNOTSUPP; + + return mlx5_ib_advise_mr_prefetch(pd, advice, flags, + sg_list, num_sge); +} + struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr, struct uverbs_attr_bundle *attrs) @@ -1369,7 +1397,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); mr->umem = umem; - set_mr_fileds(dev, mr, npages, length, 
access_flags); + set_mr_fields(dev, mr, npages, length, access_flags); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); @@ -1536,7 +1564,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - set_mr_fileds(dev, mr, npages, len, access_flags); + set_mr_fields(dev, mr, npages, len, access_flags); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4dc6cc640ce0..01e0f6200631 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -37,6 +37,46 @@ #include "mlx5_ib.h" #include "cmd.h" +#include <linux/mlx5/eq.h> + +/* Contains the details of a pagefault. */ +struct mlx5_pagefault { + u32 bytes_committed; + u32 token; + u8 event_subtype; + u8 type; + union { + /* Initiator or send message responder pagefault details. */ + struct { + /* Received packet size, only valid for responders. */ + u32 packet_size; + /* + * Number of resource holding WQE, depends on type. + */ + u32 wq_num; + /* + * WQE index. Refers to either the send queue or + * receive queue, according to event_subtype. + */ + u16 wqe_index; + } wqe; + /* RDMA responder pagefault details */ + struct { + u32 r_key; + /* + * Received packet size, minimal size page fault + * resolution required for forward progress. + */ + u32 packet_size; + u32 rdma_op_len; + u64 rdma_va; + } rdma; + }; + + struct mlx5_ib_pf_eq *eq; + struct work_struct work; +}; + #define MAX_PREFETCH_LEN (4*1024*1024U) /* Timeout in ms to wait for an active mmu notifier to complete when handling @@ -304,14 +344,20 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, { int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ? 
pfault->wqe.wq_num : pfault->token; - int ret = mlx5_core_page_fault_resume(dev->mdev, - pfault->token, - wq_num, - pfault->type, - error); - if (ret) - mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n", - wq_num); + u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { }; + u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = { }; + int err; + + MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME); + MLX5_SET(page_fault_resume_in, in, page_fault_type, pfault->type); + MLX5_SET(page_fault_resume_in, in, token, pfault->token); + MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); + MLX5_SET(page_fault_resume_in, in, error, !!error); + + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + if (err) + mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n", + wq_num, err); } static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, @@ -503,12 +549,17 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); } +#define MLX5_PF_FLAGS_PREFETCH BIT(0) +#define MLX5_PF_FLAGS_DOWNGRADE BIT(1) static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - u64 io_virt, size_t bcnt, u32 *bytes_mapped) + u64 io_virt, size_t bcnt, u32 *bytes_mapped, + u32 flags) { int npages = 0, current_seq, page_shift, ret, np; bool implicit = false; struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); + bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; + bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; u64 access_mask = ODP_READ_ALLOWED_BIT; u64 start_idx, page_mask; struct ib_umem_odp *odp; @@ -532,7 +583,15 @@ next_mr: page_mask = ~(BIT(page_shift) - 1); start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; - if (mr->umem->writable) + if (prefetch && !downgrade && !mr->umem->writable) { + /* prefetch with write-access must + * be supported by the MR + */ + ret = -EINVAL; + goto out; + } + + if (mr->umem->writable && !downgrade) access_mask 
|= ODP_WRITE_ALLOWED_BIT; current_seq = READ_ONCE(odp->notifiers_seq); @@ -606,8 +665,8 @@ out: if (!wait_for_completion_timeout( &odp->notifier_completion, timeout)) { - mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n", - current_seq, odp->notifiers_seq); + mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n", + current_seq, odp->notifiers_seq, odp->notifiers_count); } } else { /* The MR is being killed, kill the QP as well. */ @@ -637,12 +696,13 @@ struct pf_frame { * -EFAULT when there's an error mapping the requested pages. The caller will * abort the page fault handling. */ -static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, - u32 key, u64 io_virt, size_t bcnt, +static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key, + u64 io_virt, size_t bcnt, u32 *bytes_committed, - u32 *bytes_mapped) + u32 *bytes_mapped, u32 flags) { int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; + bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; struct mlx5_ib_mw *mw; @@ -664,6 +724,12 @@ next_mr: goto srcu_unlock; } + if (prefetch && mmkey->type != MLX5_MKEY_MR) { + mlx5_ib_dbg(dev, "prefetch is allowed only for MR\n"); + ret = -EINVAL; + goto srcu_unlock; + } + switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); @@ -673,6 +739,11 @@ next_mr: goto srcu_unlock; } + if (prefetch && !mr->umem->is_odp) { + ret = -EINVAL; + goto srcu_unlock; + } + if (!mr->umem->is_odp) { mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", key); @@ -682,7 +753,7 @@ next_mr: goto srcu_unlock; } - ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped); + ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped, flags); if (ret < 0) goto srcu_unlock; @@ -859,7 +930,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, ret = 
pagefault_single_data_segment(dev, key, io_virt, bcnt, &pfault->bytes_committed, - bytes_mapped); + bytes_mapped, 0); if (ret < 0) break; npages += ret; @@ -1025,16 +1096,31 @@ invalid_transport_or_opcode: return 0; } -static struct mlx5_ib_qp *mlx5_ib_odp_find_qp(struct mlx5_ib_dev *dev, - u32 wq_num) +static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev, + u32 wq_num, int pf_type) { - struct mlx5_core_qp *mqp = __mlx5_qp_lookup(dev->mdev, wq_num); + enum mlx5_res_type res_type; - if (!mqp) { - mlx5_ib_err(dev, "QPN 0x%6x not found\n", wq_num); + switch (pf_type) { + case MLX5_WQE_PF_TYPE_RMP: + res_type = MLX5_RES_SRQ; + break; + case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE: + case MLX5_WQE_PF_TYPE_RESP: + case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC: + res_type = MLX5_RES_QP; + break; + default: return NULL; } + return mlx5_core_res_hold(dev->mdev, wq_num, res_type); +} + +static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res) +{ + struct mlx5_core_qp *mqp = (struct mlx5_core_qp *)res; + return to_mibqp(mqp); } @@ -1048,18 +1134,30 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, int resume_with_error = 1; u16 wqe_index = pfault->wqe.wqe_index; int requestor = pfault->type & MLX5_PFAULT_REQUESTOR; + struct mlx5_core_rsc_common *res; struct mlx5_ib_qp *qp; + res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type); + if (!res) { + mlx5_ib_dbg(dev, "wqe page fault for missing resource %d\n", pfault->wqe.wq_num); + return; + } + + switch (res->res) { + case MLX5_RES_QP: + qp = res_to_qp(res); + break; + default: + mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type); + goto resolve_page_fault; + } + buffer = (char *)__get_free_page(GFP_KERNEL); if (!buffer) { mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n"); goto resolve_page_fault; } - qp = mlx5_ib_odp_find_qp(dev, pfault->wqe.wq_num); - if (!qp) - goto resolve_page_fault; - ret = mlx5_ib_read_user_wqe(qp, 
requestor, wqe_index, buffer, PAGE_SIZE, &qp->trans_qp.base); if (ret < 0) { @@ -1099,6 +1197,7 @@ resolve_page_fault: mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n", pfault->wqe.wq_num, resume_with_error, pfault->type); + mlx5_core_res_put(res); free_page((unsigned long)buffer); } @@ -1142,7 +1241,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, } ret = pagefault_single_data_segment(dev, rkey, address, length, - &pfault->bytes_committed, NULL); + &pfault->bytes_committed, NULL, + 0); if (ret == -EAGAIN) { /* We're racing with an invalidation, don't prefetch */ prefetch_activated = 0; @@ -1169,7 +1269,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, rkey, address, prefetch_len, - &bytes_committed, NULL); + &bytes_committed, NULL, + 0); if (ret < 0 && ret != -EAGAIN) { mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n", ret, pfault->token, address, prefetch_len); @@ -1177,10 +1278,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, } } -void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, - struct mlx5_pagefault *pfault) +static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { - struct mlx5_ib_dev *dev = context; u8 event_subtype = pfault->event_subtype; switch (event_subtype) { @@ -1197,6 +1296,203 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, } } +static void mlx5_ib_eqe_pf_action(struct work_struct *work) +{ + struct mlx5_pagefault *pfault = container_of(work, + struct mlx5_pagefault, + work); + struct mlx5_ib_pf_eq *eq = pfault->eq; + + mlx5_ib_pfault(eq->dev, pfault); + mempool_free(pfault, eq->pool); +} + +static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq) +{ + struct mlx5_eqe_page_fault *pf_eqe; + struct mlx5_pagefault *pfault; + struct mlx5_eqe *eqe; + int cc = 0; + + while ((eqe = 
mlx5_eq_get_eqe(eq->core, cc))) { + pfault = mempool_alloc(eq->pool, GFP_ATOMIC); + if (!pfault) { + schedule_work(&eq->work); + break; + } + + pf_eqe = &eqe->data.page_fault; + pfault->event_subtype = eqe->sub_type; + pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed); + + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n", + eqe->sub_type, pfault->bytes_committed); + + switch (eqe->sub_type) { + case MLX5_PFAULT_SUBTYPE_RDMA: + /* RDMA based event */ + pfault->type = + be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24; + pfault->token = + be32_to_cpu(pf_eqe->rdma.pftype_token) & + MLX5_24BIT_MASK; + pfault->rdma.r_key = + be32_to_cpu(pf_eqe->rdma.r_key); + pfault->rdma.packet_size = + be16_to_cpu(pf_eqe->rdma.packet_length); + pfault->rdma.rdma_op_len = + be32_to_cpu(pf_eqe->rdma.rdma_op_len); + pfault->rdma.rdma_va = + be64_to_cpu(pf_eqe->rdma.rdma_va); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n", + pfault->type, pfault->token, + pfault->rdma.r_key); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n", + pfault->rdma.rdma_op_len, + pfault->rdma.rdma_va); + break; + + case MLX5_PFAULT_SUBTYPE_WQE: + /* WQE based event */ + pfault->type = + (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; + pfault->token = + be32_to_cpu(pf_eqe->wqe.token); + pfault->wqe.wq_num = + be32_to_cpu(pf_eqe->wqe.pftype_wq) & + MLX5_24BIT_MASK; + pfault->wqe.wqe_index = + be16_to_cpu(pf_eqe->wqe.wqe_index); + pfault->wqe.packet_size = + be16_to_cpu(pf_eqe->wqe.packet_length); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n", + pfault->type, pfault->token, + pfault->wqe.wq_num, + pfault->wqe.wqe_index); + break; + + default: + mlx5_ib_warn(eq->dev, + "Unsupported page fault event sub-type: 0x%02hhx\n", + eqe->sub_type); + /* Unsupported page faults should still be + * resolved by the page fault handler + */ + } + + pfault->eq = eq; + 
INIT_WORK(&pfault->work, mlx5_ib_eqe_pf_action); + queue_work(eq->wq, &pfault->work); + + cc = mlx5_eq_update_cc(eq->core, ++cc); + } + + mlx5_eq_update_ci(eq->core, cc, 1); +} + +static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr) +{ + struct mlx5_ib_pf_eq *eq = eq_ptr; + unsigned long flags; + + if (spin_trylock_irqsave(&eq->lock, flags)) { + mlx5_ib_eq_pf_process(eq); + spin_unlock_irqrestore(&eq->lock, flags); + } else { + schedule_work(&eq->work); + } + + return IRQ_HANDLED; +} + +/* mempool_refill() was proposed but unfortunately wasn't accepted + * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html + * Cheap workaround. + */ +static void mempool_refill(mempool_t *pool) +{ + while (pool->curr_nr < pool->min_nr) + mempool_free(mempool_alloc(pool, GFP_KERNEL), pool); +} + +static void mlx5_ib_eq_pf_action(struct work_struct *work) +{ + struct mlx5_ib_pf_eq *eq = + container_of(work, struct mlx5_ib_pf_eq, work); + + mempool_refill(eq->pool); + + spin_lock_irq(&eq->lock); + mlx5_ib_eq_pf_process(eq); + spin_unlock_irq(&eq->lock); +} + +enum { + MLX5_IB_NUM_PF_EQE = 0x1000, + MLX5_IB_NUM_PF_DRAIN = 64, +}; + +static int +mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) +{ + struct mlx5_eq_param param = {}; + int err; + + INIT_WORK(&eq->work, mlx5_ib_eq_pf_action); + spin_lock_init(&eq->lock); + eq->dev = dev; + + eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN, + sizeof(struct mlx5_pagefault)); + if (!eq->pool) + return -ENOMEM; + + eq->wq = alloc_workqueue("mlx5_ib_page_fault", + WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, + MLX5_NUM_CMD_EQE); + if (!eq->wq) { + err = -ENOMEM; + goto err_mempool; + } + + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_PFAULT_IDX, + .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, + .nent = MLX5_IB_NUM_PF_EQE, + .context = eq, + .handler = mlx5_ib_eq_pf_int + }; + eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", ¶m); + if (IS_ERR(eq->core)) { + err = 
PTR_ERR(eq->core); + goto err_wq; + } + + return 0; +err_wq: + destroy_workqueue(eq->wq); +err_mempool: + mempool_destroy(eq->pool); + return err; +} + +static int +mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) +{ + int err; + + err = mlx5_eq_destroy_generic(dev->mdev, eq->core); + cancel_work_sync(&eq->work); + destroy_workqueue(eq->wq); + mempool_destroy(eq->pool); + + return err; +} + void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) { if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) @@ -1223,9 +1519,16 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) } } +static const struct ib_device_ops mlx5_ib_dev_odp_ops = { + .advise_mr = mlx5_ib_advise_mr, +}; + int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) { - int ret; + int ret = 0; + + if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops); if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) { ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey); @@ -1235,7 +1538,20 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) } } - return 0; + if (!MLX5_CAP_GEN(dev->mdev, pg)) + return ret; + + ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq); + + return ret; +} + +void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev) +{ + if (!MLX5_CAP_GEN(dev->mdev, pg)) + return; + + mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq); } int mlx5_ib_odp_init(void) @@ -1246,3 +1562,75 @@ int mlx5_ib_odp_init(void) return 0; } +struct prefetch_mr_work { + struct work_struct work; + struct mlx5_ib_dev *dev; + u32 pf_flags; + u32 num_sge; + struct ib_sge sg_list[0]; +}; + +static int mlx5_ib_prefetch_sg_list(struct mlx5_ib_dev *dev, u32 pf_flags, + struct ib_sge *sg_list, u32 num_sge) +{ + int i; + + for (i = 0; i < num_sge; ++i) { + struct ib_sge *sg = &sg_list[i]; + int bytes_committed = 0; + int ret; + + ret = pagefault_single_data_segment(dev, sg->lkey, sg->addr, + sg->length, + &bytes_committed, NULL, + pf_flags); + 
if (ret < 0) + return ret; + } + return 0; +} + +static void mlx5_ib_prefetch_mr_work(struct work_struct *work) +{ + struct prefetch_mr_work *w = + container_of(work, struct prefetch_mr_work, work); + + if (w->dev->ib_dev.reg_state == IB_DEV_REGISTERED) + mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list, + w->num_sge); + + kfree(w); +} + +int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, struct ib_sge *sg_list, u32 num_sge) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + u32 pf_flags = MLX5_PF_FLAGS_PREFETCH; + struct prefetch_mr_work *work; + + if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) + pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; + + if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH) + return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list, + num_sge); + + if (dev->ib_dev.reg_state != IB_DEV_REGISTERED) + return -ENODEV; + + work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL); + if (!work) + return -ENOMEM; + + memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); + + work->dev = dev; + work->pf_flags = pf_flags; + work->num_sge = num_sge; + + INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); + schedule_work(&work->work); + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 3747cc681b18..9c94c1b9ec35 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -108,21 +108,6 @@ static int is_sqp(enum ib_qp_type qp_type) return is_qp0(qp_type) || is_qp1(qp_type); } -static void *get_wqe(struct mlx5_ib_qp *qp, int offset) -{ - return mlx5_buf_offset(&qp->buf, offset); -} - -static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n) -{ - return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); -} - -void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n) -{ - return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE)); -} - /** * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space. 
* @@ -790,6 +775,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, __be64 *pas; void *qpc; int err; + u16 uid; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) { @@ -851,7 +837,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_umem; } - MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid); + uid = (attr->qp_type != IB_QPT_XRC_TGT) ? to_mpd(pd)->uid : 0; + MLX5_SET(create_qp_in, *in, uid, uid); pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); if (ubuffer->umem) mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); @@ -917,6 +904,30 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn); } +/* get_sq_edge - Get the next nearby edge. + * + * An 'edge' is defined as the first following address after the end + * of the fragment or the SQ. Accordingly, during the WQE construction + * which repetitively increases the pointer to write the next data, it + * simply should check if it gets to an edge. + * + * @sq - SQ buffer. + * @idx - Stride index in the SQ buffer. + * + * Return: + * The new edge. 
+ */ +static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx) +{ + void *fragment_end; + + fragment_end = mlx5_frag_buf_get_wqe + (&sq->fbc, + mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx)); + + return fragment_end + MLX5_SEND_WQE_BB; +} + static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *init_attr, struct mlx5_ib_qp *qp, @@ -955,13 +966,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); - err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf); + err = mlx5_frag_buf_alloc_node(dev->mdev, base->ubuffer.buf_size, + &qp->buf, dev->mdev->priv.numa_node); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; } - qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt); + if (qp->rq.wqe_cnt) + mlx5_init_fbc(qp->buf.frags, qp->rq.wqe_shift, + ilog2(qp->rq.wqe_cnt), &qp->rq.fbc); + + if (qp->sq.wqe_cnt) { + int sq_strides_offset = (qp->sq.offset & (PAGE_SIZE - 1)) / + MLX5_SEND_WQE_BB; + mlx5_init_fbc_offset(qp->buf.frags + + (qp->sq.offset / PAGE_SIZE), + ilog2(MLX5_SEND_WQE_BB), + ilog2(qp->sq.wqe_cnt), + sq_strides_offset, &qp->sq.fbc); + + qp->sq.cur_edge = get_sq_edge(&qp->sq, 0); + } + *inlen = MLX5_ST_SZ_BYTES(create_qp_in) + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages; *in = kvzalloc(*inlen, GFP_KERNEL); @@ -983,8 +1010,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, qp->flags |= MLX5_IB_QP_SQPN_QP1; } - mlx5_fill_page_array(&qp->buf, - (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas)); + mlx5_fill_page_frag_array(&qp->buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, + *in, pas)); err = mlx5_db_alloc(dev->mdev, &qp->db); if (err) { @@ -1024,7 +1052,7 @@ err_free: kvfree(*in); err_buf: - mlx5_buf_free(dev->mdev, &qp->buf); + mlx5_frag_buf_free(dev->mdev, &qp->buf); return err; } @@ -1036,7 +1064,7 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) 
kvfree(qp->sq.wr_data); kvfree(qp->rq.wrid); mlx5_db_free(dev->mdev, &qp->db); - mlx5_buf_free(dev->mdev, &qp->buf); + mlx5_frag_buf_free(dev->mdev, &qp->buf); } static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) @@ -1876,7 +1904,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING; } - if (pd && pd->uobject) { + if (udata) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { mlx5_ib_dbg(dev, "copy failed\n"); return -EFAULT; @@ -1889,7 +1917,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_QP_FLAG_BFREG_INDEX | MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI | - MLX5_QP_FLAG_ALLOW_SCATTER_CQE)) + MLX5_QP_FLAG_ALLOW_SCATTER_CQE | + MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)) return -EINVAL; err = get_qp_user_index(to_mucontext(pd->uobject->context), @@ -1925,6 +1954,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC; } + if (ucmd.flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) { + if (init_attr->qp_type != IB_QPT_RC || + !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) { + mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n"); + return -EOPNOTSUPP; + } + qp->flags |= MLX5_IB_QP_PACKET_BASED_CREDIT; + } + if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { if (init_attr->qp_type != IB_QPT_UD || (MLX5_CAP_GEN(dev->mdev, port_type) != @@ -1948,14 +1986,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->has_rq = qp_has_rq(init_attr); err = set_rq_size(dev, &init_attr->cap, qp->has_rq, - qp, (pd && pd->uobject) ? &ucmd : NULL); + qp, udata ? 
&ucmd : NULL); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; } if (pd) { - if (pd->uobject) { + if (udata) { __u32 max_wqes = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); @@ -2021,11 +2059,12 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, cd_slave_send, 1); if (qp->flags & MLX5_IB_QP_MANAGED_RECV) MLX5_SET(qpc, qpc, cd_slave_receive, 1); - + if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT) + MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1); if (qp->scat_cqe && is_connected(init_attr->qp_type)) { configure_responder_scat_cqe(init_attr, qpc); configure_requester_scat_cqe(dev, init_attr, - (pd && pd->uobject) ? &ucmd : NULL, + udata ? &ucmd : NULL, qpc); } @@ -2465,7 +2504,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, dev = to_mdev(pd->device); if (init_attr->qp_type == IB_QPT_RAW_PACKET) { - if (!pd->uobject) { + if (!udata) { mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n"); return ERR_PTR(-EINVAL); } else if (!to_mucontext(pd->uobject->context)->cqe_version) { @@ -2663,7 +2702,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) if (rate == IB_RATE_PORT_CURRENT) return 0; - if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) + if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS) return -EINVAL; while (rate != IB_RATE_PORT_CURRENT && @@ -3258,7 +3297,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, (ibqp->qp_type == IB_QPT_RAW_PACKET) || (ibqp->qp_type == IB_QPT_XRC_INI) || (ibqp->qp_type == IB_QPT_XRC_TGT)) { - if (mlx5_lag_is_active(dev->mdev)) { + if (dev->lag_active) { u8 p = mlx5_core_native_port_num(dev->mdev); tx_affinity = get_tx_affinity(dev, pd, base, p); context->flags |= cpu_to_be32(tx_affinity << 24); @@ -3475,7 +3514,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, qp->sq.head = 0; qp->sq.tail = 0; qp->sq.cur_post = 0; - qp->sq.last_poll = 0; + if (qp->sq.wqe_cnt) + 
qp->sq.cur_edge = get_sq_edge(&qp->sq, 0); qp->db.db[MLX5_RCV_DBR] = 0; qp->db.db[MLX5_SND_DBR] = 0; } @@ -3515,7 +3555,7 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new return is_valid_mask(attr_mask, req, opt); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { req |= IB_QP_PATH_MTU; - opt = IB_QP_PKEY_INDEX; + opt = IB_QP_PKEY_INDEX | IB_QP_AV; return is_valid_mask(attr_mask, req, opt); } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | @@ -3749,6 +3789,62 @@ out: return err; } +static void _handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, + u32 wqe_sz, void **cur_edge) +{ + u32 idx; + + idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); + *cur_edge = get_sq_edge(sq, idx); + + *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); +} + +/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the + * next nearby edge and get new address translation for current WQE position. + * @sq - SQ buffer. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @cur_edge: Updated current edge. + */ +static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, + u32 wqe_sz, void **cur_edge) +{ + if (likely(*seg != *cur_edge)) + return; + + _handle_post_send_edge(sq, seg, wqe_sz, cur_edge); +} + +/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's + * pointers. At the end @seg is aligned to 16B regardless the copied size. + * @sq - SQ buffer. + * @cur_edge: Updated current edge. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @src: Pointer to copy from. + * @n: Number of bytes to copy. 
+ */ +static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, + void **seg, u32 *wqe_sz, const void *src, + size_t n) +{ + while (likely(n)) { + size_t leftlen = *cur_edge - *seg; + size_t copysz = min_t(size_t, leftlen, n); + size_t stride; + + memcpy(*seg, src, copysz); + + n -= copysz; + src += copysz; + stride = !n ? ALIGN(copysz, 16) : copysz; + *seg += stride; + *wqe_sz += stride >> 4; + handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); + } +} + static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq) { struct mlx5_ib_cq *cq; @@ -3774,11 +3870,10 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, rseg->reserved = 0; } -static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, - const struct ib_send_wr *wr, void *qend, - struct mlx5_ib_qp *qp, int *size) +static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size, void **cur_edge) { - void *seg = eseg; + struct mlx5_wqe_eth_seg *eseg = *seg; memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); @@ -3786,45 +3881,41 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; - seg += sizeof(struct mlx5_wqe_eth_seg); - *size += sizeof(struct mlx5_wqe_eth_seg) / 16; - if (wr->opcode == IB_WR_LSO) { struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr); - int size_of_inl_hdr_start = sizeof(eseg->inline_hdr.start); - u64 left, leftlen, copysz; + size_t left, copysz; void *pdata = ud_wr->header; + size_t stride; left = ud_wr->hlen; eseg->mss = cpu_to_be16(ud_wr->mss); eseg->inline_hdr.sz = cpu_to_be16(left); - /* - * check if there is space till the end of queue, if yes, - * copy all in one shot, otherwise copy till the end of queue, - * rollback and than the copy the left + /* memcpy_send_wqe should get a 16B align address. Hence, we + * first copy up to the current edge and then, if needed, + * fall-through to memcpy_send_wqe. 
*/ - leftlen = qend - (void *)eseg->inline_hdr.start; - copysz = min_t(u64, leftlen, left); - - memcpy(seg - size_of_inl_hdr_start, pdata, copysz); - - if (likely(copysz > size_of_inl_hdr_start)) { - seg += ALIGN(copysz - size_of_inl_hdr_start, 16); - *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16; - } - - if (unlikely(copysz < left)) { /* the last wqe in the queue */ - seg = mlx5_get_send_wqe(qp, 0); + copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, + left); + memcpy(eseg->inline_hdr.start, pdata, copysz); + stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - + sizeof(eseg->inline_hdr.start) + copysz, 16); + *size += stride / 16; + *seg += stride; + + if (copysz < left) { + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); left -= copysz; pdata += copysz; - memcpy(seg, pdata, left); - seg += ALIGN(left, 16); - *size += ALIGN(left, 16) / 16; + memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata, + left); } + + return; } - return seg; + *seg += sizeof(struct mlx5_wqe_eth_seg); + *size += sizeof(struct mlx5_wqe_eth_seg) / 16; } static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, @@ -4083,24 +4174,6 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } -static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp, - struct mlx5_ib_mr *mr, int mr_list_size) -{ - void *qend = qp->sq.qend; - void *addr = mr->descs; - int copy; - - if (unlikely(seg + mr_list_size > qend)) { - copy = qend - seg; - memcpy(seg, addr, copy); - addr += copy; - mr_list_size -= copy; - seg = mlx5_get_send_wqe(qp, 0); - } - memcpy(seg, addr, mr_list_size); - seg += mr_list_size; -} - static __be32 send_ieth(const struct ib_send_wr *wr) { switch (wr->opcode) { @@ -4134,40 +4207,48 @@ static u8 wq_sig(void *wqe) } static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, - void *wqe, int *sz) + void **wqe, int *wqe_sz, void **cur_edge) { struct mlx5_wqe_inline_seg *seg; 
- void *qend = qp->sq.qend; - void *addr; + size_t offset; int inl = 0; - int copy; - int len; int i; - seg = wqe; - wqe += sizeof(*seg); + seg = *wqe; + *wqe += sizeof(*seg); + offset = sizeof(*seg); + for (i = 0; i < wr->num_sge; i++) { - addr = (void *)(unsigned long)(wr->sg_list[i].addr); - len = wr->sg_list[i].length; + size_t len = wr->sg_list[i].length; + void *addr = (void *)(unsigned long)(wr->sg_list[i].addr); + inl += len; if (unlikely(inl > qp->max_inline_data)) return -ENOMEM; - if (unlikely(wqe + len > qend)) { - copy = qend - wqe; - memcpy(wqe, addr, copy); - addr += copy; - len -= copy; - wqe = mlx5_get_send_wqe(qp, 0); + while (likely(len)) { + size_t leftlen; + size_t copysz; + + handle_post_send_edge(&qp->sq, wqe, + *wqe_sz + (offset >> 4), + cur_edge); + + leftlen = *cur_edge - *wqe; + copysz = min_t(size_t, leftlen, len); + + memcpy(*wqe, addr, copysz); + len -= copysz; + addr += copysz; + *wqe += copysz; + offset += copysz; } - memcpy(wqe, addr, len); - wqe += len; } seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); - *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16; + *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16; return 0; } @@ -4280,7 +4361,8 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, } static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, - struct mlx5_ib_qp *qp, void **seg, int *size) + struct mlx5_ib_qp *qp, void **seg, + int *size, void **cur_edge) { struct ib_sig_attrs *sig_attrs = wr->sig_attrs; struct ib_mr *sig_mr = wr->sig_mr; @@ -4364,8 +4446,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, *seg += wqe_size; *size += wqe_size / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); bsf = *seg; ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); @@ -4374,8 +4455,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, *seg += sizeof(*bsf); *size += sizeof(*bsf) / 16; - if 
(unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); return 0; } @@ -4413,7 +4493,8 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, static int set_sig_umr_wr(const struct ib_send_wr *send_wr, - struct mlx5_ib_qp *qp, void **seg, int *size) + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); @@ -4445,16 +4526,14 @@ static int set_sig_umr_wr(const struct ib_send_wr *send_wr, set_sig_umr_segment(*seg, xlt_size); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - ret = set_sig_data_segment(wr, qp, seg, size); + ret = set_sig_data_segment(wr, qp, seg, size, cur_edge); if (ret) return ret; @@ -4491,11 +4570,11 @@ static int set_psv_wr(struct ib_sig_domain *domain, static int set_reg_wr(struct mlx5_ib_qp *qp, const struct ib_reg_wr *wr, - void **seg, int *size) + void **seg, int *size, void **cur_edge) { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); - int mr_list_size = mr->ndescs * mr->desc_size; + size_t mr_list_size = mr->ndescs * mr->desc_size; bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { @@ -4507,18 +4586,17 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, set_reg_umr_seg(*seg, mr, umr_inline); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if 
(unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); set_reg_mkey_seg(*seg, mr, wr->key, wr->access); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); if (umr_inline) { - set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size); - *size += get_xlt_octo(mr_list_size); + memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, + mr_list_size); + *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4); } else { set_reg_data_seg(*seg, mr, pd); *seg += sizeof(struct mlx5_wqe_data_seg); @@ -4527,32 +4605,31 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, return 0; } -static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size) +static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { set_linv_umr_seg(*seg); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); set_linv_mkey_seg(*seg); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); } -static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) +static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) { __be32 *p = NULL; - int tidx = idx; + u32 tidx = idx; int i, j; - pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx)); + pr_debug("dump WQE index %u:\n", idx); for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { if ((i & 0xf) == 0) { - void *buf = mlx5_get_send_wqe(qp, tidx); tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1); - p = buf; + p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, tidx); + pr_debug("WQBB at %p:\n", (void *)p); 
j = 0; } pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), @@ -4562,15 +4639,16 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) } static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, - struct mlx5_wqe_ctrl_seg **ctrl, - const struct ib_send_wr *wr, unsigned *idx, - int *size, int nreq, bool send_signaled, bool solicited) + struct mlx5_wqe_ctrl_seg **ctrl, + const struct ib_send_wr *wr, unsigned int *idx, + int *size, void **cur_edge, int nreq, + bool send_signaled, bool solicited) { if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) return -ENOMEM; *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); - *seg = mlx5_get_send_wqe(qp, *idx); + *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx); *ctrl = *seg; *(uint32_t *)(*seg + 8) = 0; (*ctrl)->imm = send_ieth(wr); @@ -4580,6 +4658,7 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; + *cur_edge = qp->sq.cur_edge; return 0; } @@ -4587,17 +4666,18 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, const struct ib_send_wr *wr, unsigned *idx, - int *size, int nreq) + int *size, void **cur_edge, int nreq) { - return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq, + return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, wr->send_flags & IB_SEND_SIGNALED, wr->send_flags & IB_SEND_SOLICITED); } static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, - u8 size, unsigned idx, u64 wr_id, - int nreq, u8 fence, u32 mlx5_opcode) + void *seg, u8 size, void *cur_edge, + unsigned int idx, u64 wr_id, int nreq, u8 fence, + u32 mlx5_opcode) { u8 opmod = 0; @@ -4613,6 +4693,15 @@ static void finish_wqe(struct mlx5_ib_qp *qp, qp->sq.wqe_head[idx] = qp->sq.head + nreq; qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); qp->sq.w_list[idx].next = qp->sq.cur_post; + + /* We save the edge which was possibly 
updated during the WQE + * construction, into SQ's cache. + */ + seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB); + qp->sq.cur_edge = (unlikely(seg == cur_edge)) ? + get_sq_edge(&qp->sq, qp->sq.cur_post & + (qp->sq.wqe_cnt - 1)) : + cur_edge; } static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, @@ -4623,11 +4712,10 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; - struct mlx5_wqe_data_seg *dpseg; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf; + void *cur_edge; int uninitialized_var(size); - void *qend; unsigned long flags; unsigned idx; int err = 0; @@ -4649,7 +4737,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, qp = to_mqp(ibqp); bf = &qp->bf; - qend = qp->sq.qend; spin_lock_irqsave(&qp->sq.lock, flags); @@ -4669,7 +4756,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge, + nreq); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4719,14 +4807,15 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, case IB_WR_LOCAL_INV: qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); - set_linv_wr(qp, &seg, &size); + set_linv_wr(qp, &seg, &size, &cur_edge); num_sge = 0; break; case IB_WR_REG_MR: qp->sq.wr_data[idx] = IB_WR_REG_MR; ctrl->imm = cpu_to_be32(reg_wr(wr)->key); - err = set_reg_wr(qp, reg_wr(wr), &seg, &size); + err = set_reg_wr(qp, reg_wr(wr), &seg, &size, + &cur_edge); if (err) { *bad_wr = wr; goto out; @@ -4739,21 +4828,24 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, mr = to_mmr(sig_handover_wr(wr)->sig_mr); ctrl->imm = cpu_to_be32(mr->ibmr.rkey); - err = set_sig_umr_wr(wr, qp, &seg, &size); + err = set_sig_umr_wr(wr, 
qp, &seg, &size, + &cur_edge); if (err) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_UMR); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_UMR); /* * SET_PSV WQEs are not signaled and solicited * on error */ err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, nreq, false, true); + &size, &cur_edge, nreq, false, + true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4770,10 +4862,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_SET_PSV); err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, nreq, false, true); + &size, &cur_edge, nreq, false, + true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4790,8 +4884,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_SET_PSV); qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; num_sge = 0; goto skip_psv; @@ -4828,16 +4923,14 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); + break; case IB_QPT_UD: set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); /* handle qp that supports ud offload */ if 
(qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { @@ -4847,11 +4940,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); seg += sizeof(struct mlx5_wqe_eth_pad); size += sizeof(struct mlx5_wqe_eth_pad) / 16; - - seg = set_eth_seg(seg, wr, qend, qp, &size); - - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + set_eth_seg(wr, qp, &seg, &size, &cur_edge); + handle_post_send_edge(&qp->sq, &seg, size, + &cur_edge); } break; case MLX5_IB_QPT_REG_UMR: @@ -4867,13 +4958,11 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); set_reg_mkey_segment(seg, wr); seg += sizeof(struct mlx5_mkey_seg); size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); break; default: @@ -4881,33 +4970,29 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, } if (wr->send_flags & IB_SEND_INLINE && num_sge) { - int uninitialized_var(sz); - - err = set_data_inl_seg(qp, wr, seg, &sz); + err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge); if (unlikely(err)) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - size += sz; } else { - dpseg = seg; for (i = 0; i < num_sge; i++) { - if (unlikely(dpseg == qend)) { - seg = mlx5_get_send_wqe(qp, 0); - dpseg = seg; - } + handle_post_send_edge(&qp->sq, &seg, size, + &cur_edge); if (likely(wr->sg_list[i].length)) { - set_data_ptr_seg(dpseg, wr->sg_list + i); + set_data_ptr_seg + ((struct mlx5_wqe_data_seg *)seg, + wr->sg_list + i); size += sizeof(struct mlx5_wqe_data_seg) / 16; - dpseg++; + seg += sizeof(struct mlx5_wqe_data_seg); } } } qp->next_fence = next_fence; - finish_wqe(qp, 
ctrl, size, idx, wr->wr_id, nreq, fence, - mlx5_ib_opcode[wr->opcode]); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, + fence, mlx5_ib_opcode[wr->opcode]); skip_psv: if (0) dump_wqe(qp, idx, size); @@ -4993,7 +5078,7 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, goto out; } - scat = get_recv_wqe(qp, ind); + scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind); if (qp->wq_sig) scat++; @@ -5441,7 +5526,6 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_xrcd *xrcd; int err; - u16 uid; if (!MLX5_CAP_GEN(dev->mdev, xrc)) return ERR_PTR(-ENOSYS); @@ -5450,14 +5534,12 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, if (!xrcd) return ERR_PTR(-ENOMEM); - uid = context ? to_mucontext(context)->devx_uid : 0; - err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid); + err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); if (err) { kfree(xrcd); return ERR_PTR(-ENOMEM); } - xrcd->uid = uid; return &xrcd->ibxrcd; } @@ -5465,10 +5547,9 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; - u16 uid = to_mxrcd(xrcd)->uid; int err; - err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid); + err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); if (err) mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index d012e7dbcc38..4e8d18009f58 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -1,50 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. 
*/ #include <linux/module.h> #include <linux/mlx5/qp.h> -#include <linux/mlx5/srq.h> #include <linux/slab.h> #include <rdma/ib_umem.h> #include <rdma/ib_user_verbs.h> - #include "mlx5_ib.h" - -/* not supported currently */ -static int srq_signature; +#include "srq.h" static void *get_wqe(struct mlx5_ib_srq *srq, int n) { - return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift); + return mlx5_frag_buf_get_wqe(&srq->fbc, n); } static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type) @@ -144,7 +113,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; in->page_offset = offset; - in->uid = to_mpd(pd)->uid; + in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type != IB_SRQT_BASIC) in->user_index = uidx; @@ -173,12 +142,16 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, return err; } - if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) { + if (mlx5_frag_buf_alloc_node(dev->mdev, buf_size, &srq->buf, + dev->mdev->priv.numa_node)) { mlx5_ib_dbg(dev, "buf alloc failed\n"); err = -ENOMEM; goto err_db; } + mlx5_init_fbc(srq->buf.frags, srq->msrq.wqe_shift, ilog2(srq->msrq.max), + &srq->fbc); + srq->head = 0; srq->tail = srq->msrq.max - 1; srq->wqe_ctr = 0; @@ -195,14 +168,14 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, err = -ENOMEM; goto err_buf; } - mlx5_fill_page_array(&srq->buf, in->pas); + mlx5_fill_page_frag_array(&srq->buf, in->pas); srq->wrid = kvmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL); if (!srq->wrid) { err = -ENOMEM; goto err_in; } - srq->wq_sig = !!srq_signature; + srq->wq_sig = 0; in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && @@ -215,7 +188,7 @@ err_in: kvfree(in->pas); err_buf: - mlx5_buf_free(dev->mdev, 
&srq->buf); + mlx5_frag_buf_free(dev->mdev, &srq->buf); err_db: mlx5_db_free(dev->mdev, &srq->db); @@ -232,7 +205,7 @@ static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) { kvfree(srq->wrid); - mlx5_buf_free(dev->mdev, &srq->buf); + mlx5_frag_buf_free(dev->mdev, &srq->buf); mlx5_db_free(dev->mdev, &srq->db); } @@ -287,14 +260,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, } in.type = init_attr->srq_type; - if (pd->uobject) + if (udata) err = create_srq_user(pd, srq, &in, udata, buf_size); else err = create_srq_kernel(dev, srq, &in, buf_size); if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", - pd->uobject ? "user" : "kernel", err); + udata ? "user" : "kernel", err); goto err_srq; } @@ -327,7 +300,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, in.pd = to_mpd(pd)->pdn; in.db_record = srq->db.dma; - err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in); + err = mlx5_cmd_create_srq(dev, &srq->msrq, &in); kvfree(in.pas); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); @@ -339,7 +312,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, srq->msrq.event = mlx5_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (pd->uobject) + if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) { mlx5_ib_dbg(dev, "copy to user failed\n"); err = -EFAULT; @@ -351,10 +324,10 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, return &srq->ibsrq; err_core: - mlx5_core_destroy_srq(dev->mdev, &srq->msrq); + mlx5_cmd_destroy_srq(dev, &srq->msrq); err_usr_kern_srq: - if (pd->uobject) + if (udata) destroy_srq_user(pd, srq); else destroy_srq_kernel(dev, srq); @@ -381,7 +354,7 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return -EINVAL; mutex_lock(&srq->mutex); - ret = mlx5_core_arm_srq(dev->mdev, &srq->msrq, attr->srq_limit, 1); + ret = mlx5_cmd_arm_srq(dev, &srq->msrq, 
attr->srq_limit, 1); mutex_unlock(&srq->mutex); if (ret) @@ -402,7 +375,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) if (!out) return -ENOMEM; - ret = mlx5_core_query_srq(dev->mdev, &srq->msrq, out); + ret = mlx5_cmd_query_srq(dev, &srq->msrq, out); if (ret) goto out_box; @@ -420,7 +393,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq) struct mlx5_ib_dev *dev = to_mdev(srq->device); struct mlx5_ib_srq *msrq = to_msrq(srq); - mlx5_core_destroy_srq(dev->mdev, &msrq->msrq); + mlx5_cmd_destroy_srq(dev, &msrq->msrq); if (srq->uobject) { mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h new file mode 100644 index 000000000000..75eb5839ae95 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved. + */ + +#ifndef MLX5_IB_SRQ_H +#define MLX5_IB_SRQ_H + +enum { + MLX5_SRQ_FLAG_ERR = (1 << 0), + MLX5_SRQ_FLAG_WQ_SIG = (1 << 1), + MLX5_SRQ_FLAG_RNDV = (1 << 2), +}; + +struct mlx5_srq_attr { + u32 type; + u32 flags; + u32 log_size; + u32 wqe_shift; + u32 log_page_size; + u32 wqe_cnt; + u32 srqn; + u32 xrcd; + u32 page_offset; + u32 cqn; + u32 pd; + u32 lwm; + u32 user_index; + u64 db_record; + __be64 *pas; + u32 tm_log_list_size; + u32 tm_next_tag; + u32 tm_hw_phase_cnt; + u32 tm_sw_phase_cnt; + u16 uid; +}; + +struct mlx5_ib_dev; + +struct mlx5_core_srq { + struct mlx5_core_rsc_common common; /* must be first */ + u32 srqn; + int max; + size_t max_gs; + size_t max_avail_gather; + int wqe_shift; + void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e); + + atomic_t refcount; + struct completion free; + u16 uid; +}; + +struct mlx5_srq_table { + struct notifier_block nb; + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + +int mlx5_cmd_create_srq(struct mlx5_ib_dev 
*dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in); +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); +int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out); +int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq); +struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn); + +int mlx5_init_srq_table(struct mlx5_ib_dev *dev); +void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev); +#endif /* MLX5_IB_SRQ_H */ diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c new file mode 100644 index 000000000000..7aaaffbd4afa --- /dev/null +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -0,0 +1,722 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. + */ + +#include <linux/kernel.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_ib.h" +#include "srq.h" + +static int get_pas_size(struct mlx5_srq_attr *in) +{ + u32 log_page_size = in->log_page_size + 12; + u32 log_srq_size = in->log_size; + u32 log_rq_stride = in->wqe_shift; + u32 page_offset = in->page_offset; + u32 po_quanta = 1 << (log_page_size - 6); + u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); + u32 page_size = 1 << log_page_size; + u32 rq_sz_po = rq_sz + (page_offset * po_quanta); + u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size); + + return rq_num_pas * sizeof(u64); +} + +static void set_wq(void *wq, struct mlx5_srq_attr *in) +{ + MLX5_SET(wq, wq, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); + MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); + MLX5_SET(wq, wq, log_wq_sz, in->log_size); + MLX5_SET(wq, wq, page_offset, in->page_offset); + MLX5_SET(wq, wq, lwm, in->lwm); + MLX5_SET(wq, wq, pd, in->pd); + MLX5_SET64(wq, wq, dbr_addr, in->db_record); +} + 
+static void set_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + MLX5_SET(srqc, srqc, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); + MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); + MLX5_SET(srqc, srqc, log_srq_size, in->log_size); + MLX5_SET(srqc, srqc, page_offset, in->page_offset); + MLX5_SET(srqc, srqc, lwm, in->lwm); + MLX5_SET(srqc, srqc, pd, in->pd); + MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); + MLX5_SET(srqc, srqc, xrcd, in->xrcd); + MLX5_SET(srqc, srqc, cqn, in->cqn); +} + +static void get_wq(void *wq, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(wq, wq, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); + in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; + in->log_size = MLX5_GET(wq, wq, log_wq_sz); + in->page_offset = MLX5_GET(wq, wq, page_offset); + in->lwm = MLX5_GET(wq, wq, lwm); + in->pd = MLX5_GET(wq, wq, pd); + in->db_record = MLX5_GET64(wq, wq, dbr_addr); +} + +static void get_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(srqc, srqc, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); + in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); + in->log_size = MLX5_GET(srqc, srqc, log_srq_size); + in->page_offset = MLX5_GET(srqc, srqc, page_offset); + in->lwm = MLX5_GET(srqc, srqc, lwm); + in->pd = MLX5_GET(srqc, srqc, pd); + in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); +} + +struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) +{ + struct mlx5_srq_table *table = &dev->srq_table; + struct mlx5_core_srq *srq; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + return srq; +} + +static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 
create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; + void *create_in; + void *srqc; + void *pas; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + MLX5_SET(create_srq_in, create_in, uid, in->uid); + srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); + pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); + + set_srqc(srqc, in); + memcpy(pas, in->pas, pas_size); + + MLX5_SET(create_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_SRQ); + + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) { + srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); + srq->uid = in->uid; + } + + return err; +} + +static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; + + MLX5_SET(destroy_srq_in, srq_in, opcode, + MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); + MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, + sizeof(srq_out)); +} + +static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + + MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); + MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, srq_in, lwm, lwm); + MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, + sizeof(srq_out)); +} + +static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 
srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; + u32 *srq_out; + void *srqc; + int err; + + srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL); + if (!srq_out) + return -ENOMEM; + + MLX5_SET(query_srq_in, srq_in, opcode, + MLX5_CMD_OP_QUERY_SRQ); + MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); + err = mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, + MLX5_ST_SZ_BYTES(query_srq_out)); + if (err) + goto out; + + srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); + get_srqc(srqc, out); + if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; +out: + kvfree(srq_out); + return err; +} + +static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + void *create_in; + void *xrc_srqc; + void *pas; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); + xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, + xrc_srq_context_entry); + pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); + + set_srqc(xrc_srqc, in); + MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); + memcpy(pas, in->pas, pas_size); + MLX5_SET(create_xrc_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_XRC_SRQ); + + memset(create_out, 0, sizeof(create_out)); + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, + sizeof(create_out)); + if (err) + goto out; + + srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); + srq->uid = in->uid; +out: + kvfree(create_in); + return err; +} + +static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; + + 
MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; + + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + u32 *xrcsrq_out; + void *xrc_srqc; + int err; + + xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); + if (!xrcsrq_out) + return -ENOMEM; + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + + MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + + err = mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (err) + goto out; + + xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, + xrc_srq_context_entry); + get_srqc(xrc_srqc, out); + if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + +out: + kvfree(xrcsrq_out); + return err; +} + +static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + void *create_out = NULL; + 
void *create_in = NULL; + void *rmpc; + void *wq; + int pas_size; + int outlen; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; + outlen = MLX5_ST_SZ_BYTES(create_rmp_out); + create_in = kvzalloc(inlen, GFP_KERNEL); + create_out = kvzalloc(outlen, GFP_KERNEL); + if (!create_in || !create_out) { + err = -ENOMEM; + goto out; + } + + rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + MLX5_SET(create_rmp_in, create_in, uid, in->uid); + set_wq(wq, in); + memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + + MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP); + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen); + if (!err) { + srq->srqn = MLX5_GET(create_rmp_out, create_out, rmpn); + srq->uid = in->uid; + } + +out: + kvfree(create_in); + kvfree(create_out); + return err; +} + +static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; + + MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(destroy_rmp_in, in, uid, srq->uid); + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm) +{ + void *out = NULL; + void *in = NULL; + void *rmpc; + void *wq; + void *bitmask; + int outlen; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rmp_in); + outlen = MLX5_ST_SZ_BYTES(modify_rmp_out); + + in = kvzalloc(inlen, GFP_KERNEL); + out = kvzalloc(outlen, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); + bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(modify_rmp_in, in, 
rmp_state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(modify_rmp_in, in, uid, srq->uid); + MLX5_SET(wq, wq, lwm, lwm); + MLX5_SET(rmp_bitmask, bitmask, lwm, 1); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); + + err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen); + +out: + kvfree(in); + kvfree(out); + return err; +} + +static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 *rmp_out = NULL; + u32 *rmp_in = NULL; + void *rmpc; + int outlen; + int inlen; + int err; + + outlen = MLX5_ST_SZ_BYTES(query_rmp_out); + inlen = MLX5_ST_SZ_BYTES(query_rmp_in); + + rmp_out = kvzalloc(outlen, GFP_KERNEL); + rmp_in = kvzalloc(inlen, GFP_KERNEL); + if (!rmp_out || !rmp_in) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP); + MLX5_SET(query_rmp_in, rmp_in, rmpn, srq->srqn); + err = mlx5_cmd_exec(dev->mdev, rmp_in, inlen, rmp_out, outlen); + if (err) + goto out; + + rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); + get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); + if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) + out->flags |= MLX5_SRQ_FLAG_ERR; + +out: + kvfree(rmp_out); + kvfree(rmp_in); + return err; +} + +static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; + void *create_in; + void *xrqc; + void *wq; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); + wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + + set_wq(wq, in); + memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + + if (in->type == IB_SRQT_TM) { + MLX5_SET(xrqc, xrqc, 
topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); + if (in->flags & MLX5_SRQ_FLAG_RNDV) + MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV); + MLX5_SET(xrqc, xrqc, + tag_matching_topology_context.log_matching_list_sz, + in->tm_log_list_size); + } + MLX5_SET(xrqc, xrqc, user_index, in->user_index); + MLX5_SET(xrqc, xrqc, cqn, in->cqn); + MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); + MLX5_SET(create_xrq_in, create_in, uid, in->uid); + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) { + srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); + srq->uid = in->uid; + } + + return err; +} + +static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; + + MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + MLX5_SET(destroy_xrq_in, in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +static int arm_xrq_cmd(struct mlx5_ib_dev *dev, + struct mlx5_core_srq *srq, + u16 lwm) +{ + u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + + MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); + MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, in, lwm, lwm); + MLX5_SET(arm_rq_in, in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; + u32 *xrq_out; + int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); + void *xrqc; + int err; + + xrq_out = kvzalloc(outlen, GFP_KERNEL); + if (!xrq_out) + return -ENOMEM; + + MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); + 
MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); + + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), xrq_out, outlen); + if (err) + goto out; + + xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context); + get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out); + if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + out->tm_next_tag = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.append_next_index); + out->tm_hw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.hw_phase_cnt); + out->tm_sw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.sw_phase_cnt); + +out: + kvfree(xrq_out); + return err; +} + +static int create_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + if (!dev->mdev->issi) + return create_srq_cmd(dev, srq, in); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return create_xrc_srq_cmd(dev, srq, in); + case MLX5_RES_XRQ: + return create_xrq_cmd(dev, srq, in); + default: + return create_rmp_cmd(dev, srq, in); + } +} + +static int destroy_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + if (!dev->mdev->issi) + return destroy_srq_cmd(dev, srq); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return destroy_xrc_srq_cmd(dev, srq); + case MLX5_RES_XRQ: + return destroy_xrq_cmd(dev, srq); + default: + return destroy_rmp_cmd(dev, srq); + } +} + +int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + struct mlx5_srq_table *table = &dev->srq_table; + int err; + + switch (in->type) { + case IB_SRQT_XRC: + srq->common.res = MLX5_RES_XSRQ; + break; + case IB_SRQT_TM: + srq->common.res = MLX5_RES_XRQ; + break; + default: + srq->common.res = MLX5_RES_SRQ; + } + + err = create_srq_split(dev, srq, in); + if (err) + return err; + + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, srq->srqn, srq); + 
spin_unlock_irq(&table->lock); + if (err) + goto err_destroy_srq_split; + + return 0; + +err_destroy_srq_split: + destroy_srq_split(dev, srq); + + return err; +} + +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + struct mlx5_srq_table *table = &dev->srq_table; + struct mlx5_core_srq *tmp; + int err; + + spin_lock_irq(&table->lock); + tmp = radix_tree_delete(&table->tree, srq->srqn); + spin_unlock_irq(&table->lock); + if (!tmp || tmp != srq) + return -EINVAL; + + err = destroy_srq_split(dev, srq); + if (err) + return err; + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + wait_for_completion(&srq->free); + + return 0; +} + +int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + if (!dev->mdev->issi) + return query_srq_cmd(dev, srq, out); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return query_xrc_srq_cmd(dev, srq, out); + case MLX5_RES_XRQ: + return query_xrq_cmd(dev, srq, out); + default: + return query_rmp_cmd(dev, srq, out); + } +} + +int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + if (!dev->mdev->issi) + return arm_srq_cmd(dev, srq, lwm, is_srq); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return arm_xrc_srq_cmd(dev, srq, lwm); + case MLX5_RES_XRQ: + return arm_xrq_cmd(dev, srq, lwm); + default: + return arm_rmp_cmd(dev, srq, lwm); + } +} + +static int srq_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_srq_table *table; + struct mlx5_core_srq *srq; + struct mlx5_eqe *eqe; + u32 srqn; + + if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR && + type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT) + return NOTIFY_DONE; + + table = container_of(nb, struct mlx5_srq_table, nb); + + eqe = data; + srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + 
atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + if (!srq) + return NOTIFY_OK; + + srq->event(srq, eqe->type); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + + return NOTIFY_OK; +} + +int mlx5_init_srq_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_srq_table *table = &dev->srq_table; + + memset(table, 0, sizeof(*table)); + spin_lock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + + table->nb.notifier_call = srq_event_notifier; + mlx5_notifier_register(dev->mdev, &table->nb); + + return 0; +} + +void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_srq_table *table = &dev->srq_table; + + mlx5_notifier_unregister(dev->mdev, &table->nb); +} diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 220a3e4717a3..bfd4eebc1182 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -510,7 +510,8 @@ int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe); int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, - struct ib_srq_attr *attr, struct mthca_srq *srq); + struct ib_srq_attr *attr, struct mthca_srq *srq, + struct ib_udata *udata); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); @@ -547,7 +548,8 @@ int mthca_alloc_qp(struct mthca_dev *dev, enum ib_qp_type type, enum ib_sig_type send_policy, struct ib_qp_cap *cap, - struct mthca_qp *qp); + struct mthca_qp *qp, + struct ib_udata *udata); int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, @@ -556,7 +558,8 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp); + struct mthca_sqp *sqp, + struct ib_udata 
*udata); void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp); int mthca_create_ah(struct mthca_dev *dev, struct mthca_pd *pd, diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 2e5dc0a67cfc..7ad517da4917 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -89,13 +89,13 @@ static void update_sm_ah(struct mthca_dev *dev, rdma_ah_set_port_num(&ah_attr, port_num); new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, - &ah_attr); + &ah_attr, 0); if (IS_ERR(new_ah)) return; spin_lock_irqsave(&dev->sm_lock, flags); if (dev->sm_ah[port_num - 1]) - rdma_destroy_ah(dev->sm_ah[port_num - 1]); + rdma_destroy_ah(dev->sm_ah[port_num - 1], 0); dev->sm_ah[port_num - 1] = new_ah; spin_unlock_irqrestore(&dev->sm_lock, flags); } @@ -347,6 +347,7 @@ void mthca_free_agents(struct mthca_dev *dev) } if (dev->sm_ah[p]) - rdma_destroy_ah(dev->sm_ah[p]); + rdma_destroy_ah(dev->sm_ah[p], + RDMA_DESTROY_AH_SLEEPABLE); } } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 691c6f048938..82cb6b71ac7c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -412,6 +412,7 @@ static int mthca_dealloc_pd(struct ib_pd *pd) static struct ib_ah *mthca_ah_create(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { @@ -431,7 +432,7 @@ static struct ib_ah *mthca_ah_create(struct ib_pd *pd, return &ah->ibah; } -static int mthca_ah_destroy(struct ib_ah *ah) +static int mthca_ah_destroy(struct ib_ah *ah, u32 flags) { mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); kfree(ah); @@ -455,7 +456,7 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd, if (!srq) return ERR_PTR(-ENOMEM); - if (pd->uobject) { + if (udata) { context = to_mucontext(pd->uobject->context); if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -475,9 +476,9 @@ 
static struct ib_srq *mthca_create_srq(struct ib_pd *pd, } err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), - &init_attr->attr, srq); + &init_attr->attr, srq, udata); - if (err && pd->uobject) + if (err && udata) mthca_unmap_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.db_index); @@ -537,7 +538,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, if (!qp) return ERR_PTR(-ENOMEM); - if (pd->uobject) { + if (udata) { context = to_mucontext(pd->uobject->context); if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -574,9 +575,9 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), init_attr->qp_type, init_attr->sq_sig_type, - &init_attr->cap, qp); + &init_attr->cap, qp, udata); - if (err && pd->uobject) { + if (err && udata) { context = to_mucontext(pd->uobject->context); mthca_unmap_user_db(to_mdev(pd->device), @@ -596,7 +597,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, case IB_QPT_GSI: { /* Don't allow userspace to create special QPs */ - if (pd->uobject) + if (udata) return ERR_PTR(-EINVAL); qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); @@ -610,7 +611,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, to_mcq(init_attr->recv_cq), init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, - to_msqp(qp)); + to_msqp(qp), udata); break; } default: @@ -1193,6 +1194,81 @@ static void get_dev_fw_str(struct ib_device *device, char *str) (int) dev->fw_ver & 0xffff); } +static const struct ib_device_ops mthca_dev_ops = { + .alloc_pd = mthca_alloc_pd, + .alloc_ucontext = mthca_alloc_ucontext, + .attach_mcast = mthca_multicast_attach, + .create_ah = mthca_ah_create, + .create_cq = mthca_create_cq, + .create_qp = mthca_create_qp, + .dealloc_pd = mthca_dealloc_pd, + .dealloc_ucontext = mthca_dealloc_ucontext, + .dereg_mr = mthca_dereg_mr, + .destroy_ah = mthca_ah_destroy, + .destroy_cq = mthca_destroy_cq, + .destroy_qp = 
mthca_destroy_qp, + .detach_mcast = mthca_multicast_detach, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = mthca_get_dma_mr, + .get_port_immutable = mthca_port_immutable, + .mmap = mthca_mmap_uar, + .modify_device = mthca_modify_device, + .modify_port = mthca_modify_port, + .modify_qp = mthca_modify_qp, + .poll_cq = mthca_poll_cq, + .process_mad = mthca_process_mad, + .query_ah = mthca_ah_query, + .query_device = mthca_query_device, + .query_gid = mthca_query_gid, + .query_pkey = mthca_query_pkey, + .query_port = mthca_query_port, + .query_qp = mthca_query_qp, + .reg_user_mr = mthca_reg_user_mr, + .resize_cq = mthca_resize_cq, +}; + +static const struct ib_device_ops mthca_dev_arbel_srq_ops = { + .create_srq = mthca_create_srq, + .destroy_srq = mthca_destroy_srq, + .modify_srq = mthca_modify_srq, + .post_srq_recv = mthca_arbel_post_srq_recv, + .query_srq = mthca_query_srq, +}; + +static const struct ib_device_ops mthca_dev_tavor_srq_ops = { + .create_srq = mthca_create_srq, + .destroy_srq = mthca_destroy_srq, + .modify_srq = mthca_modify_srq, + .post_srq_recv = mthca_tavor_post_srq_recv, + .query_srq = mthca_query_srq, +}; + +static const struct ib_device_ops mthca_dev_arbel_fmr_ops = { + .alloc_fmr = mthca_alloc_fmr, + .dealloc_fmr = mthca_dealloc_fmr, + .map_phys_fmr = mthca_arbel_map_phys_fmr, + .unmap_fmr = mthca_unmap_fmr, +}; + +static const struct ib_device_ops mthca_dev_tavor_fmr_ops = { + .alloc_fmr = mthca_alloc_fmr, + .dealloc_fmr = mthca_dealloc_fmr, + .map_phys_fmr = mthca_tavor_map_phys_fmr, + .unmap_fmr = mthca_unmap_fmr, +}; + +static const struct ib_device_ops mthca_dev_arbel_ops = { + .post_recv = mthca_arbel_post_receive, + .post_send = mthca_arbel_post_send, + .req_notify_cq = mthca_arbel_arm_cq, +}; + +static const struct ib_device_ops mthca_dev_tavor_ops = { + .post_recv = mthca_tavor_post_receive, + .post_send = mthca_tavor_post_send, + .req_notify_cq = mthca_tavor_arm_cq, +}; + int mthca_register_device(struct mthca_dev *dev) { int ret; 
@@ -1226,26 +1302,8 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; - dev->ib_dev.query_device = mthca_query_device; - dev->ib_dev.query_port = mthca_query_port; - dev->ib_dev.modify_device = mthca_modify_device; - dev->ib_dev.modify_port = mthca_modify_port; - dev->ib_dev.query_pkey = mthca_query_pkey; - dev->ib_dev.query_gid = mthca_query_gid; - dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext; - dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext; - dev->ib_dev.mmap = mthca_mmap_uar; - dev->ib_dev.alloc_pd = mthca_alloc_pd; - dev->ib_dev.dealloc_pd = mthca_dealloc_pd; - dev->ib_dev.create_ah = mthca_ah_create; - dev->ib_dev.query_ah = mthca_ah_query; - dev->ib_dev.destroy_ah = mthca_ah_destroy; if (dev->mthca_flags & MTHCA_FLAG_SRQ) { - dev->ib_dev.create_srq = mthca_create_srq; - dev->ib_dev.modify_srq = mthca_modify_srq; - dev->ib_dev.query_srq = mthca_query_srq; - dev->ib_dev.destroy_srq = mthca_destroy_srq; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | @@ -1253,48 +1311,28 @@ int mthca_register_device(struct mthca_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); if (mthca_is_memfree(dev)) - dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_arbel_srq_ops); else - dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_tavor_srq_ops); } - dev->ib_dev.create_qp = mthca_create_qp; - dev->ib_dev.modify_qp = mthca_modify_qp; - dev->ib_dev.query_qp = mthca_query_qp; - dev->ib_dev.destroy_qp = mthca_destroy_qp; - dev->ib_dev.create_cq = mthca_create_cq; - dev->ib_dev.resize_cq = mthca_resize_cq; - dev->ib_dev.destroy_cq = mthca_destroy_cq; - dev->ib_dev.poll_cq = mthca_poll_cq; - dev->ib_dev.get_dma_mr = mthca_get_dma_mr; - dev->ib_dev.reg_user_mr = mthca_reg_user_mr; - 
dev->ib_dev.dereg_mr = mthca_dereg_mr; - dev->ib_dev.get_port_immutable = mthca_port_immutable; - dev->ib_dev.get_dev_fw_str = get_dev_fw_str; - if (dev->mthca_flags & MTHCA_FLAG_FMR) { - dev->ib_dev.alloc_fmr = mthca_alloc_fmr; - dev->ib_dev.unmap_fmr = mthca_unmap_fmr; - dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr; if (mthca_is_memfree(dev)) - dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_arbel_fmr_ops); else - dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_tavor_fmr_ops); } - dev->ib_dev.attach_mcast = mthca_multicast_attach; - dev->ib_dev.detach_mcast = mthca_multicast_detach; - dev->ib_dev.process_mad = mthca_process_mad; + ib_set_device_ops(&dev->ib_dev, &mthca_dev_ops); - if (mthca_is_memfree(dev)) { - dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq; - dev->ib_dev.post_send = mthca_arbel_post_send; - dev->ib_dev.post_recv = mthca_arbel_post_receive; - } else { - dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq; - dev->ib_dev.post_send = mthca_tavor_post_send; - dev->ib_dev.post_recv = mthca_tavor_post_receive; - } + if (mthca_is_memfree(dev)) + ib_set_device_ops(&dev->ib_dev, &mthca_dev_arbel_ops); + else + ib_set_device_ops(&dev->ib_dev, &mthca_dev_tavor_ops); mutex_init(&dev->cap_mask_mutex); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 9d178ee3c96a..4e5b5cc17f1d 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -981,7 +981,8 @@ static void mthca_adjust_qp_caps(struct mthca_dev *dev, */ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, struct mthca_pd *pd, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int size; int err = -ENOMEM; @@ -1048,7 +1049,7 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, * allocate anything. All we need is to calculate the WQE * sizes and the send_wqe_offset, so we're done now. 
*/ - if (pd->ibpd.uobject) + if (udata) return 0; size = PAGE_ALIGN(qp->send_wqe_offset + @@ -1155,7 +1156,8 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int ret; int i; @@ -1178,7 +1180,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, if (ret) return ret; - ret = mthca_alloc_wqe_buf(dev, pd, qp); + ret = mthca_alloc_wqe_buf(dev, pd, qp, udata); if (ret) { mthca_unmap_memfree(dev, qp); return ret; @@ -1191,7 +1193,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, * will be allocated and buffers will be initialized in * userspace. */ - if (pd->ibpd.uobject) + if (udata) return 0; ret = mthca_alloc_memfree(dev, qp); @@ -1285,7 +1287,8 @@ int mthca_alloc_qp(struct mthca_dev *dev, enum ib_qp_type type, enum ib_sig_type send_policy, struct ib_qp_cap *cap, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int err; @@ -1308,7 +1311,7 @@ int mthca_alloc_qp(struct mthca_dev *dev, qp->port = 0; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, - send_policy, qp); + send_policy, qp, udata); if (err) { mthca_free(&dev->qp_table.alloc, qp->qpn); return err; @@ -1360,7 +1363,8 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp) + struct mthca_sqp *sqp, + struct ib_udata *udata) { u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; int err; @@ -1391,7 +1395,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev, sqp->qp.transport = MLX; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, - send_policy, &sqp->qp); + send_policy, &sqp->qp, udata); if (err) goto err_out_free; diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 9a3fc6fb0d7e..b8333c79e3fa 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ 
-95,7 +95,8 @@ static inline int *wqe_to_link(void *wqe) static void mthca_tavor_init_srq_context(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_srq *srq, - struct mthca_tavor_srq_context *context) + struct mthca_tavor_srq_context *context, + bool is_user) { memset(context, 0, sizeof *context); @@ -103,7 +104,7 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev, context->state_pd = cpu_to_be32(pd->pd_num); context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); - if (pd->ibpd.uobject) + if (is_user) context->uar = cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); else @@ -113,7 +114,8 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev, static void mthca_arbel_init_srq_context(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_srq *srq, - struct mthca_arbel_srq_context *context) + struct mthca_arbel_srq_context *context, + bool is_user) { int logsize, max; @@ -129,7 +131,7 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev, context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); context->db_index = cpu_to_be32(srq->db_index); context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29); - if (pd->ibpd.uobject) + if (is_user) context->logstride_usrpage |= cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); else @@ -145,14 +147,14 @@ static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq) } static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, - struct mthca_srq *srq) + struct mthca_srq *srq, struct ib_udata *udata) { struct mthca_data_seg *scatter; void *wqe; int err; int i; - if (pd->ibpd.uobject) + if (udata) return 0; srq->wrid = kmalloc_array(srq->max, sizeof(u64), GFP_KERNEL); @@ -197,7 +199,8 @@ static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, } int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, - struct ib_srq_attr *attr, struct mthca_srq *srq) + struct ib_srq_attr *attr, struct mthca_srq *srq, + 
struct ib_udata *udata) { struct mthca_mailbox *mailbox; int ds; @@ -235,7 +238,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, if (err) goto err_out; - if (!pd->ibpd.uobject) { + if (!udata) { srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ, srq->srqn, &srq->db); if (srq->db_index < 0) { @@ -251,7 +254,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, goto err_out_db; } - err = mthca_alloc_srq_buf(dev, pd, srq); + err = mthca_alloc_srq_buf(dev, pd, srq, udata); if (err) goto err_out_mailbox; @@ -261,9 +264,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, mutex_init(&srq->mutex); if (mthca_is_memfree(dev)) - mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf); + mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf, udata); else - mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf); + mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf, udata); err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn); @@ -297,14 +300,14 @@ err_out_free_srq: mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); err_out_free_buf: - if (!pd->ibpd.uobject) + if (!udata) mthca_free_srq_buf(dev, srq); err_out_mailbox: mthca_free_mailbox(dev, mailbox); err_out_db: - if (!pd->ibpd.uobject && mthca_is_memfree(dev)) + if (!udata && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); err_out_icm: diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 2b67ace5b614..032883180f65 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -3033,7 +3033,7 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt) /* Need to free the Last Streaming Mode Message */ if (nesqp->ietf_frame) { if (nesqp->lsmm_mr) - nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr); + nesibdev->ibdev.ops.dereg_mr(nesqp->lsmm_mr); pci_free_consistent(nesdev->pcidev, nesqp->private_data_len + nesqp->ietf_frame_size, nesqp->ietf_frame, nesqp->ietf_frame_pbase); diff 
--git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index e96ffff61c3a..cc4dce5c3e5f 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -223,11 +223,11 @@ static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp } old_skb = skb; - skb = skb->next; + skb = skb_peek_next(skb, &nesqp->pau_list); skb_unlink(old_skb, &nesqp->pau_list); nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE); nes_rem_ref_cm_node(nesqp->cm_node); - if (skb == (struct sk_buff *)&nesqp->pau_list) + if (!skb) goto out; } return skb; @@ -551,14 +551,14 @@ static void queue_fpdus(struct sk_buff *skb, struct nes_vnic *nesvnic, struct ne /* Queue skb by sequence number */ if (skb_queue_len(&nesqp->pau_list) == 0) { - skb_queue_head(&nesqp->pau_list, skb); + __skb_queue_head(&nesqp->pau_list, skb); } else { skb_queue_walk(&nesqp->pau_list, tmpskb) { cb = (struct nes_rskb_cb *)&tmpskb->cb[0]; if (before(seqnum, cb->seqnum)) break; } - skb_insert(tmpskb, skb, &nesqp->pau_list); + __skb_insert(skb, tmpskb->prev, tmpskb, &nesqp->pau_list); } if (nesqp->pau_state == PAU_READY) process_it = true; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 92d1cadd4cfd..4e7f08ee1907 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1066,7 +1066,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, } if (req.user_qp_buffer) nesqp->nesuqp_addr = req.user_qp_buffer; - if ((ibpd->uobject) && (ibpd->uobject->context)) { + if (udata && (ibpd->uobject->context)) { nesqp->user_mode = 1; nes_ucontext = to_nesucontext(ibpd->uobject->context); if (virt_wqs) { @@ -1257,7 +1257,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nes_put_cqp_request(nesdev, cqp_request); - if (ibpd->uobject) { + if (udata) { uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index; uresp.mmap_rq_db_index = 0; uresp.actual_sq_size = sq_size; @@ -3627,6 
+3627,39 @@ static void get_dev_fw_str(struct ib_device *dev, char *str) (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); } +static const struct ib_device_ops nes_dev_ops = { + .alloc_mr = nes_alloc_mr, + .alloc_mw = nes_alloc_mw, + .alloc_pd = nes_alloc_pd, + .alloc_ucontext = nes_alloc_ucontext, + .create_cq = nes_create_cq, + .create_qp = nes_create_qp, + .dealloc_mw = nes_dealloc_mw, + .dealloc_pd = nes_dealloc_pd, + .dealloc_ucontext = nes_dealloc_ucontext, + .dereg_mr = nes_dereg_mr, + .destroy_cq = nes_destroy_cq, + .destroy_qp = nes_destroy_qp, + .drain_rq = nes_drain_rq, + .drain_sq = nes_drain_sq, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = nes_get_dma_mr, + .get_port_immutable = nes_port_immutable, + .map_mr_sg = nes_map_mr_sg, + .mmap = nes_mmap, + .modify_qp = nes_modify_qp, + .poll_cq = nes_poll_cq, + .post_recv = nes_post_recv, + .post_send = nes_post_send, + .query_device = nes_query_device, + .query_gid = nes_query_gid, + .query_pkey = nes_query_pkey, + .query_port = nes_query_port, + .query_qp = nes_query_qp, + .reg_user_mr = nes_reg_user_mr, + .req_notify_cq = nes_req_notify_cq, +}; + /** * nes_init_ofa_device */ @@ -3673,36 +3706,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.phys_port_cnt = 1; nesibdev->ibdev.num_comp_vectors = 1; nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev; - nesibdev->ibdev.query_device = nes_query_device; - nesibdev->ibdev.query_port = nes_query_port; - nesibdev->ibdev.query_pkey = nes_query_pkey; - nesibdev->ibdev.query_gid = nes_query_gid; - nesibdev->ibdev.alloc_ucontext = nes_alloc_ucontext; - nesibdev->ibdev.dealloc_ucontext = nes_dealloc_ucontext; - nesibdev->ibdev.mmap = nes_mmap; - nesibdev->ibdev.alloc_pd = nes_alloc_pd; - nesibdev->ibdev.dealloc_pd = nes_dealloc_pd; - nesibdev->ibdev.create_qp = nes_create_qp; - nesibdev->ibdev.modify_qp = nes_modify_qp; - nesibdev->ibdev.query_qp = nes_query_qp; - nesibdev->ibdev.destroy_qp = nes_destroy_qp; - 
nesibdev->ibdev.create_cq = nes_create_cq; - nesibdev->ibdev.destroy_cq = nes_destroy_cq; - nesibdev->ibdev.poll_cq = nes_poll_cq; - nesibdev->ibdev.get_dma_mr = nes_get_dma_mr; - nesibdev->ibdev.reg_user_mr = nes_reg_user_mr; - nesibdev->ibdev.dereg_mr = nes_dereg_mr; - nesibdev->ibdev.alloc_mw = nes_alloc_mw; - nesibdev->ibdev.dealloc_mw = nes_dealloc_mw; - - nesibdev->ibdev.alloc_mr = nes_alloc_mr; - nesibdev->ibdev.map_mr_sg = nes_map_mr_sg; - - nesibdev->ibdev.req_notify_cq = nes_req_notify_cq; - nesibdev->ibdev.post_send = nes_post_send; - nesibdev->ibdev.post_recv = nes_post_recv; - nesibdev->ibdev.drain_sq = nes_drain_sq; - nesibdev->ibdev.drain_rq = nes_drain_rq; nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL); if (nesibdev->ibdev.iwcm == NULL) { @@ -3717,8 +3720,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.iwcm->reject = nes_reject; nesibdev->ibdev.iwcm->create_listen = nes_create_listen; nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; - nesibdev->ibdev.get_port_immutable = nes_port_immutable; - nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str; + + ib_set_device_ops(&nesibdev->ibdev, &nes_dev_ops); memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name, sizeof(nesibdev->ibdev.iwcm->ifname)); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 58188fe5aed2..a7295322efbc 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -157,7 +157,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, } struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { u32 *ahid_addr; int status; @@ -219,7 +219,7 @@ av_err: return ERR_PTR(status); } -int ocrdma_destroy_ah(struct ib_ah *ibah) +int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) { struct ocrdma_ah *ah = get_ocrdma_ah(ibah); 
struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index c0c32c9b80ae..eb996e14b520 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -52,8 +52,8 @@ enum { }; struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); -int ocrdma_destroy_ah(struct ib_ah *ah); + u32 flags, struct ib_udata *udata); +int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int ocrdma_process_mad(struct ib_device *, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 873cc7f6fe61..1f393842453a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -143,6 +143,50 @@ static const struct attribute_group ocrdma_attr_group = { .attrs = ocrdma_attributes, }; +static const struct ib_device_ops ocrdma_dev_ops = { + .alloc_mr = ocrdma_alloc_mr, + .alloc_pd = ocrdma_alloc_pd, + .alloc_ucontext = ocrdma_alloc_ucontext, + .create_ah = ocrdma_create_ah, + .create_cq = ocrdma_create_cq, + .create_qp = ocrdma_create_qp, + .dealloc_pd = ocrdma_dealloc_pd, + .dealloc_ucontext = ocrdma_dealloc_ucontext, + .dereg_mr = ocrdma_dereg_mr, + .destroy_ah = ocrdma_destroy_ah, + .destroy_cq = ocrdma_destroy_cq, + .destroy_qp = ocrdma_destroy_qp, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = ocrdma_get_dma_mr, + .get_link_layer = ocrdma_link_layer, + .get_netdev = ocrdma_get_netdev, + .get_port_immutable = ocrdma_port_immutable, + .map_mr_sg = ocrdma_map_mr_sg, + .mmap = ocrdma_mmap, + .modify_port = ocrdma_modify_port, + .modify_qp = ocrdma_modify_qp, + .poll_cq = ocrdma_poll_cq, + .post_recv = ocrdma_post_recv, + .post_send = ocrdma_post_send, + .process_mad = ocrdma_process_mad, + .query_ah = ocrdma_query_ah, + .query_device = 
ocrdma_query_device, + .query_pkey = ocrdma_query_pkey, + .query_port = ocrdma_query_port, + .query_qp = ocrdma_query_qp, + .reg_user_mr = ocrdma_reg_user_mr, + .req_notify_cq = ocrdma_arm_cq, + .resize_cq = ocrdma_resize_cq, +}; + +static const struct ib_device_ops ocrdma_dev_srq_ops = { + .create_srq = ocrdma_create_srq, + .destroy_srq = ocrdma_destroy_srq, + .modify_srq = ocrdma_modify_srq, + .post_srq_recv = ocrdma_post_srq_recv, + .query_srq = ocrdma_query_srq, +}; + static int ocrdma_register_device(struct ocrdma_dev *dev) { ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); @@ -182,50 +226,10 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = dev->eq_cnt; - /* mandatory verbs. */ - dev->ibdev.query_device = ocrdma_query_device; - dev->ibdev.query_port = ocrdma_query_port; - dev->ibdev.modify_port = ocrdma_modify_port; - dev->ibdev.get_netdev = ocrdma_get_netdev; - dev->ibdev.get_link_layer = ocrdma_link_layer; - dev->ibdev.alloc_pd = ocrdma_alloc_pd; - dev->ibdev.dealloc_pd = ocrdma_dealloc_pd; - - dev->ibdev.create_cq = ocrdma_create_cq; - dev->ibdev.destroy_cq = ocrdma_destroy_cq; - dev->ibdev.resize_cq = ocrdma_resize_cq; - - dev->ibdev.create_qp = ocrdma_create_qp; - dev->ibdev.modify_qp = ocrdma_modify_qp; - dev->ibdev.query_qp = ocrdma_query_qp; - dev->ibdev.destroy_qp = ocrdma_destroy_qp; - - dev->ibdev.query_pkey = ocrdma_query_pkey; - dev->ibdev.create_ah = ocrdma_create_ah; - dev->ibdev.destroy_ah = ocrdma_destroy_ah; - dev->ibdev.query_ah = ocrdma_query_ah; - - dev->ibdev.poll_cq = ocrdma_poll_cq; - dev->ibdev.post_send = ocrdma_post_send; - dev->ibdev.post_recv = ocrdma_post_recv; - dev->ibdev.req_notify_cq = ocrdma_arm_cq; - - dev->ibdev.get_dma_mr = ocrdma_get_dma_mr; - dev->ibdev.dereg_mr = ocrdma_dereg_mr; - dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; - - dev->ibdev.alloc_mr = ocrdma_alloc_mr; - dev->ibdev.map_mr_sg = ocrdma_map_mr_sg; - /* mandatory to support user space 
verbs consumer. */ - dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext; - dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext; - dev->ibdev.mmap = ocrdma_mmap; dev->ibdev.dev.parent = &dev->nic_info.pdev->dev; - dev->ibdev.process_mad = ocrdma_process_mad; - dev->ibdev.get_port_immutable = ocrdma_port_immutable; - dev->ibdev.get_dev_fw_str = get_dev_fw_str; + ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops); if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= @@ -235,11 +239,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) OCRDMA_UVERBS(DESTROY_SRQ) | OCRDMA_UVERBS(POST_SRQ_RECV); - dev->ibdev.create_srq = ocrdma_create_srq; - dev->ibdev.modify_srq = ocrdma_modify_srq; - dev->ibdev.query_srq = ocrdma_query_srq; - dev->ibdev.destroy_srq = ocrdma_destroy_srq; - dev->ibdev.post_srq_recv = ocrdma_post_srq_recv; + ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops); } rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 290d776edf48..dd15474b19b7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -760,12 +760,13 @@ static const struct file_operations ocrdma_dbg_ops = { void ocrdma_add_port_stats(struct ocrdma_dev *dev) { + const struct pci_dev *pdev = dev->nic_info.pdev; + if (!ocrdma_dbgfs_dir) return; /* Create post stats base dir */ - dev->dir = - debugfs_create_dir(dev_name(&dev->ibdev.dev), ocrdma_dbgfs_dir); + dev->dir = debugfs_create_dir(pci_name(pdev), ocrdma_dbgfs_dir); if (!dev->dir) goto err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 06d2a7f3304c..c46bed0c5513 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -177,11 +177,6 @@ int ocrdma_query_port(struct ib_device 
*ibdev, /* props being zeroed by the caller, avoid zeroing it here */ dev = get_ocrdma_dev(ibdev); - if (port > 1) { - pr_err("%s(%d) invalid_port=0x%x\n", __func__, - dev->id, port); - return -EINVAL; - } netdev = dev->nic_info.netdev; if (netif_running(netdev) && netif_oper_up(netdev)) { port_state = IB_PORT_ACTIVE; @@ -215,13 +210,6 @@ int ocrdma_query_port(struct ib_device *ibdev, int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { - struct ocrdma_dev *dev; - - dev = get_ocrdma_dev(ibdev); - if (port > 1) { - pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port); - return -EINVAL; - } return 0; } @@ -1169,7 +1157,8 @@ static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp) } static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, - struct ib_qp_init_attr *attrs) + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) { if ((attrs->qp_type != IB_QPT_GSI) && (attrs->qp_type != IB_QPT_RC) && @@ -1217,7 +1206,7 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, return -EINVAL; } /* unprivileged user space cannot create special QP */ - if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { + if (udata && attrs->qp_type == IB_QPT_GSI) { pr_err ("%s(%d) Userspace can't create special QPs of type=0x%x\n", __func__, dev->id, attrs->qp_type); @@ -1374,7 +1363,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, struct ocrdma_create_qp_ureq ureq; u16 dpp_credit_lmt, dpp_offset; - status = ocrdma_check_qp_params(ibpd, dev, attrs); + status = ocrdma_check_qp_params(ibpd, dev, attrs, udata); if (status) goto gen_err; diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 8d6ff9df49fe..75940e2a8791 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -160,12 +160,16 @@ static const struct attribute_group qedr_attr_group = { .attrs = qedr_attributes, }; +static const struct 
ib_device_ops qedr_iw_dev_ops = { + .get_port_immutable = qedr_iw_port_immutable, + .query_gid = qedr_iw_query_gid, +}; + static int qedr_iw_register_device(struct qedr_dev *dev) { dev->ibdev.node_type = RDMA_NODE_RNIC; - dev->ibdev.query_gid = qedr_iw_query_gid; - dev->ibdev.get_port_immutable = qedr_iw_port_immutable; + ib_set_device_ops(&dev->ibdev, &qedr_iw_dev_ops); dev->ibdev.iwcm = kzalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); if (!dev->ibdev.iwcm) @@ -186,13 +190,56 @@ static int qedr_iw_register_device(struct qedr_dev *dev) return 0; } +static const struct ib_device_ops qedr_roce_dev_ops = { + .get_port_immutable = qedr_roce_port_immutable, +}; + static void qedr_roce_register_device(struct qedr_dev *dev) { dev->ibdev.node_type = RDMA_NODE_IB_CA; - dev->ibdev.get_port_immutable = qedr_roce_port_immutable; + ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops); } +static const struct ib_device_ops qedr_dev_ops = { + .alloc_mr = qedr_alloc_mr, + .alloc_pd = qedr_alloc_pd, + .alloc_ucontext = qedr_alloc_ucontext, + .create_ah = qedr_create_ah, + .create_cq = qedr_create_cq, + .create_qp = qedr_create_qp, + .create_srq = qedr_create_srq, + .dealloc_pd = qedr_dealloc_pd, + .dealloc_ucontext = qedr_dealloc_ucontext, + .dereg_mr = qedr_dereg_mr, + .destroy_ah = qedr_destroy_ah, + .destroy_cq = qedr_destroy_cq, + .destroy_qp = qedr_destroy_qp, + .destroy_srq = qedr_destroy_srq, + .get_dev_fw_str = qedr_get_dev_fw_str, + .get_dma_mr = qedr_get_dma_mr, + .get_link_layer = qedr_link_layer, + .get_netdev = qedr_get_netdev, + .map_mr_sg = qedr_map_mr_sg, + .mmap = qedr_mmap, + .modify_port = qedr_modify_port, + .modify_qp = qedr_modify_qp, + .modify_srq = qedr_modify_srq, + .poll_cq = qedr_poll_cq, + .post_recv = qedr_post_recv, + .post_send = qedr_post_send, + .post_srq_recv = qedr_post_srq_recv, + .process_mad = qedr_process_mad, + .query_device = qedr_query_device, + .query_pkey = qedr_query_pkey, + .query_port = qedr_query_port, + .query_qp = qedr_query_qp, + 
.query_srq = qedr_query_srq, + .reg_user_mr = qedr_reg_user_mr, + .req_notify_cq = qedr_arm_cq, + .resize_cq = qedr_resize_cq, +}; + static int qedr_register_device(struct qedr_dev *dev) { int rc; @@ -237,57 +284,11 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = dev->num_cnq; - - dev->ibdev.query_device = qedr_query_device; - dev->ibdev.query_port = qedr_query_port; - dev->ibdev.modify_port = qedr_modify_port; - - dev->ibdev.alloc_ucontext = qedr_alloc_ucontext; - dev->ibdev.dealloc_ucontext = qedr_dealloc_ucontext; - dev->ibdev.mmap = qedr_mmap; - - dev->ibdev.alloc_pd = qedr_alloc_pd; - dev->ibdev.dealloc_pd = qedr_dealloc_pd; - - dev->ibdev.create_cq = qedr_create_cq; - dev->ibdev.destroy_cq = qedr_destroy_cq; - dev->ibdev.resize_cq = qedr_resize_cq; - dev->ibdev.req_notify_cq = qedr_arm_cq; - - dev->ibdev.create_qp = qedr_create_qp; - dev->ibdev.modify_qp = qedr_modify_qp; - dev->ibdev.query_qp = qedr_query_qp; - dev->ibdev.destroy_qp = qedr_destroy_qp; - - dev->ibdev.create_srq = qedr_create_srq; - dev->ibdev.destroy_srq = qedr_destroy_srq; - dev->ibdev.modify_srq = qedr_modify_srq; - dev->ibdev.query_srq = qedr_query_srq; - dev->ibdev.post_srq_recv = qedr_post_srq_recv; - dev->ibdev.query_pkey = qedr_query_pkey; - - dev->ibdev.create_ah = qedr_create_ah; - dev->ibdev.destroy_ah = qedr_destroy_ah; - - dev->ibdev.get_dma_mr = qedr_get_dma_mr; - dev->ibdev.dereg_mr = qedr_dereg_mr; - dev->ibdev.reg_user_mr = qedr_reg_user_mr; - dev->ibdev.alloc_mr = qedr_alloc_mr; - dev->ibdev.map_mr_sg = qedr_map_mr_sg; - - dev->ibdev.poll_cq = qedr_poll_cq; - dev->ibdev.post_send = qedr_post_send; - dev->ibdev.post_recv = qedr_post_recv; - - dev->ibdev.process_mad = qedr_process_mad; - - dev->ibdev.get_netdev = qedr_get_netdev; - dev->ibdev.dev.parent = &dev->pdev->dev; - dev->ibdev.get_link_layer = qedr_link_layer; - dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str; rdma_set_device_sysfs_group(&dev->ibdev, 
&qedr_attr_group); + ib_set_device_ops(&dev->ibdev, &qedr_dev_ops); + dev->ibdev.driver_id = RDMA_DRIVER_QEDR; return ib_register_device(&dev->ibdev, "qedr%d", NULL); } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 82ee4b4a7084..b342a70e2814 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -216,10 +216,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) struct qed_rdma_port *rdma_port; dev = get_qedr_dev(ibdev); - if (port > 1) { - DP_ERR(dev, "invalid_port=0x%x\n", port); - return -EINVAL; - } if (!dev->rdma_ctx) { DP_ERR(dev, "rdma_ctx is NULL\n"); @@ -263,14 +259,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { - struct qedr_dev *dev; - - dev = get_qedr_dev(ibdev); - if (port > 1) { - DP_ERR(dev, "invalid_port=0x%x\n", port); - return -EINVAL; - } - return 0; } @@ -1148,7 +1136,8 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, } static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, - struct ib_qp_init_attr *attrs) + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) { struct qedr_device_attr *qattr = &dev->attr; @@ -1189,7 +1178,7 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, } /* Unprivileged user space cannot create special QP */ - if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { + if (udata && attrs->qp_type == IB_QPT_GSI) { DP_ERR(dev, "create qp: userspace can't create special QPs of type=0x%x\n", attrs->qp_type); @@ -1552,7 +1541,7 @@ int qedr_destroy_srq(struct ib_srq *ibsrq) in_params.srq_id = srq->srq_id; dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params); - if (ibsrq->pd->uobject) + if (ibsrq->uobject) qedr_free_srq_user_params(srq); else qedr_free_srq_kernel_params(srq); @@ -2005,7 +1994,7 @@ struct ib_qp 
*qedr_create_qp(struct ib_pd *ibpd, DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n", udata ? "user library" : "kernel", pd); - rc = qedr_check_qp_attrs(ibpd, dev, attrs); + rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata); if (rc) return ERR_PTR(rc); @@ -2626,7 +2615,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp) } struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct qedr_ah *ah; @@ -2639,7 +2628,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, return &ah->ibah; } -int qedr_destroy_ah(struct ib_ah *ibah) +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags) { struct qedr_ah *ah = get_qedr_ah(ibah); diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 0b7d0124b16c..1852b7012bf4 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -76,8 +76,8 @@ int qedr_destroy_srq(struct ib_srq *ibsrq); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - struct ib_udata *udata); -int qedr_destroy_ah(struct ib_ah *ibah); + u32 flags, struct ib_udata *udata); +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags); int qedr_dereg_mr(struct ib_mr *); struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index fb1ff59f40bd..cdbf707fa267 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3237,7 +3237,6 @@ static int init_6120_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? 
max(ret, 2048) : QIB_DEFAULT_MTU; - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_6120_tidtemplate(dd); diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 163a57a88742..9fde45538f6e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4043,7 +4043,6 @@ static int qib_init_7220_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7220_tidtemplate(dd); @@ -4252,7 +4251,6 @@ static int init_sdma_7220_regs(struct qib_pportdata *ppd) unsigned word = i / 64; unsigned bit = i & 63; - BUG_ON(word >= 3); senddmabufmask[word] |= 1ULL << bit; } qib_write_kreg(dd, kr_senddmabufmask0, senddmabufmask[0]); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index bf5e222eed8e..17d6b24b3473 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1382,7 +1382,6 @@ static void err_decode(char *msg, size_t len, u64 errs, *msg++ = ','; len--; } - BUG_ON(!msp->sz); /* msp->sz counts the nul */ took = min_t(size_t, msp->sz - (size_t)1, len); memcpy(msg, msp->msg, took); @@ -6599,7 +6598,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ dd->rcvegrbufsize = max(mtu, 2048); - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7322_tidtemplate(dd); @@ -6904,7 +6902,6 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd) unsigned word = erstbuf / BITS_PER_LONG; unsigned bit = erstbuf & (BITS_PER_LONG - 1); - BUG_ON(word >= 3); senddmabufmask[word] |= 1ULL << bit; } 
qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]); diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index d7cdc77d6306..9fd69903ca57 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -209,7 +209,6 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt + rcd->rcvegrbufs_perchunk - 1) / rcd->rcvegrbufs_perchunk; - BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk)); rcd->rcvegrbufs_perchunk_shift = ilog2(rcd->rcvegrbufs_perchunk); } diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 4845d000c22f..f92faf5ec369 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2494,5 +2494,6 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx) del_timer_sync(&dd->pport[port_idx].cong_stats.timer); if (dd->pport[port_idx].ibport_data.smi_ah) - rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah); + rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah, + RDMA_DESTROY_AH_SLEEPABLE); } diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 30595b358d8f..864f2af171f7 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -387,7 +387,7 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline) static int qib_pcie_coalesce; module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO); -MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets"); +MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets"); /* * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300 diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 757d4c9d713d..3d64081c4819 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ 
b/drivers/infiniband/hw/qib/qib_sdma.c @@ -572,12 +572,13 @@ retry: len = sge->length; if (len > sge->sge_length) len = sge->sge_length; - BUG_ON(len == 0); dw = (len + 3) >> 2; addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr, dw << 2, DMA_TO_DEVICE); - if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) + if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) { + ret = -ENOMEM; goto unmap; + } sdmadesc[0] = 0; make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset); /* SDmaUseLargeBuf has to be set in every descriptor */ diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 4d4c31ea4e2d..868da0ece7ba 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -178,7 +178,6 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) len = length; if (len > sge->sge_length) len = sge->sge_length; - BUG_ON(len == 0); rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false); sge->vaddr += len; sge->length -= len; diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 926f3c8eba69..31c523b2a9f5 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -237,7 +237,6 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, sdma_rb_node); - BUG_ON(ret == 0); } pq->sdma_rb_node = sdma_rb_node; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 4b0f5761a646..276304f611ab 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -150,7 +150,6 @@ static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length) len = length; if (len > sge.sge_length) len = sge.sge_length; - BUG_ON(len == 0); if (((long) sge.vaddr & (sizeof(u32) - 1)) || (len != length && (len & (sizeof(u32) - 1)))) { ndesc = 0; @@ -193,7 +192,6 @@ static void 
qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length) len = length; if (len > sge->sge_length) len = sge->sge_length; - BUG_ON(len == 0); memcpy(data, sge->vaddr, len); sge->vaddr += len; sge->length -= len; @@ -449,7 +447,6 @@ static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss, len = length; if (len > ss->sge.sge_length) len = ss->sge.sge_length; - BUG_ON(len == 0); /* If the source address is not aligned, try to align it. */ off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); if (off) { @@ -1365,7 +1362,7 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) rcu_read_lock(); qp0 = rcu_dereference(ibp->rvp.qp[0]); if (qp0) - ah = rdma_create_ah(qp0->ibqp.pd, &attr); + ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0); rcu_read_unlock(); return ah; } @@ -1496,6 +1493,11 @@ static void qib_fill_device_attr(struct qib_devdata *dd) dd->verbs_dev.rdi.wc_opcode = ib_qib_wc_opcode; } +static const struct ib_device_ops qib_dev_ops = { + .modify_device = qib_modify_device, + .process_mad = qib_process_mad, +}; + /** * qib_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1558,8 +1560,6 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->node_guid = ppd->guid; ibdev->phys_port_cnt = dd->num_pports; ibdev->dev.parent = &dd->pcidev->dev; - ibdev->modify_device = qib_modify_device; - ibdev->process_mad = qib_process_mad; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), "Intel Infiniband HCA %s", init_utsname()->nodename); @@ -1627,6 +1627,7 @@ int qib_register_ib_device(struct qib_devdata *dd) } rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group); + ib_set_device_ops(ibdev, &qib_dev_ops); ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB); if (ret) goto err_tx; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 73bd00f8d2c8..b2323a52a0dd 100644 --- 
a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -330,6 +330,37 @@ static void usnic_get_dev_fw_str(struct ib_device *device, char *str) snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); } +static const struct ib_device_ops usnic_dev_ops = { + .alloc_pd = usnic_ib_alloc_pd, + .alloc_ucontext = usnic_ib_alloc_ucontext, + .create_ah = usnic_ib_create_ah, + .create_cq = usnic_ib_create_cq, + .create_qp = usnic_ib_create_qp, + .dealloc_pd = usnic_ib_dealloc_pd, + .dealloc_ucontext = usnic_ib_dealloc_ucontext, + .dereg_mr = usnic_ib_dereg_mr, + .destroy_ah = usnic_ib_destroy_ah, + .destroy_cq = usnic_ib_destroy_cq, + .destroy_qp = usnic_ib_destroy_qp, + .get_dev_fw_str = usnic_get_dev_fw_str, + .get_dma_mr = usnic_ib_get_dma_mr, + .get_link_layer = usnic_ib_port_link_layer, + .get_netdev = usnic_get_netdev, + .get_port_immutable = usnic_port_immutable, + .mmap = usnic_ib_mmap, + .modify_qp = usnic_ib_modify_qp, + .poll_cq = usnic_ib_poll_cq, + .post_recv = usnic_ib_post_recv, + .post_send = usnic_ib_post_send, + .query_device = usnic_ib_query_device, + .query_gid = usnic_ib_query_gid, + .query_pkey = usnic_ib_query_pkey, + .query_port = usnic_ib_query_port, + .query_qp = usnic_ib_query_qp, + .reg_user_mr = usnic_ib_reg_mr, + .req_notify_cq = usnic_ib_req_notify_cq, +}; + /* Start of PF discovery section */ static void *usnic_ib_device_add(struct pci_dev *dev) { @@ -386,35 +417,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); - us_ibdev->ib_dev.query_device = usnic_ib_query_device; - us_ibdev->ib_dev.query_port = usnic_ib_query_port; - us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey; - us_ibdev->ib_dev.query_gid = usnic_ib_query_gid; - us_ibdev->ib_dev.get_netdev = usnic_get_netdev; - us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer; - us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd; - 
us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd; - us_ibdev->ib_dev.create_qp = usnic_ib_create_qp; - us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp; - us_ibdev->ib_dev.query_qp = usnic_ib_query_qp; - us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp; - us_ibdev->ib_dev.create_cq = usnic_ib_create_cq; - us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq; - us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr; - us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr; - us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext; - us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext; - us_ibdev->ib_dev.mmap = usnic_ib_mmap; - us_ibdev->ib_dev.create_ah = usnic_ib_create_ah; - us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah; - us_ibdev->ib_dev.post_send = usnic_ib_post_send; - us_ibdev->ib_dev.post_recv = usnic_ib_post_recv; - us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq; - us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; - us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; - us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable; - us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str; - + ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops); us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC; rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group); @@ -649,7 +652,7 @@ static int __init usnic_ib_init(void) err = usnic_uiom_init(DRV_NAME); if (err) { - usnic_err("Unable to initalize umem with err %d\n", err); + usnic_err("Unable to initialize umem with err %d\n", err); return err; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index bf5136533d49..0cdb156e165e 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -681,7 +681,7 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport], res_spec); if (err) { - usnic_err("Spec does not meet miniumum 
req for transport %d\n", + usnic_err("Spec does not meet minimum req for transport %d\n", transport); log_spec(res_spec); return ERR_PTR(err); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 0b91ff36768a..1d4abef17e38 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -336,13 +336,16 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, usnic_dbg("\n"); - mutex_lock(&us_ibdev->usdev_lock); if (ib_get_eth_speed(ibdev, port, &props->active_speed, - &props->active_width)) { - mutex_unlock(&us_ibdev->usdev_lock); + &props->active_width)) return -EINVAL; - } + /* + * usdev_lock is acquired after (and not before) ib_get_eth_speed call + * because acquiring rtnl_lock in ib_get_eth_speed, while holding + * usdev_lock could lead to a deadlock. + */ + mutex_lock(&us_ibdev->usdev_lock); /* props being zeroed by the caller, avoid zeroing it here */ props->lid = 0; @@ -760,6 +763,7 @@ int usnic_ib_mmap(struct ib_ucontext *context, /* In ib callbacks section - Start of stub funcs */ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { @@ -767,7 +771,7 @@ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, return ERR_PTR(-EPERM); } -int usnic_ib_destroy_ah(struct ib_ah *ah) +int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags) { usnic_dbg("\n"); return -EINVAL; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 2a2c9beb715f..e33144261b9a 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -77,9 +77,10 @@ int usnic_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); -int usnic_ib_destroy_ah(struct ib_ah *ah); +int 
usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags); int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 398443f43dc3..eaa109dbc96a 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -161,6 +161,49 @@ static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev, return netdev; } +static const struct ib_device_ops pvrdma_dev_ops = { + .add_gid = pvrdma_add_gid, + .alloc_mr = pvrdma_alloc_mr, + .alloc_pd = pvrdma_alloc_pd, + .alloc_ucontext = pvrdma_alloc_ucontext, + .create_ah = pvrdma_create_ah, + .create_cq = pvrdma_create_cq, + .create_qp = pvrdma_create_qp, + .dealloc_pd = pvrdma_dealloc_pd, + .dealloc_ucontext = pvrdma_dealloc_ucontext, + .del_gid = pvrdma_del_gid, + .dereg_mr = pvrdma_dereg_mr, + .destroy_ah = pvrdma_destroy_ah, + .destroy_cq = pvrdma_destroy_cq, + .destroy_qp = pvrdma_destroy_qp, + .get_dev_fw_str = pvrdma_get_fw_ver_str, + .get_dma_mr = pvrdma_get_dma_mr, + .get_link_layer = pvrdma_port_link_layer, + .get_netdev = pvrdma_get_netdev, + .get_port_immutable = pvrdma_port_immutable, + .map_mr_sg = pvrdma_map_mr_sg, + .mmap = pvrdma_mmap, + .modify_port = pvrdma_modify_port, + .modify_qp = pvrdma_modify_qp, + .poll_cq = pvrdma_poll_cq, + .post_recv = pvrdma_post_recv, + .post_send = pvrdma_post_send, + .query_device = pvrdma_query_device, + .query_gid = pvrdma_query_gid, + .query_pkey = pvrdma_query_pkey, + .query_port = pvrdma_query_port, + .query_qp = pvrdma_query_qp, + .reg_user_mr = pvrdma_reg_user_mr, + .req_notify_cq = pvrdma_req_notify_cq, +}; + +static const struct ib_device_ops pvrdma_dev_srq_ops = { + .create_srq = pvrdma_create_srq, + .destroy_srq = pvrdma_destroy_srq, + .modify_srq = pvrdma_modify_srq, + .query_srq = 
pvrdma_query_srq, +}; + static int pvrdma_register_device(struct pvrdma_dev *dev) { int ret = -1; @@ -197,39 +240,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; - dev->ib_dev.query_device = pvrdma_query_device; - dev->ib_dev.query_port = pvrdma_query_port; - dev->ib_dev.query_gid = pvrdma_query_gid; - dev->ib_dev.query_pkey = pvrdma_query_pkey; - dev->ib_dev.modify_port = pvrdma_modify_port; - dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext; - dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext; - dev->ib_dev.mmap = pvrdma_mmap; - dev->ib_dev.alloc_pd = pvrdma_alloc_pd; - dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd; - dev->ib_dev.create_ah = pvrdma_create_ah; - dev->ib_dev.destroy_ah = pvrdma_destroy_ah; - dev->ib_dev.create_qp = pvrdma_create_qp; - dev->ib_dev.modify_qp = pvrdma_modify_qp; - dev->ib_dev.query_qp = pvrdma_query_qp; - dev->ib_dev.destroy_qp = pvrdma_destroy_qp; - dev->ib_dev.post_send = pvrdma_post_send; - dev->ib_dev.post_recv = pvrdma_post_recv; - dev->ib_dev.create_cq = pvrdma_create_cq; - dev->ib_dev.destroy_cq = pvrdma_destroy_cq; - dev->ib_dev.poll_cq = pvrdma_poll_cq; - dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq; - dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr; - dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr; - dev->ib_dev.dereg_mr = pvrdma_dereg_mr; - dev->ib_dev.alloc_mr = pvrdma_alloc_mr; - dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg; - dev->ib_dev.add_gid = pvrdma_add_gid; - dev->ib_dev.del_gid = pvrdma_del_gid; - dev->ib_dev.get_netdev = pvrdma_get_netdev; - dev->ib_dev.get_port_immutable = pvrdma_port_immutable; - dev->ib_dev.get_link_layer = pvrdma_port_link_layer; - dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str; + ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_ops); mutex_init(&dev->port_mutex); spin_lock_init(&dev->desc_lock); @@ -255,10 +266,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) (1ull << 
IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - dev->ib_dev.create_srq = pvrdma_create_srq; - dev->ib_dev.modify_srq = pvrdma_modify_srq; - dev->ib_dev.query_srq = pvrdma_query_srq; - dev->ib_dev.destroy_srq = pvrdma_destroy_srq; + ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops); dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, sizeof(struct pvrdma_srq *), diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index cf22f57a9f0d..3acf74cbe266 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -249,7 +249,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, init_completion(&qp->free); qp->state = IB_QPS_RESET; - qp->is_kernel = !(pd->uobject && udata); + qp->is_kernel = !udata; if (!qp->is_kernel) { dev_dbg(&dev->pdev->dev, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index dc0ce877c7a3..06ba7c7a2235 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -111,7 +111,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, unsigned long flags; int ret; - if (!(pd->uobject && udata)) { + if (!udata) { /* No support for kernel clients. */ dev_warn(&dev->pdev->dev, "no shared receive queue support for kernel client\n"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index b65d10b0a875..4d238d0e484b 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -533,11 +533,12 @@ int pvrdma_dealloc_pd(struct ib_pd *pd) * @pd: the protection domain * @ah_attr: the attributes of the AH * @udata: user data blob + * @flags: create address handle flags (see enum rdma_create_ah_flags) * * @return: the ib_ah pointer on success, otherwise errno. 
*/ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct pvrdma_dev *dev = to_vdev(pd->device); struct pvrdma_ah *ah; @@ -555,7 +556,7 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah)) return ERR_PTR(-ENOMEM); - ah = kzalloc(sizeof(*ah), GFP_KERNEL); + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) { atomic_dec(&dev->num_ahs); return ERR_PTR(-ENOMEM); @@ -581,10 +582,11 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, /** * pvrdma_destroy_ah - destroy an address handle * @ah: the address handle to destroyed + * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags) * * @return: 0 on success. */ -int pvrdma_destroy_ah(struct ib_ah *ah) +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) { struct pvrdma_dev *dev = to_vdev(ah->device); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index b2e3ab50cb08..f7f758d60110 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -420,8 +420,8 @@ int pvrdma_destroy_cq(struct ib_cq *cq); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); -int pvrdma_destroy_ah(struct ib_ah *ah); + u32 flags, struct ib_udata *udata); +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 084bb4baebb5..fc10e4e26ca7 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -91,6 +91,7 
@@ EXPORT_SYMBOL(rvt_check_ah); * rvt_create_ah - create an address handle * @pd: the protection domain * @ah_attr: the attributes of the AH + * @create_flags: create address handle flags (see enum rdma_create_ah_flags) * @udata: pointer to user's input output buffer information. * * This may be called from interrupt context. @@ -99,6 +100,7 @@ EXPORT_SYMBOL(rvt_check_ah); */ struct ib_ah *rvt_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 create_flags, struct ib_udata *udata) { struct rvt_ah *ah; @@ -135,10 +137,11 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, /** * rvt_destory_ah - Destory an address handle * @ibah: address handle + * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) * * Return: 0 on success */ -int rvt_destroy_ah(struct ib_ah *ibah) +int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) { struct rvt_dev_info *dev = ib_to_rvt(ibah->device); struct rvt_ah *ah = ibah_to_rvtah(ibah); diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index 25271b48a683..72431a618d5d 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -52,8 +52,9 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 create_flags, struct ib_udata *udata); -int rvt_destroy_ah(struct ib_ah *ibah); +int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags); int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index d6981dc04adb..108c71e3ac23 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -160,7 +160,8 @@ void rvt_free_mad_agents(struct rvt_dev_info *rdi) ib_unregister_mad_agent(agent); } if (rvp->sm_ah) { - rdma_destroy_ah(&rvp->sm_ah->ibah); + rdma_destroy_ah(&rvp->sm_ah->ibah, + RDMA_DESTROY_AH_SLEEPABLE); rvp->sm_ah = NULL; 
} diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 1735deb1a9d4..a1bd8cfc2c25 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016, 2017 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1094,6 +1094,13 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; rvt_init_qp(rdi, qp, init_attr->qp_type); + if (rdi->driver_f.qp_priv_init) { + err = rdi->driver_f.qp_priv_init(rdi, qp, init_attr); + if (err) { + ret = ERR_PTR(err); + goto bail_rq_wq; + } + } break; default: diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 723d3daf2eba..aef3aa3fe667 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -392,16 +392,51 @@ enum { _VERB_IDX_MAX /* Must always be last! 
*/ }; -static inline int check_driver_override(struct rvt_dev_info *rdi, - size_t offset, void *func) -{ - if (!*(void **)((void *)&rdi->ibdev + offset)) { - *(void **)((void *)&rdi->ibdev + offset) = func; - return 0; - } - - return 1; -} +static const struct ib_device_ops rvt_dev_ops = { + .alloc_fmr = rvt_alloc_fmr, + .alloc_mr = rvt_alloc_mr, + .alloc_pd = rvt_alloc_pd, + .alloc_ucontext = rvt_alloc_ucontext, + .attach_mcast = rvt_attach_mcast, + .create_ah = rvt_create_ah, + .create_cq = rvt_create_cq, + .create_qp = rvt_create_qp, + .create_srq = rvt_create_srq, + .dealloc_fmr = rvt_dealloc_fmr, + .dealloc_pd = rvt_dealloc_pd, + .dealloc_ucontext = rvt_dealloc_ucontext, + .dereg_mr = rvt_dereg_mr, + .destroy_ah = rvt_destroy_ah, + .destroy_cq = rvt_destroy_cq, + .destroy_qp = rvt_destroy_qp, + .destroy_srq = rvt_destroy_srq, + .detach_mcast = rvt_detach_mcast, + .get_dma_mr = rvt_get_dma_mr, + .get_port_immutable = rvt_get_port_immutable, + .map_mr_sg = rvt_map_mr_sg, + .map_phys_fmr = rvt_map_phys_fmr, + .mmap = rvt_mmap, + .modify_ah = rvt_modify_ah, + .modify_device = rvt_modify_device, + .modify_port = rvt_modify_port, + .modify_qp = rvt_modify_qp, + .modify_srq = rvt_modify_srq, + .poll_cq = rvt_poll_cq, + .post_recv = rvt_post_recv, + .post_send = rvt_post_send, + .post_srq_recv = rvt_post_srq_recv, + .query_ah = rvt_query_ah, + .query_device = rvt_query_device, + .query_gid = rvt_query_gid, + .query_pkey = rvt_query_pkey, + .query_port = rvt_query_port, + .query_qp = rvt_query_qp, + .query_srq = rvt_query_srq, + .reg_user_mr = rvt_reg_user_mr, + .req_notify_cq = rvt_req_notify_cq, + .resize_cq = rvt_resize_cq, + .unmap_fmr = rvt_unmap_fmr, +}; static noinline int check_support(struct rvt_dev_info *rdi, int verb) { @@ -416,76 +451,36 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) return -EINVAL; break; - case QUERY_DEVICE: - check_driver_override(rdi, offsetof(struct ib_device, - query_device), - rvt_query_device); - break; - 
case MODIFY_DEVICE: /* * rdmavt does not support modify device currently drivers must * provide. */ - if (!check_driver_override(rdi, offsetof(struct ib_device, - modify_device), - rvt_modify_device)) + if (!rdi->ibdev.ops.modify_device) return -EOPNOTSUPP; break; case QUERY_PORT: - if (!check_driver_override(rdi, offsetof(struct ib_device, - query_port), - rvt_query_port)) + if (!rdi->ibdev.ops.query_port) if (!rdi->driver_f.query_port_state) return -EINVAL; break; case MODIFY_PORT: - if (!check_driver_override(rdi, offsetof(struct ib_device, - modify_port), - rvt_modify_port)) + if (!rdi->ibdev.ops.modify_port) if (!rdi->driver_f.cap_mask_chg || !rdi->driver_f.shut_down_port) return -EINVAL; break; - case QUERY_PKEY: - check_driver_override(rdi, offsetof(struct ib_device, - query_pkey), - rvt_query_pkey); - break; - case QUERY_GID: - if (!check_driver_override(rdi, offsetof(struct ib_device, - query_gid), - rvt_query_gid)) + if (!rdi->ibdev.ops.query_gid) if (!rdi->driver_f.get_guid_be) return -EINVAL; break; - case ALLOC_UCONTEXT: - check_driver_override(rdi, offsetof(struct ib_device, - alloc_ucontext), - rvt_alloc_ucontext); - break; - - case DEALLOC_UCONTEXT: - check_driver_override(rdi, offsetof(struct ib_device, - dealloc_ucontext), - rvt_dealloc_ucontext); - break; - - case GET_PORT_IMMUTABLE: - check_driver_override(rdi, offsetof(struct ib_device, - get_port_immutable), - rvt_get_port_immutable); - break; - case CREATE_QP: - if (!check_driver_override(rdi, offsetof(struct ib_device, - create_qp), - rvt_create_qp)) + if (!rdi->ibdev.ops.create_qp) if (!rdi->driver_f.qp_priv_alloc || !rdi->driver_f.qp_priv_free || !rdi->driver_f.notify_qp_reset || @@ -496,9 +491,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) break; case MODIFY_QP: - if (!check_driver_override(rdi, offsetof(struct ib_device, - modify_qp), - rvt_modify_qp)) + if (!rdi->ibdev.ops.modify_qp) if (!rdi->driver_f.notify_qp_reset || !rdi->driver_f.schedule_send || 
!rdi->driver_f.get_pmtu_from_attr || @@ -512,9 +505,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) break; case DESTROY_QP: - if (!check_driver_override(rdi, offsetof(struct ib_device, - destroy_qp), - rvt_destroy_qp)) + if (!rdi->ibdev.ops.destroy_qp) if (!rdi->driver_f.qp_priv_free || !rdi->driver_f.notify_qp_reset || !rdi->driver_f.flush_qp_waiters || @@ -523,197 +514,14 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) return -EINVAL; break; - case QUERY_QP: - check_driver_override(rdi, offsetof(struct ib_device, - query_qp), - rvt_query_qp); - break; - case POST_SEND: - if (!check_driver_override(rdi, offsetof(struct ib_device, - post_send), - rvt_post_send)) + if (!rdi->ibdev.ops.post_send) if (!rdi->driver_f.schedule_send || !rdi->driver_f.do_send || !rdi->post_parms) return -EINVAL; break; - case POST_RECV: - check_driver_override(rdi, offsetof(struct ib_device, - post_recv), - rvt_post_recv); - break; - case POST_SRQ_RECV: - check_driver_override(rdi, offsetof(struct ib_device, - post_srq_recv), - rvt_post_srq_recv); - break; - - case CREATE_AH: - check_driver_override(rdi, offsetof(struct ib_device, - create_ah), - rvt_create_ah); - break; - - case DESTROY_AH: - check_driver_override(rdi, offsetof(struct ib_device, - destroy_ah), - rvt_destroy_ah); - break; - - case MODIFY_AH: - check_driver_override(rdi, offsetof(struct ib_device, - modify_ah), - rvt_modify_ah); - break; - - case QUERY_AH: - check_driver_override(rdi, offsetof(struct ib_device, - query_ah), - rvt_query_ah); - break; - - case CREATE_SRQ: - check_driver_override(rdi, offsetof(struct ib_device, - create_srq), - rvt_create_srq); - break; - - case MODIFY_SRQ: - check_driver_override(rdi, offsetof(struct ib_device, - modify_srq), - rvt_modify_srq); - break; - - case DESTROY_SRQ: - check_driver_override(rdi, offsetof(struct ib_device, - destroy_srq), - rvt_destroy_srq); - break; - - case QUERY_SRQ: - check_driver_override(rdi, offsetof(struct 
ib_device, - query_srq), - rvt_query_srq); - break; - - case ATTACH_MCAST: - check_driver_override(rdi, offsetof(struct ib_device, - attach_mcast), - rvt_attach_mcast); - break; - - case DETACH_MCAST: - check_driver_override(rdi, offsetof(struct ib_device, - detach_mcast), - rvt_detach_mcast); - break; - - case GET_DMA_MR: - check_driver_override(rdi, offsetof(struct ib_device, - get_dma_mr), - rvt_get_dma_mr); - break; - - case REG_USER_MR: - check_driver_override(rdi, offsetof(struct ib_device, - reg_user_mr), - rvt_reg_user_mr); - break; - - case DEREG_MR: - check_driver_override(rdi, offsetof(struct ib_device, - dereg_mr), - rvt_dereg_mr); - break; - - case ALLOC_FMR: - check_driver_override(rdi, offsetof(struct ib_device, - alloc_fmr), - rvt_alloc_fmr); - break; - - case ALLOC_MR: - check_driver_override(rdi, offsetof(struct ib_device, - alloc_mr), - rvt_alloc_mr); - break; - - case MAP_MR_SG: - check_driver_override(rdi, offsetof(struct ib_device, - map_mr_sg), - rvt_map_mr_sg); - break; - - case MAP_PHYS_FMR: - check_driver_override(rdi, offsetof(struct ib_device, - map_phys_fmr), - rvt_map_phys_fmr); - break; - - case UNMAP_FMR: - check_driver_override(rdi, offsetof(struct ib_device, - unmap_fmr), - rvt_unmap_fmr); - break; - - case DEALLOC_FMR: - check_driver_override(rdi, offsetof(struct ib_device, - dealloc_fmr), - rvt_dealloc_fmr); - break; - - case MMAP: - check_driver_override(rdi, offsetof(struct ib_device, - mmap), - rvt_mmap); - break; - - case CREATE_CQ: - check_driver_override(rdi, offsetof(struct ib_device, - create_cq), - rvt_create_cq); - break; - - case DESTROY_CQ: - check_driver_override(rdi, offsetof(struct ib_device, - destroy_cq), - rvt_destroy_cq); - break; - - case POLL_CQ: - check_driver_override(rdi, offsetof(struct ib_device, - poll_cq), - rvt_poll_cq); - break; - - case REQ_NOTFIY_CQ: - check_driver_override(rdi, offsetof(struct ib_device, - req_notify_cq), - rvt_req_notify_cq); - break; - - case RESIZE_CQ: - 
check_driver_override(rdi, offsetof(struct ib_device, - resize_cq), - rvt_resize_cq); - break; - - case ALLOC_PD: - check_driver_override(rdi, offsetof(struct ib_device, - alloc_pd), - rvt_alloc_pd); - break; - - case DEALLOC_PD: - check_driver_override(rdi, offsetof(struct ib_device, - dealloc_pd), - rvt_dealloc_pd); - break; - - default: - return -EINVAL; } return 0; @@ -745,6 +553,7 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id) return -EINVAL; } + ib_set_device_ops(&rdi->ibdev, &rvt_dev_ops); /* Once we get past here we can use rvt_pr macros and tracepoints */ trace_rvt_dbg(rdi, "Driver attempting registration"); diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index d9ec2de68738..5bde2ad964d2 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -65,8 +65,9 @@ */ #define RXE_UVERBS_ABI_VERSION 2 -#define IB_PHYS_STATE_LINK_UP (5) -#define IB_PHYS_STATE_LINK_DOWN (3) +#define RDMA_LINK_PHYS_STATE_LINK_UP (5) +#define RDMA_LINK_PHYS_STATE_DISABLED (3) +#define RDMA_LINK_PHYS_STATE_POLLING (2) #define RXE_ROCE_V2_SPORT (0xc000) @@ -109,5 +110,6 @@ struct rxe_dev *get_rxe_by_name(const char *name); void rxe_port_up(struct rxe_dev *rxe); void rxe_port_down(struct rxe_dev *rxe); +void rxe_set_port_state(struct rxe_dev *rxe); #endif /* RXE_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index ea089cb091ad..e996da67a851 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -439,6 +439,7 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, */ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) { + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_cqe cqe; if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) || @@ -451,6 +452,11 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) advance_consumer(qp->sq.queue); } + if (wqe->wr.opcode == IB_WR_SEND 
|| + wqe->wr.opcode == IB_WR_SEND_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_INV) + rxe_counter_inc(rxe, RXE_CNT_RDMA_SEND); + /* * we completed something so let req run again * if it is trying to fence diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c index 6aeb7a165e46..636edb5f4cf4 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c @@ -37,15 +37,18 @@ static const char * const rxe_counter_name[] = { [RXE_CNT_SENT_PKTS] = "sent_pkts", [RXE_CNT_RCVD_PKTS] = "rcvd_pkts", [RXE_CNT_DUP_REQ] = "duplicate_request", - [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_sequence", + [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_seq_request", [RXE_CNT_RCV_RNR] = "rcvd_rnr_err", [RXE_CNT_SND_RNR] = "send_rnr_err", [RXE_CNT_RCV_SEQ_ERR] = "rcvd_seq_err", - [RXE_CNT_COMPLETER_SCHED] = "ack_deffered", + [RXE_CNT_COMPLETER_SCHED] = "ack_deferred", [RXE_CNT_RETRY_EXCEEDED] = "retry_exceeded_err", [RXE_CNT_RNR_RETRY_EXCEEDED] = "retry_rnr_exceeded_err", [RXE_CNT_COMP_RETRY] = "completer_retry_err", [RXE_CNT_SEND_ERR] = "send_err", + [RXE_CNT_LINK_DOWNED] = "link_downed", + [RXE_CNT_RDMA_SEND] = "rdma_sends", + [RXE_CNT_RDMA_RECV] = "rdma_recvs", }; int rxe_ib_get_hw_stats(struct ib_device *ibdev, @@ -59,7 +62,7 @@ int rxe_ib_get_hw_stats(struct ib_device *ibdev, return -EINVAL; for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_name); cnt++) - stats->value[cnt] = dev->stats_counters[cnt]; + stats->value[cnt] = atomic64_read(&dev->stats_counters[cnt]); return ARRAY_SIZE(rxe_counter_name); } diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h index f44df1b76742..72c0d63c79e0 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h @@ -50,6 +50,9 @@ enum rxe_counters { RXE_CNT_RNR_RETRY_EXCEEDED, RXE_CNT_COMP_RETRY, RXE_CNT_SEND_ERR, + RXE_CNT_LINK_DOWNED, + RXE_CNT_RDMA_SEND, + RXE_CNT_RDMA_RECV, 
RXE_NUM_OF_COUNTERS }; diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index afd53f57a62b..01b74597b36a 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -157,7 +157,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init); int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct ib_qp_init_attr *init, struct rxe_create_qp_resp __user *uresp, - struct ib_pd *ibpd); + struct ib_pd *ibpd, struct ib_udata *udata); int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init); @@ -250,11 +250,12 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp) return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type]; } -static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp, - struct rxe_pkt_info *pkt, struct sk_buff *skb) +static inline int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { int err; int is_request = pkt->mask & RXE_REQ_MASK; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); if ((is_request && (qp->req.state != QP_STATE_READY)) || (!is_request && (qp->resp.state != QP_STATE_READY))) { diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 40e82e0f6c2d..8fd03ae20efc 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -607,7 +607,6 @@ void rxe_port_up(struct rxe_dev *rxe) port = &rxe->port; port->attr.state = IB_PORT_ACTIVE; - port->attr.phys_state = IB_PHYS_STATE_LINK_UP; rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE); dev_info(&rxe->ib_dev.dev, "set active\n"); @@ -620,12 +619,20 @@ void rxe_port_down(struct rxe_dev *rxe) port = &rxe->port; port->attr.state = IB_PORT_DOWN; - port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN; rxe_port_event(rxe, IB_EVENT_PORT_ERR); + rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED); dev_info(&rxe->ib_dev.dev, "set down\n"); } +void rxe_set_port_state(struct 
rxe_dev *rxe) +{ + if (netif_running(rxe->ndev) && netif_carrier_ok(rxe->ndev)) + rxe_port_up(rxe); + else + rxe_port_down(rxe); +} + static int rxe_notify(struct notifier_block *not_blk, unsigned long event, void *arg) @@ -652,10 +659,7 @@ static int rxe_notify(struct notifier_block *not_blk, rxe_set_mtu(rxe, ndev->mtu); break; case NETDEV_CHANGE: - if (netif_running(ndev) && netif_carrier_ok(ndev)) - rxe_port_up(rxe); - else - rxe_port_down(rxe); + rxe_set_port_state(rxe); break; case NETDEV_REBOOT: case NETDEV_GOING_DOWN: diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 36b53fb94a49..b5c91df22047 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -112,6 +112,18 @@ static inline struct kmem_cache *pool_cache(struct rxe_pool *pool) return rxe_type_info[pool->type].cache; } +static void rxe_cache_clean(size_t cnt) +{ + int i; + struct rxe_type_info *type; + + for (i = 0; i < cnt; i++) { + type = &rxe_type_info[i]; + kmem_cache_destroy(type->cache); + type->cache = NULL; + } +} + int rxe_cache_init(void) { int err; @@ -136,24 +148,14 @@ int rxe_cache_init(void) return 0; err1: - while (--i >= 0) { - kmem_cache_destroy(type->cache); - type->cache = NULL; - } + rxe_cache_clean(i); return err; } void rxe_cache_exit(void) { - int i; - struct rxe_type_info *type; - - for (i = 0; i < RXE_NUM_TYPES; i++) { - type = &rxe_type_info[i]; - kmem_cache_destroy(type->cache); - type->cache = NULL; - } + rxe_cache_clean(RXE_NUM_TYPES); } static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) @@ -241,7 +243,7 @@ static void rxe_pool_put(struct rxe_pool *pool) kref_put(&pool->ref_cnt, rxe_pool_release); } -int rxe_pool_cleanup(struct rxe_pool *pool) +void rxe_pool_cleanup(struct rxe_pool *pool) { unsigned long flags; @@ -253,8 +255,6 @@ int rxe_pool_cleanup(struct rxe_pool *pool) write_unlock_irqrestore(&pool->pool_lock, flags); rxe_pool_put(pool); - - return 0; } static u32 
alloc_index(struct rxe_pool *pool) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index aa4ba307097b..72968c29e01f 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -126,7 +126,7 @@ int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, enum rxe_elem_type type, u32 max_elem); /* free resources from object pool */ -int rxe_pool_cleanup(struct rxe_pool *pool); +void rxe_pool_cleanup(struct rxe_pool *pool); /* allocate an object from pool */ void *rxe_alloc(struct rxe_pool *pool); diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index b9710907dac2..fd86fd2fbb26 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -97,7 +97,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) goto err1; if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) { - if (port_num != 1) { + if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) { pr_warn("invalid port = %d\n", port_num); goto err1; } @@ -336,13 +336,14 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct ib_qp_init_attr *init, struct rxe_create_qp_resp __user *uresp, - struct ib_pd *ibpd) + struct ib_pd *ibpd, + struct ib_udata *udata) { int err; struct rxe_cq *rcq = to_rcq(init->recv_cq); struct rxe_cq *scq = to_rcq(init->send_cq); struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL; - struct ib_ucontext *context = ibpd->uobject ? ibpd->uobject->context : NULL; + struct ib_ucontext *context = udata ? 
ibpd->uobject->context : NULL; rxe_add_ref(pd); rxe_add_ref(rcq); @@ -433,7 +434,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, } if (mask & IB_QP_PORT) { - if (attr->port_num != 1) { + if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) { pr_warn("invalid port %d\n", attr->port_num); goto err1; } @@ -448,7 +449,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_ALT_PATH) { if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) goto err1; - if (attr->alt_port_num != 1) { + if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) { pr_warn("invalid alt port %d\n", attr->alt_port_num); goto err1; } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 6c361d70d7cd..c5d9b558fa90 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -643,6 +643,7 @@ next_wqe: rmr->access = wqe->wr.wr.reg.access; rmr->lkey = wqe->wr.wr.reg.key; rmr->rkey = wqe->wr.wr.reg.key; + rmr->iova = wqe->wr.wr.reg.mr->iova; wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; } else { @@ -728,7 +729,7 @@ next_wqe: save_state(wqe, qp, &rollback_wqe, &rollback_psn); update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); - ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); + ret = rxe_xmit_packet(qp, &pkt, skb); if (ret) { qp->need_req_skb = 1; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c962160292f4..231528188250 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -124,12 +124,9 @@ static inline enum resp_states get_req(struct rxe_qp *qp, struct sk_buff *skb; if (qp->resp.state == QP_STATE_ERROR) { - skb = skb_dequeue(&qp->req_pkts); - if (skb) { - /* drain request packet queue */ + while ((skb = skb_dequeue(&qp->req_pkts))) { rxe_drop_ref(qp); kfree_skb(skb); - return RESPST_GET_REQ; } /* go drain recv wr queue */ @@ -660,7 +657,6 @@ static struct 
sk_buff *prepare_ack_packet(struct rxe_qp *qp, static enum resp_states read_reply(struct rxe_qp *qp, struct rxe_pkt_info *req_pkt) { - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_pkt_info ack_pkt; struct sk_buff *skb; int mtu = qp->mtu; @@ -739,7 +735,7 @@ static enum resp_states read_reply(struct rxe_qp *qp, p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); *p = ~icrc; - err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); + err = rxe_xmit_packet(qp, &ack_pkt, skb); if (err) { pr_err("Failed sending RDMA reply.\n"); return RESPST_ERR_RNR; @@ -838,18 +834,25 @@ static enum resp_states do_complete(struct rxe_qp *qp, struct ib_wc *wc = &cqe.ibwc; struct ib_uverbs_wc *uwc = &cqe.uibwc; struct rxe_recv_wqe *wqe = qp->resp.wqe; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); if (unlikely(!wqe)) return RESPST_CLEANUP; memset(&cqe, 0, sizeof(cqe)); - wc->wr_id = wqe->wr_id; - wc->status = qp->resp.status; - wc->qp = &qp->ibqp; + if (qp->rcq->is_user) { + uwc->status = qp->resp.status; + uwc->qp_num = qp->ibqp.qp_num; + uwc->wr_id = wqe->wr_id; + } else { + wc->status = qp->resp.status; + wc->qp = &qp->ibqp; + wc->wr_id = wqe->wr_id; + } - /* fields after status are not required for errors */ if (wc->status == IB_WC_SUCCESS) { + rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV); wc->opcode = (pkt->mask & RXE_IMMDT_MASK && pkt->mask & RXE_WRITE_MASK) ? 
IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; @@ -898,7 +901,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, } if (pkt->mask & RXE_IETH_MASK) { - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_mem *rmr; wc->wc_flags |= IB_WC_WITH_INVALIDATE; @@ -950,7 +952,6 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, int err = 0; struct rxe_pkt_info ack_pkt; struct sk_buff *skb; - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE, 0, psn, syndrome, NULL); @@ -959,7 +960,7 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, goto err1; } - err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); + err = rxe_xmit_packet(qp, &ack_pkt, skb); if (err) pr_err_ratelimited("Failed sending ack\n"); @@ -973,7 +974,6 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, int rc = 0; struct rxe_pkt_info ack_pkt; struct sk_buff *skb; - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct resp_res *res; skb = prepare_ack_packet(qp, pkt, &ack_pkt, @@ -1001,7 +1001,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, res->last_psn = ack_pkt.psn; res->cur_psn = ack_pkt.psn; - rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); + rc = rxe_xmit_packet(qp, &ack_pkt, skb); if (rc) { pr_err_ratelimited("Failed sending ack\n"); rxe_drop_ref(qp); @@ -1131,8 +1131,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, if (res) { skb_get(res->atomic.skb); /* Resend the result. */ - rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, - pkt, res->atomic.skb); + rc = rxe_xmit_packet(qp, pkt, res->atomic.skb); if (rc) { pr_err("Failed resending result. 
This flow is not handled - skb ignored\n"); rc = RESPST_CLEANUP; diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c index 73a19f808e1b..95a15892f7e6 100644 --- a/drivers/infiniband/sw/rxe/rxe_sysfs.c +++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c @@ -53,22 +53,6 @@ static int sanitize_arg(const char *val, char *intf, int intf_len) return len; } -static void rxe_set_port_state(struct net_device *ndev) -{ - struct rxe_dev *rxe = net_to_rxe(ndev); - bool is_up = netif_running(ndev) && netif_carrier_ok(ndev); - - if (!rxe) - goto out; - - if (is_up) - rxe_port_up(rxe); - else - rxe_port_down(rxe); /* down for unknown state */ -out: - return; -} - static int rxe_param_set_add(const char *val, const struct kernel_param *kp) { int len; @@ -104,7 +88,7 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp) goto err; } - rxe_set_port_state(ndev); + rxe_set_port_state(rxe); dev_info(&rxe->ib_dev.dev, "added %s\n", intf); err: if (ndev) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 9c19f2027511..b20e6e0415f5 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -56,12 +56,7 @@ static int rxe_query_port(struct ib_device *dev, { struct rxe_dev *rxe = to_rdev(dev); struct rxe_port *port; - int rc = -EINVAL; - - if (unlikely(port_num != 1)) { - pr_warn("invalid port_number %d\n", port_num); - goto out; - } + int rc; port = &rxe->port; @@ -71,9 +66,16 @@ static int rxe_query_port(struct ib_device *dev, mutex_lock(&rxe->usdev_lock); rc = ib_get_eth_speed(dev, port_num, &attr->active_speed, &attr->active_width); + + if (attr->state == IB_PORT_ACTIVE) + attr->phys_state = RDMA_LINK_PHYS_STATE_LINK_UP; + else if (dev_get_flags(rxe->ndev) & IFF_UP) + attr->phys_state = RDMA_LINK_PHYS_STATE_POLLING; + else + attr->phys_state = RDMA_LINK_PHYS_STATE_DISABLED; + mutex_unlock(&rxe->usdev_lock); -out: return rc; } @@ -96,12 +98,6 @@ 
static int rxe_query_pkey(struct ib_device *device, struct rxe_dev *rxe = to_rdev(device); struct rxe_port *port; - if (unlikely(port_num != 1)) { - dev_warn(device->dev.parent, "invalid port_num = %d\n", - port_num); - goto err1; - } - port = &rxe->port; if (unlikely(index >= port->attr.pkey_tbl_len)) { @@ -139,11 +135,6 @@ static int rxe_modify_port(struct ib_device *dev, struct rxe_dev *rxe = to_rdev(dev); struct rxe_port *port; - if (unlikely(port_num != 1)) { - pr_warn("invalid port_num = %d\n", port_num); - goto err1; - } - port = &rxe->port; port->attr.port_cap_flags |= attr->set_port_cap_mask; @@ -153,9 +144,6 @@ static int rxe_modify_port(struct ib_device *dev, port->attr.qkey_viol_cntr = 0; return 0; - -err1: - return -EINVAL; } static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, @@ -231,6 +219,7 @@ static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr, static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, + u32 flags, struct ib_udata *udata) { @@ -278,7 +267,7 @@ static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -static int rxe_destroy_ah(struct ib_ah *ibah) +static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags) { struct rxe_ah *ah = to_rah(ibah); @@ -498,7 +487,7 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, rxe_add_index(qp); - err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd); + err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd, udata); if (err) goto err3; @@ -1157,6 +1146,52 @@ static const struct attribute_group rxe_attr_group = { .attrs = rxe_dev_attributes, }; +static const struct ib_device_ops rxe_dev_ops = { + .alloc_hw_stats = rxe_ib_alloc_hw_stats, + .alloc_mr = rxe_alloc_mr, + .alloc_pd = rxe_alloc_pd, + .alloc_ucontext = rxe_alloc_ucontext, + .attach_mcast = rxe_attach_mcast, + .create_ah = rxe_create_ah, + .create_cq = rxe_create_cq, + .create_qp = rxe_create_qp, + .create_srq = rxe_create_srq, + .dealloc_pd = 
rxe_dealloc_pd, + .dealloc_ucontext = rxe_dealloc_ucontext, + .dereg_mr = rxe_dereg_mr, + .destroy_ah = rxe_destroy_ah, + .destroy_cq = rxe_destroy_cq, + .destroy_qp = rxe_destroy_qp, + .destroy_srq = rxe_destroy_srq, + .detach_mcast = rxe_detach_mcast, + .get_dma_mr = rxe_get_dma_mr, + .get_hw_stats = rxe_ib_get_hw_stats, + .get_link_layer = rxe_get_link_layer, + .get_netdev = rxe_get_netdev, + .get_port_immutable = rxe_port_immutable, + .map_mr_sg = rxe_map_mr_sg, + .mmap = rxe_mmap, + .modify_ah = rxe_modify_ah, + .modify_device = rxe_modify_device, + .modify_port = rxe_modify_port, + .modify_qp = rxe_modify_qp, + .modify_srq = rxe_modify_srq, + .peek_cq = rxe_peek_cq, + .poll_cq = rxe_poll_cq, + .post_recv = rxe_post_recv, + .post_send = rxe_post_send, + .post_srq_recv = rxe_post_srq_recv, + .query_ah = rxe_query_ah, + .query_device = rxe_query_device, + .query_pkey = rxe_query_pkey, + .query_port = rxe_query_port, + .query_qp = rxe_query_qp, + .query_srq = rxe_query_srq, + .reg_user_mr = rxe_reg_user_mr, + .req_notify_cq = rxe_req_notify_cq, + .resize_cq = rxe_resize_cq, +}; + int rxe_register_device(struct rxe_dev *rxe) { int err; @@ -1211,49 +1246,7 @@ int rxe_register_device(struct rxe_dev *rxe) | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) ; - dev->query_device = rxe_query_device; - dev->modify_device = rxe_modify_device; - dev->query_port = rxe_query_port; - dev->modify_port = rxe_modify_port; - dev->get_link_layer = rxe_get_link_layer; - dev->get_netdev = rxe_get_netdev; - dev->query_pkey = rxe_query_pkey; - dev->alloc_ucontext = rxe_alloc_ucontext; - dev->dealloc_ucontext = rxe_dealloc_ucontext; - dev->mmap = rxe_mmap; - dev->get_port_immutable = rxe_port_immutable; - dev->alloc_pd = rxe_alloc_pd; - dev->dealloc_pd = rxe_dealloc_pd; - dev->create_ah = rxe_create_ah; - dev->modify_ah = rxe_modify_ah; - dev->query_ah = rxe_query_ah; - dev->destroy_ah = rxe_destroy_ah; - dev->create_srq = rxe_create_srq; - dev->modify_srq = rxe_modify_srq; - dev->query_srq = 
rxe_query_srq; - dev->destroy_srq = rxe_destroy_srq; - dev->post_srq_recv = rxe_post_srq_recv; - dev->create_qp = rxe_create_qp; - dev->modify_qp = rxe_modify_qp; - dev->query_qp = rxe_query_qp; - dev->destroy_qp = rxe_destroy_qp; - dev->post_send = rxe_post_send; - dev->post_recv = rxe_post_recv; - dev->create_cq = rxe_create_cq; - dev->destroy_cq = rxe_destroy_cq; - dev->resize_cq = rxe_resize_cq; - dev->poll_cq = rxe_poll_cq; - dev->peek_cq = rxe_peek_cq; - dev->req_notify_cq = rxe_req_notify_cq; - dev->get_dma_mr = rxe_get_dma_mr; - dev->reg_user_mr = rxe_reg_user_mr; - dev->dereg_mr = rxe_dereg_mr; - dev->alloc_mr = rxe_alloc_mr; - dev->map_mr_sg = rxe_map_mr_sg; - dev->attach_mcast = rxe_attach_mcast; - dev->detach_mcast = rxe_detach_mcast; - dev->get_hw_stats = rxe_ib_get_hw_stats; - dev->alloc_hw_stats = rxe_ib_alloc_hw_stats; + ib_set_device_ops(dev, &rxe_dev_ops); tfm = crypto_alloc_shash("crc32", 0, 0); if (IS_ERR(tfm)) { @@ -1279,11 +1272,9 @@ err1: return err; } -int rxe_unregister_device(struct rxe_dev *rxe) +void rxe_unregister_device(struct rxe_dev *rxe) { struct ib_device *dev = &rxe->ib_dev; ib_unregister_device(dev); - - return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 82e670d6eeea..74e04801d34d 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -409,16 +409,16 @@ struct rxe_dev { spinlock_t mmap_offset_lock; /* guard mmap_offset */ int mmap_offset; - u64 stats_counters[RXE_NUM_OF_COUNTERS]; + atomic64_t stats_counters[RXE_NUM_OF_COUNTERS]; struct rxe_port port; struct list_head list; struct crypto_shash *tfm; }; -static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters cnt) +static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters index) { - rxe->stats_counters[cnt]++; + atomic64_inc(&rxe->stats_counters[index]); } static inline struct rxe_dev *to_rdev(struct ib_device *dev) @@ -467,7 +467,7 @@ static 
inline struct rxe_mem *to_rmw(struct ib_mw *mw) } int rxe_register_device(struct rxe_dev *rxe); -int rxe_unregister_device(struct rxe_dev *rxe); +void rxe_unregister_device(struct rxe_dev *rxe); void rxe_mc_cleanup(struct rxe_pool_entry *arg); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 9006a13af1de..6d35570092d6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -66,7 +66,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev, ah->last_send = 0; kref_init(&ah->ref); - vah = rdma_create_ah(pd, attr); + vah = rdma_create_ah(pd, attr, RDMA_CREATE_AH_SLEEPABLE); if (IS_ERR(vah)) { kfree(ah); ah = (struct ipoib_ah *)vah; @@ -678,7 +678,7 @@ static void __ipoib_reap_ah(struct net_device *dev) list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) if ((int) priv->tx_tail - (int) ah->last_send >= 0) { list_del(&ah->list); - rdma_destroy_ah(ah->ah); + rdma_destroy_ah(ah->ah, 0); kfree(ah); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 8710214594d8..d932f99201d1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -167,7 +167,7 @@ int ipoib_open(struct net_device *dev) if (flags & IFF_UP) continue; - dev_change_flags(cpriv->dev, flags | IFF_UP); + dev_change_flags(cpriv->dev, flags | IFF_UP, NULL); } up_read(&priv->vlan_rwsem); } @@ -207,7 +207,7 @@ static int ipoib_stop(struct net_device *dev) if (!(flags & IFF_UP)) continue; - dev_change_flags(cpriv->dev, flags & ~IFF_UP); + dev_change_flags(cpriv->dev, flags & ~IFF_UP, NULL); } up_read(&priv->vlan_rwsem); } @@ -1823,7 +1823,7 @@ static void ipoib_parent_unregister_pre(struct net_device *ndev) * running ensures the it will not add more work. 
*/ rtnl_lock(); - dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); + dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP, NULL); rtnl_unlock(); /* ipoib_event() cannot be running once this returns */ @@ -2453,8 +2453,8 @@ static struct net_device *ipoib_add_port(const char *format, return ERR_PTR(result); } - if (hca->rdma_netdev_get_params) { - int rc = hca->rdma_netdev_get_params(hca, port, + if (hca->ops.rdma_netdev_get_params) { + int rc = hca->ops.rdma_netdev_get_params(hca, port, RDMA_NETDEV_IPOIB, &params); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 3fecd87c9f2b..8c707accd148 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -997,7 +997,6 @@ static struct scsi_host_template iscsi_iser_sht = { .eh_device_reset_handler= iscsi_eh_device_reset, .eh_target_reset_handler = iscsi_eh_recover_target, .target_alloc = iscsi_target_alloc, - .use_clustering = ENABLE_CLUSTERING, .slave_alloc = iscsi_iser_slave_alloc, .proc_name = "iscsi_iser", .this_id = -1, diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 009be8889d71..e9b7efc302d0 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -77,8 +77,8 @@ int iser_assign_reg_ops(struct iser_device *device) struct ib_device *ib_dev = device->ib_device; /* Assign function handles - based on FMR support */ - if (ib_dev->alloc_fmr && ib_dev->dealloc_fmr && - ib_dev->map_phys_fmr && ib_dev->unmap_fmr) { + if (ib_dev->ops.alloc_fmr && ib_dev->ops.dealloc_fmr && + ib_dev->ops.map_phys_fmr && ib_dev->ops.unmap_fmr) { iser_info("FMR supported, using FMR for registration\n"); device->reg_ops = &fmr_ops; } else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { @@ -277,16 +277,13 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { struct iser_mem_reg *reg
= &iser_task->rdma_reg[cmd_dir]; - int ret; if (!reg->mem_h) return; iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h); - ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); - if (ret) - iser_err("ib_fmr_pool_unmap failed %d\n", ret); + ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); reg->mem_h = NULL; } diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c index 61558788b3fa..ae70cd18903e 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c @@ -330,10 +330,10 @@ struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev, struct rdma_netdev *rn; int rc; - netdev = ibdev->alloc_rdma_netdev(ibdev, port_num, - RDMA_NETDEV_OPA_VNIC, - "veth%d", NET_NAME_UNKNOWN, - ether_setup); + netdev = ibdev->ops.alloc_rdma_netdev(ibdev, port_num, + RDMA_NETDEV_OPA_VNIC, + "veth%d", NET_NAME_UNKNOWN, + ether_setup); if (!netdev) return ERR_PTR(-ENOMEM); else if (IS_ERR(netdev)) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index d119d9afa845..560e4f2d466e 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -606,7 +606,7 @@ static void vema_set(struct opa_vnic_vema_port *port, static void vema_send(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_wc) { - rdma_destroy_ah(mad_wc->send_buf->ah); + rdma_destroy_ah(mad_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(mad_wc->send_buf); } @@ -680,7 +680,7 @@ static void vema_recv(struct ib_mad_agent *mad_agent, ib_free_send_mad(rsp); err_rsp: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); free_recv_mad: ib_free_recv_mad(mad_wc); } @@ -777,7 +777,7 @@ void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter, } rdma_ah_set_dlid(&ah_attr, trap_lid); - ah = rdma_create_ah(port->mad_agent->qp->pd, 
&ah_attr); + ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr, 0); if (IS_ERR(ah)) { c_err("%s:Couldn't create new AH = %p\n", __func__, ah); c_err("%s:dlid = %d, sl = %d, port = %d\n", __func__, @@ -848,7 +848,7 @@ void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter, } err_sndbuf: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, 0); err_exit: v_err("Aborting trap\n"); } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index eed0eb3bb04c..31d91538bbf4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -132,6 +132,15 @@ MODULE_PARM_DESC(dev_loss_tmo, " if fast_io_fail_tmo has not been set. \"off\" means that" " this functionality is disabled."); +static bool srp_use_imm_data = true; +module_param_named(use_imm_data, srp_use_imm_data, bool, 0644); +MODULE_PARM_DESC(use_imm_data, + "Whether or not to request permission to use immediate data during SRP login."); + +static unsigned int srp_max_imm_data = 8 * 1024; +module_param_named(max_imm_data, srp_max_imm_data, uint, 0644); +MODULE_PARM_DESC(max_imm_data, "Maximum immediate data size."); + static unsigned ch_count; module_param(ch_count, uint, 0444); MODULE_PARM_DESC(ch_count, @@ -573,7 +582,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) init_attr->cap.max_send_wr = m * target->queue_size; init_attr->cap.max_recv_wr = target->queue_size + 1; init_attr->cap.max_recv_sge = 1; - init_attr->cap.max_send_sge = 1; + init_attr->cap.max_send_sge = SRP_MAX_SGE; init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; init_attr->qp_type = IB_QPT_RC; init_attr->send_cq = send_cq; @@ -823,7 +832,8 @@ static u8 srp_get_subnet_timeout(struct srp_host *host) return subnet_timeout; } -static int srp_send_req(struct srp_rdma_ch *ch, bool multich) +static int srp_send_req(struct srp_rdma_ch *ch, uint32_t max_iu_len, + bool multich) { struct srp_target_port *target = ch->target; struct { @@ -852,11 +862,15 @@ static int srp_send_req(struct 
srp_rdma_ch *ch, bool multich) req->ib_req.opcode = SRP_LOGIN_REQ; req->ib_req.tag = 0; - req->ib_req.req_it_iu_len = cpu_to_be32(target->max_iu_len); + req->ib_req.req_it_iu_len = cpu_to_be32(max_iu_len); req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI : SRP_MULTICHAN_SINGLE); + if (srp_use_imm_data) { + req->ib_req.req_flags |= SRP_IMMED_REQUESTED; + req->ib_req.imm_data_offset = cpu_to_be16(SRP_IMM_DATA_OFFSET); + } if (target->using_rdma_cm) { req->rdma_param.flow_control = req->ib_param.flow_control; @@ -873,6 +887,7 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich) req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len; req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt; req->rdma_req.req_flags = req->ib_req.req_flags; + req->rdma_req.imm_data_offset = req->ib_req.imm_data_offset; ipi = req->rdma_req.initiator_port_id; tpi = req->rdma_req.target_port_id; @@ -1145,7 +1160,8 @@ static int srp_connected_ch(struct srp_target_port *target) return c; } -static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) +static int srp_connect_ch(struct srp_rdma_ch *ch, uint32_t max_iu_len, + bool multich) { struct srp_target_port *target = ch->target; int ret; @@ -1158,7 +1174,7 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) while (1) { init_completion(&ch->done); - ret = srp_send_req(ch, multich); + ret = srp_send_req(ch, max_iu_len, multich); if (ret) goto out; ret = wait_for_completion_interruptible(&ch->done); @@ -1344,6 +1360,20 @@ static void srp_terminate_io(struct srp_rport *rport) } } +/* Calculate maximum initiator to target information unit length. 
*/ +static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data) +{ + uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN + + sizeof(struct srp_indirect_buf) + + cmd_sg_cnt * sizeof(struct srp_direct_buf); + + if (use_imm_data) + max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET + + srp_max_imm_data); + + return max_iu_len; +} + /* * It is up to the caller to ensure that srp_rport_reconnect() calls are * serialized and that no concurrent srp_queuecommand(), srp_abort(), @@ -1357,6 +1387,8 @@ static int srp_rport_reconnect(struct srp_rport *rport) { struct srp_target_port *target = rport->lld_data; struct srp_rdma_ch *ch; + uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, + srp_use_imm_data); int i, j, ret = 0; bool multich = false; @@ -1402,7 +1434,7 @@ static int srp_rport_reconnect(struct srp_rport *rport) ch = &target->ch[i]; if (ret) break; - ret = srp_connect_ch(ch, multich); + ret = srp_connect_ch(ch, max_iu_len, multich); multich = true; } @@ -1764,25 +1796,29 @@ static void srp_check_mapping(struct srp_map_state *state, * @req: SRP request * * Returns the length in bytes of the SRP_CMD IU or a negative value if - * mapping failed. + * mapping failed. The size of any immediate data is not included in the + * return value. 
*/ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, struct srp_request *req) { struct srp_target_port *target = ch->target; - struct scatterlist *scat; + struct scatterlist *scat, *sg; struct srp_cmd *cmd = req->cmd->buf; - int len, nents, count, ret; + int i, len, nents, count, ret; struct srp_device *dev; struct ib_device *ibdev; struct srp_map_state state; struct srp_indirect_buf *indirect_hdr; + u64 data_len; u32 idb_len, table_len; __be32 idb_rkey; u8 fmt; + req->cmd->num_sge = 1; + if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) - return sizeof (struct srp_cmd); + return sizeof(struct srp_cmd) + cmd->add_cdb_len; if (scmnd->sc_data_direction != DMA_FROM_DEVICE && scmnd->sc_data_direction != DMA_TO_DEVICE) { @@ -1794,6 +1830,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, nents = scsi_sg_count(scmnd); scat = scsi_sglist(scmnd); + data_len = scsi_bufflen(scmnd); dev = target->srp_host->srp_dev; ibdev = dev->dev; @@ -1802,8 +1839,31 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, if (unlikely(count == 0)) return -EIO; + if (ch->use_imm_data && + count <= SRP_MAX_IMM_SGE && + SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len && + scmnd->sc_data_direction == DMA_TO_DEVICE) { + struct srp_imm_buf *buf; + struct ib_sge *sge = &req->cmd->sge[1]; + + fmt = SRP_DATA_DESC_IMM; + len = SRP_IMM_DATA_OFFSET; + req->nmdesc = 0; + buf = (void *)cmd->add_data + cmd->add_cdb_len; + buf->len = cpu_to_be32(data_len); + WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len); + for_each_sg(scat, sg, count, i) { + sge[i].addr = ib_sg_dma_address(ibdev, sg); + sge[i].length = ib_sg_dma_len(ibdev, sg); + sge[i].lkey = target->lkey; + } + req->cmd->num_sge += count; + goto map_complete; + } + fmt = SRP_DATA_DESC_DIRECT; - len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); + len = sizeof(struct srp_cmd) + cmd->add_cdb_len + + sizeof(struct srp_direct_buf); if (count == 1 && 
target->global_rkey) { /* @@ -1812,8 +1872,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, * single entry. So a direct descriptor along with * the DMA MR suffices. */ - struct srp_direct_buf *buf = (void *) cmd->add_data; + struct srp_direct_buf *buf; + buf = (void *)cmd->add_data + cmd->add_cdb_len; buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); buf->key = cpu_to_be32(target->global_rkey); buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); @@ -1826,7 +1887,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, * We have more than one scatter/gather entry, so build our indirect * descriptor table, trying to merge as many entries as we can. */ - indirect_hdr = (void *) cmd->add_data; + indirect_hdr = (void *)cmd->add_data + cmd->add_cdb_len; ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr, target->indirect_size, DMA_TO_DEVICE); @@ -1861,8 +1922,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, * Memory registration collapsed the sg-list into one entry, * so use a direct descriptor. */ - struct srp_direct_buf *buf = (void *) cmd->add_data; + struct srp_direct_buf *buf; + buf = (void *)cmd->add_data + cmd->add_cdb_len; *buf = req->indirect_desc[0]; goto map_complete; } @@ -1880,7 +1942,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, idb_len = sizeof(struct srp_indirect_buf) + table_len; fmt = SRP_DATA_DESC_INDIRECT; - len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf); + len = sizeof(struct srp_cmd) + cmd->add_cdb_len + + sizeof(struct srp_indirect_buf); len += count * sizeof (struct srp_direct_buf); memcpy(indirect_hdr->desc_list, req->indirect_desc, @@ -2001,22 +2064,30 @@ static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc) list_add(&iu->list, &ch->free_tx); } +/** + * srp_post_send() - send an SRP information unit + * @ch: RDMA channel over which to send the information unit. + * @iu: Information unit to send. 
+ * @len: Length of the information unit excluding immediate data. + */ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) { struct srp_target_port *target = ch->target; - struct ib_sge list; struct ib_send_wr wr; - list.addr = iu->dma; - list.length = len; - list.lkey = target->lkey; + if (WARN_ON_ONCE(iu->num_sge > SRP_MAX_SGE)) + return -EINVAL; + + iu->sge[0].addr = iu->dma; + iu->sge[0].length = len; + iu->sge[0].lkey = target->lkey; iu->cqe.done = srp_send_done; wr.next = NULL; wr.wr_cqe = &iu->cqe; - wr.sg_list = &list; - wr.num_sge = 1; + wr.sg_list = &iu->sge[0]; + wr.num_sge = iu->num_sge; wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; @@ -2129,6 +2200,7 @@ static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta, return 1; } + iu->num_sge = 1; ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE); memcpy(iu->buf, rsp, len); ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE); @@ -2312,7 +2384,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) req = &ch->req_ring[idx]; dev = target->srp_host->srp_dev->dev; - ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len, + ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len, DMA_TO_DEVICE); scmnd->host_scribble = (void *) req; @@ -2324,6 +2396,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) int_to_scsilun(scmnd->device->lun, &cmd->lun); cmd->tag = tag; memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); + if (unlikely(scmnd->cmd_len > sizeof(cmd->cdb))) { + cmd->add_cdb_len = round_up(scmnd->cmd_len - sizeof(cmd->cdb), + 4); + if (WARN_ON_ONCE(cmd->add_cdb_len > SRP_MAX_ADD_CDB_LEN)) + goto err_iu; + } req->scmnd = scmnd; req->cmd = iu; @@ -2343,11 +2421,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) goto err_iu; } - ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len, + ib_dma_sync_single_for_device(dev, iu->dma, 
ch->max_it_iu_len, DMA_TO_DEVICE); if (srp_post_send(ch, iu, len)) { shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); + scmnd->result = DID_ERROR << 16; goto err_unmap; } @@ -2410,7 +2489,7 @@ static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch) for (i = 0; i < target->queue_size; ++i) { ch->tx_ring[i] = srp_alloc_iu(target->srp_host, - target->max_iu_len, + ch->max_it_iu_len, GFP_KERNEL, DMA_TO_DEVICE); if (!ch->tx_ring[i]) goto err; @@ -2476,6 +2555,15 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id, if (lrsp->opcode == SRP_LOGIN_RSP) { ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); + ch->use_imm_data = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP; + ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, + ch->use_imm_data); + WARN_ON_ONCE(ch->max_it_iu_len > + be32_to_cpu(lrsp->max_it_iu_len)); + + if (ch->use_imm_data) + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "using immediate data\n"); /* * Reserve credits for task management so we don't @@ -2864,6 +2952,8 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, return -1; } + iu->num_sge = 1; + ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, DMA_TO_DEVICE); tsk_mgmt = iu->buf; @@ -3215,7 +3305,6 @@ static struct scsi_host_template srp_template = { .can_queue = SRP_DEFAULT_CMD_SQ_SIZE, .this_id = -1, .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = srp_host_attrs, .track_queue_depth = 1, }; @@ -3403,6 +3492,9 @@ static const match_table_t srp_opt_tokens = { /** * srp_parse_in - parse an IP address and port number combination + * @net: [in] Network namespace. + * @sa: [out] Address family, IP address and port number. + * @addr_port_str: [in] IP address and port number. * * Parse the following address formats: * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5. 
@@ -3720,6 +3812,7 @@ static ssize_t srp_create_target(struct device *dev, int ret, node_idx, node, cpu, i; unsigned int max_sectors_per_mr, mr_per_cmd = 0; bool multich = false; + uint32_t max_iu_len; target_host = scsi_host_alloc(&srp_template, sizeof (struct srp_target_port)); @@ -3825,9 +3918,7 @@ static ssize_t srp_create_target(struct device *dev, target->mr_per_cmd = mr_per_cmd; target->indirect_size = target->sg_tablesize * sizeof (struct srp_direct_buf); - target->max_iu_len = sizeof (struct srp_cmd) + - sizeof (struct srp_indirect_buf) + - target->cmd_sg_cnt * sizeof (struct srp_direct_buf); + max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, srp_use_imm_data); INIT_WORK(&target->tl_err_work, srp_tl_err_work); INIT_WORK(&target->remove_work, srp_remove_work); @@ -3882,7 +3973,7 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err_disconnect; - ret = srp_connect_ch(ch, multich); + ret = srp_connect_ch(ch, max_iu_len, multich); if (ret) { char dst[64]; @@ -4063,8 +4154,10 @@ static void srp_add_one(struct ib_device *device) srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, max_pages_per_mr); - srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && - device->map_phys_fmr && device->unmap_fmr); + srp_dev->has_fmr = (device->ops.alloc_fmr && + device->ops.dealloc_fmr && + device->ops.map_phys_fmr && + device->ops.unmap_fmr); srp_dev->has_fr = (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS); if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) { @@ -4172,6 +4265,11 @@ static int __init srp_init_module(void) { int ret; + BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4); + BUILD_BUG_ON(sizeof(struct srp_login_req) != 64); + BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56); + BUILD_BUG_ON(sizeof(struct srp_cmd) != 48); + if (srp_sg_tablesize) { pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); if (!cmd_sg_entries) diff --git a/drivers/infiniband/ulp/srp/ib_srp.h 
b/drivers/infiniband/ulp/srp/ib_srp.h index a2706086b9c7..b2861cd2087a 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -67,6 +67,17 @@ enum { SRP_TAG_TSK_MGMT = 1U << 31, SRP_MAX_PAGES_PER_MR = 512, + + SRP_MAX_ADD_CDB_LEN = 16, + + SRP_MAX_IMM_SGE = 2, + SRP_MAX_SGE = SRP_MAX_IMM_SGE + 1, + /* + * Choose the immediate data offset such that a 32 byte CDB still fits. + */ + SRP_IMM_DATA_OFFSET = sizeof(struct srp_cmd) + + SRP_MAX_ADD_CDB_LEN + + sizeof(struct srp_imm_buf), }; enum srp_target_state { @@ -130,6 +141,8 @@ struct srp_request { /** * struct srp_rdma_ch * @comp_vector: Completion vector used by this RDMA channel. + * @max_it_iu_len: Maximum initiator-to-target information unit length. + * @max_ti_iu_len: Maximum target-to-initiator information unit length. */ struct srp_rdma_ch { /* These are RW in the hot path, and commonly used together */ @@ -146,6 +159,9 @@ struct srp_rdma_ch { struct ib_fmr_pool *fmr_pool; struct srp_fr_pool *fr_pool; }; + uint32_t max_it_iu_len; + uint32_t max_ti_iu_len; + bool use_imm_data; /* Everything above this point is used in the hot path of * command processing. Try to keep them packed into cachelines. 
@@ -169,7 +185,6 @@ struct srp_rdma_ch { struct srp_iu **tx_ring; struct srp_iu **rx_ring; struct srp_request *req_ring; - int max_ti_iu_len; int comp_vector; u64 tsk_mgmt_tag; @@ -194,7 +209,6 @@ struct srp_target_port { u32 ch_count; u32 lkey; enum srp_target_state state; - unsigned int max_iu_len; unsigned int cmd_sg_cnt; unsigned int indirect_size; bool allow_ext_sg; @@ -259,6 +273,8 @@ struct srp_iu { void *buf; size_t size; enum dma_data_direction direction; + u32 num_sge; + struct ib_sge sge[SRP_MAX_SGE]; struct ib_cqe cqe; }; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 2357aa727dcf..e9c336cff8f5 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -51,8 +51,6 @@ /* Name of this kernel module. */ #define DRV_NAME "ib_srpt" -#define DRV_VERSION "2.0.0" -#define DRV_RELDATE "2011-02-14" #define SRPT_ID_STRING "Linux SRP target" @@ -60,8 +58,7 @@ #define pr_fmt(fmt) DRV_NAME " " fmt MODULE_AUTHOR("Vu Pham and Bart Van Assche"); -MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target " - "v" DRV_VERSION " (" DRV_RELDATE ")"); +MODULE_DESCRIPTION("SCSI RDMA Protocol target driver"); MODULE_LICENSE("Dual BSD/GPL"); /* @@ -89,8 +86,7 @@ static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, 0444); MODULE_PARM_DESC(srpt_service_guid, - "Using this value for ioc_guid, id_ext, and cm_listen_id" - " instead of using the node_guid of the first HCA."); + "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA."); static struct ib_client srpt_client; /* Protects both rdma_cm_port and rdma_cm_id. 
*/ @@ -462,7 +458,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad, static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_wc) { - rdma_destroy_ah(mad_wc->send_buf->ah); + rdma_destroy_ah(mad_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(mad_wc->send_buf); } @@ -529,7 +525,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, ib_free_send_mad(rsp); err_rsp: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); err: ib_free_recv_mad(mad_wc); } @@ -652,31 +648,33 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev) * srpt_alloc_ioctx - allocate a SRPT I/O context structure * @sdev: SRPT HCA pointer. * @ioctx_size: I/O context size. - * @dma_size: Size of I/O context DMA buffer. + * @buf_cache: I/O buffer cache. * @dir: DMA data direction. */ static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev, - int ioctx_size, int dma_size, + int ioctx_size, + struct kmem_cache *buf_cache, enum dma_data_direction dir) { struct srpt_ioctx *ioctx; - ioctx = kmalloc(ioctx_size, GFP_KERNEL); + ioctx = kzalloc(ioctx_size, GFP_KERNEL); if (!ioctx) goto err; - ioctx->buf = kmalloc(dma_size, GFP_KERNEL); + ioctx->buf = kmem_cache_alloc(buf_cache, GFP_KERNEL); if (!ioctx->buf) goto err_free_ioctx; - ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf, dma_size, dir); + ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf, + kmem_cache_size(buf_cache), dir); if (ib_dma_mapping_error(sdev->device, ioctx->dma)) goto err_free_buf; return ioctx; err_free_buf: - kfree(ioctx->buf); + kmem_cache_free(buf_cache, ioctx->buf); err_free_ioctx: kfree(ioctx); err: @@ -687,17 +685,19 @@ err: * srpt_free_ioctx - free a SRPT I/O context structure * @sdev: SRPT HCA pointer. * @ioctx: I/O context pointer. - * @dma_size: Size of I/O context DMA buffer. + * @buf_cache: I/O buffer cache. * @dir: DMA data direction. 
*/ static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx, - int dma_size, enum dma_data_direction dir) + struct kmem_cache *buf_cache, + enum dma_data_direction dir) { if (!ioctx) return; - ib_dma_unmap_single(sdev->device, ioctx->dma, dma_size, dir); - kfree(ioctx->buf); + ib_dma_unmap_single(sdev->device, ioctx->dma, + kmem_cache_size(buf_cache), dir); + kmem_cache_free(buf_cache, ioctx->buf); kfree(ioctx); } @@ -706,33 +706,38 @@ static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx, * @sdev: Device to allocate the I/O context ring for. * @ring_size: Number of elements in the I/O context ring. * @ioctx_size: I/O context size. - * @dma_size: DMA buffer size. + * @buf_cache: I/O buffer cache. + * @alignment_offset: Offset in each ring buffer at which the SRP information + * unit starts. * @dir: DMA data direction. */ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, int ring_size, int ioctx_size, - int dma_size, enum dma_data_direction dir) + struct kmem_cache *buf_cache, + int alignment_offset, + enum dma_data_direction dir) { struct srpt_ioctx **ring; int i; - WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) - && ioctx_size != sizeof(struct srpt_send_ioctx)); + WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) && + ioctx_size != sizeof(struct srpt_send_ioctx)); ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL); if (!ring) goto out; for (i = 0; i < ring_size; ++i) { - ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, dma_size, dir); + ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, buf_cache, dir); if (!ring[i]) goto err; ring[i]->index = i; + ring[i]->offset = alignment_offset; } goto out; err: while (--i >= 0) - srpt_free_ioctx(sdev, ring[i], dma_size, dir); + srpt_free_ioctx(sdev, ring[i], buf_cache, dir); kvfree(ring); ring = NULL; out: @@ -744,12 +749,13 @@ out: * @ioctx_ring: I/O context ring to be freed. * @sdev: SRPT HCA pointer. * @ring_size: Number of ring elements. 
- * @dma_size: Size of I/O context DMA buffer. + * @buf_cache: I/O buffer cache. * @dir: DMA data direction. */ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, struct srpt_device *sdev, int ring_size, - int dma_size, enum dma_data_direction dir) + struct kmem_cache *buf_cache, + enum dma_data_direction dir) { int i; @@ -757,7 +763,7 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, return; for (i = 0; i < ring_size; ++i) - srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir); + srpt_free_ioctx(sdev, ioctx_ring[i], buf_cache, dir); kvfree(ioctx_ring); } @@ -819,7 +825,7 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, struct ib_recv_wr wr; BUG_ON(!sdev); - list.addr = ioctx->ioctx.dma; + list.addr = ioctx->ioctx.dma + ioctx->ioctx.offset; list.length = srp_max_req_size; list.lkey = sdev->lkey; @@ -985,23 +991,28 @@ static inline void *srpt_get_desc_buf(struct srp_cmd *srp_cmd) /** * srpt_get_desc_tbl - parse the data descriptors of a SRP_CMD request - * @ioctx: Pointer to the I/O context associated with the request. + * @recv_ioctx: I/O context associated with the received command @srp_cmd. + * @ioctx: I/O context that will be used for responding to the initiator. * @srp_cmd: Pointer to the SRP_CMD request data. * @dir: Pointer to the variable to which the transfer direction will be * written. - * @sg: [out] scatterlist allocated for the parsed SRP_CMD. + * @sg: [out] scatterlist for the parsed SRP_CMD. * @sg_cnt: [out] length of @sg. * @data_len: Pointer to the variable to which the total data length of all * descriptors in the SRP_CMD request will be written. + * @imm_data_offset: [in] Offset in SRP_CMD requests at which immediate data + * starts. * * This function initializes ioctx->nrbuf and ioctx->r_bufs. * * Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors; * -ENOMEM when memory allocation fails and zero upon success. 
*/ -static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, +static int srpt_get_desc_tbl(struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *ioctx, struct srp_cmd *srp_cmd, enum dma_data_direction *dir, - struct scatterlist **sg, unsigned *sg_cnt, u64 *data_len) + struct scatterlist **sg, unsigned int *sg_cnt, u64 *data_len, + u16 imm_data_offset) { BUG_ON(!dir); BUG_ON(!data_len); @@ -1025,7 +1036,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) || ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) { - struct srp_direct_buf *db = srpt_get_desc_buf(srp_cmd); + struct srp_direct_buf *db = srpt_get_desc_buf(srp_cmd); *data_len = be32_to_cpu(db->len); return srpt_alloc_rw_ctxs(ioctx, db, 1, sg, sg_cnt); @@ -1037,8 +1048,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, if (nbufs > (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) { - pr_err("received unsupported SRP_CMD request" - " type (%u out + %u in != %u / %zu)\n", + pr_err("received unsupported SRP_CMD request type (%u out + %u in != %u / %zu)\n", srp_cmd->data_out_desc_cnt, srp_cmd->data_in_desc_cnt, be32_to_cpu(idb->table_desc.len), @@ -1049,6 +1059,40 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, *data_len = be32_to_cpu(idb->len); return srpt_alloc_rw_ctxs(ioctx, idb->desc_list, nbufs, sg, sg_cnt); + } else if ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_IMM) { + struct srp_imm_buf *imm_buf = srpt_get_desc_buf(srp_cmd); + void *data = (void *)srp_cmd + imm_data_offset; + uint32_t len = be32_to_cpu(imm_buf->len); + uint32_t req_size = imm_data_offset + len; + + if (req_size > srp_max_req_size) { + pr_err("Immediate data (length %d + %d) exceeds request size %d\n", + imm_data_offset, len, srp_max_req_size); + return -EINVAL; + } + if (recv_ioctx->byte_len < req_size) { + pr_err("Received too few data - %d < %d\n", + recv_ioctx->byte_len, req_size); + return -EIO; + } + /* + * The immediate 
data buffer descriptor must occur before the + * immediate data itself. + */ + if ((void *)(imm_buf + 1) > (void *)data) { + pr_err("Received invalid write request\n"); + return -EINVAL; + } + *data_len = len; + ioctx->recv_ioctx = recv_ioctx; + if ((uintptr_t)data & 511) { + pr_warn_once("Internal error - the receive buffers are not aligned properly.\n"); + return -EINVAL; + } + sg_init_one(&ioctx->imm_sg, data, len); + *sg = &ioctx->imm_sg; + *sg_cnt = 1; + return 0; } else { *data_len = 0; return 0; @@ -1191,6 +1235,7 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch) BUG_ON(ioctx->ch != ch); ioctx->state = SRPT_STATE_NEW; + WARN_ON_ONCE(ioctx->recv_ioctx); ioctx->n_rdma = 0; ioctx->n_rw_ctx = 0; ioctx->queue_status_only = false; @@ -1352,8 +1397,8 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp)); max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp); if (sense_data_len > max_sense_len) { - pr_warn("truncated sense data from %d to %d" - " bytes\n", sense_data_len, max_sense_len); + pr_warn("truncated sense data from %d to %d bytes\n", + sense_data_len, max_sense_len); sense_data_len = max_sense_len; } @@ -1433,7 +1478,7 @@ static void srpt_handle_cmd(struct srpt_rdma_ch *ch, BUG_ON(!send_ioctx); - srp_cmd = recv_ioctx->ioctx.buf; + srp_cmd = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset; cmd = &send_ioctx->cmd; cmd->tag = srp_cmd->tag; @@ -1453,8 +1498,8 @@ static void srpt_handle_cmd(struct srpt_rdma_ch *ch, break; } - rc = srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &sg, &sg_cnt, - &data_len); + rc = srpt_get_desc_tbl(recv_ioctx, send_ioctx, srp_cmd, &dir, + &sg, &sg_cnt, &data_len, ch->imm_data_offset); if (rc) { if (rc != -EAGAIN) { pr_err("0x%llx: parsing SRP descriptor table failed.\n", @@ -1521,7 +1566,7 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, BUG_ON(!send_ioctx); - srp_tsk = recv_ioctx->ioctx.buf; + srp_tsk = recv_ioctx->ioctx.buf + 
recv_ioctx->ioctx.offset; cmd = &send_ioctx->cmd; pr_debug("recv tsk_mgmt fn %d for task_tag %lld and cmd tag %lld ch %p sess %p\n", @@ -1564,10 +1609,11 @@ srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *recv_ioctx) goto push; ib_dma_sync_single_for_cpu(ch->sport->sdev->device, - recv_ioctx->ioctx.dma, srp_max_req_size, + recv_ioctx->ioctx.dma, + recv_ioctx->ioctx.offset + srp_max_req_size, DMA_FROM_DEVICE); - srp_cmd = recv_ioctx->ioctx.buf; + srp_cmd = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset; opcode = srp_cmd->opcode; if (opcode == SRP_CMD || opcode == SRP_TSK_MGMT) { send_ioctx = srpt_get_send_ioctx(ch); @@ -1604,7 +1650,8 @@ srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *recv_ioctx) break; } - srpt_post_recv(ch->sport->sdev, ch, recv_ioctx); + if (!send_ioctx || !send_ioctx->recv_ioctx) + srpt_post_recv(ch->sport->sdev, ch, recv_ioctx); res = true; out: @@ -1630,6 +1677,7 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc) req_lim = atomic_dec_return(&ch->req_lim); if (unlikely(req_lim < 0)) pr_err("req_lim = %d < 0\n", req_lim); + ioctx->byte_len = wc->byte_len; srpt_handle_new_iu(ch, ioctx); } else { pr_info_ratelimited("receiving failed for ioctx %p with status %d\n", @@ -1693,14 +1741,14 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail); if (wc->status != IB_WC_SUCCESS) - pr_info("sending response for ioctx 0x%p failed" - " with status %d\n", ioctx, wc->status); + pr_info("sending response for ioctx 0x%p failed with status %d\n", + ioctx, wc->status); if (state != SRPT_STATE_DONE) { transport_generic_free_cmd(&ioctx->cmd, 0); } else { - pr_err("IB completion has been received too late for" - " wr_id = %u.\n", ioctx->ioctx.index); + pr_err("IB completion has been received too late for wr_id = %u.\n", + ioctx->ioctx.index); } srpt_process_wait_list(ch); @@ -1754,6 +1802,8 @@ retry: qp_init->cap.max_rdma_ctxs = sq_size / 2; 
qp_init->cap.max_send_sge = min(attrs->max_send_sge, SRPT_MAX_SG_PER_WQE); + qp_init->cap.max_recv_sge = min(attrs->max_recv_sge, + SRPT_MAX_SG_PER_WQE); qp_init->port_num = ch->sport->port; if (sdev->use_srq) { qp_init->srq = sdev->srq; @@ -2010,6 +2060,14 @@ static void srpt_free_ch(struct kref *kref) kfree_rcu(ch, rcu); } +/* + * Shut down the SCSI target session, tell the connection manager to + * disconnect the associated RDMA channel, transition the QP to the error + * state and remove the channel from the channel list. This function is + * typically called from inside srpt_zerolength_write_done(). Concurrent + * srpt_zerolength_write() calls from inside srpt_close_ch() are possible + * as long as the channel is on sport->nexus_list. + */ static void srpt_release_channel_work(struct work_struct *w) { struct srpt_rdma_ch *ch; @@ -2037,20 +2095,24 @@ static void srpt_release_channel_work(struct work_struct *w) else ib_destroy_cm_id(ch->ib_cm.cm_id); + sport = ch->sport; + mutex_lock(&sport->mutex); + list_del_rcu(&ch->list); + mutex_unlock(&sport->mutex); + srpt_destroy_ch_ib(ch); srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, ch->sport->sdev, ch->rq_size, - ch->max_rsp_size, DMA_TO_DEVICE); + ch->rsp_buf_cache, DMA_TO_DEVICE); + + kmem_cache_destroy(ch->rsp_buf_cache); srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, sdev, ch->rq_size, - srp_max_req_size, DMA_FROM_DEVICE); + ch->req_buf_cache, DMA_FROM_DEVICE); - sport = ch->sport; - mutex_lock(&sport->mutex); - list_del_rcu(&ch->list); - mutex_unlock(&sport->mutex); + kmem_cache_destroy(ch->req_buf_cache); wake_up(&sport->ch_releaseQ); @@ -2174,14 +2236,19 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, INIT_LIST_HEAD(&ch->cmd_wait_list); ch->max_rsp_size = ch->sport->port_attrib.srp_max_rsp_size; + ch->rsp_buf_cache = kmem_cache_create("srpt-rsp-buf", ch->max_rsp_size, + 512, 0, NULL); + if (!ch->rsp_buf_cache) + goto free_ch; + ch->ioctx_ring = (struct 
srpt_send_ioctx **) srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, sizeof(*ch->ioctx_ring[0]), - ch->max_rsp_size, DMA_TO_DEVICE); + ch->rsp_buf_cache, 0, DMA_TO_DEVICE); if (!ch->ioctx_ring) { pr_err("rejected SRP_LOGIN_REQ because creating a new QP SQ ring failed.\n"); rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); - goto free_ch; + goto free_rsp_cache; } INIT_LIST_HEAD(&ch->free_list); @@ -2190,16 +2257,39 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list); } if (!sdev->use_srq) { + u16 imm_data_offset = req->req_flags & SRP_IMMED_REQUESTED ? + be16_to_cpu(req->imm_data_offset) : 0; + u16 alignment_offset; + u32 req_sz; + + if (req->req_flags & SRP_IMMED_REQUESTED) + pr_debug("imm_data_offset = %d\n", + be16_to_cpu(req->imm_data_offset)); + if (imm_data_offset >= sizeof(struct srp_cmd)) { + ch->imm_data_offset = imm_data_offset; + rsp->rsp_flags |= SRP_LOGIN_RSP_IMMED_SUPP; + } else { + ch->imm_data_offset = 0; + } + alignment_offset = round_up(imm_data_offset, 512) - + imm_data_offset; + req_sz = alignment_offset + imm_data_offset + srp_max_req_size; + ch->req_buf_cache = kmem_cache_create("srpt-req-buf", req_sz, + 512, 0, NULL); + if (!ch->req_buf_cache) + goto free_rsp_ring; + ch->ioctx_recv_ring = (struct srpt_recv_ioctx **) srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, sizeof(*ch->ioctx_recv_ring[0]), - srp_max_req_size, + ch->req_buf_cache, + alignment_offset, DMA_FROM_DEVICE); if (!ch->ioctx_recv_ring) { pr_err("rejected SRP_LOGIN_REQ because creating a new QP RQ ring failed.\n"); rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); - goto free_ring; + goto free_recv_cache; } for (i = 0; i < ch->rq_size; i++) INIT_LIST_HEAD(&ch->ioctx_recv_ring[i]->wait_list); @@ -2249,17 +2339,15 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) { struct srpt_rdma_ch *ch2; - 
rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN; - list_for_each_entry(ch2, &nexus->ch_list, list) { if (srpt_disconnect_ch(ch2) < 0) continue; pr_info("Relogin - closed existing channel %s\n", ch2->sess_name); - rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_TERMINATED; + rsp->rsp_flags |= SRP_LOGIN_RSP_MULTICHAN_TERMINATED; } } else { - rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED; + rsp->rsp_flags |= SRP_LOGIN_RSP_MULTICHAN_MAINTAINED; } list_add_tail_rcu(&ch->list, &nexus->ch_list); @@ -2289,7 +2377,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, /* create srp_login_response */ rsp->opcode = SRP_LOGIN_RSP; rsp->tag = req->tag; - rsp->max_it_iu_len = req->req_it_iu_len; + rsp->max_it_iu_len = cpu_to_be32(srp_max_req_size); rsp->max_ti_iu_len = req->req_it_iu_len; ch->max_ti_iu_len = it_iu_len; rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | @@ -2353,12 +2441,18 @@ destroy_ib: free_recv_ring: srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, ch->sport->sdev, ch->rq_size, - srp_max_req_size, DMA_FROM_DEVICE); + ch->req_buf_cache, DMA_FROM_DEVICE); + +free_recv_cache: + kmem_cache_destroy(ch->req_buf_cache); -free_ring: +free_rsp_ring: srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, ch->sport->sdev, ch->rq_size, - ch->max_rsp_size, DMA_TO_DEVICE); + ch->rsp_buf_cache, DMA_TO_DEVICE); + +free_rsp_cache: + kmem_cache_destroy(ch->rsp_buf_cache); free_ch: if (rdma_cm_id) @@ -2439,6 +2533,7 @@ static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id, req.req_flags = req_rdma->req_flags; memcpy(req.initiator_port_id, req_rdma->initiator_port_id, 16); memcpy(req.target_port_id, req_rdma->target_port_id, 16); + req.imm_data_offset = req_rdma->imm_data_offset; snprintf(src_addr, sizeof(src_addr), "%pIS", &cm_id->route.addr.src_addr); @@ -2629,6 +2724,12 @@ static int srpt_write_pending(struct se_cmd *se_cmd) enum srpt_command_state new_state; int ret, i; + if (ioctx->recv_ioctx) { + srpt_set_cmd_state(ioctx, 
SRPT_STATE_DATA_IN); + target_execute_cmd(&ioctx->cmd); + return 0; + } + new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA); WARN_ON(new_state == SRPT_STATE_DONE); @@ -2908,7 +3009,9 @@ static void srpt_free_srq(struct srpt_device *sdev) ib_destroy_srq(sdev->srq); srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, - sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE); + sdev->srq_size, sdev->req_buf_cache, + DMA_FROM_DEVICE); + kmem_cache_destroy(sdev->req_buf_cache); sdev->srq = NULL; } @@ -2935,14 +3038,17 @@ static int srpt_alloc_srq(struct srpt_device *sdev) pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size, sdev->device->attrs.max_srq_wr, dev_name(&device->dev)); + sdev->req_buf_cache = kmem_cache_create("srpt-srq-req-buf", + srp_max_req_size, 0, 0, NULL); + if (!sdev->req_buf_cache) + goto free_srq; + sdev->ioctx_ring = (struct srpt_recv_ioctx **) srpt_alloc_ioctx_ring(sdev, sdev->srq_size, sizeof(*sdev->ioctx_ring[0]), - srp_max_req_size, DMA_FROM_DEVICE); - if (!sdev->ioctx_ring) { - ib_destroy_srq(srq); - return -ENOMEM; - } + sdev->req_buf_cache, 0, DMA_FROM_DEVICE); + if (!sdev->ioctx_ring) + goto free_cache; sdev->use_srq = true; sdev->srq = srq; @@ -2953,6 +3059,13 @@ static int srpt_alloc_srq(struct srpt_device *sdev) } return 0; + +free_cache: + kmem_cache_destroy(sdev->req_buf_cache); + +free_srq: + ib_destroy_srq(srq); + return -ENOMEM; } static int srpt_use_srq(struct srpt_device *sdev, bool use_srq) @@ -3015,9 +3128,8 @@ static void srpt_add_one(struct ib_device *device) } /* print out target login information */ - pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx," - "pkey=ffff,service_id=%016llx\n", srpt_service_guid, - srpt_service_guid, srpt_service_guid); + pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx,pkey=ffff,service_id=%016llx\n", + srpt_service_guid, srpt_service_guid, srpt_service_guid); /* * We do not have a consistent service_id (ie. 
also id_ext of target_id) @@ -3147,11 +3259,6 @@ static int srpt_check_false(struct se_portal_group *se_tpg) return 0; } -static char *srpt_get_fabric_name(void) -{ - return "srpt"; -} - static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg) { return tpg->se_tpg_wwn->priv; @@ -3182,11 +3289,18 @@ static void srpt_release_cmd(struct se_cmd *se_cmd) struct srpt_send_ioctx *ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); struct srpt_rdma_ch *ch = ioctx->ch; + struct srpt_recv_ioctx *recv_ioctx = ioctx->recv_ioctx; unsigned long flags; WARN_ON_ONCE(ioctx->state != SRPT_STATE_DONE && !(ioctx->cmd.transport_state & CMD_T_ABORTED)); + if (recv_ioctx) { + WARN_ON_ONCE(!list_empty(&recv_ioctx->wait_list)); + ioctx->recv_ioctx = NULL; + srpt_post_recv(ch->sport->sdev, ch, recv_ioctx); + } + if (ioctx->n_rw_ctx) { srpt_free_rw_ctxs(ch, ioctx); ioctx->n_rw_ctx = 0; @@ -3572,7 +3686,7 @@ static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page) struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return snprintf(page, PAGE_SIZE, "%d\n", (sport->enabled) ? 
1: 0); + return snprintf(page, PAGE_SIZE, "%d\n", sport->enabled); } static ssize_t srpt_tpg_enable_store(struct config_item *item, @@ -3581,7 +3695,7 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item, struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); unsigned long tmp; - int ret; + int ret; ret = kstrtoul(page, 0, &tmp); if (ret < 0) { @@ -3617,7 +3731,7 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, const char *name) { struct srpt_port *sport = wwn->priv; - static struct se_portal_group *tpg; + struct se_portal_group *tpg; int res; WARN_ON_ONCE(wwn != &sport->port_guid_wwn && @@ -3666,7 +3780,7 @@ static void srpt_drop_tport(struct se_wwn *wwn) static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf) { - return scnprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION); + return scnprintf(buf, PAGE_SIZE, "\n"); } CONFIGFS_ATTR_RO(srpt_wwn_, version); @@ -3678,8 +3792,7 @@ static struct configfs_attribute *srpt_wwn_attrs[] = { static const struct target_core_fabric_ops srpt_template = { .module = THIS_MODULE, - .name = "srpt", - .get_fabric_name = srpt_get_fabric_name, + .fabric_name = "srpt", .tpg_get_wwn = srpt_get_fabric_wwn, .tpg_get_tag = srpt_get_tag, .tpg_check_demo_mode = srpt_check_false, @@ -3730,16 +3843,14 @@ static int __init srpt_init_module(void) ret = -EINVAL; if (srp_max_req_size < MIN_MAX_REQ_SIZE) { - pr_err("invalid value %d for kernel module parameter" - " srp_max_req_size -- must be at least %d.\n", + pr_err("invalid value %d for kernel module parameter srp_max_req_size -- must be at least %d.\n", srp_max_req_size, MIN_MAX_REQ_SIZE); goto out; } if (srpt_srq_size < MIN_SRPT_SRQ_SIZE || srpt_srq_size > MAX_SRPT_SRQ_SIZE) { - pr_err("invalid value %d for kernel module parameter" - " srpt_srq_size -- must be in the range [%d..%d].\n", + pr_err("invalid value %d for kernel module parameter srpt_srq_size -- must be in the range [%d..%d].\n", srpt_srq_size, 
MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE); goto out; } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 444dfd7281b5..39b3e50baf3d 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -104,10 +104,6 @@ enum { SRP_CMD_ORDERED_Q = 0x2, SRP_CMD_ACA = 0x4, - SRP_LOGIN_RSP_MULTICHAN_NO_CHAN = 0x0, - SRP_LOGIN_RSP_MULTICHAN_TERMINATED = 0x1, - SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2, - SRPT_DEF_SG_TABLESIZE = 128, /* * An experimentally determined value that avoids that QP creation @@ -124,11 +120,18 @@ enum { MAX_SRPT_RDMA_SIZE = 1U << 24, MAX_SRPT_RSP_SIZE = 1024, + SRP_MAX_ADD_CDB_LEN = 16, + SRP_MAX_IMM_DATA_OFFSET = 80, + SRP_MAX_IMM_DATA = 8 * 1024, MIN_MAX_REQ_SIZE = 996, - DEFAULT_MAX_REQ_SIZE - = sizeof(struct srp_cmd)/*48*/ - + sizeof(struct srp_indirect_buf)/*20*/ - + 128 * sizeof(struct srp_direct_buf)/*16*/, + DEFAULT_MAX_REQ_SIZE_1 = sizeof(struct srp_cmd)/*48*/ + + SRP_MAX_ADD_CDB_LEN + + sizeof(struct srp_indirect_buf)/*20*/ + + 128 * sizeof(struct srp_direct_buf)/*16*/, + DEFAULT_MAX_REQ_SIZE_2 = SRP_MAX_IMM_DATA_OFFSET + + sizeof(struct srp_imm_buf) + SRP_MAX_IMM_DATA, + DEFAULT_MAX_REQ_SIZE = DEFAULT_MAX_REQ_SIZE_1 > DEFAULT_MAX_REQ_SIZE_2 ? + DEFAULT_MAX_REQ_SIZE_1 : DEFAULT_MAX_REQ_SIZE_2, MIN_MAX_RSP_SIZE = sizeof(struct srp_rsp)/*36*/ + 4, DEFAULT_MAX_RSP_SIZE = 256, /* leaves 220 bytes for sense data */ @@ -165,12 +168,14 @@ enum srpt_command_state { * @cqe: Completion queue element. * @buf: Pointer to the buffer. * @dma: DMA address of the buffer. + * @offset: Offset of the first byte in @buf and @dma that is actually used. * @index: Index of the I/O context in its ioctx_ring array. */ struct srpt_ioctx { struct ib_cqe cqe; void *buf; dma_addr_t dma; + uint32_t offset; uint32_t index; }; @@ -178,12 +183,14 @@ struct srpt_ioctx { * struct srpt_recv_ioctx - SRPT receive I/O context * @ioctx: See above. 
* @wait_list: Node for insertion in srpt_rdma_ch.cmd_wait_list. + * @byte_len: Number of bytes in @ioctx.buf. */ struct srpt_recv_ioctx { struct srpt_ioctx ioctx; struct list_head wait_list; + int byte_len; }; - + struct srpt_rw_ctx { struct rdma_rw_ctx rw; struct scatterlist *sg; @@ -194,8 +201,11 @@ struct srpt_rw_ctx { * struct srpt_send_ioctx - SRPT send I/O context * @ioctx: See above. * @ch: Channel pointer. + * @recv_ioctx: Receive I/O context associated with this send I/O context. + * Only used for processing immediate data. * @s_rw_ctx: @rw_ctxs points here if only a single rw_ctx is needed. * @rw_ctxs: RDMA read/write contexts. + * @imm_sg: Scatterlist for immediate data. * @rdma_cqe: RDMA completion queue element. * @free_list: Node in srpt_rdma_ch.free_list. * @state: I/O context state. @@ -209,10 +219,13 @@ struct srpt_rw_ctx { struct srpt_send_ioctx { struct srpt_ioctx ioctx; struct srpt_rdma_ch *ch; + struct srpt_recv_ioctx *recv_ioctx; struct srpt_rw_ctx s_rw_ctx; struct srpt_rw_ctx *rw_ctxs; + struct scatterlist imm_sg; + struct ib_cqe rdma_cqe; struct list_head free_list; enum srpt_command_state state; @@ -245,7 +258,10 @@ enum rdma_ch_state { * struct srpt_rdma_ch - RDMA channel * @nexus: I_T nexus this channel is associated with. * @qp: IB queue pair used for communicating over this channel. - * @cm_id: IB CM ID associated with the channel. + * @ib_cm: See below. + * @ib_cm.cm_id: IB CM ID associated with the channel. + * @rdma_cm: See below. + * @rdma_cm.cm_id: RDMA CM ID associated with the channel. * @cq: IB completion queue for this channel. * @zw_cqe: Zero-length write CQE. * @rcu: RCU head. @@ -259,12 +275,15 @@ enum rdma_ch_state { * @req_lim: request limit: maximum number of requests that may be sent * by the initiator without having received a response. * @req_lim_delta: Number of credits not yet sent back to the initiator. + * @imm_data_offset: Offset from start of SRP_CMD for immediate data. * @spinlock: Protects free_list and state. 
* @free_list: Head of list with free send I/O contexts. * @state: channel state. See also enum rdma_ch_state. * @using_rdma_cm: Whether the RDMA/CM or IB/CM is used for this channel. * @processing_wait_list: Whether or not cmd_wait_list is being processed. + * @rsp_buf_cache: kmem_cache for @ioctx_ring. * @ioctx_ring: Send ring. + * @req_buf_cache: kmem_cache for @ioctx_recv_ring. * @ioctx_recv_ring: Receive I/O context ring. * @list: Node in srpt_nexus.ch_list. * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This @@ -297,10 +316,13 @@ struct srpt_rdma_ch { int max_ti_iu_len; atomic_t req_lim; atomic_t req_lim_delta; + u16 imm_data_offset; spinlock_t spinlock; struct list_head free_list; enum rdma_ch_state state; + struct kmem_cache *rsp_buf_cache; struct srpt_send_ioctx **ioctx_ring; + struct kmem_cache *req_buf_cache; struct srpt_recv_ioctx **ioctx_recv_ring; struct list_head list; struct list_head cmd_wait_list; @@ -395,6 +417,7 @@ struct srpt_port { * @srq_size: SRQ size. * @sdev_mutex: Serializes use_srq changes. * @use_srq: Whether or not to use SRQ. + * @req_buf_cache: kmem_cache for @ioctx_ring buffers. * @ioctx_ring: Per-HCA SRQ. * @event_handler: Per-HCA asynchronous IB event handler. * @list: Node in srpt_dev_list. @@ -409,6 +432,7 @@ struct srpt_device { int srq_size; struct mutex sdev_mutex; bool use_srq; + struct kmem_cache *req_buf_cache; struct srpt_recv_ioctx **ioctx_ring; struct ib_event_handler event_handler; struct list_head list; |