From 0194621b225348428c212f330c26d194fc77bd15 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:50:24 -0800 Subject: IB/rdmavt: Create module framework and handle driver registration This patch introduces the basics for a new module called rdma_vt. This new driver is a software implementation of the InfiniBand verbs and aims to replace the multiple implementations that exist and duplicate each other's code. While the call to actually register the device with the IB core happens in rdma_vt, most of the work is still done in the drivers themselves. This will change in a follow-on patch; this one just lays the groundwork for the infrastructure. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 include/rdma/rdma_vt.h (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h new file mode 100644 index 000000000000..0438bf229306 --- /dev/null +++ b/include/rdma/rdma_vt.h @@ -0,0 +1,70 @@ +#ifndef DEF_RDMA_VT_H +#define DEF_RDMA_VT_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Structure that low level drivers will populate in order to register with the + * rdmavt layer.
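 + * Drivers hand the populated structure to rvt_register_device() to register + * with the IB core; rvt_unregister_device() undoes the registration.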
+ */ + +#include "ib_verbs.h" +struct rvt_dev_info { + struct ib_device ibdev; + int (*port_callback)(struct ib_device *, u8, struct kobject *); + + /* + * TODO: + * need to reflect module parameters that may vary by dev + */ +}; + +int rvt_register_device(struct rvt_dev_info *rvd); +void rvt_unregister_device(struct rvt_dev_info *rvd); + +#endif /* DEF_RDMA_VT_H */ -- cgit v1.2.3 From 8afd32eb58b6885fc3e268c69b1b1b627aa2afaf Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:51:48 -0800 Subject: IB/rdmavt: Add protection domain to rdmavt. Add the data structure for, and allocation/deallocation of, protection domains for RDMAVT. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/Makefile | 3 +- drivers/infiniband/sw/rdmavt/pd.c | 103 ++++++++++++++++++++++++++++++++++ drivers/infiniband/sw/rdmavt/pd.h | 58 +++++++++++++++++++ drivers/infiniband/sw/rdmavt/vt.c | 12 ++++ drivers/infiniband/sw/rdmavt/vt.h | 1 + include/rdma/rdma_vt.h | 34 +++++++++-- 6 files changed, 206 insertions(+), 5 deletions(-) create mode 100644 drivers/infiniband/sw/rdmavt/pd.c create mode 100644 drivers/infiniband/sw/rdmavt/pd.h (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile index 134d2d05e829..c6751bbf3070 100644 --- a/drivers/infiniband/sw/rdmavt/Makefile +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -7,4 +7,5 @@ # obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o -rdmavt-y := vt.o dma.o +rdmavt-y := vt.o dma.o pd.o + diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c new file mode 100644 index 000000000000..e6e153f13629 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -0,0 +1,103 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <linux/slab.h> +#include "pd.h" + +struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct rvt_dev_info *dev = ib_to_rvt(ibdev); + struct rvt_pd *pd; + struct ib_pd *ret; + + pd = kmalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + /* + * While we could continue allocating protection domains, constrained + * only by system resources, the IBTA spec defines a max_pd limit that + * can be set, and we need to check for that. + */ + + spin_lock(&dev->n_pds_lock); + if (dev->n_pds_allocated == dev->dparms.max_pds) { + spin_unlock(&dev->n_pds_lock); + kfree(pd); + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + dev->n_pds_allocated++; + spin_unlock(&dev->n_pds_lock); + + /* ib_alloc_pd() will initialize pd->ibpd. */ + pd->user = udata ? 1 : 0; + + ret = &pd->ibpd; + +bail: + return ret; +} + +int rvt_dealloc_pd(struct ib_pd *ibpd) +{ + struct rvt_pd *pd = ibpd_to_rvtpd(ibpd); + struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); + + spin_lock(&dev->n_pds_lock); + dev->n_pds_allocated--; + spin_unlock(&dev->n_pds_lock); + + kfree(pd); + + return 0; +} diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h new file mode 100644 index 000000000000..56d75e6cb21f --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/pd.h @@ -0,0 +1,58 @@ +#ifndef DEF_RDMAVTPD_H +#define DEF_RDMAVTPD_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <rdma/rdma_vt.h> + +struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata); +int rvt_dealloc_pd(struct ib_pd *ibpd); + +#endif /* DEF_RDMAVTPD_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index d82fddad585d..b65cde435f19 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -76,9 +76,21 @@ int rvt_register_device(struct rvt_dev_info *rdi) * come up with a better mechanism that simplifies the code at some * point. */ + + /* DMA Operations */ rdi->ibdev.dma_ops = rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops; + /* Protection Domain */ + rdi->ibdev.alloc_pd = + rdi->ibdev.alloc_pd ? : rvt_alloc_pd; + rdi->ibdev.dealloc_pd = + rdi->ibdev.dealloc_pd ? : rvt_dealloc_pd; + + spin_lock_init(&rdi->n_pds_lock); + rdi->n_pds_allocated = 0; + + /* We are now good to announce we exist */ return ib_register_device(&rdi->ibdev, rdi->port_callback); } EXPORT_SYMBOL(rvt_register_device); diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index cfe59a7ba428..ba2f806cec9d 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -50,5 +50,6 @@ #include <rdma/rdma_vt.h> #include "dma.h" +#include "pd.h" #endif /* DEF_RDMAVT_H */ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 0438bf229306..6bf5fd40081d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -54,16 +54,42 @@ */ #include "ib_verbs.h" + +/* + * Things that are driver specific, module parameters in hfi1 and qib + */ +struct rvt_driver_params { + int max_pds; +}; + +/* Protection domain */ +struct rvt_pd { + struct ib_pd ibpd; + int user; /* non-zero if created from user space */ +}; + struct rvt_dev_info { struct ib_device ibdev; + + /* Driver specific */ + struct rvt_driver_params dparms; int (*port_callback)(struct ib_device *, u8, struct kobject *); - /* - * TODO: - * need to reflect module parameters that may vary by dev - */ + /* Internal use */ + int n_pds_allocated; + spinlock_t n_pds_lock; /* Protect pd allocated count */ }; + +static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct rvt_pd, ibpd); +} + +static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) +{ + return container_of(ibdev, struct rvt_dev_info, ibdev); +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); -- cgit v1.2.3 From b1070a7a4d304e680eb6c1158d76645cf5a923f1 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:52:19 -0800 Subject: IB/rdmavt: Add ib core device attributes to rvt driver params list Instead of trying to handle each
parameter separately, add ib_device_attr to rvt_driver_params. This means drivers will fill this in and pass it to the rvt registration function. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/pd.c | 2 +- include/rdma/rdma_vt.h | 57 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 3 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index e6e153f13629..f8dba88880e5 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -69,7 +69,7 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, */ spin_lock(&dev->n_pds_lock); - if (dev->n_pds_allocated == dev->dparms.max_pds) { + if (dev->n_pds_allocated == dev->dparms.props.max_pd) { spin_unlock(&dev->n_pds_lock); kfree(pd); ret = ERR_PTR(-ENOMEM); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 6bf5fd40081d..2990e03bdd9e 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -59,7 +59,45 @@ * Things that are driver specific, module parameters in hfi1 and qib */ struct rvt_driver_params { - int max_pds; + /* + * driver required fields: + * node_guid + * phys_port_cnt + * dma_device + * owner + * driver optional fields (rvt will provide generic value if blank): + * name + * node_desc + * rvt fields, driver value ignored: + * uverbs_abi_ver + * node_type + * num_comp_vectors + * uverbs_cmd_mask + */ + struct ib_device_attr props; + + /* + * Drivers will need to support a number of notifications to rvt in + * accordance with certain events. This structure should contain a mask + * of the supported events. Such events that the rvt may need to know + * about include: + * port errors + * port active + * lid change + * sm change + * client reregister + * pkey change + * + * There may also be other events that the rvt layer needs to know + * about; this is not an exhaustive list. There are some events, though, + * for which rvt does not need to rely on the driver, such as completion + * queue errors. + */ + int rvt_signal_supported; + + /* + * Anything driver specific that is not covered by props, for instance + * special module parameters, goes here. + */ }; /* Protection domain */ @@ -69,10 +107,25 @@ struct rvt_pd { }; struct rvt_dev_info { + /* + * Prior to calling for registration the driver will be responsible for + * allocating space for this structure. + * + * The driver will also be responsible for filling in certain members of + * dparms.props. + */ + struct ib_device ibdev; - /* Driver specific */ + /* Driver specific properties */ struct rvt_driver_params dparms; + + /* + * The work to create port files in /sys/class Infiniband is different + * depending on the driver. This should not be extracted away and + * instead drivers are responsible for setting the correct callback for + * this. + */ int (*port_callback)(struct ib_device *, u8, struct kobject *); /* Internal use */ -- cgit v1.2.3 From 30588643f95e1bb1239e2568de7a653722832a5e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:54:16 -0800 Subject: IB/rdmavt: Add pkey query stub The pkey table will reside in the rvt structure, but it will be modified only when the driver requests it; rvt will simply read the value to return in the query.
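For illustration only, once that table exists the query should reduce to a bounds-checked read along these lines (a sketch; the pkey_table member and its size are placeholder names that this series has not added yet):

	static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
				  u16 *pkey)
	{
		struct rvt_dev_info *rdi = ib_to_rvt(ibdev);

		/* pkey_table/pkey_table_size are hypothetical members for now */
		if (index >= rdi->pkey_table_size)
			return -EINVAL;
		*pkey = rdi->pkey_table[index];
		return 0;
	}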
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/vt.c | 21 +++++++++++++++++++++ include/rdma/rdma_vt.h | 3 ++- 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 5ac241c7966f..fc5372d75280 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -137,6 +137,26 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port, return -EOPNOTSUPP; } +/** + * rvt_query_pkey - Return a pkey from the table at a given index + * @ibdev: Verbs IB dev + * @port: Port number + * @index: Index into pkey table + * + * Returns 0 on success, a negative errno otherwise + */ +static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + /* + * The driver will be responsible for keeping rvt_dev_info.pkey_table up + * to date. This function will just return that value. There is no need + * to lock; if a stale value is read and sent to the user, so be it; + * there is no way to protect against that anyway. + */ + return 0; +} + /* * Check driver override. If driver passes a value use it, otherwise we use our * own value. */ @@ -154,6 +174,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, modify_device); CHECK_DRIVER_OVERRIDE(rdi, query_port); CHECK_DRIVER_OVERRIDE(rdi, modify_port); + CHECK_DRIVER_OVERRIDE(rdi, query_pkey); /* DMA Operations */ rdi->ibdev.dma_ops = diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 2990e03bdd9e..bf072a436a34 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -114,12 +114,13 @@ struct rvt_dev_info { * The driver will also be responsible for filling in certain members of * dparms.props */ - struct ib_device ibdev; /* Driver specific properties */ struct rvt_driver_params dparms; + /* PKey Table goes here */ + /* * The work to create port files in /sys/class Infiniband is different * depending on the driver. This should not be extracted away and -- cgit v1.2.3 From b92a7568037e2a28f61c3f79c2320431bb24dfab Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:01:42 -0800 Subject: IB/rdmavt: Move MR data structures into rvt This patch adds the MR data structures, based on hfi1, to rvt. For now the data structures are defined in include/rdma/rdma_vt.h, but once all MR functionality has been moved from the drivers into rvt they should move to rdmavt/mr.h. Reviewed-by: Ira Weiny Reviewed-by: Dean Luick Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index bf072a436a34..f232e39a5d69 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,56 @@ #include "ib_verbs.h" +/* + * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once + * drivers no longer need access to the MR directly. + */ + +/* + * A segment is a linear region of low physical memory. + * Used by the verbs layer. + */ +struct rvt_seg { + void *vaddr; + size_t length; +}; + +/* The number of rvt_segs that fit in a page.
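With 4 KiB pages and a 16-byte rvt_seg (an 8-byte vaddr plus an 8-byte length on 64-bit builds) that works out to 256 segs per page.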
 */ +#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) + +struct rvt_segarray { + struct rvt_seg segs[RVT_SEGSZ]; +}; + +struct rvt_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of rvt_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non-uniform/non power-of-2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + atomic_t refcount; + struct rvt_segarray *map[0]; /* the segments */ +}; + +#define RVT_MAX_LKEY_TABLE_BITS 23 + +struct rvt_lkey_table { + spinlock_t lock; /* protect changes in this struct */ + u32 next; /* next unused index (speeds search) */ + u32 gen; /* generation count */ + u32 max; /* size of the table */ + struct rvt_mregion __rcu **table; +}; + +/* End Memory Region */ + /* * Things that are driver specific, module parameters in hfi1 and qib */ -- cgit v1.2.3 From ca889e8ad3af9f1dfeb827356bc9839fb20f32be Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:02:41 -0800 Subject: IB/rdmavt: Add queue pair data structure to rdmavt Add queue pair data structure as well as supporting structures to rdmavt. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.h | 5 - include/rdma/rdma_vt.h | 233 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 233 insertions(+), 5 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 10bc636d0423..9c2999db528b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -50,11 +50,6 @@ #include <rdma/rdma_vt.h> -struct rvt_qp { - struct ib_qp *ibqp; - /* Other stuff */ -}; - struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index f232e39a5d69..9baa7f04e8d0 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -105,6 +105,239 @@ struct rvt_lkey_table { /* End Memory Region */ +/* + * Things needed for the Queue Pair definition. Like the MR stuff above, the + * following should probably get moved to qp.h once drivers stop trying to make + * and manipulate their own QPs. For the few instances where a driver may need + * to look into a queue pair there should be a pointer to a driver private data + * structure that they can look at. + */ + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct rvt_sge { + struct rvt_mregion *mr; + void *vaddr; /* kernel virtual address of segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +/* + * Send work request queue entry. + * The size of the sg_list is determined when the QP is created and stored + * in qp->s_max_sge.
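 + * Since sg_list is a flexible array member, swqe entries are variable sized; + * like the receive queue entries described below, they cannot be indexed as a + * plain array.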
+ */ +struct rvt_swqe { + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_ud_wr ud_wr; + struct ib_reg_wr reg_wr; + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + }; + u32 psn; /* first packet sequence number */ + u32 lpsn; /* last packet sequence number */ + u32 ssn; /* send sequence number */ + u32 length; /* total length of data in sg_list */ + struct rvt_sge sg_list[0]; +}; + +/* + * Receive work request queue entry. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). + */ +struct rvt_rwqe { + u64 wr_id; + u8 num_sge; + struct ib_sge sg_list[0]; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct rvt_rwq { + u32 head; /* new work requests posted to the head */ + u32 tail; /* receives pull requests from here. */ + struct rvt_rwqe wq[0]; +}; + +struct rvt_rq { + struct rvt_rwq *wq; + u32 size; /* size of RWQE array */ + u8 max_sge; + /* protect changes in this struct */ + spinlock_t lock ____cacheline_aligned_in_smp; +}; + +/* + * This structure is used by rvt_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. + */ +struct rvt_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + unsigned size; +}; + +#define RVT_MAX_RDMA_ATOMIC 16 + +/* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. + */ +struct rvt_ack_entry { + u8 opcode; + u8 sent; + u32 psn; + u32 lpsn; + union { + struct rvt_sge rdma_sge; + u64 atomic_data; + }; +}; + +struct rvt_sge_state { + struct rvt_sge *sg_list; /* next SGE to be used if any */ + struct rvt_sge sge; /* progress state for the current SGE */ + u32 total_len; + u8 num_sge; +}; + +/* + * Variables prefixed with s_ are for the requester (sender). + * Variables prefixed with r_ are for the responder (receiver). + * Variables prefixed with ack_ are for responder replies. + * + * Common variables are protected by both r_rq.lock and s_lock in that order + * which only happens in modify_qp() or changing the QP 'state'. 
+ */ +struct rvt_qp { + struct ib_qp ibqp; + void *priv; /* Driver private data */ + /* read mostly fields above and below */ + struct ib_ah_attr remote_ah_attr; + struct ib_ah_attr alt_ah_attr; + struct rvt_qp __rcu *next; /* link list for QPN hash table */ + struct rvt_swqe *s_wq; /* send work queue */ + struct rvt_mmap_info *ip; + + unsigned long timeout_jiffies; /* computed from timeout */ + + enum ib_mtu path_mtu; + int srate_mbps; /* s_srate (below) converted to Mbit/s */ + u32 remote_qpn; + u32 pmtu; /* decoded from path_mtu */ + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + u32 s_ahgpsn; /* set to the psn in the copy of the header */ + + u8 state; /* QP state */ + u8 allowed_ops; /* high order bits of allowed opcodes */ + u8 qp_access_flags; + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 timeout; /* Timeout for this QP */ + u8 s_srate; + u8 s_mig_state; + u8 port_num; + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_draining; + + /* start of read/write fields */ + atomic_t refcount ____cacheline_aligned_in_smp; + wait_queue_head_t wait; + + struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] + ____cacheline_aligned_in_smp; + struct rvt_sge_state s_rdma_read_sge; + + spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + unsigned long r_aflags; + u64 r_wr_id; /* ID for current receive WQE */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + + u8 r_state; /* opcode of last packet received */ + u8 r_flags; + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + + struct list_head rspwait; /* link for waiting to respond */ + + struct rvt_sge_state r_sge; /* current receive data */ + struct rvt_rq r_rq; /* receive work queue */ + + spinlock_t s_lock ____cacheline_aligned_in_smp; + struct rvt_sge_state *s_cur_sge; + u32 s_flags; + struct rvt_swqe *s_wqe; + struct rvt_sge_state s_sge; /* current send request data */ + struct rvt_mregion *s_rdma_mr; + struct sdma_engine *s_sde; /* current sde */ + u32 s_cur_size; /* size of send packet in bytes */ + u32 s_len; /* total length of s_sge */ + u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ + u32 s_next_psn; /* PSN for next request */ + u32 s_last_psn; /* last response PSN processed */ + u32 s_sending_psn; /* lowest PSN that is being sent */ + u32 s_sending_hpsn; /* highest PSN that is being sent */ + u32 s_psn; /* current packet sequence number */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ + u32 s_head; /* new entries added here */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ + u32 s_ssn; /* SSN of tail entry */ + u32 s_lsn; /* limit sequence number (credit) */ + u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u16 
s_rdma_ack_cnt; + s8 s_ahgidx; + u8 s_state; /* opcode of last packet sent */ + u8 s_ack_state; /* opcode of packet to ACK */ + u8 s_nak_state; /* non-zero if NAK is pending */ + u8 r_nak_state; /* non-zero if NAK is pending */ + u8 s_retry; /* requester retry counter */ + u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + + struct rvt_sge_state s_ack_rdma_sge; + struct timer_list s_timer; + + /* + * This sge list MUST be last. Do not add anything below here. + */ + struct rvt_sge r_sg_list[0] /* verified SGEs */ + ____cacheline_aligned_in_smp; +}; + +/* End QP section */ + /* * Things that are driver specific, module parameters in hfi1 and qib */ -- cgit v1.2.3 From aec5778775ac03ee6cfd6480adbbf6b05513d77b Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:02:52 -0800 Subject: IB/rdmavt: Move driver helper functions to a common structure Drivers are going to need to provide multiple functions for rdmavt to call in to. We already have one, so go ahead and push this into a data structure designated for driver supplied functions. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/vt.c | 6 +++++- include/rdma/rdma_vt.h | 22 +++++++++++++++------- 2 files changed, 20 insertions(+), 8 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 367bc45e06e7..b4ce97e6ff12 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -280,8 +280,12 @@ int rvt_register_device(struct rvt_dev_info *rdi) spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; + /* Validate that drivers have provided the right functions */ + if (!rdi->driver_f.port_callback) + return -EINVAL; + /* We are now good to announce we exist */ - return ib_register_device(&rdi->ibdev, rdi->port_callback); + return ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); } EXPORT_SYMBOL(rvt_register_device); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 9baa7f04e8d0..e0beedc6110e 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -383,6 +383,19 @@ struct rvt_driver_params { */ }; +/* + * Functions that drivers are required to support + */ +struct rvt_driver_provided { + /* + * The work to create port files in /sys/class Infiniband is different + * depending on the driver. This should not be extracted away and + * instead drivers are responsible for setting the correct callback for + * this. + */ + int (*port_callback)(struct ib_device *, u8, struct kobject *); +}; + /* Protection domain */ struct rvt_pd { struct ib_pd ibpd; @@ -407,13 +420,8 @@ struct rvt_dev_info { /* PKey Table goes here */ - /* - * The work to create port files in /sys/class Infiniband is different - * depending on the driver. This should not be extracted away and - * instead drivers are responsible for setting the correct callback for - * this. 
- */ - int (*port_callback)(struct ib_device *, u8, struct kobject *); + /* Driver specific helper functions */ + struct rvt_driver_provided driver_f; /* Internal use */ int n_pds_allocated; -- cgit v1.2.3 From b534875d5ab348fb9193692589e2ee82ae768e3a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:02:59 -0800 Subject: IB/rdmavt: Add device specific info prints Follow hfi1's example for printing information about the driver and incorporate into rdmavt. This requires two new functions to be provided by the driver, one to get_card_name and one to get_pci_dev. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/vt.c | 13 ++++++++++--- drivers/infiniband/sw/rdmavt/vt.h | 28 ++++++++++++++++++++++++++++ include/rdma/rdma_vt.h | 3 +++ 3 files changed, 41 insertions(+), 3 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index b4ce97e6ff12..2a13e361c808 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -213,9 +213,18 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, int rvt_register_device(struct rvt_dev_info *rdi) { + /* Validate that drivers have provided the right information */ if (!rdi) return -EINVAL; + if ((!rdi->driver_f.port_callback) || + (!rdi->driver_f.get_card_name) || + (!rdi->driver_f.get_pci_dev)) { + return -EINVAL; + } + + /* Once we get past here we can use the rvt_pr macros */ + /* Dev Ops */ CHECK_DRIVER_OVERRIDE(rdi, query_device); CHECK_DRIVER_OVERRIDE(rdi, modify_device); @@ -280,9 +289,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; - /* Validate that drivers have provided the right functions */ - if (!rdi->driver_f.port_callback) - return -EINVAL; + rvt_pr_info(rdi, "Registration with rdmavt done.\n"); /* We are now good to announce we exist */ return ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index fdb52a8221bc..54ee05a4a9b9 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -49,6 +49,7 @@ */ #include <rdma/rdma_vt.h> +#include <linux/pci.h> #include "dma.h" #include "pd.h" @@ -59,4 +60,31 @@ #include "mmap.h" #include "cq.h" +#define rvt_pr_info(rdi, fmt, ...) \ + __rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \ + rdi->driver_f.get_card_name(rdi), \ + fmt, \ + ##__VA_ARGS__) + +#define rvt_pr_warn(rdi, fmt, ...) \ + __rvt_pr_warn(rdi->driver_f.get_pci_dev(rdi), \ + rdi->driver_f.get_card_name(rdi), \ + fmt, \ + ##__VA_ARGS__) + +#define rvt_pr_err(rdi, fmt, ...) \ + __rvt_pr_err(rdi->driver_f.get_pci_dev(rdi), \ + rdi->driver_f.get_card_name(rdi), \ + fmt, \ + ##__VA_ARGS__) + +#define __rvt_pr_info(pdev, name, fmt, ...) \ + dev_info(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__) + +#define __rvt_pr_warn(pdev, name, fmt, ...) \ + dev_warn(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__) + +#define __rvt_pr_err(pdev, name, fmt, ...)
\ dev_err(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__) + #endif /* DEF_RDMAVT_H */ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e0beedc6110e..4b83770bc312 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -386,6 +386,7 @@ struct rvt_driver_params { /* * Functions that drivers are required to support */ +struct rvt_dev_info; struct rvt_driver_provided { /* * The work to create port files in /sys/class Infiniband is different @@ -394,6 +395,8 @@ struct rvt_driver_provided { * this. */ int (*port_callback)(struct ib_device *, u8, struct kobject *); + const char * (*get_card_name)(struct rvt_dev_info *rdi); + struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); }; /* Protection domain */ -- cgit v1.2.3 From 0b8a8aae02abfbd724186cffe400fbdbf0cb41d6 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:03:07 -0800 Subject: IB/rdmavt: Add the start of capability flags Drivers will need a set of flags to dictate behavior to rdmavt. This patch adds a placeholder and a spot for it to live, as well as a few flags that will be used. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4b83770bc312..b44ac176217b 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,16 @@ #include "ib_verbs.h" +/* + * For some of the IBTA objects there will likely be some + * initializations required. We need flags to determine whether it is OK + * for rdmavt to do this or not. This does not imply any functions of a + * particular IBTA object are overridden. + */ +#define RVT_FLAG_MR_INIT_DRIVER BIT(1) +#define RVT_FLAG_QP_INIT_DRIVER BIT(2) +#define RVT_FLAG_CQ_INIT_DRIVER BIT(3) + /* * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once * drivers no longer need access to the MR directly. @@ -429,6 +439,8 @@ struct rvt_dev_info { /* Internal use */ int n_pds_allocated; spinlock_t n_pds_lock; /* Protect pd allocated count */ + + int flags; }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) -- cgit v1.2.3 From 7b1e2099adc8e66f78fee2dd2f10cb8a11362083 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:03:31 -0800 Subject: IB/rdmavt: Move memory registration into rdmavt Use the memory registration routines in hfi1 and move them to rdmavt. A follow-on patch will address removing the duplicated code in the hfi1 and qib drivers. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mr.c | 683 +++++++++++++++++++++++++++++++++++++- drivers/infiniband/sw/rdmavt/mr.h | 23 ++ drivers/infiniband/sw/rdmavt/vt.c | 24 +- include/rdma/rdma_vt.h | 19 +- 4 files changed, 734 insertions(+), 15 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index c67204396088..f1dcaf4d6c3e 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -46,8 +46,252 @@ */ #include <linux/slab.h> +#include <linux/vmalloc.h> +#include <rdma/ib_umem.h> +#include "vt.h" #include "mr.h" +/* + * Do any initialization needed when a driver registers with rdmavt.
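 + * rvt sizes the lkey table from rdi->dparms.lkey_table_size, which the driver + * must set before registering; zero is rejected and values above + * RVT_MAX_LKEY_TABLE_BITS are clamped.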
+ */ +int rvt_driver_mr_init(struct rvt_dev_info *rdi) +{ + unsigned int lkey_table_size = rdi->dparms.lkey_table_size; + unsigned lk_tab_size; + int i; + + if (rdi->flags & RVT_FLAG_MR_INIT_DRIVER) { + rvt_pr_info(rdi, "Driver is doing MR init.\n"); + return 0; + } + + /* + * The top hfi1_lkey_table_size bits are used to index the + * table. The lower 8 bits can be owned by the user (copied from + * the LKEY). The remaining bits act as a generation number or tag. + */ + if (!lkey_table_size) + return -EINVAL; + + spin_lock_init(&rdi->lkey_table.lock); + + rdi->lkey_table.max = 1 << lkey_table_size; + + /* ensure generation is at least 4 bits */ + if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) { + rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n", + lkey_table_size, RVT_MAX_LKEY_TABLE_BITS); + rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS; + lkey_table_size = rdi->dparms.lkey_table_size; + } + lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table); + rdi->lkey_table.table = (struct rvt_mregion __rcu **) + vmalloc(lk_tab_size); + if (!rdi->lkey_table.table) + return -ENOMEM; + + RCU_INIT_POINTER(rdi->dma_mr, NULL); + for (i = 0; i < rdi->lkey_table.max; i++) + RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL); + + return 0; +} + +/* + * called when drivers have unregistered or perhaps failed to register with us + */ +void rvt_mr_exit(struct rvt_dev_info *rdi) +{ + if (rdi->dma_mr) + rvt_pr_err(rdi, "DMA MR not null!\n"); + + vfree(rdi->lkey_table.table); +} + +static void rvt_deinit_mregion(struct rvt_mregion *mr) +{ + int i = mr->mapsz; + + mr->mapsz = 0; + while (i) + kfree(mr->map[--i]); +} + +static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, + int count) +{ + int m, i = 0; + + mr->mapsz = 0; + m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; + for (; i < m; i++) { + mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL); + if (!mr->map[i]) { + rvt_deinit_mregion(mr); + return -ENOMEM; + } + mr->mapsz++; + } + init_completion(&mr->comp); + /* count returning the ptr to user */ + atomic_set(&mr->refcount, 1); + mr->pd = pd; + mr->max_segs = count; + return 0; +} + +/** + * rvt_alloc_lkey - allocate an lkey + * @mr: memory region that this lkey protects + * @dma_region: 0->normal key, 1->restricted DMA key + * + * Returns 0 if successful, otherwise returns -errno. + * + * Increments mr reference count as required. + * + * Sets the lkey field mr for non-dma regions. + * + */ +static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region) +{ + unsigned long flags; + u32 r; + u32 n; + int ret = 0; + struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device); + struct rvt_lkey_table *rkt = &dev->lkey_table; + + rvt_get_mr(mr); + spin_lock_irqsave(&rkt->lock, flags); + + /* special case for dma_mr lkey == 0 */ + if (dma_region) { + struct rvt_mregion *tmr; + + tmr = rcu_access_pointer(dev->dma_mr); + if (!tmr) { + rcu_assign_pointer(dev->dma_mr, mr); + mr->lkey_published = 1; + } else { + rvt_put_mr(mr); + } + goto success; + } + + /* Find the next available LKEY */ + r = rkt->next; + n = r; + for (;;) { + if (!rcu_access_pointer(rkt->table[r])) + break; + r = (r + 1) & (rkt->max - 1); + if (r == n) + goto bail; + } + rkt->next = (r + 1) & (rkt->max - 1); + /* + * Make sure lkey is never zero which is reserved to indicate an + * unrestricted LKEY. 
+ */ + rkt->gen++; + /* + * bits are capped to ensure enough bits for generation number + */ + mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) | + ((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen) + << 8); + if (mr->lkey == 0) { + mr->lkey |= 1 << 8; + rkt->gen++; + } + rcu_assign_pointer(rkt->table[r], mr); + mr->lkey_published = 1; +success: + spin_unlock_irqrestore(&rkt->lock, flags); +out: + return ret; +bail: + rvt_put_mr(mr); + spin_unlock_irqrestore(&rkt->lock, flags); + ret = -ENOMEM; + goto out; +} + +/** + * rvt_free_lkey - free an lkey + * @mr: mr to free from tables + */ +static void rvt_free_lkey(struct rvt_mregion *mr) +{ + unsigned long flags; + u32 lkey = mr->lkey; + u32 r; + struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device); + struct rvt_lkey_table *rkt = &dev->lkey_table; + int freed = 0; + + spin_lock_irqsave(&rkt->lock, flags); + if (!mr->lkey_published) + goto out; + if (lkey == 0) { + RCU_INIT_POINTER(dev->dma_mr, NULL); + } else { + r = lkey >> (32 - dev->dparms.lkey_table_size); + RCU_INIT_POINTER(rkt->table[r], NULL); + } + mr->lkey_published = 0; + freed++; +out: + spin_unlock_irqrestore(&rkt->lock, flags); + if (freed) { + synchronize_rcu(); + rvt_put_mr(mr); + } +} + +static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd) +{ + struct rvt_mr *mr; + int rval = -ENOMEM; + int m; + + /* Allocate struct plus pointers to first level page tables. */ + m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; + mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL); + if (!mr) + goto bail; + + rval = rvt_init_mregion(&mr->mr, pd, count); + if (rval) + goto bail; + /* + * ib_reg_phys_mr() will initialize mr->ibmr except for + * lkey and rkey. + */ + rval = rvt_alloc_lkey(&mr->mr, 0); + if (rval) + goto bail_mregion; + mr->ibmr.lkey = mr->mr.lkey; + mr->ibmr.rkey = mr->mr.lkey; +done: + return mr; + +bail_mregion: + rvt_deinit_mregion(&mr->mr); +bail: + kfree(mr); + mr = ERR_PTR(rval); + goto done; +} + +static void __rvt_free_mr(struct rvt_mr *mr) +{ + rvt_deinit_mregion(&mr->mr); + rvt_free_lkey(&mr->mr); + vfree(mr); +} + /** * rvt_get_dma_mr - get a DMA memory region * @pd: protection domain for this memory region @@ -59,11 +303,41 @@ */ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) { - /* - * Alloc mr and init it. - * Alloc lkey. 
- */ - return ERR_PTR(-EOPNOTSUPP); + struct rvt_mr *mr; + struct ib_mr *ret; + int rval; + + if (ibpd_to_rvtpd(pd)->user) + return ERR_PTR(-EPERM); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + rval = rvt_init_mregion(&mr->mr, pd, 0); + if (rval) { + ret = ERR_PTR(rval); + goto bail; + } + + rval = rvt_alloc_lkey(&mr->mr, 1); + if (rval) { + ret = ERR_PTR(rval); + goto bail_mregion; + } + + mr->mr.access_flags = acc; + ret = &mr->ibmr; +done: + return ret; + +bail_mregion: + rvt_deinit_mregion(&mr->mr); +bail: + kfree(mr); + goto done; } /** @@ -80,7 +354,64 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_mr *mr; + struct ib_umem *umem; + struct scatterlist *sg; + int n, m, entry; + struct ib_mr *ret; + + if (length == 0) + return ERR_PTR(-EINVAL); + + umem = ib_umem_get(pd->uobject->context, start, length, + mr_access_flags, 0); + if (IS_ERR(umem)) + return (void *)umem; + + n = umem->nmap; + + mr = __rvt_alloc_mr(n, pd); + if (IS_ERR(mr)) { + ret = (struct ib_mr *)mr; + goto bail_umem; + } + + mr->mr.user_base = start; + mr->mr.iova = virt_addr; + mr->mr.length = length; + mr->mr.offset = ib_umem_offset(umem); + mr->mr.access_flags = mr_access_flags; + mr->umem = umem; + + if (is_power_of_2(umem->page_size)) + mr->mr.page_shift = ilog2(umem->page_size); + m = 0; + n = 0; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + void *vaddr; + + vaddr = page_address(sg_page(sg)); + if (!vaddr) { + ret = ERR_PTR(-EINVAL); + goto bail_inval; + } + mr->mr.map[m]->segs[n].vaddr = vaddr; + mr->mr.map[m]->segs[n].length = umem->page_size; + n++; + if (n == RVT_SEGSZ) { + m++; + n = 0; + } + } + return &mr->ibmr; + +bail_inval: + __rvt_free_mr(mr); + +bail_umem: + ib_umem_release(umem); + + return ret; } /** @@ -94,7 +425,29 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, */ int rvt_dereg_mr(struct ib_mr *ibmr) { - return -EOPNOTSUPP; + struct rvt_mr *mr = to_imr(ibmr); + struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device); + int ret = 0; + unsigned long timeout; + + rvt_free_lkey(&mr->mr); + + rvt_put_mr(&mr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ); + if (!timeout) { + rvt_pr_err(rdi, + "rvt_dereg_mr timeout mr %p pd %p refcount %u\n", + mr, mr->mr.pd, atomic_read(&mr->mr.refcount)); + rvt_get_mr(&mr->mr); + ret = -EBUSY; + goto out; + } + rvt_deinit_mregion(&mr->mr); + if (mr->umem) + ib_umem_release(mr->umem); + kfree(mr); +out: + return ret; } /** @@ -109,7 +462,16 @@ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_mr *mr; + + if (mr_type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EINVAL); + + mr = __rvt_alloc_mr(max_num_sg, pd); + if (IS_ERR(mr)) + return (struct ib_mr *)mr; + + return &mr->ibmr; } /** @@ -123,7 +485,48 @@ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_fmr *fmr; + int m; + struct ib_fmr *ret; + int rval = -ENOMEM; + + /* Allocate struct plus pointers to first level page tables. 
*/ + m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ; + fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL); + if (!fmr) + goto bail; + + rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages); + if (rval) + goto bail; + + /* + * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & + * rkey. + */ + rval = rvt_alloc_lkey(&fmr->mr, 0); + if (rval) + goto bail_mregion; + fmr->ibfmr.rkey = fmr->mr.lkey; + fmr->ibfmr.lkey = fmr->mr.lkey; + /* + * Resources are allocated but no valid mapping (RKEY can't be + * used). + */ + fmr->mr.access_flags = mr_access_flags; + fmr->mr.max_segs = fmr_attr->max_pages; + fmr->mr.page_shift = fmr_attr->page_shift; + + ret = &fmr->ibfmr; +done: + return ret; + +bail_mregion: + rvt_deinit_mregion(&fmr->mr); +bail: + kfree(fmr); + ret = ERR_PTR(rval); + goto done; } /** @@ -139,7 +542,38 @@ struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int list_len, u64 iova) { - return -EOPNOTSUPP; + struct rvt_fmr *fmr = to_ifmr(ibfmr); + struct rvt_lkey_table *rkt; + unsigned long flags; + int m, n, i; + u32 ps; + struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device); + + i = atomic_read(&fmr->mr.refcount); + if (i > 2) + return -EBUSY; + + if (list_len > fmr->mr.max_segs) + return -EINVAL; + + rkt = &rdi->lkey_table; + spin_lock_irqsave(&rkt->lock, flags); + fmr->mr.user_base = iova; + fmr->mr.iova = iova; + ps = 1 << fmr->mr.page_shift; + fmr->mr.length = list_len * ps; + m = 0; + n = 0; + for (i = 0; i < list_len; i++) { + fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i]; + fmr->mr.map[m]->segs[n].length = ps; + if (++n == RVT_SEGSZ) { + m++; + n = 0; + } + } + spin_unlock_irqrestore(&rkt->lock, flags); + return 0; } /** @@ -150,7 +584,21 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, */ int rvt_unmap_fmr(struct list_head *fmr_list) { - return -EOPNOTSUPP; + struct rvt_fmr *fmr; + struct rvt_lkey_table *rkt; + unsigned long flags; + struct rvt_dev_info *rdi; + + list_for_each_entry(fmr, fmr_list, ibfmr.list) { + rdi = ib_to_rvt(fmr->ibfmr.device); + rkt = &rdi->lkey_table; + spin_lock_irqsave(&rkt->lock, flags); + fmr->mr.user_base = 0; + fmr->mr.iova = 0; + fmr->mr.length = 0; + spin_unlock_irqrestore(&rkt->lock, flags); + } + return 0; } /** @@ -161,5 +609,216 @@ int rvt_unmap_fmr(struct list_head *fmr_list) */ int rvt_dealloc_fmr(struct ib_fmr *ibfmr) { - return -EOPNOTSUPP; + struct rvt_fmr *fmr = to_ifmr(ibfmr); + int ret = 0; + unsigned long timeout; + + rvt_free_lkey(&fmr->mr); + rvt_put_mr(&fmr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ); + if (!timeout) { + rvt_get_mr(&fmr->mr); + ret = -EBUSY; + goto out; + } + rvt_deinit_mregion(&fmr->mr); + kfree(fmr); +out: + return ret; +} + +/** + * rvt_lkey_ok - check IB SGE for validity and initialize + * @rkt: table containing lkey to check SGE against + * @pd: protection domain + * @isge: outgoing internal SGE + * @sge: SGE to check + * @acc: access flags + * + * Return 1 if valid and successful, otherwise returns 0. + * + * increments the reference count upon success + * + * Check the IB SGE for validity and initialize our internal version + * of it. 
+ */ +int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, + struct rvt_sge *isge, struct ib_sge *sge, int acc) +{ + struct rvt_mregion *mr; + unsigned n, m; + size_t off; + struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); + + /* + * We use LKEY == zero for kernel virtual addresses + * (see rvt_get_dma_mr and dma.c). + */ + rcu_read_lock(); + if (sge->lkey == 0) { + if (pd->user) + goto bail; + mr = rcu_dereference(dev->dma_mr); + if (!mr) + goto bail; + atomic_inc(&mr->refcount); + rcu_read_unlock(); + + isge->mr = mr; + isge->vaddr = (void *)sge->addr; + isge->length = sge->length; + isge->sge_length = sge->length; + isge->m = 0; + isge->n = 0; + goto ok; + } + mr = rcu_dereference( + rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]); + if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) + goto bail; + + off = sge->addr - mr->user_base; + if (unlikely(sge->addr < mr->user_base || + off + sge->length > mr->length || + (mr->access_flags & acc) != acc)) + goto bail; + atomic_inc(&mr->refcount); + rcu_read_unlock(); + + off += mr->offset; + if (mr->page_shift) { + /* + * page sizes are uniform power of 2 so no loop is necessary + * entries_spanned_by_off is the number of times the loop below + * would have executed. + */ + size_t entries_spanned_by_off; + + entries_spanned_by_off = off >> mr->page_shift; + off -= (entries_spanned_by_off << mr->page_shift); + m = entries_spanned_by_off / RVT_SEGSZ; + n = entries_spanned_by_off % RVT_SEGSZ; + } else { + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= RVT_SEGSZ) { + m++; + n = 0; + } + } + } + isge->mr = mr; + isge->vaddr = mr->map[m]->segs[n].vaddr + off; + isge->length = mr->map[m]->segs[n].length - off; + isge->sge_length = sge->length; + isge->m = m; + isge->n = n; +ok: + return 1; +bail: + rcu_read_unlock(); + return 0; +} +EXPORT_SYMBOL(rvt_lkey_ok); + +/** + * rvt_rkey_ok - check the IB virtual address, length, and RKEY + * @qp: qp for validation + * @sge: SGE state + * @len: length of data + * @vaddr: virtual address to place data + * @rkey: rkey to check + * @acc: access flags + * + * Return 1 if successful, otherwise 0. + * + * increments the reference count upon success + */ +int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, + u32 len, u64 vaddr, u32 rkey, int acc) +{ + struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device); + struct rvt_lkey_table *rkt = &dev->lkey_table; + struct rvt_mregion *mr; + unsigned n, m; + size_t off; + + /* + * We use RKEY == zero for kernel virtual addresses + * (see rvt_get_dma_mr and dma.c). 
+ */ + rcu_read_lock(); + if (rkey == 0) { + struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd); + struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device); + + if (pd->user) + goto bail; + mr = rcu_dereference(rdi->dma_mr); + if (!mr) + goto bail; + atomic_inc(&mr->refcount); + rcu_read_unlock(); + + sge->mr = mr; + sge->vaddr = (void *)vaddr; + sge->length = len; + sge->sge_length = len; + sge->m = 0; + sge->n = 0; + goto ok; + } + + mr = rcu_dereference( + rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); + if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + goto bail; + + off = vaddr - mr->iova; + if (unlikely(vaddr < mr->iova || off + len > mr->length || + (mr->access_flags & acc) == 0)) + goto bail; + atomic_inc(&mr->refcount); + rcu_read_unlock(); + + off += mr->offset; + if (mr->page_shift) { + /* + * page sizes are uniform power of 2 so no loop is necessary + * entries_spanned_by_off is the number of times the loop below + * would have executed. + */ + size_t entries_spanned_by_off; + + entries_spanned_by_off = off >> mr->page_shift; + off -= (entries_spanned_by_off << mr->page_shift); + m = entries_spanned_by_off / RVT_SEGSZ; + n = entries_spanned_by_off % RVT_SEGSZ; + } else { + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= RVT_SEGSZ) { + m++; + n = 0; + } + } + } + sge->mr = mr; + sge->vaddr = mr->map[m]->segs[n].vaddr + off; + sge->length = mr->map[m]->segs[n].length - off; + sge->sge_length = len; + sge->m = m; + sge->n = n; +ok: + return 1; +bail: + rcu_read_unlock(); + return 0; } +EXPORT_SYMBOL(rvt_rkey_ok); diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h index f19e9dafc618..c5339aa2d241 100644 --- a/drivers/infiniband/sw/rdmavt/mr.h +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -49,6 +49,29 @@ */ #include <rdma/rdma_vt.h> +struct rvt_fmr { + struct ib_fmr ibfmr; + struct rvt_mregion mr; /* must be last */ +}; + +struct rvt_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct rvt_mregion mr; /* must be last */ +}; + +static inline struct rvt_fmr *to_ifmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct rvt_fmr, ibfmr); +} + +static inline struct rvt_mr *to_imr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct rvt_mr, ibmr); +} + +int rvt_driver_mr_init(struct rvt_dev_info *rdi); +void rvt_mr_exit(struct rvt_dev_info *rdi); /* Mem Regions */ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 2a13e361c808..516c810fad65 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -214,6 +214,8 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, int rvt_register_device(struct rvt_dev_info *rdi) { /* Validate that drivers have provided the right information */ + int ret = 0; + if (!rdi) return -EINVAL; @@ -262,6 +264,12 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, detach_mcast); /* Mem Region */ + ret = rvt_driver_mr_init(rdi); + if (ret) { + rvt_pr_err(rdi, "Error in driver MR init.\n"); + goto bail_no_mr; + } + CHECK_DRIVER_OVERRIDE(rdi, get_dma_mr); CHECK_DRIVER_OVERRIDE(rdi, reg_user_mr); CHECK_DRIVER_OVERRIDE(rdi, dereg_mr); @@ -289,10 +297,21 @@ int rvt_register_device(struct rvt_dev_info *rdi) spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; + /* We are now good to announce we exist */ + ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); +
if (ret) { + rvt_pr_err(rdi, "Failed to register driver with ib core.\n"); + goto bail_mr; + } + rvt_pr_info(rdi, "Registration with rdmavt done.\n"); + return ret; - /* We are now good to announce we exist */ - return ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); +bail_mr: + rvt_mr_exit(rdi); + +bail_no_mr: + return ret; } EXPORT_SYMBOL(rvt_register_device); @@ -302,5 +321,6 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) return; ib_unregister_device(&rdi->ibdev); + rvt_mr_exit(rdi); } EXPORT_SYMBOL(rvt_unregister_device); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index b44ac176217b..9a479575078f 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -391,6 +391,7 @@ struct rvt_driver_params { * Anything driver specific that is not covered by props * For instance special module parameters. Goes here. */ + unsigned int lkey_table_size; }; /* @@ -416,6 +417,8 @@ struct rvt_pd { }; struct rvt_dev_info { + struct ib_device ibdev; /* Keep this first. Nothing above here */ + /* * Prior to calling for registration the driver will be responsible for * allocating space for this structure. @@ -423,7 +426,6 @@ struct rvt_dev_info { * The driver will also be responsible for filling in certain members of * dparms.props */ - struct ib_device ibdev; /* Driver specific properties */ struct rvt_driver_params dparms; @@ -453,7 +455,22 @@ static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) return container_of(ibdev, struct rvt_dev_info, ibdev); } +static inline void rvt_put_mr(struct rvt_mregion *mr) +{ + if (unlikely(atomic_dec_and_test(&mr->refcount))) + complete(&mr->comp); +} + +static inline void rvt_get_mr(struct rvt_mregion *mr) +{ + atomic_inc(&mr->refcount); +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); +int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, + u32 len, u64 vaddr, u32 rkey, int acc); +int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, + struct rvt_sge *isge, struct ib_sge *sge, int acc); #endif /* DEF_RDMA_VT_H */ -- cgit v1.2.3 From f2f342115ef2b0755abd73573831351e371f6242 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 6 Jan 2016 10:03:47 -0800 Subject: IB/rdmavt: Add common LID defines to rdmavt Original patch is from Kamal Heib . It has been split into separate patches. This patch adds RVT_PERMISSIVE_LID and RVT_MULTICAST_LID_BASE to rdmavt. Reviewed-by: Ira Weiny Signed-off-by: Kamal Heib Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 9a479575078f..dbb45bcd1fea 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,9 @@ #include "ib_verbs.h" +#define RVT_MULTICAST_LID_BASE 0xC000 +#define RVT_PERMISSIVE_LID 0xFFFF + /* * For some of the IBTA objects there will likely be some * initializations required. We need flags to determine whether it is OK -- cgit v1.2.3 From 119a8e708d16d38eedfa3d920b89b709dda41a8f Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 6 Jan 2016 10:03:59 -0800 Subject: IB/rdmavt: Add AH to rdmavt Original patch is from Kamal Heib . It has been split into three separate patches. This one for rdmavt, a follow on for qib, and one for hfi1. Create datastructure for address handle and implement the create/destroy/modify/query of address handle for rdmavt. 
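A minimal sketch of the driver side of this interface, assuming a hypothetical mydrv driver; rvt_check_ah(), ib_rate_to_mbps(), and the driver_f.check_ah hook are from the tree, everything prefixed mydrv_ is illustrative:

#include <rdma/rdma_vt.h>

/*
 * Reject anything this hardware cannot honor. rvt_check_ah() has
 * already validated the port number, static rate, GRH index, and DLID
 * before it calls this hook.
 */
static int mydrv_check_ah(struct ib_device *ibdev,
			  struct ib_ah_attr *ah_attr)
{
	/* hypothetical 40 Gb/s limit for this hardware */
	if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
	    ib_rate_to_mbps(ah_attr->static_rate) > 40000)
		return -EINVAL;
	return 0;
}

A driver sets rdi->driver_f.check_ah = mydrv_check_ah before calling rvt_register_device(); the vt.c hunk below makes the hook mandatory at this point in the series.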
Reviewed-by: Ira Weiny Signed-off-by: Kamal Heib Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/ah.c | 98 +++++++++++++++++++++++++++++++++++++-- drivers/infiniband/sw/rdmavt/vt.c | 5 +- include/rdma/rdma_vt.h | 17 +++++++ 3 files changed, 115 insertions(+), 5 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index d36895597304..2519db98be6c 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -45,7 +45,49 @@ * */ +#include #include "ah.h" +#include "vt.h" /* for prints */ + +/** + * rvt_check_ah - validate the attributes of AH + * @ibdev: the ib device + * @ah_attr: the attributes of the AH + */ +int rvt_check_ah(struct ib_device *ibdev, + struct ib_ah_attr *ah_attr) +{ + int err; + struct ib_port_attr port_attr; + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, + ah_attr->port_num); + + err = ib_query_port(ibdev, ah_attr->port_num, &port_attr); + if (err) + return -EINVAL; + if (ah_attr->port_num < 1 || + ah_attr->port_num > ibdev->phys_port_cnt) + return -EINVAL; + if (ah_attr->static_rate != IB_RATE_PORT_CURRENT && + ib_rate_to_mbps(ah_attr->static_rate) < 0) + return -EINVAL; + if ((ah_attr->ah_flags & IB_AH_GRH) && + ah_attr->grh.sgid_index >= port_attr.gid_tbl_len) + return -EINVAL; + if (link != IB_LINK_LAYER_ETHERNET) { + if (ah_attr->dlid == 0) + return -EINVAL; + if (ah_attr->dlid >= RVT_MULTICAST_LID_BASE && + ah_attr->dlid != RVT_PERMISSIVE_LID && + !(ah_attr->ah_flags & IB_AH_GRH)) + return -EINVAL; + } + if (rdi->driver_f.check_ah(ibdev, ah_attr)) + return -EINVAL; + return 0; +} +EXPORT_SYMBOL(rvt_check_ah); /** * rvt_create_ah - create an address handle @@ -57,20 +99,68 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_ah *ah; + struct rvt_dev_info *dev = ib_to_rvt(pd->device); + unsigned long flags; + + if (rvt_check_ah(pd->device, ah_attr)) + return ERR_PTR(-EINVAL); + + ah = kmalloc(sizeof(*ah), GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + spin_lock_irqsave(&dev->n_ahs_lock, flags); + if (dev->n_ahs_allocated == dev->dparms.props.max_ah) { + spin_unlock(&dev->n_ahs_lock); + kfree(ah); + return ERR_PTR(-ENOMEM); + } + + dev->n_ahs_allocated++; + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + + ah->attr = *ah_attr; + atomic_set(&ah->refcount, 0); + + return &ah->ibah; } int rvt_destroy_ah(struct ib_ah *ibah) { - return -EOPNOTSUPP; + struct rvt_dev_info *dev = ib_to_rvt(ibah->device); + struct rvt_ah *ah = ibah_to_rvtah(ibah); + unsigned long flags; + + if (atomic_read(&ah->refcount) != 0) + return -EBUSY; + + spin_lock_irqsave(&dev->n_ahs_lock, flags); + dev->n_ahs_allocated--; + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + + kfree(ah); + + return 0; } int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { - return -EOPNOTSUPP; + struct rvt_ah *ah = ibah_to_rvtah(ibah); + + if (rvt_check_ah(ibah->device, ah_attr)) + return -EINVAL; + + ah->attr = *ah_attr; + + return 0; } int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { - return -EOPNOTSUPP; + struct rvt_ah *ah = ibah_to_rvtah(ibah); + + *ah_attr = ah->attr; + + return 0; } diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index e92af9c6521e..7dab0cac5937 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -221,7 
+221,8 @@ int rvt_register_device(struct rvt_dev_info *rdi) if ((!rdi->driver_f.port_callback) || (!rdi->driver_f.get_card_name) || - (!rdi->driver_f.get_pci_dev)) { + (!rdi->driver_f.get_pci_dev) || + (!rdi->driver_f.check_ah)) { return -EINVAL; } @@ -252,6 +253,8 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, destroy_ah); CHECK_DRIVER_OVERRIDE(rdi, modify_ah); CHECK_DRIVER_OVERRIDE(rdi, query_ah); + spin_lock_init(&rdi->n_ahs_lock); + rdi->n_ahs_allocated = 0; /* Shared Receive Queue */ CHECK_DRIVER_OVERRIDE(rdi, create_srq); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index dbb45bcd1fea..36cced63af77 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -411,6 +411,7 @@ struct rvt_driver_provided { int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); + int (*check_ah)(struct ib_device *, struct ib_ah_attr *); }; /* Protection domain */ @@ -419,6 +420,13 @@ struct rvt_pd { int user; /* non-zero if created from user space */ }; +/* Address handle */ +struct rvt_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; + atomic_t refcount; +}; + struct rvt_dev_info { struct ib_device ibdev; /* Keep this first. Nothing above here */ @@ -445,6 +453,9 @@ struct rvt_dev_info { int n_pds_allocated; spinlock_t n_pds_lock; /* Protect pd allocated count */ + int n_ahs_allocated; + spinlock_t n_ahs_lock; /* Protect ah allocated count */ + int flags; }; @@ -453,6 +464,11 @@ static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) return container_of(ibpd, struct rvt_pd, ibpd); } +static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah) +{ + return container_of(ibah, struct rvt_ah, ibah); +} + static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) { return container_of(ibdev, struct rvt_dev_info, ibdev); @@ -471,6 +487,7 @@ static inline void rvt_get_mr(struct rvt_mregion *mr) int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); +int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, -- cgit v1.2.3 From 70a1a351626073123ab79de24119977c4a297fdf Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:06 -0800 Subject: IB/rdmavt: Move SRQ data structure into rdmavt Patch moves the srq data structure into rdmavt in preparation for removal from qib and hfi1 which will follow in subsequent patches. 
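With the structure in rdmavt, a driver reaches the common SRQ state through the new ibsrq_to_rvtsrq() helper. A sketch, assuming the wrap-at-size ring indexing qib and hfi1 use (example_srq_posted() itself is hypothetical):

#include <rdma/rdma_vt.h>

/* Number of RWQEs currently posted; head and tail wrap at rq.size. */
static u32 example_srq_posted(struct ib_srq *ibsrq)
{
	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
	struct rvt_rwq *wq = srq->rq.wq;
	u32 head = wq->head;	/* new work requests posted here */
	u32 tail = wq->tail;	/* receives pulled from here */

	return (head >= tail) ? head - tail : head + srq->rq.size - tail;
}

Comparing such a count against srq->limit is what drives the "send signal when number of RWQEs < limit" behavior noted in the structure.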
Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 36cced63af77..fcf3ec05da70 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -349,6 +349,14 @@ struct rvt_qp { ____cacheline_aligned_in_smp; }; +struct rvt_srq { + struct ib_srq ibsrq; + struct rvt_rq rq; + struct rvt_mmap_info *ip; + /* send signal when number of RWQEs < limit */ + u32 limit; +}; + /* End QP section */ /* @@ -485,6 +493,11 @@ static inline void rvt_get_mr(struct rvt_mregion *mr) atomic_inc(&mr->refcount); } +static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct rvt_srq, ibsrq); +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); -- cgit v1.2.3 From f3d01bbcdc47a728336008a9254732c1652aeddd Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:13 -0800 Subject: IB/rdmavt: Add an ibport data structure to rdmavt Converge the ibport data structures of qib and hfi1 into a common ib port structure. Also provides a place to keep track of these ports in case rdmavt needs it. Along with this goes an attach and detach function for drivers to use to notify rdmavt of the ports. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/vt.c | 24 ++++++++++++++ include/rdma/rdma_vt.h | 66 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 7dab0cac5937..44de2807fc9e 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -300,6 +300,19 @@ int rvt_register_device(struct rvt_dev_info *rdi) spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; + if (rdi->dparms.nports) { + rdi->ports = kcalloc(rdi->dparms.nports, + sizeof(struct rvt_ibport **), + GFP_KERNEL); + if (!rdi->ports) { + rvt_pr_err(rdi, "Could not allocate port mem.\n"); + ret = -ENOMEM; + goto bail_mr; + } + } else { + rvt_pr_warn(rdi, "Driver says it has no ports.\n"); + } + /* We are now good to announce we exist */ ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); if (ret) { @@ -327,3 +340,14 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) rvt_mr_exit(rdi); } EXPORT_SYMBOL(rvt_unregister_device); + +/* + * Keep track of a list of ports. No need to have a detach port. + * They persist until the driver goes away. + */ +void rvt_attach_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, + int portnum) +{ + rdi->ports[portnum] = port; +} +EXPORT_SYMBOL(rvt_attach_port); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index fcf3ec05da70..a3d6a5bd0c02 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -53,6 +53,8 @@ * rdmavt layer. 
*/ +#include +#include #include "ib_verbs.h" #define RVT_MULTICAST_LID_BASE 0xC000 @@ -359,6 +361,65 @@ struct rvt_srq { /* End QP section */ +struct rvt_ibport { + struct rvt_qp __rcu *qp[2]; + struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ + struct rb_root mcast_tree; + spinlock_t lock; /* protect changes in this struct */ + + /* non-zero when timer is set */ + unsigned long mkey_lease_timeout; + unsigned long trap_timeout; + __be64 gid_prefix; /* in network order */ + __be64 mkey; + u64 tid; + u32 port_cap_flags; + u32 pma_sample_start; + u32 pma_sample_interval; + __be16 pma_counter_select[5]; + u16 pma_tag; + u16 mkey_lease_period; + u16 sm_lid; + u8 sm_sl; + u8 mkeyprot; + u8 subnet_timeout; + u8 vl_high_limit; + + /* + * Driver is expected to keep these up to date. These + * counters are informational only and not required to be + * completely accurate. + */ + u64 n_rc_resends; + u64 n_seq_naks; + u64 n_rdma_seq; + u64 n_rnr_naks; + u64 n_other_naks; + u64 n_loop_pkts; + u64 n_pkt_drops; + u64 n_vl15_dropped; + u64 n_rc_timeouts; + u64 n_dmawait; + u64 n_unaligned; + u64 n_rc_dupreq; + u64 n_rc_seqnak; + u16 pkey_violations; + u16 qkey_violations; + u16 mkey_violations; + + /* Hot-path per CPU counters to avoid cacheline trading to update */ + u64 z_rc_acks; + u64 z_rc_qacks; + u64 z_rc_delayed_comp; + u64 __percpu *rc_acks; + u64 __percpu *rc_qacks; + u64 __percpu *rc_delayed_comp; + + void *priv; /* driver private data */ + + /* TODO: Move sm_ah and smi_ah into here as well*/ +}; + /* * Things that are driver specific, module parameters in hfi1 and qib */ @@ -403,6 +464,7 @@ struct rvt_driver_params { * For instance special module parameters. Goes here. */ unsigned int lkey_table_size; + int nports; }; /* @@ -465,6 +527,7 @@ struct rvt_dev_info { spinlock_t n_ahs_lock; /* Protect ah allocated count */ int flags; + struct rvt_ibport **ports; }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) @@ -501,9 +564,10 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +void rvt_attach_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, + int portnum); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc); - #endif /* DEF_RDMA_VT_H */ -- cgit v1.2.3 From b036db83c0ec8d1e81df19410a494be4cfe0b186 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:23 -0800 Subject: IB/rdmavt: Add driver notification for new AH Drivers may need to do some work once an address handle has been created. Add a driver function for this purpose. 
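A sketch of what such a hook might do, with hypothetical mydrv_ helpers stubbed in so it is self-contained; the callback signature and the rvt_ah fields it fills are the ones this patch introduces:

#include <linux/log2.h>
#include <rdma/rdma_vt.h>

/* Hypothetical per-port lookups a real driver would back with HW state. */
static u8 mydrv_sl_to_vl(struct ib_device *ibdev, u8 port, u8 sl)
{
	return sl & 0xf;
}

static u32 mydrv_port_mtu(struct ib_device *ibdev, u8 port)
{
	return 4096;
}

/*
 * Called by rvt_create_ah() after the AH has been allocated and counted,
 * letting the driver precompute fields its send path needs.
 */
static void mydrv_notify_new_ah(struct ib_device *ibdev,
				struct ib_ah_attr *ah_attr,
				struct rvt_ah *ah)
{
	ah->vl = mydrv_sl_to_vl(ibdev, ah_attr->port_num, ah_attr->sl);
	ah->log_pmtu = ilog2(mydrv_port_mtu(ibdev, ah_attr->port_num));
}

The hook is optional: drivers that need it set rdi->driver_f.notify_new_ah; this patch also relaxes check_ah to be optional, as the ah.c hunk shows.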
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/ah.c | 7 +++++-- include/rdma/rdma_vt.h | 41 ++++++++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 18 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 2519db98be6c..621afc305183 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -83,8 +83,8 @@ int rvt_check_ah(struct ib_device *ibdev, !(ah_attr->ah_flags & IB_AH_GRH)) return -EINVAL; } - if (rdi->driver_f.check_ah(ibdev, ah_attr)) - return -EINVAL; + if (rdi->driver_f.check_ah) + return rdi->driver_f.check_ah(ibdev, ah_attr); return 0; } EXPORT_SYMBOL(rvt_check_ah); @@ -123,6 +123,9 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, ah->attr = *ah_attr; atomic_set(&ah->refcount, 0); + if (dev->driver_f.notify_new_ah) + dev->driver_f.notify_new_ah(pd->device, ah_attr, ah); + return &ah->ibah; } diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index a3d6a5bd0c02..ef66d2b0ec37 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -467,9 +467,21 @@ struct rvt_driver_params { int nports; }; -/* - * Functions that drivers are required to support - */ +/* Protection domain */ +struct rvt_pd { + struct ib_pd ibpd; + int user; /* non-zero if created from user space */ +}; + +/* Address handle */ +struct rvt_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; + atomic_t refcount; + u8 vl; + u8 log_pmtu; +}; + struct rvt_dev_info; struct rvt_driver_provided { /* @@ -478,23 +490,20 @@ struct rvt_driver_provided { * instead drivers are responsible for setting the correct callback for * this. */ + + /* -------------------*/ + /* Required functions */ + /* -------------------*/ int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); - int (*check_ah)(struct ib_device *, struct ib_ah_attr *); -}; -/* Protection domain */ -struct rvt_pd { - struct ib_pd ibpd; - int user; /* non-zero if created from user space */ -}; - -/* Address handle */ -struct rvt_ah { - struct ib_ah ibah; - struct ib_ah_attr attr; - atomic_t refcount; + /*--------------------*/ + /* Optional functions */ + /*--------------------*/ + int (*check_ah)(struct ib_device *, struct ib_ah_attr *); + void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, + struct rvt_ah *); }; struct rvt_dev_info { -- cgit v1.2.3 From b4e64397dabc946b83ffb1defa1215ede84c3b97 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:31 -0800 Subject: IB/rdmavt: Break rdma_vt main include header file up Until all functionality is moved over to rdmavt drivers still need to access a number of fields in data structures that are predominantly meant to be used by rdmavt. Once these rdmavt_.h header files are no longer being touched by drivers their content should be moved to rdmavt/.h. While here move a couple #defines over to more general IB verbs header files because they fit better. 
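One consequence of the #define move worth noting: RVT_MULTICAST_LID_BASE and RVT_PERMISSIVE_LID were plain host-order values, while IB_MULTICAST_LID_BASE and IB_LID_PERMISSIVE in ib_verbs.h are __be16 constants. A sketch of the resulting comparison pattern (the example_ helper is illustrative; the conversion matches the ah.c hunk below):

#include <rdma/ib_verbs.h>

/* dlid is host order, as kept in struct ib_ah_attr */
static bool example_dlid_needs_grh(u16 dlid)
{
	return dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
	       dlid != be16_to_cpu(IB_LID_PERMISSIVE);
}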
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/ah.c | 4 +- include/rdma/ib_verbs.h | 1 + include/rdma/rdma_vt.h | 309 +------------------------------------- include/rdma/rdmavt_mr.h | 130 ++++++++++++++++ include/rdma/rdmavt_qp.h | 262 ++++++++++++++++++++++++++++++++ 5 files changed, 398 insertions(+), 308 deletions(-) create mode 100644 include/rdma/rdmavt_mr.h create mode 100644 include/rdma/rdmavt_qp.h (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 621afc305183..c194d9d9bd25 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -78,8 +78,8 @@ int rvt_check_ah(struct ib_device *ibdev, if (link != IB_LINK_LAYER_ETHERNET) { if (ah_attr->dlid == 0) return -EINVAL; - if (ah_attr->dlid >= RVT_MULTICAST_LID_BASE && - ah_attr->dlid != RVT_PERMISSIVE_LID && + if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) && + ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) && !(ah_attr->ah_flags & IB_AH_GRH)) return -EINVAL; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 284b00c8fea4..d7d531cf00b7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -613,6 +613,7 @@ enum { }; #define IB_LID_PERMISSIVE cpu_to_be16(0xFFFF) +#define IB_MULTICAST_LID_BASE cpu_to_be16(0xC000) enum ib_ah_flags { IB_AH_GRH = 1 diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index ef66d2b0ec37..79da8ee3e2b3 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,10 +55,9 @@ #include #include -#include "ib_verbs.h" - -#define RVT_MULTICAST_LID_BASE 0xC000 -#define RVT_PERMISSIVE_LID 0xFFFF +#include +#include +#include /* * For some of the IBTA objects there will likely be some @@ -70,297 +69,6 @@ #define RVT_FLAG_QP_INIT_DRIVER BIT(2) #define RVT_FLAG_CQ_INIT_DRIVER BIT(3) -/* - * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once - * drivers no longer need access to the MR directly. - */ - -/* - * A segment is a linear region of low physical memory. - * Used by the verbs layer. - */ -struct rvt_seg { - void *vaddr; - size_t length; -}; - -/* The number of rvt_segs that fit in a page. */ -#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) - -struct rvt_segarray { - struct rvt_seg segs[RVT_SEGSZ]; -}; - -struct rvt_mregion { - struct ib_pd *pd; /* shares refcnt of ibmr.pd */ - u64 user_base; /* User's address for this region */ - u64 iova; /* IB start address of this region */ - size_t length; - u32 lkey; - u32 offset; /* offset (bytes) to start of region */ - int access_flags; - u32 max_segs; /* number of rvt_segs in all the arrays */ - u32 mapsz; /* size of the map array */ - u8 page_shift; /* 0 - non unform/non powerof2 sizes */ - u8 lkey_published; /* in global table */ - struct completion comp; /* complete when refcount goes to zero */ - atomic_t refcount; - struct rvt_segarray *map[0]; /* the segments */ -}; - -#define RVT_MAX_LKEY_TABLE_BITS 23 - -struct rvt_lkey_table { - spinlock_t lock; /* protect changes in this struct */ - u32 next; /* next unused index (speeds search) */ - u32 gen; /* generation count */ - u32 max; /* size of the table */ - struct rvt_mregion __rcu **table; -}; - -/* End Memmory Region */ - -/* - * Things needed for the Queue Pair definition. Like the MR stuff above the - * following should probably get moved to qp.h once drivers stop trying to make - * and manipulate thier own QPs. 
For the few instnaces where a driver may need - * to look into a queue pair there should be a pointer to a driver priavte data - * structure that they can look at. - */ - -/* - * These keep track of the copy progress within a memory region. - * Used by the verbs layer. - */ -struct rvt_sge { - struct rvt_mregion *mr; - void *vaddr; /* kernel virtual address of segment */ - u32 sge_length; /* length of the SGE */ - u32 length; /* remaining length of the segment */ - u16 m; /* current index: mr->map[m] */ - u16 n; /* current index: mr->map[m]->segs[n] */ -}; - -/* - * Send work request queue entry. - * The size of the sg_list is determined when the QP is created and stored - * in qp->s_max_sge. - */ -struct rvt_swqe { - union { - struct ib_send_wr wr; /* don't use wr.sg_list */ - struct ib_ud_wr ud_wr; - struct ib_reg_wr reg_wr; - struct ib_rdma_wr rdma_wr; - struct ib_atomic_wr atomic_wr; - }; - u32 psn; /* first packet sequence number */ - u32 lpsn; /* last packet sequence number */ - u32 ssn; /* send sequence number */ - u32 length; /* total length of data in sg_list */ - struct rvt_sge sg_list[0]; -}; - -/* - * Receive work request queue entry. - * The size of the sg_list is determined when the QP (or SRQ) is created - * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). - */ -struct rvt_rwqe { - u64 wr_id; - u8 num_sge; - struct ib_sge sg_list[0]; -}; - -/* - * This structure is used to contain the head pointer, tail pointer, - * and receive work queue entries as a single memory allocation so - * it can be mmap'ed into user space. - * Note that the wq array elements are variable size so you can't - * just index into the array to get the N'th element; - * use get_rwqe_ptr() instead. - */ -struct rvt_rwq { - u32 head; /* new work requests posted to the head */ - u32 tail; /* receives pull requests from here. */ - struct rvt_rwqe wq[0]; -}; - -struct rvt_rq { - struct rvt_rwq *wq; - u32 size; /* size of RWQE array */ - u8 max_sge; - /* protect changes in this struct */ - spinlock_t lock ____cacheline_aligned_in_smp; -}; - -/* - * This structure is used by rvt_mmap() to validate an offset - * when an mmap() request is made. The vm_area_struct then uses - * this as its vm_private_data. - */ -struct rvt_mmap_info { - struct list_head pending_mmaps; - struct ib_ucontext *context; - void *obj; - __u64 offset; - struct kref ref; - unsigned size; -}; - -#define RVT_MAX_RDMA_ATOMIC 16 - -/* - * This structure holds the information that the send tasklet needs - * to send a RDMA read response or atomic operation. - */ -struct rvt_ack_entry { - u8 opcode; - u8 sent; - u32 psn; - u32 lpsn; - union { - struct rvt_sge rdma_sge; - u64 atomic_data; - }; -}; - -struct rvt_sge_state { - struct rvt_sge *sg_list; /* next SGE to be used if any */ - struct rvt_sge sge; /* progress state for the current SGE */ - u32 total_len; - u8 num_sge; -}; - -/* - * Variables prefixed with s_ are for the requester (sender). - * Variables prefixed with r_ are for the responder (receiver). - * Variables prefixed with ack_ are for responder replies. - * - * Common variables are protected by both r_rq.lock and s_lock in that order - * which only happens in modify_qp() or changing the QP 'state'. 
- */ -struct rvt_qp { - struct ib_qp ibqp; - void *priv; /* Driver private data */ - /* read mostly fields above and below */ - struct ib_ah_attr remote_ah_attr; - struct ib_ah_attr alt_ah_attr; - struct rvt_qp __rcu *next; /* link list for QPN hash table */ - struct rvt_swqe *s_wq; /* send work queue */ - struct rvt_mmap_info *ip; - - unsigned long timeout_jiffies; /* computed from timeout */ - - enum ib_mtu path_mtu; - int srate_mbps; /* s_srate (below) converted to Mbit/s */ - u32 remote_qpn; - u32 pmtu; /* decoded from path_mtu */ - u32 qkey; /* QKEY for this QP (for UD or RD) */ - u32 s_size; /* send work queue size */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ - u32 s_ahgpsn; /* set to the psn in the copy of the header */ - - u8 state; /* QP state */ - u8 allowed_ops; /* high order bits of allowed opcodes */ - u8 qp_access_flags; - u8 alt_timeout; /* Alternate path timeout for this QP */ - u8 timeout; /* Timeout for this QP */ - u8 s_srate; - u8 s_mig_state; - u8 port_num; - u8 s_pkey_index; /* PKEY index to use */ - u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ - u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ - u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ - u8 s_retry_cnt; /* number of times to retry */ - u8 s_rnr_retry_cnt; - u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 s_max_sge; /* size of s_wq->sg_list */ - u8 s_draining; - - /* start of read/write fields */ - atomic_t refcount ____cacheline_aligned_in_smp; - wait_queue_head_t wait; - - struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] - ____cacheline_aligned_in_smp; - struct rvt_sge_state s_rdma_read_sge; - - spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ - unsigned long r_aflags; - u64 r_wr_id; /* ID for current receive WQE */ - u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ - u32 r_len; /* total length of r_sge */ - u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ - u32 r_msn; /* message sequence number */ - - u8 r_state; /* opcode of last packet received */ - u8 r_flags; - u8 r_head_ack_queue; /* index into s_ack_queue[] */ - - struct list_head rspwait; /* link for waiting to respond */ - - struct rvt_sge_state r_sge; /* current receive data */ - struct rvt_rq r_rq; /* receive work queue */ - - spinlock_t s_lock ____cacheline_aligned_in_smp; - struct rvt_sge_state *s_cur_sge; - u32 s_flags; - struct rvt_swqe *s_wqe; - struct rvt_sge_state s_sge; /* current send request data */ - struct rvt_mregion *s_rdma_mr; - struct sdma_engine *s_sde; /* current sde */ - u32 s_cur_size; /* size of send packet in bytes */ - u32 s_len; /* total length of s_sge */ - u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ - u32 s_next_psn; /* PSN for next request */ - u32 s_last_psn; /* last response PSN processed */ - u32 s_sending_psn; /* lowest PSN that is being sent */ - u32 s_sending_hpsn; /* highest PSN that is being sent */ - u32 s_psn; /* current packet sequence number */ - u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ - u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_head; /* new entries added here */ - u32 s_tail; /* next entry to process */ - u32 s_cur; /* current work queue entry */ - u32 s_acked; /* last un-ACK'ed entry */ - u32 s_last; /* last completed entry */ - u32 s_ssn; /* SSN of tail entry */ - u32 s_lsn; /* limit sequence number (credit) */ - u16 s_hdrwords; /* size of s_hdr in 32 bit words */ - u16 
s_rdma_ack_cnt; - s8 s_ahgidx; - u8 s_state; /* opcode of last packet sent */ - u8 s_ack_state; /* opcode of packet to ACK */ - u8 s_nak_state; /* non-zero if NAK is pending */ - u8 r_nak_state; /* non-zero if NAK is pending */ - u8 s_retry; /* requester retry counter */ - u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ - u8 s_tail_ack_queue; /* index into s_ack_queue[] */ - - struct rvt_sge_state s_ack_rdma_sge; - struct timer_list s_timer; - - /* - * This sge list MUST be last. Do not add anything below here. - */ - struct rvt_sge r_sg_list[0] /* verified SGEs */ - ____cacheline_aligned_in_smp; -}; - -struct rvt_srq { - struct ib_srq ibsrq; - struct rvt_rq rq; - struct rvt_mmap_info *ip; - /* send signal when number of RWQEs < limit */ - u32 limit; -}; - -/* End QP section */ - struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ @@ -554,17 +262,6 @@ static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) return container_of(ibdev, struct rvt_dev_info, ibdev); } -static inline void rvt_put_mr(struct rvt_mregion *mr) -{ - if (unlikely(atomic_dec_and_test(&mr->refcount))) - complete(&mr->comp); -} - -static inline void rvt_get_mr(struct rvt_mregion *mr) -{ - atomic_inc(&mr->refcount); -} - static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) { return container_of(ibsrq, struct rvt_srq, ibsrq); diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h new file mode 100644 index 000000000000..ea60476c6b6b --- /dev/null +++ b/include/rdma/rdmavt_mr.h @@ -0,0 +1,130 @@ +#ifndef DEF_RDMAVT_INCMR_H +#define DEF_RDMAVT_INCMR_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once + * drivers no longer need access to the MR directly. + */ + +/* + * A segment is a linear region of low physical memory. + * Used by the verbs layer. + */ +struct rvt_seg { + void *vaddr; + size_t length; +}; + +/* The number of rvt_segs that fit in a page. */ +#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) + +struct rvt_segarray { + struct rvt_seg segs[RVT_SEGSZ]; +}; + +struct rvt_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of rvt_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non unform/non powerof2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + atomic_t refcount; + struct rvt_segarray *map[0]; /* the segments */ +}; + +#define RVT_MAX_LKEY_TABLE_BITS 23 + +struct rvt_lkey_table { + spinlock_t lock; /* protect changes in this struct */ + u32 next; /* next unused index (speeds search) */ + u32 gen; /* generation count */ + u32 max; /* size of the table */ + struct rvt_mregion __rcu **table; +}; + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct rvt_sge { + struct rvt_mregion *mr; + void *vaddr; /* kernel virtual address of segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +struct rvt_sge_state { + struct rvt_sge *sg_list; /* next SGE to be used if any */ + struct rvt_sge sge; /* progress state for the current SGE */ + u32 total_len; + u8 num_sge; +}; + +static inline void rvt_put_mr(struct rvt_mregion *mr) +{ + if (unlikely(atomic_dec_and_test(&mr->refcount))) + complete(&mr->comp); +} + +static inline void rvt_get_mr(struct rvt_mregion *mr) +{ + atomic_inc(&mr->refcount); +} + +#endif /* DEF_RDMAVT_INCMRH */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h new file mode 100644 index 000000000000..f33fbb0b3824 --- /dev/null +++ b/include/rdma/rdmavt_qp.h @@ -0,0 +1,262 @@ +#ifndef DEF_RDMAVT_INCQP_H +#define DEF_RDMAVT_INCQP_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Send work request queue entry. + * The size of the sg_list is determined when the QP is created and stored + * in qp->s_max_sge. + */ +struct rvt_swqe { + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_ud_wr ud_wr; + struct ib_reg_wr reg_wr; + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + }; + u32 psn; /* first packet sequence number */ + u32 lpsn; /* last packet sequence number */ + u32 ssn; /* send sequence number */ + u32 length; /* total length of data in sg_list */ + struct rvt_sge sg_list[0]; +}; + +/* + * Receive work request queue entry. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). + */ +struct rvt_rwqe { + u64 wr_id; + u8 num_sge; + struct ib_sge sg_list[0]; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct rvt_rwq { + u32 head; /* new work requests posted to the head */ + u32 tail; /* receives pull requests from here. */ + struct rvt_rwqe wq[0]; +}; + +struct rvt_rq { + struct rvt_rwq *wq; + u32 size; /* size of RWQE array */ + u8 max_sge; + /* protect changes in this struct */ + spinlock_t lock ____cacheline_aligned_in_smp; +}; + +/* + * This structure is used by rvt_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. 
+ */ +struct rvt_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + unsigned size; +}; + +#define RVT_MAX_RDMA_ATOMIC 16 + +/* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. + */ +struct rvt_ack_entry { + u8 opcode; + u8 sent; + u32 psn; + u32 lpsn; + union { + struct rvt_sge rdma_sge; + u64 atomic_data; + }; +}; + +/* + * Variables prefixed with s_ are for the requester (sender). + * Variables prefixed with r_ are for the responder (receiver). + * Variables prefixed with ack_ are for responder replies. + * + * Common variables are protected by both r_rq.lock and s_lock in that order + * which only happens in modify_qp() or changing the QP 'state'. + */ +struct rvt_qp { + struct ib_qp ibqp; + void *priv; /* Driver private data */ + /* read mostly fields above and below */ + struct ib_ah_attr remote_ah_attr; + struct ib_ah_attr alt_ah_attr; + struct rvt_qp __rcu *next; /* link list for QPN hash table */ + struct rvt_swqe *s_wq; /* send work queue */ + struct rvt_mmap_info *ip; + + unsigned long timeout_jiffies; /* computed from timeout */ + + enum ib_mtu path_mtu; + int srate_mbps; /* s_srate (below) converted to Mbit/s */ + u32 remote_qpn; + u32 pmtu; /* decoded from path_mtu */ + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + u32 s_ahgpsn; /* set to the psn in the copy of the header */ + + u8 state; /* QP state */ + u8 allowed_ops; /* high order bits of allowed opcodes */ + u8 qp_access_flags; + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 timeout; /* Timeout for this QP */ + u8 s_srate; + u8 s_mig_state; + u8 port_num; + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_draining; + + /* start of read/write fields */ + atomic_t refcount ____cacheline_aligned_in_smp; + wait_queue_head_t wait; + + struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] + ____cacheline_aligned_in_smp; + struct rvt_sge_state s_rdma_read_sge; + + spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + unsigned long r_aflags; + u64 r_wr_id; /* ID for current receive WQE */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + + u8 r_state; /* opcode of last packet received */ + u8 r_flags; + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + + struct list_head rspwait; /* link for waiting to respond */ + + struct rvt_sge_state r_sge; /* current receive data */ + struct rvt_rq r_rq; /* receive work queue */ + + spinlock_t s_lock ____cacheline_aligned_in_smp; + struct rvt_sge_state *s_cur_sge; + u32 s_flags; + struct rvt_swqe *s_wqe; + struct rvt_sge_state s_sge; /* current send request data */ + struct rvt_mregion *s_rdma_mr; + struct sdma_engine *s_sde; /* current sde */ + u32 s_cur_size; /* size of send packet in bytes */ + u32 s_len; /* total length of s_sge */ + 
u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ + u32 s_next_psn; /* PSN for next request */ + u32 s_last_psn; /* last response PSN processed */ + u32 s_sending_psn; /* lowest PSN that is being sent */ + u32 s_sending_hpsn; /* highest PSN that is being sent */ + u32 s_psn; /* current packet sequence number */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ + u32 s_head; /* new entries added here */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ + u32 s_ssn; /* SSN of tail entry */ + u32 s_lsn; /* limit sequence number (credit) */ + u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u16 s_rdma_ack_cnt; + s8 s_ahgidx; + u8 s_state; /* opcode of last packet sent */ + u8 s_ack_state; /* opcode of packet to ACK */ + u8 s_nak_state; /* non-zero if NAK is pending */ + u8 r_nak_state; /* non-zero if NAK is pending */ + u8 s_retry; /* requester retry counter */ + u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + + struct rvt_sge_state s_ack_rdma_sge; + struct timer_list s_timer; + + /* + * This sge list MUST be last. Do not add anything below here. + */ + struct rvt_sge r_sg_list[0] /* verified SGEs */ + ____cacheline_aligned_in_smp; +}; + +struct rvt_srq { + struct ib_srq ibsrq; + struct rvt_rq rq; + struct rvt_mmap_info *ip; + /* send signal when number of RWQEs < limit */ + u32 limit; +}; + +#endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From 0acb0cc7ecc1e4860b056368566c0c2c254ae281 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:46 -0800 Subject: IB/rdmavt: Initialize and teardown of qpn table Add table init as well as teardown for handling qpn maps. Drivers can still provide this functionality by setting the QP_INIT_DRIVER bit. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 197 ++++++++++++++++++++++++++++++++++++++ drivers/infiniband/sw/rdmavt/qp.h | 2 + drivers/infiniband/sw/rdmavt/vt.c | 35 ++++--- include/rdma/rdma_vt.h | 9 ++ include/rdma/rdmavt_qp.h | 33 +++++++ 5 files changed, 263 insertions(+), 13 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 23a5f686e211..17dd6ab193fa 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -45,8 +45,205 @@ * */ +#include +#include +#include "vt.h" #include "qp.h" +static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map) +{ + unsigned long page = get_zeroed_page(GFP_KERNEL); + + /* + * Free the page if someone raced with us installing it. 
+ */ + + spin_lock(&qpt->lock); + if (map->page) + free_page(page); + else + map->page = (void *)page; + spin_unlock(&qpt->lock); +} + +/** + * init_qpn_table - initialize the QP number table for a device + * @qpt: the QPN table + */ +static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) +{ + u32 offset, i; + struct rvt_qpn_map *map; + int ret = 0; + + if (!(rdi->dparms.qpn_res_end > rdi->dparms.qpn_res_start)) + return -EINVAL; + + spin_lock_init(&qpt->lock); + + qpt->last = rdi->dparms.qpn_start; + qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift; + + /* + * Drivers may want some QPs beyond what we need for verbs let them use + * our qpn table. No need for two. Lets go ahead and mark the bitmaps + * for those. The reserved range must be *after* the range which verbs + * will pick from. + */ + + /* Figure out number of bit maps needed before reserved range */ + qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE; + + /* This should always be zero */ + offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK; + + /* Starting with the first reserved bit map */ + map = &qpt->map[qpt->nmaps]; + + rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n", + rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end); + for (i = rdi->dparms.qpn_res_start; i < rdi->dparms.qpn_res_end; i++) { + if (!map->page) { + get_map_page(qpt, map); + if (!map->page) { + ret = -ENOMEM; + break; + } + } + set_bit(offset, map->page); + offset++; + if (offset == RVT_BITS_PER_PAGE) { + /* next page */ + qpt->nmaps++; + map++; + offset = 0; + } + } + return ret; +} + +/** + * free_qpn_table - free the QP number table for a device + * @qpt: the QPN table + */ +static void free_qpn_table(struct rvt_qpn_table *qpt) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(qpt->map); i++) + free_page((unsigned long)qpt->map[i].page); +} + +int rvt_driver_qp_init(struct rvt_dev_info *rdi) +{ + int i; + int ret = -ENOMEM; + + if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) { + rvt_pr_info(rdi, "Driver is doing QP init.\n"); + return 0; + } + + if (!rdi->dparms.qp_table_size) + return -EINVAL; + + /* + * If driver is not doing any QP allocation then make sure it is + * providing the necessary QP functions. + */ + if (!rdi->driver_f.free_all_qps) + return -EINVAL; + + /* allocate parent object */ + rdi->qp_dev = kzalloc(sizeof(*rdi->qp_dev), GFP_KERNEL); + if (!rdi->qp_dev) + return -ENOMEM; + + /* allocate hash table */ + rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size; + rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size); + rdi->qp_dev->qp_table = + kmalloc(rdi->qp_dev->qp_table_size * + sizeof(*rdi->qp_dev->qp_table), + GFP_KERNEL); + if (!rdi->qp_dev->qp_table) + goto no_qp_table; + + for (i = 0; i < rdi->qp_dev->qp_table_size; i++) + RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL); + + spin_lock_init(&rdi->qp_dev->qpt_lock); + + /* initialize qpn map */ + if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table)) + goto fail_table; + + return ret; + +fail_table: + kfree(rdi->qp_dev->qp_table); + free_qpn_table(&rdi->qp_dev->qpn_table); + +no_qp_table: + kfree(rdi->qp_dev); + + return ret; +} + +/** + * free_all_qps - check for QPs still in use + * @qpt: the QP table to empty + * + * There should not be any QPs still in use. + * Free memory for table. 
+ */ +static unsigned free_all_qps(struct rvt_dev_info *rdi) +{ + unsigned long flags; + struct rvt_qp *qp; + unsigned n, qp_inuse = 0; + spinlock_t *ql; /* work around too long line below */ + + rdi->driver_f.free_all_qps(rdi); + + if (!rdi->qp_dev) + return 0; + + ql = &rdi->qp_dev->qpt_lock; + spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + for (n = 0; n < rdi->qp_dev->qp_table_size; n++) { + qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n], + lockdep_is_held(ql)); + RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL); + qp = rcu_dereference_protected(qp->next, + lockdep_is_held(ql)); + while (qp) { + qp_inuse++; + qp = rcu_dereference_protected(qp->next, + lockdep_is_held(ql)); + } + } + spin_unlock_irqrestore(ql, flags); + synchronize_rcu(); + return qp_inuse; +} + +void rvt_qp_exit(struct rvt_dev_info *rdi) +{ + u32 qps_inuse = free_all_qps(rdi); + + qps_inuse = free_all_qps(rdi); + if (qps_inuse) + rvt_pr_err(rdi, "QP memory leak! %u still in use\n", + qps_inuse); + if (!rdi->qp_dev) + return; + + kfree(rdi->qp_dev->qp_table); + free_qpn_table(&rdi->qp_dev->qpn_table); + kfree(rdi->qp_dev); +} + /** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 9c2999db528b..f438809e18e2 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -50,6 +50,8 @@ #include +int rvt_driver_qp_init(struct rvt_dev_info *rdi); +void rvt_qp_exit(struct rvt_dev_info *rdi); struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 44de2807fc9e..f2d995d2f62c 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -223,9 +223,23 @@ int rvt_register_device(struct rvt_dev_info *rdi) (!rdi->driver_f.get_card_name) || (!rdi->driver_f.get_pci_dev) || (!rdi->driver_f.check_ah)) { + pr_err("Driver not supporting req func\n"); return -EINVAL; } + if (!rdi->dparms.nports) { + rvt_pr_err(rdi, "Driver says it has no ports.\n"); + return -EINVAL; + } + + rdi->ports = kcalloc(rdi->dparms.nports, + sizeof(struct rvt_ibport **), + GFP_KERNEL); + if (!rdi->ports) { + rvt_pr_err(rdi, "Could not allocate port mem.\n"); + return -ENOMEM; + } + /* Once we get past here we can use the rvt_pr macros */ /* Dev Ops */ @@ -240,6 +254,12 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, get_port_immutable); /* Queue Pairs */ + ret = rvt_driver_qp_init(rdi); + if (ret) { + pr_err("Error in driver QP init.\n"); + return -EINVAL; + } + CHECK_DRIVER_OVERRIDE(rdi, create_qp); CHECK_DRIVER_OVERRIDE(rdi, modify_qp); CHECK_DRIVER_OVERRIDE(rdi, destroy_qp); @@ -300,19 +320,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; - if (rdi->dparms.nports) { - rdi->ports = kcalloc(rdi->dparms.nports, - sizeof(struct rvt_ibport **), - GFP_KERNEL); - if (!rdi->ports) { - rvt_pr_err(rdi, "Could not allocate port mem.\n"); - ret = -ENOMEM; - goto bail_mr; - } - } else { - rvt_pr_warn(rdi, "Driver says it has no ports.\n"); - } - /* We are now good to announce we exist */ ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); if (ret) { @@ -327,6 +334,8 @@ bail_mr: rvt_mr_exit(rdi); bail_no_mr: + rvt_qp_exit(rdi); + return ret; } EXPORT_SYMBOL(rvt_register_device); diff --git 
a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 79da8ee3e2b3..950c2910e3f4 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -172,7 +172,13 @@ struct rvt_driver_params { * For instance special module parameters. Goes here. */ unsigned int lkey_table_size; + unsigned int qp_table_size; + int qpn_start; + int qpn_inc; + int qpn_res_start; + int qpn_res_end; int nports; + u8 qos_shift; }; /* Protection domain */ @@ -205,6 +211,7 @@ struct rvt_driver_provided { int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); + void (*free_all_qps)(struct rvt_dev_info *rdi); /*--------------------*/ /* Optional functions */ @@ -245,6 +252,8 @@ struct rvt_dev_info { int flags; struct rvt_ibport **ports; + + struct rvt_qp_ibdev *qp_dev; }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f33fbb0b3824..e6a7d17dcd30 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -259,4 +259,37 @@ struct rvt_srq { u32 limit; }; +#define RVT_QPN_MAX BIT(24) +#define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) +#define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) +#define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) + +/* + * QPN-map pages start out as NULL, they get allocated upon + * first use and are never deallocated. This way, + * large bitmaps are not allocated unless large numbers of QPs are used. + */ +struct rvt_qpn_map { + void *page; +}; + +struct rvt_qpn_table { + spinlock_t lock; /* protect changes to the qp table */ + unsigned flags; /* flags for QP0/1 allocated for each port */ + u32 last; /* last QP number allocated */ + u32 nmaps; /* size of the map table */ + u16 limit; + u8 incr; + /* bit map of free QP numbers other than 0/1 */ + struct rvt_qpn_map map[RVT_QPNMAP_ENTRIES]; +}; + +struct rvt_qp_ibdev { + u32 qp_table_size; + u32 qp_table_bits; + struct rvt_qp __rcu **qp_table; + spinlock_t qpt_lock; /* qptable lock */ + struct rvt_qpn_table qpn_table; +}; + #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From 822514d75a9647662fff39d728c1f4636b75d904 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:57 -0800 Subject: IB/rdmavt: Add mmap related functions The mmap data structure was moved in a previous commit. This patch now pulls in the related functions. 
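A sketch of how a queue destined for user space would use these helpers, e.g. from a CQ or SRQ create path; example_publish_queue() is hypothetical, while the rvt_ calls and the pending-mmap fields in rvt_dev_info (see the rdma_vt.h hunk below) are what this patch provides:

#include <rdma/rdma_vt.h>

static int example_publish_queue(struct rvt_dev_info *rdi,
				 struct ib_ucontext *context,
				 void *obj, u32 sz,
				 struct rvt_mmap_info **ipp)
{
	struct rvt_mmap_info *ip;

	/* obj must be vmalloc memory; rvt_mmap() uses remap_vmalloc_range() */
	ip = rvt_create_mmap_info(rdi, sz, context, obj);
	if (!ip)
		return -ENOMEM;

	/* queue it so the user's subsequent mmap() call can find it */
	spin_lock_irq(&rdi->pending_lock);
	list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
	spin_unlock_irq(&rdi->pending_lock);

	*ipp = ip;
	return 0;
}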
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mmap.c | 140 +++++++++++++++++++++++++++++++++++- drivers/infiniband/sw/rdmavt/mmap.h | 2 +- drivers/infiniband/sw/rdmavt/vt.c | 1 + include/rdma/rdma_vt.h | 15 ++++ 4 files changed, 156 insertions(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index d09f3a05a747..fc30ff7f24ea 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -46,8 +46,61 @@ */ #include +#include +#include +#include #include "mmap.h" +void rvt_mmap_init(struct rvt_dev_info *rdi) +{ + INIT_LIST_HEAD(&rdi->pending_mmaps); + spin_lock_init(&rdi->pending_lock); + rdi->mmap_offset = PAGE_SIZE; + spin_lock_init(&rdi->mmap_offset_lock); +} + +/** + * rvt_release_mmap_info - free mmap info structure + * @ref: a pointer to the kref within struct rvt_mmap_info + */ +void rvt_release_mmap_info(struct kref *ref) +{ + struct rvt_mmap_info *ip = + container_of(ref, struct rvt_mmap_info, ref); + struct rvt_dev_info *rdi = ib_to_rvt(ip->context->device); + + spin_lock_irq(&rdi->pending_lock); + list_del(&ip->pending_mmaps); + spin_unlock_irq(&rdi->pending_lock); + + vfree(ip->obj); + kfree(ip); +} +EXPORT_SYMBOL(rvt_release_mmap_info); + +/* + * open and close keep track of how many times the CQ is mapped, + * to avoid releasing it. + */ +static void rvt_vma_open(struct vm_area_struct *vma) +{ + struct rvt_mmap_info *ip = vma->vm_private_data; + + kref_get(&ip->ref); +} + +static void rvt_vma_close(struct vm_area_struct *vma) +{ + struct rvt_mmap_info *ip = vma->vm_private_data; + + kref_put(&ip->ref, rvt_release_mmap_info); +} + +static const struct vm_operations_struct rvt_vm_ops = { + .open = rvt_vma_open, + .close = rvt_vma_close, +}; + /** * rvt_mmap - create a new mmap region * @context: the IB user context of the process making the mmap() call @@ -56,5 +109,90 @@ */ int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { - return -EOPNOTSUPP; + struct rvt_dev_info *rdi = ib_to_rvt(context->device); + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long size = vma->vm_end - vma->vm_start; + struct rvt_mmap_info *ip, *pp; + int ret = -EINVAL; + + /* + * Search the device's list of objects waiting for a mmap call. + * Normally, this list is very short since a call to create a + * CQ, QP, or SRQ is soon followed by a call to mmap(). + */ + spin_lock_irq(&rdi->pending_lock); + list_for_each_entry_safe(ip, pp, &rdi->pending_mmaps, + pending_mmaps) { + /* Only the creator is allowed to mmap the object */ + if (context != ip->context || (__u64)offset != ip->offset) + continue; + /* Don't allow a mmap larger than the object. 
*/ + if (size > ip->size) + break; + + list_del_init(&ip->pending_mmaps); + spin_unlock_irq(&rdi->pending_lock); + + ret = remap_vmalloc_range(vma, ip->obj, 0); + if (ret) + goto done; + vma->vm_ops = &rvt_vm_ops; + vma->vm_private_data = ip; + rvt_vma_open(vma); + goto done; + } + spin_unlock_irq(&rdi->pending_lock); +done: + return ret; +} +EXPORT_SYMBOL(rvt_mmap); + +/* + * Allocate information for hfi1_mmap + */ +struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, + u32 size, + struct ib_ucontext *context, + void *obj) +{ + struct rvt_mmap_info *ip; + + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) + return ip; + + size = PAGE_ALIGN(size); + + spin_lock_irq(&rdi->mmap_offset_lock); + if (rdi->mmap_offset == 0) + rdi->mmap_offset = PAGE_SIZE; + ip->offset = rdi->mmap_offset; + rdi->mmap_offset += size; + spin_unlock_irq(&rdi->mmap_offset_lock); + + INIT_LIST_HEAD(&ip->pending_mmaps); + ip->size = size; + ip->context = context; + ip->obj = obj; + kref_init(&ip->ref); + + return ip; +} +EXPORT_SYMBOL(rvt_create_mmap_info); + +void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, + u32 size, void *obj) +{ + size = PAGE_ALIGN(size); + + spin_lock_irq(&rdi->mmap_offset_lock); + if (rdi->mmap_offset == 0) + rdi->mmap_offset = PAGE_SIZE; + ip->offset = rdi->mmap_offset; + rdi->mmap_offset += size; + spin_unlock_irq(&rdi->mmap_offset_lock); + + ip->size = size; + ip->obj = obj; } +EXPORT_SYMBOL(rvt_update_mmap_info); diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h index 94f6377cef3c..3513e25a8491 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.h +++ b/drivers/infiniband/sw/rdmavt/mmap.h @@ -50,6 +50,6 @@ #include -int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); +void rvt_mmap_init(struct rvt_dev_info *rdi); #endif /* DEF_RDMAVTMMAP_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index f2d995d2f62c..ab4105a8d120 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -241,6 +241,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) } /* Once we get past here we can use the rvt_pr macros */ + rvt_mmap_init(rdi); /* Dev Ops */ CHECK_DRIVER_OVERRIDE(rdi, query_device); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 950c2910e3f4..fd25d2309ee3 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -254,6 +254,12 @@ struct rvt_dev_info { struct rvt_ibport **ports; struct rvt_qp_ibdev *qp_dev; + + /* memory maps */ + struct list_head pending_mmaps; + spinlock_t mmap_offset_lock; /* protect mmap_offset */ + u32 mmap_offset; + spinlock_t pending_lock; /* protect pending mmap list */ }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) @@ -285,4 +291,13 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc); +int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); +void rvt_release_mmap_info(struct kref *ref); +struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, + u32 size, + struct ib_ucontext *context, + void *obj); +void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, + u32 size, void *obj); + #endif /* DEF_RDMA_VT_H */ -- cgit v1.2.3 From 38ce2c6f3ae8dda0ee42dc8474759ff949994bea Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:05:12 -0800 
Subject: IB/rdmavt: Add pkey support Add a pkey table to the per-port data structure in rdi. Also bring in related pkey functions. Drivers will still be responsible for allocating and maintaining the pkey table. However, they need to tell rdmavt where to find the pkey table. We cannot move the pkey table up into rdmavt because drivers need to manipulate this long before registering with it. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/vt.c | 46 +++++++++++++++++++++++++-------------- include/rdma/rdma_vt.h | 38 ++++++++++++++++++++++++++++---- 2 files changed, 64 insertions(+), 20 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index ab4105a8d120..18b5f43e6824 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -154,6 +154,17 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index, * lock, if a stale value is read and sent to the user so be it there is * no way to protect against that anyway. */ + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + int port_index; + + if (index >= rvt_get_npkeys(rdi)) + return -EINVAL; + + port_index = port - 1; /* IB ports start at 1; our array at 0 */ + if ((port_index < 0) || (port_index >= rdi->dparms.nports)) + return -EINVAL; + + *pkey = rvt_get_pkey(rdi, port_index, index); return 0; } @@ -227,19 +238,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) return -EINVAL; } - if (!rdi->dparms.nports) { - rvt_pr_err(rdi, "Driver says it has no ports.\n"); - return -EINVAL; - } - - rdi->ports = kcalloc(rdi->dparms.nports, - sizeof(struct rvt_ibport **), - GFP_KERNEL); - if (!rdi->ports) { - rvt_pr_err(rdi, "Could not allocate port mem.\n"); - return -ENOMEM; - } - /* Once we get past here we can use the rvt_pr macros */ rvt_mmap_init(rdi); @@ -355,9 +353,25 @@ EXPORT_SYMBOL(rvt_unregister_device); * Keep track of a list of ports. No need to have a detach port. * They persist until the driver goes away. */ -void rvt_attach_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, - int portnum) +int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, + int portnum, u16 *pkey_table) { + if (!rdi->dparms.nports) { + rvt_pr_err(rdi, "Driver says it has no ports.\n"); + return -EINVAL; + } + + rdi->ports = kcalloc(rdi->dparms.nports, + sizeof(struct rvt_ibport **), + GFP_KERNEL); + if (!rdi->ports) { + rvt_pr_err(rdi, "Could not allocate port mem.\n"); + return -ENOMEM; + } + rdi->ports[portnum] = port; + rdi->ports[portnum]->pkey_table = pkey_table; + + return 0; } -EXPORT_SYMBOL(rvt_attach_port); +EXPORT_SYMBOL(rvt_init_port); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index fd25d2309ee3..3a78f20cbf2d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -69,6 +69,8 @@ #define RVT_FLAG_QP_INIT_DRIVER BIT(2) #define RVT_FLAG_CQ_INIT_DRIVER BIT(3) +#define RVT_MAX_PKEY_VALUES 16 + struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ @@ -125,6 +127,14 @@ struct rvt_ibport { void *priv; /* driver private data */ + /* + * The pkey table is allocated and maintained by the driver. Drivers + * need to have access to this before registering with rdmavt. However + * rdmavt will need access to it so drivers need to provide this during + * the attach port API call. 
+ */ + u16 *pkey_table; + /* TODO: Move sm_ah and smi_ah into here as well*/ }; @@ -178,6 +188,7 @@ struct rvt_driver_params { int qpn_res_start; int qpn_res_end; int nports; + int npkeys; u8 qos_shift; }; @@ -238,8 +249,6 @@ struct rvt_dev_info { struct rvt_mregion __rcu *dma_mr; struct rvt_lkey_table lkey_table; - /* PKey Table goes here */ - /* Driver specific helper functions */ struct rvt_driver_provided driver_f; @@ -282,11 +291,32 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) return container_of(ibsrq, struct rvt_srq, ibsrq); } +static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) +{ + /* + * All ports have same number of pkeys. + */ + return rdi->dparms.npkeys; +} + +/* + * Return the indexed PKEY from the port PKEY table. + */ +static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, + int port_index, + unsigned index) +{ + if (index >= rvt_get_npkeys(rdi)) + return 0; + else + return rdi->ports[port_index]->pkey_table[index]; +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); -void rvt_attach_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, - int portnum); +int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, + int portnum, u16 *pkey_table); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, -- cgit v1.2.3 From 050eb7fbe0ff2bcd95833ff180337116d5907483 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:50:11 -0800 Subject: IB/rdmavt: Add R and S flags for queue pairs Use the flags originally provided for hfi1 in the rdmavt driver. These will be made available to drivers in the qp header file. Reviewed-by: Harish Chegondi Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index e6a7d17dcd30..1aa8b5b40f9f 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -48,6 +48,86 @@ * */ +#include +/* + * Atomic bit definitions for r_aflags. + */ +#define RVT_R_WRID_VALID 0 +#define RVT_R_REWIND_SGE 1 + +/* + * Bit definitions for r_flags. + */ +#define RVT_R_REUSE_SGE 0x01 +#define RVT_R_RDMAR_SEQ 0x02 +#define RVT_R_RSP_NAK 0x04 +#define RVT_R_RSP_SEND 0x08 +#define RVT_R_COMM_EST 0x10 + +/* + * Bit definitions for s_flags. 
+ * + * RVT_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled + * RVT_S_BUSY - send tasklet is processing the QP + * RVT_S_TIMER - the RC retry timer is active + * RVT_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics + * RVT_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs + * before processing the next SWQE + * RVT_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete + * before processing the next SWQE + * RVT_S_WAIT_RNR - waiting for RNR timeout + * RVT_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE + * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating + * next send completion entry not via send DMA + * RVT_S_WAIT_PIO - waiting for a send buffer to be available + * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available + * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available + * RVT_S_WAIT_KMEM - waiting for kernel memory to be available + * RVT_S_WAIT_PSN - waiting for a packet to exit the send DMA queue + * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests + * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK + * RVT_S_ECN - a BECN was queued to the send engine + */ +#define RVT_S_SIGNAL_REQ_WR 0x0001 +#define RVT_S_BUSY 0x0002 +#define RVT_S_TIMER 0x0004 +#define RVT_S_RESP_PENDING 0x0008 +#define RVT_S_ACK_PENDING 0x0010 +#define RVT_S_WAIT_FENCE 0x0020 +#define RVT_S_WAIT_RDMAR 0x0040 +#define RVT_S_WAIT_RNR 0x0080 +#define RVT_S_WAIT_SSN_CREDIT 0x0100 +#define RVT_S_WAIT_DMA 0x0200 +#define RVT_S_WAIT_PIO 0x0400 +#define RVT_S_WAIT_TX 0x0800 +#define RVT_S_WAIT_DMA_DESC 0x1000 +#define RVT_S_WAIT_KMEM 0x2000 +#define RVT_S_WAIT_PSN 0x4000 +#define RVT_S_WAIT_ACK 0x8000 +#define RVT_S_SEND_ONE 0x10000 +#define RVT_S_UNLIMITED_CREDIT 0x20000 +#define RVT_S_AHG_VALID 0x40000 +#define RVT_S_AHG_CLEAR 0x80000 +#define RVT_S_ECN 0x100000 + +/* + * Wait flags that would prevent any packet type from being sent. + */ +#define RVT_S_ANY_WAIT_IO (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \ + RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) + +/* + * Wait flags that would prevent send work requests from making progress. + */ +#define RVT_S_ANY_WAIT_SEND (RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | \ + RVT_S_WAIT_RNR | RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA | \ + RVT_S_WAIT_PSN | RVT_S_WAIT_ACK) + +#define RVT_S_ANY_WAIT (RVT_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) + +/* Number of bits to pay attention to in the opcode for checking qp type */ +#define RVT_OPCODE_QP_MASK 0xE0 + /* * Send work request queue entry. * The size of the sg_list is determined when the QP is created and stored -- cgit v1.2.3 From 515667f8f8b48bdbcad61c5681291cb970e36ac3 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:50:17 -0800 Subject: IB/rdmavt: Add create queue pair functionality Add create queue pair verbs call as well as supporting functions. 
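A hedged sketch of the driver-side wiring this requires is shown below: rvt_driver_qp_init() now refuses to take over QP allocation unless the four new mandatory driver_f hooks are populated. The my_* names and struct my_qp_priv are invented for illustration, and note that a later patch in this series adds a gfp_t argument to qp_priv_alloc().

	/*
	 * Illustrative sketch (not part of this patch): the mandatory QP
	 * hooks checked by rvt_driver_qp_init(). struct my_qp_priv stands
	 * in for a driver's real per-QP private state.
	 */
	struct my_qp_priv {
		int placeholder;
	};

	static unsigned my_free_all_qps(struct rvt_dev_info *rdi)
	{
		return 0;	/* number of QPs the driver still has in use */
	}

	static void *my_qp_priv_alloc(struct rvt_dev_info *rdi,
				      struct rvt_qp *qp)
	{
		return kzalloc(sizeof(struct my_qp_priv), GFP_KERNEL);
	}

	static void my_qp_priv_free(struct rvt_dev_info *rdi,
				    struct rvt_qp *qp)
	{
		kfree(qp->priv);
	}

	static void my_notify_qp_reset(struct rvt_qp *qp)
	{
		/* scrub driver-private state for a new or reset QP */
	}

	/* ...at init time, before calling rvt_register_device(): */
	rdi->driver_f.free_all_qps = my_free_all_qps;
	rdi->driver_f.qp_priv_alloc = my_qp_priv_alloc;
	rdi->driver_f.qp_priv_free = my_qp_priv_free;
	rdi->driver_f.notify_qp_reset = my_notify_qp_reset;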
Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 425 ++++++++++++++++++++++++++++++++++++-- drivers/infiniband/sw/rdmavt/vt.c | 1 + include/rdma/rdma_vt.h | 10 +- 3 files changed, 413 insertions(+), 23 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 17dd6ab193fa..7d1f02eb2779 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -47,8 +47,11 @@ #include #include -#include "vt.h" +#include +#include +#include #include "qp.h" +#include "vt.h" static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map) { @@ -151,7 +154,10 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) * If driver is not doing any QP allocation then make sure it is * providing the necessary QP functions. */ - if (!rdi->driver_f.free_all_qps) + if (!rdi->driver_f.free_all_qps || + !rdi->driver_f.qp_priv_alloc || + !rdi->driver_f.qp_priv_free || + !rdi->driver_f.notify_qp_reset) return -EINVAL; /* allocate parent object */ @@ -178,7 +184,9 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table)) goto fail_table; - return ret; + spin_lock_init(&rdi->n_qps_lock); + + return 0; fail_table: kfree(rdi->qp_dev->qp_table); @@ -197,31 +205,29 @@ no_qp_table: * There should not be any QPs still in use. * Free memory for table. */ -static unsigned free_all_qps(struct rvt_dev_info *rdi) +static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) { unsigned long flags; struct rvt_qp *qp; unsigned n, qp_inuse = 0; spinlock_t *ql; /* work around too long line below */ - rdi->driver_f.free_all_qps(rdi); + if (rdi->driver_f.free_all_qps) + qp_inuse = rdi->driver_f.free_all_qps(rdi); if (!rdi->qp_dev) - return 0; + return qp_inuse; ql = &rdi->qp_dev->qpt_lock; - spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + spin_lock_irqsave(ql, flags); for (n = 0; n < rdi->qp_dev->qp_table_size; n++) { qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n], lockdep_is_held(ql)); RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL); - qp = rcu_dereference_protected(qp->next, - lockdep_is_held(ql)); - while (qp) { + + for (; qp; qp = rcu_dereference_protected(qp->next, + lockdep_is_held(ql))) qp_inuse++; - qp = rcu_dereference_protected(qp->next, - lockdep_is_held(ql)); - } } spin_unlock_irqrestore(ql, flags); synchronize_rcu(); @@ -230,26 +236,190 @@ static unsigned free_all_qps(struct rvt_dev_info *rdi) void rvt_qp_exit(struct rvt_dev_info *rdi) { - u32 qps_inuse = free_all_qps(rdi); + u32 qps_inuse = rvt_free_all_qps(rdi); - qps_inuse = free_all_qps(rdi); if (qps_inuse) rvt_pr_err(rdi, "QP memory leak! %u still in use\n", qps_inuse); if (!rdi->qp_dev) return; + if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) + return; /* driver did the qp init so nothing else to do */ + kfree(rdi->qp_dev->qp_table); free_qpn_table(&rdi->qp_dev->qpn_table); kfree(rdi->qp_dev); } +static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, + struct rvt_qpn_map *map, unsigned off) +{ + return (map - qpt->map) * RVT_BITS_PER_PAGE + off; +} + +/* + * Allocate the next available QPN or + * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. 
+ */ +static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, + enum ib_qp_type type, u8 port) +{ + u32 i, offset, max_scan, qpn; + struct rvt_qpn_map *map; + u32 ret; + + if (rdi->driver_f.alloc_qpn) + return rdi->driver_f.alloc_qpn(rdi, qpt, type, port); + + if (type == IB_QPT_SMI || type == IB_QPT_GSI) { + unsigned n; + + ret = type == IB_QPT_GSI; + n = 1 << (ret + 2 * (port - 1)); + spin_lock(&qpt->lock); + if (qpt->flags & n) + ret = -EINVAL; + else + qpt->flags |= n; + spin_unlock(&qpt->lock); + goto bail; + } + + qpn = qpt->last + qpt->incr; + if (qpn >= RVT_QPN_MAX) + qpn = qpt->incr | ((qpt->last & 1) ^ 1); + /* offset carries bit 0 */ + offset = qpn & RVT_BITS_PER_PAGE_MASK; + map = &qpt->map[qpn / RVT_BITS_PER_PAGE]; + max_scan = qpt->nmaps - !offset; + for (i = 0;;) { + if (unlikely(!map->page)) { + get_map_page(qpt, map); + if (unlikely(!map->page)) + break; + } + do { + if (!test_and_set_bit(offset, map->page)) { + qpt->last = qpn; + ret = qpn; + goto bail; + } + offset += qpt->incr; + /* + * This qpn might be bogus if offset >= BITS_PER_PAGE. + * That is OK. It gets re-assigned below + */ + qpn = mk_qpn(qpt, map, offset); + } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX); + /* + * In order to keep the number of pages allocated to a + * minimum, we scan the all existing pages before increasing + * the size of the bitmap table. + */ + if (++i > max_scan) { + if (qpt->nmaps == RVT_QPNMAP_ENTRIES) + break; + map = &qpt->map[qpt->nmaps++]; + /* start at incr with current bit 0 */ + offset = qpt->incr | (offset & 1); + } else if (map < &qpt->map[qpt->nmaps]) { + ++map; + /* start at incr with current bit 0 */ + offset = qpt->incr | (offset & 1); + } else { + map = &qpt->map[0]; + /* wrap to first map page, invert bit 0 */ + offset = qpt->incr | ((offset & 1) ^ 1); + } + /* there can be no bits at shift and below */ + WARN_ON(offset & (rdi->dparms.qos_shift - 1)); + qpn = mk_qpn(qpt, map, offset); + } + + ret = -ENOMEM; + +bail: + return ret; +} + +static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) +{ + struct rvt_qpn_map *map; + + map = qpt->map + qpn / RVT_BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); +} + +/** + * reset_qp - initialize the QP state to the reset state + * @qp: the QP to reset + * @type: the QP type + */ +static void reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) +{ + qp->remote_qpn = 0; + qp->qkey = 0; + qp->qp_access_flags = 0; + + /* + * Let driver do anything it needs to for a new/reset qp + */ + rdi->driver_f.notify_qp_reset(qp); + + qp->s_flags &= RVT_S_SIGNAL_REQ_WR; + qp->s_hdrwords = 0; + qp->s_wqe = NULL; + qp->s_draining = 0; + qp->s_next_psn = 0; + qp->s_last_psn = 0; + qp->s_sending_psn = 0; + qp->s_sending_hpsn = 0; + qp->s_psn = 0; + qp->r_psn = 0; + qp->r_msn = 0; + if (type == IB_QPT_RC) { + qp->s_state = IB_OPCODE_RC_SEND_LAST; + qp->r_state = IB_OPCODE_RC_SEND_LAST; + } else { + qp->s_state = IB_OPCODE_UC_SEND_LAST; + qp->r_state = IB_OPCODE_UC_SEND_LAST; + } + qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; + qp->r_nak_state = 0; + qp->r_aflags = 0; + qp->r_flags = 0; + qp->s_head = 0; + qp->s_tail = 0; + qp->s_cur = 0; + qp->s_acked = 0; + qp->s_last = 0; + qp->s_ssn = 1; + qp->s_lsn = 0; + qp->s_mig_state = IB_MIG_MIGRATED; + memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + qp->r_head_ack_queue = 0; + qp->s_tail_ack_queue = 0; + qp->s_num_rd_atomic = 0; + if (qp->r_rq.wq) { + qp->r_rq.wq->head = 0; + qp->r_rq.wq->tail = 0; + } + 
qp->r_sge.num_sge = 0; +} + /** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair * @udata: user data for libibverbs.so * + * Queue pair creation is mostly an rvt issue. However, drivers have their own + * unique idea of what queue pair numbers mean. For instance there is a reserved + * range for PSM. + * * Returns the queue pair on success, otherwise returns an errno. * * Called by the ib_create_qp() core verbs function. @@ -258,15 +428,226 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { + struct rvt_qp *qp; + int err; + struct rvt_swqe *swq = NULL; + size_t sz; + size_t sg_list_sz; + struct ib_qp *ret = ERR_PTR(-ENOMEM); + struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); + void *priv = NULL; + + if (!rdi) + return ERR_PTR(-EINVAL); + + if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || + init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || + init_attr->create_flags) + return ERR_PTR(-EINVAL); + + /* Check receive queue parameters if no SRQ is specified. */ + if (!init_attr->srq) { + if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || + init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr) + return ERR_PTR(-EINVAL); + + if (init_attr->cap.max_send_sge + + init_attr->cap.max_send_wr + + init_attr->cap.max_recv_sge + + init_attr->cap.max_recv_wr == 0) + return ERR_PTR(-EINVAL); + } + + switch (init_attr->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + if (init_attr->port_num == 0 || + init_attr->port_num > ibpd->device->phys_port_cnt) + return ERR_PTR(-EINVAL); + case IB_QPT_UC: + case IB_QPT_RC: + case IB_QPT_UD: + sz = sizeof(struct rvt_sge) * + init_attr->cap.max_send_sge + + sizeof(struct rvt_swqe); + swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + if (!swq) + return ERR_PTR(-ENOMEM); + + sz = sizeof(*qp); + sg_list_sz = 0; + if (init_attr->srq) { + struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq); + + if (srq->rq.max_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (srq->rq.max_sge - 1); + } else if (init_attr->cap.max_recv_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (init_attr->cap.max_recv_sge - 1); + qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + if (!qp) + goto bail_swq; + + RCU_INIT_POINTER(qp->next, NULL); + + /* + * Driver needs to set up it's private QP structure and do any + * initialization that is needed. + */ + priv = rdi->driver_f.qp_priv_alloc(rdi, qp); + if (!priv) + goto bail_qp; + qp->priv = priv; + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); + if (init_attr->srq) { + sz = 0; + } else { + qp->r_rq.size = init_attr->cap.max_recv_wr + 1; + qp->r_rq.max_sge = init_attr->cap.max_recv_sge; + sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + + sizeof(struct rvt_rwqe); + qp->r_rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + + qp->r_rq.size * sz); + if (!qp->r_rq.wq) + goto bail_driver_priv; + } + + /* + * ib_create_qp() will initialize qp->ibqp + * except for qp->ibqp.qp_num. 
+ */ + spin_lock_init(&qp->r_lock); + spin_lock_init(&qp->s_lock); + spin_lock_init(&qp->r_rq.lock); + atomic_set(&qp->refcount, 0); + init_waitqueue_head(&qp->wait); + init_timer(&qp->s_timer); + qp->s_timer.data = (unsigned long)qp; + INIT_LIST_HEAD(&qp->rspwait); + qp->state = IB_QPS_RESET; + qp->s_wq = swq; + qp->s_size = init_attr->cap.max_send_wr + 1; + qp->s_max_sge = init_attr->cap.max_send_sge; + if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) + qp->s_flags = RVT_S_SIGNAL_REQ_WR; + + err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, + init_attr->qp_type, + init_attr->port_num); + if (err < 0) { + ret = ERR_PTR(err); + goto bail_rq_wq; + } + qp->ibqp.qp_num = err; + qp->port_num = init_attr->port_num; + reset_qp(rdi, qp, init_attr->qp_type); + break; + + default: + /* Don't support raw QPs */ + return ERR_PTR(-EINVAL); + } + + init_attr->cap.max_inline_data = 0; + /* - * Queue pair creation is mostly an rvt issue. However, drivers have - * their own unique idea of what queue pare numbers mean. For instance - * there is a reserved range for PSM. - * - * VI-DRIVER-API: make_qpn() - * Returns a valid QPN for verbs to use + * Return the address of the RWQ as the offset to mmap. + * See hfi1_mmap() for details. */ - return ERR_PTR(-EOPNOTSUPP); + if (udata && udata->outlen >= sizeof(__u64)) { + if (!qp->r_rq.wq) { + __u64 offset = 0; + + err = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_qpn; + } + } else { + u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; + + qp->ip = rvt_create_mmap_info(rdi, s, + ibpd->uobject->context, + qp->r_rq.wq); + if (!qp->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_qpn; + } + + err = ib_copy_to_udata(udata, &qp->ip->offset, + sizeof(qp->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } + } + + spin_lock(&rdi->n_qps_lock); + if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) { + spin_unlock(&rdi->n_qps_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + rdi->n_qps_allocated++; + spin_unlock(&rdi->n_qps_lock); + + if (qp->ip) { + spin_lock_irq(&rdi->pending_lock); + list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps); + spin_unlock_irq(&rdi->pending_lock); + } + + ret = &qp->ibqp; + + /* + * We have our QP and its good, now keep track of what types of opcodes + * can be processed on this QP. We do this by keeping track of what the + * 3 high order bits of the opcode are. 
+ */ + switch (init_attr->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_UD: + qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK; + break; + case IB_QPT_RC: + qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK; + break; + case IB_QPT_UC: + qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK; + break; + default: + ret = ERR_PTR(-EINVAL); + goto bail_ip; + } + + return ret; + +bail_ip: + kref_put(&qp->ip->ref, rvt_release_mmap_info); + +bail_qpn: + free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); + +bail_rq_wq: + vfree(qp->r_rq.wq); + +bail_driver_priv: + rdi->driver_f.qp_priv_free(rdi, qp); + +bail_qp: + kfree(qp); + +bail_swq: + vfree(swq); + + return ret; } /** diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index df2df361c342..e75eb3d2f8a2 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -362,6 +362,7 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) ib_unregister_device(&rdi->ibdev); rvt_mr_exit(rdi); + rvt_qp_exit(rdi); } EXPORT_SYMBOL(rvt_unregister_device); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 3a78f20cbf2d..3bdeac7b9a48 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -222,7 +222,10 @@ struct rvt_driver_provided { int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); - void (*free_all_qps)(struct rvt_dev_info *rdi); + unsigned (*free_all_qps)(struct rvt_dev_info *rdi); + void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + void (*notify_qp_reset)(struct rvt_qp *qp); /*--------------------*/ /* Optional functions */ @@ -230,6 +233,8 @@ struct rvt_driver_provided { int (*check_ah)(struct ib_device *, struct ib_ah_attr *); void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); + int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, + enum ib_qp_type type, u8 port); }; struct rvt_dev_info { @@ -262,7 +267,10 @@ struct rvt_dev_info { int flags; struct rvt_ibport **ports; + /* QP */ struct rvt_qp_ibdev *qp_dev; + u32 n_qps_allocated; /* number of QPs allocated for device */ + spinlock_t n_qps_lock; /* keep track of number of qps */ /* memory maps */ struct list_head pending_mmaps; -- cgit v1.2.3 From 5a9cf6f27e36ece71cc8a192a4ca39b62a460807 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:50:24 -0800 Subject: IB/rdmavt: Export reset_qp in rdmavt Until all queue pair functionality is moved to rdmavt, we need to provide access to the reset function. This is only temporary and will be reverted to a static, non-exported function in the end. 
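As a rough illustration of the interim arrangement, a driver's still-private modify-QP path might use the export like the sketch below; my_modify_qp() is an invented name and real state-transition checking is omitted:

	/*
	 * Illustrative sketch (not part of this patch): drive a QP back to
	 * the reset state from a driver's own modify_qp path while that
	 * logic still lives outside rdmavt.
	 */
	static int my_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
				int attr_mask, struct ib_udata *udata)
	{
		struct rvt_qp *qp = container_of(ibqp, struct rvt_qp, ibqp);
		struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);

		if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RESET)
			rvt_reset_qp(rdi, qp, ibqp->qp_type);

		return 0;	/* a real implementation validates the transition */
	}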
Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 7 ++++--- include/rdma/rdma_vt.h | 3 +++ include/rdma/rdmavt_qp.h | 1 + 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 7d1f02eb2779..44485ada8281 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -357,8 +357,8 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) * @qp: the QP to reset * @type: the QP type */ -static void reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type) +void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) { qp->remote_qpn = 0; qp->qkey = 0; @@ -409,6 +409,7 @@ static void reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, } qp->r_sge.num_sge = 0; } +EXPORT_SYMBOL(rvt_reset_qp); /** * rvt_create_qp - create a queue pair for a device @@ -543,7 +544,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; - reset_qp(rdi, qp, init_attr->qp_type); + rvt_reset_qp(rdi, qp, init_attr->qp_type); break; default: diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 3bdeac7b9a48..e412e670e687 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -338,4 +338,7 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, u32 size, void *obj); +/* Temporary export */ +void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type); #endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 1aa8b5b40f9f..bce0a03a7c07 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -48,6 +48,7 @@ * */ +#include #include /* * Atomic bit definitions for r_aflags. -- cgit v1.2.3 From feaeb6e26fd15f6531e28f2900e0b59705bfc3d4 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:50:36 -0800 Subject: IB/rdmavt: Add support for rvt_query_device function With this commit, the drivers using rdmavt need not define a query_device function, but they should fill in the IB device attributes structure rvt_dev_info.dparms.props. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/vt.c | 9 +++++++-- include/rdma/rdma_vt.h | 5 ++++- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index e75eb3d2f8a2..f2b643843374 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -68,10 +68,15 @@ static int rvt_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + + if (uhw->inlen || uhw->outlen) + return -EINVAL; /* - * Return rvt_dev_info.props contents + * Return rvt_dev_info.dparms.props contents */ - return -EOPNOTSUPP; + *props = rdi->dparms.props; + return 0; } static int rvt_modify_device(struct ib_device *device, diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e412e670e687..725778a6781d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -245,7 +245,10 @@ struct rvt_dev_info { * allocating space for this structure. 
* * The driver will also be responsible for filling in certain members of - * dparms.props + * dparms.props. The driver needs to fill in dparms exactly as it would + * want values reported to a ULP. This will be returned to the caller + * in rdmavt's device. The driver should also therefore refrain from + * modifying this directly after registration with rdmavt. */ /* Driver specific properties */ -- cgit v1.2.3 From d2b8d4da1ca5052b72e043d2ce68157abf3f2d24 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 22 Jan 2016 12:50:43 -0800 Subject: IB/rdmavt: Support creating qps with GFP_NOIO flag The current code is problematic when QP creation is driven by ipoib in support of NFS and NFS wants to do IO for paging purposes. In that case, the GFP_KERNEL allocation within create_qp causes a deadlock in tight memory situations. This fix adds support for creating queue pairs with the GFP_NOIO flag, for connected mode only, so that queue pair creation fails cleanly in those situations. This was previously fixed in qib but needed to be ported to hfi1. This patch handles it for both pieces of hardware in the new rdmavt common layer. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 56 ++++++++++++++++++++++++++++++--------- include/rdma/rdma_vt.h | 5 ++-- 2 files changed, 46 insertions(+), 15 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index ee19eae38d0b..43346a773ff3 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -53,9 +53,11 @@ #include "qp.h" #include "vt.h" -static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map) +static void get_map_page(struct rvt_qpn_table *qpt, + struct rvt_qpn_map *map, + gfp_t gfp) { - unsigned long page = get_zeroed_page(GFP_KERNEL); + unsigned long page = get_zeroed_page(gfp); /* * Free the page if someone raced with us installing it. @@ -107,7 +109,7 @@ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end); for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) { if (!map->page) { - get_map_page(qpt, map); + get_map_page(qpt, map, GFP_KERNEL); if (!map->page) { ret = -ENOMEM; break; @@ -263,14 +265,15 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. 
*/ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port) + enum ib_qp_type type, u8 port, gfp_t gfp) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; u32 ret; if (rdi->driver_f.alloc_qpn) - return rdi->driver_f.alloc_qpn(rdi, qpt, type, port); + return rdi->driver_f.alloc_qpn(rdi, qpt, type, port, + GFP_KERNEL); if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; @@ -295,7 +298,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map); + get_map_page(qpt, map, gfp); if (unlikely(!map->page)) break; } @@ -437,15 +440,25 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp *ret = ERR_PTR(-ENOMEM); struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); void *priv = NULL; + gfp_t gfp; if (!rdi) return ERR_PTR(-EINVAL); if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || - init_attr->create_flags) + init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); + /* GFP_NOIO is applicable to RC QP's only */ + + if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && + init_attr->qp_type != IB_QPT_RC) + return ERR_PTR(-EINVAL); + + gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? + GFP_NOIO : GFP_KERNEL; + /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || @@ -471,7 +484,13 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, sz = sizeof(struct rvt_sge) * init_attr->cap.max_send_sge + sizeof(struct rvt_swqe); - swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + if (gfp == GFP_NOIO) + swq = __vmalloc( + (init_attr->cap.max_send_wr + 1) * sz, + gfp, PAGE_KERNEL); + else + swq = vmalloc( + (init_attr->cap.max_send_wr + 1) * sz); if (!swq) return ERR_PTR(-ENOMEM); @@ -486,7 +505,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + qp = kzalloc(sz + sg_list_sz, gfp); if (!qp) goto bail_swq; @@ -496,7 +515,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, * Driver needs to set up it's private QP structure and do any * initialization that is needed. 
*/ - priv = rdi->driver_f.qp_priv_alloc(rdi, qp); + priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); if (!priv) goto bail_qp; qp->priv = priv; @@ -510,8 +529,19 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct rvt_rwqe); - qp->r_rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + - qp->r_rq.size * sz); + if (udata) + qp->r_rq.wq = vmalloc_user( + sizeof(struct rvt_rwq) + + qp->r_rq.size * sz); + else if (gfp == GFP_NOIO) + qp->r_rq.wq = __vmalloc( + sizeof(struct rvt_rwq) + + qp->r_rq.size * sz, + gfp, PAGE_KERNEL); + else + qp->r_rq.wq = vmalloc( + sizeof(struct rvt_rwq) + + qp->r_rq.size * sz); if (!qp->r_rq.wq) goto bail_driver_priv; } @@ -537,7 +567,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, - init_attr->port_num); + init_attr->port_num, gfp); if (err < 0) { ret = ERR_PTR(err); goto bail_rq_wq; diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 725778a6781d..70a9596b859d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -223,7 +223,8 @@ struct rvt_driver_provided { const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); unsigned (*free_all_qps)(struct rvt_dev_info *rdi); - void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + gfp_t gfp); void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp); @@ -234,7 +235,7 @@ struct rvt_driver_provided { void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port); + enum ib_qp_type type, u8 port, gfp_t gfp); }; struct rvt_dev_info { -- cgit v1.2.3 From 6f6387ae75c1499b315ddcd3c74402d44423e1cc Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:15 -0800 Subject: IB/rdmavt: Add completion queue functions Brings in completion queue functionality. A kthread worker is added to the rvt_dev_info to serve as a worker for completion queues. Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/cq.c | 445 +++++++++++++++++++++++++++++++++++++- drivers/infiniband/sw/rdmavt/cq.h | 4 +- drivers/infiniband/sw/rdmavt/vt.c | 11 +- include/rdma/rdma_vt.h | 9 + include/rdma/rdmavt_cq.h | 99 +++++++++ 5 files changed, 561 insertions(+), 7 deletions(-) create mode 100644 include/rdma/rdmavt_cq.h (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 8d9619478807..7308a274643d 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -45,7 +45,126 @@ * */ +#include +#include +#include #include "cq.h" +#include "vt.h" + +/** + * rvt_cq_enter - add a new entry to the completion queue + * @cq: completion queue + * @entry: work completion entry to add + * @sig: true if @entry is solicited + * + * This may be called with qp->s_lock held. + */ +void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) +{ + struct rvt_cq_wc *wc; + unsigned long flags; + u32 head; + u32 next; + + spin_lock_irqsave(&cq->lock, flags); + + /* + * Note that the head pointer might be writable by user processes. 
+ * Take care to verify it is a sane value. + */ + wc = cq->queue; + head = wc->head; + if (head >= (unsigned)cq->ibcq.cqe) { + head = cq->ibcq.cqe; + next = 0; + } else { + next = head + 1; + } + + if (unlikely(next == wc->tail)) { + spin_unlock_irqrestore(&cq->lock, flags); + if (cq->ibcq.event_handler) { + struct ib_event ev; + + ev.device = cq->ibcq.device; + ev.element.cq = &cq->ibcq; + ev.event = IB_EVENT_CQ_ERR; + cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); + } + return; + } + if (cq->ip) { + wc->uqueue[head].wr_id = entry->wr_id; + wc->uqueue[head].status = entry->status; + wc->uqueue[head].opcode = entry->opcode; + wc->uqueue[head].vendor_err = entry->vendor_err; + wc->uqueue[head].byte_len = entry->byte_len; + wc->uqueue[head].ex.imm_data = + (__u32 __force)entry->ex.imm_data; + wc->uqueue[head].qp_num = entry->qp->qp_num; + wc->uqueue[head].src_qp = entry->src_qp; + wc->uqueue[head].wc_flags = entry->wc_flags; + wc->uqueue[head].pkey_index = entry->pkey_index; + wc->uqueue[head].slid = entry->slid; + wc->uqueue[head].sl = entry->sl; + wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; + wc->uqueue[head].port_num = entry->port_num; + /* Make sure entry is written before the head index. */ + smp_wmb(); + } else { + wc->kqueue[head] = *entry; + } + wc->head = next; + + if (cq->notify == IB_CQ_NEXT_COMP || + (cq->notify == IB_CQ_SOLICITED && + (solicited || entry->status != IB_WC_SUCCESS))) { + struct kthread_worker *worker; + /* + * This will cause send_complete() to be called in + * another thread. + */ + smp_read_barrier_depends(); /* see rvt_cq_exit */ + worker = cq->rdi->worker; + if (likely(worker)) { + cq->notify = RVT_CQ_NONE; + cq->triggered++; + queue_kthread_work(worker, &cq->comptask); + } + } + + spin_unlock_irqrestore(&cq->lock, flags); +} +EXPORT_SYMBOL(rvt_cq_enter); + +static void send_complete(struct kthread_work *work) +{ + struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask); + + /* + * The completion handler will most likely rearm the notification + * and poll for all pending entries. If a new completion entry + * is added while we are in this routine, queue_work() + * won't call us again until we return so we check triggered to + * see if we need to call the handler again. + */ + for (;;) { + u8 triggered = cq->triggered; + + /* + * IPoIB connected mode assumes the callback is from a + * soft IRQ. We simulate this by blocking "bottom halves". + * See the implementation for ipoib_cm_handle_tx_wc(), + * netif_tx_lock_bh() and netif_tx_lock(). + */ + local_bh_disable(); + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); + local_bh_enable(); + + if (cq->triggered == triggered) + return; + } +} /** * rvt_create_cq - create a completion queue @@ -64,7 +183,103 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + struct rvt_cq *cq; + struct rvt_cq_wc *wc; + struct ib_cq *ret; + u32 sz; + unsigned int entries = attr->cqe; + + if (attr->flags) + return ERR_PTR(-EINVAL); + + if (entries < 1 || entries > rdi->dparms.props.max_cqe) + return ERR_PTR(-EINVAL); + + /* Allocate the completion queue structure. */ + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return ERR_PTR(-ENOMEM); + + /* + * Allocate the completion queue entries and head/tail pointers. + * This is allocated separately so that it can be resized and + * also mapped into user space. 
+ * We need to use vmalloc() in order to support mmap and large + * numbers of entries. + */ + sz = sizeof(*wc); + if (udata && udata->outlen >= sizeof(__u64)) + sz += sizeof(struct ib_uverbs_wc) * (entries + 1); + else + sz += sizeof(struct ib_wc) * (entries + 1); + wc = vmalloc_user(sz); + if (!wc) { + ret = ERR_PTR(-ENOMEM); + goto bail_cq; + } + + /* + * Return the address of the WC as the offset to mmap. + * See rvt_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + int err; + + cq->ip = rvt_create_mmap_info(rdi, sz, context, wc); + if (!cq->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wc; + } + + err = ib_copy_to_udata(udata, &cq->ip->offset, + sizeof(cq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } + + spin_lock(&rdi->n_cqs_lock); + if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) { + spin_unlock(&rdi->n_cqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + rdi->n_cqs_allocated++; + spin_unlock(&rdi->n_cqs_lock); + + if (cq->ip) { + spin_lock_irq(&rdi->pending_lock); + list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps); + spin_unlock_irq(&rdi->pending_lock); + } + + /* + * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. + * The number of entries should be >= the number requested or return + * an error. + */ + cq->rdi = rdi; + cq->ibcq.cqe = entries; + cq->notify = RVT_CQ_NONE; + spin_lock_init(&cq->lock); + init_kthread_work(&cq->comptask, send_complete); + cq->queue = wc; + + ret = &cq->ibcq; + + goto done; + +bail_ip: + kfree(cq->ip); +bail_wc: + vfree(wc); +bail_cq: + kfree(cq); +done: + return ret; } /** @@ -77,12 +292,53 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, */ int rvt_destroy_cq(struct ib_cq *ibcq) { - return -EOPNOTSUPP; + struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); + struct rvt_dev_info *rdi = cq->rdi; + + flush_kthread_work(&cq->comptask); + spin_lock(&rdi->n_cqs_lock); + rdi->n_cqs_allocated--; + spin_unlock(&rdi->n_cqs_lock); + if (cq->ip) + kref_put(&cq->ip->ref, rvt_release_mmap_info); + else + vfree(cq->queue); + kfree(cq); + + return 0; } +/** + * rvt_req_notify_cq - change the notification type for a completion queue + * @ibcq: the completion queue + * @notify_flags: the type of notification to request + * + * Returns 0 for success. + * + * This may be called from interrupt context. Also called by + * ib_req_notify_cq() in the generic verbs code. + */ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) { - return -EOPNOTSUPP; + struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&cq->lock, flags); + /* + * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow + * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). 
+ */ + if (cq->notify != IB_CQ_NEXT_COMP) + cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; + + if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && + cq->queue->head != cq->queue->tail) + ret = 1; + + spin_unlock_irqrestore(&cq->lock, flags); + + return ret; } /** @@ -93,7 +349,107 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) */ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { - return -EOPNOTSUPP; + struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); + struct rvt_cq_wc *old_wc; + struct rvt_cq_wc *wc; + u32 head, tail, n; + int ret; + u32 sz; + struct rvt_dev_info *rdi = cq->rdi; + + if (cqe < 1 || cqe > rdi->dparms.props.max_cqe) + return -EINVAL; + + /* + * Need to use vmalloc() if we want to support large #s of entries. + */ + sz = sizeof(*wc); + if (udata && udata->outlen >= sizeof(__u64)) + sz += sizeof(struct ib_uverbs_wc) * (cqe + 1); + else + sz += sizeof(struct ib_wc) * (cqe + 1); + wc = vmalloc_user(sz); + if (!wc) + return -ENOMEM; + + /* Check that we can write the offset to mmap. */ + if (udata && udata->outlen >= sizeof(__u64)) { + __u64 offset = 0; + + ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (ret) + goto bail_free; + } + + spin_lock_irq(&cq->lock); + /* + * Make sure head and tail are sane since they + * might be user writable. + */ + old_wc = cq->queue; + head = old_wc->head; + if (head > (u32)cq->ibcq.cqe) + head = (u32)cq->ibcq.cqe; + tail = old_wc->tail; + if (tail > (u32)cq->ibcq.cqe) + tail = (u32)cq->ibcq.cqe; + if (head < tail) + n = cq->ibcq.cqe + 1 + head - tail; + else + n = head - tail; + if (unlikely((u32)cqe < n)) { + ret = -EINVAL; + goto bail_unlock; + } + for (n = 0; tail != head; n++) { + if (cq->ip) + wc->uqueue[n] = old_wc->uqueue[tail]; + else + wc->kqueue[n] = old_wc->kqueue[tail]; + if (tail == (u32)cq->ibcq.cqe) + tail = 0; + else + tail++; + } + cq->ibcq.cqe = cqe; + wc->head = n; + wc->tail = 0; + cq->queue = wc; + spin_unlock_irq(&cq->lock); + + vfree(old_wc); + + if (cq->ip) { + struct rvt_mmap_info *ip = cq->ip; + + rvt_update_mmap_info(rdi, ip, sz, wc); + + /* + * Return the offset to mmap. + * See rvt_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + + spin_lock_irq(&rdi->pending_lock); + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, &rdi->pending_mmaps); + spin_unlock_irq(&rdi->pending_lock); + } + + return 0; + +bail_unlock: + spin_unlock_irq(&cq->lock); +bail_free: + vfree(wc); +bail: + return ret; + } /** @@ -109,5 +465,84 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) */ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { - return -EOPNOTSUPP; + struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); + struct rvt_cq_wc *wc; + unsigned long flags; + int npolled; + u32 tail; + + /* The kernel can only poll a kernel completion queue */ + if (cq->ip) + return -EINVAL; + + spin_lock_irqsave(&cq->lock, flags); + + wc = cq->queue; + tail = wc->tail; + if (tail > (u32)cq->ibcq.cqe) + tail = (u32)cq->ibcq.cqe; + for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { + if (tail == wc->head) + break; + /* The kernel doesn't need a RMB since it has the lock. 
*/ + *entry = wc->kqueue[tail]; + if (tail >= cq->ibcq.cqe) + tail = 0; + else + tail++; + } + wc->tail = tail; + + spin_unlock_irqrestore(&cq->lock, flags); + + return npolled; +} + +int rvt_driver_cq_init(struct rvt_dev_info *rdi) +{ + int ret = 0; + int cpu; + struct task_struct *task; + + if (rdi->flags & RVT_FLAG_CQ_INIT_DRIVER) { + rvt_pr_info(rdi, "Driver is doing CQ init.\n"); + return 0; + } + + if (rdi->worker) + return 0; + rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); + if (!rdi->worker) + return -ENOMEM; + init_kthread_worker(rdi->worker); + task = kthread_create_on_node( + kthread_worker_fn, + rdi->worker, + rdi->dparms.node, + "%s", rdi->dparms.cq_name); + if (IS_ERR(task)) { + kfree(rdi->worker); + rdi->worker = NULL; + return PTR_ERR(task); + } + + cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); + kthread_bind(task, cpu); + wake_up_process(task); + return ret; +} + +void rvt_cq_exit(struct rvt_dev_info *rdi) +{ + struct kthread_worker *worker; + + worker = rdi->worker; + if (!worker) + return; + /* blocks future queuing from send_complete() */ + rdi->worker = NULL; + smp_wmb(); /* See rdi_cq_enter */ + flush_kthread_worker(worker); + kthread_stop(worker->task); + kfree(worker); } diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index 63a517db6dbf..3813d90efad2 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -49,6 +49,7 @@ */ #include +#include struct ib_cq *rvt_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, @@ -58,5 +59,6 @@ int rvt_destroy_cq(struct ib_cq *ibcq); int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); - +int rvt_driver_cq_init(struct rvt_dev_info *rdi); +void rvt_cq_exit(struct rvt_dev_info *rdi); #endif /* DEF_RVTCQ_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index f2b643843374..136cc212caa5 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -324,6 +324,11 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, mmap); /* Completion queues */ + ret = rvt_driver_cq_init(rdi); + if (ret) { + pr_err("Error in driver CQ init.\n"); + goto bail_mr; + } CHECK_DRIVER_OVERRIDE(rdi, create_cq); CHECK_DRIVER_OVERRIDE(rdi, destroy_cq); CHECK_DRIVER_OVERRIDE(rdi, poll_cq); @@ -344,12 +349,15 @@ int rvt_register_device(struct rvt_dev_info *rdi) ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); if (ret) { rvt_pr_err(rdi, "Failed to register driver with ib core.\n"); - goto bail_mr; + goto bail_cq; } rvt_pr_info(rdi, "Registration with rdmavt done.\n"); return ret; +bail_cq: + rvt_cq_exit(rdi); + bail_mr: rvt_mr_exit(rdi); @@ -366,6 +374,7 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) return; ib_unregister_device(&rdi->ibdev); + rvt_cq_exit(rdi); rvt_mr_exit(rdi); rvt_qp_exit(rdi); } diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 70a9596b859d..79080e3b09f8 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -138,6 +138,8 @@ struct rvt_ibport { /* TODO: Move sm_ah and smi_ah into here as well*/ }; +#define RVT_CQN_MAX 16 /* maximum length of cq name */ + /* * Things that are driver specific, module parameters in hfi1 and qib */ @@ -190,6 +192,8 @@ struct rvt_driver_params { int nports; int npkeys; u8 qos_shift; + char 
cq_name[RVT_CQN_MAX]; + int node; }; /* Protection domain */ @@ -281,6 +285,11 @@ struct rvt_dev_info { spinlock_t mmap_offset_lock; /* protect mmap_offset */ u32 mmap_offset; spinlock_t pending_lock; /* protect pending mmap list */ + + /* CQ */ + struct kthread_worker *worker; /* per device cq worker */ + u32 n_cqs_allocated; /* number of CQs allocated for device */ + spinlock_t n_cqs_lock; /* protect count of in use cqs */ }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h new file mode 100644 index 000000000000..51fd00b243d0 --- /dev/null +++ b/include/rdma/rdmavt_cq.h @@ -0,0 +1,99 @@ +#ifndef DEF_RDMAVT_INCCQ_H +#define DEF_RDMAVT_INCCQ_H + +/* + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include + +/* + * Define an ib_cq_notify value that is not valid so we know when CQ + * notifications are armed. + */ +#define RVT_CQ_NONE (IB_CQ_NEXT_COMP + 1) + +/* + * This structure is used to contain the head pointer, tail pointer, + * and completion queue entries as a single memory allocation so + * it can be mmap'ed into user space. 
+ */ +struct rvt_cq_wc { + u32 head; /* index of next entry to fill */ + u32 tail; /* index of next ib_poll_cq() entry */ + union { + /* these are actually size ibcq.cqe + 1 */ + struct ib_uverbs_wc uqueue[0]; + struct ib_wc kqueue[0]; + }; +}; + +/* + * The completion queue structure. + */ +struct rvt_cq { + struct ib_cq ibcq; + struct kthread_work comptask; + spinlock_t lock; /* protect changes in this struct */ + u8 notify; + u8 triggered; + struct rvt_dev_info *rdi; + struct rvt_cq_wc *queue; + struct rvt_mmap_info *ip; +}; + +static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct rvt_cq, ibcq); +} + +void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited); + +#endif /* DEF_RDMAVT_INCCQH */ -- cgit v1.2.3 From bfbac097b6e8023e10fdadab2527d0a1a3160d7e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:22 -0800 Subject: IB/rdmavt: Add post send to rdmavt Add in a post_send and post_one_send to rdmavt. The ULP will provide a WQE to rdmavt which will then walk and queue each element. Rdmavt will then queue the work to be done in the driver or kick the driver's progress routine. There needs to be a follow on patch which adds in another lock for the head of the queue so that it can be added to and read from in parallel. This will touch protocol handlers and require other changes in the drivers. This will be done separately. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 183 +++++++++++++++++++++++++++++++++++--- include/rdma/rdma_vt.h | 7 ++ include/rdma/rdmavt_qp.h | 26 ++++++ 3 files changed, 204 insertions(+), 12 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 43346a773ff3..bd2d91a5b19a 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -53,6 +53,27 @@ #include "qp.h" #include "vt.h" +/* + * Note that it is OK to post send work requests in the SQE and ERR + * states; rvt_do_send() will process them and generate error + * completions as per IB 1.2 C10-96. + */ +const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = 0, + [IB_QPS_INIT] = RVT_POST_RECV_OK, + [IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK, + [IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | + RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK | + RVT_PROCESS_NEXT_SEND_OK, + [IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | + RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK, + [IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | + RVT_POST_SEND_OK | RVT_FLUSH_SEND, + [IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV | + RVT_POST_SEND_OK | RVT_FLUSH_SEND, +}; +EXPORT_SYMBOL(ib_rvt_state_ops); + static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) @@ -586,7 +607,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, /* * Return the address of the RWQ as the offset to mmap. - * See hfi1_mmap() for details. + * See rvt_mmap() for details. 
*/ if (udata && udata->outlen >= sizeof(__u64)) { if (!qp->r_rq.wq) { @@ -749,6 +770,118 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, return -EOPNOTSUPP; } +/** + * rvt_post_one_wr - post one RC, UC, or UD send work request + * @qp: the QP to post on + * @wr: the work request to send + */ +static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) +{ + struct rvt_swqe *wqe; + u32 next; + int i; + int j; + int acc; + struct rvt_lkey_table *rkt; + struct rvt_pd *pd; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + /* IB spec says that num_sge == 0 is OK. */ + if (unlikely(wr->num_sge > qp->s_max_sge)) + return -EINVAL; + + /* + * Don't allow RDMA reads or atomic operations on UC or + * undefined operations. + * Make sure buffer is large enough to hold the result for atomics. + */ + if (qp->ibqp.qp_type == IB_QPT_UC) { + if ((unsigned)wr->opcode >= IB_WR_RDMA_READ) + return -EINVAL; + } else if (qp->ibqp.qp_type != IB_QPT_RC) { + /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */ + if (wr->opcode != IB_WR_SEND && + wr->opcode != IB_WR_SEND_WITH_IMM) + return -EINVAL; + /* Check UD destination address PD */ + if (qp->ibqp.pd != ud_wr(wr)->ah->pd) + return -EINVAL; + } else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) { + return -EINVAL; + } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && + (wr->num_sge == 0 || + wr->sg_list[0].length < sizeof(u64) || + wr->sg_list[0].addr & (sizeof(u64) - 1))) { + return -EINVAL; + } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { + return -EINVAL; + } + + next = qp->s_head + 1; + if (next >= qp->s_size) + next = 0; + if (next == qp->s_last) + return -ENOMEM; + + rkt = &rdi->lkey_table; + pd = ibpd_to_rvtpd(qp->ibqp.pd); + wqe = rvt_get_swqe_ptr(qp, qp->s_head); + + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) + memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); + else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE || + wr->opcode == IB_WR_RDMA_READ) + memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); + else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); + else + memcpy(&wqe->wr, wr, sizeof(wqe->wr)); + + wqe->length = 0; + j = 0; + if (wr->num_sge) { + acc = wr->opcode >= IB_WR_RDMA_READ ? 
+ IB_ACCESS_LOCAL_WRITE : 0; + for (i = 0; i < wr->num_sge; i++) { + u32 length = wr->sg_list[i].length; + int ok; + + if (length == 0) + continue; + ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], + &wr->sg_list[i], acc); + if (!ok) + goto bail_inval_free; + wqe->length += length; + j++; + } + wqe->wr.num_sge = j; + } + if (qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_RC) { + if (wqe->length > 0x80000000U) + goto bail_inval_free; + } else { + atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); + } + wqe->ssn = qp->s_ssn++; + qp->s_head = next; + + return 0; + +bail_inval_free: + /* release mr holds */ + while (j) { + struct rvt_sge *sge = &wqe->sg_list[--j]; + + rvt_put_mr(sge->mr); + } + return -EINVAL; +} + /** * rvt_post_send - post a send on a QP * @ibqp: the QP to post the send on @@ -760,20 +893,46 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + unsigned long flags = 0; + int call_send; + unsigned nreq = 0; + int err = 0; + + spin_lock_irqsave(&qp->s_lock, flags); + /* - * VT-DRIVER-API: do_send() - * Driver needs to have a do_send() call which is a single entry point - * to take an already formed packet and throw it out on the wire. Once - * the packet is sent the driver needs to make an upcall to rvt so the - * completion queue can be notified and/or any other outstanding - * work/book keeping can be finished. - * - * Note that there should also be a way for rvt to protect itself - * against hangs in the driver layer. If a send doesn't actually - * complete in a timely manor rvt needs to return an error event. + * Ensure QP state is such that we can send. If not bail out early, + * there is no need to do this every time we post a send. */ + if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) { + spin_unlock_irqrestore(&qp->s_lock, flags); + return -EINVAL; + } - return -EOPNOTSUPP; + /* + * If the send queue is empty, and we only have a single WR then just go + * ahead and kick the send engine into gear. Otherwise we will always + * just schedule the send to happen later. 
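+ * (Going through schedule_send() whenever anything is already queued + * preserves ordering; the direct do_send() call only happens for a + * single WR posted to an otherwise idle queue.)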
+ */ + call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next; + + for (; wr; wr = wr->next) { + err = rvt_post_one_wr(qp, wr); + if (unlikely(err)) { + *bad_wr = wr; + goto bail; + } + nreq++; + } +bail: + if (nreq && !call_send) + rdi->driver_f.schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + if (nreq && call_send) + rdi->driver_f.do_send(qp); + return err; } /** diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 79080e3b09f8..36e4fb4c0df3 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -231,6 +231,8 @@ struct rvt_driver_provided { gfp_t gfp); void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp); + void (*schedule_send)(struct rvt_qp *qp); + void (*do_send)(struct rvt_qp *qp); /*--------------------*/ /* Optional functions */ @@ -312,6 +314,11 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) return container_of(ibsrq, struct rvt_srq, ibsrq); } +static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct rvt_qp, ibqp); +} + static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) { /* diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index bce0a03a7c07..3189f195538c 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -129,6 +129,17 @@ /* Number of bits to pay attention to in the opcode for checking qp type */ #define RVT_OPCODE_QP_MASK 0xE0 +/* Flags for checking QP state (see ib_rvt_state_ops[]) */ +#define RVT_POST_SEND_OK 0x01 +#define RVT_POST_RECV_OK 0x02 +#define RVT_PROCESS_RECV_OK 0x04 +#define RVT_PROCESS_SEND_OK 0x08 +#define RVT_PROCESS_NEXT_SEND_OK 0x10 +#define RVT_FLUSH_SEND 0x20 +#define RVT_FLUSH_RECV 0x40 +#define RVT_PROCESS_OR_FLUSH_SEND \ + (RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND) + /* * Send work request queue entry. * The size of the sg_list is determined when the QP is created and stored @@ -373,4 +384,19 @@ struct rvt_qp_ibdev { struct rvt_qpn_table qpn_table; }; +/* + * Since struct rvt_swqe is not a fixed size, we can't simply index into + * struct hfi1_qp.s_wq. This function does the array index computation. + */ +static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, + unsigned n) +{ + return (struct rvt_swqe *)((char *)qp->s_wq + + (sizeof(struct rvt_swqe) + + qp->s_max_sge * + sizeof(struct rvt_sge)) * n); +} + +extern const int ib_rvt_state_ops[]; + #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From 3b0b3fb3c1bbf50a2f88ea7345448a41dcba3c57 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:35 -0800 Subject: IB/rdmavt: Add modify qp Add modify qp and supporting functions. 
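For context, rvt_modify_qp() sits behind the standard ib_modify_qp() verb, so the validation added here is exercised by the usual RESET -> INIT -> RTR -> RTS ladder that a consumer drives. A rough caller-side sketch for an RC QP follows; the helper and its attribute values are illustrative only, with just the ib_modify_qp() API and mask bits taken from the core verbs layer:

#include <linux/string.h>
#include <rdma/ib_verbs.h>

/* Hypothetical consumer helper; attribute values are illustrative. */
static int example_rc_qp_to_rts(struct ib_qp *qp, u32 dest_qpn, u16 dlid)
{
	struct ib_qp_attr attr;
	int ret;

	/* RESET -> INIT: pkey index, port, and access flags become valid */
	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_INIT;
	attr.pkey_index = 0;
	attr.port_num = 1;
	attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
			   IB_QP_PORT | IB_QP_ACCESS_FLAGS);
	if (ret)
		return ret;

	/* INIT -> RTR: address vector, MTU, remote QPN, and receive PSN */
	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_RTR;
	attr.ah_attr.dlid = dlid;
	attr.ah_attr.port_num = 1;
	attr.path_mtu = IB_MTU_2048;
	attr.dest_qp_num = dest_qpn;
	attr.rq_psn = 0;
	attr.max_dest_rd_atomic = 1;
	attr.min_rnr_timer = 12;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_AV |
			   IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN |
			   IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);
	if (ret)
		return ret;

	/* RTR -> RTS: send PSN plus the retry and timeout machinery */
	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_RTS;
	attr.sq_psn = 0;
	attr.timeout = 14;
	attr.retry_cnt = 7;
	attr.rnr_retry = 7;
	attr.max_rd_atomic = 1;
	return ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN |
			    IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
			    IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC);
}

Each transition above lands in the rvt_modify_qp() switch on new_state and the attr_mask handling added by this patch.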
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 503 ++++++++++++++++++++++++++++++++++- drivers/infiniband/sw/rdmavt/trace.h | 31 +++ include/rdma/rdma_vt.h | 42 +++ include/rdma/rdmavt_mr.h | 9 + include/rdma/rdmavt_qp.h | 20 ++ 5 files changed, 596 insertions(+), 9 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index bd2d91a5b19a..94421268b84c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -45,6 +45,7 @@ * */ +#include #include #include #include @@ -52,6 +53,7 @@ #include #include "qp.h" #include "vt.h" +#include "trace.h" /* * Note that it is OK to post send work requests in the SQE and ERR @@ -380,19 +382,47 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) * reset_qp - initialize the QP state to the reset state * @qp: the QP to reset * @type: the QP type + * r and s lock are required to be held by the caller */ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type) { - qp->remote_qpn = 0; - qp->qkey = 0; - qp->qp_access_flags = 0; + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + + /* Let drivers flush their waitlist */ + rdi->driver_f.flush_qp_waiters(qp); + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + + /* Stop the send queue and the retry timer */ + rdi->driver_f.stop_send_queue(qp); + del_timer_sync(&qp->s_timer); + + /* Wait for things to stop */ + rdi->driver_f.quiesce_qp(qp); + + /* take qp out the hash and wait for it to be unused */ + rvt_remove_qp(rdi, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + + /* grab the lock b/c it was locked at call time */ + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_lock); + + rvt_clear_mr_refs(qp, 1); + } /* - * Let driver do anything it needs to for a new/reset qp + * Let the driver do any tear down it needs to for a qp + * that has been reset */ rdi->driver_f.notify_qp_reset(qp); + qp->remote_qpn = 0; + qp->qkey = 0; + qp->qp_access_flags = 0; qp->s_flags &= RVT_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_wqe = NULL; @@ -702,6 +732,208 @@ bail_swq: return ret; } +void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) +{ + unsigned n; + + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) + rvt_put_ss(&qp->s_rdma_read_sge); + + rvt_put_ss(&qp->r_sge); + + if (clr_sends) { + while (qp->s_last != qp->s_head) { + struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); + unsigned i; + + for (i = 0; i < wqe->wr.num_sge; i++) { + struct rvt_sge *sge = &wqe->sg_list[i]; + + rvt_put_mr(sge->mr); + } + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI) + atomic_dec(&ibah_to_rvtah( + wqe->ud_wr.ah)->refcount); + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + } + if (qp->s_rdma_mr) { + rvt_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + } + + if (qp->ibqp.qp_type != IB_QPT_RC) + return; + + for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + struct rvt_ack_entry *e = &qp->s_ack_queue[n]; + + if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && + e->rdma_sge.mr) { + rvt_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + } +} +EXPORT_SYMBOL(rvt_clear_mr_refs); + +/** + * rvt_error_qp - put a QP into the error state + * @qp: the QP to put into the error state + * @err: the receive completion error to signal if a RWQE is 
active + * + * Flushes both send and receive work queues. + * Returns true if last WQE event should be generated. + * The QP r_lock and s_lock should be held and interrupts disabled. + * If we are already in error state, just return. + */ +int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) +{ + struct ib_wc wc; + int ret = 0; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) + goto bail; + + qp->state = IB_QPS_ERR; + + if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); + del_timer(&qp->s_timer); + } + + if (qp->s_flags & RVT_S_ANY_WAIT_SEND) + qp->s_flags &= ~RVT_S_ANY_WAIT_SEND; + + rdi->driver_f.notify_error_qp(qp); + + /* Schedule the sending tasklet to drain the send work queue. */ + if (qp->s_last != qp->s_head) + rdi->driver_f.schedule_send(qp); + + rvt_clear_mr_refs(qp, 0); + + memset(&wc, 0, sizeof(wc)); + wc.qp = &qp->ibqp; + wc.opcode = IB_WC_RECV; + + if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) { + wc.wr_id = qp->r_wr_id; + wc.status = err; + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); + } + wc.status = IB_WC_WR_FLUSH_ERR; + + if (qp->r_rq.wq) { + struct rvt_rwq *wq; + u32 head; + u32 tail; + + spin_lock(&qp->r_rq.lock); + + /* sanity check pointers before trusting them */ + wq = qp->r_rq.wq; + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + while (tail != head) { + wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id; + if (++tail >= qp->r_rq.size) + tail = 0; + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); + } + wq->tail = tail; + + spin_unlock(&qp->r_rq.lock); + } else if (qp->ibqp.event_handler) { + ret = 1; + } + +bail: + return ret; +} +EXPORT_SYMBOL(rvt_error_qp); + +/* + * Put the QP into the hash table. + * The hash table holds a reference to the QP. + */ +static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) +{ + struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; + unsigned long flags; + + atomic_inc(&qp->refcount); + spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + + if (qp->ibqp.qp_num <= 1) { + rcu_assign_pointer(rvp->qp[qp->ibqp.qp_num], qp); + } else { + u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits); + + qp->next = rdi->qp_dev->qp_table[n]; + rcu_assign_pointer(rdi->qp_dev->qp_table[n], qp); + trace_rvt_qpinsert(qp, n); + } + + spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); +} + +/* + * Remove the QP from the table so it can't be found asynchronously by + * the receive routine. 
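+ * The qpt_lock serializes writers, and the synchronize_rcu() below + * ensures no RCU reader can still hold a pointer obtained from the + * table before the table's reference on the QP is dropped.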
+ */ +void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) +{ + struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; + u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits); + unsigned long flags; + int removed = 1; + + spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + + if (rcu_dereference_protected(rvp->qp[0], + lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(rvp->qp[0], NULL); + } else if (rcu_dereference_protected(rvp->qp[1], + lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(rvp->qp[1], NULL); + } else { + struct rvt_qp *q; + struct rvt_qp __rcu **qpp; + + removed = 0; + qpp = &rdi->qp_dev->qp_table[n]; + for (; (q = rcu_dereference_protected(*qpp, + lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL; + qpp = &q->next) { + if (q == qp) { + RCU_INIT_POINTER(*qpp, + rcu_dereference_protected(qp->next, + lockdep_is_held(&rdi->qp_dev->qpt_lock))); + removed = 1; + trace_rvt_qpremove(qp, n); + break; + } + } + } + + spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); + if (removed) { + synchronize_rcu(); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } +} +EXPORT_SYMBOL(rvt_remove_qp); + /** * rvt_modify_qp - modify the attributes of a queue pair * @ibqp: the queue pair whose attributes we're modifying @@ -714,13 +946,248 @@ bail_swq: int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + enum ib_qp_state cur_state, new_state; + struct ib_event ev; + int lastwqe = 0; + int mig = 0; + int pmtu = 0; /* for gcc warning only */ + enum rdma_link_layer link; + + link = rdma_port_get_link_layer(ibqp->device, qp->port_num); + + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_lock); + + cur_state = attr_mask & IB_QP_CUR_STATE ? + attr->cur_qp_state : qp->state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, + attr_mask, link)) + goto inval; + + if (attr_mask & IB_QP_AV) { + if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) + goto inval; + if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr)) + goto inval; + } + + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_ah_attr.dlid >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) + goto inval; + if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) + goto inval; + if (attr->alt_pkey_index >= rvt_get_npkeys(rdi)) + goto inval; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + if (attr->pkey_index >= rvt_get_npkeys(rdi)) + goto inval; + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + if (attr->min_rnr_timer > 31) + goto inval; + + if (attr_mask & IB_QP_PORT) + if (qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI || + attr->port_num == 0 || + attr->port_num > ibqp->device->phys_port_cnt) + goto inval; + + if (attr_mask & IB_QP_DEST_QPN) + if (attr->dest_qp_num > RVT_QPN_MASK) + goto inval; + + if (attr_mask & IB_QP_RETRY_CNT) + if (attr->retry_cnt > 7) + goto inval; + + if (attr_mask & IB_QP_RNR_RETRY) + if (attr->rnr_retry > 7) + goto inval; + /* - * VT-DRIVER-API: qp_mtu() - * OPA devices have a per VL MTU the driver has a mapping of IB SL to SC - * to VL and the mapping table of MTUs per VL. This is not something - * that IB has and should not live in the rvt. + * Don't allow invalid path_mtu values. OK to set greater + * than the active mtu (or even the max_cap, if we have tuned + * that to a small mtu.
We'll set qp->path_mtu + * to the lesser of requested attribute mtu and active, + * for packetizing messages. + * Note that the QP port has to be set in INIT and MTU in RTR. */ - return -EOPNOTSUPP; + if (attr_mask & IB_QP_PATH_MTU) { + pmtu = rdi->driver_f.get_pmtu_from_attr(rdi, qp, attr); + if (pmtu < 0) + goto inval; + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + if (attr->path_mig_state == IB_MIG_REARM) { + if (qp->s_mig_state == IB_MIG_ARMED) + goto inval; + if (new_state != IB_QPS_RTS) + goto inval; + } else if (attr->path_mig_state == IB_MIG_MIGRATED) { + if (qp->s_mig_state == IB_MIG_REARM) + goto inval; + if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD) + goto inval; + if (qp->s_mig_state == IB_MIG_ARMED) + mig = 1; + } else { + goto inval; + } + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > rdi->dparms.max_rdma_atomic) + goto inval; + + switch (new_state) { + case IB_QPS_RESET: + if (qp->state != IB_QPS_RESET) + rvt_reset_qp(rdi, qp, ibqp->qp_type); + break; + + case IB_QPS_RTR: + /* Allow event to re-trigger if QP set to RTR more than once */ + qp->r_flags &= ~RVT_R_COMM_EST; + qp->state = new_state; + break; + + case IB_QPS_SQD: + qp->s_draining = qp->s_last != qp->s_cur; + qp->state = new_state; + break; + + case IB_QPS_SQE: + if (qp->ibqp.qp_type == IB_QPT_RC) + goto inval; + qp->state = new_state; + break; + + case IB_QPS_ERR: + lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); + break; + + default: + qp->state = new_state; + break; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + qp->s_pkey_index = attr->pkey_index; + + if (attr_mask & IB_QP_PORT) + qp->port_num = attr->port_num; + + if (attr_mask & IB_QP_DEST_QPN) + qp->remote_qpn = attr->dest_qp_num; + + if (attr_mask & IB_QP_SQ_PSN) { + qp->s_next_psn = attr->sq_psn & rdi->dparms.psn_modify_mask; + qp->s_psn = qp->s_next_psn; + qp->s_sending_psn = qp->s_next_psn; + qp->s_last_psn = qp->s_next_psn - 1; + qp->s_sending_hpsn = qp->s_last_psn; + } + + if (attr_mask & IB_QP_RQ_PSN) + qp->r_psn = attr->rq_psn & rdi->dparms.psn_modify_mask; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->qp_access_flags = attr->qp_access_flags; + + if (attr_mask & IB_QP_AV) { + qp->remote_ah_attr = attr->ah_attr; + qp->s_srate = attr->ah_attr.static_rate; + qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); + } + + if (attr_mask & IB_QP_ALT_PATH) { + qp->alt_ah_attr = attr->alt_ah_attr; + qp->s_alt_pkey_index = attr->alt_pkey_index; + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + qp->s_mig_state = attr->path_mig_state; + if (mig) { + qp->remote_ah_attr = qp->alt_ah_attr; + qp->port_num = qp->alt_ah_attr.port_num; + qp->s_pkey_index = qp->s_alt_pkey_index; + + /* + * Ignored by drivers which do not support it. Not + * really worth creating a call back into the driver + * just to set a flag. 
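+ * Setting RVT_S_AHG_CLEAR simply asks the driver to invalidate any + * cached header generation state, since the path, and with it the + * packet headers, changed on migration.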
+ */ + qp->s_flags |= RVT_S_AHG_CLEAR; + } + } + + if (attr_mask & IB_QP_PATH_MTU) { + qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu); + qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu); + } + + if (attr_mask & IB_QP_RETRY_CNT) { + qp->s_retry_cnt = attr->retry_cnt; + qp->s_retry = attr->retry_cnt; + } + + if (attr_mask & IB_QP_RNR_RETRY) { + qp->s_rnr_retry_cnt = attr->rnr_retry; + qp->s_rnr_retry = attr->rnr_retry; + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + qp->r_min_rnr_timer = attr->min_rnr_timer; + + if (attr_mask & IB_QP_TIMEOUT) { + qp->timeout = attr->timeout; + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); + } + + if (attr_mask & IB_QP_QKEY) + qp->qkey = attr->qkey; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->r_max_rd_atomic = attr->max_dest_rd_atomic; + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + qp->s_max_rd_atomic = attr->max_rd_atomic; + + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + rvt_insert_qp(rdi, qp); + + if (lastwqe) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + if (mig) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_PATH_MIG; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + return 0; + +inval: + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + return -EINVAL; } /** @@ -948,3 +1415,21 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, { return -EOPNOTSUPP; } + +void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) +{ + struct rvt_qpn_map *map; + + map = qpt->map + qpn / RVT_BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); +} +EXPORT_SYMBOL(rvt_free_qpn); + +void rvt_dec_qp_cnt(struct rvt_dev_info *rdi) +{ + spin_lock(&rdi->n_qps_lock); + rdi->n_qps_allocated--; + spin_unlock(&rdi->n_qps_lock); +} +EXPORT_SYMBOL(rvt_dec_qp_cnt); diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 22e86ff95012..b269291b6dc9 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -77,6 +77,37 @@ TRACE_EVENT(rvt_dbg, TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) ); +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_qphash +DECLARE_EVENT_CLASS(rvt_qphash_template, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, bucket) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->bucket = bucket; + ), + TP_printk( + "[%s] qpn 0x%x bucket %u", + __get_str(dev), + __entry->qpn, + __entry->bucket + ) +); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + #endif /* __RDMAVT_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 36e4fb4c0df3..1c7123ff3656 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,7 @@ #include #include +#include #include #include #include @@ -194,6 +195,10 @@ struct rvt_driver_params { u8 qos_shift; char cq_name[RVT_CQN_MAX]; int node; + int max_rdma_atomic; + int psn_mask; + int psn_shift; + int 
psn_modify_mask; }; /* Protection domain */ @@ -233,6 +238,15 @@ struct rvt_driver_provided { void (*notify_qp_reset)(struct rvt_qp *qp); void (*schedule_send)(struct rvt_qp *qp); void (*do_send)(struct rvt_qp *qp); + int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_attr *attr); + void (*flush_qp_waiters)(struct rvt_qp *qp); + void (*stop_send_queue)(struct rvt_qp *qp); + void (*quiesce_qp)(struct rvt_qp *qp); + void (*notify_error_qp)(struct rvt_qp *qp); + u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + u32 pmtu); + int (*mtu_to_path_mtu)(u32 mtu); /*--------------------*/ /* Optional functions */ @@ -340,6 +354,34 @@ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, return rdi->ports[port_index]->pkey_table[index]; } +/** + * rvt_lookup_qpn - return the QP with the given QPN + * @ibp: the ibport + * @qpn: the QP number to look up + * + * The caller must hold the rcu_read_lock(), and keep the lock until + * the returned qp is no longer in use. + */ +/* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */ +static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, + struct rvt_ibport *rvp, + u32 qpn) __must_hold(RCU) +{ + struct rvt_qp *qp = NULL; + + if (unlikely(qpn <= 1)) { + qp = rcu_dereference(rvp->qp[qpn]); + } else { + u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits); + + for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) + if (qp->ibqp.qp_num == qpn) + break; + } + return qp; +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index ea60476c6b6b..4aa81713b4f3 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h @@ -127,4 +127,13 @@ static inline void rvt_get_mr(struct rvt_mregion *mr) atomic_inc(&mr->refcount); } +static inline void rvt_put_ss(struct rvt_sge_state *ss) +{ + while (ss->num_sge) { + rvt_put_mr(ss->sge.mr); + if (--ss->num_sge) + ss->sge = *ss->sg_list++; + } +} + #endif /* DEF_RDMAVT_INCMRH */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 3189f195538c..e66bcc96d273 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -355,6 +355,7 @@ struct rvt_srq { #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) #define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) +#define RVT_QPN_MASK 0xFFFFFF /* * QPN-map pages start out as NULL, they get allocated upon @@ -397,6 +398,25 @@ static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, sizeof(struct rvt_sge)) * n); } +/* + * Since struct rvt_rwqe is not a fixed size, we can't simply index into + * struct rvt_rwq.wq. This function does the array index computation. 
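+ * The stride of one entry is sizeof(struct rvt_rwqe) plus max_sge + * trailing struct ib_sge slots, so entry n starts n strides past the + * base of the queue.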
+ */ +static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) +{ + return (struct rvt_rwqe *) + ((char *)rq->wq->wq + + (sizeof(struct rvt_rwqe) + + rq->max_sge * sizeof(struct ib_sge)) * n); +} + extern const int ib_rvt_state_ops[]; +struct rvt_dev_info; +void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp); +void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends); +int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); +void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn); +void rvt_dec_qp_cnt(struct rvt_dev_info *rdi); + #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From 4e74080b248701c0c2d1af2764bf02f9c531020a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:55 -0800 Subject: IB/rdmavt: Add multicast functions This patch adds in the multicast add and remove functions as well as the ancillary infrastructure needed. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mcast.c | 335 ++++++++++++++++++++++++++++++++++- drivers/infiniband/sw/rdmavt/mcast.h | 2 + drivers/infiniband/sw/rdmavt/qp.c | 2 + drivers/infiniband/sw/rdmavt/vt.c | 1 + include/rdma/rdma_vt.h | 8 + include/rdma/rdmavt_qp.h | 22 ++- 6 files changed, 367 insertions(+), 3 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 5a78dc7310c3..528c1ca798a9 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -45,14 +45,345 @@ * */ +#include +#include +#include +#include +#include + #include "mcast.h" +void rvt_driver_mcast_init(struct rvt_dev_info *rdi) +{ + /* + * Anything that needs setup for multicast on a per driver or per rdi + * basis should be done in here. + */ + spin_lock_init(&rdi->n_mcast_grps_lock); +} + +/** + * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct + * @qp: the QP to link + */ +static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) +{ + struct rvt_mcast_qp *mqp; + + mqp = kmalloc(sizeof(*mqp), GFP_KERNEL); + if (!mqp) + goto bail; + + mqp->qp = qp; + atomic_inc(&qp->refcount); + +bail: + return mqp; +} + +static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp) +{ + struct rvt_qp *qp = mqp->qp; + + /* Notify hfi1_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + + kfree(mqp); +} + +/** + * mcast_alloc - allocate the multicast GID structure + * @mgid: the multicast GID + * + * A list of QPs will be attached to this structure. + */ +static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid) +{ + struct rvt_mcast *mcast; + + mcast = kzalloc(sizeof(*mcast), GFP_KERNEL); + if (!mcast) + goto bail; + + mcast->mgid = *mgid; + INIT_LIST_HEAD(&mcast->qp_list); + init_waitqueue_head(&mcast->wait); + atomic_set(&mcast->refcount, 0); + +bail: + return mcast; +} + +static void rvt_mcast_free(struct rvt_mcast *mcast) +{ + struct rvt_mcast_qp *p, *tmp; + + list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) + rvt_mcast_qp_free(p); + + kfree(mcast); +} + +/** + * rvt_mcast_find - search the global table for the given multicast GID + * @ibp: the IB port structure + * @mgid: the multicast GID to search for + * + * Returns NULL if not found. + * + * The caller is responsible for decrementing the reference count if found. 
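+ * (The reference is taken while ibp->lock is held, so a non-NULL + * return cannot be freed out from under the caller.)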
+ */ +struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid) +{ + struct rb_node *n; + unsigned long flags; + struct rvt_mcast *found = NULL; + + spin_lock_irqsave(&ibp->lock, flags); + n = ibp->mcast_tree.rb_node; + while (n) { + int ret; + struct rvt_mcast *mcast; + + mcast = rb_entry(n, struct rvt_mcast, rb_node); + + ret = memcmp(mgid->raw, mcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) { + n = n->rb_left; + } else if (ret > 0) { + n = n->rb_right; + } else { + atomic_inc(&mcast->refcount); + found = mcast; + break; + } + } + spin_unlock_irqrestore(&ibp->lock, flags); + return found; +} +EXPORT_SYMBOL(rvt_mcast_find); + +/** + * mcast_add - insert mcast GID into table and attach QP struct + * @mcast: the mcast GID table + * @mqp: the QP to attach + * + * Return zero if both were added. Return EEXIST if the GID was already in + * the table but the QP was added. Return ESRCH if the QP was already + * attached and neither structure was added. + */ +static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp, + struct rvt_mcast *mcast, struct rvt_mcast_qp *mqp) +{ + struct rb_node **n = &ibp->mcast_tree.rb_node; + struct rb_node *pn = NULL; + int ret; + + spin_lock_irq(&ibp->lock); + + while (*n) { + struct rvt_mcast *tmcast; + struct rvt_mcast_qp *p; + + pn = *n; + tmcast = rb_entry(pn, struct rvt_mcast, rb_node); + + ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) { + n = &pn->rb_left; + continue; + } + if (ret > 0) { + n = &pn->rb_right; + continue; + } + + /* Search the QP list to see if this is already there. */ + list_for_each_entry_rcu(p, &tmcast->qp_list, list) { + if (p->qp == mqp->qp) { + ret = ESRCH; + goto bail; + } + } + if (tmcast->n_attached == + rdi->dparms.props.max_mcast_qp_attach) { + ret = ENOMEM; + goto bail; + } + + tmcast->n_attached++; + + list_add_tail_rcu(&mqp->list, &tmcast->qp_list); + ret = EEXIST; + goto bail; + } + + spin_lock(&rdi->n_mcast_grps_lock); + if (rdi->n_mcast_grps_allocated == rdi->dparms.props.max_mcast_grp) { + spin_unlock(&rdi->n_mcast_grps_lock); + ret = ENOMEM; + goto bail; + } + + rdi->n_mcast_grps_allocated++; + spin_unlock(&rdi->n_mcast_grps_lock); + + mcast->n_attached++; + + list_add_tail_rcu(&mqp->list, &mcast->qp_list); + + atomic_inc(&mcast->refcount); + rb_link_node(&mcast->rb_node, pn, n); + rb_insert_color(&mcast->rb_node, &ibp->mcast_tree); + + ret = 0; + +bail: + spin_unlock_irq(&ibp->lock); + + return ret; +} + int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return -EOPNOTSUPP; + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1]; + struct rvt_mcast *mcast; + struct rvt_mcast_qp *mqp; + int ret = -ENOMEM; + + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) + return -EINVAL; + + /* + * Allocate data structures since its better to do this outside of + * spin locks and it will most likely be needed. + */ + mcast = rvt_mcast_alloc(gid); + if (!mcast) + return -ENOMEM; + + mqp = rvt_mcast_qp_alloc(qp); + if (!mqp) + goto bail_mcast; + + switch (rvt_mcast_add(rdi, ibp, mcast, mqp)) { + case ESRCH: + /* Neither was used: OK to attach the same QP twice. */ + ret = 0; + goto bail_mqp; + case EEXIST: /* The mcast wasn't used */ + ret = 0; + goto bail_mcast; + case ENOMEM: + /* Exceeded the maximum number of mcast groups. 
*/ + ret = -ENOMEM; + goto bail_mqp; + default: + break; + } + + return 0; + +bail_mqp: + rvt_mcast_qp_free(mqp); + +bail_mcast: + rvt_mcast_free(mcast); + + return ret; } int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return -EOPNOTSUPP; + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1]; + struct rvt_mcast *mcast = NULL; + struct rvt_mcast_qp *p, *tmp, *delp = NULL; + struct rb_node *n; + int last = 0; + int ret = 0; + + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) + return -EINVAL; + + spin_lock_irq(&ibp->lock); + + /* Find the GID in the mcast table. */ + n = ibp->mcast_tree.rb_node; + while (1) { + if (!n) { + spin_unlock_irq(&ibp->lock); + return -EINVAL; + } + + mcast = rb_entry(n, struct rvt_mcast, rb_node); + ret = memcmp(gid->raw, mcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else + break; + } + + /* Search the QP list. */ + list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) { + if (p->qp != qp) + continue; + /* + * We found it, so remove it, but don't poison the forward + * link until we are sure there are no list walkers. + */ + list_del_rcu(&p->list); + mcast->n_attached--; + delp = p; + + /* If this was the last attached QP, remove the GID too. */ + if (list_empty(&mcast->qp_list)) { + rb_erase(&mcast->rb_node, &ibp->mcast_tree); + last = 1; + } + break; + } + + spin_unlock_irq(&ibp->lock); + /* QP not attached */ + if (!delp) + return -EINVAL; + + /* + * Wait for any list walkers to finish before freeing the + * list element. + */ + wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); + rvt_mcast_qp_free(delp); + + if (last) { + atomic_dec(&mcast->refcount); + wait_event(mcast->wait, !atomic_read(&mcast->refcount)); + rvt_mcast_free(mcast); + spin_lock_irq(&rdi->n_mcast_grps_lock); + rdi->n_mcast_grps_allocated--; + spin_unlock_irq(&rdi->n_mcast_grps_lock); + } + + return 0; +} + +int rvt_mcast_tree_empty(struct rvt_dev_info *rdi) +{ + int i; + int in_use = 0; + + for (i = 0; i < rdi->dparms.nports; i++) + if (rdi->ports[i]->mcast_tree.rb_node) + in_use++; + return in_use; } diff --git a/drivers/infiniband/sw/rdmavt/mcast.h b/drivers/infiniband/sw/rdmavt/mcast.h index 21647c367340..cd15a981d7bf 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.h +++ b/drivers/infiniband/sw/rdmavt/mcast.h @@ -50,7 +50,9 @@ #include +void rvt_driver_mcast_init(struct rvt_dev_info *rdi); int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); +int rvt_mcast_tree_empty(struct rvt_dev_info *rdi); #endif /* DEF_RVTMCAST_H */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 0eeef49e7d72..64b9c0191366 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -240,6 +240,8 @@ static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) if (rdi->driver_f.free_all_qps) qp_inuse = rdi->driver_f.free_all_qps(rdi); + qp_inuse += rvt_mcast_tree_empty(rdi); + if (!rdi->qp_dev) return qp_inuse; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 7f56a4266cc4..5a094ebeb8c5 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -305,6 +305,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, query_srq); /* Multicast */ + rvt_driver_mcast_init(rdi); 
CHECK_DRIVER_OVERRIDE(rdi, attach_mcast); CHECK_DRIVER_OVERRIDE(rdi, detach_mcast); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 1c7123ff3656..04e90192a50d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -306,6 +306,11 @@ struct rvt_dev_info { struct kthread_worker *worker; /* per device cq worker */ u32 n_cqs_allocated; /* number of CQs allocated for device */ spinlock_t n_cqs_lock; /* protect count of in use cqs */ + + /* Multicast */ + u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ + spinlock_t n_mcast_grps_lock; + }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) @@ -399,8 +404,11 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, void *obj); void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, u32 size, void *obj); +int rvt_reg_mr(struct rvt_qp *qp, struct ib_reg_wr *wr); +struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid); /* Temporary export */ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type); + #endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index e66bcc96d273..a97b95ba893f 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -50,6 +50,7 @@ #include #include +#include /* * Atomic bit definitions for r_aflags. */ @@ -385,9 +386,28 @@ struct rvt_qp_ibdev { struct rvt_qpn_table qpn_table; }; +/* + * There is one struct rvt_mcast for each multicast GID. + * All attached QPs are then stored as a list of + * struct rvt_mcast_qp. + */ +struct rvt_mcast_qp { + struct list_head list; + struct rvt_qp *qp; +}; + +struct rvt_mcast { + struct rb_node rb_node; + union ib_gid mgid; + struct list_head qp_list; + wait_queue_head_t wait; + atomic_t refcount; + int n_attached; +}; + /* * Since struct rvt_swqe is not a fixed size, we can't simply index into - * struct hfi1_qp.s_wq. This function does the array index computation. + * struct rvt_qp.s_wq. This function does the array index computation. */ static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, unsigned n) -- cgit v1.2.3 From 2b047ea7a3ceef0322e666782e0a82e98424f6f1 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 22 Jan 2016 13:04:32 -0800 Subject: IB/rdmavt: Remove unused variable from Queue Pair s_sde should be in the low level driver QP private data. Remove the definition from rvt_qp. Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index a97b95ba893f..f0e24266bdb4 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -304,7 +304,6 @@ struct rvt_qp { struct rvt_swqe *s_wqe; struct rvt_sge_state s_sge; /* current send request data */ struct rvt_mregion *s_rdma_mr; - struct sdma_engine *s_sde; /* current sde */ u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ -- cgit v1.2.3 From e85ec33d820e1f3f763a46f9fd41230ca0ce40c6 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 22 Jan 2016 13:04:38 -0800 Subject: IB/rdmavt: add modify queue pair driver helpers Low level drivers need to be able to check incoming attributes as well as be able to adjust their private data on queue pair modification. Add 2 driver callbacks, check_modify_qp and modify_qp, to facilitate this. 
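To make the division of labor concrete, a low level driver might fill the two hooks roughly as below. This sketch is illustrative only: the mydrv names and the private structure are hypothetical, and only the hook signatures come from this patch.

#include <rdma/rdma_vt.h>

/* Hypothetical per-QP driver data; qp->priv points at this. */
struct mydrv_qp_priv {
	u8 sl;				/* cached service level */
};

/* Veto attribute changes the hardware cannot honor; 0 means valid. */
static int mydrv_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
				 int attr_mask, struct ib_udata *udata)
{
	/* pretend this hardware cannot packetize beyond a 2K MTU */
	if ((attr_mask & IB_QP_PATH_MTU) && attr->path_mtu > IB_MTU_2048)
		return -EINVAL;
	return 0;
}

/* Called once rdmavt has committed the change; mirror it privately. */
static void mydrv_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
			    int attr_mask, struct ib_udata *udata)
{
	struct mydrv_qp_priv *priv = qp->priv;

	if (attr_mask & IB_QP_AV)
		priv->sl = attr->ah_attr.sl;
}

The driver would point rdi->driver_f.check_modify_qp and rdi->driver_f.modify_qp at these before registering with rdmavt.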
Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 7 +++++++ include/rdma/rdma_vt.h | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 64b9c0191366..615358ec394d 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -970,6 +970,10 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr_mask, link)) goto inval; + if (rdi->driver_f.check_modify_qp && + rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata)) + goto inval; + if (attr_mask & IB_QP_AV) { if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) goto inval; @@ -1166,6 +1170,9 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) qp->s_max_rd_atomic = attr->max_rd_atomic; + if (rdi->driver_f.modify_qp) + rdi->driver_f.modify_qp(qp, attr, attr_mask, udata); + spin_unlock(&qp->s_lock); spin_unlock_irq(&qp->r_lock); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 04e90192a50d..e382cca3fc4f 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -256,6 +256,13 @@ struct rvt_driver_provided { struct rvt_ah *); int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, enum ib_qp_type type, u8 port, gfp_t gfp); + /** + * Return 0 if modification is valid, -errno otherwise + */ + int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); }; struct rvt_dev_info { -- cgit v1.2.3 From ff6acd69518e0a84bd9c9b7f1bd4313f7076db97 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:04:45 -0800 Subject: IB/rdmavt: Add device structure allocation This patch adds rdmavt device structure allocation in rdmavt. The ib_device alloc is now done in rdmavt instead of the driver. Drivers need to tell rdmavt the number of ports when calling the allocation function. A side effect of this patch is fixing a bug with port initialization where the device structure port array was allocated over top of an existing one.
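As an illustration of the new flow, a driver embedding struct rvt_dev_info as the first member of its device structure would now allocate it along these lines; the mydrv names are hypothetical, and only rvt_alloc_device() itself comes from the patch below:

#include <linux/err.h>
#include <rdma/rdma_vt.h>

/* Hypothetical driver device structure. */
struct mydrv_devdata {
	struct rvt_dev_info rdi;	/* must be first for container_of() */
	int num_ctxts;			/* ... driver private fields ... */
};

static struct mydrv_devdata *mydrv_alloc_devdata(int nports)
{
	struct rvt_dev_info *rdi;

	/* rdmavt now does the ib_alloc_device() and sizes the port array */
	rdi = rvt_alloc_device(sizeof(struct mydrv_devdata), nports);
	if (IS_ERR_OR_NULL(rdi))	/* defensive: covers both failure styles */
		return NULL;

	return container_of(rdi, struct mydrv_devdata, rdi);
}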
Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/vt.c | 30 ++++++++++++++++++------------ include/rdma/rdma_vt.h | 1 + 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index cf7cac610248..450caa7e38cf 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -67,6 +67,24 @@ static void rvt_cleanup(void) } module_exit(rvt_cleanup); +struct rvt_dev_info *rvt_alloc_device(size_t size, int nports) +{ + struct rvt_dev_info *rdi = ERR_PTR(-ENOMEM); + + rdi = (struct rvt_dev_info *)ib_alloc_device(size); + if (!rdi) + return rdi; + + rdi->ports = kcalloc(nports, + sizeof(struct rvt_ibport **), + GFP_KERNEL); + if (!rdi->ports) + ib_dealloc_device(&rdi->ibdev); + + return rdi; +} +EXPORT_SYMBOL(rvt_alloc_device); + static int rvt_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) @@ -434,18 +452,6 @@ EXPORT_SYMBOL(rvt_unregister_device); int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, int portnum, u16 *pkey_table) { - if (!rdi->dparms.nports) { - rvt_pr_err(rdi, "Driver says it has no ports.\n"); - return -EINVAL; - } - - rdi->ports = kcalloc(rdi->dparms.nports, - sizeof(struct rvt_ibport **), - GFP_KERNEL); - if (!rdi->ports) { - rvt_pr_err(rdi, "Could not allocate port mem.\n"); - return -ENOMEM; - } rdi->ports[portnum] = port; rdi->ports[portnum]->pkey_table = pkey_table; diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e382cca3fc4f..7768e041f244 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -394,6 +394,7 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, return qp; } +struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); -- cgit v1.2.3 From 3711baf27d78475436b063f33399908ba208a8f2 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:04:51 -0800 Subject: IB/rdmavt: Add mad agents to rdmavt This patch adds mad agent create and free to rdmavt. 
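As a sketch of what the per-port notify hooks allow, a driver could hold off starting its SM trap machinery until the send agent exists. Everything here apart from the hook signatures is hypothetical:

#include <linux/timer.h>
#include <rdma/rdma_vt.h>

#define MYDRV_MAX_PORTS 2

/* Hypothetical driver device structure with a per-port trap timer. */
struct mydrv_devdata {
	struct rvt_dev_info rdi;
	struct timer_list trap_timer[MYDRV_MAX_PORTS];
};

static void mydrv_notify_create_mad_agent(struct rvt_dev_info *rdi,
					  int port_idx)
{
	struct mydrv_devdata *dd =
		container_of(rdi, struct mydrv_devdata, rdi);

	/* an agent now backs this port, so trap sends may be scheduled */
	mod_timer(&dd->trap_timer[port_idx], jiffies + HZ);
}

static void mydrv_notify_free_mad_agent(struct rvt_dev_info *rdi,
					int port_idx)
{
	struct mydrv_devdata *dd =
		container_of(rdi, struct mydrv_devdata, rdi);

	del_timer_sync(&dd->trap_timer[port_idx]);
}

The driver wires these through rdi->driver_f.notify_create_mad_agent and rdi->driver_f.notify_free_mad_agent before calling rvt_register_device().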
Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mad.c | 72 ++++++++++++++++++++++++++++++++++++++ drivers/infiniband/sw/rdmavt/mad.h | 3 +- drivers/infiniband/sw/rdmavt/vt.c | 4 +++ drivers/infiniband/sw/rdmavt/vt.h | 1 + include/rdma/rdma_vt.h | 6 +++- 5 files changed, 84 insertions(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index eef702943451..e01f3fb47c5c 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -45,6 +45,7 @@ * */ +#include #include "mad.h" /** @@ -83,3 +84,74 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, */ return IB_MAD_RESULT_FAILURE; } + +static void rvt_send_mad_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *mad_send_wc) +{ + ib_free_send_mad(mad_send_wc->send_buf); +} + +int rvt_create_mad_agents(struct rvt_dev_info *rdi) +{ + struct ib_mad_agent *agent; + struct rvt_ibport *rvp; + int p; + int ret; + + for (p = 0; p < rdi->dparms.nports; p++) { + rvp = rdi->ports[p]; + agent = ib_register_mad_agent(&rdi->ibdev, p + 1, + IB_QPT_SMI, + NULL, 0, rvt_send_mad_handler, + NULL, NULL, 0); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + goto err; + } + + rvp->send_agent = agent; + + if (rdi->driver_f.notify_create_mad_agent) + rdi->driver_f.notify_create_mad_agent(rdi, p); + } + + return 0; + +err: + for (p = 0; p < rdi->dparms.nports; p++) { + rvp = rdi->ports[p]; + if (rvp->send_agent) { + agent = rvp->send_agent; + rvp->send_agent = NULL; + ib_unregister_mad_agent(agent); + if (rdi->driver_f.notify_free_mad_agent) + rdi->driver_f.notify_free_mad_agent(rdi, p); + } + } + + return ret; +} + +void rvt_free_mad_agents(struct rvt_dev_info *rdi) +{ + struct ib_mad_agent *agent; + struct rvt_ibport *rvp; + int p; + + for (p = 0; p < rdi->dparms.nports; p++) { + rvp = rdi->ports[p]; + if (rvp->send_agent) { + agent = rvp->send_agent; + rvp->send_agent = NULL; + ib_unregister_mad_agent(agent); + } + if (rvp->sm_ah) { + ib_destroy_ah(&rvp->sm_ah->ibah); + rvp->sm_ah = NULL; + } + + if (rdi->driver_f.notify_free_mad_agent) + rdi->driver_f.notify_free_mad_agent(rdi, p); + } +} + diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h index ee740e9ed387..5d8a6a9f1f8c 100644 --- a/drivers/infiniband/sw/rdmavt/mad.h +++ b/drivers/infiniband/sw/rdmavt/mad.h @@ -55,5 +55,6 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, u16 *out_mad_pkey_index); - +int rvt_create_mad_agents(struct rvt_dev_info *rdi); +void rvt_free_mad_agents(struct rvt_dev_info *rdi); #endif /* DEF_RVTMAD_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 450caa7e38cf..7496d43685ab 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -416,6 +416,8 @@ int rvt_register_device(struct rvt_dev_info *rdi) goto bail_cq; } + rvt_create_mad_agents(rdi); + rvt_pr_info(rdi, "Registration with rdmavt done.\n"); return ret; @@ -438,6 +440,8 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) if (!rdi) return; + rvt_free_mad_agents(rdi); + ib_unregister_device(&rdi->ibdev); rvt_cq_exit(rdi); rvt_mr_exit(rdi); diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index 54ee05a4a9b9..d9f78ccf1c35 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ 
b/drivers/infiniband/sw/rdmavt/vt.h @@ -59,6 +59,7 @@ #include "mcast.h" #include "mmap.h" #include "cq.h" +#include "mad.h" #define rvt_pr_info(rdi, fmt, ...) \ __rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 7768e041f244..31f9e5a08da0 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -136,7 +136,8 @@ struct rvt_ibport { */ u16 *pkey_table; - /* TODO: Move sm_ah and smi_ah into here as well*/ + struct rvt_ah *sm_ah; + struct rvt_ah *smi_ah; }; #define RVT_CQN_MAX 16 /* maximum length of cq name */ @@ -263,6 +264,9 @@ struct rvt_driver_provided { int attr_mask, struct ib_udata *udata); void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); + + void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); + void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); }; struct rvt_dev_info { -- cgit v1.2.3 From fe31419501ba133a967da7b7da0d32945ef21840 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:04:58 -0800 Subject: IB/rdmavt: Fix copyright date Update all files added by rdmavt which do not yet have 2016 as the copyright year. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/ah.c | 2 +- drivers/infiniband/sw/rdmavt/ah.h | 2 +- drivers/infiniband/sw/rdmavt/cq.c | 2 +- drivers/infiniband/sw/rdmavt/cq.h | 2 +- drivers/infiniband/sw/rdmavt/dma.c | 2 +- drivers/infiniband/sw/rdmavt/dma.h | 2 +- drivers/infiniband/sw/rdmavt/mad.c | 2 +- drivers/infiniband/sw/rdmavt/mad.h | 2 +- drivers/infiniband/sw/rdmavt/mcast.c | 2 +- drivers/infiniband/sw/rdmavt/mcast.h | 2 +- drivers/infiniband/sw/rdmavt/mmap.c | 2 +- drivers/infiniband/sw/rdmavt/mmap.h | 2 +- drivers/infiniband/sw/rdmavt/mr.c | 2 +- drivers/infiniband/sw/rdmavt/mr.h | 2 +- drivers/infiniband/sw/rdmavt/pd.c | 2 +- drivers/infiniband/sw/rdmavt/pd.h | 2 +- drivers/infiniband/sw/rdmavt/qp.c | 2 +- drivers/infiniband/sw/rdmavt/qp.h | 2 +- drivers/infiniband/sw/rdmavt/srq.c | 2 +- drivers/infiniband/sw/rdmavt/srq.h | 2 +- drivers/infiniband/sw/rdmavt/trace.c | 2 +- drivers/infiniband/sw/rdmavt/trace.h | 2 +- drivers/infiniband/sw/rdmavt/vt.c | 2 +- drivers/infiniband/sw/rdmavt/vt.h | 2 +- include/rdma/rdma_vt.h | 2 +- include/rdma/rdmavt_mr.h | 2 +- include/rdma/rdmavt_qp.h | 2 +- 27 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index c194d9d9bd25..9372c4321858 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index 8cd7ea7303e4..e9c36be87d79 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -2,7 +2,7 @@ #define DEF_RVTAH_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 7308a274643d..055aa71bed18 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index 3813d90efad2..6182c29eff66 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -2,7 +2,7 @@ #define DEF_RVTCQ_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c index c0701416136a..33076a5eee2f 100644 --- a/drivers/infiniband/sw/rdmavt/dma.c +++ b/drivers/infiniband/sw/rdmavt/dma.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/dma.h b/drivers/infiniband/sw/rdmavt/dma.h index a80cc3556915..979f07e09195 100644 --- a/drivers/infiniband/sw/rdmavt/dma.h +++ b/drivers/infiniband/sw/rdmavt/dma.h @@ -2,7 +2,7 @@ #define DEF_RDMAVTDMA_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index e01f3fb47c5c..5c720d35304d 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h index 5d8a6a9f1f8c..c89faf411d09 100644 --- a/drivers/infiniband/sw/rdmavt/mad.h +++ b/drivers/infiniband/sw/rdmavt/mad.h @@ -2,7 +2,7 @@ #define DEF_RVTMAD_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 528c1ca798a9..e06a8755cbef 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mcast.h b/drivers/infiniband/sw/rdmavt/mcast.h index cd15a981d7bf..29f579267608 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.h +++ b/drivers/infiniband/sw/rdmavt/mcast.h @@ -2,7 +2,7 @@ #define DEF_RVTMCAST_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. 
* * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index fc30ff7f24ea..d6330d7b4405 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h index 3513e25a8491..e8067471c722 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.h +++ b/drivers/infiniband/sw/rdmavt/mmap.h @@ -2,7 +2,7 @@ #define DEF_RDMAVTMMAP_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index f1dcaf4d6c3e..ee36be37c55d 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h index c5339aa2d241..69380512c6d1 100644 --- a/drivers/infiniband/sw/rdmavt/mr.h +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -2,7 +2,7 @@ #define DEF_RVTMR_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index f8dba88880e5..62fee44be3a3 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h index 56d75e6cb21f..1892ca4a9746 100644 --- a/drivers/infiniband/sw/rdmavt/pd.h +++ b/drivers/infiniband/sw/rdmavt/pd.h @@ -2,7 +2,7 @@ #define DEF_RDMAVTPD_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 615358ec394d..8d3563a4249c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index f438809e18e2..8409f80d5f25 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -2,7 +2,7 @@ #define DEF_RVTQP_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index bbb623a5f679..c9eb8b33cae3 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h index 0c3c5a7e64cb..9f07880e9e07 100644 --- a/drivers/infiniband/sw/rdmavt/srq.h +++ b/drivers/infiniband/sw/rdmavt/srq.h @@ -2,7 +2,7 @@ #define DEF_RVTSRQ_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/trace.c b/drivers/infiniband/sw/rdmavt/trace.c index 19afe3988f67..d593285a349c 100644 --- a/drivers/infiniband/sw/rdmavt/trace.c +++ b/drivers/infiniband/sw/rdmavt/trace.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index b269291b6dc9..d5b128118b73 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 7496d43685ab..571463eca65c 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index d9f78ccf1c35..a5c36d32fa4d 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 31f9e5a08da0..f6569b24497f 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -2,7 +2,7 @@ #define DEF_RDMA_VT_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or * redistributing this file, you may do so under either license. diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index 4aa81713b4f3..5edffdca8c53 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCMR_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f0e24266bdb4..91f20fd91e00 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCQP_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. -- cgit v1.2.3 From b8f881b913f34f712185b2ff7a41645dcad9a868 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Wed, 3 Feb 2016 14:14:36 -0800 Subject: IB/rdmavt: Add srq functionality to rdmavt Fill in srq function stubs with code derived from hfi1 and qib. Move necessary functions and data structure members as well. Reviewed-by: Dennis Dalessandro Reviewed-by: Harish Chegondi Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 37 +++++- drivers/infiniband/sw/rdmavt/srq.c | 257 ++++++++++++++++++++++++++++++++++++- drivers/infiniband/sw/rdmavt/srq.h | 1 + drivers/infiniband/sw/rdmavt/vt.c | 1 + include/rdma/rdma_vt.h | 3 + 5 files changed, 293 insertions(+), 6 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 354fdac2b625..4711e148f34d 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1517,7 +1517,42 @@ bail: int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { - return -EOPNOTSUPP; + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); + struct rvt_rwq *wq; + unsigned long flags; + + for (; wr; wr = wr->next) { + struct rvt_rwqe *wqe; + u32 next; + int i; + + if ((unsigned)wr->num_sge > srq->rq.max_sge) { + *bad_wr = wr; + return -EINVAL; + } + + spin_lock_irqsave(&srq->rq.lock, flags); + wq = srq->rq.wq; + next = wq->head + 1; + if (next >= srq->rq.size) + next = 0; + if (next == wq->tail) { + spin_unlock_irqrestore(&srq->rq.lock, flags); + *bad_wr = wr; + return -ENOMEM; + } + + wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head); + wqe->wr_id = wr->wr_id; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); + wq->head = next; + spin_unlock_irqrestore(&srq->rq.lock, flags); + } + return 0; } void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index c9eb8b33cae3..4960a89f91b2 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -45,8 +45,21 @@ * */ +#include +#include +#include + #include "srq.h" +/* + * Do any initialization needed when a driver registers with rdmavt. 
+ */ +void rvt_driver_srq_init(struct rvt_dev_info *rdi) +{ + spin_lock_init(&rdi->n_srqs_lock); + rdi->n_srqs_allocated = 0; +} + /** * rvt_create_srq - create a shared receive queue * @ibpd: the protection domain of the SRQ to create @@ -57,7 +70,96 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); + struct rvt_srq *srq; + u32 sz; + struct ib_srq *ret; + + if (srq_init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + + if (srq_init_attr->attr.max_sge == 0 || + srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge || + srq_init_attr->attr.max_wr == 0 || + srq_init_attr->attr.max_wr > dev->dparms.props.max_srq_wr) + return ERR_PTR(-EINVAL); + + srq = kmalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + /* + * Need to use vmalloc() if we want to support large #s of entries. + */ + srq->rq.size = srq_init_attr->attr.max_wr + 1; + srq->rq.max_sge = srq_init_attr->attr.max_sge; + sz = sizeof(struct ib_sge) * srq->rq.max_sge + + sizeof(struct rvt_rwqe); + srq->rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz); + if (!srq->rq.wq) { + ret = ERR_PTR(-ENOMEM); + goto bail_srq; + } + + /* + * Return the address of the RWQ as the offset to mmap. + * See rvt_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + int err; + u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; + + srq->ip = + rvt_create_mmap_info(dev, s, ibpd->uobject->context, + srq->rq.wq); + if (!srq->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wq; + } + + err = ib_copy_to_udata(udata, &srq->ip->offset, + sizeof(srq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else { + srq->ip = NULL; + } + + /* + * ib_create_srq() will initialize srq->ibsrq. + */ + spin_lock_init(&srq->rq.lock); + srq->rq.wq->head = 0; + srq->rq.wq->tail = 0; + srq->limit = srq_init_attr->attr.srq_limit; + + spin_lock(&dev->n_srqs_lock); + if (dev->n_srqs_allocated == dev->dparms.props.max_srq) { + spin_unlock(&dev->n_srqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_srqs_allocated++; + spin_unlock(&dev->n_srqs_lock); + + if (srq->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + return &srq->ibsrq; + +bail_ip: + kfree(srq->ip); +bail_wq: + vfree(srq->rq.wq); +bail_srq: + kfree(srq); + return ret; } /** @@ -71,16 +173,161 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) { - return -EOPNOTSUPP; + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); + struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); + struct rvt_rwq *wq; + int ret = 0; + + if (attr_mask & IB_SRQ_MAX_WR) { + struct rvt_rwq *owq; + struct rvt_rwqe *p; + u32 sz, size, n, head, tail; + + /* Check that the requested sizes are below the limits. */ + if ((attr->max_wr > dev->dparms.props.max_srq_wr) || + ((attr_mask & IB_SRQ_LIMIT) ? + attr->srq_limit : srq->limit) > attr->max_wr) + return -EINVAL; + + sz = sizeof(struct rvt_rwqe) + + srq->rq.max_sge * sizeof(struct ib_sge); + size = attr->max_wr + 1; + wq = vmalloc_user(sizeof(struct rvt_rwq) + size * sz); + if (!wq) + return -ENOMEM; + + /* Check that we can write the offset to mmap. 
*/ + if (udata && udata->inlen >= sizeof(__u64)) { + __u64 offset_addr; + __u64 offset = 0; + + ret = ib_copy_from_udata(&offset_addr, udata, + sizeof(offset_addr)); + if (ret) + goto bail_free; + udata->outbuf = (void __user *) + (unsigned long)offset_addr; + ret = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (ret) + goto bail_free; + } + + spin_lock_irq(&srq->rq.lock); + /* + * validate head and tail pointer values and compute + * the number of remaining WQEs. + */ + owq = srq->rq.wq; + head = owq->head; + tail = owq->tail; + if (head >= srq->rq.size || tail >= srq->rq.size) { + ret = -EINVAL; + goto bail_unlock; + } + n = head; + if (n < tail) + n += srq->rq.size - tail; + else + n -= tail; + if (size <= n) { + ret = -EINVAL; + goto bail_unlock; + } + n = 0; + p = wq->wq; + while (tail != head) { + struct rvt_rwqe *wqe; + int i; + + wqe = rvt_get_rwqe_ptr(&srq->rq, tail); + p->wr_id = wqe->wr_id; + p->num_sge = wqe->num_sge; + for (i = 0; i < wqe->num_sge; i++) + p->sg_list[i] = wqe->sg_list[i]; + n++; + p = (struct rvt_rwqe *)((char *)p + sz); + if (++tail >= srq->rq.size) + tail = 0; + } + srq->rq.wq = wq; + srq->rq.size = size; + wq->head = n; + wq->tail = 0; + if (attr_mask & IB_SRQ_LIMIT) + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + + vfree(owq); + + if (srq->ip) { + struct rvt_mmap_info *ip = srq->ip; + struct rvt_dev_info *dev = ib_to_rvt(srq->ibsrq.device); + u32 s = sizeof(struct rvt_rwq) + size * sz; + + rvt_update_mmap_info(dev, ip, s, wq); + + /* + * Return the offset to mmap. + * See rvt_mmap() for details. + */ + if (udata && udata->inlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + return ret; + } + + /* + * Put user mapping info onto the pending list + * unless it already is on the list. 
+ */ + spin_lock_irq(&dev->pending_lock); + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, + &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + } else if (attr_mask & IB_SRQ_LIMIT) { + spin_lock_irq(&srq->rq.lock); + if (attr->srq_limit >= srq->rq.size) + ret = -EINVAL; + else + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + } + return ret; + +bail_unlock: + spin_unlock_irq(&srq->rq.lock); +bail_free: + vfree(wq); + return ret; } int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) { - return -EOPNOTSUPP; + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); + + attr->max_wr = srq->rq.size - 1; + attr->max_sge = srq->rq.max_sge; + attr->srq_limit = srq->limit; + return 0; } int rvt_destroy_srq(struct ib_srq *ibsrq) { - return -EOPNOTSUPP; -} + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); + struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); + spin_lock(&dev->n_srqs_lock); + dev->n_srqs_allocated--; + spin_unlock(&dev->n_srqs_lock); + if (srq->ip) + kref_put(&srq->ip->ref, rvt_release_mmap_info); + else + vfree(srq->rq.wq); + kfree(srq); + + return 0; +} diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h index 9f07880e9e07..bf0eaaf56465 100644 --- a/drivers/infiniband/sw/rdmavt/srq.h +++ b/drivers/infiniband/sw/rdmavt/srq.h @@ -49,6 +49,7 @@ */ #include +void rvt_driver_srq_init(struct rvt_dev_info *rdi); struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 571463eca65c..d45206c2359e 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -323,6 +323,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, modify_srq); CHECK_DRIVER_OVERRIDE(rdi, destroy_srq); CHECK_DRIVER_OVERRIDE(rdi, query_srq); + rvt_driver_srq_init(rdi); /* Multicast */ rvt_driver_mcast_init(rdi); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index f6569b24497f..1b770650cf60 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -299,6 +299,9 @@ struct rvt_dev_info { int n_ahs_allocated; spinlock_t n_ahs_lock; /* Protect ah allocated count */ + u32 n_srqs_allocated; + spinlock_t n_srqs_lock; /* Protect srqs allocated count */ + int flags; struct rvt_ibport **ports; -- cgit v1.2.3 From 60c30f572595e46c819503b5a8c3a8e2f922de7a Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 3 Feb 2016 14:14:45 -0800 Subject: IB/rdmavt: Add hardware driver send work request check Some hardware drivers require additional checks on send WRs. Create an optional callback to allow hardware drivers to reject a send WR.
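For illustration, a minimal driver hook might look like the sketch below. It is a hypothetical example (the 8-SGE limit and the function name are invented), but it follows the contract rvt_post_one_wr() enforces: the callback returns 0 to accept the WR and nonzero to make the post fail with -EINVAL. Note that a later patch in this series ("add s_hlock for use in post send") reworks this hook into check_send_wqe(), which validates the built swqe instead of the raw WR.

#include <rdma/rdma_vt.h>

/*
 * Hypothetical driver-provided send WR check; rvt_post_one_wr()
 * calls this after its generic validation, before building the swqe.
 */
static int example_check_send_wr(struct rvt_qp *qp, struct ib_send_wr *wr)
{
	/* e.g. hardware that can only fetch 8 SGEs per send descriptor */
	if (wr->num_sge > 8)
		return -EINVAL;
	return 0;
}

A driver would register the hook with the rest of its callbacks before calling rvt_register_device(), e.g. rdi->driver_f.check_send_wr = example_check_send_wr;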
Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 4 ++++ include/rdma/rdma_vt.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 4711e148f34d..e9e3138d1203 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1394,6 +1394,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) if (next == qp->s_last) return -ENOMEM; + if (rdi->driver_f.check_send_wr && + rdi->driver_f.check_send_wr(qp, wr)) + return -EINVAL; + rkt = &rdi->lkey_table; pd = ibpd_to_rvtpd(qp->ibqp.pd); wqe = rvt_get_swqe_ptr(qp, qp->s_head); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 1b770650cf60..52dfa9cf8621 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -265,6 +265,8 @@ struct rvt_driver_provided { void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); + int (*check_send_wr)(struct rvt_qp *qp, struct ib_send_wr *wr); + void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); }; -- cgit v1.2.3 From f1badc716349cc2ac6e55ad50dcff598ef97bad5 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 3 Feb 2016 14:15:02 -0800 Subject: IB/rdmavt: Clean up distinction between port number and index IB core uses 1 relative indexing for ports. All of our data structures use 0 based indexing. Add an inline function that we can use whenever we need to validate a legal value and try to convert a port number to a port index at the entrance into rdmavt. Try to follow the policy that when we are talking about a port from IB core point of view we refer to it as a port number. When port is an index into our arrays refer to it as a port index. Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mad.c | 8 ++++++-- drivers/infiniband/sw/rdmavt/mad.h | 2 +- drivers/infiniband/sw/rdmavt/qp.c | 17 +++++++++++------ drivers/infiniband/sw/rdmavt/vt.c | 36 ++++++++++++++++++++++-------------- drivers/infiniband/sw/rdmavt/vt.h | 12 ++++++++++++ include/rdma/rdma_vt.h | 4 ++-- 6 files changed, 54 insertions(+), 25 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index 5c720d35304d..2feae47492df 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -47,12 +47,13 @@ #include #include "mad.h" +#include "vt.h" /** * rvt_process_mad - process an incoming MAD packet * @ibdev: the infiniband device this packet came in on * @mad_flags: MAD flags - * @port: the port number this packet came in on + * @port_num: the port number this packet came in on, 1 based from ib core * @in_wc: the work completion entry for this packet * @in_grh: the global route header for this packet * @in_mad: the incoming MAD @@ -67,7 +68,7 @@ * * This is called by the ib_mad module. 
*/ -int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, +int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, @@ -82,6 +83,9 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, *VT-DRIVER-API: ???? * */ + if (ibport_num_to_idx(ibdev, port_num) < 0) + return -EINVAL; + return IB_MAD_RESULT_FAILURE; } diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h index c89faf411d09..a9d6eecc3723 100644 --- a/drivers/infiniband/sw/rdmavt/mad.h +++ b/drivers/infiniband/sw/rdmavt/mad.h @@ -50,7 +50,7 @@ #include -int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, +int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 471d9c59f765..2647dbae32e4 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -286,26 +286,31 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, return (map - qpt->map) * RVT_BITS_PER_PAGE + off; } -/* - * Allocate the next available QPN or - * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. +/** + * alloc_qpn - Allocate the next available qpn or zero/one for QP type + * IB_QPT_SMI/IB_QPT_GSI + *@rdi: rvt device info structure + *@qpt: queue pair number table pointer + *@port_num: IB port number, 1 based, comes from core + * + * Return: The queue pair number */ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp) + enum ib_qp_type type, u8 port_num, gfp_t gfp) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; u32 ret; if (rdi->driver_f.alloc_qpn) - return rdi->driver_f.alloc_qpn(rdi, qpt, type, port, + return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, GFP_KERNEL); if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; ret = type == IB_QPT_GSI; - n = 1 << (ret + 2 * (port - 1)); + n = 1 << (ret + 2 * (port_num - 1)); spin_lock(&qpt->lock); if (qpt->flags & n) ret = -EINVAL; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index d45206c2359e..9f9cb9ab170b 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -120,14 +120,17 @@ static int rvt_modify_device(struct ib_device *device, /** * rvt_query_port: Passes the query port call to the driver * @ibdev: Verbs IB dev - * @port: port number + * @port_num: port number, 1 based from ib core * @props: structure to hold returned properties * * Returns 0 on success */ -static int rvt_query_port(struct ib_device *ibdev, u8 port, +static int rvt_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *props) { + if (ibport_num_to_idx(ibdev, port_num) < 0) + return -EINVAL; + /* * VT-DRIVER-API: query_port_state() * driver returns pretty much everything in ib_port_attr @@ -138,13 +141,13 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port, /** * rvt_modify_port * @ibdev: Verbs IB dev - * @port: Port number + * @port_num: Port number, 1 based from ib core * @port_modify_mask: How to change the port * @props: Structure to fill in * * Returns 0 on success */ -static int rvt_modify_port(struct ib_device *ibdev, u8 port, +static int 
rvt_modify_port(struct ib_device *ibdev, u8 port_num, int port_modify_mask, struct ib_port_modify *props) { /* @@ -160,18 +163,21 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port, * TBD: send_trap() and post_mad_send() need examined to see where they * fit in. */ + if (ibport_num_to_idx(ibdev, port_num) < 0) + return -EINVAL; + return -EOPNOTSUPP; } /** * rvt_query_pkey - Return a pkey from the table at a given index * @ibdev: Verbs IB dev - * @port: Port number + * @port_num: Port number, 1 based from ib core * @intex: Index into pkey table * * Returns 0 on failure pkey otherwise */ -static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index, +static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, u16 *pkey) { /* @@ -183,11 +189,11 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index, struct rvt_dev_info *rdi = ib_to_rvt(ibdev); int port_index; - if (index >= rvt_get_npkeys(rdi)) + port_index = ibport_num_to_idx(ibdev, port_num); + if (port_index < 0) return -EINVAL; - port_index = port - 1; /* IB ports start at 1 our array at 0 */ - if ((port_index < 0) || (port_index >= rdi->dparms.nports)) + if (index >= rvt_get_npkeys(rdi)) return -EINVAL; *pkey = rvt_get_pkey(rdi, port_index, index); @@ -197,13 +203,13 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index, /** * rvt_query_gid - Return a gid from the table * @ibdev: Verbs IB dev - * @port: Port number + * @port_num: Port number, 1 based from ib core * @index: = Index in table * @gid: Gid to return * * Returns 0 on success */ -static int rvt_query_gid(struct ib_device *ibdev, u8 port, +static int rvt_query_gid(struct ib_device *ibdev, u8 port_num, int index, union ib_gid *gid) { /* @@ -211,6 +217,8 @@ static int rvt_query_gid(struct ib_device *ibdev, u8 port, * to craft the return value. This will work similar to how query_pkey() * is being done. */ + if (ibport_num_to_idx(ibdev, port_num) < 0) + return -EINVAL; return -EOPNOTSUPP; } @@ -455,11 +463,11 @@ EXPORT_SYMBOL(rvt_unregister_device); * They persist until the driver goes away. */ int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, - int portnum, u16 *pkey_table) + int port_index, u16 *pkey_table) { - rdi->ports[portnum] = port; - rdi->ports[portnum]->pkey_table = pkey_table; + rdi->ports[port_index] = port; + rdi->ports[port_index]->pkey_table = pkey_table; return 0; } diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index a5c36d32fa4d..e26f9e94d1ea 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -88,4 +88,16 @@ #define __rvt_pr_err(pdev, name, fmt, ...) 
\ dev_err(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__) +static inline int ibport_num_to_idx(struct ib_device *ibdev, u8 port_num) +{ + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + int port_index; + + port_index = port_num - 1; /* IB ports start at 1 our arrays at 0 */ + if ((port_index < 0) || (port_index >= rdi->dparms.nports)) + return -EINVAL; + + return port_index; +} + #endif /* DEF_RDMAVT_H */ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 52dfa9cf8621..5d1c694a2731 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -256,7 +256,7 @@ struct rvt_driver_provided { void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp); + enum ib_qp_type type, u8 port_num, gfp_t gfp); /** * Return 0 if modification is valid, -errno otherwise */ @@ -408,7 +408,7 @@ int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, - int portnum, u16 *pkey_table); + int port_index, u16 *pkey_table); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, -- cgit v1.2.3 From 1f024992ef05d1eb9b3a0becd1611ecfa21854a6 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 3 Feb 2016 14:15:11 -0800 Subject: IB/rdmavt: Add query gid support. Adding query gid support. Rdmavt still relies on the driver to maintain the gid table. Rdmavt simply calls into the driver to retrieve the guid for a particular port. Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/vt.c | 17 ++++++++++++++--- include/rdma/rdma_vt.h | 2 ++ 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 9f9cb9ab170b..e017117586af 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -210,17 +210,28 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, * Returns 0 on success */ static int rvt_query_gid(struct ib_device *ibdev, u8 port_num, - int index, union ib_gid *gid) + int guid_index, union ib_gid *gid) { + struct rvt_dev_info *rdi; + struct rvt_ibport *rvp; + int port_index; + /* * Driver is responsible for updating the guid table. Which will be used * to craft the return value. This will work similar to how query_pkey() * is being done.
*/ - if (ibport_num_to_idx(ibdev, port_num) < 0) + port_index = ibport_num_to_idx(ibdev, port_num); + if (port_index < 0) return -EINVAL; - return -EOPNOTSUPP; + rdi = ib_to_rvt(ibdev); + rvp = rdi->ports[port_index]; + + gid->global.subnet_prefix = rvp->gid_prefix; + + return rdi->driver_f.get_guid_be(rdi, rvp, guid_index, + &gid->global.interface_id); } struct rvt_ucontext { diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 5d1c694a2731..dabf4d52b4fc 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -248,6 +248,8 @@ struct rvt_driver_provided { u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); int (*mtu_to_path_mtu)(u32 mtu); + int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, + int guid_index, __be64 *guid); /*--------------------*/ /* Optional functions */ -- cgit v1.2.3 From 61a650c14d728354b2d493bed3f1b0531f033dac Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Wed, 3 Feb 2016 14:15:20 -0800 Subject: IB/rdmavt: Add support for query_port, modify_port and get_port_immutable rvt_query_port calls into the driver through a call back function query_port_state to populate the rest of ib_port_attr elements. rvt_modify_port calls into the driver if needed through a call back function shut_down_port() Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/vt.c | 77 ++++++++++++++++++++++++++++----------- include/rdma/rdma_vt.h | 6 +++ 2 files changed, 61 insertions(+), 22 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index e017117586af..2ccf6103e5fa 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -128,14 +128,27 @@ static int rvt_modify_device(struct ib_device *device, static int rvt_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *props) { - if (ibport_num_to_idx(ibdev, port_num) < 0) + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + struct rvt_ibport *rvp; + int port_index = ibport_num_to_idx(ibdev, port_num); + + if (port_index < 0) return -EINVAL; - /* - * VT-DRIVER-API: query_port_state() - * driver returns pretty much everything in ib_port_attr - */ - return -EOPNOTSUPP; + rvp = rdi->ports[port_index]; + memset(props, 0, sizeof(*props)); + props->sm_lid = rvp->sm_lid; + props->sm_sl = rvp->sm_sl; + props->port_cap_flags = rvp->port_cap_flags; + props->max_msg_sz = 0x80000000; + props->pkey_tbl_len = rvt_get_npkeys(rdi); + props->bad_pkey_cntr = rvp->pkey_violations; + props->qkey_viol_cntr = rvp->qkey_violations; + props->subnet_timeout = rvp->subnet_timeout; + props->init_type_reply = 0; + + /* Populate the remaining ib_port_attr elements */ + return rdi->driver_f.query_port_state(rdi, port_num, props); } /** @@ -150,23 +163,26 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num, static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, int port_modify_mask, struct ib_port_modify *props) { - /* - * VT-DRIVER-API: set_link_state() - * driver will set the link state using the IB enumeration - * - * VT-DRIVER-API: clear_qkey_violations() - * clears driver private qkey counter - * - * VT-DRIVER-API: get_lid() - * driver needs to return the LID - * - * TBD: send_trap() and post_mad_send() need examined to see where they - * fit in. 
- */ - if (ibport_num_to_idx(ibdev, port_num) < 0) + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + struct rvt_ibport *rvp; + int ret = 0; + int port_index = ibport_num_to_idx(ibdev, port_num); + + if (port_index < 0) return -EINVAL; - return -EOPNOTSUPP; + rvp = rdi->ports[port_index]; + rvp->port_cap_flags |= props->set_port_cap_mask; + rvp->port_cap_flags &= ~props->clr_port_cap_mask; + + if (props->set_port_cap_mask || props->clr_port_cap_mask) + rdi->driver_f.cap_mask_chg(rdi, port_num); + if (port_modify_mask & IB_PORT_SHUTDOWN) + ret = rdi->driver_f.shut_down_port(rdi, port_num); + if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) + rvp->qkey_violations = 0; + + return ret; } /** @@ -273,7 +289,24 @@ static int rvt_dealloc_ucontext(struct ib_ucontext *context) static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { - return -EOPNOTSUPP; + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + struct ib_port_attr attr; + int err, port_index; + + port_index = ibport_num_to_idx(ibdev, port_num); + if (port_index < 0) + return -EINVAL; + + err = rvt_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = rdi->dparms.core_cap_flags; + immutable->max_mad_size = rdi->dparms.max_mad_size; + + return 0; } /* diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index dabf4d52b4fc..4242fea9cf4e 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -200,6 +200,8 @@ struct rvt_driver_params { int psn_mask; int psn_shift; int psn_modify_mask; + u32 core_cap_flags; + u32 max_mad_size; }; /* Protection domain */ @@ -250,6 +252,10 @@ struct rvt_driver_provided { int (*mtu_to_path_mtu)(u32 mtu); int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, int guid_index, __be64 *guid); + int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num, + struct ib_port_attr *props); + int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num); + void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num); /*--------------------*/ /* Optional functions */ -- cgit v1.2.3 From e1bf0d5ecdc49cd4e2014da0d60efa74f5714fba Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Wed, 3 Feb 2016 14:36:58 -0800 Subject: staging/rdma/hfi1, IB/core: Fix LinkDownReason define for consistency LinkDownReason LocalMediaNotInstalled lacked an underscore and was inconsistent with other defines in the same family. This patch fixes this. 
Reviewed-by: Ira Weiny Signed-off-by: Easwar Hariharan Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/chip.c | 4 ++-- drivers/staging/rdma/hfi1/platform.c | 2 +- include/rdma/opa_port_info.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/rdma') diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 8c06e3b0185a..f31cc238d6db 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -5950,12 +5950,12 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) if ((ppd->offline_disabled_reason > HFI1_ODR_MASK( - OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED)) || + OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED)) || (ppd->offline_disabled_reason == HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))) ppd->offline_disabled_reason = HFI1_ODR_MASK( - OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED); + OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED); if (ppd->host_link_state == HLS_DN_POLL) { /* diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c index c3df1d892754..506a82766b33 100644 --- a/drivers/staging/rdma/hfi1/platform.c +++ b/drivers/staging/rdma/hfi1/platform.c @@ -816,7 +816,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) } else ppd->offline_disabled_reason = HFI1_ODR_MASK( - OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED); + OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED); break; default: dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__); diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index a0fa975cd1c1..2b95c2c336eb 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -97,7 +97,7 @@ #define OPA_LINKDOWN_REASON_WIDTH_POLICY 41 /* 42-48 reserved */ #define OPA_LINKDOWN_REASON_DISCONNECTED 49 -#define OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED 50 +#define OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED 50 #define OPA_LINKDOWN_REASON_NOT_INSTALLED 51 #define OPA_LINKDOWN_REASON_CHASSIS_CONFIG 52 /* 53 reserved */ -- cgit v1.2.3 From 066fad289408e7d48ce00b54a292e7a90602cb30 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 Feb 2016 11:03:36 -0800 Subject: IB/rdmavt: remove unused qp field The field is a vestige from ipath. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 91f20fd91e00..aed13e13591c 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -251,7 +251,6 @@ struct rvt_qp { u32 pmtu; /* decoded from path_mtu */ u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ u32 s_ahgpsn; /* set to the psn in the copy of the header */ u8 state; /* QP state */ -- cgit v1.2.3 From bfee5e32e701b98634b380a9eef8b5820feb7488 Mon Sep 17 00:00:00 2001 From: Vennila Megavannan Date: Tue, 9 Feb 2016 14:29:49 -0800 Subject: IB/rdmavt, staging/rdma/hfi1: use qps to dynamically scale timeout value A busy_jiffies variable is maintained and updated when rc qps are created and deleted. busy_jiffies is a scaled value of the number of rc qps in the device. busy_jiffies is incremented every rc qp scaling interval. busy_jiffies is added to the rc timeout in add_retry_timer and mod_retry_timer. 
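Concretely, with the RC_QP_SCALING_INTERVAL of 5 this patch defines in rdmavt_qp.h, the scaling works out as in this sketch (the statements are taken from the hunks below; the 100-QP figure is only an example):

	/* one extra jiffy of retry timeout per 5 RC QPs on the device */
	rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
	/* e.g. 100 RC QPs -> 20 jiffies of slack when arming the timer */
	qp->s_timer.expires = jiffies + qp->timeout_jiffies + rdi->busy_jiffies;

The slack shrinks again as RC QPs are destroyed, since rvt_destroy_qp() recomputes busy_jiffies from the decremented count.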
The rc qp scaling interval is selected based on extensive performance evaluation of targeted workloads. Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Vennila Megavannan Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 17 +++++++++++++++++ drivers/staging/rdma/hfi1/rc.c | 12 ++++++++++-- include/rdma/rdma_vt.h | 4 +++- include/rdma/rdmavt_qp.h | 2 ++ 4 files changed, 32 insertions(+), 3 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 439213c37537..7dc837c6554b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -685,6 +685,19 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } rdi->n_qps_allocated++; + /* + * Maintain a busy_jiffies variable that will be added to the timeout + * period in mod_retry_timer and add_retry_timer. This busy jiffies + * is scaled by the number of rc qps created for the device to reduce + * the number of timeouts occurring when there is a large number of + * qps. busy_jiffies is incremented every rc qp scaling interval. + * The scaling interval is selected based on extensive performance + * evaluation of targeted workloads. + */ + if (init_attr->qp_type == IB_QPT_RC) { + rdi->n_rc_qps++; + rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL; + } spin_unlock(&rdi->n_qps_lock); if (qp->ip) { @@ -1223,6 +1236,10 @@ int rvt_destroy_qp(struct ib_qp *ibqp) spin_lock(&rdi->n_qps_lock); rdi->n_qps_allocated--; + if (qp->ibqp.qp_type == IB_QPT_RC) { + rdi->n_rc_qps--; + rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL; + } spin_unlock(&rdi->n_qps_lock); if (qp->ip) diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index ba2a2ccac6f2..a4a44d33d857 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -68,9 +68,13 @@ */ static inline void hfi1_add_retry_timer(struct rvt_qp *qp) { + struct ib_qp *ibqp = &qp->ibqp; + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ - qp->s_timer.expires = jiffies + qp->timeout_jiffies; + qp->s_timer.expires = jiffies + qp->timeout_jiffies + + rdi->busy_jiffies; add_timer(&qp->s_timer); } @@ -99,9 +103,13 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) */ static inline void hfi1_mod_retry_timer(struct rvt_qp *qp) { + struct ib_qp *ibqp = &qp->ibqp; + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. 
* (1 << qp->timeout) */ - mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); + mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies + + rdi->busy_jiffies); } /** diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4242fea9cf4e..5ccf683b28f1 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -318,7 +318,9 @@ struct rvt_dev_info { /* QP */ struct rvt_qp_ibdev *qp_dev; u32 n_qps_allocated; /* number of QPs allocated for device */ - spinlock_t n_qps_lock; /* keep track of number of qps */ + u32 n_rc_qps; /* number of RC QPs allocated for device */ + u32 busy_jiffies; /* timeout scaling based on RC QP count */ + spinlock_t n_qps_lock; /* protect qps, rc qps and busy jiffy counts */ /* memory maps */ struct list_head pending_mmaps; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index aed13e13591c..b3ea74579316 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -225,6 +225,8 @@ struct rvt_ack_entry { }; }; +#define RC_QP_SCALING_INTERVAL 5 + /* * Variables prefixed with s_ are for the requester (sender). * Variables prefixed with r_ are for the responder (receiver). -- cgit v1.2.3 From 46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:10:04 -0800 Subject: IB/qib, staging/rdma/hfi1: add s_hlock for use in post send This patch adds an additional lock to reduce contention on the s_lock. This lock is used in post_send() so that the post_send is not serialized with the send engine and other send related processing. To do this the s_next_psn is now maintained on post_send() while post_send() related fields are moved to a new cache line. There is an s_avail maintained for the post_send() to mitigate trading cache lines with the send engine. The lock is released/acquired around releasing the just built packet to the egress mechanism. Reviewed-by: Jubin John Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Harish Chegondi Signed-off-by: Mike Marciniszyn Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_qp.c | 36 +++++++++++++ drivers/infiniband/hw/qib/qib_rc.c | 44 ++++------------ drivers/infiniband/hw/qib/qib_ruc.c | 11 ++-- drivers/infiniband/hw/qib/qib_uc.c | 22 ++++---- drivers/infiniband/hw/qib/qib_ud.c | 22 ++++---- drivers/infiniband/hw/qib/qib_verbs.c | 37 +++++++++---- drivers/infiniband/hw/qib/qib_verbs.h | 6 +-- drivers/infiniband/sw/rdmavt/qp.c | 97 +++++++++++++++++++++++++++-------- drivers/staging/rdma/hfi1/qp.c | 79 +++++++++++++++++++++++++--- drivers/staging/rdma/hfi1/qp.h | 37 +------------ drivers/staging/rdma/hfi1/rc.c | 44 ++++------------ drivers/staging/rdma/hfi1/ruc.c | 40 ++++++++------- drivers/staging/rdma/hfi1/uc.c | 21 ++++---- drivers/staging/rdma/hfi1/ud.c | 22 ++++---- drivers/staging/rdma/hfi1/verbs.c | 3 +- drivers/staging/rdma/hfi1/verbs.h | 2 +- include/rdma/rdma_vt.h | 4 +- include/rdma/rdmavt_qp.h | 13 +++-- 18 files changed, 319 insertions(+), 221 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 01d49dc91de2..6ffa0221da9f 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -474,6 +474,42 @@ void qib_get_credit(struct rvt_qp *qp, u32 aeth) } } +/** + * qib_check_send_wqe - validate wr/wqe + * @qp - The qp + * @wqe - The built wqe + * + * validate wr/wqe. 
This is called + * prior to inserting the wqe into + * the ring but after the wqe has been + * setup. + * + * Returns 0 on success, -EINVAL on failure + */ +int qib_check_send_wqe(struct rvt_qp *qp, + struct rvt_swqe *wqe) +{ + struct rvt_ah *ah; + + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: + if (wqe->length > 0x80000000U) + return -EINVAL; + break; + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_UD: + ah = ibah_to_rvtah(wqe->ud_wr.ah); + if (wqe->length > (1 << ah->log_pmtu)) + return -EINVAL; + break; + default: + break; + } + return 0; +} + #ifdef CONFIG_DEBUG_FS struct qib_qp_iter { diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index ce886b2ade74..9088e26d3ac8 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -226,6 +226,8 @@ bail: * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP * + * Assumes the s_lock is held. + * * Return 1 if constructed; otherwise, return 0. */ int qib_make_rc_req(struct rvt_qp *qp) @@ -241,7 +243,6 @@ int qib_make_rc_req(struct rvt_qp *qp) u32 bth2; u32 pmtu = qp->pmtu; char newreq; - unsigned long flags; int ret = 0; int delta; @@ -249,12 +250,6 @@ int qib_make_rc_req(struct rvt_qp *qp) if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) ohdr = &priv->s_hdr->u.l.oth; - /* - * The lock is needed to synchronize between the sending tasklet, - * the receive interrupt handler, and timeout resends. - */ - spin_lock_irqsave(&qp->s_lock, flags); - /* Sending responses has higher priority over sending requests. */ if ((qp->s_flags & RVT_S_RESP_PENDING) && qib_make_rc_ack(dev, qp, ohdr, pmtu)) @@ -264,7 +259,8 @@ int qib_make_rc_req(struct rvt_qp *qp) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_dma_busy)) { @@ -321,8 +317,8 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_FENCE; goto bail; } - wqe->psn = qp->s_next_psn; newreq = 1; + qp->s_psn = wqe->psn; } /* * Note that we have to be careful not to modify the @@ -341,9 +337,7 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(SEND_FIRST); len = pmtu; break; @@ -381,9 +375,7 @@ int qib_make_rc_req(struct rvt_qp *qp) cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(RDMA_WRITE_FIRST); len = pmtu; break; @@ -418,13 +410,6 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* - * Adjust s_next_psn to count the - * expected number of responses. 
- */ - if (len > pmtu) - qp->s_next_psn += (len - 1) / pmtu; - wqe->lpsn = qp->s_next_psn++; } ohdr->u.rc.reth.vaddr = @@ -456,7 +441,6 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - wqe->lpsn = wqe->psn; } if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); @@ -499,11 +483,8 @@ int qib_make_rc_req(struct rvt_qp *qp) } if (wqe->wr.opcode == IB_WR_RDMA_READ) qp->s_psn = wqe->lpsn + 1; - else { + else qp->s_psn++; - if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - } break; case OP(RDMA_READ_RESPONSE_FIRST): @@ -523,8 +504,6 @@ int qib_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(SEND_MIDDLE): bth2 = qp->s_psn++ & QIB_PSN_MASK; - if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -564,8 +543,6 @@ int qib_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(RDMA_WRITE_MIDDLE): bth2 = qp->s_psn++ & QIB_PSN_MASK; - if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -630,13 +607,9 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_cur_size = len; qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -1454,7 +1427,8 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, goto ack_done; /* Ignore invalid responses. */ - if (qib_cmp24(psn, qp->s_next_psn) >= 0) + smp_read_barrier_depends(); /* see post_one_send */ + if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0) goto ack_done; /* Ignore duplicate responses. */ diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 2623684745f0..a5f07a64b228 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -391,7 +391,8 @@ static void qib_ruc_loopback(struct rvt_qp *sqp) sqp->s_flags |= RVT_S_BUSY; again: - if (sqp->s_last == sqp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (sqp->s_last == ACCESS_ONCE(sqp->s_head)) goto clr_busy; wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); @@ -765,22 +766,24 @@ void qib_do_send(struct rvt_qp *qp) qp->s_flags |= RVT_S_BUSY; - spin_unlock_irqrestore(&qp->s_lock, flags); - do { /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { + spin_unlock_irqrestore(&qp->s_lock, flags); /* * If the packet cannot be sent now, return and * the send tasklet will be woken up later. */ if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) - break; + return; /* Record that s_hdr is empty. */ qp->s_hdrwords = 0; + spin_lock_irqsave(&qp->s_lock, flags); } } while (make_req(qp)); + + spin_unlock_irqrestore(&qp->s_lock, flags); } /* diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 1b2fc69855b2..7bdbc79ceaa3 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -41,6 +41,8 @@ * qib_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP * + * Assumes the s_lock is held. + * * Return 1 if constructed; otherwise, return 0. 
*/ int qib_make_uc_req(struct rvt_qp *qp) @@ -48,20 +50,18 @@ int qib_make_uc_req(struct rvt_qp *qp) struct qib_qp_priv *priv = qp->priv; struct qib_other_headers *ohdr; struct rvt_swqe *wqe; - unsigned long flags; u32 hwords; u32 bth0; u32 len; u32 pmtu = qp->pmtu; int ret = 0; - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_dma_busy)) { @@ -90,13 +90,13 @@ int qib_make_uc_req(struct rvt_qp *qp) RVT_PROCESS_NEXT_SEND_OK)) goto bail; /* Check if send work queue is empty. */ - if (qp->s_cur == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_cur == ACCESS_ONCE(qp->s_head)) goto bail; /* * Start a new request. */ - wqe->psn = qp->s_next_psn; - qp->s_psn = qp->s_next_psn; + qp->s_psn = wqe->psn; qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sg_list = wqe->sg_list + 1; qp->s_sge.num_sge = wqe->wr.num_sge; @@ -215,15 +215,11 @@ int qib_make_uc_req(struct rvt_qp *qp) qp->s_cur_sge = &qp->s_sge; qp->s_cur_size = len; qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), - qp->s_next_psn++ & QIB_PSN_MASK); + qp->s_psn++ & QIB_PSN_MASK); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index fe4917272b89..d9502137de62 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -234,6 +234,8 @@ drop: * qib_make_ud_req - construct a UD request packet * @qp: the QP * + * Assumes the s_lock is held. + * * Return 1 if constructed; otherwise, return 0. */ int qib_make_ud_req(struct rvt_qp *qp) @@ -244,7 +246,6 @@ int qib_make_ud_req(struct rvt_qp *qp) struct qib_pportdata *ppd; struct qib_ibport *ibp; struct rvt_swqe *wqe; - unsigned long flags; u32 nwords; u32 extra_bytes; u32 bth0; @@ -253,13 +254,12 @@ int qib_make_ud_req(struct rvt_qp *qp) int ret = 0; int next_cur; - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. 
*/ if (atomic_read(&priv->s_dma_busy)) { @@ -271,7 +271,9 @@ int qib_make_ud_req(struct rvt_qp *qp) goto done; } - if (qp->s_cur == qp->s_head) + /* see post_one_send() */ + smp_read_barrier_depends(); + if (qp->s_cur == ACCESS_ONCE(qp->s_head)) goto bail; wqe = rvt_get_swqe_ptr(qp, qp->s_cur); @@ -292,6 +294,7 @@ int qib_make_ud_req(struct rvt_qp *qp) this_cpu_inc(ibp->pmastats->n_unicast_xmit); lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); if (unlikely(lid == ppd->lid)) { + unsigned long flags; /* * If DMAs are in progress, we can't generate * a completion for the loopback packet since @@ -304,6 +307,7 @@ int qib_make_ud_req(struct rvt_qp *qp) goto bail; } qp->s_cur = next_cur; + local_irq_save(flags); spin_unlock_irqrestore(&qp->s_lock, flags); qib_ud_loopback(qp, wqe); spin_lock_irqsave(&qp->s_lock, flags); @@ -378,7 +382,7 @@ int qib_make_ud_req(struct rvt_qp *qp) ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ? cpu_to_be32(QIB_MULTICAST_QPN) : cpu_to_be32(wqe->ud_wr.remote_qpn); - ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK); + ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). @@ -388,13 +392,9 @@ int qib_make_ud_req(struct rvt_qp *qp) ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index fa94f78073cf..5cf019fb50d9 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1662,6 +1662,7 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name; dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; + dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe; dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah; dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn; dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc; @@ -1677,6 +1678,7 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu; dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp; dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr; + dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send; dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port; dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port; dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg; @@ -1778,17 +1780,34 @@ void qib_unregister_ib_device(struct qib_devdata *dd) dev->pio_hdrs, dev->pio_hdrs_phys); } -/* - * This must be called with s_lock held. +/** + * _qib_schedule_send - schedule progress + * @qp - the qp + * + * This schedules progress w/o regard to the s_flags. + * + * It is only used in post send, which doesn't hold + * the s_lock. 
*/ -void qib_schedule_send(struct rvt_qp *qp) +void _qib_schedule_send(struct rvt_qp *qp) { + struct qib_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); struct qib_qp_priv *priv = qp->priv; - if (qib_send_ok(qp)) { - struct qib_ibport *ibp = - to_iport(qp->ibqp.device, qp->port_num); - struct qib_pportdata *ppd = ppd_from_ibp(ibp); - queue_work(ppd->qib_wq, &priv->s_work); - } + queue_work(ppd->qib_wq, &priv->s_work); +} + +/** + * qib_schedule_send - schedule progress + * @qp - the qp + * + * This schedules qp progress. The s_lock + * should be held. + */ +void qib_schedule_send(struct rvt_qp *qp) +{ + if (qib_send_ok(qp)) + _qib_schedule_send(qp); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index b88e027b6cb0..d137d714935d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -298,9 +298,7 @@ static inline int qib_send_ok(struct rvt_qp *qp) !(qp->s_flags & RVT_S_ANY_WAIT_SEND)); } -/* - * This must be called with s_lock held. - */ +void _qib_schedule_send(struct rvt_qp *qp); void qib_schedule_send(struct rvt_qp *qp); static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) @@ -392,6 +390,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); + struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); void qib_rc_rnr_retry(unsigned long arg); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 7dc837c6554b..522404ac7c38 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -401,6 +401,7 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, rdi->driver_f.flush_qp_waiters(qp); qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); spin_unlock_irq(&qp->r_lock); /* Stop the send queue and the retry timer */ @@ -415,6 +416,7 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, /* grab the lock b/c it was locked at call time */ spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); rvt_clear_mr_refs(qp, 1); @@ -610,6 +612,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, * except for qp->ibqp.qp_num. */ spin_lock_init(&qp->r_lock); + spin_lock_init(&qp->s_hlock); spin_lock_init(&qp->s_lock); spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); @@ -620,6 +623,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->state = IB_QPS_RESET; qp->s_wq = swq; qp->s_size = init_attr->cap.max_send_wr + 1; + qp->s_avail = init_attr->cap.max_send_wr; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) qp->s_flags = RVT_S_SIGNAL_REQ_WR; @@ -779,6 +783,7 @@ void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) wqe->ud_wr.ah)->refcount); if (++qp->s_last >= qp->s_size) qp->s_last = 0; + smp_wmb(); /* see qp_set_savail */ } if (qp->s_rdma_mr) { rvt_put_mr(qp->s_rdma_mr); @@ -833,7 +838,7 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) rdi->driver_f.notify_error_qp(qp); /* Schedule the sending tasklet to drain the send work queue. 
*/ - if (qp->s_last != qp->s_head) + if (ACCESS_ONCE(qp->s_last) != qp->s_head) rdi->driver_f.schedule_send(qp); rvt_clear_mr_refs(qp, 0); @@ -979,6 +984,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, link = rdma_port_get_link_layer(ibqp->device, qp->port_num); spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); cur_state = attr_mask & IB_QP_CUR_STATE ? @@ -1151,6 +1157,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_PATH_MTU) { qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu); qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu); + qp->log_pmtu = ilog2(qp->pmtu); } if (attr_mask & IB_QP_RETRY_CNT) { @@ -1186,6 +1193,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, rdi->driver_f.modify_qp(qp, attr, attr_mask, udata); spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); spin_unlock_irq(&qp->r_lock); if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) @@ -1207,6 +1215,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, inval: spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); spin_unlock_irq(&qp->r_lock); return -EINVAL; } @@ -1226,9 +1235,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp) struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); rvt_reset_qp(rdi, qp, ibqp->qp_type); spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); spin_unlock_irq(&qp->r_lock); /* qpn is now available for use again */ @@ -1357,6 +1368,28 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, return 0; } +/** + * qp_get_savail - return number of avail send entries + * + * @qp - the qp + * + * This assumes the s_hlock is held but the s_last + * qp variable is uncontrolled. + */ +static inline u32 qp_get_savail(struct rvt_qp *qp) +{ + u32 slast; + u32 ret; + + smp_read_barrier_depends(); /* see rc.c */ + slast = ACCESS_ONCE(qp->s_last); + if (qp->s_head >= slast) + ret = qp->s_size - (qp->s_head - slast); + else + ret = slast - qp->s_head; + return ret - 1; +} + /** * rvt_post_one_wr - post one RC, UC, or UD send work request * @qp: the QP to post on @@ -1372,6 +1405,8 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) struct rvt_lkey_table *rkt; struct rvt_pd *pd; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + u8 log_pmtu; + int ret; /* IB spec says that num_sge == 0 is OK. 
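+ * A zero-length list simply produces a zero-byte message; only an
+ * over-long list is rejected below.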
*/ if (unlikely(wr->num_sge > qp->s_max_sge)) @@ -1403,16 +1438,16 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { return -EINVAL; } - + /* check for avail */ + if (unlikely(!qp->s_avail)) { + qp->s_avail = qp_get_savail(qp); + WARN_ON(qp->s_avail > (qp->s_size - 1)); + if (!qp->s_avail) + return -ENOMEM; + } next = qp->s_head + 1; if (next >= qp->s_size) next = 0; - if (next == qp->s_last) - return -ENOMEM; - - if (rdi->driver_f.check_send_wr && - rdi->driver_f.check_send_wr(qp, wr)) - return -EINVAL; rkt = &rdi->lkey_table; pd = ibpd_to_rvtpd(qp->ibqp.pd); @@ -1444,21 +1479,39 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) continue; ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], &wr->sg_list[i], acc); - if (!ok) + if (!ok) { + ret = -EINVAL; goto bail_inval_free; + } wqe->length += length; j++; } wqe->wr.num_sge = j; } - if (qp->ibqp.qp_type == IB_QPT_UC || - qp->ibqp.qp_type == IB_QPT_RC) { - if (wqe->length > 0x80000000U) + + /* general part of wqe valid - allow for driver checks */ + if (rdi->driver_f.check_send_wqe) { + ret = rdi->driver_f.check_send_wqe(qp, wqe); + if (ret) goto bail_inval_free; - } else { + } + + log_pmtu = qp->log_pmtu; + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) { + struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah); + + log_pmtu = ah->log_pmtu; atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); } + wqe->ssn = qp->s_ssn++; + wqe->psn = qp->s_next_psn; + wqe->lpsn = wqe->psn + + (wqe->length ? ((wqe->length - 1) >> log_pmtu) : 0); + qp->s_next_psn = wqe->lpsn + 1; + smp_wmb(); /* see request builders */ + qp->s_avail--; qp->s_head = next; return 0; @@ -1470,7 +1523,7 @@ bail_inval_free: rvt_put_mr(sge->mr); } - return -EINVAL; + return ret; } /** @@ -1491,14 +1544,14 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned nreq = 0; int err = 0; - spin_lock_irqsave(&qp->s_lock, flags); + spin_lock_irqsave(&qp->s_hlock, flags); /* * Ensure QP state is such that we can send. If not bail out early, * there is no need to do this every time we post a send. */ if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) { - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock_irqrestore(&qp->s_hlock, flags); return -EINVAL; } @@ -1518,11 +1571,13 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, nreq++; } bail: - if (nreq && !call_send) - rdi->driver_f.schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, flags); - if (nreq && call_send) - rdi->driver_f.do_send(qp); + spin_unlock_irqrestore(&qp->s_hlock, flags); + if (nreq) { + if (call_send) + rdi->driver_f.schedule_send_no_lock(qp); + else + rdi->driver_f.do_send(qp); + } return err; } diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index ec9ee726267b..00866c07fddc 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -226,16 +226,45 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, } } -int hfi1_check_send_wr(struct rvt_qp *qp, struct ib_send_wr *wr) +/** + * hfi1_check_send_wqe - validate wqe + * @qp - The qp + * @wqe - The built wqe + * + * validate wqe. This is called + * prior to inserting the wqe into + * the ring but after the wqe has been + * setup. 
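+ *
+ * Typical checks are the per-QP-type length limits and, for UD
+ * QPs, that the AH maps to a usable service level.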
+ * + * Returns 0 on success, -EINVAL on failure + * + */ +int hfi1_check_send_wqe(struct rvt_qp *qp, + struct rvt_swqe *wqe) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct rvt_ah *ah = ibah_to_rvtah(ud_wr(wr)->ah); + struct rvt_ah *ah; - if (qp->ibqp.qp_type != IB_QPT_RC && - qp->ibqp.qp_type != IB_QPT_UC && - qp->ibqp.qp_type != IB_QPT_SMI && - ibp->sl_to_sc[ah->attr.sl] == 0xf) { - return -EINVAL; + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: + if (wqe->length > 0x80000000U) + return -EINVAL; + break; + case IB_QPT_SMI: + ah = ibah_to_rvtah(wqe->ud_wr.ah); + if (wqe->length > (1 << ah->log_pmtu)) + return -EINVAL; + break; + case IB_QPT_GSI: + case IB_QPT_UD: + ah = ibah_to_rvtah(wqe->ud_wr.ah); + if (wqe->length > (1 << ah->log_pmtu)) + return -EINVAL; + if (ibp->sl_to_sc[ah->attr.sl] == 0xf) + return -EINVAL; + default: + break; } return 0; } @@ -301,6 +330,42 @@ __be32 hfi1_compute_aeth(struct rvt_qp *qp) return cpu_to_be32(aeth); } +/** + * _hfi1_schedule_send - schedule progress + * @qp: the QP + * + * This schedules qp progress w/o regard to the s_flags. + * + * It is only used in the post send, which doesn't hold + * the s_lock. + */ +void _hfi1_schedule_send(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + struct hfi1_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + + iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, + priv->s_sde ? + priv->s_sde->cpu : + cpumask_first(cpumask_of_node(dd->node))); +} + +/** + * hfi1_schedule_send - schedule progress + * @qp: the QP + * + * This schedules qp progress and caller should hold + * the s_lock. + */ +void hfi1_schedule_send(struct rvt_qp *qp) +{ + if (hfi1_send_ok(qp)) + _hfi1_schedule_send(qp); +} + /** * hfi1_get_credit - flush the send work queue of a QP * @qp: the qp who's send work queue to flush diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 973c14b5268a..98827b5dd2a1 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -137,41 +137,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter); */ void qp_comm_est(struct rvt_qp *qp); -/** - * _hfi1_schedule_send - schedule progress - * @qp: the QP - * - * This schedules qp progress w/o regard to the s_flags. - * - * It is only used in the post send, which doesn't hold - * the s_lock. - */ -static inline void _hfi1_schedule_send(struct rvt_qp *qp) -{ - struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_ibport *ibp = - to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); - - iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, - priv->s_sde ? - priv->s_sde->cpu : - cpumask_first(cpumask_of_node(dd->node))); -} - -/** - * hfi1_schedule_send - schedule progress - * @qp: the QP - * - * This schedules qp progress and caller should hold - * the s_lock. 
- */ -static inline void hfi1_schedule_send(struct rvt_qp *qp) -{ - if (hfi1_send_ok(qp)) - _hfi1_schedule_send(qp); -} +void _hfi1_schedule_send(struct rvt_qp *qp); +void hfi1_schedule_send(struct rvt_qp *qp); void hfi1_migrate_qp(struct rvt_qp *qp); diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index a4a44d33d857..a62c9424fa86 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -367,6 +367,8 @@ bail: * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP * + * Assumes s_lock is held. + * * Return 1 if constructed; otherwise, return 0. */ int hfi1_make_rc_req(struct rvt_qp *qp) @@ -383,7 +385,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) u32 bth2; u32 pmtu = qp->pmtu; char newreq; - unsigned long flags; int ret = 0; int middle = 0; int delta; @@ -392,12 +393,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) ohdr = &priv->s_hdr->ibh.u.l.oth; - /* - * The lock is needed to synchronize between the sending tasklet, - * the receive interrupt handler, and timeout re-sends. - */ - spin_lock_irqsave(&qp->s_lock, flags); - /* Sending responses has higher priority over sending requests. */ if ((qp->s_flags & RVT_S_RESP_PENDING) && make_rc_ack(dev, qp, ohdr, pmtu)) @@ -407,7 +402,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_iowait.sdma_busy)) { @@ -463,8 +459,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_FENCE; goto bail; } - wqe->psn = qp->s_next_psn; newreq = 1; + qp->s_psn = wqe->psn; } /* * Note that we have to be careful not to modify the @@ -483,9 +479,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(SEND_FIRST); len = pmtu; break; @@ -522,9 +516,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(RDMA_WRITE_FIRST); len = pmtu; break; @@ -559,13 +551,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* - * Adjust s_next_psn to count the - * expected number of responses. 
- */ - if (len > pmtu) - qp->s_next_psn += (len - 1) / pmtu; - wqe->lpsn = qp->s_next_psn++; } ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr); @@ -596,7 +581,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - wqe->lpsn = wqe->psn; } if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); @@ -639,11 +623,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) } if (wqe->wr.opcode == IB_WR_RDMA_READ) qp->s_psn = wqe->lpsn + 1; - else { + else qp->s_psn++; - if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - } break; case OP(RDMA_READ_RESPONSE_FIRST): @@ -663,8 +644,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(SEND_MIDDLE): bth2 = mask_psn(qp->s_psn++); - if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -705,8 +684,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(RDMA_WRITE_MIDDLE): bth2 = mask_psn(qp->s_psn++); - if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -777,13 +754,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp) bth2, middle); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -1563,7 +1536,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, trace_hfi1_rc_ack(qp, psn); /* Ignore invalid responses. */ - if (cmp_psn(psn, qp->s_next_psn) >= 0) + smp_read_barrier_depends(); /* see post_one_send */ + if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0) goto ack_done; /* Ignore duplicate responses. */ diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index a7add3c5d0f2..6114550bb73f 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -392,7 +392,8 @@ static void ruc_loopback(struct rvt_qp *sqp) sqp->s_flags |= RVT_S_BUSY; again: - if (sqp->s_last == sqp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (sqp->s_last == ACCESS_ONCE(sqp->s_head)) goto clr_busy; wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); @@ -871,40 +872,43 @@ void hfi1_do_send(struct rvt_qp *qp) qp->s_flags |= RVT_S_BUSY; - spin_unlock_irqrestore(&qp->s_lock, flags); - timeout = jiffies + (timeout_int) / 8; cpu = priv->s_sde ? priv->s_sde->cpu : cpumask_first(cpumask_of_node(ps.ppd->dd->node)); do { /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { + spin_unlock_irqrestore(&qp->s_lock, flags); /* * If the packet cannot be sent now, return and * the send tasklet will be woken up later. */ if (hfi1_verbs_send(qp, &ps)) - break; + return; /* Record that s_hdr is empty. 
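+ * The header was handed off to hfi1_verbs_send() above, so the
+ * loop can go build the next packet.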
*/ qp->s_hdrwords = 0; - } - - /* allow other tasks to run */ - if (unlikely(time_after(jiffies, timeout))) { - if (workqueue_congested(cpu, ps.ppd->hfi1_wq)) { - spin_lock_irqsave(&qp->s_lock, flags); - qp->s_flags &= ~RVT_S_BUSY; - hfi1_schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, - flags); + /* allow other tasks to run */ + if (unlikely(time_after(jiffies, timeout))) { + if (workqueue_congested(cpu, + ps.ppd->hfi1_wq)) { + spin_lock_irqsave(&qp->s_lock, flags); + qp->s_flags &= ~RVT_S_BUSY; + hfi1_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, + flags); + this_cpu_inc( + *ps.ppd->dd->send_schedule); + return; + } + cond_resched(); this_cpu_inc(*ps.ppd->dd->send_schedule); - return; + timeout = jiffies + (timeout_int) / 8; } - cond_resched(); - this_cpu_inc(*ps.ppd->dd->send_schedule); - timeout = jiffies + (timeout_int) / 8; + spin_lock_irqsave(&qp->s_lock, flags); } } while (make_req(qp)); + + spin_unlock_irqrestore(&qp->s_lock, flags); } /* diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 0aa604b7557b..f884b5c8051b 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -59,6 +59,8 @@ * hfi1_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP * + * Assume s_lock is held. + * * Return 1 if constructed; otherwise, return 0. */ int hfi1_make_uc_req(struct rvt_qp *qp) @@ -66,7 +68,6 @@ int hfi1_make_uc_req(struct rvt_qp *qp) struct hfi1_qp_priv *priv = qp->priv; struct hfi1_other_headers *ohdr; struct rvt_swqe *wqe; - unsigned long flags; u32 hwords = 5; u32 bth0 = 0; u32 len; @@ -74,13 +75,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp) int ret = 0; int middle = 0; - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_iowait.sdma_busy)) { @@ -106,15 +106,15 @@ int hfi1_make_uc_req(struct rvt_qp *qp) RVT_PROCESS_NEXT_SEND_OK)) goto bail; /* Check if send work queue is empty. */ - if (qp->s_cur == qp->s_head) { + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_cur == ACCESS_ONCE(qp->s_head)) { clear_ahg(qp); goto bail; } /* * Start a new request. */ - wqe->psn = qp->s_next_psn; - qp->s_psn = qp->s_next_psn; + qp->s_psn = wqe->psn; qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sg_list = wqe->sg_list + 1; qp->s_sge.num_sge = wqe->wr.num_sge; @@ -235,15 +235,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp) qp->s_cur_sge = &qp->s_sge; qp->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), - mask_psn(qp->s_next_psn++), middle); + mask_psn(qp->s_psn++), middle); done: - ret = 1; - goto unlock; + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index fdf6e3bee8f1..ba78e2e3e0bb 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -261,6 +261,8 @@ drop: * hfi1_make_ud_req - construct a UD request packet * @qp: the QP * + * Assume s_lock is held. + * * Return 1 if constructed; otherwise, return 0. 
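+ * The caller (hfi1_do_send()) now takes s_lock once around the
+ * whole packet-building loop instead of this routine locking on
+ * every call.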
*/ int hfi1_make_ud_req(struct rvt_qp *qp) @@ -271,7 +273,6 @@ int hfi1_make_ud_req(struct rvt_qp *qp) struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; struct rvt_swqe *wqe; - unsigned long flags; u32 nwords; u32 extra_bytes; u32 bth0; @@ -281,13 +282,12 @@ int hfi1_make_ud_req(struct rvt_qp *qp) int next_cur; u8 sc5; - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_iowait.sdma_busy)) { @@ -299,7 +299,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp) goto done; } - if (qp->s_cur == qp->s_head) + /* see post_one_send() */ + smp_read_barrier_depends(); + if (qp->s_cur == ACCESS_ONCE(qp->s_head)) goto bail; wqe = rvt_get_swqe_ptr(qp, qp->s_cur); @@ -317,6 +319,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) if (unlikely(!loopback && (lid == ppd->lid || (lid == be16_to_cpu(IB_LID_PERMISSIVE) && qp->ibqp.qp_type == IB_QPT_GSI)))) { + unsigned long flags; /* * If DMAs are in progress, we can't generate * a completion for the loopback packet since @@ -329,6 +332,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) goto bail; } qp->s_cur = next_cur; + local_irq_save(flags); spin_unlock_irqrestore(&qp->s_lock, flags); ud_loopback(qp, wqe); spin_lock_irqsave(&qp->s_lock, flags); @@ -408,7 +412,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn); - ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++)); + ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn)); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). 
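+ * (The PSN placed in bth[2] above now comes from wqe->psn, which is
+ * assigned at post time, instead of from a qp->s_next_psn counter
+ * owned by this path.)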
@@ -423,13 +427,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp) priv->s_hdr->sde = NULL; done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 35f6d92a6249..1df464815247 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1533,6 +1533,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send; dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send; + dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send; dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters; @@ -1543,7 +1544,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu; dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; - dd->verbs_dev.rdi.driver_f.check_send_wr = hfi1_check_send_wr; + dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe; /* completeion queue */ snprintf(dd->verbs_dev.rdi.dparms.cq_name, diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index adb63bb6fae2..d00c55d06c8c 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -427,7 +427,7 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); -int hfi1_check_send_wr(struct rvt_qp *qp, struct ib_send_wr *wr); +int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 5ccf683b28f1..aabd2e5bc5d7 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -220,6 +220,7 @@ struct rvt_ah { }; struct rvt_dev_info; +struct rvt_swqe; struct rvt_driver_provided { /* * The work to create port files in /sys/class Infiniband is different @@ -240,6 +241,7 @@ struct rvt_driver_provided { void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp); void (*schedule_send)(struct rvt_qp *qp); + void (*schedule_send_no_lock)(struct rvt_qp *qp); void (*do_send)(struct rvt_qp *qp); int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct ib_qp_attr *attr); @@ -273,7 +275,7 @@ struct rvt_driver_provided { void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); - int (*check_send_wr)(struct rvt_qp *qp, struct ib_send_wr *wr); + int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe); void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index b3ea74579316..1066b5d1b4d2 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -250,11 +250,12 @@ struct rvt_qp { enum ib_mtu path_mtu; int srate_mbps; /* s_srate (below) converted to Mbit/s */ u32 remote_qpn; - u32 pmtu; /* decoded from path_mtu 
*/ u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ u32 s_ahgpsn; /* set to the psn in the copy of the header */ + u16 pmtu; /* decoded from path_mtu */ + u8 log_pmtu; /* shift for pmtu */ u8 state; /* QP state */ u8 allowed_ops; /* high order bits of allowed opcodes */ u8 qp_access_flags; @@ -299,6 +300,13 @@ struct rvt_qp { struct rvt_sge_state r_sge; /* current receive data */ struct rvt_rq r_rq; /* receive work queue */ + /* post send line */ + spinlock_t s_hlock ____cacheline_aligned_in_smp; + u32 s_head; /* new entries added here */ + u32 s_next_psn; /* PSN for next request */ + u32 s_avail; /* number of entries avail */ + u32 s_ssn; /* SSN of tail entry */ + spinlock_t s_lock ____cacheline_aligned_in_smp; struct rvt_sge_state *s_cur_sge; u32 s_flags; @@ -308,19 +316,16 @@ struct rvt_qp { u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ - u32 s_next_psn; /* PSN for next request */ u32 s_last_psn; /* last response PSN processed */ u32 s_sending_psn; /* lowest PSN that is being sent */ u32 s_sending_hpsn; /* highest PSN that is being sent */ u32 s_psn; /* current packet sequence number */ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_head; /* new entries added here */ u32 s_tail; /* next entry to process */ u32 s_cur; /* current work queue entry */ u32 s_acked; /* last un-ACK'ed entry */ u32 s_last; /* last completed entry */ - u32 s_ssn; /* SSN of tail entry */ u32 s_lsn; /* limit sequence number (credit) */ u16 s_hdrwords; /* size of s_hdr in 32 bit words */ u16 s_rdma_ack_cnt; -- cgit v1.2.3 From 90793f7179478df19ac4b2244cfd9764b28e4b38 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:10:29 -0800 Subject: IB/rdmavt: Clean up comments and add more documentation Add, remove, and otherwise clean up existing comments that are leftover from the initial code postings of rdmavt. Many of the comments were added to provide an idea on the direction we were thinking of going. Now that the design is solidified make a pass over and clean everything up. Also add details where lacking. Ensure all non static functions have nano comments. 
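For illustration, the convention being applied throughout is the standard
kernel-doc form (a sketch only; rvt_example() is a made-up name, not a
function added by this patch):

/**
 * rvt_example - one line summary of the function
 * @rdi: rvt dev struct
 *
 * Longer description where the one-liner is not enough.
 *
 * Return: 0 on success, otherwise an errno
 */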
Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/ah.c | 27 ++++++++++++++++ drivers/infiniband/sw/rdmavt/cq.c | 30 +++++++++++------ drivers/infiniband/sw/rdmavt/mad.c | 30 +++++++++++------ drivers/infiniband/sw/rdmavt/mcast.c | 34 ++++++++++++++++++-- drivers/infiniband/sw/rdmavt/mmap.c | 28 ++++++++++++---- drivers/infiniband/sw/rdmavt/mr.c | 38 ++++++++++++++-------- drivers/infiniband/sw/rdmavt/pd.c | 16 ++++++++++ drivers/infiniband/sw/rdmavt/qp.c | 57 +++++++++++++++++++++++++++++---- drivers/infiniband/sw/rdmavt/srq.c | 21 +++++++++++- drivers/infiniband/sw/rdmavt/vt.c | 62 ++++++++++++++++++++++++++++-------- include/rdma/rdma_vt.h | 15 --------- 11 files changed, 280 insertions(+), 78 deletions(-) (limited to 'include/rdma')
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 9372c4321858..16c446142c2a 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -53,6 +53,11 @@ * rvt_check_ah - validate the attributes of AH * @ibdev: the ib device * @ah_attr: the attributes of the AH + * + * If the driver supports a more detailed check_ah function, call back to it; + * otherwise just check the basics. + * + * Return: 0 on success */ int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) @@ -95,6 +100,8 @@ EXPORT_SYMBOL(rvt_check_ah); * @ah_attr: the attributes of the AH * * This may be called from interrupt context. + * + * Return: newly allocated ah */ struct ib_ah *rvt_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) @@ -129,6 +136,12 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, return &ah->ibah; } +/** + * rvt_destroy_ah - Destroy an address handle + * @ibah: address handle + * + * Return: 0 on success + */ int rvt_destroy_ah(struct ib_ah *ibah) { struct rvt_dev_info *dev = ib_to_rvt(ibah->device); @@ -147,6 +160,13 @@ int rvt_destroy_ah(struct ib_ah *ibah) return 0; } +/** + * rvt_modify_ah - modify an ah with given attrs + * @ibah: address handle to modify + * @ah_attr: attrs to apply + * + * Return: 0 on success + */ int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct rvt_ah *ah = ibah_to_rvtah(ibah); @@ -159,6 +179,13 @@ int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) return 0; } +/** + * rvt_query_ah - return attrs for ah + * @ibah: address handle to query + * @ah_attr: where to return the attributes + * + * Return: always 0 + */ int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct rvt_ah *ah = ibah_to_rvtah(ibah);
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 0e6dbe5904ff..c69c0709696a 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -173,10 +173,10 @@ static void send_complete(struct kthread_work *work) * @context: unused by the QLogic_IB driver * @udata: user data for libibverbs.so * - * Returns a pointer to the completion queue or negative errno values - * for failure. - * * Called by ib_create_cq() in the generic verbs code. + * + * Return: pointer to the completion queue or negative errno values + * for failure. */ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, @@ -286,9 +286,9 @@ done: * rvt_destroy_cq - destroy a completion queue * @ibcq: the completion queue to destroy. * - * Returns 0 for success. - * * Called by ib_destroy_cq() in the generic verbs code.
+ * + * Return: always 0 */ int rvt_destroy_cq(struct ib_cq *ibcq) { @@ -313,10 +313,10 @@ int rvt_destroy_cq(struct ib_cq *ibcq) * @ibcq: the completion queue * @notify_flags: the type of notification to request * - * Returns 0 for success. - * * This may be called from interrupt context. Also called by * ib_req_notify_cq() in the generic verbs code. + * + * Return: 0 for success. */ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) { @@ -345,7 +345,7 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) * rvt_resize_cq - change the size of the CQ * @ibcq: the completion queue * - * Returns 0 for success. + * Return: 0 for success. */ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { @@ -456,10 +456,10 @@ bail_free: * @num_entries: the maximum number of entries to return * @entry: pointer to array where work completions are placed * - * Returns the number of completion entries polled. - * * This may be called from interrupt context. Also called by ib_poll_cq() * in the generic verbs code. + * + * Return: the number of completion entries polled. */ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { @@ -496,6 +496,12 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) return npolled; } +/** + * rvt_driver_cq_init - Init cq resources on behalf of driver + * @rdi: rvt dev structure + * + * Return: 0 on success + */ int rvt_driver_cq_init(struct rvt_dev_info *rdi) { int ret = 0; @@ -530,6 +536,10 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) return ret; } +/** + * rvt_cq_exit - tear down cq resources + * @rdi: rvt dev structure + */ void rvt_cq_exit(struct rvt_dev_info *rdi) { struct kthread_worker *worker;
diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index 2feae47492df..f6e99778d7ca 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -59,14 +59,13 @@ * @in_mad: the incoming MAD * @out_mad: any outgoing MAD reply * - * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not - * interested in processing. - * * Note that the verbs framework has already done the MAD sanity checks, * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE * MADs. * * This is called by the ib_mad module. + * + * Return: IB_MAD_RESULT_SUCCESS or error */ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, @@ -75,13 +74,10 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, u16 *out_mad_pkey_index) { /* - * Drivers will need to provide a number of things. For exmaple counters - * will need to be maintained by the driver but shoud live in the rvt - * structure. More study will be needed to finalize the interface - * between drivers and rvt for mad packets. - * - *VT-DRIVER-API: ???? - * + * MAD processing is quite different between hfi1 and qib. Therefore this + * is expected to be provided by the driver. Other drivers in the future + * may choose to implement this but it should not be made into a + * requirement.
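+ * Until then the stub below only sanity-checks the port number and
+ * otherwise declines the MAD.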
*/ if (ibport_num_to_idx(ibdev, port_num) < 0) return -EINVAL; @@ -95,6 +91,14 @@ static void rvt_send_mad_handler(struct ib_mad_agent *agent, ib_free_send_mad(mad_send_wc->send_buf); } +/** + * rvt_create_mad_agents - create mad agents + * @rdi: rvt dev struct + * + * If driver needs to be notified of mad agent creation then call back + * + * Return 0 on success + */ int rvt_create_mad_agents(struct rvt_dev_info *rdi) { struct ib_mad_agent *agent; @@ -136,6 +140,12 @@ err: return ret; } +/** + * rvt_free_mad_agents - free up mad agents + * @rdi: rvt dev struct + * + * If driver needs notification of mad agent removal make the call back + */ void rvt_free_mad_agents(struct rvt_dev_info *rdi) { struct ib_mad_agent *agent; diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index e06a8755cbef..983d319ac976 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -53,6 +53,12 @@ #include "mcast.h" +/** + * rvt_driver_mcast - init resources for multicast + * @rdi: rvt dev struct + * + * This is per device that registers with rdmavt + */ void rvt_driver_mcast_init(struct rvt_dev_info *rdi) { /* @@ -130,9 +136,9 @@ static void rvt_mcast_free(struct rvt_mcast *mcast) * @ibp: the IB port structure * @mgid: the multicast GID to search for * - * Returns NULL if not found. - * * The caller is responsible for decrementing the reference count if found. + * + * Return: NULL if not found. */ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid) { @@ -170,7 +176,7 @@ EXPORT_SYMBOL(rvt_mcast_find); * @mcast: the mcast GID table * @mqp: the QP to attach * - * Return zero if both were added. Return EEXIST if the GID was already in + * Return: zero if both were added. Return EEXIST if the GID was already in * the table but the QP was added. Return ESRCH if the QP was already * attached and neither structure was added. */ @@ -247,6 +253,14 @@ bail: return ret; } +/** + * rvt_attach_mcast - attach a qp to a multicast group + * @ibqp: Infiniband qp + * @igd: multicast guid + * @lid: multicast lid + * + * Return: 0 on success + */ int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); @@ -298,6 +312,14 @@ bail_mcast: return ret; } +/** + * rvt_detach_mcast - remove a qp from a multicast group + * @ibqp: Infiniband qp + * @igd: multicast guid + * @lid: multicast lid + * + * Return: 0 on success + */ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); @@ -377,6 +399,12 @@ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return 0; } +/** + *rvt_mast_tree_empty - determine if any qps are attached to any mcast group + *@rdi: rvt dev struct + * + * Return: in use count + */ int rvt_mcast_tree_empty(struct rvt_dev_info *rdi) { int i; diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index 49180c4eb76e..273974fb7d1f 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -51,6 +51,10 @@ #include #include "mmap.h" +/** + * rvt_mmap_init - init link list and lock for mem map + * @rdi: rvt dev struct + */ void rvt_mmap_init(struct rvt_dev_info *rdi) { INIT_LIST_HEAD(&rdi->pending_mmaps); @@ -78,10 +82,6 @@ void rvt_release_mmap_info(struct kref *ref) } EXPORT_SYMBOL(rvt_release_mmap_info); -/* - * open and close keep track of how many times the CQ is mapped, - * to avoid releasing it. 
- */ static void rvt_vma_open(struct vm_area_struct *vma) { struct rvt_mmap_info *ip = vma->vm_private_data; @@ -105,7 +105,8 @@ static const struct vm_operations_struct rvt_vm_ops = { * rvt_mmap - create a new mmap region * @context: the IB user context of the process making the mmap() call * @vma: the VMA to be initialized - * Return zero if the mmap is OK. Otherwise, return an errno. + * + * Return: zero if the mmap is OK. Otherwise, return an errno. */ int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { @@ -147,8 +148,14 @@ done: } EXPORT_SYMBOL(rvt_mmap); -/* - * Allocate information for hfi1_mmap +/** + * rvt_create_mmap_info - allocate information for hfi1_mmap + * @rdi: rvt dev struct + * @size: size in bytes to map + * @context: user context + * @obj: opaque pointer to a cq, wq etc + * + * Return: rvt_mmap struct on success */ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, u32 size, @@ -180,6 +187,13 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, } EXPORT_SYMBOL(rvt_create_mmap_info); +/** + * rvt_update_mmap_info - update a mem map + * @rdi: rvt dev struct + * @ip: mmap info pointer + * @size: size to grow by + * @obj: opaque pointer to cq, wq, etc. + */ void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, u32 size, void *obj) { diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 8bff6bbfece2..8549652ffd06 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -52,8 +52,13 @@ #include "vt.h" #include "mr.h" -/* +/** + * rvt_driver_mr_init - Init MR resources per driver + * @rdi: rvt dev struct + * * Do any intilization needed when a driver registers with rdmavt. + * + * Return: 0 on success or errno on failure */ int rvt_driver_mr_init(struct rvt_dev_info *rdi) { @@ -98,7 +103,10 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) return 0; } -/* +/** + *rvt_mr_exit: clean up MR + *@rdi: rvt dev structure + * * called when drivers have unregistered or perhaps failed to register with us */ void rvt_mr_exit(struct rvt_dev_info *rdi) @@ -297,7 +305,7 @@ static void __rvt_free_mr(struct rvt_mr *mr) * @pd: protection domain for this memory region * @acc: access flags * - * Returns the memory region on success, otherwise returns an errno. + * Return: the memory region on success, otherwise returns an errno. * Note that all DMA addresses should be created via the * struct ib_dma_mapping_ops functions (see dma.c). */ @@ -348,7 +356,7 @@ bail: * @mr_access_flags: access flags for this memory region * @udata: unused by the driver * - * Returns the memory region on success, otherwise returns an errno. + * Return: the memory region on success, otherwise returns an errno. */ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, @@ -418,10 +426,11 @@ bail_umem: * rvt_dereg_mr - unregister and free a memory region * @ibmr: the memory region to free * - * Returns 0 on success. * * Note that this is called to free MRs created by rvt_get_dma_mr() * or rvt_reg_user_mr(). + * + * Returns 0 on success. */ int rvt_dereg_mr(struct ib_mr *ibmr) { @@ -456,7 +465,7 @@ out: * @mr_type: mem region type * @max_num_sg: Max number of segments allowed * - * Return the memory region on success, otherwise return an errno. + * Return: the memory region on success, otherwise return an errno. 
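+ *
+ * This is the entry point ib_alloc_mr() uses for fast-registration
+ * memory regions.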
*/ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, @@ -480,7 +489,7 @@ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, * @mr_access_flags: access flags for this memory region * @fmr_attr: fast memory region attributes * - * Returns the memory region on success, otherwise returns an errno. + * Return: the memory region on success, otherwise returns an errno. */ struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr) @@ -537,6 +546,8 @@ bail: * @iova: the virtual address of the start of the fast memory region * * This may be called from interrupt context. + * + * Return: 0 on success */ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, @@ -580,7 +591,7 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, * rvt_unmap_fmr - unmap fast memory regions * @fmr_list: the list of fast memory regions to unmap * - * Returns 0 on success. + * Return: 0 on success. */ int rvt_unmap_fmr(struct list_head *fmr_list) { @@ -605,7 +616,7 @@ int rvt_unmap_fmr(struct list_head *fmr_list) * rvt_dealloc_fmr - deallocate a fast memory region * @ibfmr: the fast memory region to deallocate * - * Returns 0 on success. + * Return: 0 on success. */ int rvt_dealloc_fmr(struct ib_fmr *ibfmr) { @@ -635,12 +646,13 @@ out: * @sge: SGE to check * @acc: access flags * - * Return 1 if valid and successful, otherwise returns 0. + * Check the IB SGE for validity and initialize our internal version + * of it. + * + * Return: 1 if valid and successful, otherwise returns 0. * * increments the reference count upon success * - * Check the IB SGE for validity and initialize our internal version - * of it. */ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc) @@ -733,7 +745,7 @@ EXPORT_SYMBOL(rvt_lkey_ok); * @rkey: rkey to check * @acc: access flags * - * Return 1 if successful, otherwise 0. + * Return: 1 if successful, otherwise 0. * * increments the reference count upon success */ diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index 62fee44be3a3..d1292f324c67 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -48,6 +48,16 @@ #include #include "pd.h" +/** + * rvt_alloc_pd - allocate a protection domain + * @ibdev: ib device + * @context: optional user context + * @udata: optional user data + * + * Allocate and keep track of a PD. + * + * Return: 0 on success + */ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) @@ -88,6 +98,12 @@ bail: return ret; } +/** + * rvt_dealloc_pd - Free PD + * @ibpd: Free up PD + * + * Return: always 0 + */ int rvt_dealloc_pd(struct ib_pd *ibpd) { struct rvt_pd *pd = ibpd_to_rvtpd(ibpd); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index d629911ab0ab..5809562fefda 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -162,6 +162,12 @@ static void free_qpn_table(struct rvt_qpn_table *qpt) free_page((unsigned long)qpt->map[i].page); } +/** + * rvt_driver_qp_init - Init driver qp resources + * @rdi: rvt dev strucutre + * + * Return: 0 on success + */ int rvt_driver_qp_init(struct rvt_dev_info *rdi) { int i; @@ -262,6 +268,12 @@ static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) return qp_inuse; } +/** + * rvt_qp_exit - clean up qps on device exit + * @rdi: rvt dev structure + * + * Check for qp leaks and free resources. 
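+ *
+ * A non-zero in-use count at this point indicates a driver bug:
+ * some QP was never destroyed before the device was unregistered.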
+ */ void rvt_qp_exit(struct rvt_dev_info *rdi) { u32 qps_inuse = rvt_free_all_qps(rdi); @@ -483,7 +495,7 @@ EXPORT_SYMBOL(rvt_reset_qp); * unique idea of what queue pair numbers mean. For instance there is a reserved * range for PSM. * - * Returns the queue pair on success, otherwise returns an errno. + * Return: the queue pair on success, otherwise returns an errno. * * Called by the ib_create_qp() core verbs function. */ @@ -757,6 +769,11 @@ bail_swq: return ret; } +/** + * rvt_clear_mr_refs - Drop help mr refs + * @qp: rvt qp data structure + * @clr_sends: If shoudl clear send side or not + */ void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) { unsigned n; @@ -812,7 +829,8 @@ EXPORT_SYMBOL(rvt_clear_mr_refs); * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. - * Returns true if last WQE event should be generated. + * + * Return: true if last WQE event should be generated. * The QP r_lock and s_lock should be held and interrupts disabled. * If we are already in error state, just return. */ @@ -912,7 +930,11 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); } -/* +/** + * rvt_remove_qp - remove qp form table + * @rdi: rvt dev struct + * @qp: qp to remove + * * Remove the QP from the table so it can't be found asynchronously by * the receive routine. */ @@ -967,7 +989,7 @@ EXPORT_SYMBOL(rvt_remove_qp); * @attr_mask: the mask of attributes to modify * @udata: user data for libibverbs.so * - * Returns 0 on success, otherwise returns an errno. + * Return: 0 on success, otherwise returns an errno. */ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) @@ -1224,10 +1246,10 @@ inval: * rvt_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy * - * Returns 0 on success. - * * Note that this can be called while the QP is actively sending or * receiving! + * + * Return: 0 on success. */ int rvt_destroy_qp(struct ib_qp *ibqp) { @@ -1263,6 +1285,15 @@ int rvt_destroy_qp(struct ib_qp *ibqp) return 0; } +/** + * rvt_query_qp - query an ipbq + * @ibqp: IB qp to query + * @attr: attr struct to fill in + * @attr_mask: attr mask ignored + * @init_attr: struct to fill in + * + * Return: always 0 + */ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { @@ -1321,6 +1352,8 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, * @bad_wr: the first bad WR is put here * * This may be called from interrupt context. + * + * Return: 0 on success otherwise errno */ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) @@ -1539,6 +1572,8 @@ bail_inval_free: * @bad_wr: the first bad WR is put here * * This may be called from interrupt context. + * + * Return: 0 on success else errno */ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) @@ -1594,6 +1629,8 @@ bail: * @bad_wr: A pointer to the first WR to cause a problem is put here * * This may be called from interrupt context. 
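+ * Only the SRQ's own lock is taken, so posting here never contends
+ * with the QP send path.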
+ * + * Return: 0 on success else errno */ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) @@ -1636,6 +1673,10 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, return 0; } +/** + * rvt_free_qpn - Free a qpn from the bit map + * @qpt: QP table + * @qpn: queue pair number to free + */ void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) { struct rvt_qpn_map *map; @@ -1646,6 +1687,10 @@ void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) } EXPORT_SYMBOL(rvt_free_qpn); +/** + * rvt_dec_qp_cnt - decrement qp count + * @rdi: rvt dev struct + */ void rvt_dec_qp_cnt(struct rvt_dev_info *rdi) { spin_lock(&rdi->n_qps_lock);
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 4960a89f91b2..98c492797c53 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -51,7 +51,10 @@ #include "srq.h" -/* +/** + * rvt_driver_srq_init - init srq resources on a per driver basis + * @rdi: rvt dev structure + * * Do any initialization needed when a driver registers with rdmavt. */ void rvt_driver_srq_init(struct rvt_dev_info *rdi) @@ -65,6 +68,8 @@ * @ibpd: the protection domain of the SRQ to create * @srq_init_attr: the attributes of the SRQ * @udata: data from libibverbs when creating a user SRQ + * + * Return: Allocated srq object */ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *srq_init_attr, @@ -168,6 +173,8 @@ * @attr: the new attributes of the SRQ * @attr_mask: indicates which attributes to modify * @udata: user data for libibverbs.so + * + * Return: 0 on success */ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, @@ -305,6 +312,12 @@ bail_free: return ret; } +/** + * rvt_query_srq - query srq data + * @ibsrq: srq to query + * @attr: return info in attr + * + * Return: always 0 + */ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); @@ -315,6 +328,12 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) return 0; } +/** + * rvt_destroy_srq - destroy an srq + * @ibsrq: srq object to destroy + * + * Return: always 0 + */ int rvt_destroy_srq(struct ib_srq *ibsrq) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 2ccf6103e5fa..f5cb09b718be 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -57,16 +57,37 @@ MODULE_DESCRIPTION("RDMA Verbs Transport Library"); static int rvt_init(void) { - /* Do any work needed prior to drivers calling for registration*/ + /* + * rdmavt does not need to do anything special when it starts up. All it + * needs to do is sit and wait until a driver attempts registration. + */ return 0; } module_init(rvt_init); static void rvt_cleanup(void) { + /* + * Nothing to do at exit time either. The module won't be able to be + * removed until all drivers are gone which means all the dev structs + * are gone so there is really nothing to do. + */ } module_exit(rvt_cleanup); +/** + * rvt_alloc_device - allocate rdi + * @size: how big of a structure to allocate + * @nports: number of ports to allocate array slots for + * + * Use IB core device alloc to allocate space for the rdi which is assumed to be + * inside of the ib_device. Any extra space that drivers require should be + * included in size.
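+ * (Drivers pass the sizeof() of their own device structure, which
+ * embeds struct rvt_dev_info, which in turn embeds the ib_device.)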
+ * + * We also allocate a port array based on the number of ports. + * + * Return: pointer to allocated rdi + */ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports) { struct rvt_dev_info *rdi = ERR_PTR(-ENOMEM); @@ -105,15 +126,10 @@ static int rvt_modify_device(struct ib_device *device, struct ib_device_modify *device_modify) { /* - * Change dev props. Planned support is for node desc change and sys - * guid change only. This matches hfi1 and qib behavior. Other drivers - * that support existing modifications will need to add their support. + * There is currently no need to supply this based on qib and hfi1. + * Future drivers may need to implement this though. */ - /* - * VT-DRIVER-API: node_desc_change() - * VT-DRIVER-API: sys_guid_change() - */ return -EOPNOTSUPP; } @@ -123,7 +139,7 @@ static int rvt_modify_device(struct ib_device *device, * @port_num: port number, 1 based from ib core * @props: structure to hold returned properties * - * Returns 0 on success + * Return: 0 on success */ static int rvt_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *props) @@ -158,7 +174,7 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num, * @port_modify_mask: How to change the port * @props: Structure to fill in * - * Returns 0 on success + * Return: 0 on success */ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, int port_modify_mask, struct ib_port_modify *props) @@ -191,7 +207,7 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, * @port_num: Port number, 1 based from ib core * @intex: Index into pkey table * - * Returns 0 on failure pkey otherwise + * Return: 0 on failure pkey otherwise */ static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, u16 *pkey) @@ -223,7 +239,7 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, * @index: = Index in table * @gid: Gid to return * - * Returns 0 on success + * Return: 0 on success */ static int rvt_query_gid(struct ib_device *ibdev, u8 port_num, int guid_index, union ib_gid *gid) @@ -316,6 +332,15 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, #define CHECK_DRIVER_OVERRIDE(rdi, x) \ rdi->ibdev.x = rdi->ibdev.x ? : rvt_ ##x +/** + * rvt_register_device - register a driver + * @rdi: main dev structure for all of rdmavt operations + * + * It is up to drivers to allocate the rdi and fill in the appropriate + * information. + * + * Return: 0 on success otherwise an errno. + */ int rvt_register_device(struct rvt_dev_info *rdi) { /* Validate that drivers have provided the right information */ @@ -487,6 +512,10 @@ bail_no_mr: } EXPORT_SYMBOL(rvt_register_device); +/** + * rvt_unregister_device - remove a driver + * @rdi: rvt dev struct + */ void rvt_unregister_device(struct rvt_dev_info *rdi) { trace_rvt_dbg(rdi, "Driver is unregistering."); @@ -502,9 +531,16 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) } EXPORT_SYMBOL(rvt_unregister_device); -/* +/** + * rvt_init_port - init internal data for driver port + * @rdi: rvt dev strut + * @port: rvt port + * @port_index: 0 based index of ports, different from IB core port num + * * Keep track of a list of ports. No need to have a detach port. * They persist until the driver goes away. 
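+ * The supplied pkey_table is cached per port so that
+ * rvt_query_pkey() can answer without calling into the driver.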
+ * + * Return: always 0 */ int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, int port_index, u16 *pkey_table) diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index aabd2e5bc5d7..57c708dddab4 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -146,21 +146,6 @@ struct rvt_ibport { * Things that are driver specific, module parameters in hfi1 and qib */ struct rvt_driver_params { - /* - * driver required fields: - * node_guid - * phys_port_cnt - * dma_device - * owner - * driver optional fields (rvt will provide generic value if blank): - * name - * node_desc - * rvt fields, driver value ignored: - * uverbs_abi_ver - * node_type - * num_comp_vectors - * uverbs_cmd_mask - */ struct ib_device_attr props; /* -- cgit v1.2.3 From 1348d706f254fe7030221251a5e1685ff3d9f86a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:10:37 -0800 Subject: IB/rdmavt: Add per verb driver callback checking For each verb validate that all requirements for driver callbacks are met. If a function is called without checking for a valid pointer, it is a required function. Also document what each callback function does. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/vt.c | 441 ++++++++++++++++++++++++++++++++------ include/rdma/rdma_vt.h | 123 +++++++++-- 2 files changed, 489 insertions(+), 75 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index f5cb09b718be..9566a920a244 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -325,12 +325,375 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -/* - * Check driver override. If driver passes a value use it, otherwise we use our - * own value. - */ -#define CHECK_DRIVER_OVERRIDE(rdi, x) \ - rdi->ibdev.x = rdi->ibdev.x ? : rvt_ ##x +enum { + MISC, + QUERY_DEVICE, + MODIFY_DEVICE, + QUERY_PORT, + MODIFY_PORT, + QUERY_PKEY, + QUERY_GID, + ALLOC_UCONTEXT, + DEALLOC_UCONTEXT, + GET_PORT_IMMUTABLE, + CREATE_QP, + MODIFY_QP, + DESTROY_QP, + QUERY_QP, + POST_SEND, + POST_RECV, + POST_SRQ_RECV, + CREATE_AH, + DESTROY_AH, + MODIFY_AH, + QUERY_AH, + CREATE_SRQ, + MODIFY_SRQ, + DESTROY_SRQ, + QUERY_SRQ, + ATTACH_MCAST, + DETACH_MCAST, + GET_DMA_MR, + REG_USER_MR, + DEREG_MR, + ALLOC_MR, + ALLOC_FMR, + MAP_PHYS_FMR, + UNMAP_FMR, + DEALLOC_FMR, + MMAP, + CREATE_CQ, + DESTROY_CQ, + POLL_CQ, + REQ_NOTFIY_CQ, + RESIZE_CQ, + ALLOC_PD, + DEALLOC_PD, + _VERB_IDX_MAX /* Must always be last! */ +}; + +static inline int check_driver_override(struct rvt_dev_info *rdi, + size_t offset, void *func) +{ + if (!*(void **)((void *)&rdi->ibdev + offset)) { + *(void **)((void *)&rdi->ibdev + offset) = func; + return 0; + } + + return 1; +} + +static int check_support(struct rvt_dev_info *rdi, int verb) +{ + switch (verb) { + case MISC: + /* + * These functions are not part of verbs specifically but are + * required for rdmavt to function. + */ + if ((!rdi->driver_f.port_callback) || + (!rdi->driver_f.get_card_name) || + (!rdi->driver_f.get_pci_dev)) + return -EINVAL; + break; + + case QUERY_DEVICE: + check_driver_override(rdi, offsetof(struct ib_device, + query_device), + rvt_query_device); + break; + + case MODIFY_DEVICE: + /* + * rdmavt does not support modify device currently drivers must + * provide. 
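+	 * If the driver has not supplied its own modify_device either,
+	 * registration fails rather than silently installing the stub.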
+ */ + if (!check_driver_override(rdi, offsetof(struct ib_device, + modify_device), + rvt_modify_device)) + return -EOPNOTSUPP; + break; + + case QUERY_PORT: + if (!check_driver_override(rdi, offsetof(struct ib_device, + query_port), + rvt_query_port)) + if (!rdi->driver_f.query_port_state) + return -EINVAL; + break; + + case MODIFY_PORT: + if (!check_driver_override(rdi, offsetof(struct ib_device, + modify_port), + rvt_modify_port)) + if (!rdi->driver_f.cap_mask_chg || + !rdi->driver_f.shut_down_port) + return -EINVAL; + break; + + case QUERY_PKEY: + check_driver_override(rdi, offsetof(struct ib_device, + query_pkey), + rvt_query_pkey); + break; + + case QUERY_GID: + if (!check_driver_override(rdi, offsetof(struct ib_device, + query_gid), + rvt_query_gid)) + if (!rdi->driver_f.get_guid_be) + return -EINVAL; + break; + + case ALLOC_UCONTEXT: + check_driver_override(rdi, offsetof(struct ib_device, + alloc_ucontext), + rvt_alloc_ucontext); + break; + + case DEALLOC_UCONTEXT: + check_driver_override(rdi, offsetof(struct ib_device, + dealloc_ucontext), + rvt_dealloc_ucontext); + break; + + case GET_PORT_IMMUTABLE: + check_driver_override(rdi, offsetof(struct ib_device, + get_port_immutable), + rvt_get_port_immutable); + break; + + case CREATE_QP: + if (!check_driver_override(rdi, offsetof(struct ib_device, + create_qp), + rvt_create_qp)) + if (!rdi->driver_f.qp_priv_alloc || + !rdi->driver_f.qp_priv_free || + !rdi->driver_f.notify_qp_reset || + !rdi->driver_f.flush_qp_waiters || + !rdi->driver_f.stop_send_queue || + !rdi->driver_f.quiesce_qp) + return -EINVAL; + break; + + case MODIFY_QP: + if (!check_driver_override(rdi, offsetof(struct ib_device, + modify_qp), + rvt_modify_qp)) + if (!rdi->driver_f.notify_qp_reset || + !rdi->driver_f.schedule_send || + !rdi->driver_f.get_pmtu_from_attr || + !rdi->driver_f.flush_qp_waiters || + !rdi->driver_f.stop_send_queue || + !rdi->driver_f.quiesce_qp || + !rdi->driver_f.notify_error_qp || + !rdi->driver_f.mtu_from_qp || + !rdi->driver_f.mtu_to_path_mtu || + !rdi->driver_f.shut_down_port || + !rdi->driver_f.cap_mask_chg) + return -EINVAL; + break; + + case DESTROY_QP: + if (!check_driver_override(rdi, offsetof(struct ib_device, + destroy_qp), + rvt_destroy_qp)) + if (!rdi->driver_f.qp_priv_free || + !rdi->driver_f.notify_qp_reset || + !rdi->driver_f.flush_qp_waiters || + !rdi->driver_f.stop_send_queue || + !rdi->driver_f.quiesce_qp) + return -EINVAL; + break; + + case QUERY_QP: + check_driver_override(rdi, offsetof(struct ib_device, + query_qp), + rvt_query_qp); + break; + + case POST_SEND: + if (!check_driver_override(rdi, offsetof(struct ib_device, + post_send), + rvt_post_send)) + if (!rdi->driver_f.schedule_send || + !rdi->driver_f.do_send) + return -EINVAL; + break; + + case POST_RECV: + check_driver_override(rdi, offsetof(struct ib_device, + post_recv), + rvt_post_recv); + break; + case POST_SRQ_RECV: + check_driver_override(rdi, offsetof(struct ib_device, + post_srq_recv), + rvt_post_srq_recv); + break; + + case CREATE_AH: + check_driver_override(rdi, offsetof(struct ib_device, + create_ah), + rvt_create_ah); + break; + + case DESTROY_AH: + check_driver_override(rdi, offsetof(struct ib_device, + destroy_ah), + rvt_destroy_ah); + break; + + case MODIFY_AH: + check_driver_override(rdi, offsetof(struct ib_device, + modify_ah), + rvt_modify_ah); + break; + + case QUERY_AH: + check_driver_override(rdi, offsetof(struct ib_device, + query_ah), + rvt_query_ah); + break; + + case CREATE_SRQ: + check_driver_override(rdi, offsetof(struct ib_device, + 
create_srq), + rvt_create_srq); + break; + + case MODIFY_SRQ: + check_driver_override(rdi, offsetof(struct ib_device, + modify_srq), + rvt_modify_srq); + break; + + case DESTROY_SRQ: + check_driver_override(rdi, offsetof(struct ib_device, + destroy_srq), + rvt_destroy_srq); + break; + + case QUERY_SRQ: + check_driver_override(rdi, offsetof(struct ib_device, + query_srq), + rvt_query_srq); + break; + + case ATTACH_MCAST: + check_driver_override(rdi, offsetof(struct ib_device, + attach_mcast), + rvt_attach_mcast); + break; + + case DETACH_MCAST: + check_driver_override(rdi, offsetof(struct ib_device, + detach_mcast), + rvt_detach_mcast); + break; + + case GET_DMA_MR: + check_driver_override(rdi, offsetof(struct ib_device, + get_dma_mr), + rvt_get_dma_mr); + break; + + case REG_USER_MR: + check_driver_override(rdi, offsetof(struct ib_device, + reg_user_mr), + rvt_reg_user_mr); + break; + + case DEREG_MR: + check_driver_override(rdi, offsetof(struct ib_device, + dereg_mr), + rvt_dereg_mr); + break; + + case ALLOC_FMR: + check_driver_override(rdi, offsetof(struct ib_device, + alloc_fmr), + rvt_alloc_fmr); + break; + + case ALLOC_MR: + check_driver_override(rdi, offsetof(struct ib_device, + alloc_mr), + rvt_alloc_mr); + break; + + case MAP_PHYS_FMR: + check_driver_override(rdi, offsetof(struct ib_device, + map_phys_fmr), + rvt_map_phys_fmr); + break; + + case UNMAP_FMR: + check_driver_override(rdi, offsetof(struct ib_device, + unmap_fmr), + rvt_unmap_fmr); + break; + + case DEALLOC_FMR: + check_driver_override(rdi, offsetof(struct ib_device, + dealloc_fmr), + rvt_dealloc_fmr); + break; + + case MMAP: + check_driver_override(rdi, offsetof(struct ib_device, + mmap), + rvt_mmap); + break; + + case CREATE_CQ: + check_driver_override(rdi, offsetof(struct ib_device, + create_cq), + rvt_create_cq); + break; + + case DESTROY_CQ: + check_driver_override(rdi, offsetof(struct ib_device, + destroy_cq), + rvt_destroy_cq); + break; + + case POLL_CQ: + check_driver_override(rdi, offsetof(struct ib_device, + poll_cq), + rvt_poll_cq); + break; + + case REQ_NOTFIY_CQ: + check_driver_override(rdi, offsetof(struct ib_device, + req_notify_cq), + rvt_req_notify_cq); + break; + + case RESIZE_CQ: + check_driver_override(rdi, offsetof(struct ib_device, + resize_cq), + rvt_resize_cq); + break; + + case ALLOC_PD: + check_driver_override(rdi, offsetof(struct ib_device, + alloc_pd), + rvt_alloc_pd); + break; + + case DEALLOC_PD: + check_driver_override(rdi, offsetof(struct ib_device, + dealloc_pd), + rvt_dealloc_pd); + break; + + default: + return -EINVAL; + } + + return 0; +} /** * rvt_register_device - register a driver @@ -343,35 +706,26 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, */ int rvt_register_device(struct rvt_dev_info *rdi) { - /* Validate that drivers have provided the right information */ - int ret = 0; + int ret = 0, i; if (!rdi) return -EINVAL; - if ((!rdi->driver_f.port_callback) || - (!rdi->driver_f.get_card_name) || - (!rdi->driver_f.get_pci_dev) || - (!rdi->driver_f.check_ah)) { - pr_err("Driver not supporting req func\n"); - return -EINVAL; - } + /* + * Check to ensure drivers have setup the required helpers for the verbs + * they want rdmavt to handle + */ + for (i = 0; i < _VERB_IDX_MAX; i++) + if (check_support(rdi, i)) { + pr_err("Driver support req not met at %d\n", i); + return -EINVAL; + } + /* Once we get past here we can use rvt_pr macros and tracepoints */ trace_rvt_dbg(rdi, "Driver attempting registration"); rvt_mmap_init(rdi); - /* Dev Ops */ - 
CHECK_DRIVER_OVERRIDE(rdi, query_device); - CHECK_DRIVER_OVERRIDE(rdi, modify_device); - CHECK_DRIVER_OVERRIDE(rdi, query_port); - CHECK_DRIVER_OVERRIDE(rdi, modify_port); - CHECK_DRIVER_OVERRIDE(rdi, query_pkey); - CHECK_DRIVER_OVERRIDE(rdi, query_gid); - CHECK_DRIVER_OVERRIDE(rdi, alloc_ucontext); - CHECK_DRIVER_OVERRIDE(rdi, dealloc_ucontext); - CHECK_DRIVER_OVERRIDE(rdi, get_port_immutable); - /* Queue Pairs */ ret = rvt_driver_qp_init(rdi); if (ret) { @@ -379,33 +733,15 @@ int rvt_register_device(struct rvt_dev_info *rdi) return -EINVAL; } - CHECK_DRIVER_OVERRIDE(rdi, create_qp); - CHECK_DRIVER_OVERRIDE(rdi, modify_qp); - CHECK_DRIVER_OVERRIDE(rdi, destroy_qp); - CHECK_DRIVER_OVERRIDE(rdi, query_qp); - CHECK_DRIVER_OVERRIDE(rdi, post_send); - CHECK_DRIVER_OVERRIDE(rdi, post_recv); - CHECK_DRIVER_OVERRIDE(rdi, post_srq_recv); - /* Address Handle */ - CHECK_DRIVER_OVERRIDE(rdi, create_ah); - CHECK_DRIVER_OVERRIDE(rdi, destroy_ah); - CHECK_DRIVER_OVERRIDE(rdi, modify_ah); - CHECK_DRIVER_OVERRIDE(rdi, query_ah); spin_lock_init(&rdi->n_ahs_lock); rdi->n_ahs_allocated = 0; /* Shared Receive Queue */ - CHECK_DRIVER_OVERRIDE(rdi, create_srq); - CHECK_DRIVER_OVERRIDE(rdi, modify_srq); - CHECK_DRIVER_OVERRIDE(rdi, destroy_srq); - CHECK_DRIVER_OVERRIDE(rdi, query_srq); rvt_driver_srq_init(rdi); /* Multicast */ rvt_driver_mcast_init(rdi); - CHECK_DRIVER_OVERRIDE(rdi, attach_mcast); - CHECK_DRIVER_OVERRIDE(rdi, detach_mcast); /* Mem Region */ ret = rvt_driver_mr_init(rdi); @@ -414,35 +750,18 @@ int rvt_register_device(struct rvt_dev_info *rdi) goto bail_no_mr; } - CHECK_DRIVER_OVERRIDE(rdi, get_dma_mr); - CHECK_DRIVER_OVERRIDE(rdi, reg_user_mr); - CHECK_DRIVER_OVERRIDE(rdi, dereg_mr); - CHECK_DRIVER_OVERRIDE(rdi, alloc_mr); - CHECK_DRIVER_OVERRIDE(rdi, alloc_fmr); - CHECK_DRIVER_OVERRIDE(rdi, map_phys_fmr); - CHECK_DRIVER_OVERRIDE(rdi, unmap_fmr); - CHECK_DRIVER_OVERRIDE(rdi, dealloc_fmr); - CHECK_DRIVER_OVERRIDE(rdi, mmap); - /* Completion queues */ ret = rvt_driver_cq_init(rdi); if (ret) { pr_err("Error in driver CQ init.\n"); goto bail_mr; } - CHECK_DRIVER_OVERRIDE(rdi, create_cq); - CHECK_DRIVER_OVERRIDE(rdi, destroy_cq); - CHECK_DRIVER_OVERRIDE(rdi, poll_cq); - CHECK_DRIVER_OVERRIDE(rdi, req_notify_cq); - CHECK_DRIVER_OVERRIDE(rdi, resize_cq); /* DMA Operations */ rdi->ibdev.dma_ops = rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops; /* Protection Domain */ - CHECK_DRIVER_OVERRIDE(rdi, alloc_pd); - CHECK_DRIVER_OVERRIDE(rdi, dealloc_pd); spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 57c708dddab4..ec658d8bf34e 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -208,62 +208,157 @@ struct rvt_dev_info; struct rvt_swqe; struct rvt_driver_provided { /* - * The work to create port files in /sys/class Infiniband is different - * depending on the driver. This should not be extracted away and - * instead drivers are responsible for setting the correct callback for - * this. + * Which functions are required depends on which verbs rdmavt is + * providing and which verbs the driver is overriding. See + * check_support() for details. */ - /* -------------------*/ - /* Required functions */ - /* -------------------*/ + /* Passed to ib core registration. Callback to create syfs files */ int (*port_callback)(struct ib_device *, u8, struct kobject *); + + /* + * Returns a string to represent the device for which is being + * registered. This is primarily used for error and debug messages on + * the console. 
+ */ const char * (*get_card_name)(struct rvt_dev_info *rdi); + + /* + * Returns a pointer to the underlying hardware's PCI device. This is + * used to display information as to what hardware is being referenced + * in an output message + */ struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); - unsigned (*free_all_qps)(struct rvt_dev_info *rdi); + + /* + * Allocate a private queue pair data structure for driver specific + * information which is opaque to rdmavt. + */ void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp); + + /* + * Free the driver's private qp structure. + */ void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + + /* + * Inform the driver the particular qp in question has been reset so + * that it can clean up anything it needs to. + */ void (*notify_qp_reset)(struct rvt_qp *qp); + + /* + * Give the driver a notice that there is send work to do. It is up to + * the driver to generally push the packets out, this just queues the + * work with the driver. There are two variants here. The no_lock + * version requires the s_lock not to be held. The other assumes the + * s_lock is held. + */ void (*schedule_send)(struct rvt_qp *qp); void (*schedule_send_no_lock)(struct rvt_qp *qp); + + /* + * Sometimes rdmavt needs to kick the driver's send progress. That is + * done by this callback. + */ void (*do_send)(struct rvt_qp *qp); + + /* + * Get a path mtu from the driver based on qp attributes. + */ int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct ib_qp_attr *attr); + + /* + * Notify driver that it needs to flush any outstanding IO requests that + * are waiting on a qp. + */ void (*flush_qp_waiters)(struct rvt_qp *qp); + + /* + * Notify driver to stop its queue of sending packets. Nothing else + * should be posted to the queue pair after this has been called. + */ void (*stop_send_queue)(struct rvt_qp *qp); + + /* + * Have the driver drain any in-progress operations + */ void (*quiesce_qp)(struct rvt_qp *qp); + + /* + * Inform the driver a qp has gone into the error state. + */ void (*notify_error_qp)(struct rvt_qp *qp); + + /* + * Get an MTU for a qp. + */ u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); + /* + * Convert an mtu to a path mtu + */ int (*mtu_to_path_mtu)(u32 mtu); + + /* + * Get the guid of a port in big endian byte order + */ int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, int guid_index, __be64 *guid); + + /* + * Query driver for the state of the port. + */ int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num, struct ib_port_attr *props); + + /* + * Tell driver to shut down a port + */ int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num); + + /* Tell driver to send a trap for changed port capabilities */ void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num); - /*--------------------*/ - /* Optional functions */ - /*--------------------*/ + /* + * The following functions can be safely ignored completely. Any use of + * these is checked for NULL before blindly calling. Rdmavt should also + * be functional if drivers omit these. + */ + + /* Called to inform the driver that all qps should now be freed.
*/ + unsigned (*free_all_qps)(struct rvt_dev_info *rdi); + + /* Driver specific AH validation */ int (*check_ah)(struct ib_device *, struct ib_ah_attr *); + + /* Inform the driver a new AH has been created */ void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); + + /* Let the driver pick the next queue pair number*/ int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, enum ib_qp_type type, u8 port_num, gfp_t gfp); - /** - * Return 0 if modification is valid, -errno otherwise - */ + + /* Determine if its safe or allowed to modify the qp */ int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); + + /* Driver specific QP modification/notification-of */ void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); + /* Driver specific work request checking */ int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe); + /* Notify driver a mad agent has been created */ void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); + + /* Notify driver a mad agent has been removed */ void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); + }; struct rvt_dev_info { -- cgit v1.2.3 From 4eadd8ff21bffcaf3338267dcca571accc612c8e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:10:55 -0800 Subject: IB/qib,rdmavt: Move smi_ah to qib Rdmavt adopted an smi_ah from qib which is not needed by hfi1. Move this back to qib and get it out of the common library. Reviewed-by: Ira Weiny Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_iba7322.c | 10 +++++----- drivers/infiniband/hw/qib/qib_verbs.h | 1 + include/rdma/rdma_vt.h | 1 - 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index a15992259257..ca28c19d9618 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2910,8 +2910,8 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd) spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); qib_qsfp_deinit(&dd->pport[i].cpspec->qsfp_data); } - if (dd->pport[i].ibport_data.rvp.smi_ah) - ib_destroy_ah(&dd->pport[i].ibport_data.rvp.smi_ah->ibah); + if (dd->pport[i].ibport_data.smi_ah) + ib_destroy_ah(&dd->pport[i].ibport_data.smi_ah->ibah); } } @@ -5507,7 +5507,7 @@ static void try_7322_ipg(struct qib_pportdata *ppd) if (IS_ERR(send_buf)) goto retry; - if (!ibp->rvp.smi_ah) { + if (!ibp->smi_ah) { struct ib_ah *ah; ah = qib_create_qp0_ah(ibp, be16_to_cpu(IB_LID_PERMISSIVE)); @@ -5515,11 +5515,11 @@ static void try_7322_ipg(struct qib_pportdata *ppd) ret = PTR_ERR(ah); else { send_buf->ah = ah; - ibp->rvp.smi_ah = ibah_to_rvtah(ah); + ibp->smi_ah = ibah_to_rvtah(ah); ret = 0; } } else { - send_buf->ah = &ibp->rvp.smi_ah->ibah; + send_buf->ah = &ibp->smi_ah->ibah; ret = 0; } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index d137d714935d..4b76a8d59337 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -219,6 +219,7 @@ struct qib_pma_counters { struct qib_ibport { struct rvt_ibport rvp; + struct rvt_ah *smi_ah; __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */ struct qib_pma_counters __percpu *pmastats; u64 z_unicast_xmit; /* starting count for PMA */ diff --git a/include/rdma/rdma_vt.h 
b/include/rdma/rdma_vt.h index ec658d8bf34e..2c3a04c630fd 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -137,7 +137,6 @@ struct rvt_ibport { u16 *pkey_table; struct rvt_ah *sm_ah; - struct rvt_ah *smi_ah; }; #define RVT_CQN_MAX 16 /* maximum length of cq name */ -- cgit v1.2.3 From ce73fe25a6bd4a4deabed57e2553100e10fb8362 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:11:03 -0800 Subject: IB/rdmavt: Remove RVT_FLAGs While hfi1 and qib were still supporting bits and pieces of core verbs components there needed to be a way to convey if rdmavt should handle allocation and initialize of resources like the queue pair table. Now that all of this is moved into rdmavt there is no need for these flags. They are no longer used in the drivers. Reviewed-by: Ira Weiny Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/cq.c | 5 ----- drivers/infiniband/sw/rdmavt/mr.c | 5 ----- drivers/infiniband/sw/rdmavt/qp.c | 8 -------- include/rdma/rdma_vt.h | 10 ---------- 4 files changed, 28 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index c69c0709696a..b1ffc8b4a6c0 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -508,11 +508,6 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) int cpu; struct task_struct *task; - if (rdi->flags & RVT_FLAG_CQ_INIT_DRIVER) { - rvt_pr_info(rdi, "Driver is doing CQ init.\n"); - return 0; - } - if (rdi->worker) return 0; rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 8549652ffd06..4d5ef73e7291 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -66,11 +66,6 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) unsigned lk_tab_size; int i; - if (rdi->flags & RVT_FLAG_MR_INIT_DRIVER) { - rvt_pr_info(rdi, "Driver is doing MR init.\n"); - return 0; - } - /* * The top hfi1_lkey_table_size bits are used to index the * table. The lower 8 bits can be owned by the user (copied from diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 5809562fefda..441e17a0467f 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -173,11 +173,6 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) int i; int ret = -ENOMEM; - if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) { - rvt_pr_info(rdi, "Driver is doing QP init.\n"); - return 0; - } - if (!rdi->dparms.qp_table_size) return -EINVAL; @@ -284,9 +279,6 @@ void rvt_qp_exit(struct rvt_dev_info *rdi) if (!rdi->qp_dev) return; - if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) - return; /* driver did the qp init so nothing else to do */ - kfree(rdi->qp_dev->qp_table); free_qpn_table(&rdi->qp_dev->qpn_table); kfree(rdi->qp_dev); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 2c3a04c630fd..264c514c7da2 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -60,16 +60,6 @@ #include #include -/* - * For some of the IBTA objects there will likely be some - * initializations required. We need flags to determine whether it is OK - * for rdmavt to do this or not. This does not imply any functions of a - * partiuclar IBTA object are overridden. 
- */ -#define RVT_FLAG_MR_INIT_DRIVER BIT(1) -#define RVT_FLAG_QP_INIT_DRIVER BIT(2) -#define RVT_FLAG_CQ_INIT_DRIVER BIT(3) - #define RVT_MAX_PKEY_VALUES 16 struct rvt_ibport { -- cgit v1.2.3 From 0765b01b8e2da50ad56f6544f935f5eaef1389f2 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:11:12 -0800 Subject: IB/rdmavt: Remove signal_supported and comments Initially it was intended that rdmavt would support some signaling between the underlying driver and itself. However this turned out to be unnecessary for qib and hfi1. If we need to add something like this later to support another driver, we can do it then. As of now this is essentially dead code, so remove it. Reviewed-by: Ira Weiny Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 264c514c7da2..4c50bbb75d77 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -137,24 +137,6 @@ struct rvt_ibport { struct rvt_driver_params { struct ib_device_attr props; - /* - * Drivers will need to support a number of notifications to rvt in - * accordance with certain events. This structure should contain a mask - * of the supported events. Such events that the rvt may need to know - * about include: - * port errors - * port active - * lid change - * sm change - * client reregister - * pkey change - * - * There may also be other events that the rvt layers needs to know - * about this is not an exhaustive list. Some events though rvt does not - * need to rely on the driver for such as completion queue error. - */ - int rvt_signal_supported; - /* * Anything driver specific that is not covered by props * For instance special module parameters. Goes here. -- cgit v1.2.3 From 79a225be38932b17707009767e85d6edf450e7cc Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:11:20 -0800 Subject: IB/rdmavt: Remove unnecessary exported functions Remove exported functions which are no longer required as the functionality has moved into rdmavt. This also requires re-ordering some of the functions since their prototypes no longer appear in a header file. Rather than add forward declarations, it is cleaner to re-order some of the functions.
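To make the pattern concrete, a minimal sketch of this kind of cleanup (the helper name is hypothetical, not one of the symbols this patch touches):

/* Before: prototype in a public header, symbol exported so the
 * hfi1/qib drivers could call it directly.
 */
void rvt_example_helper(struct rvt_qp *qp);	/* in include/rdma/rdmavt_qp.h */

void rvt_example_helper(struct rvt_qp *qp)
{
	/* ... */
}
EXPORT_SYMBOL(rvt_example_helper);

/* After: only rdmavt itself calls it, so it becomes static and is
 * re-ordered above its first caller, avoiding a forward declaration.
 */
static void rvt_example_helper(struct rvt_qp *qp)
{
	/* ... body unchanged ... */
}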
Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mmap.c | 4 - drivers/infiniband/sw/rdmavt/mmap.h | 8 ++ drivers/infiniband/sw/rdmavt/qp.c | 252 +++++++++++++++++------------------- drivers/infiniband/sw/rdmavt/srq.c | 1 + drivers/infiniband/sw/rdmavt/vt.h | 1 + include/rdma/rdma_vt.h | 13 -- include/rdma/rdmavt_qp.h | 4 - 7 files changed, 128 insertions(+), 155 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index 273974fb7d1f..e202b8142759 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -80,7 +80,6 @@ void rvt_release_mmap_info(struct kref *ref) vfree(ip->obj); kfree(ip); } -EXPORT_SYMBOL(rvt_release_mmap_info); static void rvt_vma_open(struct vm_area_struct *vma) { @@ -146,7 +145,6 @@ int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) done: return ret; } -EXPORT_SYMBOL(rvt_mmap); /** * rvt_create_mmap_info - allocate information for hfi1_mmap @@ -185,7 +183,6 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, return ip; } -EXPORT_SYMBOL(rvt_create_mmap_info); /** * rvt_update_mmap_info - update a mem map @@ -209,4 +206,3 @@ void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, ip->size = size; ip->obj = obj; } -EXPORT_SYMBOL(rvt_update_mmap_info); diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h index e8067471c722..fab0e7b1daf9 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.h +++ b/drivers/infiniband/sw/rdmavt/mmap.h @@ -51,5 +51,13 @@ #include void rvt_mmap_init(struct rvt_dev_info *rdi); +void rvt_release_mmap_info(struct kref *ref); +int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); +struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, + u32 size, + struct ib_ucontext *context, + void *obj); +void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, + u32 size, void *obj); #endif /* DEF_RDMAVTMMAP_H */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 441e17a0467f..dbf124db1fd1 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -389,13 +389,117 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); } +/** + * rvt_clear_mr_refs - Drop help mr refs + * @qp: rvt qp data structure + * @clr_sends: If shoudl clear send side or not + */ +static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) +{ + unsigned n; + + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) + rvt_put_ss(&qp->s_rdma_read_sge); + + rvt_put_ss(&qp->r_sge); + + if (clr_sends) { + while (qp->s_last != qp->s_head) { + struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); + unsigned i; + + for (i = 0; i < wqe->wr.num_sge; i++) { + struct rvt_sge *sge = &wqe->sg_list[i]; + + rvt_put_mr(sge->mr); + } + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI) + atomic_dec(&ibah_to_rvtah( + wqe->ud_wr.ah)->refcount); + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + smp_wmb(); /* see qp_set_savail */ + } + if (qp->s_rdma_mr) { + rvt_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + } + + if (qp->ibqp.qp_type != IB_QPT_RC) + return; + + for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + struct rvt_ack_entry *e = &qp->s_ack_queue[n]; + + if (e->opcode == 
IB_OPCODE_RC_RDMA_READ_REQUEST && + e->rdma_sge.mr) { + rvt_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + } +} + +/** + * rvt_remove_qp - remove qp form table + * @rdi: rvt dev struct + * @qp: qp to remove + * + * Remove the QP from the table so it can't be found asynchronously by + * the receive routine. + */ +static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) +{ + struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; + u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits); + unsigned long flags; + int removed = 1; + + spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + + if (rcu_dereference_protected(rvp->qp[0], + lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(rvp->qp[0], NULL); + } else if (rcu_dereference_protected(rvp->qp[1], + lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(rvp->qp[1], NULL); + } else { + struct rvt_qp *q; + struct rvt_qp __rcu **qpp; + + removed = 0; + qpp = &rdi->qp_dev->qp_table[n]; + for (; (q = rcu_dereference_protected(*qpp, + lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL; + qpp = &q->next) { + if (q == qp) { + RCU_INIT_POINTER(*qpp, + rcu_dereference_protected(qp->next, + lockdep_is_held(&rdi->qp_dev->qpt_lock))); + removed = 1; + trace_rvt_qpremove(qp, n); + break; + } + } + } + + spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); + if (removed) { + synchronize_rcu(); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } +} + /** * reset_qp - initialize the QP state to the reset state * @qp: the QP to reset * @type: the QP type * r and s lock are required to be held by the caller */ -void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, +static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type) { if (qp->state != IB_QPS_RESET) { @@ -475,7 +579,6 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, } qp->r_sge.num_sge = 0; } -EXPORT_SYMBOL(rvt_reset_qp); /** * rvt_create_qp - create a queue pair for a device @@ -761,60 +864,6 @@ bail_swq: return ret; } -/** - * rvt_clear_mr_refs - Drop help mr refs - * @qp: rvt qp data structure - * @clr_sends: If shoudl clear send side or not - */ -void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) -{ - unsigned n; - - if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) - rvt_put_ss(&qp->s_rdma_read_sge); - - rvt_put_ss(&qp->r_sge); - - if (clr_sends) { - while (qp->s_last != qp->s_head) { - struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); - unsigned i; - - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&ibah_to_rvtah( - wqe->ud_wr.ah)->refcount); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - smp_wmb(); /* see qp_set_savail */ - } - if (qp->s_rdma_mr) { - rvt_put_mr(qp->s_rdma_mr); - qp->s_rdma_mr = NULL; - } - } - - if (qp->ibqp.qp_type != IB_QPT_RC) - return; - - for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { - struct rvt_ack_entry *e = &qp->s_ack_queue[n]; - - if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && - e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } - } -} -EXPORT_SYMBOL(rvt_clear_mr_refs); - /** * rvt_error_qp - put a QP into the error state * @qp: the QP to put into the error state @@ -922,58 +971,6 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) 
spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); } -/** - * rvt_remove_qp - remove qp form table - * @rdi: rvt dev struct - * @qp: qp to remove - * - * Remove the QP from the table so it can't be found asynchronously by - * the receive routine. - */ -void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) -{ - struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; - u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits); - unsigned long flags; - int removed = 1; - - spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); - - if (rcu_dereference_protected(rvp->qp[0], - lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(rvp->qp[0], NULL); - } else if (rcu_dereference_protected(rvp->qp[1], - lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(rvp->qp[1], NULL); - } else { - struct rvt_qp *q; - struct rvt_qp __rcu **qpp; - - removed = 0; - qpp = &rdi->qp_dev->qp_table[n]; - for (; (q = rcu_dereference_protected(*qpp, - lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL; - qpp = &q->next) { - if (q == qp) { - RCU_INIT_POINTER(*qpp, - rcu_dereference_protected(qp->next, - lockdep_is_held(&rdi->qp_dev->qpt_lock))); - removed = 1; - trace_rvt_qpremove(qp, n); - break; - } - } - } - - spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); - if (removed) { - synchronize_rcu(); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } -} -EXPORT_SYMBOL(rvt_remove_qp); - /** * qib_modify_qp - modify the attributes of a queue pair * @ibqp: the queue pair who's attributes we're modifying @@ -1234,6 +1231,19 @@ inval: return -EINVAL; } +/** rvt_free_qpn - Free a qpn from the bit map + * @qpt: QP table + * @qpn: queue pair number to free + */ +static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) +{ + struct rvt_qpn_map *map; + + map = qpt->map + qpn / RVT_BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); +} + /** * rvt_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy @@ -1664,29 +1674,3 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } return 0; } - -/** rvt_free_qpn - Free a qpn from the bit map - * @qpt: QP table - * @qpn: queue pair number to free - */ -void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) -{ - struct rvt_qpn_map *map; - - map = qpt->map + qpn / RVT_BITS_PER_PAGE; - if (map->page) - clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); -} -EXPORT_SYMBOL(rvt_free_qpn); - -/** - * rvt_dec_qp_cnt - decrement qp count - * rdi: rvt dev struct - */ -void rvt_dec_qp_cnt(struct rvt_dev_info *rdi) -{ - spin_lock(&rdi->n_qps_lock); - rdi->n_qps_allocated--; - spin_unlock(&rdi->n_qps_lock); -} -EXPORT_SYMBOL(rvt_dec_qp_cnt); diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 98c492797c53..f7c48e9023de 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -50,6 +50,7 @@ #include #include "srq.h" +#include "vt.h" /** * rvt_driver_srq_init - init srq resources on a per driver basis diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index e26f9e94d1ea..6b01eaa4461b 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -60,6 +60,7 @@ #include "mmap.h" #include "cq.h" #include "mad.h" +#include "mmap.h" #define rvt_pr_info(rdi, fmt, ...) 
\ __rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4c50bbb75d77..a8696551abb1 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -476,19 +476,6 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc); -int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -void rvt_release_mmap_info(struct kref *ref); -struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, - u32 size, - struct ib_ucontext *context, - void *obj); -void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, - u32 size, void *obj); -int rvt_reg_mr(struct rvt_qp *qp, struct ib_reg_wr *wr); struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid); -/* Temporary export */ -void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type); - #endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 1066b5d1b4d2..933f14f92da6 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -438,10 +438,6 @@ static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) extern const int ib_rvt_state_ops[]; struct rvt_dev_info; -void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp); -void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends); int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); -void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn); -void rvt_dec_qp_cnt(struct rvt_dev_info *rdi); #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From d2421a82f6d8ad407d3f4acdbacedfb06d9f47f5 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:44:26 -0800 Subject: IB/rdmavt: close send engine struct holes pahole noted the wasted 4 bytes after s_lock and r_lock. Move s_flags and r_psn to fill the holes.
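To see the kind of hole pahole reports here, consider a toy layout (not the actual rvt_qp; it assumes a 4-byte spinlock_t, which is common without lock debugging):

/* Before: the u64 needs 8-byte alignment, so a 4-byte hole follows
 * the lock -- exactly the waste pahole flags.
 */
struct toy_before {
	spinlock_t lock;	/* 4 bytes */
	/* 4-byte hole */
	u64 wr_id;		/* must be 8-byte aligned */
	u32 psn;
	/* 4 bytes of tail padding */
};				/* 24 bytes */

/* After: moving the u32 up fills the hole; same members, 16 bytes. */
struct toy_after {
	spinlock_t lock;	/* 4 bytes */
	u32 psn;		/* fills the former hole */
	u64 wr_id;
};				/* 16 bytes */

Relocating an existing 4-byte member next to each lock costs nothing and recovers the padding on both the receive and send sides, which is what the diff below does with r_psn and s_flags.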
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 933f14f92da6..5c307ed4d195 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -283,12 +283,12 @@ struct rvt_qp { struct rvt_sge_state s_rdma_read_sge; spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + u32 r_psn; /* expected rcv packet sequence number */ unsigned long r_aflags; u64 r_wr_id; /* ID for current receive WQE */ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ u32 r_len; /* total length of r_sge */ u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ u32 r_msn; /* message sequence number */ u8 r_state; /* opcode of last packet received */ @@ -308,8 +308,8 @@ struct rvt_qp { u32 s_ssn; /* SSN of tail entry */ spinlock_t s_lock ____cacheline_aligned_in_smp; - struct rvt_sge_state *s_cur_sge; u32 s_flags; + struct rvt_sge_state *s_cur_sge; struct rvt_swqe *s_wqe; struct rvt_sge_state s_sge; /* current send request data */ struct rvt_mregion *s_rdma_mr; -- cgit v1.2.3 From 14553ca11039732bcba3c160a26d702dbe71dd49 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:45:36 -0800 Subject: staging/rdma/hfi1: Adaptive PIO for short messages The change requires a new pio_busy field in the iowait structure to track the number of outstanding pios. The new counter together with the sdma counter serves as the basis for a packet-by-packet decision as to which egress mechanism to use. Since packets given to different egress mechanisms are not ordered with respect to each other, the scheme is constructed so that ordering is still preserved. The iowait drain/wait mechanisms are extended for a pio case. An additional qp wait flag is added for the PIO drain wait case. Currently the only pio wait is for buffers, so the no_bufs_available() routine name is changed to pio_wait() and a third argument is passed with one of the two pio wait flags to generalize the routine. A module parameter is added to hold a configurable threshold. For now, the module parameter is zero. A heuristic routine is added to return the func pointer of the proper egress routine to use. The heuristic is as follows: - SMI always uses pio - GSI,UD qps <= threshold use pio - UD qps > threshold use sdma o No coordination with sdma is required because order is not required and this qp pio count is not maintained for UD - RC/UC ONLY packets <= threshold choose as follows: o If sdmas pending, use SDMA o Otherwise use pio and enable the pio tracking count at the time the pio buffer is allocated - RC/UC ONLY packets > threshold use SDMA o If pios are pending the pio_wait with the new wait flag is called to delay for pios to drain The threshold is potentially reduced by the QP's mtu. The sc_buffer_alloc() has two additional args (a callback, a void *) which are exploited by the RC/UC cases to pass a new complete routine and a qp *. When the shadow ring completes the credit associated with a packet, the new complete routine is called. The verbs_pio_complete() will then decrement the busy count and trigger any drain waiters in qp destroy or reset.
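In rough C, the heuristic amounts to the following (a simplified paraphrase of the get_send_routine() added later in this patch, not the verbatim code; is_only_opcode() stands in for the rc_only_opcode/uc_only_opcode bit tests in the diff):

/* Pick an egress routine for one packet (sketch). */
if (!(dd->flags & HFI1_HAS_SEND_DMA))
	return pio;			/* no SDMA engines: always PIO */
switch (qp->ibqp.qp_type) {
case IB_QPT_SMI:
	return pio;			/* VL15 traffic always uses PIO */
case IB_QPT_GSI:
case IB_QPT_UD:
	if (piothreshold && qp->s_cur_size <= piothreshold)
		return pio;		/* small packet: PIO, no ordering concern */
	break;
case IB_QPT_RC:
case IB_QPT_UC:
	if (piothreshold &&
	    qp->s_cur_size <= min(piothreshold, qp->pmtu) && /* capped by MTU */
	    is_only_opcode(opcode) &&	/* only *_ONLY packets qualify */
	    !iowait_sdma_pending(wait))	/* nothing queued on SDMA: order safe */
		return pio;
	break;
default:
	break;
}
return sdma;				/* everything else goes to SDMA */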
Reviewed-by: Jubin John Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/chip.c | 10 +++ drivers/staging/rdma/hfi1/chip.h | 1 + drivers/staging/rdma/hfi1/hfi.h | 7 +- drivers/staging/rdma/hfi1/iowait.h | 89 +++++++++++++++++++++++ drivers/staging/rdma/hfi1/pio.c | 3 +- drivers/staging/rdma/hfi1/qp.c | 25 ++++++- drivers/staging/rdma/hfi1/rc.c | 17 ++++- drivers/staging/rdma/hfi1/sdma.c | 14 ++-- drivers/staging/rdma/hfi1/uc.c | 10 ++- drivers/staging/rdma/hfi1/ud.c | 4 +- drivers/staging/rdma/hfi1/verbs.c | 123 +++++++++++++++++++++++--------- drivers/staging/rdma/hfi1/verbs.h | 16 +++++ drivers/staging/rdma/hfi1/verbs_txreq.h | 5 ++ include/rdma/rdmavt_qp.h | 22 +++--- 14 files changed, 286 insertions(+), 60 deletions(-) (limited to 'include/rdma') diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 129461770186..36e8e3e9b012 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -1588,6 +1588,14 @@ static u64 access_sw_pio_wait(const struct cntr_entry *entry, return dd->verbs_dev.n_piowait; } +static u64 access_sw_pio_drain(const struct cntr_entry *entry, + void *context, int vl, int mode, u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + return dd->verbs_dev.n_piodrain; +} + static u64 access_sw_vtx_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { @@ -4129,6 +4137,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { access_sw_vtx_wait), [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL, access_sw_pio_wait), +[C_SW_PIO_DRAIN] = CNTR_ELEM("PioDrain", 0, 0, CNTR_NORMAL, + access_sw_pio_drain), [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL, access_sw_kmem_wait), [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL, diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h index b86c220161e5..6c581e0bd65f 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/staging/rdma/hfi1/chip.h @@ -800,6 +800,7 @@ enum { C_SW_CPU_RCV_LIM, C_SW_VTX_WAIT, C_SW_PIO_WAIT, + C_SW_PIO_DRAIN, C_SW_KMEM_WAIT, C_SW_SEND_SCHED, C_SDMA_DESC_FETCHED_CNT, diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h index 702723b3ff90..43d48613d48e 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/staging/rdma/hfi1/hfi.h @@ -811,6 +811,7 @@ struct sdma_vl_map; #define BOARD_VERS_MAX 96 /* how long the version string can be */ #define SERIAL_MAX 16 /* length of the serial number */ +typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64); struct hfi1_devdata { struct hfi1_ibdev verbs_dev; /* must be first */ struct list_head list; @@ -1121,10 +1122,8 @@ struct hfi1_devdata { * Handlers for outgoing data so that snoop/capture does not * have to have its hooks in the send path */ - int (*process_pio_send)(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc); - int (*process_dma_send)(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc); + send_routine process_pio_send; + send_routine process_dma_send; void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/staging/rdma/hfi1/iowait.h index e007eb82cbc8..b5eb1e0a5aa2 100644 --- a/drivers/staging/rdma/hfi1/iowait.h +++ b/drivers/staging/rdma/hfi1/iowait.h @@ -55,6 +55,7 @@ #include #include "sdma_txreq.h" + /* * typedef (*restart_t)() - restart 
callback * @work: pointer to work structure @@ -71,6 +72,7 @@ struct sdma_engine; * @wakeup: space callback * @iowork: workqueue overhead * @wait_dma: wait for sdma_busy == 0 + * @wait_pio: wait for pio_busy == 0 * @sdma_busy: # of packets in flight * @count: total number of descriptors in tx_head'ed list * @tx_limit: limit for overflow queuing @@ -104,7 +106,9 @@ struct iowait { void (*wakeup)(struct iowait *wait, int reason); struct work_struct iowork; wait_queue_head_t wait_dma; + wait_queue_head_t wait_pio; atomic_t sdma_busy; + atomic_t pio_busy; u32 count; u32 tx_limit; u32 tx_count; @@ -141,7 +145,9 @@ static inline void iowait_init( INIT_LIST_HEAD(&wait->tx_head); INIT_WORK(&wait->iowork, func); init_waitqueue_head(&wait->wait_dma); + init_waitqueue_head(&wait->wait_pio); atomic_set(&wait->sdma_busy, 0); + atomic_set(&wait->pio_busy, 0); wait->tx_limit = tx_limit; wait->sleep = sleep; wait->wakeup = wakeup; @@ -174,6 +180,88 @@ static inline void iowait_sdma_drain(struct iowait *wait) wait_event(wait->wait_dma, !atomic_read(&wait->sdma_busy)); } +/** + * iowait_sdma_pending() - return sdma pending count + * + * @wait: iowait structure + * + */ +static inline int iowait_sdma_pending(struct iowait *wait) +{ + return atomic_read(&wait->sdma_busy); +} + +/** + * iowait_sdma_inc - note sdma io pending + * @wait: iowait structure + */ +static inline void iowait_sdma_inc(struct iowait *wait) +{ + atomic_inc(&wait->sdma_busy); +} + +/** + * iowait_sdma_add - add count to pending + * @wait: iowait structure + */ +static inline void iowait_sdma_add(struct iowait *wait, int count) +{ + atomic_add(count, &wait->sdma_busy); +} + +/** + * iowait_sdma_dec - note sdma complete + * @wait: iowait structure + */ +static inline int iowait_sdma_dec(struct iowait *wait) +{ + return atomic_dec_and_test(&wait->sdma_busy); +} + +/** + * iowait_pio_drain() - wait for pios to drain + * + * @wait: iowait structure + * + * This will delay until the iowait pios have + * completed. 
+ */ +static inline void iowait_pio_drain(struct iowait *wait) +{ + wait_event_timeout(wait->wait_pio, + !atomic_read(&wait->pio_busy), + HZ); +} + +/** + * iowait_pio_pending() - return pio pending count + * + * @wait: iowait structure + * + */ +static inline int iowait_pio_pending(struct iowait *wait) +{ + return atomic_read(&wait->pio_busy); +} + +/** + * iowait_pio_inc - note pio pending + * @wait: iowait structure + */ +static inline void iowait_pio_inc(struct iowait *wait) +{ + atomic_inc(&wait->pio_busy); +} + +/** + * iowait_sdma_dec - note pio complete + * @wait: iowait structure + */ +static inline int iowait_pio_dec(struct iowait *wait) +{ + return atomic_dec_and_test(&wait->pio_busy); +} + /** * iowait_drain_wakeup() - trigger iowait_drain() waiter * @@ -184,6 +272,7 @@ static inline void iowait_sdma_drain(struct iowait *wait) static inline void iowait_drain_wakeup(struct iowait *wait) { wake_up(&wait->wait_dma); + wake_up(&wait->wait_pio); } /** diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c index be0dcc345f4b..f5aab0ed39d7 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/staging/rdma/hfi1/pio.c @@ -1564,7 +1564,8 @@ full: write_sequnlock_irqrestore(&dev->iowait_lock, flags); for (i = 0; i < n; i++) - hfi1_qp_wakeup(qps[i], RVT_S_WAIT_PIO); + hfi1_qp_wakeup(qps[i], + RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); } /* translate a send credit update to a bit code of reasons */ diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 571e78fa2633..c7b83d66b59b 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -359,6 +359,25 @@ void _hfi1_schedule_send(struct rvt_qp *qp) cpumask_first(cpumask_of_node(dd->node))); } +static void qp_pio_drain(struct rvt_qp *qp) +{ + struct hfi1_ibdev *dev; + struct hfi1_qp_priv *priv = qp->priv; + + if (!priv->s_sendcontext) + return; + dev = to_idev(qp->ibqp.device); + while (iowait_pio_pending(&priv->s_iowait)) { + write_seqlock_irq(&dev->iowait_lock); + hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1); + write_sequnlock_irq(&dev->iowait_lock); + iowait_pio_drain(&priv->s_iowait); + write_seqlock_irq(&dev->iowait_lock); + hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0); + write_sequnlock_irq(&dev->iowait_lock); + } +} + /** * hfi1_schedule_send - schedule progress * @qp: the QP @@ -620,7 +639,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) wqe = rvt_get_swqe_ptr(qp, qp->s_last); send_context = qp_to_send_context(qp, priv->s_sc); seq_printf(s, - "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%u LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p\n", + "N %d %s QP%x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -630,7 +649,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) wqe ? wqe->wr.opcode : 0, qp->s_hdrwords, qp->s_flags, - atomic_read(&priv->s_iowait.sdma_busy), + iowait_sdma_pending(&priv->s_iowait), + iowait_pio_pending(&priv->s_iowait), !list_empty(&priv->s_iowait.list), qp->timeout, wqe ? 
wqe->ssn : 0, @@ -739,6 +759,7 @@ void quiesce_qp(struct rvt_qp *qp) struct hfi1_qp_priv *priv = qp->priv; iowait_sdma_drain(&priv->s_iowait); + qp_pio_drain(qp); flush_tx_list(qp); } diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 27042876ca62..443fda8df380 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -181,6 +181,18 @@ void hfi1_del_timers_sync(struct rvt_qp *qp) del_timer_sync(&priv->s_rnr_timer); } +/* only opcode mask for adaptive pio */ +const u32 rc_only_opcode = + BIT(OP(SEND_ONLY) & 0x1f) | + BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) | + BIT(OP(RDMA_WRITE_ONLY & 0x1f)) | + BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) | + BIT(OP(RDMA_READ_REQUEST & 0x1f)) | + BIT(OP(ACKNOWLEDGE & 0x1f)) | + BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) | + BIT(OP(COMPARE_SWAP & 0x1f)) | + BIT(OP(FETCH_ADD & 0x1f)); + static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 psn, u32 pmtu) { @@ -217,6 +229,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, u32 bth2; int middle = 0; u32 pmtu = qp->pmtu; + struct hfi1_qp_priv *priv = qp->priv; /* Don't send an ACK if we aren't supposed to. */ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) @@ -350,6 +363,7 @@ normal: qp->s_hdrwords = hwords; /* pbc */ ps->s_txreq->hdr_dwords = hwords + 2; + ps->s_txreq->sde = priv->s_sde; qp->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps); return 1; @@ -413,7 +427,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&priv->s_iowait.sdma_busy)) { + if (iowait_sdma_pending(&priv->s_iowait)) { qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } @@ -754,6 +768,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_hdrwords = hwords; /* pbc */ ps->s_txreq->hdr_dwords = hwords + 2; + ps->s_txreq->sde = priv->s_sde; qp->s_cur_sge = ss; qp->s_cur_size = len; hfi1_make_ruc_header( diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index 579d82109932..ff38fa3b7ca5 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -410,7 +410,7 @@ static void sdma_flush(struct sdma_engine *sde) #endif sdma_txclean(sde->dd, txp); if (wait) - drained = atomic_dec_and_test(&wait->sdma_busy); + drained = iowait_sdma_dec(wait); if (txp->complete) (*txp->complete)(txp, SDMA_TXREQ_S_ABORTED, drained); if (wait && drained) @@ -584,7 +584,7 @@ static void sdma_flush_descq(struct sdma_engine *sde) /* remove from list */ sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL; if (wait) - drained = atomic_dec_and_test(&wait->sdma_busy); + drained = iowait_sdma_dec(wait); #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER trace_hfi1_sdma_out_sn(sde, txp->sn); if (WARN_ON_ONCE(sde->head_sn != txp->sn)) @@ -1498,7 +1498,7 @@ retry: /* remove from list */ sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL; if (wait) - drained = atomic_dec_and_test(&wait->sdma_busy); + drained = iowait_sdma_dec(wait); #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER trace_hfi1_sdma_out_sn(sde, txp->sn); if (WARN_ON_ONCE(sde->head_sn != txp->sn)) @@ -2092,14 +2092,14 @@ retry: goto nodesc; tail = submit_tx(sde, tx); if (wait) - atomic_inc(&wait->sdma_busy); + iowait_sdma_inc(wait); sdma_update_tail(sde, tail); unlock: spin_unlock_irqrestore(&sde->tail_lock, flags); return ret; unlock_noconn: if (wait) - atomic_inc(&wait->sdma_busy); + 
iowait_sdma_inc(wait); tx->next_descq_idx = 0; #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER tx->sn = sde->tail_sn++; @@ -2181,7 +2181,7 @@ retry: } update_tail: if (wait) - atomic_add(count, &wait->sdma_busy); + iowait_sdma_add(wait, count); if (tail != INVALID_TAIL) sdma_update_tail(sde, tail); spin_unlock_irqrestore(&sde->tail_lock, flags); @@ -2192,7 +2192,7 @@ unlock_noconn: tx->wait = wait; list_del_init(&tx->list); if (wait) - atomic_inc(&wait->sdma_busy); + iowait_sdma_inc(wait); tx->next_descq_idx = 0; #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER tx->sn = sde->tail_sn++; diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 32705618900d..e58ec15dd892 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -55,6 +55,13 @@ /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_UC_##x +/* only opcode mask for adaptive pio */ +const u32 uc_only_opcode = + BIT(OP(SEND_ONLY) & 0x1f) | + BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) | + BIT(OP(RDMA_WRITE_ONLY & 0x1f)) | + BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)); + /** * hfi1_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP @@ -86,7 +93,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&priv->s_iowait.sdma_busy)) { + if (iowait_sdma_pending(&priv->s_iowait)) { qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } @@ -237,6 +244,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_hdrwords = hwords; /* pbc */ ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; + ps->s_txreq->sde = priv->s_sde; qp->s_cur_sge = &qp->s_sge; qp->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index bae5ccdfa7f4..da4e465ae846 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -294,7 +294,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ - if (atomic_read(&priv->s_iowait.sdma_busy)) { + if (iowait_sdma_pending(&priv->s_iowait)) { qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } @@ -331,7 +331,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) * Instead of waiting, we could queue a * zero length descriptor so we get a callback. */ - if (atomic_read(&priv->s_iowait.sdma_busy)) { + if (iowait_sdma_pending(&priv->s_iowait)) { qp->s_flags |= RVT_S_WAIT_DMA; goto bail; } diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index a4f8b26f76fb..d900374abe70 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -124,11 +124,20 @@ unsigned int hfi1_max_srq_wrs = 0x1FFFF; module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO); MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); +unsigned short piothreshold; +module_param(piothreshold, ushort, S_IRUGO); +MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. 
pio"); + static void verbs_sdma_complete( struct sdma_txreq *cookie, int status, int drained); +static int pio_wait(struct rvt_qp *qp, + struct send_context *sc, + struct hfi1_pkt_state *ps, + u32 flag); + /* Length of buffer to create verbs txreq cache name */ #define TXREQ_NAME_LEN 24 @@ -742,9 +751,10 @@ bail_build: * If we are now in the error state, return zero to flush the * send work request. */ -static int no_bufs_available(struct rvt_qp *qp, - struct send_context *sc, - struct hfi1_pkt_state *ps) +static int pio_wait(struct rvt_qp *qp, + struct send_context *sc, + struct hfi1_pkt_state *ps, + u32 flag) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_devdata *dd = sc->dd; @@ -767,8 +777,10 @@ static int no_bufs_available(struct rvt_qp *qp, struct hfi1_ibdev *dev = &dd->verbs_dev; int was_empty; + dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); + dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); dev->n_piowait++; - qp->s_flags |= RVT_S_WAIT_PIO; + qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); @@ -797,6 +809,15 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5) return dd->vld[vl].sc; } +static void verbs_pio_complete(void *arg, int code) +{ + struct rvt_qp *qp = (struct rvt_qp *)arg; + struct hfi1_qp_priv *priv = qp->priv; + + if (iowait_pio_dec(&priv->s_iowait)) + iowait_drain_wakeup(&priv->s_iowait); +} + int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { @@ -815,6 +836,17 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct pio_buf *pbuf; int wc_status = IB_WC_SUCCESS; int ret = 0; + pio_release_cb cb = NULL; + + /* only RC/UC use complete */ + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: + cb = verbs_pio_complete; + break; + default: + break; + } /* vl15 special case taken care of in ud.c */ sc5 = priv->s_sc; @@ -830,8 +862,12 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); } - pbuf = sc_buffer_alloc(sc, plen, NULL, NULL); + if (cb) + iowait_pio_inc(&priv->s_iowait); + pbuf = sc_buffer_alloc(sc, plen, cb, qp); if (unlikely(pbuf == NULL)) { + if (cb) + verbs_pio_complete(qp, 0); if (ppd->host_link_state != HLS_UP_ACTIVE) { /* * If we have filled the PIO buffers to capacity and are @@ -851,8 +887,9 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, * so lets continue to queue the request. */ hfi1_cdbg(PIO, "alloc failed. 
state active, queuing"); - ret = no_bufs_available(qp, sc, ps); + ret = pio_wait(qp, sc, ps, RVT_S_WAIT_PIO); if (!ret) + /* txreq not queued - free */ goto bail; /* tx consumed in wait */ return ret; @@ -984,6 +1021,48 @@ bad: return 1; } +/** + * get_send_routine - choose an egress routine + * + * Choose an egress routine based on QP type + * and size + */ +static inline send_routine get_send_routine(struct rvt_qp *qp, + struct hfi1_ib_header *h) +{ + struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct hfi1_qp_priv *priv = qp->priv; + + if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) + return dd->process_pio_send; + switch (qp->ibqp.qp_type) { + case IB_QPT_SMI: + return dd->process_pio_send; + case IB_QPT_GSI: + case IB_QPT_UD: + if (piothreshold && qp->s_cur_size <= piothreshold) + return dd->process_pio_send; + break; + case IB_QPT_RC: + if (piothreshold && + qp->s_cur_size <= min(piothreshold, qp->pmtu) && + (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) && + iowait_sdma_pending(&priv->s_iowait) == 0) + return dd->process_pio_send; + break; + case IB_QPT_UC: + if (piothreshold && + qp->s_cur_size <= min(piothreshold, qp->pmtu) && + (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) && + iowait_sdma_pending(&priv->s_iowait) == 0) + return dd->process_pio_send; + break; + default: + break; + } + return dd->process_dma_send; +} + /** * hfi1_verbs_send - send a packet * @qp: the QP to send on @@ -995,19 +1074,10 @@ bad: int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + send_routine sr; int ret; - int pio = 0; - unsigned long flags = 0; - - /* - * VL15 packets (IB_QPT_SMI) will always use PIO, so we - * can defer SDMA restart until link goes ACTIVE without - * worrying about just how we got there. - */ - if ((qp->ibqp.qp_type == IB_QPT_SMI) || - !(dd->flags & HFI1_HAS_SEND_DMA)) - pio = 1; + sr = get_send_routine(qp, &ps->s_txreq->phdr.hdr); ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp); if (unlikely(ret)) { /* @@ -1018,7 +1088,9 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) * mechanism for handling the errors. So for SDMA we can just * return. */ - if (pio) { + if (sr == dd->process_pio_send) { + unsigned long flags; + hfi1_cdbg(PIO, "%s() Failed. 
Completing with err", __func__); spin_lock_irqsave(&qp->s_lock, flags); @@ -1027,20 +1099,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } return -EINVAL; } - - if (pio) { - ret = dd->process_pio_send(qp, ps, 0); - } else { -#ifdef CONFIG_SDMA_VERBOSITY - dd_dev_err(dd, "CONFIG SDMA %s:%d %s()\n", - slashstrip(__FILE__), __LINE__, __func__); - dd_dev_err(dd, "SDMA hdrwords = %u, len = %u\n", qp->s_hdrwords, - qp->s_cur_size); -#endif - ret = dd->process_dma_send(qp, ps, 0); - } - - return ret; + return sr(qp, ps, 0); } /** diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 3d25ad406af7..8f1fde847c14 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -265,6 +265,7 @@ struct hfi1_ibdev { struct timer_list mem_timer; u64 n_piowait; + u64 n_piodrain; u64 n_txwait; u64 n_kmem_wait; @@ -425,6 +426,19 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); +extern const u32 rc_only_opcode; +extern const u32 uc_only_opcode; + +static inline u8 get_opcode(struct hfi1_ib_header *h) +{ + u16 lnh = be16_to_cpu(h->lrh[0]) & 3; + + if (lnh == IB_LNH_IBA_LOCAL) + return be32_to_cpu(h->u.oth.bth[0]) >> 24; + else + return be32_to_cpu(h->u.l.oth.bth[0]) >> 24; +} + int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); @@ -494,6 +508,8 @@ extern unsigned int hfi1_max_srq_sges; extern unsigned int hfi1_max_srq_wrs; +extern unsigned short piothreshold; + extern const u32 ib_hfi1_rnr_table[]; #endif /* HFI1_VERBS_H */ diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/staging/rdma/hfi1/verbs_txreq.h index f56149eb51ca..1cf69b2fe4a5 100644 --- a/drivers/staging/rdma/hfi1/verbs_txreq.h +++ b/drivers/staging/rdma/hfi1/verbs_txreq.h @@ -93,6 +93,11 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, return tx; } +static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx) +{ + return &tx->txreq; +} + static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp) { struct sdma_txreq *stx; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 5c307ed4d195..f2f4df023aaa 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -82,6 +82,7 @@ * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating * next send completion entry not via send DMA * RVT_S_WAIT_PIO - waiting for a send buffer to be available + * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available * RVT_S_WAIT_KMEM - waiting for kernel memory to be available @@ -101,16 +102,17 @@ #define RVT_S_WAIT_SSN_CREDIT 0x0100 #define RVT_S_WAIT_DMA 0x0200 #define RVT_S_WAIT_PIO 0x0400 -#define RVT_S_WAIT_TX 0x0800 -#define RVT_S_WAIT_DMA_DESC 0x1000 -#define RVT_S_WAIT_KMEM 0x2000 -#define RVT_S_WAIT_PSN 0x4000 -#define RVT_S_WAIT_ACK 0x8000 -#define RVT_S_SEND_ONE 0x10000 -#define RVT_S_UNLIMITED_CREDIT 0x20000 -#define RVT_S_AHG_VALID 0x40000 -#define RVT_S_AHG_CLEAR 0x80000 -#define RVT_S_ECN 0x100000 +#define RVT_S_WAIT_PIO_DRAIN 0x0800 +#define RVT_S_WAIT_TX 0x1000 +#define RVT_S_WAIT_DMA_DESC 0x2000 +#define RVT_S_WAIT_KMEM 0x4000 +#define RVT_S_WAIT_PSN 0x8000 +#define RVT_S_WAIT_ACK 0x10000 +#define RVT_S_SEND_ONE 0x20000 +#define RVT_S_UNLIMITED_CREDIT 0x40000 +#define 
+#define RVT_S_AHG_VALID		0x80000
+#define RVT_S_AHG_CLEAR		0x100000
+#define RVT_S_ECN		0x200000
 
 /*
  * Wait flags that would prevent any packet type from being sent.
-- 
cgit v1.2.3


From ef086c0d5dd9a151578c72b6f257e5b0e77d65eb Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn
Date: Mon, 7 Mar 2016 11:35:08 -0800
Subject: IB/hfi1: Report pid in qp_stats to aid debug

Tracking user/QP ownership is needed to debug issues with user ULPs
like OpenMPI.

Reviewed-by: Dennis Dalessandro
Signed-off-by: Mike Marciniszyn
Signed-off-by: Doug Ledford
---
 drivers/infiniband/sw/rdmavt/qp.c | 1 +
 drivers/staging/rdma/hfi1/qp.c    | 5 +++--
 include/rdma/rdmavt_qp.h          | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/rdma')

diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index ef82abf2d89e..de34474b0dfb 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -786,6 +786,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 				goto bail_ip;
 			}
 		}
+		qp->pid = current->pid;
 	}
 
 	spin_lock(&rdi->n_qps_lock);
diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c
index 9e831a162f19..6f8571518ad1 100644
--- a/drivers/staging/rdma/hfi1/qp.c
+++ b/drivers/staging/rdma/hfi1/qp.c
@@ -678,7 +678,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
 	wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 	send_context = qp_to_send_context(qp, priv->s_sc);
 	seq_printf(s,
-		   "N %d %s QP%x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u CQ %u %u\n",
+		   "N %d %s QP%x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u CQ %u %u PID %d\n",
 		   iter->n,
 		   qp_idle(qp) ? "I" : "B",
 		   qp->ibqp.qp_num,
@@ -712,7 +712,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
 		   send_context,
 		   send_context ? send_context->sw_index : 0,
 		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head,
-		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail);
+		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail,
+		   qp->pid);
 }
 
 void qp_comm_est(struct rvt_qp *qp)
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index f2f4df023aaa..497e59065c2c 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -251,6 +251,7 @@ struct rvt_qp {
 	enum ib_mtu path_mtu;
 	int srate_mbps;		/* s_srate (below) converted to Mbit/s */
+	pid_t pid;		/* pid for user mode QPs */
 	u32 remote_qpn;
 	u32 qkey;		/* QKEY for this QP (for UD or RD) */
 	u32 s_size;		/* send work queue size */
-- 
cgit v1.2.3


From fb532d6a79b96a4c8f678024d7ed3549ff0ca916 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky
Date: Tue, 23 Feb 2016 10:25:25 +0200
Subject: IB/{core, ulp} Support above 32 possible device capability flags

The old bitwise device_cap_flags variable was limited to u32, all of
whose bits are already defined. To overcome this, we converted the
device_cap_flags variable to type u64.
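
For illustration only (not part of this patch): with the field widened
to u64, a kernel consumer can test capability bits at position 32 and
above directly. IB_DEVICE_SG_GAPS_REG (1ULL << 32) is the first such
flag and would not have been representable in the old 32-bit field.

	/* Sketch: check a capability flag above bit 31. */
	static bool supports_sg_gaps_reg(const struct ib_device_attr *attr)
	{
		return !!(attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG);
	}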
Signed-off-by: Leon Romanovsky
Reviewed-by: Matan Barak
Signed-off-by: Doug Ledford
---
 drivers/infiniband/core/uverbs_cmd.c | 2 +-
 drivers/infiniband/ulp/ipoib/ipoib.h | 2 +-
 include/rdma/ib_verbs.h              | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/rdma')

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index bb1b93c0a016..6fdc7ecdaca0 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -402,7 +402,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
 	resp->hw_ver		= attr->hw_ver;
 	resp->max_qp		= attr->max_qp;
 	resp->max_qp_wr		= attr->max_qp_wr;
-	resp->device_cap_flags	= attr->device_cap_flags;
+	resp->device_cap_flags	= lower_32_bits(attr->device_cap_flags);
 	resp->max_sge		= attr->max_sge;
 	resp->max_sge_rd	= attr->max_sge_rd;
 	resp->max_cq		= attr->max_cq;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 85be0de3ab26..caec8e9c4666 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -388,7 +388,7 @@ struct ipoib_dev_priv {
 	struct dentry *mcg_dentry;
 	struct dentry *path_dentry;
 #endif
-	int	hca_caps;
+	u64	hca_caps;
 	struct ipoib_ethtool_st ethtool;
 	struct timer_list poll_timer;
 	unsigned max_send_sge;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3a03c1d18afa..c9b62344d22e 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -274,7 +274,7 @@ struct ib_device_attr {
 	u32			hw_ver;
 	int			max_qp;
 	int			max_qp_wr;
-	int			device_cap_flags;
+	u64			device_cap_flags;
 	int			max_sge;
 	int			max_sge_rd;
 	int			max_cq;
-- 
cgit v1.2.3


From fad61ad4e755f5dd13c7702a87cd907207392534 Mon Sep 17 00:00:00 2001
From: Eli Cohen
Date: Fri, 11 Mar 2016 22:58:36 +0200
Subject: IB/core: Add subnet prefix to port info

The subnet prefix is part of the returned port_info MAD and should be
available in the ib_port_attr struct. We define it here and provide a
default implementation in case the hardware driver does not provide
one. The subnet prefix is required when creating the address vector to
access the SA in networks where GRH must be used.
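
For illustration only (not part of this patch), a verbs consumer can
now rely on ib_query_port() filling in subnet_prefix, either from the
driver or from the GID at index 0. The helper below is a sketch and
assumes a valid device and port number:

	/* Sketch: read and log a port's subnet prefix. */
	static void show_subnet_prefix(struct ib_device *device, u8 port_num)
	{
		struct ib_port_attr attr;

		if (!ib_query_port(device, port_num, &attr))
			pr_info("port %u subnet prefix 0x%llx\n", port_num,
				(unsigned long long)attr.subnet_prefix);
	}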
Signed-off-by: Eli Cohen
Reviewed-by: Or Gerlitz
Signed-off-by: Doug Ledford
---
 drivers/infiniband/core/device.c | 15 ++++++++++++++-
 include/rdma/ib_verbs.h          |  1 +
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/rdma')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 270c7ff6cba7..10979844026a 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -650,10 +650,23 @@ int ib_query_port(struct ib_device *device,
 		  u8 port_num,
 		  struct ib_port_attr *port_attr)
 {
+	union ib_gid gid;
+	int err;
+
 	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
 		return -EINVAL;
 
-	return device->query_port(device, port_num, port_attr);
+	memset(port_attr, 0, sizeof(*port_attr));
+	err = device->query_port(device, port_num, port_attr);
+	if (err || port_attr->subnet_prefix)
+		return err;
+
+	err = ib_query_gid(device, port_num, 0, &gid, NULL);
+	if (err)
+		return err;
+
+	port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
+	return 0;
 }
 EXPORT_SYMBOL(ib_query_port);
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c9b62344d22e..7239b9a4499e 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -490,6 +490,7 @@ union rdma_protocol_stats {
 					| RDMA_CORE_CAP_OPA_MAD)
 
 struct ib_port_attr {
+	u64			subnet_prefix;
 	enum ib_port_state	state;
 	enum ib_mtu		max_mtu;
 	enum ib_mtu		active_mtu;
-- 
cgit v1.2.3


From a0c1b2a3508714281f604db818fa0cc83c2f9b6a Mon Sep 17 00:00:00 2001
From: Eli Cohen
Date: Fri, 11 Mar 2016 22:58:37 +0200
Subject: IB/core: Support accessing SA in virtualized environment

Per the ongoing standardisation process, when virtual HCAs are present
in a network, traffic is routed based on a destination GID. In order to
access the SA we use the well-known SA GID.

We also add a GRH-required boolean field to the port attributes, which
is used to report to the verbs consumer whether this port is connected
to a virtual network. We use this field to determine whether we need to
create an address vector with GRH to access the subnet administrator.
We clear the port attributes struct before calling the hardware driver
to make sure the default remains that GRH is not required.
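
For illustration only, the pattern this patch applies in update_sm_ah()
generalizes to any consumer that must reach the SA; the sketch below
mirrors the hunk in this patch and assumes port_attr was filled by
ib_query_port():

	/* Sketch: add a GRH with the well-known SA GID when the port
	 * reports grh_required (i.e. it sits on a virtual network).
	 */
	static void fill_sa_ah_attr(struct ib_ah_attr *ah_attr,
				    const struct ib_port_attr *port_attr,
				    u8 port_num)
	{
		memset(ah_attr, 0, sizeof(*ah_attr));
		ah_attr->dlid = port_attr->sm_lid;
		ah_attr->sl = port_attr->sm_sl;
		ah_attr->port_num = port_num;
		if (port_attr->grh_required) {
			ah_attr->ah_flags = IB_AH_GRH;
			ah_attr->grh.dgid.global.subnet_prefix =
				cpu_to_be64(port_attr->subnet_prefix);
			ah_attr->grh.dgid.global.interface_id =
				cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
		}
	}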
Signed-off-by: Eli Cohen
Reviewed-by: Or Gerlitz
Signed-off-by: Doug Ledford
---
 drivers/infiniband/core/sa_query.c | 5 +++++
 include/rdma/ib_verbs.h            | 6 ++++++
 2 files changed, 11 insertions(+)

(limited to 'include/rdma')

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8e3bf6c8d3c3..d2214a55ac4a 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -885,6 +885,11 @@ static void update_sm_ah(struct work_struct *work)
 	ah_attr.dlid     = port_attr.sm_lid;
 	ah_attr.sl       = port_attr.sm_sl;
 	ah_attr.port_num = port->port_num;
+	if (port_attr.grh_required) {
+		ah_attr.ah_flags = IB_AH_GRH;
+		ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix);
+		ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
+	}
 
 	new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
 	if (IS_ERR(new_ah->ah)) {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 7239b9a4499e..3a5a66b7a33f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -97,6 +97,11 @@ enum rdma_node_type {
 	RDMA_NODE_USNIC_UDP,
 };
 
+enum {
+	/* set the local administered indication */
+	IB_SA_WELL_KNOWN_GUID	= BIT_ULL(57) | 2,
+};
+
 enum rdma_transport_type {
 	RDMA_TRANSPORT_IB,
 	RDMA_TRANSPORT_IWARP,
@@ -510,6 +515,7 @@ struct ib_port_attr {
 	u8			active_width;
 	u8			active_speed;
 	u8			phys_state;
+	bool			grh_required;
 };
 
 enum ib_device_modify_flags {
-- 
cgit v1.2.3


From 50174a7f2c24d13cdeec435ee1ba70b1e0b1318f Mon Sep 17 00:00:00 2001
From: Eli Cohen
Date: Fri, 11 Mar 2016 22:58:38 +0200
Subject: IB/core: Add interfaces to control VF attributes

Following the practice exercised for network devices, which allows the
PF net device to configure attributes of its virtual functions, we
introduce the following functions to be used by IPoIB, the network
driver implementation for IB devices:

ib_set_vf_link_state - set the policy for a VF link. More below.
ib_get_vf_config - read configuration information of a VF
ib_get_vf_stats - read VF statistics
ib_set_vf_guid - set the node or port GUID of a VF

Also add a device cap flag indicating that this IB device is based on a
virtual function.

A VF shares the physical port with the PF and other VFs. When setting
the link state we have three options:

1. Auto - in this mode, the virtual port follows the state of the
   physical port and becomes active only if the physical port's state
   is active. In all other cases it remains in a Down state.
2. Down - sets the state of the virtual port to Down.
3. Up - causes the virtual port to transition into Initialize state if
   it was not already in this state. A virtualization-aware subnet
   manager can then bring the state of the port into the Active state.
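
For illustration only, a sketch of how a PF-side consumer might drive
these interfaces. The IFLA_VF_LINK_STATE_* constants come from
linux/if_link.h; passing them as the state argument is an assumption
based on the netdev practice cited above, not something this patch
mandates:

	/* Sketch: force a VF link down, then read back its config.
	 * Both calls return -ENOSYS when the driver lacks support.
	 */
	static int vf_disable_and_query(struct ib_device *device, int vf,
					u8 port)
	{
		struct ifla_vf_info info;
		int err;

		err = ib_set_vf_link_state(device, vf, port,
					   IFLA_VF_LINK_STATE_DISABLE);
		if (err)
			return err;

		return ib_get_vf_config(device, vf, port, &info);
	}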
Signed-off-by: Eli Cohen
Reviewed-by: Or Gerlitz
Signed-off-by: Doug Ledford
---
 drivers/infiniband/core/verbs.c | 40 ++++++++++++++++++++++++++++++++++++++++
 include/rdma/ib_verbs.h         | 19 +++++++++++++++++++
 2 files changed, 59 insertions(+)

(limited to 'include/rdma')

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 5cd1e3987f2b..15b8adbf39c0 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1551,6 +1551,46 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
 }
 EXPORT_SYMBOL(ib_check_mr_status);
 
+int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+			 int state)
+{
+	if (!device->set_vf_link_state)
+		return -ENOSYS;
+
+	return device->set_vf_link_state(device, vf, port, state);
+}
+EXPORT_SYMBOL(ib_set_vf_link_state);
+
+int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+		     struct ifla_vf_info *info)
+{
+	if (!device->get_vf_config)
+		return -ENOSYS;
+
+	return device->get_vf_config(device, vf, port, info);
+}
+EXPORT_SYMBOL(ib_get_vf_config);
+
+int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+		    struct ifla_vf_stats *stats)
+{
+	if (!device->get_vf_stats)
+		return -ENOSYS;
+
+	return device->get_vf_stats(device, vf, port, stats);
+}
+EXPORT_SYMBOL(ib_get_vf_stats);
+
+int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+		   int type)
+{
+	if (!device->set_vf_guid)
+		return -ENOSYS;
+
+	return device->set_vf_guid(device, vf, port, guid, type);
+}
+EXPORT_SYMBOL(ib_set_vf_guid);
+
 /**
  * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
  * and set it the memory region.
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3a5a66b7a33f..8a245a7f981a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -56,6 +56,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 
+#include <linux/if_link.h>
 #include <linux/atomic.h>
 #include <linux/mmu_notifier.h>
 #include <linux/uaccess.h>
@@ -218,6 +219,7 @@ enum ib_device_cap_flags {
 	IB_DEVICE_SIGNATURE_HANDOVER	= (1 << 30),
 	IB_DEVICE_ON_DEMAND_PAGING	= (1 << 31),
 	IB_DEVICE_SG_GAPS_REG		= (1ULL << 32),
+	IB_DEVICE_VIRTUAL_FUNCTION	= ((u64)1 << 33),
 };
 
 enum ib_signature_prot_cap {
@@ -1867,6 +1869,14 @@ struct ib_device {
 	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
 	void (*drain_rq)(struct ib_qp *qp);
 	void (*drain_sq)(struct ib_qp *qp);
+	int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
+				 int state);
+	int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
+			     struct ifla_vf_info *ivf);
+	int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
+			    struct ifla_vf_stats *stats);
+	int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
+			   int type);
 
 	struct ib_dma_mapping_ops   *dma_ops;
 
@@ -2310,6 +2320,15 @@ int ib_query_gid(struct ib_device *device,
 		 u8 port_num, int index, union ib_gid *gid,
 		 struct ib_gid_attr *attr);
 
+int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+			 int state);
+int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+		     struct ifla_vf_info *info);
+int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+		    struct ifla_vf_stats *stats);
+int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+		   int type);
+
 int ib_query_pkey(struct ib_device *device,
 		  u8 port_num, u16 index, u16 *pkey);
 
-- 
cgit v1.2.3