diff options
author | Moni Shoua <monis@mellanox.com> | 2016-06-16 16:45:23 +0300 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2016-08-04 18:13:12 +0300 |
commit | 8700e3e7c4857d28ebaa824509934556da0b3e76 (patch) | |
tree | 16df6bc7f4764311d5ec0c6a311eaf3b95a269e3 /drivers/infiniband/sw/rxe/rxe_mmap.c | |
parent | 33688abb2802ff3a230bd2441f765477b94cc89e (diff) | |
download | linux-8700e3e7c4857d28ebaa824509934556da0b3e76.tar.xz |
Soft RoCE driver
Soft RoCE (RXE) - The software RoCE driver
ib_rxe implements the RDMA transport and registers to the RDMA core
device as a kernel verbs provider. It also implements the packet IO
layer. On the other hand ib_rxe registers to the Linux netdev stack
as a UDP encapsulating protocol, in this case RDMA, for sending and
receiving packets over any Ethernet device. This yields an RDMA
transport over the UDP/Ethernet network layer forming a RoCEv2
compatible device.
The configuration procedure of the Soft RoCE drivers requires
binding to any existing Ethernet network device. This is done through
the /sys interface.
A userspace Soft RoCE library (librxe) provides user applications
the ability to run with Soft RoCE devices. The use of rxe verbs in
user space requires the inclusion of librxe as a device-specific
plug-in to libibverbs. librxe is packaged separately.
Architecture:
+-----------------------------------------------------------+
| Application |
+-----------------------------------------------------------+
+-----------------------------------+
| libibverbs |
User +-----------------------------------+
+----------------+ +----------------+
| librxe | | HW RoCE lib |
+----------------+ +----------------+
+---------------------------------------------------------------+
+--------------+ +------------+
| Sockets | | RDMA ULP |
+--------------+ +------------+
+--------------+ +---------------------+
| TCP/IP | | ib_core |
+--------------+ +---------------------+
+------------+ +----------------+
Kernel | ib_rxe | | HW RoCE driver |
+------------+ +----------------+
+------------------------------------+
| NIC driver |
+------------------------------------+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-----------------------------------------------------------+
| Application |
+-----------------------------------------------------------+
+-----------------------------------+
| libibverbs |
User +-----------------------------------+
+----------------+ +----------------+
| librxe | | HW RoCE lib |
+----------------+ +----------------+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--------------+ +------------+
| Sockets | | RDMA ULP |
+--------------+ +------------+
+--------------+ +---------------------+
| TCP/IP | | ib_core |
+--------------+ +---------------------+
+------------+ +----------------+
Kernel | ib_rxe | | HW RoCE driver |
+------------+ +----------------+
+------------------------------------+
| NIC driver |
+------------------------------------+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Soft RoCE resources:
[1] https://github.com/SoftRoCE/librxe-dev - librxe source code on
GitHub
[2] https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home - Soft RoCE
Wiki page
[3] https://github.com/SoftRoCE/librxe-dev - Soft RoCE userspace library
Signed-off-by: Kamal Heib <kamalh@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_mmap.c')
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_mmap.c | 173 |
1 files changed, 173 insertions, 0 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c new file mode 100644 index 000000000000..54b3c7c99eff --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <asm/pgtable.h> + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +void rxe_mmap_release(struct kref *ref) +{ + struct rxe_mmap_info *ip = container_of(ref, + struct rxe_mmap_info, ref); + struct rxe_dev *rxe = to_rdev(ip->context->device); + + spin_lock_bh(&rxe->pending_lock); + + if (!list_empty(&ip->pending_mmaps)) + list_del(&ip->pending_mmaps); + + spin_unlock_bh(&rxe->pending_lock); + + vfree(ip->obj); /* buf */ + kfree(ip); +} + +/* + * open and close keep track of how many times the memory region is mapped, + * to avoid releasing it. + */ +static void rxe_vma_open(struct vm_area_struct *vma) +{ + struct rxe_mmap_info *ip = vma->vm_private_data; + + kref_get(&ip->ref); +} + +static void rxe_vma_close(struct vm_area_struct *vma) +{ + struct rxe_mmap_info *ip = vma->vm_private_data; + + kref_put(&ip->ref, rxe_mmap_release); +} + +static struct vm_operations_struct rxe_vm_ops = { + .open = rxe_vma_open, + .close = rxe_vma_close, +}; + +/** + * rxe_mmap - create a new mmap region + * @context: the IB user context of the process making the mmap() call + * @vma: the VMA to be initialized + * Return zero if the mmap is OK. Otherwise, return an errno. + */ +int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct rxe_dev *rxe = to_rdev(context->device); + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long size = vma->vm_end - vma->vm_start; + struct rxe_mmap_info *ip, *pp; + int ret; + + /* + * Search the device's list of objects waiting for a mmap call. + * Normally, this list is very short since a call to create a + * CQ, QP, or SRQ is soon followed by a call to mmap(). 
+ */ + spin_lock_bh(&rxe->pending_lock); + list_for_each_entry_safe(ip, pp, &rxe->pending_mmaps, pending_mmaps) { + if (context != ip->context || (__u64)offset != ip->info.offset) + continue; + + /* Don't allow a mmap larger than the object. */ + if (size > ip->info.size) { + pr_err("mmap region is larger than the object!\n"); + spin_unlock_bh(&rxe->pending_lock); + ret = -EINVAL; + goto done; + } + + goto found_it; + } + pr_warn("unable to find pending mmap info\n"); + spin_unlock_bh(&rxe->pending_lock); + ret = -EINVAL; + goto done; + +found_it: + list_del_init(&ip->pending_mmaps); + spin_unlock_bh(&rxe->pending_lock); + + ret = remap_vmalloc_range(vma, ip->obj, 0); + if (ret) { + pr_err("rxe: err %d from remap_vmalloc_range\n", ret); + goto done; + } + + vma->vm_ops = &rxe_vm_ops; + vma->vm_private_data = ip; + rxe_vma_open(vma); +done: + return ret; +} + +/* + * Allocate information for rxe_mmap + */ +struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, + u32 size, + struct ib_ucontext *context, + void *obj) +{ + struct rxe_mmap_info *ip; + + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) + return NULL; + + size = PAGE_ALIGN(size); + + spin_lock_bh(&rxe->mmap_offset_lock); + + if (rxe->mmap_offset == 0) + rxe->mmap_offset = PAGE_SIZE; + + ip->info.offset = rxe->mmap_offset; + rxe->mmap_offset += size; + + spin_unlock_bh(&rxe->mmap_offset_lock); + + INIT_LIST_HEAD(&ip->pending_mmaps); + ip->info.size = size; + ip->context = context; + ip->obj = obj; + kref_init(&ip->ref); + + return ip; +} |