diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-17 22:44:48 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-17 22:44:48 +0300 |
commit | 9bd553929f68921be0f2014dd06561e0c8249a0d (patch) | |
tree | 720e556374e3500af9a0210178fabfc6bd0f754c /drivers/infiniband/core/uverbs_ioctl.c | |
parent | 022ff62c3d8c3758d15ccc6b58615fd8f257ba85 (diff) | |
parent | 0a3173a5f09bc58a3638ecfd0a80bdbae55e123c (diff) | |
download | linux-9bd553929f68921be0f2014dd06561e0c8249a0d.tar.xz |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"This has been a large cycle for RDMA, with several major patch series
reworking parts of the core code.
- Rework the so-called 'gid cache' and internal APIs to use a kref'd
pointer to a struct instead of copying, push this upwards into the
callers and add more stuff to the struct. The new design avoids
some ugly races the old one suffered with. This is part of the
namespace enablement work as the new struct is learning to be
namespace aware.
- Various uapi cleanups, moving more stuff to include/uapi and fixing
some long standing bugs that have recently been discovered.
- Driver updates for mlx5, mlx4 i40iw, rxe, cxgb4, hfi1, usnic,
pvrdma, and hns
- Provide max_send_sge and max_recv_sge attributes to better support
HW where these values are asymmetric.
- mlx5 user API 'devx' allows sending commands directly to the device
FW, instead of trying to cram every wild and niche feature into the
common API. Sort of like what GPU does.
- Major write() and ioctl() API rework to cleanly support PCI device
hot unplug and advance the ioctl conversion work
- Sparse and compile warning cleanups
- Add 'const' to the ib_poll_cq() signature, and permit a NULL
'bad_wr', which is the common use case
- Various patches to avoid high order allocations across the stack
- SRQ support for cxgb4, hns and qedr
- Changes to IPoIB to better follow the netdev model for working with
struct net_device liftime"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (312 commits)
Revert "net/smc: Replace ib_query_gid with rdma_get_gid_attr"
RDMA/hns: Fix usage of bitmap allocation functions return values
IB/core: Change filter function return type from int to bool
IB/core: Update GID entries for netdevice whose mac address changes
IB/core: Add default GIDs of the bond master netdev
IB/core: Consider adding default GIDs of bond device
IB/core: Delete lower netdevice default GID entries in bonding scenario
IB/core: Avoid confusing del_netdev_default_ips
IB/core: Add comment for change upper netevent handling
qedr: Add user space support for SRQ
qedr: Add support for kernel mode SRQ's
qedr: Add wrapping generic structure for qpidr and adjust idr routines.
IB/mlx5: Fix leaking stack memory to userspace
Update the e-mail address of Bart Van Assche
IB/ucm: Fix compiling ucm.c
IB/uverbs: Do not check for device disassociation during ioctl
IB/uverbs: Remove struct uverbs_root_spec and all supporting code
IB/uverbs: Use uverbs_api to unmarshal ioctl commands
IB/uverbs: Use uverbs_alloc for allocations
IB/uverbs: Add a simple allocator to uverbs_attr_bundle
...
Diffstat (limited to 'drivers/infiniband/core/uverbs_ioctl.c')
-rw-r--r-- | drivers/infiniband/core/uverbs_ioctl.c | 709 |
1 files changed, 450 insertions, 259 deletions
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 8d32c4ae368c..1a6b229e3db3 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -35,6 +35,103 @@ #include "rdma_core.h" #include "uverbs.h" +struct bundle_alloc_head { + struct bundle_alloc_head *next; + u8 data[]; +}; + +struct bundle_priv { + /* Must be first */ + struct bundle_alloc_head alloc_head; + struct bundle_alloc_head *allocated_mem; + size_t internal_avail; + size_t internal_used; + + struct radix_tree_root *radix; + const struct uverbs_api_ioctl_method *method_elm; + void __rcu **radix_slots; + unsigned long radix_slots_len; + u32 method_key; + + struct ib_uverbs_attr __user *user_attrs; + struct ib_uverbs_attr *uattrs; + + DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN); + + /* + * Must be last. bundle ends in a flex array which overlaps + * internal_buffer. + */ + struct uverbs_attr_bundle bundle; + u64 internal_buffer[32]; +}; + +/* + * Each method has an absolute minimum amount of memory it needs to allocate, + * precompute that amount and determine if the onstack memory can be used or + * if allocation is need. + */ +void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, + unsigned int num_attrs) +{ + struct bundle_priv *pbundle; + size_t bundle_size = + offsetof(struct bundle_priv, internal_buffer) + + sizeof(*pbundle->bundle.attrs) * method_elm->key_bitmap_len + + sizeof(*pbundle->uattrs) * num_attrs; + + method_elm->use_stack = bundle_size <= sizeof(*pbundle); + method_elm->bundle_size = + ALIGN(bundle_size + 256, sizeof(*pbundle->internal_buffer)); + + /* Do not want order-2 allocations for this. */ + WARN_ON_ONCE(method_elm->bundle_size > PAGE_SIZE); +} + +/** + * uverbs_alloc() - Quickly allocate memory for use with a bundle + * @bundle: The bundle + * @size: Number of bytes to allocate + * @flags: Allocator flags + * + * The bundle allocator is intended for allocations that are connected with + * processing the system call related to the bundle. The allocated memory is + * always freed once the system call completes, and cannot be freed any other + * way. + * + * This tries to use a small pool of pre-allocated memory for performance. + */ +__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size, + gfp_t flags) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + size_t new_used; + void *res; + + if (check_add_overflow(size, pbundle->internal_used, &new_used)) + return ERR_PTR(-EOVERFLOW); + + if (new_used > pbundle->internal_avail) { + struct bundle_alloc_head *buf; + + buf = kvmalloc(struct_size(buf, data, size), flags); + if (!buf) + return ERR_PTR(-ENOMEM); + buf->next = pbundle->allocated_mem; + pbundle->allocated_mem = buf; + return buf->data; + } + + res = (void *)pbundle->internal_buffer + pbundle->internal_used; + pbundle->internal_used = + ALIGN(new_used, sizeof(*pbundle->internal_buffer)); + if (flags & __GFP_ZERO) + memset(res, 0, size); + return res; +} +EXPORT_SYMBOL(_uverbs_alloc); + static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, u16 len) { @@ -46,45 +143,24 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, 0, uattr->len - len); } -static int uverbs_process_attr(struct ib_device *ibdev, - struct ib_ucontext *ucontext, - const struct ib_uverbs_attr *uattr, - u16 attr_id, - const struct uverbs_attr_spec_hash *attr_spec_bucket, - struct uverbs_attr_bundle_hash *attr_bundle_h, - struct ib_uverbs_attr __user *uattr_ptr) +static int uverbs_process_attr(struct bundle_priv *pbundle, + const struct uverbs_api_attr *attr_uapi, + struct ib_uverbs_attr *uattr, u32 attr_bkey) { - const struct uverbs_attr_spec *spec; - const struct uverbs_attr_spec *val_spec; - struct uverbs_attr *e; - const struct uverbs_object_spec *object; + const struct uverbs_attr_spec *spec = &attr_uapi->spec; + struct uverbs_attr *e = &pbundle->bundle.attrs[attr_bkey]; + const struct uverbs_attr_spec *val_spec = spec; struct uverbs_obj_attr *o_attr; - struct uverbs_attr *elements = attr_bundle_h->attrs; - - if (attr_id >= attr_spec_bucket->num_attrs) { - if (uattr->flags & UVERBS_ATTR_F_MANDATORY) - return -EINVAL; - else - return 0; - } - - if (test_bit(attr_id, attr_bundle_h->valid_bitmap)) - return -EINVAL; - - spec = &attr_spec_bucket->attrs[attr_id]; - val_spec = spec; - e = &elements[attr_id]; - e->uattr = uattr_ptr; switch (spec->type) { case UVERBS_ATTR_TYPE_ENUM_IN: - if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems) + if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems) return -EOPNOTSUPP; if (uattr->attr_data.enum_data.reserved) return -EINVAL; - val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id]; + val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id]; /* Currently we only support PTR_IN based enums */ if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN) @@ -98,64 +174,75 @@ static int uverbs_process_attr(struct ib_device *ibdev, * longer struct will fail here if used with an old kernel and * non-zero content, making ABI compat/discovery simpler. */ - if (uattr->len > val_spec->ptr.len && - val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO && - !uverbs_is_attr_cleared(uattr, val_spec->ptr.len)) + if (uattr->len > val_spec->u.ptr.len && + val_spec->zero_trailing && + !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len)) return -EOPNOTSUPP; /* fall through */ case UVERBS_ATTR_TYPE_PTR_OUT: - if (uattr->len < val_spec->ptr.min_len || - (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) && - uattr->len > val_spec->ptr.len)) + if (uattr->len < val_spec->u.ptr.min_len || + (!val_spec->zero_trailing && + uattr->len > val_spec->u.ptr.len)) return -EINVAL; if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN && uattr->attr_data.reserved) return -EINVAL; - e->ptr_attr.data = uattr->data; + e->ptr_attr.uattr_idx = uattr - pbundle->uattrs; e->ptr_attr.len = uattr->len; - e->ptr_attr.flags = uattr->flags; + + if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) { + void *p; + + p = uverbs_alloc(&pbundle->bundle, uattr->len); + if (IS_ERR(p)) + return PTR_ERR(p); + + e->ptr_attr.ptr = p; + + if (copy_from_user(p, u64_to_user_ptr(uattr->data), + uattr->len)) + return -EFAULT; + } else { + e->ptr_attr.data = uattr->data; + } break; case UVERBS_ATTR_TYPE_IDR: - if (uattr->data >> 32) - return -EINVAL; - /* fall through */ case UVERBS_ATTR_TYPE_FD: if (uattr->attr_data.reserved) return -EINVAL; - if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX) + if (uattr->len != 0) return -EINVAL; o_attr = &e->obj_attr; - object = uverbs_get_object(ibdev, spec->obj.obj_type); - if (!object) - return -EINVAL; - o_attr->type = object->type_attrs; - - o_attr->id = (int)uattr->data; - o_attr->uobject = uverbs_get_uobject_from_context( - o_attr->type, - ucontext, - spec->obj.access, - o_attr->id); + o_attr->attr_elm = attr_uapi; + /* + * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and + * s64 for UVERBS_ATTR_TYPE_FD. We can cast the u64 to s64 + * here without caring about truncation as we know that the + * IDR implementation today rejects negative IDs + */ + o_attr->uobject = uverbs_get_uobject_from_file( + spec->u.obj.obj_type, + pbundle->bundle.ufile, + spec->u.obj.access, + uattr->data_s64); if (IS_ERR(o_attr->uobject)) return PTR_ERR(o_attr->uobject); + __set_bit(attr_bkey, pbundle->uobj_finalize); - if (spec->obj.access == UVERBS_ACCESS_NEW) { - u64 id = o_attr->uobject->id; + if (spec->u.obj.access == UVERBS_ACCESS_NEW) { + unsigned int uattr_idx = uattr - pbundle->uattrs; + s64 id = o_attr->uobject->id; /* Copy the allocated id to the user-space */ - if (put_user(id, &e->uattr->data)) { - uverbs_finalize_object(o_attr->uobject, - UVERBS_ACCESS_NEW, - false); + if (put_user(id, &pbundle->user_attrs[uattr_idx].data)) return -EFAULT; - } } break; @@ -163,220 +250,225 @@ static int uverbs_process_attr(struct ib_device *ibdev, return -EOPNOTSUPP; } - set_bit(attr_id, attr_bundle_h->valid_bitmap); return 0; } -static int uverbs_uattrs_process(struct ib_device *ibdev, - struct ib_ucontext *ucontext, - const struct ib_uverbs_attr *uattrs, - size_t num_uattrs, - const struct uverbs_method_spec *method, - struct uverbs_attr_bundle *attr_bundle, - struct ib_uverbs_attr __user *uattr_ptr) +/* + * We search the radix tree with the method prefix and now we want to fast + * search the suffix bits to get a particular attribute pointer. It is not + * totally clear to me if this breaks the radix tree encasulation or not, but + * it uses the iter data to determine if the method iter points at the same + * chunk that will store the attribute, if so it just derefs it directly. By + * construction in most kernel configs the method and attrs will all fit in a + * single radix chunk, so in most cases this will have no search. Other cases + * this falls back to a full search. + */ +static void __rcu **uapi_get_attr_for_method(struct bundle_priv *pbundle, + u32 attr_key) { - size_t i; - int ret = 0; - int num_given_buckets = 0; - - for (i = 0; i < num_uattrs; i++) { - const struct ib_uverbs_attr *uattr = &uattrs[i]; - u16 attr_id = uattr->attr_id; - struct uverbs_attr_spec_hash *attr_spec_bucket; - - ret = uverbs_ns_idx(&attr_id, method->num_buckets); - if (ret < 0) { - if (uattr->flags & UVERBS_ATTR_F_MANDATORY) { - uverbs_finalize_objects(attr_bundle, - method->attr_buckets, - num_given_buckets, - false); - return ret; - } - continue; - } + void __rcu **slot; - /* - * ret is the found ns, so increase num_given_buckets if - * necessary. - */ - if (ret >= num_given_buckets) - num_given_buckets = ret + 1; - - attr_spec_bucket = method->attr_buckets[ret]; - ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id, - attr_spec_bucket, &attr_bundle->hash[ret], - uattr_ptr++); - if (ret) { - uverbs_finalize_objects(attr_bundle, - method->attr_buckets, - num_given_buckets, - false); - return ret; - } + if (likely(attr_key < pbundle->radix_slots_len)) { + void *entry; + + slot = pbundle->radix_slots + attr_key; + entry = rcu_dereference_raw(*slot); + if (likely(!radix_tree_is_internal_node(entry) && entry)) + return slot; } - return num_given_buckets; + return radix_tree_lookup_slot(pbundle->radix, + pbundle->method_key | attr_key); } -static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *method_spec, - struct uverbs_attr_bundle *attr_bundle) +static int uverbs_set_attr(struct bundle_priv *pbundle, + struct ib_uverbs_attr *uattr) { - unsigned int i; - - for (i = 0; i < attr_bundle->num_buckets; i++) { - struct uverbs_attr_spec_hash *attr_spec_bucket = - method_spec->attr_buckets[i]; + u32 attr_key = uapi_key_attr(uattr->attr_id); + u32 attr_bkey = uapi_bkey_attr(attr_key); + const struct uverbs_api_attr *attr; + void __rcu **slot; + int ret; - if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask, - attr_bundle->hash[i].valid_bitmap, - attr_spec_bucket->num_attrs)) - return -EINVAL; + slot = uapi_get_attr_for_method(pbundle, attr_key); + if (!slot) { + /* + * Kernel does not support the attribute but user-space says it + * is mandatory + */ + if (uattr->flags & UVERBS_ATTR_F_MANDATORY) + return -EPROTONOSUPPORT; + return 0; } + attr = srcu_dereference( + *slot, &pbundle->bundle.ufile->device->disassociate_srcu); - for (; i < method_spec->num_buckets; i++) { - struct uverbs_attr_spec_hash *attr_spec_bucket = - method_spec->attr_buckets[i]; + /* Reject duplicate attributes from user-space */ + if (test_bit(attr_bkey, pbundle->bundle.attr_present)) + return -EINVAL; - if (!bitmap_empty(attr_spec_bucket->mandatory_attrs_bitmask, - attr_spec_bucket->num_attrs)) - return -EINVAL; - } + ret = uverbs_process_attr(pbundle, attr, uattr, attr_bkey); + if (ret) + return ret; + + __set_bit(attr_bkey, pbundle->bundle.attr_present); return 0; } -static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, - const struct ib_uverbs_attr *uattrs, - size_t num_uattrs, - struct ib_device *ibdev, - struct ib_uverbs_file *ufile, - const struct uverbs_method_spec *method_spec, - struct uverbs_attr_bundle *attr_bundle) +static int ib_uverbs_run_method(struct bundle_priv *pbundle, + unsigned int num_attrs) { + int (*handler)(struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *ctx); + size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs); + unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey; + unsigned int i; int ret; - int finalize_ret; - int num_given_buckets; - num_given_buckets = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs, - num_uattrs, method_spec, - attr_bundle, uattr_ptr); - if (num_given_buckets <= 0) + /* See uverbs_disassociate_api() */ + handler = srcu_dereference( + pbundle->method_elm->handler, + &pbundle->bundle.ufile->device->disassociate_srcu); + if (!handler) + return -EIO; + + pbundle->uattrs = uverbs_alloc(&pbundle->bundle, uattrs_size); + if (IS_ERR(pbundle->uattrs)) + return PTR_ERR(pbundle->uattrs); + if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size)) + return -EFAULT; + + for (i = 0; i != num_attrs; i++) { + ret = uverbs_set_attr(pbundle, &pbundle->uattrs[i]); + if (unlikely(ret)) + return ret; + } + + /* User space did not provide all the mandatory attributes */ + if (unlikely(!bitmap_subset(pbundle->method_elm->attr_mandatory, + pbundle->bundle.attr_present, + pbundle->method_elm->key_bitmap_len))) return -EINVAL; - attr_bundle->num_buckets = num_given_buckets; - ret = uverbs_validate_kernel_mandatory(method_spec, attr_bundle); - if (ret) - goto cleanup; + if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) { + struct uverbs_obj_attr *destroy_attr = + &pbundle->bundle.attrs[destroy_bkey].obj_attr; - ret = method_spec->handler(ibdev, ufile, attr_bundle); -cleanup: - finalize_ret = uverbs_finalize_objects(attr_bundle, - method_spec->attr_buckets, - attr_bundle->num_buckets, - !ret); + ret = uobj_destroy(destroy_attr->uobject); + if (ret) + return ret; + __clear_bit(destroy_bkey, pbundle->uobj_finalize); - return ret ? ret : finalize_ret; -} + ret = handler(pbundle->bundle.ufile, &pbundle->bundle); + uobj_put_destroy(destroy_attr->uobject); + } else { + ret = handler(pbundle->bundle.ufile, &pbundle->bundle); + } -#define UVERBS_OPTIMIZE_USING_STACK_SZ 256 -static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, - struct ib_uverbs_file *file, - struct ib_uverbs_ioctl_hdr *hdr, - void __user *buf) -{ - const struct uverbs_object_spec *object_spec; - const struct uverbs_method_spec *method_spec; - long err = 0; - unsigned int i; - struct { - struct ib_uverbs_attr *uattrs; - struct uverbs_attr_bundle *uverbs_attr_bundle; - } *ctx = NULL; - struct uverbs_attr *curr_attr; - unsigned long *curr_bitmap; - size_t ctx_size; - uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)]; - - if (hdr->driver_id != ib_dev->driver_id) + /* + * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can + * not invoke the method because the request is not supported. No + * other cases should return this code. + */ + if (WARN_ON_ONCE(ret == -EPROTONOSUPPORT)) return -EINVAL; - object_spec = uverbs_get_object(ib_dev, hdr->object_id); - if (!object_spec) - return -EPROTONOSUPPORT; + return ret; +} - method_spec = uverbs_get_method(object_spec, hdr->method_id); - if (!method_spec) - return -EPROTONOSUPPORT; +static int bundle_destroy(struct bundle_priv *pbundle, bool commit) +{ + unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len; + struct bundle_alloc_head *memblock; + unsigned int i; + int ret = 0; - if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext) - return -EINVAL; + i = -1; + while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len, + i + 1)) < key_bitmap_len) { + struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; + int current_ret; + + current_ret = uverbs_finalize_object( + attr->obj_attr.uobject, + attr->obj_attr.attr_elm->spec.u.obj.access, commit); + if (!ret) + ret = current_ret; + } - ctx_size = sizeof(*ctx) + - sizeof(struct uverbs_attr_bundle) + - sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets + - sizeof(*ctx->uattrs) * hdr->num_attrs + - sizeof(*ctx->uverbs_attr_bundle->hash[0].attrs) * - method_spec->num_child_attrs + - sizeof(*ctx->uverbs_attr_bundle->hash[0].valid_bitmap) * - (method_spec->num_child_attrs / BITS_PER_LONG + - method_spec->num_buckets); - - if (ctx_size <= UVERBS_OPTIMIZE_USING_STACK_SZ) - ctx = (void *)data; - if (!ctx) - ctx = kmalloc(ctx_size, GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - ctx->uverbs_attr_bundle = (void *)ctx + sizeof(*ctx); - ctx->uattrs = (void *)(ctx->uverbs_attr_bundle + 1) + - (sizeof(ctx->uverbs_attr_bundle->hash[0]) * - method_spec->num_buckets); - curr_attr = (void *)(ctx->uattrs + hdr->num_attrs); - curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs); + for (memblock = pbundle->allocated_mem; memblock;) { + struct bundle_alloc_head *tmp = memblock; - /* - * We just fill the pointers and num_attrs here. The data itself will be - * filled at a later stage (uverbs_process_attr) - */ - for (i = 0; i < method_spec->num_buckets; i++) { - unsigned int curr_num_attrs = method_spec->attr_buckets[i]->num_attrs; - - ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr; - curr_attr += curr_num_attrs; - ctx->uverbs_attr_bundle->hash[i].num_attrs = curr_num_attrs; - ctx->uverbs_attr_bundle->hash[i].valid_bitmap = curr_bitmap; - bitmap_zero(curr_bitmap, curr_num_attrs); - curr_bitmap += BITS_TO_LONGS(curr_num_attrs); + memblock = memblock->next; + kvfree(tmp); } - err = copy_from_user(ctx->uattrs, buf, - sizeof(*ctx->uattrs) * hdr->num_attrs); - if (err) { - err = -EFAULT; - goto out; - } + return ret; +} - err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev, - file, method_spec, ctx->uverbs_attr_bundle); +static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, + struct ib_uverbs_ioctl_hdr *hdr, + struct ib_uverbs_attr __user *user_attrs) +{ + const struct uverbs_api_ioctl_method *method_elm; + struct uverbs_api *uapi = ufile->device->uapi; + struct radix_tree_iter attrs_iter; + struct bundle_priv *pbundle; + struct bundle_priv onstack; + void __rcu **slot; + int destroy_ret; + int ret; - /* - * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can - * not invoke the method because the request is not supported. No - * other cases should return this code. - */ - if (unlikely(err == -EPROTONOSUPPORT)) { - WARN_ON_ONCE(err == -EPROTONOSUPPORT); - err = -EINVAL; + if (unlikely(hdr->driver_id != uapi->driver_id)) + return -EINVAL; + + slot = radix_tree_iter_lookup( + &uapi->radix, &attrs_iter, + uapi_key_obj(hdr->object_id) | + uapi_key_ioctl_method(hdr->method_id)); + if (unlikely(!slot)) + return -EPROTONOSUPPORT; + method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu); + + if (!method_elm->use_stack) { + pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL); + if (!pbundle) + return -ENOMEM; + pbundle->internal_avail = + method_elm->bundle_size - + offsetof(struct bundle_priv, internal_buffer); + pbundle->alloc_head.next = NULL; + pbundle->allocated_mem = &pbundle->alloc_head; + } else { + pbundle = &onstack; + pbundle->internal_avail = sizeof(pbundle->internal_buffer); + pbundle->allocated_mem = NULL; } -out: - if (ctx != (void *)data) - kfree(ctx); - return err; -} -#define IB_UVERBS_MAX_CMD_SZ 4096 + /* Space for the pbundle->bundle.attrs flex array */ + pbundle->method_elm = method_elm; + pbundle->method_key = attrs_iter.index; + pbundle->bundle.ufile = ufile; + pbundle->radix = &uapi->radix; + pbundle->radix_slots = slot; + pbundle->radix_slots_len = radix_tree_chunk_size(&attrs_iter); + pbundle->user_attrs = user_attrs; + + pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len * + sizeof(*pbundle->bundle.attrs), + sizeof(*pbundle->internal_buffer)); + memset(pbundle->bundle.attr_present, 0, + sizeof(pbundle->bundle.attr_present)); + memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize)); + + ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); + destroy_ret = bundle_destroy(pbundle, ret == 0); + if (unlikely(destroy_ret && !ret)) + return destroy_ret; + + return ret; +} long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -384,39 +476,138 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct ib_uverbs_ioctl_hdr __user *user_hdr = (struct ib_uverbs_ioctl_hdr __user *)arg; struct ib_uverbs_ioctl_hdr hdr; - struct ib_device *ib_dev; int srcu_key; - long err; + int err; + + if (unlikely(cmd != RDMA_VERBS_IOCTL)) + return -ENOIOCTLCMD; + + err = copy_from_user(&hdr, user_hdr, sizeof(hdr)); + if (err) + return -EFAULT; + + if (hdr.length > PAGE_SIZE || + hdr.length != struct_size(&hdr, attrs, hdr.num_attrs)) + return -EINVAL; + + if (hdr.reserved1 || hdr.reserved2) + return -EPROTONOSUPPORT; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - err = -EIO; - goto out; + err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs); + srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); + return err; +} + +int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + const struct uverbs_attr *attr; + u64 flags; + + attr = uverbs_attr_get(attrs_bundle, idx); + /* Missing attribute means 0 flags */ + if (IS_ERR(attr)) { + *to = 0; + return 0; } - if (cmd == RDMA_VERBS_IOCTL) { - err = copy_from_user(&hdr, user_hdr, sizeof(hdr)); + /* + * New userspace code should use 8 bytes to pass flags, but we + * transparently support old userspaces that were using 4 bytes as + * well. + */ + if (attr->ptr_attr.len == 8) + flags = attr->ptr_attr.data; + else if (attr->ptr_attr.len == 4) + flags = *(u32 *)&attr->ptr_attr.data; + else + return -EINVAL; - if (err || hdr.length > IB_UVERBS_MAX_CMD_SZ || - hdr.length != sizeof(hdr) + hdr.num_attrs * sizeof(struct ib_uverbs_attr)) { - err = -EINVAL; - goto out; - } + if (flags & ~allowed_bits) + return -EINVAL; - if (hdr.reserved1 || hdr.reserved2) { - err = -EPROTONOSUPPORT; - goto out; - } + *to = flags; + return 0; +} +EXPORT_SYMBOL(uverbs_get_flags64); - err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr, - (__user void *)arg + sizeof(hdr)); +int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + u64 flags; + int ret; + + ret = uverbs_get_flags64(&flags, attrs_bundle, idx, allowed_bits); + if (ret) + return ret; + + if (flags > U32_MAX) + return -EINVAL; + *to = flags; + + return 0; +} +EXPORT_SYMBOL(uverbs_get_flags32); + +/* + * This is for ease of conversion. The purpose is to convert all drivers to + * use uverbs_attr_bundle instead of ib_udata. Assume attr == 0 is input and + * attr == 1 is output. + */ +void create_udata(struct uverbs_attr_bundle *bundle, struct ib_udata *udata) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + const struct uverbs_attr *uhw_in = + uverbs_attr_get(bundle, UVERBS_ATTR_UHW_IN); + const struct uverbs_attr *uhw_out = + uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT); + + if (!IS_ERR(uhw_in)) { + udata->inlen = uhw_in->ptr_attr.len; + if (uverbs_attr_ptr_is_inline(uhw_in)) + udata->inbuf = + &pbundle->user_attrs[uhw_in->ptr_attr.uattr_idx] + .data; + else + udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data); } else { - err = -ENOIOCTLCMD; + udata->inbuf = NULL; + udata->inlen = 0; } -out: - srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); - return err; + if (!IS_ERR(uhw_out)) { + udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data); + udata->outlen = uhw_out->ptr_attr.len; + } else { + udata->outbuf = NULL; + udata->outlen = 0; + } +} + +int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx, + const void *from, size_t size) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + u16 flags; + size_t min_size; + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + min_size = min_t(size_t, attr->ptr_attr.len, size); + if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size)) + return -EFAULT; + + flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags | + UVERBS_ATTR_F_VALID_OUTPUT; + if (put_user(flags, + &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags)) + return -EFAULT; + + return 0; } +EXPORT_SYMBOL(uverbs_copy_to); |