diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-09 19:02:46 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-09 19:02:46 +0300 |
commit | dce45af5c2e9e85f22578f2f8065f225f5d11764 (patch) | |
tree | e01e7a294586c3074142fb485516ce718a1a82d2 /include | |
parent | 055128ee008b00fba14e3638e7e84fc2cff8d77d (diff) | |
parent | b79656ed44c6865e17bcd93472ec39488bcc4984 (diff) | |
download | linux-dce45af5c2e9e85f22578f2f8065f225f5d11764.tar.xz |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"This has been a smaller cycle than normal. One new driver was
accepted, which is unusual, and at least one more driver remains in
review on the list.
Summary:
- Driver fixes for hns, hfi1, nes, rxe, i40iw, mlx5, cxgb4,
vmw_pvrdma
- Many patches from MatthewW converting radix tree and IDR users to
use xarray
- Introduction of tracepoints to the MAD layer
- Build large SGLs at the start for DMA mapping and get the driver to
split them
- Generally clean SGL handling code throughout the subsystem
- Support for restricting RDMA devices to net namespaces for
containers
- Progress to remove object allocation boilerplate code from drivers
- Change in how the mlx5 driver shows representor ports linked to VFs
- mlx5 uapi feature to access the on chip SW ICM memory
- Add a new driver for 'EFA'. This is HW that supports user space
packet processing through QPs in Amazon's cloud"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (186 commits)
RDMA/ipoib: Allow user space differentiate between valid dev_port
IB/core, ipoib: Do not overreact to SM LID change event
RDMA/device: Don't fire uevent before device is fully initialized
lib/scatterlist: Remove leftover from sg_page_iter comment
RDMA/efa: Add driver to Kconfig/Makefile
RDMA/efa: Add the efa module
RDMA/efa: Add EFA verbs implementation
RDMA/efa: Add common command handlers
RDMA/efa: Implement functions that submit and complete admin commands
RDMA/efa: Add the ABI definitions
RDMA/efa: Add the com service API definitions
RDMA/efa: Add the efa_com.h file
RDMA/efa: Add the efa.h header file
RDMA/efa: Add EFA device definitions
RDMA: Add EFA related definitions
RDMA/umem: Remove hugetlb flag
RDMA/bnxt_re: Use core helpers to get aligned DMA address
RDMA/i40iw: Use core helpers to get aligned DMA address within a supported page size
RDMA/verbs: Add a DMA iterator to return aligned contiguous memory blocks
RDMA/umem: Add API to find best driver supported page size in an MR
...
Diffstat (limited to 'include')
25 files changed, 1177 insertions, 228 deletions
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index c2be029b9b53..6c809440f319 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -71,6 +71,13 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor, const struct net_device *dev, const char *fmt, ...); +struct ib_device; + +extern __printf(3, 4) +void __dynamic_ibdev_dbg(struct _ddebug *descriptor, + const struct ib_device *ibdev, + const char *fmt, ...); + #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ static struct _ddebug __aligned(8) \ __attribute__((section("__verbose"))) name = { \ @@ -154,6 +161,10 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor, _dynamic_func_call(fmt, __dynamic_netdev_dbg, \ dev, fmt, ##__VA_ARGS__) +#define dynamic_ibdev_dbg(dev, fmt, ...) \ + _dynamic_func_call(fmt, __dynamic_ibdev_dbg, \ + dev, fmt, ##__VA_ARGS__) + #define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ _dynamic_func_call_no_desc(__builtin_constant_p(prefix_str) ? prefix_str : "hexdump", \ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5a39b323c52e..5a27246db883 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -689,7 +689,6 @@ struct mlx5_core_dev { #endif struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; - struct page *clock_info_page; struct mlx5_fw_tracer *tracer; }; diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 40b48e2133cb..15eb85de9226 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -36,6 +36,12 @@ #define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) #define type_min(T) ((T)((T)-type_max(T)-(T)1)) +/* + * Avoids triggering -Wtype-limits compilation warning, + * while using unsigned data types to check a < 0. + */ +#define is_non_negative(a) ((a) > 0 || (a) == 0) +#define is_negative(a) (!(is_non_negative(a))) #ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW /* @@ -227,10 +233,10 @@ typeof(d) _d = d; \ u64 _a_full = _a; \ unsigned int _to_shift = \ - _s >= 0 && _s < 8 * sizeof(*d) ? _s : 0; \ + is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \ *_d = (_a_full << _to_shift); \ - (_to_shift != _s || *_d < 0 || _a < 0 || \ - (*_d >> _to_shift) != _a); \ + (_to_shift != _s || is_negative(*_d) || is_negative(_a) || \ + (*_d >> _to_shift) != _a); \ }) /** diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index b4be960c7e5d..30a9a55c28ba 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -340,11 +340,11 @@ int sg_alloc_table_chained(struct sg_table *table, int nents, * sg page iterator * * Iterates over sg entries page-by-page. On each successful iteration, you - * can call sg_page_iter_page(@piter) to get the current page and its dma - * address. @piter->sg will point to the sg holding this page and - * @piter->sg_pgoffset to the page's page offset within the sg. The iteration - * will stop either when a maximum number of sg entries was reached or a - * terminating sg (sg_last(sg) == true) was reached. + * can call sg_page_iter_page(@piter) to get the current page. + * @piter->sg will point to the sg holding this page and @piter->sg_pgoffset to + * the page's page offset within the sg. The iteration will stop either when a + * maximum number of sg entries was reached or a terminating sg + * (sg_last(sg) == true) was reached. */ struct sg_page_iter { struct scatterlist *sg; /* sg holding the page */ diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 62e990b620aa..870b5e6c06db 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -54,6 +54,10 @@ const struct ib_gid_attr *rdma_find_gid_by_filter( void *), void *context); +int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, + u16 *vlan_id, u8 *smac); +struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr); + /** * ib_get_cached_pkey - Returns a cached PKey table entry * @device: The device to query. diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 79ba8219e7dc..eea946fcc819 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -198,7 +198,7 @@ struct ib_sa_hdr { __be16 attr_offset; __be16 reserved; ib_sa_comp_mask comp_mask; -} __attribute__ ((packed)); +} __packed; struct ib_mad { struct ib_mad_hdr mad_hdr; @@ -227,7 +227,7 @@ struct ib_sa_mad { struct ib_rmpp_hdr rmpp_hdr; struct ib_sa_hdr sa_hdr; u8 data[IB_MGMT_SA_DATA]; -} __attribute__ ((packed)); +} __packed; struct ib_vendor_mad { struct ib_mad_hdr mad_hdr; diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h index b439e988408e..7be0028f155c 100644 --- a/include/rdma/ib_smi.h +++ b/include/rdma/ib_smi.h @@ -61,7 +61,7 @@ struct ib_smp { u8 data[IB_SMP_DATA_SIZE]; u8 initial_path[IB_SMP_MAX_PATH_HOPS]; u8 return_path[IB_SMP_MAX_PATH_HOPS]; -} __attribute__ ((packed)); +} __packed; #define IB_SMP_DIRECTION cpu_to_be16(0x8000) diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 73af05db04c7..040d853077c6 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -48,12 +48,11 @@ struct ib_umem { unsigned long address; int page_shift; u32 writable : 1; - u32 hugetlb : 1; u32 is_odp : 1; struct work_struct work; struct sg_table sg_head; int nmap; - int npages; + unsigned int sg_nents; }; /* Returns the offset of the umem start relative to the first page. */ @@ -87,6 +86,9 @@ void ib_umem_release(struct ib_umem *umem); int ib_umem_page_count(struct ib_umem *umem); int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length); +unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, + unsigned long pgsz_bitmap, + unsigned long virt); #else /* CONFIG_INFINIBAND_USER_MEM */ @@ -104,6 +106,12 @@ static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offs size_t length) { return -EINVAL; } +static inline int ib_umem_find_best_pgsz(struct ib_umem *umem, + unsigned long pgsz_bitmap, + unsigned long virt) { + return -EINVAL; +} + #endif /* CONFIG_INFINIBAND_USER_MEM */ #endif /* IB_UMEM_H */ diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index dadc96dea39c..eeec4e53c448 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -69,6 +69,7 @@ struct ib_umem_odp { int notifiers_seq; int notifiers_count; + int npages; /* Tree tracking */ struct umem_odp_node interval_tree; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9b9e17bcc201..0742095355f2 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -59,6 +59,8 @@ #include <linux/mmu_notifier.h> #include <linux/uaccess.h> #include <linux/cgroup_rdma.h> +#include <linux/irqflags.h> +#include <linux/preempt.h> #include <uapi/rdma/ib_user_verbs.h> #include <rdma/restrack.h> #include <uapi/rdma/rdma_user_ioctl.h> @@ -72,6 +74,36 @@ extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; extern struct workqueue_struct *ib_comp_unbound_wq; +__printf(3, 4) __cold +void ibdev_printk(const char *level, const struct ib_device *ibdev, + const char *format, ...); +__printf(2, 3) __cold +void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_alert(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_crit(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_err(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_warn(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_notice(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_info(const struct ib_device *ibdev, const char *format, ...); + +#if defined(CONFIG_DYNAMIC_DEBUG) +#define ibdev_dbg(__dev, format, args...) \ + dynamic_ibdev_dbg(__dev, format, ##args) +#elif defined(DEBUG) +#define ibdev_dbg(__dev, format, args...) \ + ibdev_printk(KERN_DEBUG, __dev, format, ##args) +#else +__printf(2, 3) __cold +static inline +void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {} +#endif + union ib_gid { u8 raw[16]; struct { @@ -92,7 +124,7 @@ enum ib_gid_type { #define ROCE_V2_UDP_DPORT 4791 struct ib_gid_attr { - struct net_device *ndev; + struct net_device __rcu *ndev; struct ib_device *device; union ib_gid gid; enum ib_gid_type gid_type; @@ -108,6 +140,7 @@ enum rdma_node_type { RDMA_NODE_RNIC, RDMA_NODE_USNIC, RDMA_NODE_USNIC_UDP, + RDMA_NODE_UNSPECIFIED, }; enum { @@ -119,7 +152,8 @@ enum rdma_transport_type { RDMA_TRANSPORT_IB, RDMA_TRANSPORT_IWARP, RDMA_TRANSPORT_USNIC, - RDMA_TRANSPORT_USNIC_UDP + RDMA_TRANSPORT_USNIC_UDP, + RDMA_TRANSPORT_UNSPECIFIED, }; enum rdma_protocol_type { @@ -2189,8 +2223,6 @@ struct ib_cache { struct ib_event_handler event_handler; }; -struct iw_cm_verbs; - struct ib_port_immutable { int pkey_tbl_len; int gid_tbl_len; @@ -2272,6 +2304,8 @@ struct ib_counters_read_attr { }; struct uverbs_attr_bundle; +struct iw_cm_id; +struct iw_cm_conn_param; #define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \ .size_##ib_struct = \ @@ -2281,8 +2315,11 @@ struct uverbs_attr_bundle; !__same_type(((struct drv_struct *)NULL)->member, \ struct ib_struct))) +#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \ + ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp)) + #define rdma_zalloc_drv_obj(ib_dev, ib_type) \ - ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, GFP_KERNEL)) + rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL) #define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct @@ -2394,23 +2431,21 @@ struct ib_device_ops { void (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); - int (*alloc_pd)(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata); - void (*dealloc_pd)(struct ib_pd *pd); - struct ib_ah *(*create_ah)(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, u32 flags, - struct ib_udata *udata); + int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); + void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); + int (*create_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); - int (*destroy_ah)(struct ib_ah *ah, u32 flags); - struct ib_srq *(*create_srq)(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); + void (*destroy_ah)(struct ib_ah *ah, u32 flags); + int (*create_srq)(struct ib_srq *srq, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); - int (*destroy_srq)(struct ib_srq *srq); + void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata); struct ib_qp *(*create_qp)(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); @@ -2418,13 +2453,12 @@ struct ib_device_ops { int qp_attr_mask, struct ib_udata *udata); int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); - int (*destroy_qp)(struct ib_qp *qp); + int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata); struct ib_cq *(*create_cq)(struct ib_device *device, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); - int (*destroy_cq)(struct ib_cq *cq); + int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, @@ -2433,9 +2467,9 @@ struct ib_device_ops { int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); - int (*dereg_mr)(struct ib_mr *mr); + int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg); + u32 max_num_sg, struct ib_udata *udata); int (*advise_mr)(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge, @@ -2456,9 +2490,8 @@ struct ib_device_ops { int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, - struct ib_ucontext *ucontext, struct ib_udata *udata); - int (*dealloc_xrcd)(struct ib_xrcd *xrcd); + int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); struct ib_flow *(*create_flow)(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain, struct ib_udata *udata); @@ -2483,7 +2516,7 @@ struct ib_device_ops { struct ib_wq *(*create_wq)(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); - int (*destroy_wq)(struct ib_wq *wq); + int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata); int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask, struct ib_udata *udata); struct ib_rwq_ind_table *(*create_rwq_ind_table)( @@ -2495,7 +2528,7 @@ struct ib_device_ops { struct ib_ucontext *context, struct ib_dm_alloc_attr *attr, struct uverbs_attr_bundle *attrs); - int (*dealloc_dm)(struct ib_dm *dm); + int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs); struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr, struct uverbs_attr_bundle *attrs); @@ -2550,12 +2583,37 @@ struct ib_device_ops { */ void (*dealloc_driver)(struct ib_device *dev); + /* iWarp CM callbacks */ + void (*iw_add_ref)(struct ib_qp *qp); + void (*iw_rem_ref)(struct ib_qp *qp); + struct ib_qp *(*iw_get_qp)(struct ib_device *device, int qpn); + int (*iw_connect)(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param); + int (*iw_accept)(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param); + int (*iw_reject)(struct iw_cm_id *cm_id, const void *pdata, + u8 pdata_len); + int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog); + int (*iw_destroy_listen)(struct iw_cm_id *cm_id); + + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_pd); + DECLARE_RDMA_OBJ_SIZE(ib_srq); DECLARE_RDMA_OBJ_SIZE(ib_ucontext); }; -struct rdma_restrack_root; +struct ib_core_device { + /* device must be the first element in structure until, + * union of ib_core_device and device exists in ib_device. + */ + struct device dev; + possible_net_t rdma_net; + struct kobject *ports_kobj; + struct list_head port_list; + struct ib_device *owner; /* reach back to owner ib_device */ +}; +struct rdma_restrack_root; struct ib_device { /* Do not access @dma_device directly from ULP nor from HW drivers. */ struct device *dma_device; @@ -2578,19 +2636,18 @@ struct ib_device { int num_comp_vectors; - struct iw_cm_verbs *iwcm; - struct module *owner; - struct device dev; + union { + struct device dev; + struct ib_core_device coredev; + }; + /* First group for device attributes, * Second group for driver provided attributes (optional). * It is NULL terminated array. */ const struct attribute_group *groups[3]; - struct kobject *ports_kobj; - struct list_head port_list; - int uverbs_abi_ver; u64 uverbs_cmd_mask; u64 uverbs_ex_cmd_mask; @@ -2626,6 +2683,15 @@ struct ib_device { struct work_struct unregistration_work; const struct rdma_link_ops *link_ops; + + /* Protects compat_devs xarray modifications */ + struct mutex compat_devs_mutex; + /* Maintains compat devices for each net namespace */ + struct xarray compat_devs; + + /* Used by iWarp CM */ + char iw_ifname[IFNAMSIZ]; + u32 iw_driver_flags; }; struct ib_client { @@ -2662,6 +2728,21 @@ struct ib_client { u8 no_kverbs_req:1; }; +/* + * IB block DMA iterator + * + * Iterates the DMA-mapped SGL in contiguous memory blocks aligned + * to a HW supported page size. + */ +struct ib_block_iter { + /* internal states */ + struct scatterlist *__sg; /* sg holding the current aligned block */ + dma_addr_t __dma_addr; /* unaligned DMA address of this block */ + unsigned int __sg_nents; /* number of SG entries */ + unsigned int __sg_advance; /* number of bytes to advance in sg in next step */ + unsigned int __pg_bit; /* alignment of current block */ +}; + struct ib_device *_ib_alloc_device(size_t size); #define ib_alloc_device(drv_struct, member) \ container_of(_ib_alloc_device(sizeof(struct drv_struct) + \ @@ -2682,6 +2763,38 @@ void ib_unregister_device_queued(struct ib_device *ib_dev); int ib_register_client (struct ib_client *client); void ib_unregister_client(struct ib_client *client); +void __rdma_block_iter_start(struct ib_block_iter *biter, + struct scatterlist *sglist, + unsigned int nents, + unsigned long pgsz); +bool __rdma_block_iter_next(struct ib_block_iter *biter); + +/** + * rdma_block_iter_dma_address - get the aligned dma address of the current + * block held by the block iterator. + * @biter: block iterator holding the memory block + */ +static inline dma_addr_t +rdma_block_iter_dma_address(struct ib_block_iter *biter) +{ + return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1); +} + +/** + * rdma_for_each_block - iterate over contiguous memory blocks of the sg list + * @sglist: sglist to iterate over + * @biter: block iterator holding the memory block + * @nents: maximum number of sg entries to iterate over + * @pgsz: best HW supported page size to use + * + * Callers may use rdma_block_iter_dma_address() to get each + * blocks aligned DMA address. + */ +#define rdma_for_each_block(sglist, biter, nents, pgsz) \ + for (__rdma_block_iter_start(biter, sglist, nents, \ + pgsz); \ + __rdma_block_iter_next(biter);) + /** * ib_get_client_data - Get IB client context * @device:Device to get context for @@ -2705,9 +2818,6 @@ void ib_set_device_ops(struct ib_device *device, #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, unsigned long pfn, unsigned long size, pgprot_t prot); -int rdma_user_mmap_page(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, struct page *page, - unsigned long size); #else static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, @@ -2716,12 +2826,6 @@ static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, { return -EINVAL; } -static inline int rdma_user_mmap_page(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, struct page *page, - unsigned long size) -{ - return -EINVAL; -} #endif static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) @@ -2978,8 +3082,8 @@ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num) */ static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num) { - return (device->port_data[port_num].immutable.core_cap_flags & - RDMA_CORE_CAP_OPA_MAD) == RDMA_CORE_CAP_OPA_MAD; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_OPA_MAD; } /** @@ -3195,6 +3299,30 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num) return rdma_protocol_iwarp(dev, port_num); } +/** + * rdma_find_pg_bit - Find page bit given address and HW supported page sizes + * + * @addr: address + * @pgsz_bitmap: bitmap of HW supported page sizes + */ +static inline unsigned int rdma_find_pg_bit(unsigned long addr, + unsigned long pgsz_bitmap) +{ + unsigned long align; + unsigned long pgsz; + + align = addr & -addr; + + /* Find page bit such that addr is aligned to the highest supported + * HW page size + */ + pgsz = pgsz_bitmap & ~(-align << 1); + if (!pgsz) + return __ffs(pgsz_bitmap); + + return __fls(pgsz); +} + int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int ib_get_vf_config(struct ib_device *device, int vf, u8 port, @@ -3236,9 +3364,27 @@ enum ib_pd_flags { struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const char *caller); + #define ib_alloc_pd(device, flags) \ __ib_alloc_pd((device), (flags), KBUILD_MODNAME) -void ib_dealloc_pd(struct ib_pd *pd); + +/** + * ib_dealloc_pd_user - Deallocate kernel/user PD + * @pd: The protection domain + * @udata: Valid user data or NULL for kernel objects + */ +void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata); + +/** + * ib_dealloc_pd - Deallocate kernel PD + * @pd: The protection domain + * + * NOTE: for user PD use ib_dealloc_pd_user with valid udata! + */ +static inline void ib_dealloc_pd(struct ib_pd *pd) +{ + ib_dealloc_pd_user(pd, NULL); +} enum rdma_create_ah_flags { /* In a sleepable context */ @@ -3351,11 +3497,24 @@ enum rdma_destroy_ah_flags { }; /** - * rdma_destroy_ah - Destroys an address handle. + * rdma_destroy_ah_user - Destroys an address handle. * @ah: The address handle to destroy. * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags). + * @udata: Valid user data or NULL for kernel objects */ -int rdma_destroy_ah(struct ib_ah *ah, u32 flags); +int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata); + +/** + * rdma_destroy_ah - Destroys an kernel address handle. + * @ah: The address handle to destroy. + * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags). + * + * NOTE: for user ah use rdma_destroy_ah_user with valid udata! + */ +static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags) +{ + return rdma_destroy_ah_user(ah, flags, NULL); +} /** * ib_create_srq - Creates a SRQ associated with the specified protection @@ -3399,10 +3558,22 @@ int ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); /** - * ib_destroy_srq - Destroys the specified SRQ. + * ib_destroy_srq_user - Destroys the specified SRQ. * @srq: The SRQ to destroy. + * @udata: Valid user data or NULL for kernel objects */ -int ib_destroy_srq(struct ib_srq *srq); +int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata); + +/** + * ib_destroy_srq - Destroys the specified kernel SRQ. + * @srq: The SRQ to destroy. + * + * NOTE: for user srq use ib_destroy_srq_user with valid udata! + */ +static inline int ib_destroy_srq(struct ib_srq *srq) +{ + return ib_destroy_srq_user(srq, NULL); +} /** * ib_post_srq_recv - Posts a list of work requests to the specified SRQ. @@ -3422,15 +3593,34 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, } /** - * ib_create_qp - Creates a QP associated with the specified protection + * ib_create_qp_user - Creates a QP associated with the specified protection * domain. * @pd: The protection domain associated with the QP. * @qp_init_attr: A list of initial attributes required to create the * QP. If QP creation succeeds, then the attributes are updated to * the actual capabilities of the created QP. + * @udata: Valid user data or NULL for kernel objects */ -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); +struct ib_qp *ib_create_qp_user(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); + +/** + * ib_create_qp - Creates a kernel QP associated with the specified protection + * domain. + * @pd: The protection domain associated with the QP. + * @qp_init_attr: A list of initial attributes required to create the + * QP. If QP creation succeeds, then the attributes are updated to + * the actual capabilities of the created QP. + * @udata: Valid user data or NULL for kernel objects + * + * NOTE: for user qp use ib_create_qp_user with valid udata! + */ +static inline struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr) +{ + return ib_create_qp_user(pd, qp_init_attr, NULL); +} /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. @@ -3480,8 +3670,20 @@ int ib_query_qp(struct ib_qp *qp, /** * ib_destroy_qp - Destroys the specified QP. * @qp: The QP to destroy. + * @udata: Valid udata or NULL for kernel objects */ -int ib_destroy_qp(struct ib_qp *qp); +int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata); + +/** + * ib_destroy_qp - Destroys the specified kernel QP. + * @qp: The QP to destroy. + * + * NOTE: for user qp use ib_destroy_qp_user with valid udata! + */ +static inline int ib_destroy_qp(struct ib_qp *qp) +{ + return ib_destroy_qp_user(qp, NULL); +} /** * ib_open_qp - Obtain a reference to an existing sharable QP. @@ -3541,13 +3743,66 @@ static inline int ib_post_recv(struct ib_qp *qp, return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy); } -struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, - int nr_cqe, int comp_vector, - enum ib_poll_context poll_ctx, const char *caller); -#define ib_alloc_cq(device, priv, nr_cqe, comp_vect, poll_ctx) \ - __ib_alloc_cq((device), (priv), (nr_cqe), (comp_vect), (poll_ctx), KBUILD_MODNAME) +struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, + enum ib_poll_context poll_ctx, + const char *caller, struct ib_udata *udata); + +/** + * ib_alloc_cq_user: Allocate kernel/user CQ + * @dev: The IB device + * @private: Private data attached to the CQE + * @nr_cqe: Number of CQEs in the CQ + * @comp_vector: Completion vector used for the IRQs + * @poll_ctx: Context used for polling the CQ + * @udata: Valid user data or NULL for kernel objects + */ +static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev, + void *private, int nr_cqe, + int comp_vector, + enum ib_poll_context poll_ctx, + struct ib_udata *udata) +{ + return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, + KBUILD_MODNAME, udata); +} + +/** + * ib_alloc_cq: Allocate kernel CQ + * @dev: The IB device + * @private: Private data attached to the CQE + * @nr_cqe: Number of CQEs in the CQ + * @comp_vector: Completion vector used for the IRQs + * @poll_ctx: Context used for polling the CQ + * + * NOTE: for user cq use ib_alloc_cq_user with valid udata! + */ +static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, + enum ib_poll_context poll_ctx) +{ + return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, + NULL); +} + +/** + * ib_free_cq_user - Free kernel/user CQ + * @cq: The CQ to free + * @udata: Valid user data or NULL for kernel objects + */ +void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata); + +/** + * ib_free_cq - Free kernel CQ + * @cq: The CQ to free + * + * NOTE: for user cq use ib_free_cq_user with valid udata! + */ +static inline void ib_free_cq(struct ib_cq *cq) +{ + ib_free_cq_user(cq, NULL); +} -void ib_free_cq(struct ib_cq *cq); int ib_process_cq_direct(struct ib_cq *cq, int budget); /** @@ -3591,10 +3846,22 @@ int ib_resize_cq(struct ib_cq *cq, int cqe); int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period); /** - * ib_destroy_cq - Destroys the specified CQ. + * ib_destroy_cq_user - Destroys the specified CQ. * @cq: The CQ to destroy. + * @udata: Valid user data or NULL for kernel objects */ -int ib_destroy_cq(struct ib_cq *cq); +int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata); + +/** + * ib_destroy_cq - Destroys the specified kernel CQ. + * @cq: The CQ to destroy. + * + * NOTE: for user cq use ib_destroy_cq_user with valid udata! + */ +static inline int ib_destroy_cq(struct ib_cq *cq) +{ + return ib_destroy_cq_user(cq, NULL); +} /** * ib_poll_cq - poll a CQ for completion(s) @@ -3848,17 +4115,37 @@ static inline void ib_dma_free_coherent(struct ib_device *dev, } /** - * ib_dereg_mr - Deregisters a memory region and removes it from the + * ib_dereg_mr_user - Deregisters a memory region and removes it from the + * HCA translation table. + * @mr: The memory region to deregister. + * @udata: Valid user data or NULL for kernel object + * + * This function can fail, if the memory region has memory windows bound to it. + */ +int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata); + +/** + * ib_dereg_mr - Deregisters a kernel memory region and removes it from the * HCA translation table. * @mr: The memory region to deregister. * * This function can fail, if the memory region has memory windows bound to it. + * + * NOTE: for user mr use ib_dereg_mr_user with valid udata! */ -int ib_dereg_mr(struct ib_mr *mr); +static inline int ib_dereg_mr(struct ib_mr *mr) +{ + return ib_dereg_mr_user(mr, NULL); +} + +struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); -struct ib_mr *ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, u32 max_num_sg) +{ + return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL); +} /** * ib_update_fast_reg_key - updates the key portion of the fast_reg MR @@ -3956,8 +4243,9 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller); /** * ib_dealloc_xrcd - Deallocates an XRC domain. * @xrcd: The XRC domain to deallocate. + * @udata: Valid user data or NULL for kernel object */ -int ib_dealloc_xrcd(struct ib_xrcd *xrcd); +int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); static inline int ib_check_mr_access(int flags) { @@ -4033,7 +4321,7 @@ struct net_device *ib_device_netdev(struct ib_device *dev, u8 port); struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr); -int ib_destroy_wq(struct ib_wq *wq); +int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask); struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, @@ -4349,7 +4637,10 @@ rdma_set_device_sysfs_group(struct ib_device *dev, */ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) { - return container_of(device, struct ib_device, dev); + struct ib_core_device *coredev = + container_of(device, struct ib_core_device, dev); + + return coredev->owner; } /** @@ -4362,4 +4653,7 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) */ #define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \ container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member) + +bool rdma_dev_access_netns(const struct ib_device *device, + const struct net *net); #endif /* IB_VERBS_H */ diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 0e1f02815643..5aa8a9c76aa0 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -118,31 +118,6 @@ enum iw_flags { IW_F_NO_PORT_MAP = (1 << 0), }; -struct iw_cm_verbs { - void (*add_ref)(struct ib_qp *qp); - - void (*rem_ref)(struct ib_qp *qp); - - struct ib_qp * (*get_qp)(struct ib_device *device, - int qpn); - - int (*connect)(struct iw_cm_id *cm_id, - struct iw_cm_conn_param *conn_param); - - int (*accept)(struct iw_cm_id *cm_id, - struct iw_cm_conn_param *conn_param); - - int (*reject)(struct iw_cm_id *cm_id, - const void *pdata, u8 pdata_len); - - int (*create_listen)(struct iw_cm_id *cm_id, - int backlog); - - int (*destroy_listen)(struct iw_cm_id *cm_id); - char ifname[IFNAMSIZ]; - enum iw_flags driver_flags; -}; - /** * iw_create_cm_id - Create an IW CM identifier. * diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index b4f0ac02f283..7147a9263011 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -413,6 +413,6 @@ struct opa_port_info { u8 local_port_num; u8 reserved12; u8 reserved13; /* was guid_cap */ -} __attribute__ ((packed)); +} __packed; #endif /* OPA_PORT_INFO_H */ diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h index f7896117936e..c7b2ef12792d 100644 --- a/include/rdma/opa_smi.h +++ b/include/rdma/opa_smi.h @@ -98,7 +98,7 @@ struct opa_smp { struct opa_node_description { u8 data[64]; -} __attribute__ ((packed)); +} __packed; struct opa_node_info { u8 base_version; @@ -114,7 +114,7 @@ struct opa_node_info { __be32 revision; u8 local_port_num; u8 vendor_id[3]; /* network byte order */ -} __attribute__ ((packed)); +} __packed; #define OPA_PARTITION_TABLE_BLK_SIZE 32 diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4c257aff7d32..b9cd06db1a71 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -59,7 +59,6 @@ #include <rdma/ib_verbs.h> #include <rdma/ib_mad.h> #include <rdma/rdmavt_mr.h> -#include <rdma/rdmavt_qp.h> #define RVT_MAX_PKEY_VALUES 16 @@ -72,6 +71,8 @@ struct trap_list { struct list_head list; }; +struct rvt_qp; +struct rvt_qpn_table; struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ @@ -206,6 +207,20 @@ struct rvt_ah { u8 log_pmtu; }; +/* + * This structure is used by rvt_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. + */ +struct rvt_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + u32 size; +}; + /* memory working set size */ struct rvt_wss { unsigned long *entries; @@ -501,16 +516,6 @@ static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) return container_of(ibdev, struct rvt_dev_info, ibdev); } -static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) -{ - return container_of(ibsrq, struct rvt_srq, ibsrq); -} - -static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct rvt_qp, ibqp); -} - static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) { /* @@ -548,57 +553,6 @@ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, return rdi->ports[port_index]->pkey_table[index]; } -/** - * rvt_lookup_qpn - return the QP with the given QPN - * @ibp: the ibport - * @qpn: the QP number to look up - * - * The caller must hold the rcu_read_lock(), and keep the lock until - * the returned qp is no longer in use. - */ -/* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */ -static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, - struct rvt_ibport *rvp, - u32 qpn) __must_hold(RCU) -{ - struct rvt_qp *qp = NULL; - - if (unlikely(qpn <= 1)) { - qp = rcu_dereference(rvp->qp[qpn]); - } else { - u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits); - - for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp; - qp = rcu_dereference(qp->next)) - if (qp->ibqp.qp_num == qpn) - break; - } - return qp; -} - -/** - * rvt_mod_retry_timer - mod a retry timer - * @qp - the QP - * @shift - timeout shift to wait for multiple packets - * Modify a potentially already running retry timer - */ -static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift) -{ - struct ib_qp *ibqp = &qp->ibqp; - struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); - - lockdep_assert_held(&qp->s_lock); - qp->s_flags |= RVT_S_TIMER; - /* 4.096 usec. * (1 << qp->timeout) */ - mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies + - (qp->timeout_jiffies << shift)); -} - -static inline void rvt_mod_retry_timer(struct rvt_qp *qp) -{ - return rvt_mod_retry_timer_ext(qp, 0); -} - struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); void rvt_dealloc_device(struct rvt_dev_info *rdi); int rvt_register_device(struct rvt_dev_info *rvd, u32 driver_id); diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f0fbd4063fef..68e38c20afc0 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -83,7 +83,6 @@ * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating * next send completion entry not via send DMA * RVT_S_WAIT_PIO - waiting for a send buffer to be available - * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available * RVT_S_WAIT_KMEM - waiting for kernel memory to be available @@ -212,20 +211,6 @@ struct rvt_rq { }; /* - * This structure is used by rvt_mmap() to validate an offset - * when an mmap() request is made. The vm_area_struct then uses - * this as its vm_private_data. - */ -struct rvt_mmap_info { - struct list_head pending_mmaps; - struct ib_ucontext *context; - void *obj; - __u64 offset; - struct kref ref; - unsigned size; -}; - -/* * This structure holds the information that the send tasklet needs * to send a RDMA read response or atomic operation. */ @@ -399,6 +384,16 @@ struct rvt_srq { u32 limit; }; +static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct rvt_srq, ibsrq); +} + +static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct rvt_qp, ibqp); +} + #define RVT_QPN_MAX BIT(24) #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) @@ -678,6 +673,70 @@ static inline unsigned long rvt_timeout_to_jiffies(u8 timeout) return usecs_to_jiffies(1U << timeout) * 4096UL / 1000UL; } +/** + * rvt_lookup_qpn - return the QP with the given QPN + * @ibp: the ibport + * @qpn: the QP number to look up + * + * The caller must hold the rcu_read_lock(), and keep the lock until + * the returned qp is no longer in use. + */ +static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, + struct rvt_ibport *rvp, + u32 qpn) __must_hold(RCU) +{ + struct rvt_qp *qp = NULL; + + if (unlikely(qpn <= 1)) { + qp = rcu_dereference(rvp->qp[qpn]); + } else { + u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits); + + for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) + if (qp->ibqp.qp_num == qpn) + break; + } + return qp; +} + +/** + * rvt_mod_retry_timer - mod a retry timer + * @qp - the QP + * @shift - timeout shift to wait for multiple packets + * Modify a potentially already running retry timer + */ +static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift) +{ + struct ib_qp *ibqp = &qp->ibqp; + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + + lockdep_assert_held(&qp->s_lock); + qp->s_flags |= RVT_S_TIMER; + /* 4.096 usec. * (1 << qp->timeout) */ + mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies + + (qp->timeout_jiffies << shift)); +} + +static inline void rvt_mod_retry_timer(struct rvt_qp *qp) +{ + return rvt_mod_retry_timer_ext(qp, 0); +} + +/** + * rvt_put_qp_swqe - drop refs held by swqe + * @qp: the send qp + * @wqe: the send wqe + * + * This drops any references held by the swqe + */ +static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe) +{ + rvt_put_swqe(wqe); + if (qp->allowed_ops == IB_OPCODE_UD) + atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); +} + extern const int ib_rvt_state_ops[]; struct rvt_dev_info; diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 794c47565971..05eabfd5d0d3 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -48,17 +48,15 @@ #define uobj_get_type(_attrs, _object) \ uapi_get_object((_attrs)->ufile->device->uapi, _object) -struct ib_uobject *_uobj_get_read(enum uverbs_default_objects type, - u32 object_id, - struct uverbs_attr_bundle *attrs); - #define uobj_get_read(_type, _id, _attrs) \ - _uobj_get_read(_type, _uobj_check_id(_id), _attrs) + rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ + _uobj_check_id(_id), UVERBS_LOOKUP_READ, \ + _attrs) #define ufd_get_read(_type, _fdnum, _attrs) \ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ (_fdnum)*typecheck(s32, _fdnum), \ - UVERBS_LOOKUP_READ) + UVERBS_LOOKUP_READ, _attrs) static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) { @@ -70,22 +68,19 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) ((struct ib_##_object *)_uobj_get_obj_read( \ uobj_get_read(_type, _id, _attrs))) -struct ib_uobject *_uobj_get_write(enum uverbs_default_objects type, - u32 object_id, - struct uverbs_attr_bundle *attrs); - #define uobj_get_write(_type, _id, _attrs) \ - _uobj_get_write(_type, _uobj_check_id(_id), _attrs) + rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ + _uobj_check_id(_id), UVERBS_LOOKUP_WRITE, \ + _attrs) int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, - const struct uverbs_attr_bundle *attrs); + struct uverbs_attr_bundle *attrs); #define uobj_perform_destroy(_type, _id, _attrs) \ __uobj_perform_destroy(uobj_get_type(_attrs, _type), \ _uobj_check_id(_id), _attrs) struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, - u32 id, - const struct uverbs_attr_bundle *attrs); + u32 id, struct uverbs_attr_bundle *attrs); #define uobj_get_destroy(_type, _id, _attrs) \ __uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id), \ @@ -109,30 +104,31 @@ static inline void uobj_put_write(struct ib_uobject *uobj) rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); } -static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj) +static inline int __must_check +uobj_alloc_commit(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) { - int ret = rdma_alloc_commit_uobject(uobj); + int ret = rdma_alloc_commit_uobject(uobj, attrs); if (ret) return ret; return 0; } -static inline void uobj_alloc_abort(struct ib_uobject *uobj) +static inline void uobj_alloc_abort(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs) { - rdma_alloc_abort_uobject(uobj); + rdma_alloc_abort_uobject(uobj, attrs); } static inline struct ib_uobject * __uobj_alloc(const struct uverbs_api_object *obj, struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev) { - struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs->ufile); + struct ib_uobject *uobj = + rdma_alloc_begin_uobject(obj, attrs->ufile, attrs); - if (!IS_ERR(uobj)) { - *ib_dev = uobj->context->device; - attrs->context = uobj->context; - } + if (!IS_ERR(uobj)) + *ib_dev = attrs->context->device; return uobj; } diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 175d761695e1..d57a5ba00c74 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -95,7 +95,8 @@ struct uverbs_obj_type_class { void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode); /* This does not consume the kref on uobj */ int __must_check (*destroy_hw)(struct ib_uobject *uobj, - enum rdma_remove_reason why); + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs); void (*remove_handle)(struct ib_uobject *uobj); u8 needs_kfree_rcu; }; @@ -126,18 +127,23 @@ struct uverbs_obj_idr_type { * completely unchanged. */ int __must_check (*destroy_object)(struct ib_uobject *uobj, - enum rdma_remove_reason why); + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs); }; struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, s64 id, - enum rdma_lookup_mode mode); + enum rdma_lookup_mode mode, + struct uverbs_attr_bundle *attrs); void rdma_lookup_put_uobject(struct ib_uobject *uobj, enum rdma_lookup_mode mode); struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile); -void rdma_alloc_abort_uobject(struct ib_uobject *uobj); -int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj); + struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *attrs); +void rdma_alloc_abort_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs); +int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs); struct uverbs_obj_fd_type { /* diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h new file mode 100644 index 000000000000..59363a083ecb --- /dev/null +++ b/include/trace/events/ib_mad.h @@ -0,0 +1,390 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ + +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ib_mad + +#if !defined(_TRACE_IB_MAD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IB_MAD_H + +#include <linux/tracepoint.h> +#include <rdma/ib_mad.h> + +#ifdef CONFIG_TRACEPOINTS +struct trace_event_raw_ib_mad_send_template; +static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_qp_info *qp_info, + struct trace_event_raw_ib_mad_send_template *entry); +#endif + +DECLARE_EVENT_CLASS(ib_mad_send_template, + TP_PROTO(struct ib_mad_send_wr_private *wr, + struct ib_mad_qp_info *qp_info), + TP_ARGS(wr, qp_info), + + TP_STRUCT__entry( + __field(u8, base_version) + __field(u8, mgmt_class) + __field(u8, class_version) + __field(u8, port_num) + __field(u32, qp_num) + __field(u8, method) + __field(u8, sl) + __field(u16, attr_id) + __field(u32, attr_mod) + __field(u64, wrtid) + __field(u64, tid) + __field(u16, status) + __field(u16, class_specific) + __field(u32, length) + __field(u32, dlid) + __field(u32, rqpn) + __field(u32, rqkey) + __field(u32, dev_index) + __field(void *, agent_priv) + __field(unsigned long, timeout) + __field(int, retries_left) + __field(int, max_retries) + __field(int, retry) + __field(u16, pkey) + ), + + TP_fast_assign( + __entry->dev_index = wr->mad_agent_priv->agent.device->index; + __entry->port_num = wr->mad_agent_priv->agent.port_num; + __entry->qp_num = wr->mad_agent_priv->qp_info->qp->qp_num; + __entry->agent_priv = wr->mad_agent_priv; + __entry->wrtid = wr->tid; + __entry->max_retries = wr->max_retries; + __entry->retries_left = wr->retries_left; + __entry->retry = wr->retry; + __entry->timeout = wr->timeout; + __entry->length = wr->send_buf.hdr_len + + wr->send_buf.data_len; + __entry->base_version = + ((struct ib_mad_hdr *)wr->send_buf.mad)->base_version; + __entry->mgmt_class = + ((struct ib_mad_hdr *)wr->send_buf.mad)->mgmt_class; + __entry->class_version = + ((struct ib_mad_hdr *)wr->send_buf.mad)->class_version; + __entry->method = + ((struct ib_mad_hdr *)wr->send_buf.mad)->method; + __entry->status = + ((struct ib_mad_hdr *)wr->send_buf.mad)->status; + __entry->class_specific = + ((struct ib_mad_hdr *)wr->send_buf.mad)->class_specific; + __entry->tid = ((struct ib_mad_hdr *)wr->send_buf.mad)->tid; + __entry->attr_id = + ((struct ib_mad_hdr *)wr->send_buf.mad)->attr_id; + __entry->attr_mod = + ((struct ib_mad_hdr *)wr->send_buf.mad)->attr_mod; + create_mad_addr_info(wr, qp_info, __entry); + ), + + TP_printk("%d:%d QP%d agent %p: " \ + "wrtid 0x%llx; %d/%d retries(%d); timeout %lu length %d : " \ + "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \ + "method 0x%x status 0x%x class_specific 0x%x tid 0x%llx " \ + "attr_id 0x%x attr_mod 0x%x => dlid 0x%08x sl %d "\ + "pkey 0x%x rpqn 0x%x rqpkey 0x%x", + __entry->dev_index, __entry->port_num, __entry->qp_num, + __entry->agent_priv, be64_to_cpu(__entry->wrtid), + __entry->retries_left, __entry->max_retries, + __entry->retry, __entry->timeout, __entry->length, + __entry->base_version, __entry->mgmt_class, + __entry->class_version, + __entry->method, be16_to_cpu(__entry->status), + be16_to_cpu(__entry->class_specific), + be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), + be32_to_cpu(__entry->attr_mod), + be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey, + __entry->rqpn, __entry->rqkey + ) +); + +DEFINE_EVENT(ib_mad_send_template, ib_mad_error_handler, + TP_PROTO(struct ib_mad_send_wr_private *wr, + struct ib_mad_qp_info *qp_info), + TP_ARGS(wr, qp_info)); +DEFINE_EVENT(ib_mad_send_template, ib_mad_ib_send_mad, + TP_PROTO(struct ib_mad_send_wr_private *wr, + struct ib_mad_qp_info *qp_info), + TP_ARGS(wr, qp_info)); +DEFINE_EVENT(ib_mad_send_template, ib_mad_send_done_resend, + TP_PROTO(struct ib_mad_send_wr_private *wr, + struct ib_mad_qp_info *qp_info), + TP_ARGS(wr, qp_info)); + +TRACE_EVENT(ib_mad_send_done_handler, + TP_PROTO(struct ib_mad_send_wr_private *wr, struct ib_wc *wc), + TP_ARGS(wr, wc), + + TP_STRUCT__entry( + __field(u8, port_num) + __field(u8, base_version) + __field(u8, mgmt_class) + __field(u8, class_version) + __field(u32, qp_num) + __field(u64, wrtid) + __field(u16, status) + __field(u16, wc_status) + __field(u32, length) + __field(void *, agent_priv) + __field(unsigned long, timeout) + __field(u32, dev_index) + __field(int, retries_left) + __field(int, max_retries) + __field(int, retry) + __field(u8, method) + ), + + TP_fast_assign( + __entry->dev_index = wr->mad_agent_priv->agent.device->index; + __entry->port_num = wr->mad_agent_priv->agent.port_num; + __entry->qp_num = wr->mad_agent_priv->qp_info->qp->qp_num; + __entry->agent_priv = wr->mad_agent_priv; + __entry->wrtid = wr->tid; + __entry->max_retries = wr->max_retries; + __entry->retries_left = wr->retries_left; + __entry->retry = wr->retry; + __entry->timeout = wr->timeout; + __entry->base_version = + ((struct ib_mad_hdr *)wr->send_buf.mad)->base_version; + __entry->mgmt_class = + ((struct ib_mad_hdr *)wr->send_buf.mad)->mgmt_class; + __entry->class_version = + ((struct ib_mad_hdr *)wr->send_buf.mad)->class_version; + __entry->method = + ((struct ib_mad_hdr *)wr->send_buf.mad)->method; + __entry->status = + ((struct ib_mad_hdr *)wr->send_buf.mad)->status; + __entry->wc_status = wc->status; + __entry->length = wc->byte_len; + ), + + TP_printk("%d:%d QP%d : SEND WC Status %d : agent %p: " \ + "wrtid 0x%llx %d/%d retries(%d) timeout %lu length %d: " \ + "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \ + "method 0x%x status 0x%x", + __entry->dev_index, __entry->port_num, __entry->qp_num, + __entry->wc_status, + __entry->agent_priv, be64_to_cpu(__entry->wrtid), + __entry->retries_left, __entry->max_retries, + __entry->retry, __entry->timeout, + __entry->length, + __entry->base_version, __entry->mgmt_class, + __entry->class_version, __entry->method, + be16_to_cpu(__entry->status) + ) +); + +TRACE_EVENT(ib_mad_recv_done_handler, + TP_PROTO(struct ib_mad_qp_info *qp_info, struct ib_wc *wc, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(qp_info, wc, mad_hdr), + + TP_STRUCT__entry( + __field(u8, base_version) + __field(u8, mgmt_class) + __field(u8, class_version) + __field(u8, port_num) + __field(u32, qp_num) + __field(u16, status) + __field(u16, class_specific) + __field(u32, length) + __field(u64, tid) + __field(u8, method) + __field(u8, sl) + __field(u16, attr_id) + __field(u32, attr_mod) + __field(u16, src_qp) + __field(u16, wc_status) + __field(u32, slid) + __field(u32, dev_index) + __field(u16, pkey) + ), + + TP_fast_assign( + __entry->dev_index = qp_info->port_priv->device->index; + __entry->port_num = qp_info->port_priv->port_num; + __entry->qp_num = qp_info->qp->qp_num; + __entry->length = wc->byte_len; + __entry->base_version = mad_hdr->base_version; + __entry->mgmt_class = mad_hdr->mgmt_class; + __entry->class_version = mad_hdr->class_version; + __entry->method = mad_hdr->method; + __entry->status = mad_hdr->status; + __entry->class_specific = mad_hdr->class_specific; + __entry->tid = mad_hdr->tid; + __entry->attr_id = mad_hdr->attr_id; + __entry->attr_mod = mad_hdr->attr_mod; + __entry->slid = wc->slid; + __entry->src_qp = wc->src_qp; + __entry->sl = wc->sl; + ib_query_pkey(qp_info->port_priv->device, + qp_info->port_priv->port_num, + wc->pkey_index, &__entry->pkey); + __entry->wc_status = wc->status; + ), + + TP_printk("%d:%d QP%d : RECV WC Status %d : length %d : hdr : " \ + "base_ver 0x%02x class 0x%02x class_ver 0x%02x " \ + "method 0x%02x status 0x%04x class_specific 0x%04x " \ + "tid 0x%016llx attr_id 0x%04x attr_mod 0x%08x " \ + "slid 0x%08x src QP%d, sl %d pkey 0x%04x", + __entry->dev_index, __entry->port_num, __entry->qp_num, + __entry->wc_status, + __entry->length, + __entry->base_version, __entry->mgmt_class, + __entry->class_version, __entry->method, + be16_to_cpu(__entry->status), + be16_to_cpu(__entry->class_specific), + be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), + be32_to_cpu(__entry->attr_mod), + __entry->slid, __entry->src_qp, __entry->sl, __entry->pkey + ) +); + +DECLARE_EVENT_CLASS(ib_mad_agent_template, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent), + + TP_STRUCT__entry( + __field(u32, dev_index) + __field(u32, hi_tid) + __field(u8, port_num) + __field(u8, mgmt_class) + __field(u8, mgmt_class_version) + ), + + TP_fast_assign( + __entry->dev_index = agent->agent.device->index; + __entry->port_num = agent->agent.port_num; + __entry->hi_tid = agent->agent.hi_tid; + + if (agent->reg_req) { + __entry->mgmt_class = agent->reg_req->mgmt_class; + __entry->mgmt_class_version = + agent->reg_req->mgmt_class_version; + } else { + __entry->mgmt_class = 0; + __entry->mgmt_class_version = 0; + } + ), + + TP_printk("%d:%d mad agent : hi_tid 0x%08x class 0x%02x class_ver 0x%02x", + __entry->dev_index, __entry->port_num, + __entry->hi_tid, __entry->mgmt_class, + __entry->mgmt_class_version + ) +); +DEFINE_EVENT(ib_mad_agent_template, ib_mad_recv_done_agent, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent)); +DEFINE_EVENT(ib_mad_agent_template, ib_mad_send_done_agent, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent)); +DEFINE_EVENT(ib_mad_agent_template, ib_mad_create_agent, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent)); +DEFINE_EVENT(ib_mad_agent_template, ib_mad_unregister_agent, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent)); + + + +DECLARE_EVENT_CLASS(ib_mad_opa_smi_template, + TP_PROTO(struct opa_smp *smp), + TP_ARGS(smp), + + TP_STRUCT__entry( + __field(u64, mkey) + __field(u32, dr_slid) + __field(u32, dr_dlid) + __field(u8, hop_ptr) + __field(u8, hop_cnt) + __array(u8, initial_path, OPA_SMP_MAX_PATH_HOPS) + __array(u8, return_path, OPA_SMP_MAX_PATH_HOPS) + ), + + TP_fast_assign( + __entry->hop_ptr = smp->hop_ptr; + __entry->hop_cnt = smp->hop_cnt; + __entry->mkey = smp->mkey; + __entry->dr_slid = smp->route.dr.dr_slid; + __entry->dr_dlid = smp->route.dr.dr_dlid; + memcpy(__entry->initial_path, smp->route.dr.initial_path, + OPA_SMP_MAX_PATH_HOPS); + memcpy(__entry->return_path, smp->route.dr.return_path, + OPA_SMP_MAX_PATH_HOPS); + ), + + TP_printk("OPA SMP: hop_ptr %d hop_cnt %d " \ + "mkey 0x%016llx dr_slid 0x%08x dr_dlid 0x%08x " \ + "initial_path %*ph return_path %*ph ", + __entry->hop_ptr, __entry->hop_cnt, + be64_to_cpu(__entry->mkey), be32_to_cpu(__entry->dr_slid), + be32_to_cpu(__entry->dr_dlid), + OPA_SMP_MAX_PATH_HOPS, __entry->initial_path, + OPA_SMP_MAX_PATH_HOPS, __entry->return_path + ) +); + +DEFINE_EVENT(ib_mad_opa_smi_template, ib_mad_handle_opa_smi, + TP_PROTO(struct opa_smp *smp), + TP_ARGS(smp)); +DEFINE_EVENT(ib_mad_opa_smi_template, ib_mad_handle_out_opa_smi, + TP_PROTO(struct opa_smp *smp), + TP_ARGS(smp)); + + +DECLARE_EVENT_CLASS(ib_mad_opa_ib_template, + TP_PROTO(struct ib_smp *smp), + TP_ARGS(smp), + + TP_STRUCT__entry( + __field(u64, mkey) + __field(u32, dr_slid) + __field(u32, dr_dlid) + __field(u8, hop_ptr) + __field(u8, hop_cnt) + __array(u8, initial_path, IB_SMP_MAX_PATH_HOPS) + __array(u8, return_path, IB_SMP_MAX_PATH_HOPS) + ), + + TP_fast_assign( + __entry->hop_ptr = smp->hop_ptr; + __entry->hop_cnt = smp->hop_cnt; + __entry->mkey = smp->mkey; + __entry->dr_slid = smp->dr_slid; + __entry->dr_dlid = smp->dr_dlid; + memcpy(__entry->initial_path, smp->initial_path, + IB_SMP_MAX_PATH_HOPS); + memcpy(__entry->return_path, smp->return_path, + IB_SMP_MAX_PATH_HOPS); + ), + + TP_printk("OPA SMP: hop_ptr %d hop_cnt %d " \ + "mkey 0x%016llx dr_slid 0x%04x dr_dlid 0x%04x " \ + "initial_path %*ph return_path %*ph ", + __entry->hop_ptr, __entry->hop_cnt, + be64_to_cpu(__entry->mkey), be16_to_cpu(__entry->dr_slid), + be16_to_cpu(__entry->dr_dlid), + IB_SMP_MAX_PATH_HOPS, __entry->initial_path, + IB_SMP_MAX_PATH_HOPS, __entry->return_path + ) +); + +DEFINE_EVENT(ib_mad_opa_ib_template, ib_mad_handle_ib_smi, + TP_PROTO(struct ib_smp *smp), + TP_ARGS(smp)); +DEFINE_EVENT(ib_mad_opa_ib_template, ib_mad_handle_out_ib_smi, + TP_PROTO(struct ib_smp *smp), + TP_ARGS(smp)); + +#endif /* _TRACE_IB_MAD_H */ + +#include <trace/define_trace.h> diff --git a/include/trace/events/ib_umad.h b/include/trace/events/ib_umad.h new file mode 100644 index 000000000000..c393a19a0f60 --- /dev/null +++ b/include/trace/events/ib_umad.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ + +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ib_umad + +#if !defined(_TRACE_IB_UMAD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IB_UMAD_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(ib_umad_template, + TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(file, umad_hdr, mad_hdr), + + TP_STRUCT__entry( + __field(u8, port_num) + __field(u8, sl) + __field(u8, path_bits) + __field(u8, grh_present) + __field(u32, id) + __field(u32, status) + __field(u32, timeout_ms) + __field(u32, retires) + __field(u32, length) + __field(u32, qpn) + __field(u32, qkey) + __field(u8, gid_index) + __field(u8, hop_limit) + __field(u16, lid) + __field(u16, attr_id) + __field(u16, pkey_index) + __field(u8, base_version) + __field(u8, mgmt_class) + __field(u8, class_version) + __field(u8, method) + __field(u32, flow_label) + __field(u16, mad_status) + __field(u16, class_specific) + __field(u32, attr_mod) + __field(u64, tid) + __array(u8, gid, 16) + __field(u32, dev_index) + __field(u8, traffic_class) + ), + + TP_fast_assign( + __entry->dev_index = file->port->ib_dev->index; + __entry->port_num = file->port->port_num; + + __entry->id = umad_hdr->id; + __entry->status = umad_hdr->status; + __entry->timeout_ms = umad_hdr->timeout_ms; + __entry->retires = umad_hdr->retries; + __entry->length = umad_hdr->length; + __entry->qpn = umad_hdr->qpn; + __entry->qkey = umad_hdr->qkey; + __entry->lid = umad_hdr->lid; + __entry->sl = umad_hdr->sl; + __entry->path_bits = umad_hdr->path_bits; + __entry->grh_present = umad_hdr->grh_present; + __entry->gid_index = umad_hdr->gid_index; + __entry->hop_limit = umad_hdr->hop_limit; + __entry->traffic_class = umad_hdr->traffic_class; + memcpy(__entry->gid, umad_hdr->gid, sizeof(umad_hdr->gid)); + __entry->flow_label = umad_hdr->flow_label; + __entry->pkey_index = umad_hdr->pkey_index; + + __entry->base_version = mad_hdr->base_version; + __entry->mgmt_class = mad_hdr->mgmt_class; + __entry->class_version = mad_hdr->class_version; + __entry->method = mad_hdr->method; + __entry->mad_status = mad_hdr->status; + __entry->class_specific = mad_hdr->class_specific; + __entry->tid = mad_hdr->tid; + __entry->attr_id = mad_hdr->attr_id; + __entry->attr_mod = mad_hdr->attr_mod; + ), + + TP_printk("%d:%d umad_hdr: id 0x%08x status 0x%08x ms %u ret %u " \ + "len %u QP%u qkey 0x%08x lid 0x%04x sl %u path_bits 0x%x " \ + "grh 0x%x gidi %u hop_lim %u traf_cl %u gid %pI6c " \ + "flow 0x%08x pkeyi %u MAD: base_ver 0x%x class 0x%x " \ + "class_ver 0x%x method 0x%x status 0x%04x " \ + "class_specific 0x%04x tid 0x%016llx attr_id 0x%04x " \ + "attr_mod 0x%08x ", + __entry->dev_index, __entry->port_num, + __entry->id, __entry->status, __entry->timeout_ms, + __entry->retires, __entry->length, be32_to_cpu(__entry->qpn), + be32_to_cpu(__entry->qkey), be16_to_cpu(__entry->lid), + __entry->sl, __entry->path_bits, __entry->grh_present, + __entry->gid_index, __entry->hop_limit, + __entry->traffic_class, &__entry->gid, + be32_to_cpu(__entry->flow_label), __entry->pkey_index, + __entry->base_version, __entry->mgmt_class, + __entry->class_version, __entry->method, + be16_to_cpu(__entry->mad_status), + be16_to_cpu(__entry->class_specific), + be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), + be32_to_cpu(__entry->attr_mod) + ) +); + +DEFINE_EVENT(ib_umad_template, ib_umad_write, + TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(file, umad_hdr, mad_hdr)); + +DEFINE_EVENT(ib_umad_template, ib_umad_read_recv, + TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(file, umad_hdr, mad_hdr)); + +DEFINE_EVENT(ib_umad_template, ib_umad_read_send, + TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(file, umad_hdr, mad_hdr)); + +#endif /* _TRACE_IB_UMAD_H */ + +#include <trace/define_trace.h> diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h new file mode 100644 index 000000000000..9599a2a62be8 --- /dev/null +++ b/include/uapi/rdma/efa-abi.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef EFA_ABI_USER_H +#define EFA_ABI_USER_H + +#include <linux/types.h> + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define EFA_UVERBS_ABI_VERSION 1 + +/* + * Keep structs aligned to 8 bytes. + * Keep reserved fields as arrays of __u8 named reserved_XXX where XXX is the + * hex bit offset of the field. + */ + +enum efa_ibv_user_cmds_supp_udata { + EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE = 1 << 0, + EFA_USER_CMDS_SUPP_UDATA_CREATE_AH = 1 << 1, +}; + +struct efa_ibv_alloc_ucontext_resp { + __u32 comp_mask; + __u32 cmds_supp_udata_mask; + __u16 sub_cqs_per_cq; + __u16 inline_buf_size; + __u32 max_llq_size; /* bytes */ +}; + +struct efa_ibv_alloc_pd_resp { + __u32 comp_mask; + __u16 pdn; + __u8 reserved_30[2]; +}; + +struct efa_ibv_create_cq { + __u32 comp_mask; + __u32 cq_entry_size; + __u16 num_sub_cqs; + __u8 reserved_50[6]; +}; + +struct efa_ibv_create_cq_resp { + __u32 comp_mask; + __u8 reserved_20[4]; + __aligned_u64 q_mmap_key; + __aligned_u64 q_mmap_size; + __u16 cq_idx; + __u8 reserved_d0[6]; +}; + +enum { + EFA_QP_DRIVER_TYPE_SRD = 0, +}; + +struct efa_ibv_create_qp { + __u32 comp_mask; + __u32 rq_ring_size; /* bytes */ + __u32 sq_ring_size; /* bytes */ + __u32 driver_qp_type; +}; + +struct efa_ibv_create_qp_resp { + __u32 comp_mask; + /* the offset inside the page of the rq db */ + __u32 rq_db_offset; + /* the offset inside the page of the sq db */ + __u32 sq_db_offset; + /* the offset inside the page of descriptors buffer */ + __u32 llq_desc_offset; + __aligned_u64 rq_mmap_key; + __aligned_u64 rq_mmap_size; + __aligned_u64 rq_db_mmap_key; + __aligned_u64 sq_db_mmap_key; + __aligned_u64 llq_desc_mmap_key; + __u16 send_sub_cq_idx; + __u16 recv_sub_cq_idx; + __u8 reserved_1e0[4]; +}; + +struct efa_ibv_create_ah_resp { + __u32 comp_mask; + __u16 efa_address_handle; + __u8 reserved_30[2]; +}; + +struct efa_ibv_ex_query_device_resp { + __u32 comp_mask; + __u32 max_sq_wr; + __u32 max_rq_wr; + __u16 max_sq_sge; + __u16 max_rq_sge; +}; + +#endif /* EFA_ABI_USER_H */ diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index f4d4010b7e3e..624f5b53eb1f 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -360,6 +360,7 @@ enum mlx5_ib_create_qp_resp_mask { MLX5_IB_CREATE_QP_RESP_MASK_TISN = 1UL << 1, MLX5_IB_CREATE_QP_RESP_MASK_RQN = 1UL << 2, MLX5_IB_CREATE_QP_RESP_MASK_SQN = 1UL << 3, + MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR = 1UL << 4, }; struct mlx5_ib_create_qp_resp { @@ -371,6 +372,7 @@ struct mlx5_ib_create_qp_resp { __u32 rqn; __u32 sqn; __u32 reserved1; + __u64 tir_icm_addr; }; struct mlx5_ib_alloc_mw { diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 8149d224030b..d404c951954c 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -44,6 +44,7 @@ enum mlx5_ib_create_flow_action_attrs { enum mlx5_ib_alloc_dm_attrs { MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE, }; enum mlx5_ib_devx_methods { @@ -144,6 +145,7 @@ enum mlx5_ib_flow_matcher_create_attrs { MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, }; enum mlx5_ib_flow_matcher_destroy_attrs { diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index 4a701033b93f..a8f34c237458 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -42,6 +42,7 @@ enum mlx5_ib_uapi_flow_action_flags { enum mlx5_ib_uapi_flow_table_type { MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0, MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1, + MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB = 0x2, }; enum mlx5_ib_uapi_flow_action_packet_reformat_type { @@ -56,5 +57,11 @@ struct mlx5_ib_uapi_devx_async_cmd_hdr { __u8 out_data[]; }; +enum mlx5_ib_uapi_dm_type { + MLX5_IB_UAPI_DM_TYPE_MEMIC, + MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM, + MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM, +}; + #endif diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 5cc592728071..42a8bdc40a14 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -49,17 +49,6 @@ enum { RDMA_NL_IWPM_NUM_OPS }; -struct rdma_cm_id_stats { - __u32 qp_num; - __u32 bound_dev_if; - __u32 port_space; - __s32 pid; - __u8 cm_state; - __u8 node_type; - __u8 port_num; - __u8 qp_type; -}; - enum { IWPM_NLA_REG_PID_UNSPEC = 0, IWPM_NLA_REG_PID_SEQ, @@ -261,7 +250,10 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_PORT_GET, /* can dump */ - /* 6 - 8 are free to use */ + RDMA_NLDEV_CMD_SYS_GET, /* can dump */ + RDMA_NLDEV_CMD_SYS_SET, + + /* 8 is free to use */ RDMA_NLDEV_CMD_RES_GET = 9, /* can dump */ @@ -473,6 +465,21 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_LINK_TYPE, /* string */ /* + * net namespace mode for rdma subsystem: + * either shared or exclusive among multiple net namespaces. + */ + RDMA_NLDEV_SYS_ATTR_NETNS_MODE, /* u8 */ + /* + * Device protocol, e.g. ib, iw, usnic, roce and opa + */ + RDMA_NLDEV_ATTR_DEV_PROTOCOL, /* string */ + + /* + * File descriptor handle of the net namespace object + */ + RDMA_NLDEV_NET_NS_FD, /* u32 */ + + /* * Always the end */ RDMA_NLDEV_ATTR_MAX diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h index 06c34d99be85..26213f49f5c8 100644 --- a/include/uapi/rdma/rdma_user_ioctl_cmds.h +++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h @@ -102,6 +102,7 @@ enum rdma_driver_id { RDMA_DRIVER_RXE, RDMA_DRIVER_HFI1, RDMA_DRIVER_QIB, + RDMA_DRIVER_EFA, }; #endif |