diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/blk-iopoll.h | 46 | ||||
-rw-r--r-- | include/linux/interrupt.h | 2 | ||||
-rw-r--r-- | include/linux/irq_poll.h | 25 | ||||
-rw-r--r-- | include/linux/mlx4/cmd.h | 3 | ||||
-rw-r--r-- | include/linux/mlx4/device.h | 15 | ||||
-rw-r--r-- | include/linux/mlx4/qp.h | 15 | ||||
-rw-r--r-- | include/linux/mlx5/device.h | 40 | ||||
-rw-r--r-- | include/linux/mlx5/driver.h | 20 | ||||
-rw-r--r-- | include/linux/mlx5/mlx5_ifc.h | 48 | ||||
-rw-r--r-- | include/linux/mlx5/qp.h | 46 | ||||
-rw-r--r-- | include/linux/mlx5/transobj.h | 78 | ||||
-rw-r--r-- | include/linux/mlx5/vport.h | 8 | ||||
-rw-r--r-- | include/linux/sunrpc/svc_rdma.h | 39 | ||||
-rw-r--r-- | include/rdma/ib_addr.h | 16 | ||||
-rw-r--r-- | include/rdma/ib_cache.h | 4 | ||||
-rw-r--r-- | include/rdma/ib_mad.h | 2 | ||||
-rw-r--r-- | include/rdma/ib_pack.h | 45 | ||||
-rw-r--r-- | include/rdma/ib_pma.h | 1 | ||||
-rw-r--r-- | include/rdma/ib_sa.h | 3 | ||||
-rw-r--r-- | include/rdma/ib_verbs.h | 356 | ||||
-rw-r--r-- | include/scsi/iser.h | 78 | ||||
-rw-r--r-- | include/trace/events/irq.h | 2 |
22 files changed, 624 insertions, 268 deletions
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h deleted file mode 100644 index 77ae77c0b704..000000000000 --- a/include/linux/blk-iopoll.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef BLK_IOPOLL_H -#define BLK_IOPOLL_H - -struct blk_iopoll; -typedef int (blk_iopoll_fn)(struct blk_iopoll *, int); - -struct blk_iopoll { - struct list_head list; - unsigned long state; - unsigned long data; - int weight; - int max; - blk_iopoll_fn *poll; -}; - -enum { - IOPOLL_F_SCHED = 0, - IOPOLL_F_DISABLE = 1, -}; - -/* - * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating - * that we were the first to acquire this iop for scheduling. If this iop - * is currently disabled, return "failure". - */ -static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop) -{ - if (!test_bit(IOPOLL_F_DISABLE, &iop->state)) - return test_and_set_bit(IOPOLL_F_SCHED, &iop->state); - - return 1; -} - -static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop) -{ - return test_bit(IOPOLL_F_DISABLE, &iop->state); -} - -extern void blk_iopoll_sched(struct blk_iopoll *); -extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *); -extern void blk_iopoll_complete(struct blk_iopoll *); -extern void __blk_iopoll_complete(struct blk_iopoll *); -extern void blk_iopoll_enable(struct blk_iopoll *); -extern void blk_iopoll_disable(struct blk_iopoll *); - -#endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index cb30edbfe9fc..0e95fcc75b2a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -413,7 +413,7 @@ enum NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, BLOCK_SOFTIRQ, - BLOCK_IOPOLL_SOFTIRQ, + IRQ_POLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h new file mode 100644 index 000000000000..3e8c1b8fb9be --- /dev/null +++ b/include/linux/irq_poll.h @@ -0,0 +1,25 @@ +#ifndef IRQ_POLL_H +#define IRQ_POLL_H + +struct irq_poll; +typedef int (irq_poll_fn)(struct irq_poll *, int); + +struct irq_poll { + struct list_head list; + unsigned long state; + int weight; + irq_poll_fn *poll; +}; + +enum { + IRQ_POLL_F_SCHED = 0, + IRQ_POLL_F_DISABLE = 1, +}; + +extern void irq_poll_sched(struct irq_poll *); +extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *); +extern void irq_poll_complete(struct irq_poll *); +extern void irq_poll_enable(struct irq_poll *); +extern void irq_poll_disable(struct irq_poll *); + +#endif diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 58391f2e0414..116b284bc4ce 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -206,7 +206,8 @@ enum { MLX4_SET_PORT_GID_TABLE = 0x5, MLX4_SET_PORT_PRIO2TC = 0x8, MLX4_SET_PORT_SCHEDULER = 0x9, - MLX4_SET_PORT_VXLAN = 0xB + MLX4_SET_PORT_VXLAN = 0xB, + MLX4_SET_PORT_ROCE_ADDR = 0xD }; enum { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d3133be12d92..430a929f048b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -216,6 +216,7 @@ enum { MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30, MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, + MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, }; enum { @@ -267,12 +268,14 @@ enum { MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10, MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, + MLX4_BMME_FLAG_ROCE_V1_V2 = 1 << 19, MLX4_BMME_FLAG_PORT_REMAP = 1 << 24, MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28, }; enum { - MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP + MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP, + MLX4_FLAG_ROCE_V1_V2 = MLX4_BMME_FLAG_ROCE_V1_V2 }; enum mlx4_event { @@ -979,14 +982,11 @@ struct mlx4_mad_ifc { for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if ((type) == (dev)->caps.port_mask[(port)]) -#define mlx4_foreach_non_ib_transport_port(port, dev) \ - for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ - if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB)) - #define mlx4_foreach_ib_transport_port(port, dev) \ - for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ + for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ - ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) + ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \ + ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)) #define MLX4_INVALID_SLAVE_ID 0xFF #define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1) @@ -1457,6 +1457,7 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis); +int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port); int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2); int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index fe052e234906..587cdf943b52 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -194,7 +194,7 @@ struct mlx4_qp_context { u8 mtu_msgmax; u8 rq_size_stride; u8 sq_size_stride; - u8 rlkey; + u8 rlkey_roce_mode; __be32 usr_page; __be32 local_qpn; __be32 remote_qpn; @@ -204,7 +204,8 @@ struct mlx4_qp_context { u32 reserved1; __be32 next_send_psn; __be32 cqn_send; - u32 reserved2[2]; + __be16 roce_entropy; + __be16 reserved2[3]; __be32 last_acked_psn; __be32 ssn; __be32 params2; @@ -487,4 +488,14 @@ static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp); +static inline u16 folded_qp(u32 q) +{ + u16 res; + + res = ((q & 0xff) ^ ((q & 0xff0000) >> 16)) | (q & 0xff00); + return res; +} + +u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn); + #endif /* MLX4_QP_H */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 7be845e30689..987764afa65c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -223,6 +223,14 @@ enum { #define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1) #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT +#define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8) + +enum { + MLX5_EVENT_QUEUE_TYPE_QP = 0, + MLX5_EVENT_QUEUE_TYPE_RQ = 1, + MLX5_EVENT_QUEUE_TYPE_SQ = 2, +}; + enum mlx5_event { MLX5_EVENT_TYPE_COMP = 0x0, @@ -280,6 +288,26 @@ enum { }; enum { + MLX5_ROCE_VERSION_1 = 0, + MLX5_ROCE_VERSION_2 = 2, +}; + +enum { + MLX5_ROCE_VERSION_1_CAP = 1 << MLX5_ROCE_VERSION_1, + MLX5_ROCE_VERSION_2_CAP = 1 << MLX5_ROCE_VERSION_2, +}; + +enum { + MLX5_ROCE_L3_TYPE_IPV4 = 0, + MLX5_ROCE_L3_TYPE_IPV6 = 1, +}; + +enum { + MLX5_ROCE_L3_TYPE_IPV4_CAP = 1 << 1, + MLX5_ROCE_L3_TYPE_IPV6_CAP = 1 << 2, +}; + +enum { MLX5_OPCODE_NOP = 0x00, MLX5_OPCODE_SEND_INVAL = 0x01, MLX5_OPCODE_RDMA_WRITE = 0x08, @@ -446,7 +474,7 @@ struct mlx5_init_seg { __be32 rsvd2[880]; __be32 internal_timer_h; __be32 internal_timer_l; - __be32 rsrv3[2]; + __be32 rsvd3[2]; __be32 health_counter; __be32 rsvd4[1019]; __be64 ieee1588_clk; @@ -460,7 +488,9 @@ struct mlx5_eqe_comp { }; struct mlx5_eqe_qp_srq { - __be32 reserved[6]; + __be32 reserved1[5]; + u8 type; + u8 reserved2[3]; __be32 qp_srq_n; }; @@ -651,6 +681,12 @@ enum { }; enum { + MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH = 0x0, + MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6 = 0x1, + MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4 = 0x2, +}; + +enum { CQE_L2_OK = 1 << 0, CQE_L3_OK = 1 << 1, CQE_L4_OK = 1 << 2, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5162f3533042..1e3006dcf35d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -115,6 +115,11 @@ enum { MLX5_REG_HOST_ENDIANNESS = 0x7004, }; +enum { + MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0, + MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1, +}; + enum mlx5_page_fault_resume_flags { MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0, MLX5_PAGE_FAULT_RESUME_WRITE = 1 << 1, @@ -341,9 +346,11 @@ struct mlx5_core_mr { }; enum mlx5_res_type { - MLX5_RES_QP, - MLX5_RES_SRQ, - MLX5_RES_XSRQ, + MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP, + MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ, + MLX5_RES_SQ = MLX5_EVENT_QUEUE_TYPE_SQ, + MLX5_RES_SRQ = 3, + MLX5_RES_XSRQ = 4, }; struct mlx5_core_rsc_common { @@ -651,13 +658,6 @@ extern struct workqueue_struct *mlx5_core_wq; .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field -struct ib_field { - size_t struct_offset_bytes; - size_t struct_size_bytes; - int offset_bits; - int size_bits; -}; - static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev) { return pci_get_drvdata(pdev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 68d73f82e009..231ab6bcea76 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -67,6 +67,11 @@ enum { }; enum { + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, + MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, +}; + +enum { MLX5_CMD_OP_QUERY_HCA_CAP = 0x100, MLX5_CMD_OP_QUERY_ADAPTER = 0x101, MLX5_CMD_OP_INIT_HCA = 0x102, @@ -573,21 +578,24 @@ enum { struct mlx5_ifc_atomic_caps_bits { u8 reserved_0[0x40]; - u8 atomic_req_endianness[0x1]; - u8 reserved_1[0x1f]; + u8 atomic_req_8B_endianess_mode[0x2]; + u8 reserved_1[0x4]; + u8 supported_atomic_req_8B_endianess_mode_1[0x1]; - u8 reserved_2[0x20]; + u8 reserved_2[0x19]; - u8 reserved_3[0x10]; - u8 atomic_operations[0x10]; + u8 reserved_3[0x20]; u8 reserved_4[0x10]; - u8 atomic_size_qp[0x10]; + u8 atomic_operations[0x10]; u8 reserved_5[0x10]; + u8 atomic_size_qp[0x10]; + + u8 reserved_6[0x10]; u8 atomic_size_dc[0x10]; - u8 reserved_6[0x720]; + u8 reserved_7[0x720]; }; struct mlx5_ifc_odp_cap_bits { @@ -850,7 +858,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_66[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_67[0x40]; + u8 reserved_67[0x20]; + u8 device_frequency_mhz[0x20]; u8 device_frequency_khz[0x20]; u8 reserved_68[0x5f]; u8 cqe_zip[0x1]; @@ -2215,19 +2224,25 @@ struct mlx5_ifc_nic_vport_context_bits { u8 mtu[0x10]; - u8 reserved_3[0x640]; + u8 system_image_guid[0x40]; + u8 port_guid[0x40]; + u8 node_guid[0x40]; + + u8 reserved_3[0x140]; + u8 qkey_violation_counter[0x10]; + u8 reserved_4[0x430]; u8 promisc_uc[0x1]; u8 promisc_mc[0x1]; u8 promisc_all[0x1]; - u8 reserved_4[0x2]; + u8 reserved_5[0x2]; u8 allowed_list_type[0x3]; - u8 reserved_5[0xc]; + u8 reserved_6[0xc]; u8 allowed_list_size[0xc]; struct mlx5_ifc_mac_address_layout_bits permanent_address; - u8 reserved_6[0x20]; + u8 reserved_7[0x20]; u8 current_uc_mac_address[0][0x40]; }; @@ -4199,6 +4214,13 @@ struct mlx5_ifc_modify_tis_out_bits { u8 reserved_1[0x40]; }; +struct mlx5_ifc_modify_tis_bitmask_bits { + u8 reserved_0[0x20]; + + u8 reserved_1[0x1f]; + u8 prio[0x1]; +}; + struct mlx5_ifc_modify_tis_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; @@ -4211,7 +4233,7 @@ struct mlx5_ifc_modify_tis_in_bits { u8 reserved_3[0x20]; - u8 modify_bitmask[0x40]; + struct mlx5_ifc_modify_tis_bitmask_bits bitmask; u8 reserved_4[0x40]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index f079fb1a31f7..5b8c89ffaa58 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -85,7 +85,16 @@ enum mlx5_qp_state { MLX5_QP_STATE_ERR = 6, MLX5_QP_STATE_SQ_DRAINING = 7, MLX5_QP_STATE_SUSPENDED = 9, - MLX5_QP_NUM_STATE + MLX5_QP_NUM_STATE, + MLX5_QP_STATE, + MLX5_QP_STATE_BAD, +}; + +enum { + MLX5_SQ_STATE_NA = MLX5_SQC_STATE_ERR + 1, + MLX5_SQ_NUM_STATE = MLX5_SQ_STATE_NA + 1, + MLX5_RQ_STATE_NA = MLX5_RQC_STATE_ERR + 1, + MLX5_RQ_NUM_STATE = MLX5_RQ_STATE_NA + 1, }; enum { @@ -130,6 +139,9 @@ enum { MLX5_QP_BIT_RWE = 1 << 14, MLX5_QP_BIT_RAE = 1 << 13, MLX5_QP_BIT_RIC = 1 << 4, + MLX5_QP_BIT_CC_SLAVE_RECV = 1 << 2, + MLX5_QP_BIT_CC_SLAVE_SEND = 1 << 1, + MLX5_QP_BIT_CC_MASTER = 1 << 0 }; enum { @@ -248,8 +260,12 @@ struct mlx5_av { __be32 dqp_dct; u8 stat_rate_sl; u8 fl_mlid; - __be16 rlid; - u8 reserved0[10]; + union { + __be16 rlid; + __be16 udp_sport; + }; + u8 reserved0[4]; + u8 rmac[6]; u8 tclass; u8 hop_limit; __be32 grh_gid_fl; @@ -456,11 +472,16 @@ struct mlx5_qp_path { u8 static_rate; u8 hop_limit; __be32 tclass_flowlabel; - u8 rgid[16]; - u8 rsvd1[4]; - u8 sl; + union { + u8 rgid[16]; + u8 rip[16]; + }; + u8 f_dscp_ecn_prio; + u8 ecn_dscp; + __be16 udp_sport; + u8 dci_cfi_prio_sl; u8 port; - u8 rsvd2[6]; + u8 rmac[6]; }; struct mlx5_qp_context { @@ -620,8 +641,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, struct mlx5_create_qp_mbox_in *in, int inlen); -int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state, - enum mlx5_qp_state new_state, +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, struct mlx5_modify_qp_mbox_in *in, int sqd_event, struct mlx5_core_qp *qp); int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, @@ -639,6 +659,14 @@ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn, u8 context, int error); #endif +int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, + struct mlx5_core_qp *rq); +void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, + struct mlx5_core_qp *rq); +int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, + struct mlx5_core_qp *sq); +void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, + struct mlx5_core_qp *sq); static inline const char *mlx5_qp_type_str(int type) { diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h new file mode 100644 index 000000000000..88441f5ece25 --- /dev/null +++ b/include/linux/mlx5/transobj.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __TRANSOBJ_H__ +#define __TRANSOBJ_H__ + +#include <linux/mlx5/driver.h> + +int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn); +void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn); +int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rqn); +int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen); +void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn); +int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out); +int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *sqn); +int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen); +void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); +int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out); +int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *tirn); +int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, + int inlen); +void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); +int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *tisn); +int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in, + int inlen); +void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); +int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn); +int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen); +int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn); +int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); +int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); +int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn); +int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn); +int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); +int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); + +int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rqtn); +int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, + int inlen); +void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn); + +#endif /* __TRANSOBJ_H__ */ diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 638f2ca7a527..123771003e68 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -45,6 +45,11 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, u16 vport, u8 *addr); +int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, + u64 *system_image_guid); +int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); +int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, + u16 *qkey_viol_cntr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, union ib_gid *gid); @@ -85,4 +90,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, u16 vlans[], int list_size); +int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); +int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); + #endif /* __MLX5_VPORT_H__ */ diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f869807a0d0e..5322fea6fe4c 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -51,6 +51,7 @@ /* RPC/RDMA parameters and stats */ extern unsigned int svcrdma_ord; extern unsigned int svcrdma_max_requests; +extern unsigned int svcrdma_max_bc_requests; extern unsigned int svcrdma_max_req_size; extern atomic_t rdma_stat_recv; @@ -69,6 +70,7 @@ extern atomic_t rdma_stat_sq_prod; * completes. */ struct svc_rdma_op_ctxt { + struct list_head free; struct svc_rdma_op_ctxt *read_hdr; struct svc_rdma_fastreg_mr *frmr; int hdr_count; @@ -112,6 +114,7 @@ struct svc_rdma_fastreg_mr { struct list_head frmr_list; }; struct svc_rdma_req_map { + struct list_head free; unsigned long count; union { struct kvec sge[RPCSVC_MAXPAGES]; @@ -132,28 +135,32 @@ struct svcxprt_rdma { int sc_max_sge; int sc_max_sge_rd; /* max sge for read target */ - int sc_sq_depth; /* Depth of SQ */ atomic_t sc_sq_count; /* Number of SQ WR on queue */ - - int sc_max_requests; /* Depth of RQ */ + unsigned int sc_sq_depth; /* Depth of SQ */ + unsigned int sc_rq_depth; /* Depth of RQ */ + u32 sc_max_requests; /* Forward credits */ + u32 sc_max_bc_requests;/* Backward credits */ int sc_max_req_size; /* Size of each RQ WR buf */ struct ib_pd *sc_pd; atomic_t sc_dma_used; - atomic_t sc_ctxt_used; + spinlock_t sc_ctxt_lock; + struct list_head sc_ctxts; + int sc_ctxt_used; + spinlock_t sc_map_lock; + struct list_head sc_maps; + struct list_head sc_rq_dto_q; spinlock_t sc_rq_dto_lock; struct ib_qp *sc_qp; struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; - struct ib_mr *sc_phys_mr; /* MR for server memory */ int (*sc_reader)(struct svcxprt_rdma *, struct svc_rqst *, struct svc_rdma_op_ctxt *, int *, u32 *, u32, u32, u64, bool); u32 sc_dev_caps; /* distilled device caps */ - u32 sc_dma_lkey; /* local dma key */ unsigned int sc_frmr_pg_list_len; struct list_head sc_frmr_q; spinlock_t sc_frmr_q_lock; @@ -179,8 +186,18 @@ struct svcxprt_rdma { #define RPCRDMA_MAX_REQUESTS 32 #define RPCRDMA_MAX_REQ_SIZE 4096 +/* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our + * current NFSv4.1 implementation supports one backchannel slot. + */ +#define RPCRDMA_MAX_BC_REQUESTS 2 + #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD +/* svc_rdma_backchannel.c */ +extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, + struct rpcrdma_msg *rmsgp, + struct xdr_buf *rcvbuf); + /* svc_rdma_marshal.c */ extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, @@ -206,6 +223,8 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, u32, u32, u64, bool); /* svc_rdma_sendto.c */ +extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *, + struct svc_rdma_req_map *); extern int svc_rdma_sendto(struct svc_rqst *); extern struct rpcrdma_read_chunk * svc_rdma_get_read_chunk(struct rpcrdma_msg *); @@ -214,13 +233,14 @@ extern struct rpcrdma_read_chunk * extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, enum rpcrdma_errcode); -extern int svc_rdma_post_recv(struct svcxprt_rdma *); +extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t); extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); -extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); -extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); +extern struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *); +extern void svc_rdma_put_req_map(struct svcxprt_rdma *, + struct svc_rdma_req_map *); extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); @@ -234,6 +254,7 @@ extern struct svc_xprt_class svc_rdma_bc_class; #endif /* svc_rdma.c */ +extern struct workqueue_struct *svc_rdma_wq; extern int svc_rdma_init(void); extern void svc_rdma_cleanup(void); diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 11528591d0d7..c34c9002460c 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -83,6 +83,8 @@ struct rdma_dev_addr { int bound_dev_if; enum rdma_transport_type transport; struct net *net; + enum rdma_network_type network; + int hoplimit; }; /** @@ -91,8 +93,8 @@ struct rdma_dev_addr { * * The dev_addr->net field must be initialized. */ -int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, - u16 *vlan_id); +int rdma_translate_ip(const struct sockaddr *addr, + struct rdma_dev_addr *dev_addr, u16 *vlan_id); /** * rdma_resolve_ip - Resolve source and destination IP addresses to @@ -117,6 +119,10 @@ int rdma_resolve_ip(struct rdma_addr_client *client, struct rdma_dev_addr *addr, void *context), void *context); +int rdma_resolve_ip_route(struct sockaddr *src_addr, + const struct sockaddr *dst_addr, + struct rdma_dev_addr *addr); + void rdma_addr_cancel(struct rdma_dev_addr *addr); int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, @@ -125,8 +131,10 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, int rdma_addr_size(struct sockaddr *addr); int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id); -int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, - u8 *smac, u16 *vlan_id, int if_index); +int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, + const union ib_gid *dgid, + u8 *smac, u16 *vlan_id, int *if_index, + int *hoplimit); static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 269a27cf0a46..e30f19bd4a41 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -60,6 +60,7 @@ int ib_get_cached_gid(struct ib_device *device, * a specified GID value occurs. * @device: The device to query. * @gid: The GID value to search for. + * @gid_type: The GID type to search for. * @ndev: In RoCE, the net device of the device. NULL means ignore. * @port_num: The port number of the device where the GID value was found. * @index: The index into the cached GID table where the GID was found. This @@ -70,6 +71,7 @@ int ib_get_cached_gid(struct ib_device *device, */ int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid, + enum ib_gid_type gid_type, struct net_device *ndev, u8 *port_num, u16 *index); @@ -79,6 +81,7 @@ int ib_find_cached_gid(struct ib_device *device, * GID value occurs * @device: The device to query. * @gid: The GID value to search for. + * @gid_type: The GID type to search for. * @port_num: The port number of the device where the GID value sould be * searched. * @ndev: In RoCE, the net device of the device. Null means ignore. @@ -90,6 +93,7 @@ int ib_find_cached_gid(struct ib_device *device, */ int ib_find_cached_gid_by_port(struct ib_device *device, const union ib_gid *gid, + enum ib_gid_type gid_type, u8 port_num, struct net_device *ndev, u16 *index); diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index ec9b44dd3d80..0ff049bd9ad4 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -438,6 +438,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, /** * ib_mad_recv_handler - callback handler for a received MAD. * @mad_agent: MAD agent requesting the received MAD. + * @send_buf: Send buffer if found, else NULL * @mad_recv_wc: Received work completion information on the received MAD. * * MADs received in response to a send request operation will be handed to @@ -447,6 +448,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, * modify the data referenced by @mad_recv_wc. */ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, struct ib_mad_recv_wc *mad_recv_wc); /** diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index e99d8f9a4551..0f3daae44bf9 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -41,6 +41,8 @@ enum { IB_ETH_BYTES = 14, IB_VLAN_BYTES = 4, IB_GRH_BYTES = 40, + IB_IP4_BYTES = 20, + IB_UDP_BYTES = 8, IB_BTH_BYTES = 12, IB_DETH_BYTES = 8 }; @@ -223,6 +225,27 @@ struct ib_unpacked_eth { __be16 type; }; +struct ib_unpacked_ip4 { + u8 ver; + u8 hdr_len; + u8 tos; + __be16 tot_len; + __be16 id; + __be16 frag_off; + u8 ttl; + u8 protocol; + __sum16 check; + __be32 saddr; + __be32 daddr; +}; + +struct ib_unpacked_udp { + __be16 sport; + __be16 dport; + __be16 length; + __be16 csum; +}; + struct ib_unpacked_vlan { __be16 tag; __be16 type; @@ -237,6 +260,10 @@ struct ib_ud_header { struct ib_unpacked_vlan vlan; int grh_present; struct ib_unpacked_grh grh; + int ipv4_present; + struct ib_unpacked_ip4 ip4; + int udp_present; + struct ib_unpacked_udp udp; struct ib_unpacked_bth bth; struct ib_unpacked_deth deth; int immediate_present; @@ -253,13 +280,17 @@ void ib_unpack(const struct ib_field *desc, void *buf, void *structure); -void ib_ud_header_init(int payload_bytes, - int lrh_present, - int eth_present, - int vlan_present, - int grh_present, - int immediate_present, - struct ib_ud_header *header); +__sum16 ib_ud_ip4_csum(struct ib_ud_header *header); + +int ib_ud_header_init(int payload_bytes, + int lrh_present, + int eth_present, + int vlan_present, + int grh_present, + int ip_version, + int udp_present, + int immediate_present, + struct ib_ud_header *header); int ib_ud_header_pack(struct ib_ud_header *header, void *buf); diff --git a/include/rdma/ib_pma.h b/include/rdma/ib_pma.h index a5889f18807b..2f8a65c1fca7 100644 --- a/include/rdma/ib_pma.h +++ b/include/rdma/ib_pma.h @@ -42,6 +42,7 @@ */ #define IB_PMA_CLASS_CAP_ALLPORTSELECT cpu_to_be16(1 << 8) #define IB_PMA_CLASS_CAP_EXT_WIDTH cpu_to_be16(1 << 9) +#define IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF cpu_to_be16(1 << 10) #define IB_PMA_CLASS_CAP_XMIT_WAIT cpu_to_be16(1 << 12) #define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 301969552d0a..cdc1c81aa275 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -160,6 +160,7 @@ struct ib_sa_path_rec { int ifindex; /* ignored in IB */ struct net *net; + enum ib_gid_type gid_type; }; static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec) @@ -402,6 +403,8 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, */ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, + struct net_device *ndev, + enum ib_gid_type gid_type, struct ib_ah_attr *ah_attr); /** diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 120da1d7f57e..284b00c8fea4 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -49,13 +49,19 @@ #include <linux/scatterlist.h> #include <linux/workqueue.h> #include <linux/socket.h> +#include <linux/irq_poll.h> #include <uapi/linux/if_ether.h> +#include <net/ipv6.h> +#include <net/ip.h> +#include <linux/string.h> +#include <linux/slab.h> #include <linux/atomic.h> #include <linux/mmu_notifier.h> #include <asm/uaccess.h> extern struct workqueue_struct *ib_wq; +extern struct workqueue_struct *ib_comp_wq; union ib_gid { u8 raw[16]; @@ -67,7 +73,17 @@ union ib_gid { extern union ib_gid zgid; +enum ib_gid_type { + /* If link layer is Ethernet, this is RoCE V1 */ + IB_GID_TYPE_IB = 0, + IB_GID_TYPE_ROCE = 0, + IB_GID_TYPE_ROCE_UDP_ENCAP = 1, + IB_GID_TYPE_SIZE +}; + +#define ROCE_V2_UDP_DPORT 4791 struct ib_gid_attr { + enum ib_gid_type gid_type; struct net_device *ndev; }; @@ -98,6 +114,35 @@ enum rdma_protocol_type { __attribute_const__ enum rdma_transport_type rdma_node_get_transport(enum rdma_node_type node_type); +enum rdma_network_type { + RDMA_NETWORK_IB, + RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB, + RDMA_NETWORK_IPV4, + RDMA_NETWORK_IPV6 +}; + +static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type) +{ + if (network_type == RDMA_NETWORK_IPV4 || + network_type == RDMA_NETWORK_IPV6) + return IB_GID_TYPE_ROCE_UDP_ENCAP; + + /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */ + return IB_GID_TYPE_IB; +} + +static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type, + union ib_gid *gid) +{ + if (gid_type == IB_GID_TYPE_IB) + return RDMA_NETWORK_IB; + + if (ipv6_addr_v4mapped((struct in6_addr *)gid)) + return RDMA_NETWORK_IPV4; + else + return RDMA_NETWORK_IPV6; +} + enum rdma_link_layer { IB_LINK_LAYER_UNSPECIFIED, IB_LINK_LAYER_INFINIBAND, @@ -105,24 +150,32 @@ enum rdma_link_layer { }; enum ib_device_cap_flags { - IB_DEVICE_RESIZE_MAX_WR = 1, - IB_DEVICE_BAD_PKEY_CNTR = (1<<1), - IB_DEVICE_BAD_QKEY_CNTR = (1<<2), - IB_DEVICE_RAW_MULTI = (1<<3), - IB_DEVICE_AUTO_PATH_MIG = (1<<4), - IB_DEVICE_CHANGE_PHY_PORT = (1<<5), - IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6), - IB_DEVICE_CURR_QP_STATE_MOD = (1<<7), - IB_DEVICE_SHUTDOWN_PORT = (1<<8), - IB_DEVICE_INIT_TYPE = (1<<9), - IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10), - IB_DEVICE_SYS_IMAGE_GUID = (1<<11), - IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), - IB_DEVICE_SRQ_RESIZE = (1<<13), - IB_DEVICE_N_NOTIFY_CQ = (1<<14), - IB_DEVICE_LOCAL_DMA_LKEY = (1<<15), - IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */ - IB_DEVICE_MEM_WINDOW = (1<<17), + IB_DEVICE_RESIZE_MAX_WR = (1 << 0), + IB_DEVICE_BAD_PKEY_CNTR = (1 << 1), + IB_DEVICE_BAD_QKEY_CNTR = (1 << 2), + IB_DEVICE_RAW_MULTI = (1 << 3), + IB_DEVICE_AUTO_PATH_MIG = (1 << 4), + IB_DEVICE_CHANGE_PHY_PORT = (1 << 5), + IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6), + IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7), + IB_DEVICE_SHUTDOWN_PORT = (1 << 8), + IB_DEVICE_INIT_TYPE = (1 << 9), + IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10), + IB_DEVICE_SYS_IMAGE_GUID = (1 << 11), + IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12), + IB_DEVICE_SRQ_RESIZE = (1 << 13), + IB_DEVICE_N_NOTIFY_CQ = (1 << 14), + + /* + * This device supports a per-device lkey or stag that can be + * used without performing a memory registration for the local + * memory. Note that ULPs should never check this flag, but + * instead of use the local_dma_lkey flag in the ib_pd structure, + * which will always contain a usable lkey. + */ + IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15), + IB_DEVICE_RESERVED /* old SEND_W_INV */ = (1 << 16), + IB_DEVICE_MEM_WINDOW = (1 << 17), /* * Devices should set IB_DEVICE_UD_IP_SUM if they support * insertion of UDP and TCP checksum on outgoing UD IPoIB @@ -130,18 +183,35 @@ enum ib_device_cap_flags { * incoming messages. Setting this flag implies that the * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. */ - IB_DEVICE_UD_IP_CSUM = (1<<18), - IB_DEVICE_UD_TSO = (1<<19), - IB_DEVICE_XRC = (1<<20), - IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21), - IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), - IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23), - IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24), - IB_DEVICE_RC_IP_CSUM = (1<<25), - IB_DEVICE_RAW_IP_CSUM = (1<<26), - IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29), - IB_DEVICE_SIGNATURE_HANDOVER = (1<<30), - IB_DEVICE_ON_DEMAND_PAGING = (1<<31), + IB_DEVICE_UD_IP_CSUM = (1 << 18), + IB_DEVICE_UD_TSO = (1 << 19), + IB_DEVICE_XRC = (1 << 20), + + /* + * This device supports the IB "base memory management extension", + * which includes support for fast registrations (IB_WR_REG_MR, + * IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs). This flag should + * also be set by any iWarp device which must support FRs to comply + * to the iWarp verbs spec. iWarp devices also support the + * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the + * stag. + */ + IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21), + IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22), + IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23), + IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24), + IB_DEVICE_RC_IP_CSUM = (1 << 25), + IB_DEVICE_RAW_IP_CSUM = (1 << 26), + /* + * Devices should set IB_DEVICE_CROSS_CHANNEL if they + * support execution of WQEs that involve synchronization + * of I/O operations with single completion queue managed + * by hardware. + */ + IB_DEVICE_CROSS_CHANNEL = (1 << 27), + IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), + IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), + IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), }; enum ib_signature_prot_cap { @@ -184,6 +254,7 @@ struct ib_odp_caps { enum ib_cq_creation_flags { IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0, + IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1, }; struct ib_cq_init_attr { @@ -393,6 +464,7 @@ union rdma_protocol_stats { #define RDMA_CORE_CAP_PROT_IB 0x00100000 #define RDMA_CORE_CAP_PROT_ROCE 0x00200000 #define RDMA_CORE_CAP_PROT_IWARP 0x00400000 +#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000 #define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \ | RDMA_CORE_CAP_IB_MAD \ @@ -405,6 +477,12 @@ union rdma_protocol_stats { | RDMA_CORE_CAP_IB_CM \ | RDMA_CORE_CAP_AF_IB \ | RDMA_CORE_CAP_ETH_AH) +#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP \ + (RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \ + | RDMA_CORE_CAP_IB_MAD \ + | RDMA_CORE_CAP_IB_CM \ + | RDMA_CORE_CAP_AF_IB \ + | RDMA_CORE_CAP_ETH_AH) #define RDMA_CORE_PORT_IWARP (RDMA_CORE_CAP_PROT_IWARP \ | RDMA_CORE_CAP_IW_CM) #define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \ @@ -519,6 +597,17 @@ struct ib_grh { union ib_gid dgid; }; +union rdma_network_hdr { + struct ib_grh ibgrh; + struct { + /* The IB spec states that if it's IPv4, the header + * is located in the last 20 bytes of the header. + */ + u8 reserved[20]; + struct iphdr roce4grh; + }; +}; + enum { IB_MULTICAST_QPN = 0xffffff }; @@ -734,7 +823,6 @@ enum ib_wc_opcode { IB_WC_RDMA_READ, IB_WC_COMP_SWAP, IB_WC_FETCH_ADD, - IB_WC_BIND_MW, IB_WC_LSO, IB_WC_LOCAL_INV, IB_WC_REG_MR, @@ -755,10 +843,14 @@ enum ib_wc_flags { IB_WC_IP_CSUM_OK = (1<<3), IB_WC_WITH_SMAC = (1<<4), IB_WC_WITH_VLAN = (1<<5), + IB_WC_WITH_NETWORK_HDR_TYPE = (1<<6), }; struct ib_wc { - u64 wr_id; + union { + u64 wr_id; + struct ib_cqe *wr_cqe; + }; enum ib_wc_status status; enum ib_wc_opcode opcode; u32 vendor_err; @@ -777,6 +869,7 @@ struct ib_wc { u8 port_num; /* valid only for DR SMPs on switches */ u8 smac[ETH_ALEN]; u16 vlan_id; + u8 network_hdr_type; }; enum ib_cq_notify_flags { @@ -866,6 +959,9 @@ enum ib_qp_type { enum ib_qp_create_flags { IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, + IB_QP_CREATE_CROSS_CHANNEL = 1 << 2, + IB_QP_CREATE_MANAGED_SEND = 1 << 3, + IB_QP_CREATE_MANAGED_RECV = 1 << 4, IB_QP_CREATE_NETIF_QP = 1 << 5, IB_QP_CREATE_SIGNATURE_EN = 1 << 6, IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, @@ -1027,7 +1123,6 @@ enum ib_wr_opcode { IB_WR_REG_MR, IB_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, - IB_WR_BIND_MW, IB_WR_REG_SIG_MR, /* reserve values for low level drivers' internal use. * These values will not be used at all in the ib core layer. @@ -1062,26 +1157,16 @@ struct ib_sge { u32 lkey; }; -/** - * struct ib_mw_bind_info - Parameters for a memory window bind operation. - * @mr: A memory region to bind the memory window to. - * @addr: The address where the memory window should begin. - * @length: The length of the memory window, in bytes. - * @mw_access_flags: Access flags from enum ib_access_flags for the window. - * - * This struct contains the shared parameters for type 1 and type 2 - * memory window bind operations. - */ -struct ib_mw_bind_info { - struct ib_mr *mr; - u64 addr; - u64 length; - int mw_access_flags; +struct ib_cqe { + void (*done)(struct ib_cq *cq, struct ib_wc *wc); }; struct ib_send_wr { struct ib_send_wr *next; - u64 wr_id; + union { + u64 wr_id; + struct ib_cqe *wr_cqe; + }; struct ib_sge *sg_list; int num_sge; enum ib_wr_opcode opcode; @@ -1147,19 +1232,6 @@ static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr) return container_of(wr, struct ib_reg_wr, wr); } -struct ib_bind_mw_wr { - struct ib_send_wr wr; - struct ib_mw *mw; - /* The new rkey for the memory window. */ - u32 rkey; - struct ib_mw_bind_info bind_info; -}; - -static inline struct ib_bind_mw_wr *bind_mw_wr(struct ib_send_wr *wr) -{ - return container_of(wr, struct ib_bind_mw_wr, wr); -} - struct ib_sig_handover_wr { struct ib_send_wr wr; struct ib_sig_attrs *sig_attrs; @@ -1175,7 +1247,10 @@ static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr) struct ib_recv_wr { struct ib_recv_wr *next; - u64 wr_id; + union { + u64 wr_id; + struct ib_cqe *wr_cqe; + }; struct ib_sge *sg_list; int num_sge; }; @@ -1190,20 +1265,10 @@ enum ib_access_flags { IB_ACCESS_ON_DEMAND = (1<<6), }; -struct ib_phys_buf { - u64 addr; - u64 size; -}; - -struct ib_mr_attr { - struct ib_pd *pd; - u64 device_virt_addr; - u64 size; - int mr_access_flags; - u32 lkey; - u32 rkey; -}; - +/* + * XXX: these are apparently used for ->rereg_user_mr, no idea why they + * are hidden here instead of a uapi header! + */ enum ib_mr_rereg_flags { IB_MR_REREG_TRANS = 1, IB_MR_REREG_PD = (1<<1), @@ -1211,18 +1276,6 @@ enum ib_mr_rereg_flags { IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1) }; -/** - * struct ib_mw_bind - Parameters for a type 1 memory window bind operation. - * @wr_id: Work request id. - * @send_flags: Flags from ib_send_flags enum. - * @bind_info: More parameters of the bind operation. - */ -struct ib_mw_bind { - u64 wr_id; - int send_flags; - struct ib_mw_bind_info bind_info; -}; - struct ib_fmr_attr { int max_pages; int max_maps; @@ -1307,6 +1360,12 @@ struct ib_ah { typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); +enum ib_poll_context { + IB_POLL_DIRECT, /* caller context, no hw completions */ + IB_POLL_SOFTIRQ, /* poll from softirq context */ + IB_POLL_WORKQUEUE, /* poll from workqueue */ +}; + struct ib_cq { struct ib_device *device; struct ib_uobject *uobject; @@ -1315,6 +1374,12 @@ struct ib_cq { void *cq_context; int cqe; atomic_t usecnt; /* count number of work queues */ + enum ib_poll_context poll_ctx; + struct ib_wc *wc; + union { + struct irq_poll iop; + struct work_struct work; + }; }; struct ib_srq { @@ -1363,7 +1428,6 @@ struct ib_mr { u64 iova; u32 length; unsigned int page_size; - atomic_t usecnt; /* count number of MWs */ }; struct ib_mw { @@ -1724,11 +1788,6 @@ struct ib_device { int wc_cnt); struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); - struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start); struct ib_mr * (*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, @@ -1741,8 +1800,6 @@ struct ib_device { int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); - int (*query_mr)(struct ib_mr *mr, - struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); struct ib_mr * (*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, @@ -1750,18 +1807,8 @@ struct ib_device { int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents); - int (*rereg_phys_mr)(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start); struct ib_mw * (*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type); - int (*bind_mw)(struct ib_qp *qp, - struct ib_mw *mw, - struct ib_mw_bind *mw_bind); int (*dealloc_mw)(struct ib_mw *mw); struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, @@ -1823,6 +1870,7 @@ struct ib_device { u16 is_switch:1; u8 node_type; u8 phys_port_cnt; + struct ib_device_attr attrs; /** * The following mandatory functions are used only at device @@ -1888,6 +1936,31 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; } +static inline bool ib_is_udata_cleared(struct ib_udata *udata, + size_t offset, + size_t len) +{ + const void __user *p = udata->inbuf + offset; + bool ret = false; + u8 *buf; + + if (len > USHRT_MAX) + return false; + + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return false; + + if (copy_from_user(buf, p, len)) + goto free; + + ret = !memchr_inv(buf, 0, len); + +free: + kfree(buf); + return ret; +} + /** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for @@ -1912,9 +1985,6 @@ int ib_register_event_handler (struct ib_event_handler *event_handler); int ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(struct ib_event *event); -int ib_query_device(struct ib_device *device, - struct ib_device_attr *device_attr); - int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); @@ -1968,6 +2038,17 @@ static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num) { + return device->port_immutable[port_num].core_cap_flags & + (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP); +} + +static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; +} + +static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num) +{ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE; } @@ -1978,8 +2059,8 @@ static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_n static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num) { - return device->port_immutable[port_num].core_cap_flags & - (RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_PROT_ROCE); + return rdma_protocol_ib(device, port_num) || + rdma_protocol_roce(device, port_num); } /** @@ -2220,7 +2301,8 @@ int ib_modify_port(struct ib_device *device, struct ib_port_modify *port_modify); int ib_find_gid(struct ib_device *device, union ib_gid *gid, - struct net_device *ndev, u8 *port_num, u16 *index); + enum ib_gid_type gid_type, struct net_device *ndev, + u8 *port_num, u16 *index); int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); @@ -2454,6 +2536,11 @@ static inline int ib_post_recv(struct ib_qp *qp, return qp->device->post_recv(qp, recv_wr, bad_recv_wr); } +struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx); +void ib_free_cq(struct ib_cq *cq); +int ib_process_cq_direct(struct ib_cq *cq, int budget); + /** * ib_create_cq - Creates a CQ on the specified device. * @device: The device on which to create the CQ. @@ -2839,13 +2926,6 @@ static inline void ib_dma_free_coherent(struct ib_device *dev, } /** - * ib_query_mr - Retrieves information about a specific memory region. - * @mr: The memory region to retrieve information about. - * @mr_attr: The attributes of the specified memory region. - */ -int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); - -/** * ib_dereg_mr - Deregisters a memory region and removes it from the * HCA translation table. * @mr: The memory region to deregister. @@ -2882,42 +2962,6 @@ static inline u32 ib_inc_rkey(u32 rkey) } /** - * ib_alloc_mw - Allocates a memory window. - * @pd: The protection domain associated with the memory window. - * @type: The type of the memory window (1 or 2). - */ -struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); - -/** - * ib_bind_mw - Posts a work request to the send queue of the specified - * QP, which binds the memory window to the given address range and - * remote access attributes. - * @qp: QP to post the bind work request on. - * @mw: The memory window to bind. - * @mw_bind: Specifies information about the memory window, including - * its address range, remote access rights, and associated memory region. - * - * If there is no immediate error, the function will update the rkey member - * of the mw parameter to its new value. The bind operation can still fail - * asynchronously. - */ -static inline int ib_bind_mw(struct ib_qp *qp, - struct ib_mw *mw, - struct ib_mw_bind *mw_bind) -{ - /* XXX reference counting in corresponding MR? */ - return mw->device->bind_mw ? - mw->device->bind_mw(qp, mw, mw_bind) : - -ENOSYS; -} - -/** - * ib_dealloc_mw - Deallocates a memory window. - * @mw: The memory window to deallocate. - */ -int ib_dealloc_mw(struct ib_mw *mw); - -/** * ib_alloc_fmr - Allocates a unmapped fast memory region. * @pd: The protection domain associated with the unmapped region. * @mr_access_flags: Specifies the memory access rights. diff --git a/include/scsi/iser.h b/include/scsi/iser.h new file mode 100644 index 000000000000..2e678fa74eca --- /dev/null +++ b/include/scsi/iser.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ISCSI_ISER_H +#define ISCSI_ISER_H + +#define ISER_ZBVA_NOT_SUP 0x80 +#define ISER_SEND_W_INV_NOT_SUP 0x40 +#define ISERT_ZBVA_NOT_USED 0x80 +#define ISERT_SEND_W_INV_NOT_USED 0x40 + +#define ISCSI_CTRL 0x10 +#define ISER_HELLO 0x20 +#define ISER_HELLORPLY 0x30 + +#define ISER_VER 0x10 +#define ISER_WSV 0x08 +#define ISER_RSV 0x04 + +/** + * struct iser_cm_hdr - iSER CM header (from iSER Annex A12) + * + * @flags: flags support (zbva, send_w_inv) + * @rsvd: reserved + */ +struct iser_cm_hdr { + u8 flags; + u8 rsvd[3]; +} __packed; + +/** + * struct iser_ctrl - iSER header of iSCSI control PDU + * + * @flags: opcode and read/write valid bits + * @rsvd: reserved + * @write_stag: write rkey + * @write_va: write virtual address + * @reaf_stag: read rkey + * @read_va: read virtual address + */ +struct iser_ctrl { + u8 flags; + u8 rsvd[3]; + __be32 write_stag; + __be64 write_va; + __be32 read_stag; + __be64 read_va; +} __packed; + +#endif /* ISCSI_ISER_H */ diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index ff8f6c091a15..f95f25e786ef 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -15,7 +15,7 @@ struct softirq_action; softirq_name(NET_TX) \ softirq_name(NET_RX) \ softirq_name(BLOCK) \ - softirq_name(BLOCK_IOPOLL) \ + softirq_name(IRQ_POLL) \ softirq_name(TASKLET) \ softirq_name(SCHED) \ softirq_name(HRTIMER) \ |