summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r-- include/linux/hmm-dma.h 33
-rw-r--r-- include/linux/hmm.h 24
-rw-r--r-- include/linux/mlx5/driver.h 1
-rw-r--r-- include/net/mana/gdma.h 47
-rw-r--r-- include/net/mana/hw_channel.h 9
-rw-r--r-- include/net/mana/mana.h 3
-rw-r--r-- include/rdma/ib_cm.h 17
-rw-r--r-- include/rdma/ib_umem_odp.h 25
-rw-r--r-- include/rdma/ib_verbs.h 18
-rw-r--r-- include/rdma/rdma_cm.h 1
-rw-r--r-- include/uapi/rdma/ib_user_verbs.h 16
11 files changed, 132 insertions, 62 deletions
diff --git a/include/linux/hmm-dma.h b/include/linux/hmm-dma.h
new file mode 100644
index 000000000000..f58b9fc71999
--- /dev/null
+++ b/include/linux/hmm-dma.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
+#ifndef LINUX_HMM_DMA_H
+#define LINUX_HMM_DMA_H
+
+#include <linux/dma-mapping.h>
+
+struct dma_iova_state;
+struct pci_p2pdma_map_state;
+
+/*
+ * struct hmm_dma_map - array of PFNs and DMA addresses
+ *
+ * @state: DMA IOVA state
+ * @pfn_list: array of PFNs
+ * @dma_list: array of DMA addresses
+ * @dma_entry_size: size of each DMA entry in the array
+ */
+struct hmm_dma_map {
+ struct dma_iova_state state;
+ unsigned long *pfn_list;
+ dma_addr_t *dma_list;
+ size_t dma_entry_size;
+};
+
+int hmm_dma_map_alloc(struct device *dev, struct hmm_dma_map *map,
+ size_t nr_entries, size_t dma_entry_size);
+void hmm_dma_map_free(struct device *dev, struct hmm_dma_map *map);
+dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map,
+ size_t idx,
+ struct pci_p2pdma_map_state *p2pdma_state);
+bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx);
+#endif /* LINUX_HMM_DMA_H */
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 126a36571667..db75ffc949a7 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -23,6 +23,10 @@ struct mmu_interval_notifier;
* HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID)
* HMM_PFN_ERROR - accessing the pfn is impossible and the device should
* fail. ie poisoned memory, special pages, no vma, etc
+ * HMM_PFN_P2PDMA - P2P page
+ * HMM_PFN_P2PDMA_BUS - Bus mapped P2P transfer
+ * HMM_PFN_DMA_MAPPED - Flag preserved on input-to-output transformation
+ * to mark that the page is already DMA mapped
*
* On input:
* 0 - Return the current state of the page, do not fault it.
@@ -36,13 +40,21 @@ enum hmm_pfn_flags {
HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1),
HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2),
HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3),
- HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 8),
+ /*
+ * Sticky flags, carried from input to output,
+ * don't forget to update HMM_PFN_INOUT_FLAGS
+ */
+ HMM_PFN_DMA_MAPPED = 1UL << (BITS_PER_LONG - 4),
+ HMM_PFN_P2PDMA = 1UL << (BITS_PER_LONG - 5),
+ HMM_PFN_P2PDMA_BUS = 1UL << (BITS_PER_LONG - 6),
+
+ HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 11),
/* Input flags */
HMM_PFN_REQ_FAULT = HMM_PFN_VALID,
HMM_PFN_REQ_WRITE = HMM_PFN_WRITE,
- HMM_PFN_FLAGS = 0xFFUL << HMM_PFN_ORDER_SHIFT,
+ HMM_PFN_FLAGS = ~((1UL << HMM_PFN_ORDER_SHIFT) - 1),
};
/*
@@ -58,6 +70,14 @@ static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn)
}
/*
+ * hmm_pfn_to_phys() - return physical address pointed to by a device entry
+ */
+static inline phys_addr_t hmm_pfn_to_phys(unsigned long hmm_pfn)
+{
+ return __pfn_to_phys(hmm_pfn & ~HMM_PFN_FLAGS);
+}
+
+/*
* hmm_pfn_to_map_order() - return the CPU mapping size order
*
* This is optionally useful to optimize processing of the pfn result
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index d1dfbad9a447..e6ba8f4f4bd1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -398,6 +398,7 @@ struct mlx5_core_rsc_common {
enum mlx5_res_type res;
refcount_t refcount;
struct completion free;
+ bool invalid;
};
struct mlx5_uars_page {
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 228603bf03f2..3ce56a816425 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -60,6 +60,7 @@ enum gdma_eqe_type {
GDMA_EQE_HWC_INIT_DONE = 131,
GDMA_EQE_HWC_SOC_RECONFIG = 132,
GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
+ GDMA_EQE_HWC_SOC_SERVICE = 134,
GDMA_EQE_RNIC_QP_FATAL = 176,
};
@@ -70,6 +71,18 @@ enum {
GDMA_DEVICE_MANA_IB = 3,
};
+enum gdma_service_type {
+ GDMA_SERVICE_TYPE_NONE = 0,
+ GDMA_SERVICE_TYPE_RDMA_SUSPEND = 1,
+ GDMA_SERVICE_TYPE_RDMA_RESUME = 2,
+};
+
+struct mana_service_work {
+ struct work_struct work;
+ struct gdma_dev *gdma_dev;
+ enum gdma_service_type event;
+};
+
struct gdma_resource {
/* Protect the bitmap */
spinlock_t lock;
@@ -224,6 +237,8 @@ struct gdma_dev {
void *driver_data;
struct auxiliary_device *adev;
+ bool is_suspended;
+ bool rdma_teardown;
};
/* MANA_PAGE_SIZE is the DMA unit */
@@ -407,6 +422,10 @@ struct gdma_context {
/* Azure RDMA adapter */
struct gdma_dev mana_ib;
+
+ u64 pf_cap_flags1;
+
+ struct workqueue_struct *service_wq;
};
static inline bool mana_gd_is_mana(struct gdma_dev *gd)
@@ -553,6 +572,7 @@ enum {
*/
#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3)
+#define GDMA_DRV_CAP_FLAG_1_GDMA_PAGES_4MB_1GB_2GB BIT(4)
#define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5)
/* Driver can handle holes (zeros) in the device list */
@@ -707,20 +727,6 @@ struct gdma_query_hwc_timeout_resp {
u32 reserved;
};
-enum atb_page_size {
- ATB_PAGE_SIZE_4K,
- ATB_PAGE_SIZE_8K,
- ATB_PAGE_SIZE_16K,
- ATB_PAGE_SIZE_32K,
- ATB_PAGE_SIZE_64K,
- ATB_PAGE_SIZE_128K,
- ATB_PAGE_SIZE_256K,
- ATB_PAGE_SIZE_512K,
- ATB_PAGE_SIZE_1M,
- ATB_PAGE_SIZE_2M,
- ATB_PAGE_SIZE_MAX,
-};
-
enum gdma_mr_access_flags {
GDMA_ACCESS_FLAG_LOCAL_READ = BIT_ULL(0),
GDMA_ACCESS_FLAG_LOCAL_WRITE = BIT_ULL(1),
@@ -815,6 +821,8 @@ enum gdma_mr_type {
* address that is set up in the MST
*/
GDMA_MR_TYPE_GVA = 2,
+ /* Guest zero-based address MRs */
+ GDMA_MR_TYPE_ZBVA = 4,
};
struct gdma_create_mr_params {
@@ -826,6 +834,10 @@ struct gdma_create_mr_params {
u64 virtual_address;
enum gdma_mr_access_flags access_flags;
} gva;
+ struct {
+ u64 dma_region_handle;
+ enum gdma_mr_access_flags access_flags;
+ } zbva;
};
};
@@ -841,7 +853,10 @@ struct gdma_create_mr_request {
u64 virtual_address;
enum gdma_mr_access_flags access_flags;
} gva;
-
+ struct {
+ u64 dma_region_handle;
+ enum gdma_mr_access_flags access_flags;
+ } zbva;
};
u32 reserved_2;
};/* HW DATA */
@@ -893,4 +908,6 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle);
void mana_register_debugfs(void);
void mana_unregister_debugfs(void);
+int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event);
+
#endif /* _GDMA_H */
diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h
index 158b125692c2..83cf93338eb3 100644
--- a/include/net/mana/hw_channel.h
+++ b/include/net/mana/hw_channel.h
@@ -49,6 +49,15 @@ union hwc_init_type_data {
};
}; /* HW DATA */
+union hwc_init_soc_service_type {
+ u32 as_uint32;
+
+ struct {
+ u32 value : 28;
+ u32 type : 4;
+ };
+}; /* HW DATA */
+
struct hwc_rx_oob {
u32 type : 6;
u32 eom : 1;
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 38238c1d00bf..9abb66461211 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -489,6 +489,9 @@ int mana_detach(struct net_device *ndev, bool from_close);
int mana_probe(struct gdma_dev *gd, bool resuming);
void mana_remove(struct gdma_dev *gd, bool suspending);
+int mana_rdma_probe(struct gdma_dev *gd);
+void mana_rdma_remove(struct gdma_dev *gd);
+
void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev);
int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames,
u32 flags);
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index a2ac62b4a6cf..1fa3786f82f4 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -480,23 +480,12 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
const void *private_data,
u8 private_data_len);
-#define IB_CM_MRA_FLAG_DELAY 0x80 /* Send MRA only after a duplicate msg */
-
/**
- * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection
- * message.
+ * ib_prepare_cm_mra - Prepares to send a message receipt acknowledgment to a
+ * connection message in case duplicates are received.
* @cm_id: Connection identifier associated with the connection message.
- * @service_timeout: The lower 5-bits specify the maximum time required for
- * the sender to reply to the connection message. The upper 3-bits
- * specify additional control flags.
- * @private_data: Optional user-defined private data sent with the
- * message receipt acknowledgement.
- * @private_data_len: Size of the private data buffer, in bytes.
*/
-int ib_send_cm_mra(struct ib_cm_id *cm_id,
- u8 service_timeout,
- const void *private_data,
- u8 private_data_len);
+int ib_prepare_cm_mra(struct ib_cm_id *cm_id);
/**
* ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 0844c1d05ac6..2a24bf791c10 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -8,23 +8,17 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>
+#include <linux/hmm-dma.h>
struct ib_umem_odp {
struct ib_umem umem;
struct mmu_interval_notifier notifier;
struct pid *tgid;
- /* An array of the pfns included in the on-demand paging umem. */
- unsigned long *pfn_list;
+ struct hmm_dma_map map;
/*
- * An array with DMA addresses mapped for pfns in pfn_list.
- * The lower two bits designate access permissions.
- * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT.
- */
- dma_addr_t *dma_list;
- /*
- * The umem_mutex protects the page_list and dma_list fields of an ODP
+ * The umem_mutex protects the page_list field of an ODP
* umem, allowing only a single thread to map/unmap pages. The mutex
* also protects access to the mmu notifier counters.
*/
@@ -67,19 +61,6 @@ static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
umem_odp->page_shift;
}
-/*
- * The lower 2 bits of the DMA address signal the R/W permissions for
- * the entry. To upgrade the permissions, provide the appropriate
- * bitmask to the map_dma_pages function.
- *
- * Be aware that upgrading a mapped address might result in change of
- * the DMA address for the page.
- */
-#define ODP_READ_ALLOWED_BIT (1<<0ULL)
-#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
-
-#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
-
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_umem_odp *
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 901353796fbb..af43a8d2a74a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -314,17 +314,19 @@ enum ib_atomic_cap {
};
enum ib_odp_general_cap_bits {
- IB_ODP_SUPPORT = 1 << 0,
- IB_ODP_SUPPORT_IMPLICIT = 1 << 1,
+ IB_ODP_SUPPORT = IB_UVERBS_ODP_SUPPORT,
+ IB_ODP_SUPPORT_IMPLICIT = IB_UVERBS_ODP_SUPPORT_IMPLICIT,
};
enum ib_odp_transport_cap_bits {
- IB_ODP_SUPPORT_SEND = 1 << 0,
- IB_ODP_SUPPORT_RECV = 1 << 1,
- IB_ODP_SUPPORT_WRITE = 1 << 2,
- IB_ODP_SUPPORT_READ = 1 << 3,
- IB_ODP_SUPPORT_ATOMIC = 1 << 4,
- IB_ODP_SUPPORT_SRQ_RECV = 1 << 5,
+ IB_ODP_SUPPORT_SEND = IB_UVERBS_ODP_SUPPORT_SEND,
+ IB_ODP_SUPPORT_RECV = IB_UVERBS_ODP_SUPPORT_RECV,
+ IB_ODP_SUPPORT_WRITE = IB_UVERBS_ODP_SUPPORT_WRITE,
+ IB_ODP_SUPPORT_READ = IB_UVERBS_ODP_SUPPORT_READ,
+ IB_ODP_SUPPORT_ATOMIC = IB_UVERBS_ODP_SUPPORT_ATOMIC,
+ IB_ODP_SUPPORT_SRQ_RECV = IB_UVERBS_ODP_SUPPORT_SRQ_RECV,
+ IB_ODP_SUPPORT_FLUSH = IB_UVERBS_ODP_SUPPORT_FLUSH,
+ IB_ODP_SUPPORT_ATOMIC_WRITE = IB_UVERBS_ODP_SUPPORT_ATOMIC_WRITE,
};
struct ib_odp_caps {
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 8a8ab2f793ab..d1593ad47e28 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -388,6 +388,5 @@ void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
union ib_gid *dgid);
struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *cm_id);
-struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res);
#endif /* RDMA_CM_H */
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index e16650f0c85d..3b7bd99813e9 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -233,6 +233,22 @@ struct ib_uverbs_ex_query_device {
__u32 reserved;
};
+enum ib_uverbs_odp_general_cap_bits {
+ IB_UVERBS_ODP_SUPPORT = 1 << 0,
+ IB_UVERBS_ODP_SUPPORT_IMPLICIT = 1 << 1,
+};
+
+enum ib_uverbs_odp_transport_cap_bits {
+ IB_UVERBS_ODP_SUPPORT_SEND = 1 << 0,
+ IB_UVERBS_ODP_SUPPORT_RECV = 1 << 1,
+ IB_UVERBS_ODP_SUPPORT_WRITE = 1 << 2,
+ IB_UVERBS_ODP_SUPPORT_READ = 1 << 3,
+ IB_UVERBS_ODP_SUPPORT_ATOMIC = 1 << 4,
+ IB_UVERBS_ODP_SUPPORT_SRQ_RECV = 1 << 5,
+ IB_UVERBS_ODP_SUPPORT_FLUSH = 1 << 6,
+ IB_UVERBS_ODP_SUPPORT_ATOMIC_WRITE = 1 << 7,
+};
+
struct ib_uverbs_odp_caps {
__aligned_u64 general_caps;
struct {