From 292841b09648ce7aee5df16ab72581f3b6c2bd7a Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Tue, 24 May 2016 02:14:05 +1000 Subject: cxl: Update process element after allocating interrupts In the kernel API, it is possible to attempt to allocate AFU interrupts after already starting a context. Since the process element structure used by the hardware is only filled out at the time the context is started, it will not be updated with the interrupt numbers that have just been allocated and therefore AFU interrupts will not work unless they were allocated prior to starting the context. This can present some difficulties as each CAPI enabled PCI device in the kernel API has a default context, which may need to be started very early to enable translations, potentially before interrupts can easily be set up. This patch makes the API more flexible to allow interrupts to be allocated after a context has already been started and takes care of updating the PE structure used by the hardware and notifying it to discard any cached copy it may have. The update is currently performed via a terminate/remove/add sequence. This is necessary on some hardware such as the XSL that does not properly support the update LLCMD. Note that this is only supported on powernv at present - attempting to perform this ordering on PowerVM will raise a warning. Signed-off-by: Ian Munsie Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/cxl/api.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/misc/cxl/api.c') diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 6d228ccd884d..99081b821f12 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -102,7 +102,10 @@ int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) if (num == 0) num = ctx->afu->pp_irqs; res = afu_allocate_irqs(ctx, num); - if (!res && !cpu_has_feature(CPU_FTR_HVMODE)) { + if (res) + return res; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) { /* In a guest, the PSL interrupt is not multiplexed. It was * allocated above, and we need to set its handler */ @@ -110,6 +113,13 @@ int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) if (hwirq) cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl"); } + + if (ctx->status == STARTED) { + if (cxl_ops->update_ivtes) + cxl_ops->update_ivtes(ctx); + else WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n"); + } + return res; } EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs); -- cgit v1.2.3 From b810253bd9342f863a86ec7dfff4a5a7a0394d2f Mon Sep 17 00:00:00 2001 From: Philippe Bergheaud Date: Thu, 23 Jun 2016 15:03:53 +0200 Subject: cxl: Add mechanism for delivering AFU driver specific events This adds an afu_driver_ops structure with fetch_event() and event_delivered() callbacks. An AFU driver such as cxlflash can fill this out and associate it with a context to enable passing custom AFU specific events to userspace. This also adds a new kernel API function cxl_context_pending_events(), that the AFU driver can use to notify the cxl driver that new specific events are ready to be delivered, and wake up anyone waiting on the context wait queue. The current count of AFU driver specific events is stored in the field afu_driver_events of the context structure. The cxl driver checks the afu_driver_events count during poll, select, read, etc. calls to check if an AFU driver specific event is pending, and calls fetch_event() to obtain and deliver that event. This way, the cxl driver takes care of all the usual locking semantics around these calls and handles all the generic cxl events, so that the AFU driver only needs to worry about it's own events. fetch_event() return a struct cxl_event_afu_driver_reserved, allocated by the AFU driver, and filled in with the specific event information and size. Total event size (header + data) should not be greater than CXL_READ_MIN_SIZE (4K). Th cxl driver prepends an appropriate cxl event header, copies the event to userspace, and finally calls event_delivered() to return the status of the operation to the AFU driver. The event is identified by the context and cxl_event_afu_driver_reserved pointers. Since AFU drivers provide their own means for userspace to obtain the AFU file descriptor (i.e. cxlflash uses an ioctl on their scsi file descriptor to obtain the AFU file descriptor) and the generic cxl driver will never use this event, the ABI of the event is up to each individual AFU driver. Signed-off-by: Philippe Bergheaud Signed-off-by: Michael Ellerman --- drivers/misc/cxl/Kconfig | 5 ++++ drivers/misc/cxl/api.c | 17 +++++++++++++ drivers/misc/cxl/cxl.h | 7 +++++- drivers/misc/cxl/file.c | 64 ++++++++++++++++++++++++++++++++++++++++++------ include/misc/cxl.h | 48 ++++++++++++++++++++++++++++++++++++ include/uapi/misc/cxl.h | 17 +++++++++++++ 6 files changed, 149 insertions(+), 9 deletions(-) (limited to 'drivers/misc/cxl/api.c') diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index 8756d06e2bb8..560412cd4c98 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -15,12 +15,17 @@ config CXL_EEH bool default n +config CXL_AFU_DRIVER_OPS + bool + default n + config CXL tristate "Support for IBM Coherent Accelerators (CXL)" depends on PPC_POWERNV && PCI_MSI && EEH select CXL_BASE select CXL_KERNEL_API select CXL_EEH + select CXL_AFU_DRIVER_OPS default m help Select this option to enable driver support for IBM Coherent diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 99081b821f12..f11dc0e5fdd6 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -333,6 +333,23 @@ struct cxl_context *cxl_fops_get_context(struct file *file) } EXPORT_SYMBOL_GPL(cxl_fops_get_context); +void cxl_set_driver_ops(struct cxl_context *ctx, + struct cxl_afu_driver_ops *ops) +{ + WARN_ON(!ops->fetch_event || !ops->event_delivered); + atomic_set(&ctx->afu_driver_events, 0); + ctx->afu_driver_ops = ops; +} +EXPORT_SYMBOL_GPL(cxl_set_driver_ops); + +void cxl_context_events_pending(struct cxl_context *ctx, + unsigned int new_events) +{ + atomic_add(new_events, &ctx->afu_driver_events); + wake_up_all(&ctx->wq); +} +EXPORT_SYMBOL_GPL(cxl_context_events_pending); + int cxl_start_work(struct cxl_context *ctx, struct cxl_ioctl_start_work *work) { diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index ce2b9d513069..422ee53868a8 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -24,6 +24,7 @@ #include #include +#include #include extern uint cxl_verbose; @@ -34,7 +35,7 @@ extern uint cxl_verbose; * Bump version each time a user API change is made, whether it is * backwards compatible ot not. */ -#define CXL_API_VERSION 2 +#define CXL_API_VERSION 3 #define CXL_API_VERSION_COMPATIBLE 1 /* @@ -528,6 +529,10 @@ struct cxl_context { bool pending_fault; bool pending_afu_err; + /* Used by AFU drivers for driver specific event delivery */ + struct cxl_afu_driver_ops *afu_driver_ops; + atomic_t afu_driver_events; + struct rcu_head rcu; }; diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index eec468f1612f..5fb9894b157f 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -293,6 +293,17 @@ int afu_mmap(struct file *file, struct vm_area_struct *vm) return cxl_context_iomap(ctx, vm); } +static inline bool ctx_event_pending(struct cxl_context *ctx) +{ + if (ctx->pending_irq || ctx->pending_fault || ctx->pending_afu_err) + return true; + + if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events)) + return true; + + return false; +} + unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) { struct cxl_context *ctx = file->private_data; @@ -305,8 +316,7 @@ unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) pr_devel("afu_poll wait done pe: %i\n", ctx->pe); spin_lock_irqsave(&ctx->lock, flags); - if (ctx->pending_irq || ctx->pending_fault || - ctx->pending_afu_err) + if (ctx_event_pending(ctx)) mask |= POLLIN | POLLRDNORM; else if (ctx->status == CLOSED) /* Only error on closed when there are no futher events pending @@ -319,16 +329,46 @@ unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) return mask; } -static inline int ctx_event_pending(struct cxl_context *ctx) +static ssize_t afu_driver_event_copy(struct cxl_context *ctx, + char __user *buf, + struct cxl_event *event, + struct cxl_event_afu_driver_reserved *pl) { - return (ctx->pending_irq || ctx->pending_fault || - ctx->pending_afu_err || (ctx->status == CLOSED)); + /* Check event */ + if (!pl) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL); + return -EFAULT; + } + + /* Check event size */ + event->header.size += pl->data_size; + if (event->header.size > CXL_READ_MIN_SIZE) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL); + return -EFAULT; + } + + /* Copy event header */ + if (copy_to_user(buf, event, sizeof(struct cxl_event_header))) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT); + return -EFAULT; + } + + /* Copy event data */ + buf += sizeof(struct cxl_event_header); + if (copy_to_user(buf, &pl->data, pl->data_size)) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT); + return -EFAULT; + } + + ctx->afu_driver_ops->event_delivered(ctx, pl, 0); /* Success */ + return event->header.size; } ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off) { struct cxl_context *ctx = file->private_data; + struct cxl_event_afu_driver_reserved *pl = NULL; struct cxl_event event; unsigned long flags; int rc; @@ -344,7 +384,7 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, for (;;) { prepare_to_wait(&ctx->wq, &wait, TASK_INTERRUPTIBLE); - if (ctx_event_pending(ctx)) + if (ctx_event_pending(ctx) || (ctx->status == CLOSED)) break; if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { @@ -374,7 +414,12 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, memset(&event, 0, sizeof(event)); event.header.process_element = ctx->pe; event.header.size = sizeof(struct cxl_event_header); - if (ctx->pending_irq) { + if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events)) { + pr_devel("afu_read delivering AFU driver specific event\n"); + pl = ctx->afu_driver_ops->fetch_event(ctx); + atomic_dec(&ctx->afu_driver_events); + event.header.type = CXL_EVENT_AFU_DRIVER; + } else if (ctx->pending_irq) { pr_devel("afu_read delivering AFU interrupt\n"); event.header.size += sizeof(struct cxl_event_afu_interrupt); event.header.type = CXL_EVENT_AFU_INTERRUPT; @@ -404,6 +449,9 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, spin_unlock_irqrestore(&ctx->lock, flags); + if (event.header.type == CXL_EVENT_AFU_DRIVER) + return afu_driver_event_copy(ctx, buf, &event, pl); + if (copy_to_user(buf, &event, event.header.size)) return -EFAULT; return event.header.size; @@ -558,7 +606,7 @@ int __init cxl_file_init(void) * If these change we really need to update API. Either change some * flags or update API version number CXL_API_VERSION. */ - BUILD_BUG_ON(CXL_API_VERSION != 2); + BUILD_BUG_ON(CXL_API_VERSION != 3); BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64); BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8); BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8); diff --git a/include/misc/cxl.h b/include/misc/cxl.h index 56560c5781b4..17419f61e611 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -220,4 +220,52 @@ void cxl_perst_reloads_same_image(struct cxl_afu *afu, */ ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count); +/* + * AFU driver ops allow an AFU driver to create their own events to pass to + * userspace through the file descriptor as a simpler alternative to overriding + * the read() and poll() calls that works with the generic cxl events. These + * events are given priority over the generic cxl events, so they will be + * delivered first if multiple types of events are pending. + * + * The AFU driver must call cxl_context_events_pending() to notify the cxl + * driver that new events are ready to be delivered for a specific context. + * cxl_context_events_pending() will adjust the current count of AFU driver + * events for this context, and wake up anyone waiting on the context wait + * queue. + * + * The cxl driver will then call fetch_event() to get a structure defining + * the size and address of the driver specific event data. The cxl driver + * will build a cxl header with type and process_element fields filled in, + * and header.size set to sizeof(struct cxl_event_header) + data_size. + * The total size of the event is limited to CXL_READ_MIN_SIZE (4K). + * + * fetch_event() is called with a spin lock held, so it must not sleep. + * + * The cxl driver will then deliver the event to userspace, and finally + * call event_delivered() to return the status of the operation, identified + * by cxl context and AFU driver event data pointers. + * 0 Success + * -EFAULT copy_to_user() has failed + * -EINVAL Event data pointer is NULL, or event size is greater than + * CXL_READ_MIN_SIZE. + */ +struct cxl_afu_driver_ops { + struct cxl_event_afu_driver_reserved *(*fetch_event) ( + struct cxl_context *ctx); + void (*event_delivered) (struct cxl_context *ctx, + struct cxl_event_afu_driver_reserved *event, + int rc); +}; + +/* + * Associate the above driver ops with a specific context. + * Reset the current count of AFU driver events. + */ +void cxl_set_driver_ops(struct cxl_context *ctx, + struct cxl_afu_driver_ops *ops); + +/* Notify cxl driver that new events are ready to be delivered for context */ +void cxl_context_events_pending(struct cxl_context *ctx, + unsigned int new_events); + #endif /* _MISC_CXL_H */ diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index 8cd334f99ddc..cbae529b7ce0 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -93,6 +93,7 @@ enum cxl_event_type { CXL_EVENT_AFU_INTERRUPT = 1, CXL_EVENT_DATA_STORAGE = 2, CXL_EVENT_AFU_ERROR = 3, + CXL_EVENT_AFU_DRIVER = 4, }; struct cxl_event_header { @@ -124,12 +125,28 @@ struct cxl_event_afu_error { __u64 error; }; +struct cxl_event_afu_driver_reserved { + /* + * Defines the buffer passed to the cxl driver by the AFU driver. + * + * This is not ABI since the event header.size passed to the user for + * existing events is set in the read call to sizeof(cxl_event_header) + * + sizeof(whatever event is being dispatched) and the user is already + * required to use a 4K buffer on the read call. + * + * Of course the contents will be ABI, but that's up the AFU driver. + */ + size_t data_size; + u8 data[]; +}; + struct cxl_event { struct cxl_event_header header; union { struct cxl_event_afu_interrupt irq; struct cxl_event_data_storage fault; struct cxl_event_afu_error afu_error; + struct cxl_event_afu_driver_reserved afu_driver_event; }; }; -- cgit v1.2.3 From ad42de859ff14c079e966e61cbcba85265b982e1 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 24 Jun 2016 08:47:07 +0200 Subject: cxl: Add set and get private data to context struct This provides AFU drivers a means to associate private data with a cxl context. This is particularly intended for make the new callbacks for driver specific events easier for AFU drivers to use, as they can easily get back to any private data structures they may use. Signed-off-by: Michael Neuling Signed-off-by: Ian Munsie Signed-off-by: Philippe Bergheaud Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/api.c | 21 +++++++++++++++++++++ drivers/misc/cxl/cxl.h | 3 +++ include/misc/cxl.h | 7 +++++++ 3 files changed, 31 insertions(+) (limited to 'drivers/misc/cxl/api.c') diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index f11dc0e5fdd6..7707055d33ab 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -94,6 +94,27 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) return 0; } + +int cxl_set_priv(struct cxl_context *ctx, void *priv) +{ + if (!ctx) + return -EINVAL; + + ctx->priv = priv; + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_set_priv); + +void *cxl_get_priv(struct cxl_context *ctx) +{ + if (!ctx) + return ERR_PTR(-EINVAL); + + return ctx->priv; +} +EXPORT_SYMBOL_GPL(cxl_get_priv); + int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) { int res; diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 422ee53868a8..27578fceda8a 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -484,6 +484,9 @@ struct cxl_context { /* Only used in PR mode */ u64 process_token; + /* driver private data */ + void *priv; + unsigned long *irq_bitmap; /* Accessed from IRQ context */ struct cxl_irq_ranges irqs; struct list_head irq_names; diff --git a/include/misc/cxl.h b/include/misc/cxl.h index 17419f61e611..b6d040f31f76 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -85,6 +85,13 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev); */ int cxl_release_context(struct cxl_context *ctx); +/* + * Set and get private data associated with a context. Allows drivers to have a + * back pointer to some useful structure. + */ +int cxl_set_priv(struct cxl_context *ctx, void *priv); +void *cxl_get_priv(struct cxl_context *ctx); + /* * Allocate AFU interrupts for this context. num=0 will allocate the default * for this AFU as given in the AFU descriptor. This number doesn't include the -- cgit v1.2.3 From 317f5ef1b363417b6f1e93b90dfd2ffd6be6e867 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:07 +1000 Subject: cxl: Add support for using the kernel API with a real PHB This hooks up support for using the kernel API with a real PHB. After the AFU initialisation has completed it calls into the PHB code to pass it the AFU that will be used by other peer physical functions on the adapter. The cxl_pci_to_afu API is extended to work with peer PCI devices, retrieving the peer AFU from the PHB. This API may also now return an error if it is called on a PCI device that is not associated with either a cxl vPHB or a peer PCI device to an AFU, and this error is propagated down. Signed-off-by: Ian Munsie Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/api.c | 5 +++++ drivers/misc/cxl/pci.c | 3 +++ drivers/misc/cxl/vphb.c | 16 ++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) (limited to 'drivers/misc/cxl/api.c') diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 7707055d33ab..6a030bf71655 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "cxl.h" @@ -24,6 +25,8 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) int rc; afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return ERR_CAST(afu); ctx = cxl_context_alloc(); if (IS_ERR(ctx)) { @@ -438,6 +441,8 @@ EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image); ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count) { struct cxl_afu *afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return -ENODEV; return cxl_ops->read_adapter_vpd(afu->adapter, buf, count); } diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index dd7ff221288b..cb5d172fec1b 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1502,6 +1502,9 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc); } + if (pnv_pci_on_cxl_phb(dev) && adapter->slices >= 1) + pnv_cxl_phb_set_peer_afu(dev, adapter->afu[0]); + return 0; } diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 8865e8d9b3c5..dee8def1c193 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -9,6 +9,7 @@ #include #include +#include #include "cxl.h" static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) @@ -258,13 +259,18 @@ void cxl_pci_vphb_remove(struct cxl_afu *afu) pcibios_free_controller(phb); } +static bool _cxl_pci_is_vphb_device(struct pci_controller *phb) +{ + return (phb->ops == &cxl_pcie_pci_ops); +} + bool cxl_pci_is_vphb_device(struct pci_dev *dev) { struct pci_controller *phb; phb = pci_bus_to_host(dev->bus); - return (phb->ops == &cxl_pcie_pci_ops); + return _cxl_pci_is_vphb_device(phb); } struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) @@ -273,7 +279,13 @@ struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) phb = pci_bus_to_host(dev->bus); - return (struct cxl_afu *)phb->private_data; + if (_cxl_pci_is_vphb_device(phb)) + return (struct cxl_afu *)phb->private_data; + + if (pnv_pci_on_cxl_phb(dev)) + return pnv_cxl_phb_to_afu(phb); + + return ERR_PTR(-ENODEV); } EXPORT_SYMBOL_GPL(cxl_pci_to_afu); -- cgit v1.2.3 From 79384e4b71240abf50c375eea56060b0d79c242a Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:08 +1000 Subject: cxl: Add kernel APIs to get & set the max irqs per context These APIs will be used by the Mellanox CX4 support. While they function standalone to configure existing behaviour, their primary purpose is to allow the Mellanox driver to inform the cxl driver of a hardware limitation, which will be used in a future patch. Signed-off-by: Ian Munsie Reviewed-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/api.c | 27 +++++++++++++++++++++++++++ include/misc/cxl.h | 10 ++++++++++ 2 files changed, 37 insertions(+) (limited to 'drivers/misc/cxl/api.c') diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 6a030bf71655..1e2c0d9f1df7 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -447,3 +447,30 @@ ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count) return cxl_ops->read_adapter_vpd(afu->adapter, buf, count); } EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd); + +int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs) +{ + struct cxl_afu *afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return -ENODEV; + + if (irqs > afu->adapter->user_irqs) + return -EINVAL; + + /* Limit user_irqs to prevent the user increasing this via sysfs */ + afu->adapter->user_irqs = irqs; + afu->irqs_max = irqs; + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_set_max_irqs_per_process); + +int cxl_get_max_irqs_per_process(struct pci_dev *dev) +{ + struct cxl_afu *afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return -ENODEV; + + return afu->irqs_max; +} +EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process); diff --git a/include/misc/cxl.h b/include/misc/cxl.h index dd9eebba3bb6..fc07ed47c2d8 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -166,6 +166,16 @@ void cxl_psa_unmap(void __iomem *addr); /* Get the process element for this context */ int cxl_process_element(struct cxl_context *ctx); +/* + * Limit the number of interrupts that a single context can allocate via + * cxl_start_work. If using the api with a real phb, this may be used to + * request that additional default contexts be created when allocating + * interrupts via pci_enable_msix_range. These will be set to the same running + * state as the default context, and if that is running it will reuse the + * parameters previously passed to cxl_start_context for the default context. + */ +int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs); +int cxl_get_max_irqs_per_process(struct pci_dev *dev); /* * These calls allow drivers to create their own file descriptors and make them -- cgit v1.2.3 From cbce0917e2e47d4bf5aa3b5fd6b1247f33e1a126 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:09 +1000 Subject: cxl: Add preliminary workaround for CX4 interrupt limitation The Mellanox CX4 has a hardware limitation where only 4 bits of the AFU interrupt number can be passed to the XSL when sending an interrupt, limiting it to only 15 interrupts per context (AFU interrupt number 0 is invalid). In order to overcome this, we will allocate additional contexts linked to the default context as extra address space for the extra interrupts - this will be implemented in the next patch. This patch adds the preliminary support to allow this, by way of adding a linked list in the context structure that we use to keep track of the contexts dedicated to interrupts, and an API to simultaneously iterate over the related context structures, AFU interrupt numbers and hardware interrupt numbers. The point of using a single API to iterate these is to hide some of the details of the iteration from external code, and to reduce the number of APIs that need to be exported via base.c to allow built in code to call. Signed-off-by: Ian Munsie Reviewed-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/api.c | 15 +++++++++++++++ drivers/misc/cxl/base.c | 17 +++++++++++++++++ drivers/misc/cxl/context.c | 1 + drivers/misc/cxl/cxl.h | 10 ++++++++++ drivers/misc/cxl/main.c | 1 + include/misc/cxl.h | 9 +++++++++ 6 files changed, 53 insertions(+) (limited to 'drivers/misc/cxl/api.c') diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 1e2c0d9f1df7..f02a85974e49 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -97,6 +97,21 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) return 0; } +int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq) +{ + if (*ctx == NULL || *afu_irq == 0) { + *afu_irq = 1; + *ctx = cxl_get_context(pdev); + } else { + (*afu_irq)++; + if (*afu_irq > cxl_get_max_irqs_per_process(pdev)) { + *ctx = list_next_entry(*ctx, extra_irq_contexts); + *afu_irq = 1; + } + } + return cxl_find_afu_irq(*ctx, *afu_irq); +} +/* Exported via cxl_base */ int cxl_set_priv(struct cxl_context *ctx, void *priv) { diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index e1e80cb99ad9..fe90f895bb10 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -141,6 +141,23 @@ void cxl_pci_disable_device(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(cxl_pci_disable_device); +int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq) +{ + int ret; + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return -EBUSY; + + ret = calls->cxl_next_msi_hwirq(pdev, ctx, afu_irq); + + cxl_calls_put(calls); + + return ret; +} +EXPORT_SYMBOL_GPL(cxl_next_msi_hwirq); + static int __init cxl_base_init(void) { struct device_node *np; diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index edbb99e93114..2616cddbbb33 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -68,6 +68,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, ctx->pending_afu_err = false; INIT_LIST_HEAD(&ctx->irq_names); + INIT_LIST_HEAD(&ctx->extra_irq_contexts); /* * When we have to destroy all contexts in cxl_context_detach_all() we diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index b81f476a2b9f..73b9a55aa35b 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -537,6 +537,14 @@ struct cxl_context { atomic_t afu_driver_events; struct rcu_head rcu; + + /* + * Only used when more interrupts are allocated via + * pci_enable_msix_range than are supported in the default context, to + * use additional contexts to overcome the limitation. i.e. Mellanox + * CX4 only: + */ + struct list_head extra_irq_contexts; }; struct cxl_service_layer_ops { @@ -722,11 +730,13 @@ ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, /* Internal functions wrapped in cxl_base to allow PHB to call them */ bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); void _cxl_pci_disable_device(struct pci_dev *dev); +int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); struct cxl_calls { void (*cxl_slbia)(struct mm_struct *mm); bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu); void (*cxl_pci_disable_device)(struct pci_dev *dev); + int (*cxl_next_msi_hwirq)(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); struct module *owner; }; diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 4e5474b00b28..66fac713e7ad 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -112,6 +112,7 @@ static struct cxl_calls cxl_calls = { .cxl_slbia = cxl_slbia_core, .cxl_pci_associate_default_context = _cxl_pci_associate_default_context, .cxl_pci_disable_device = _cxl_pci_disable_device, + .cxl_next_msi_hwirq = _cxl_next_msi_hwirq, .owner = THIS_MODULE, }; diff --git a/include/misc/cxl.h b/include/misc/cxl.h index fc07ed47c2d8..6c52cbcdfd79 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -177,6 +177,15 @@ int cxl_process_element(struct cxl_context *ctx); int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs); int cxl_get_max_irqs_per_process(struct pci_dev *dev); +/* + * Use to simultaneously iterate over hardware interrupt numbers, contexts and + * afu interrupt numbers allocated for the device via pci_enable_msix_range and + * is a useful convenience function when working with hardware that has + * limitations on the number of interrupts per process. *ctx and *afu_irq + * should be NULL and 0 to start the iteration. + */ +int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); + /* * These calls allow drivers to create their own file descriptors and make them * identical to the cxl file descriptor user API. An example use case: -- cgit v1.2.3 From a2f67d5ee8d950caaa7a6144cf0bfb256500b73e Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:10 +1000 Subject: cxl: Add support for interrupts on the Mellanox CX4 The Mellanox CX4 in cxl mode uses a hybrid interrupt model, where interrupts are routed from the networking hardware to the XSL using the MSIX table, and from there will be transformed back into an MSIX interrupt using the cxl style interrupts (i.e. using IVTE entries and ranges to map a PE and AFU interrupt number to an MSIX address). We want to hide the implementation details of cxl interrupts as much as possible. To this end, we use a special version of the MSI setup & teardown routines in the PHB while in cxl mode to allocate the cxl interrupts and configure the IVTE entries in the process element. This function does not configure the MSIX table - the CX4 card uses a custom format in that table and it would not be appropriate to fill that out in generic code. The rest of the functionality is similar to the "Full MSI-X mode" described in the CAIA, and this could be easily extended to support other adapters that use that mode in the future. The interrupts will be associated with the default context. If the maximum number of interrupts per context has been limited (e.g. by the mlx5 driver), it will automatically allocate additional kernel contexts to associate extra interrupts as required. These contexts will be started using the same WED that was used to start the default context. Signed-off-by: Ian Munsie Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-cxl.c | 84 +++++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/pci-ioda.c | 4 ++ arch/powerpc/platforms/powernv/pci.h | 2 + drivers/misc/cxl/api.c | 71 ++++++++++++++++++++++++++ drivers/misc/cxl/base.c | 31 ++++++++++++ drivers/misc/cxl/cxl.h | 4 ++ drivers/misc/cxl/main.c | 2 + include/misc/cxl-base.h | 4 ++ 8 files changed, 202 insertions(+) (limited to 'drivers/misc/cxl/api.c') diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c index 831bbfb4e278..3f342077628b 100644 --- a/arch/powerpc/platforms/powernv/pci-cxl.c +++ b/arch/powerpc/platforms/powernv/pci-cxl.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -281,3 +282,86 @@ void pnv_cxl_disable_device(struct pci_dev *dev) cxl_pci_disable_device(dev); cxl_afu_put(afu); } + +/* + * This is a special version of pnv_setup_msi_irqs for cards in cxl mode. This + * function handles setting up the IVTE entries for the XSL to use. + * + * We are currently not filling out the MSIX table, since the only currently + * supported adapter (CX4) uses a custom MSIX table format in cxl mode and it + * is up to their driver to fill that out. In the future we may fill out the + * MSIX table (and change the IVTE entries to be an index to the MSIX table) + * for adapters implementing the Full MSI-X mode described in the CAIA. + */ +int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + struct msi_desc *entry; + struct cxl_context *ctx = NULL; + unsigned int virq; + int hwirq; + int afu_irq = 0; + int rc; + + if (WARN_ON(!phb) || !phb->msi_bmp.bitmap) + return -ENODEV; + + if (pdev->no_64bit_msi && !phb->msi32_support) + return -ENODEV; + + rc = cxl_cx4_setup_msi_irqs(pdev, nvec, type); + if (rc) + return rc; + + for_each_pci_msi_entry(entry, pdev) { + if (!entry->msi_attrib.is_64 && !phb->msi32_support) { + pr_warn("%s: Supports only 64-bit MSIs\n", + pci_name(pdev)); + return -ENXIO; + } + + hwirq = cxl_next_msi_hwirq(pdev, &ctx, &afu_irq); + if (WARN_ON(hwirq <= 0)) + return (hwirq ? hwirq : -ENOMEM); + + virq = irq_create_mapping(NULL, hwirq); + if (virq == NO_IRQ) { + pr_warn("%s: Failed to map cxl mode MSI to linux irq\n", + pci_name(pdev)); + return -ENOMEM; + } + + rc = pnv_cxl_ioda_msi_setup(pdev, hwirq, virq); + if (rc) { + pr_warn("%s: Failed to setup cxl mode MSI\n", pci_name(pdev)); + irq_dispose_mapping(virq); + return rc; + } + + irq_set_msi_desc(virq, entry); + } + + return 0; +} + +void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + struct msi_desc *entry; + irq_hw_number_t hwirq; + + if (WARN_ON(!phb)) + return; + + for_each_pci_msi_entry(entry, pdev) { + if (entry->irq == NO_IRQ) + continue; + hwirq = virq_to_hw(entry->irq); + irq_set_msi_desc(entry->irq, NULL); + irq_dispose_mapping(entry->irq); + } + + cxl_cx4_teardown_msi_irqs(pdev); +} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 104c0405f47c..530d4afbe2fd 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3465,6 +3465,10 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = { .dma_dev_setup = pnv_pci_dma_dev_setup, .dma_bus_setup = pnv_pci_dma_bus_setup, +#ifdef CONFIG_PCI_MSI + .setup_msi_irqs = pnv_cxl_cx4_setup_msi_irqs, + .teardown_msi_irqs = pnv_cxl_cx4_teardown_msi_irqs, +#endif .enable_device_hook = pnv_cxl_enable_device_hook, .disable_device = pnv_cxl_disable_device, .release_device = pnv_pci_release_device, diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 20c9a6b1c64d..f0c276c05f50 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -247,6 +247,8 @@ extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe); /* cxl functions */ extern bool pnv_cxl_enable_device_hook(struct pci_dev *dev); extern void pnv_cxl_disable_device(struct pci_dev *dev); +extern int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); +extern void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); /* phb ops (cxl switches these when enabling the kernel api on the phb) */ diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index f02a85974e49..f3d34b941f85 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "cxl.h" @@ -489,3 +490,73 @@ int cxl_get_max_irqs_per_process(struct pci_dev *dev) return afu->irqs_max; } EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process); + +/* + * This is a special interrupt allocation routine called from the PHB's MSI + * setup function. When capi interrupts are allocated in this manner they must + * still be associated with a running context, but since the MSI APIs have no + * way to specify this we use the default context associated with the device. + * + * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU + * interrupt number, so in order to overcome this their driver informs us of + * the restriction by setting the maximum interrupts per context, and we + * allocate additional contexts as necessary so that we can keep the AFU + * interrupt number within the supported range. + */ +int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + struct cxl_context *ctx, *new_ctx, *default_ctx; + int remaining; + int rc; + + ctx = default_ctx = cxl_get_context(pdev); + if (WARN_ON(!default_ctx)) + return -ENODEV; + + remaining = nvec; + while (remaining > 0) { + rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max)); + if (rc) { + pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev)); + return rc; + } + remaining -= ctx->afu->irqs_max; + + if (ctx != default_ctx && default_ctx->status == STARTED) { + WARN_ON(cxl_start_context(ctx, + be64_to_cpu(default_ctx->elem->common.wed), + NULL)); + } + + if (remaining > 0) { + new_ctx = cxl_dev_context_init(pdev); + if (!new_ctx) { + pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev)); + return -ENOSPC; + } + list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts); + ctx = new_ctx; + } + } + + return 0; +} +/* Exported via cxl_base */ + +void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct cxl_context *ctx, *pos, *tmp; + + ctx = cxl_get_context(pdev); + if (WARN_ON(!ctx)) + return; + + cxl_free_afu_irqs(ctx); + list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) { + cxl_stop_context(pos); + cxl_free_afu_irqs(pos); + list_del(&pos->extra_irq_contexts); + cxl_release_context(pos); + } +} +/* Exported via cxl_base */ diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index fe90f895bb10..cd54ce6f6230 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -158,6 +158,37 @@ int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_ } EXPORT_SYMBOL_GPL(cxl_next_msi_hwirq); +int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + int ret; + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return false; + + ret = calls->cxl_cx4_setup_msi_irqs(pdev, nvec, type); + + cxl_calls_put(calls); + + return ret; +} +EXPORT_SYMBOL_GPL(cxl_cx4_setup_msi_irqs); + +void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return; + + calls->cxl_cx4_teardown_msi_irqs(pdev); + + cxl_calls_put(calls); +} +EXPORT_SYMBOL_GPL(cxl_cx4_teardown_msi_irqs); + static int __init cxl_base_init(void) { struct device_node *np; diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 73b9a55aa35b..d50cdb137c43 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -731,12 +731,16 @@ ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); void _cxl_pci_disable_device(struct pci_dev *dev); int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); +int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); +void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); struct cxl_calls { void (*cxl_slbia)(struct mm_struct *mm); bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu); void (*cxl_pci_disable_device)(struct pci_dev *dev); int (*cxl_next_msi_hwirq)(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); + int (*cxl_cx4_setup_msi_irqs)(struct pci_dev *pdev, int nvec, int type); + void (*cxl_cx4_teardown_msi_irqs)(struct pci_dev *pdev); struct module *owner; }; diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 66fac713e7ad..d9be23b24aa3 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -113,6 +113,8 @@ static struct cxl_calls cxl_calls = { .cxl_pci_associate_default_context = _cxl_pci_associate_default_context, .cxl_pci_disable_device = _cxl_pci_disable_device, .cxl_next_msi_hwirq = _cxl_next_msi_hwirq, + .cxl_cx4_setup_msi_irqs = _cxl_cx4_setup_msi_irqs, + .cxl_cx4_teardown_msi_irqs = _cxl_cx4_teardown_msi_irqs, .owner = THIS_MODULE, }; diff --git a/include/misc/cxl-base.h b/include/misc/cxl-base.h index bb7e629ae492..b2ebc91fe09a 100644 --- a/include/misc/cxl-base.h +++ b/include/misc/cxl-base.h @@ -43,6 +43,8 @@ void cxl_afu_put(struct cxl_afu *afu); void cxl_slbia(struct mm_struct *mm); bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); void cxl_pci_disable_device(struct pci_dev *dev); +int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); +void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); #else /* CONFIG_CXL_BASE */ @@ -52,6 +54,8 @@ static inline void cxl_afu_put(struct cxl_afu *afu) {} static inline void cxl_slbia(struct mm_struct *mm) {} static inline bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) { return false; } static inline void cxl_pci_disable_device(struct pci_dev *dev) {} +static inline int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { return -ENODEV; } +static inline void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) {} #endif /* CONFIG_CXL_BASE */ -- cgit v1.2.3