Diffstat (limited to 'drivers/misc')
55 files changed, 4083 insertions, 807 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 7c0fa24f9067..03605f8fc0dc 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -53,7 +53,7 @@ config AD525X_DPOT_SPI
 config ATMEL_TCLIB
 	bool "Atmel AT32/AT91 Timer/Counter Library"
-	depends on (AVR32 || ARCH_AT91)
+	depends on ARCH_AT91
 	help
 	  Select this if you want a library to allocate the Timer/Counter
 	  blocks found on many Atmel processors.  This facilitates using
@@ -192,7 +192,7 @@ config ICS932S401
 config ATMEL_SSC
 	tristate "Device driver for Atmel SSC peripheral"
-	depends on HAS_IOMEM && (AVR32 || ARCH_AT91 || COMPILE_TEST)
+	depends on HAS_IOMEM && (ARCH_AT91 || COMPILE_TEST)
 	---help---
 	  This option enables device driver support for Atmel Synchronized
 	  Serial Communication peripheral (SSC).
@@ -512,5 +512,6 @@ source "drivers/misc/mic/Kconfig"
 source "drivers/misc/genwqe/Kconfig"
 source "drivers/misc/echo/Kconfig"
 source "drivers/misc/cxl/Kconfig"
+source "drivers/misc/ocxl/Kconfig"
 source "drivers/misc/cardreader/Kconfig"
 endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 8d8cc096063b..c3c8624f4d95 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -55,7 +55,8 @@ obj-$(CONFIG_CXL_BASE)		+= cxl/
 obj-$(CONFIG_ASPEED_LPC_CTRL)	+= aspeed-lpc-ctrl.o
 obj-$(CONFIG_ASPEED_LPC_SNOOP)	+= aspeed-lpc-snoop.o
 obj-$(CONFIG_PCI_ENDPOINT_TEST)	+= pci_endpoint_test.o
-obj-$(CONFIG_MISC_RTSX)		+= cardreader/
+obj-$(CONFIG_OCXL)		+= ocxl/
+obj-$(CONFIG_MISC_RTSX)		+= cardreader/
 lkdtm-$(CONFIG_LKDTM)		+= lkdtm_core.o
 lkdtm-$(CONFIG_LKDTM)		+= lkdtm_bugs.o
diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c
index fe1672747bc1..bc591b7168db 100644
--- a/drivers/misc/ad525x_dpot.c
+++ b/drivers/misc/ad525x_dpot.c
@@ -3,7 +3,7 @@
 * Copyright (c) 2009-2010 Analog Devices, Inc.
 * Author: Michael Hennerich <hennerich@blackfin.uclinux.org>
 *
- * DEVID		#Wipers		#Positions	Resistor Options (kOhm)
+ * DEVID	#Wipers	#Positions	Resistor Options (kOhm)
 * AD5258	1	64	1, 10, 50, 100
 * AD5259	1	256	5, 10, 50, 100
 * AD5251	2	64	1, 10, 50, 100
@@ -84,12 +84,12 @@ struct dpot_data {
 	struct ad_dpot_bus_data	bdata;
 	struct mutex update_lock;
-	unsigned rdac_mask;
-	unsigned max_pos;
+	unsigned int rdac_mask;
+	unsigned int max_pos;
 	unsigned long devid;
-	unsigned uid;
-	unsigned feat;
-	unsigned wipers;
+	unsigned int uid;
+	unsigned int feat;
+	unsigned int wipers;
 	u16 rdac_cache[MAX_RDACS];
 	DECLARE_BITMAP(otp_en_mask, MAX_RDACS);
 };
@@ -126,7 +126,7 @@ static inline int dpot_write_r8d16(struct dpot_data *dpot, u8 reg, u16 val)
 static s32 dpot_read_spi(struct dpot_data *dpot, u8 reg)
 {
-	unsigned ctrl = 0;
+	unsigned int ctrl = 0;
 	int value;
 
 	if (!(reg & (DPOT_ADDR_EEPROM | DPOT_ADDR_CMD))) {
@@ -175,7 +175,7 @@ static s32 dpot_read_spi(struct dpot_data *dpot, u8 reg)
 static s32 dpot_read_i2c(struct dpot_data *dpot, u8 reg)
 {
 	int value;
-	unsigned ctrl = 0;
+	unsigned int ctrl = 0;
 
 	switch (dpot->uid) {
 	case DPOT_UID(AD5246_ID):
@@ -238,7 +238,7 @@ static s32 dpot_read(struct dpot_data *dpot, u8 reg)
 static s32 dpot_write_spi(struct dpot_data *dpot, u8 reg, u16 value)
 {
-	unsigned val = 0;
+	unsigned int val = 0;
 
 	if (!(reg & (DPOT_ADDR_EEPROM | DPOT_ADDR_CMD | DPOT_ADDR_OTP))) {
 		if (dpot->feat & F_RDACS_WONLY)
@@ -328,7 +328,7 @@ static s32 dpot_write_spi(struct dpot_data *dpot, u8 reg, u16 value)
 static s32 dpot_write_i2c(struct dpot_data *dpot, u8 reg, u16 value)
 {
 	/* Only write the instruction byte for certain commands */
-	unsigned tmp = 0, ctrl = 0;
+	unsigned int tmp = 0, ctrl = 0;
 
 	switch (dpot->uid) {
 	case DPOT_UID(AD5246_ID):
@@ -515,11 +515,11 @@ set_##_name(struct device *dev, \
 #define DPOT_DEVICE_SHOW_SET(name, reg) \
 DPOT_DEVICE_SHOW(name, reg) \
 DPOT_DEVICE_SET(name, reg) \
-static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, set_##name);
+static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, set_##name)
 
 #define DPOT_DEVICE_SHOW_ONLY(name, reg) \
 DPOT_DEVICE_SHOW(name, reg) \
-static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, NULL);
+static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, NULL)
 
 DPOT_DEVICE_SHOW_SET(rdac0, DPOT_ADDR_RDAC | DPOT_RDAC0);
 DPOT_DEVICE_SHOW_SET(eeprom0, DPOT_ADDR_EEPROM | DPOT_RDAC0);
@@ -616,7 +616,7 @@ set_##_name(struct device *dev, \
 { \
 	return sysfs_do_cmd(dev, attr, buf, count, _cmd); \
 } \
-static DEVICE_ATTR(_name, S_IWUSR | S_IRUGO, NULL, set_##_name);
+static DEVICE_ATTR(_name, S_IWUSR | S_IRUGO, NULL, set_##_name)
 
 DPOT_DEVICE_DO_CMD(inc_all, DPOT_INC_ALL);
 DPOT_DEVICE_DO_CMD(dec_all, DPOT_DEC_ALL);
@@ -636,7 +636,7 @@ static const struct attribute_group ad525x_group_commands = {
 };
 
 static int ad_dpot_add_files(struct device *dev,
-		unsigned features, unsigned rdac)
+		unsigned int features, unsigned int rdac)
 {
 	int err = sysfs_create_file(&dev->kobj,
 		dpot_attrib_wipers[rdac]);
@@ -661,7 +661,7 @@ static int ad_dpot_add_files(struct device *dev,
 }
 
 static inline void ad_dpot_remove_files(struct device *dev,
-		unsigned features, unsigned rdac)
+		unsigned int features, unsigned int rdac)
 {
 	sysfs_remove_file(&dev->kobj,
 		dpot_attrib_wipers[rdac]);
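The DEVICE_ATTR hunks above move the trailing semicolon out of the helper macros so that each expansion site supplies its own. A minimal, self-contained C sketch of that pattern (hypothetical macro names, not the driver's actual ones):

	/* Before: semicolon baked into the macro, so "MAKE_ATTR(x);" expands to ";;" */
	#define MAKE_ATTR_OLD(name) int name##_attr;

	/* After: the caller supplies the semicolon, one clean statement per use */
	#define MAKE_ATTR(name) int name##_attr

	MAKE_ATTR(rdac0);	/* expands to "int rdac0_attr;" with a single ';' */
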
diff --git a/drivers/misc/ad525x_dpot.h b/drivers/misc/ad525x_dpot.h
index 6bd1eba23bc0..443a51fd5680 100644
--- a/drivers/misc/ad525x_dpot.h
+++ b/drivers/misc/ad525x_dpot.h
@@ -195,12 +195,12 @@ enum dpot_devid {
 struct dpot_data;
 
 struct ad_dpot_bus_ops {
-	int (*read_d8) (void *client);
-	int (*read_r8d8) (void *client, u8 reg);
-	int (*read_r8d16) (void *client, u8 reg);
-	int (*write_d8) (void *client, u8 val);
-	int (*write_r8d8) (void *client, u8 reg, u8 val);
-	int (*write_r8d16) (void *client, u8 reg, u16 val);
+	int (*read_d8)(void *client);
+	int (*read_r8d8)(void *client, u8 reg);
+	int (*read_r8d16)(void *client, u8 reg);
+	int (*write_d8)(void *client, u8 val);
+	int (*write_r8d8)(void *client, u8 reg, u8 val);
+	int (*write_r8d16)(void *client, u8 reg, u16 val);
 };
 
 struct ad_dpot_bus_data {
diff --git a/drivers/misc/apds990x.c b/drivers/misc/apds990x.c
index c9f07032c2fc..ed9412d750b7 100644
--- a/drivers/misc/apds990x.c
+++ b/drivers/misc/apds990x.c
@@ -715,6 +715,7 @@ static ssize_t apds990x_rate_avail(struct device *dev,
 {
 	int i;
 	int pos = 0;
+
 	for (i = 0; i < ARRAY_SIZE(arates_hz); i++)
 		pos += sprintf(buf + pos, "%d ", arates_hz[i]);
 	sprintf(buf + pos - 1, "\n");
@@ -725,6 +726,7 @@ static ssize_t apds990x_rate_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct apds990x_chip *chip = dev_get_drvdata(dev);
+
 	return sprintf(buf, "%d\n", chip->arate);
 }
 
@@ -784,6 +786,7 @@ static ssize_t apds990x_prox_show(struct device *dev,
 {
 	ssize_t ret;
 	struct apds990x_chip *chip = dev_get_drvdata(dev);
+
 	if (pm_runtime_suspended(dev) || !chip->prox_en)
 		return -EIO;
 
@@ -807,6 +810,7 @@ static ssize_t apds990x_prox_enable_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct apds990x_chip *chip = dev_get_drvdata(dev);
+
 	return sprintf(buf, "%d\n", chip->prox_en);
 }
 
@@ -847,6 +851,7 @@ static ssize_t apds990x_prox_reporting_mode_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct apds990x_chip *chip = dev_get_drvdata(dev);
+
 	return sprintf(buf, "%s\n",
 		reporting_modes[!!chip->prox_continuous_mode]);
 }
@@ -884,6 +889,7 @@ static ssize_t apds990x_lux_thresh_above_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct apds990x_chip *chip = dev_get_drvdata(dev);
+
 	return sprintf(buf, "%d\n", chip->lux_thres_hi);
 }
 
@@ -891,6 +897,7 @@ static ssize_t apds990x_lux_thresh_below_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct apds990x_chip *chip = dev_get_drvdata(dev);
+
 	return sprintf(buf, "%d\n", chip->lux_thres_lo);
 }
 
@@ -926,6 +933,7 @@ static ssize_t apds990x_lux_thresh_above_store(struct device *dev,
 {
 	struct apds990x_chip *chip = dev_get_drvdata(dev);
 	int ret = apds990x_set_lux_thresh(chip, &chip->lux_thres_hi, buf);
+
 	if (ret < 0)
 		return ret;
 	return len;
@@ -937,6 +945,7 @@ static ssize_t apds990x_lux_thresh_below_store(struct device *dev,
 {
 	struct apds990x_chip *chip = dev_get_drvdata(dev);
 	int ret = apds990x_set_lux_thresh(chip, &chip->lux_thres_lo, buf);
+
 	if (ret < 0)
 		return ret;
 	return len;
@@ -954,6 +963,7 @@ static ssize_t apds990x_prox_threshold_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct apds990x_chip *chip = dev_get_drvdata(dev);
+
 	return sprintf(buf, "%d\n", chip->prox_thres);
 }
 
@@ -1026,6 +1036,7 @@ static ssize_t apds990x_chip_id_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct apds990x_chip *chip = dev_get_drvdata(dev);
+
 	return sprintf(buf, "%s %d\n", chip->chipname, chip->revision);
 }
 
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 12a41b2753f0..7ff315ad3692 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -45,6 +45,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
 	ctx->pid = NULL; /* Set in start work ioctl */
 	mutex_init(&ctx->mapping_lock);
 	ctx->mapping = NULL;
+	ctx->tidr = 0;
+	ctx->assign_tidr = false;
 
 	if (cxl_is_power8()) {
 		spin_lock_init(&ctx->sste_lock);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a798c2ccd67d..4f015da78f28 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -630,6 +630,9 @@ struct cxl_context {
 	struct list_head extra_irq_contexts;
 
 	struct mm_struct *mm;
+
+	u16 tidr;
+	bool assign_tidr;
 };
 
 struct cxl_irq_info;
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc1807fdf..30ccba436b3b 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -199,10 +199,11 @@ int cxllib_get_PE_attributes(struct task_struct *task,
 		 */
 		attr->pid = mm->context.id;
 		mmput(mm);
+		attr->tid = task->thread.tidr;
 	} else {
 		attr->pid = 0;
+		attr->tid = 0;
 	}
-	attr->tid = 0;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 90341ccda9bd..0162516f5e57 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -173,7 +173,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 	 * flags are set it's invalid
	 */
 	if (work.reserved1 || work.reserved2 || work.reserved3 ||
-	    work.reserved4 || work.reserved5 || work.reserved6 ||
+	    work.reserved4 || work.reserved5 ||
 	    (work.flags & ~CXL_START_WORK_ALL)) {
 		rc = -EINVAL;
 		goto out;
@@ -186,12 +186,16 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 			rc = -EINVAL;
 			goto out;
 		}
+
 		if ((rc = afu_register_irqs(ctx, work.num_interrupts)))
 			goto out;
 
 	if (work.flags & CXL_START_WORK_AMR)
 		amr = work.amr & mfspr(SPRN_UAMOR);
 
+	if (work.flags & CXL_START_WORK_TID)
+		ctx->assign_tidr = true;
+
 	ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF);
 
 	/*
@@ -263,8 +267,15 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 		goto out;
 	}
 
-	ctx->status = STARTED;
 	rc = 0;
+	if (work.flags & CXL_START_WORK_TID) {
+		work.tid = ctx->tidr;
+		if (copy_to_user(uwork, &work, sizeof(work)))
+			rc = -EFAULT;
+	}
+
+	ctx->status = STARTED;
+
 out:
 	mutex_unlock(&ctx->status_mutex);
 	return rc;
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 02b6b45b4c20..1b3d7c65ea3f 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -16,6 +16,7 @@
 #include <linux/uaccess.h>
 #include <linux/delay.h>
 #include <asm/synch.h>
+#include <asm/switch_to.h>
 #include <misc/cxl-base.h>
 
 #include "cxl.h"
@@ -655,6 +656,7 @@ static void update_ivtes_directed(struct cxl_context *ctx)
 static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
 {
 	u32 pid;
+	int rc;
 
 	cxl_assign_psn_space(ctx);
 
@@ -673,7 +675,16 @@ static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
 		pid = ctx->mm->context.id;
 	}
 
-	ctx->elem->common.tid = 0;
+	/* Assign a unique TIDR (thread id) for the current thread */
+	if (!(ctx->tidr) && (ctx->assign_tidr)) {
+		rc = set_thread_tidr(current);
+		if (rc)
+			return -ENODEV;
+		ctx->tidr = current->thread.tidr;
+		pr_devel("%s: current tidr: %d\n", __func__, ctx->tidr);
+	}
+
+	ctx->elem->common.tid = cpu_to_be32(ctx->tidr);
 	ctx->elem->common.pid = cpu_to_be32(pid);
 
 	ctx->elem->sr = cpu_to_be64(calculate_sr(ctx));
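The cxl hunks above let userspace opt in to a thread id (TIDR): file.c latches the request flag and copies the assigned value back to the caller, while native.c calls set_thread_tidr() and writes the TIDR into the process element. A hedged userspace sketch of how the flag would be consumed, assuming the CXL_START_WORK_TID flag and the work.tid field follow the uapi header as extended by this series (names unverified here):

	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <misc/cxl.h>

	int start_with_tid(int afu_fd)
	{
		struct cxl_ioctl_start_work work;

		memset(&work, 0, sizeof(work));
		work.flags = CXL_START_WORK_TID;	/* ask the kernel to assign a TIDR */

		if (ioctl(afu_fd, CXL_IOCTL_START_WORK, &work))
			return -1;

		/* on success the kernel copied the struct back with work.tid filled in */
		printf("assigned tidr: %u\n", (unsigned int)work.tid);
		return 0;
	}
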
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 19969ee86d6f..758842f65a1b 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -125,8 +125,6 @@ static const struct pci_device_id cxl_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0601), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0623), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0628), },
-	{ PCI_DEVICE_CLASS(0x120000, ~0), },
-
 	{ }
 };
 MODULE_DEVICE_TABLE(pci, cxl_pci_tbl);
diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c
index 7231260ac287..98a921ea9ee8 100644
--- a/drivers/misc/ds1682.c
+++ b/drivers/misc/ds1682.c
@@ -59,25 +59,42 @@ static ssize_t ds1682_show(struct device *dev, struct device_attribute *attr,
 {
 	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
 	struct i2c_client *client = to_i2c_client(dev);
-	__le32 val = 0;
+	unsigned long long val, check;
+	__le32 val_le = 0;
 	int rc;
 
 	dev_dbg(dev, "ds1682_show() called on %s\n", attr->attr.name);
 
 	/* Read the register */
 	rc = i2c_smbus_read_i2c_block_data(client, sattr->index, sattr->nr,
-					   (u8 *) & val);
+					   (u8 *)&val_le);
 	if (rc < 0)
 		return -EIO;
 
-	/* Special case: the 32 bit regs are time values with 1/4s
-	 * resolution, scale them up to milliseconds */
-	if (sattr->nr == 4)
-		return sprintf(buf, "%llu\n",
-			((unsigned long long)le32_to_cpu(val)) * 250);
+	val = le32_to_cpu(val_le);
+
+	if (sattr->index == DS1682_REG_ELAPSED) {
+		int retries = 5;
+
+		/* Detect and retry when a tick occurs mid-read */
+		do {
+			rc = i2c_smbus_read_i2c_block_data(client, sattr->index,
+							   sattr->nr,
+							   (u8 *)&val_le);
+			if (rc < 0 || retries <= 0)
+				return -EIO;
+
+			check = val;
+			val = le32_to_cpu(val_le);
+			retries--;
+		} while (val != check && val != (check + 1));
+	}
 
-	/* Format the output string and return # of bytes */
-	return sprintf(buf, "%li\n", (long)le32_to_cpu(val));
+	/* Format the output string and return # of bytes
+	 * Special case: the 32 bit regs are time values with 1/4s
+	 * resolution, scale them up to milliseconds
+	 */
+	return sprintf(buf, "%llu\n", (sattr->nr == 4) ? (val * 250) : val);
 }
 
 static ssize_t ds1682_store(struct device *dev, struct device_attribute *attr,
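The ds1682 change guards against the elapsed-time counter ticking between the bytes of an I2C block read: it re-reads until two consecutive samples either match exactly or differ by one tick. A self-contained C sketch of that read-until-stable idiom, with a hypothetical read_counter() callback standing in for the SMBus block read:

	#include <stdint.h>
	#include <stdbool.h>

	bool read_stable(uint32_t (*read_counter)(void), uint32_t *out)
	{
		uint32_t val = read_counter();

		for (int retries = 5; retries > 0; retries--) {
			uint32_t check = val;

			val = read_counter();
			/* accept an exact repeat, or one legitimate tick forward */
			if (val == check || val == check + 1) {
				*out = val;
				return true;
			}
		}
		return false;	/* counter never settled within the retry budget */
	}
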
diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig
index de58762097c4..68a1ac929917 100644
--- a/drivers/misc/eeprom/Kconfig
+++ b/drivers/misc/eeprom/Kconfig
@@ -4,6 +4,7 @@ config EEPROM_AT24
 	tristate "I2C EEPROMs / RAMs / ROMs from most vendors"
 	depends on I2C && SYSFS
 	select NVMEM
+	select REGMAP_I2C
 	help
 	  Enable this driver to get read/write support to most I2C EEPROMs
 	  and compatible devices like FRAMs, SRAMs, ROMs etc. After you
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 4d63ac8a82e0..01f9c4921c50 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -24,8 +24,10 @@
 #include <linux/acpi.h>
 #include <linux/i2c.h>
 #include <linux/nvmem-provider.h>
+#include <linux/regmap.h>
 #include <linux/platform_data/at24.h>
 #include <linux/pm_runtime.h>
+#include <linux/gpio/consumer.h>
 
 /*
 * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable.
@@ -55,14 +57,13 @@
 * which won't work on pure SMBus systems.
 */
 
+struct at24_client {
+	struct i2c_client *client;
+	struct regmap *regmap;
+};
+
 struct at24_data {
 	struct at24_platform_data chip;
-	int use_smbus;
-	int use_smbus_write;
-
-	ssize_t (*read_func)(struct at24_data *, char *, unsigned int, size_t);
-	ssize_t (*write_func)(struct at24_data *,
-			      const char *, unsigned int, size_t);
 
 	/*
	 * Lock protects against activities from other Linux tasks,
@@ -70,18 +71,20 @@ struct at24_data {
	 */
 	struct mutex lock;
 
-	u8 *writebuf;
-	unsigned write_max;
-	unsigned num_addresses;
+	unsigned int write_max;
+	unsigned int num_addresses;
+	unsigned int offset_adj;
 
 	struct nvmem_config nvmem_config;
 	struct nvmem_device *nvmem;
 
+	struct gpio_desc *wp_gpio;
+
 	/*
	 * Some chips tie up multiple I2C addresses; dummy devices reserve
	 * them for us, and we'll use them with SMBus calls.
	 */
-	struct i2c_client *client[];
+	struct at24_client client[];
 };
 
 /*
@@ -93,27 +96,17 @@ struct at24_data {
 *
 * This value is forced to be a power of two so that writes align on pages.
 */
-static unsigned io_limit = 128;
-module_param(io_limit, uint, 0);
-MODULE_PARM_DESC(io_limit, "Maximum bytes per I/O (default 128)");
+static unsigned int at24_io_limit = 128;
+module_param_named(io_limit, at24_io_limit, uint, 0);
+MODULE_PARM_DESC(at24_io_limit, "Maximum bytes per I/O (default 128)");
 
 /*
 * Specs often allow 5 msec for a page write, sometimes 20 msec;
 * it's important to recover from write timeouts.
 */
-static unsigned write_timeout = 25;
-module_param(write_timeout, uint, 0);
-MODULE_PARM_DESC(write_timeout, "Time (in ms) to try writes (default 25)");
-
-#define AT24_SIZE_BYTELEN 5
-#define AT24_SIZE_FLAGS 8
-
-#define AT24_BITMASK(x) (BIT(x) - 1)
-
-/* create non-zero magic value for given eeprom parameters */
-#define AT24_DEVICE_MAGIC(_len, _flags) 		\
-	((1 << AT24_SIZE_FLAGS | (_flags)) 		\
-	    << AT24_SIZE_BYTELEN | ilog2(_len))
+static unsigned int at24_write_timeout = 25;
+module_param_named(write_timeout, at24_write_timeout, uint, 0);
+MODULE_PARM_DESC(at24_write_timeout, "Time (in ms) to try writes (default 25)");
 
 /*
 * Both reads and writes fail if the previous write didn't complete yet. This
@@ -126,118 +119,123 @@ MODULE_PARM_DESC(write_timeout, "Time (in ms) to try writes (default 25)");
 * iteration of processing the request. Both should be unsigned integers
 * holding at least 32 bits.
 */
-#define loop_until_timeout(tout, op_time)				\
-	for (tout = jiffies + msecs_to_jiffies(write_timeout), op_time = 0; \
+#define at24_loop_until_timeout(tout, op_time)				\
+	for (tout = jiffies + msecs_to_jiffies(at24_write_timeout),	\
+	     op_time = 0;						\
 	     op_time ? time_before(op_time, tout) : true;		\
 	     usleep_range(1000, 1500), op_time = jiffies)
 
+struct at24_chip_data {
+	/*
+	 * these fields mirror their equivalents in
+	 * struct at24_platform_data
+	 */
+	u32 byte_len;
+	u8 flags;
+};
+
+#define AT24_CHIP_DATA(_name, _len, _flags)				\
+	static const struct at24_chip_data _name = {			\
+		.byte_len = _len, .flags = _flags,			\
+	}
+
+/* needs 8 addresses as A0-A2 are ignored */
+AT24_CHIP_DATA(at24_data_24c00, 128 / 8, AT24_FLAG_TAKE8ADDR);
+/* old variants can't be handled with this generic entry! */
+AT24_CHIP_DATA(at24_data_24c01, 1024 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs01, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c02, 2048 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs02, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24mac402, 48 / 8,
+	AT24_FLAG_MAC | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24mac602, 64 / 8,
+	AT24_FLAG_MAC | AT24_FLAG_READONLY);
+/* spd is a 24c02 in memory DIMMs */
+AT24_CHIP_DATA(at24_data_spd, 2048 / 8,
+	AT24_FLAG_READONLY | AT24_FLAG_IRUGO);
+AT24_CHIP_DATA(at24_data_24c04, 4096 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs04, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+/* 24rf08 quirk is handled at i2c-core */
+AT24_CHIP_DATA(at24_data_24c08, 8192 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs08, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c16, 16384 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs16, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c32, 32768 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24cs32, 16,
+	AT24_FLAG_ADDR16 | AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c64, 65536 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24cs64, 16,
+	AT24_FLAG_ADDR16 | AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c128, 131072 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24c256, 262144 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24c512, 524288 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24c1024, 1048576 / 8, AT24_FLAG_ADDR16);
+/* identical to 24c08 ? */
+AT24_CHIP_DATA(at24_data_INT3499, 8192 / 8, 0);
+
 static const struct i2c_device_id at24_ids[] = {
-	/* needs 8 addresses as A0-A2 are ignored */
-	{ "24c00", AT24_DEVICE_MAGIC(128 / 8, AT24_FLAG_TAKE8ADDR) },
-	/* old variants can't be handled with this generic entry! */
-	{ "24c01", AT24_DEVICE_MAGIC(1024 / 8, 0) },
-	{ "24cs01", AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_SERIAL | AT24_FLAG_READONLY) },
-	{ "24c02", AT24_DEVICE_MAGIC(2048 / 8, 0) },
-	{ "24cs02", AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_SERIAL | AT24_FLAG_READONLY) },
-	{ "24mac402", AT24_DEVICE_MAGIC(48 / 8,
-				AT24_FLAG_MAC | AT24_FLAG_READONLY) },
-	{ "24mac602", AT24_DEVICE_MAGIC(64 / 8,
-				AT24_FLAG_MAC | AT24_FLAG_READONLY) },
-	/* spd is a 24c02 in memory DIMMs */
-	{ "spd", AT24_DEVICE_MAGIC(2048 / 8,
-				AT24_FLAG_READONLY | AT24_FLAG_IRUGO) },
-	{ "24c04", AT24_DEVICE_MAGIC(4096 / 8, 0) },
-	{ "24cs04", AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_SERIAL | AT24_FLAG_READONLY) },
-	/* 24rf08 quirk is handled at i2c-core */
-	{ "24c08", AT24_DEVICE_MAGIC(8192 / 8, 0) },
-	{ "24cs08", AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_SERIAL | AT24_FLAG_READONLY) },
-	{ "24c16", AT24_DEVICE_MAGIC(16384 / 8, 0) },
-	{ "24cs16", AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_SERIAL | AT24_FLAG_READONLY) },
-	{ "24c32", AT24_DEVICE_MAGIC(32768 / 8, AT24_FLAG_ADDR16) },
-	{ "24cs32", AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_ADDR16 |
-				AT24_FLAG_SERIAL |
-				AT24_FLAG_READONLY) },
-	{ "24c64", AT24_DEVICE_MAGIC(65536 / 8, AT24_FLAG_ADDR16) },
-	{ "24cs64", AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_ADDR16 |
-				AT24_FLAG_SERIAL |
-				AT24_FLAG_READONLY) },
-	{ "24c128", AT24_DEVICE_MAGIC(131072 / 8, AT24_FLAG_ADDR16) },
-	{ "24c256", AT24_DEVICE_MAGIC(262144 / 8, AT24_FLAG_ADDR16) },
-	{ "24c512", AT24_DEVICE_MAGIC(524288 / 8, AT24_FLAG_ADDR16) },
-	{ "24c1024", AT24_DEVICE_MAGIC(1048576 / 8, AT24_FLAG_ADDR16) },
-	{ "at24", 0 },
+	{ "24c00",	(kernel_ulong_t)&at24_data_24c00 },
+	{ "24c01",	(kernel_ulong_t)&at24_data_24c01 },
+	{ "24cs01",	(kernel_ulong_t)&at24_data_24cs01 },
+	{ "24c02",	(kernel_ulong_t)&at24_data_24c02 },
+	{ "24cs02",	(kernel_ulong_t)&at24_data_24cs02 },
+	{ "24mac402",	(kernel_ulong_t)&at24_data_24mac402 },
+	{ "24mac602",	(kernel_ulong_t)&at24_data_24mac602 },
+	{ "spd",	(kernel_ulong_t)&at24_data_spd },
+	{ "24c04",	(kernel_ulong_t)&at24_data_24c04 },
+	{ "24cs04",	(kernel_ulong_t)&at24_data_24cs04 },
+	{ "24c08",	(kernel_ulong_t)&at24_data_24c08 },
+	{ "24cs08",	(kernel_ulong_t)&at24_data_24cs08 },
+	{ "24c16",	(kernel_ulong_t)&at24_data_24c16 },
+	{ "24cs16",	(kernel_ulong_t)&at24_data_24cs16 },
+	{ "24c32",	(kernel_ulong_t)&at24_data_24c32 },
+	{ "24cs32",	(kernel_ulong_t)&at24_data_24cs32 },
+	{ "24c64",	(kernel_ulong_t)&at24_data_24c64 },
+	{ "24cs64",	(kernel_ulong_t)&at24_data_24cs64 },
+	{ "24c128",	(kernel_ulong_t)&at24_data_24c128 },
+	{ "24c256",	(kernel_ulong_t)&at24_data_24c256 },
+	{ "24c512",	(kernel_ulong_t)&at24_data_24c512 },
+	{ "24c1024",	(kernel_ulong_t)&at24_data_24c1024 },
+	{ "at24", 0 },
 	{ /* END OF LIST */ }
 };
 MODULE_DEVICE_TABLE(i2c, at24_ids);
 
 static const struct of_device_id at24_of_match[] = {
-	{
-		.compatible = "atmel,24c00",
-		.data = (void *)AT24_DEVICE_MAGIC(128 / 8, AT24_FLAG_TAKE8ADDR)
-	},
-	{
-		.compatible = "atmel,24c01",
-		.data = (void *)AT24_DEVICE_MAGIC(1024 / 8, 0)
-	},
-	{
-		.compatible = "atmel,24c02",
-		.data = (void *)AT24_DEVICE_MAGIC(2048 / 8, 0)
-	},
-	{
-		.compatible = "atmel,spd",
-		.data = (void *)AT24_DEVICE_MAGIC(2048 / 8,
-				AT24_FLAG_READONLY | AT24_FLAG_IRUGO)
-	},
-	{
-		.compatible = "atmel,24c04",
-		.data = (void *)AT24_DEVICE_MAGIC(4096 / 8, 0)
-	},
-	{
-		.compatible = "atmel,24c08",
-		.data = (void *)AT24_DEVICE_MAGIC(8192 / 8, 0)
-	},
-	{
-		.compatible = "atmel,24c16",
-		.data = (void *)AT24_DEVICE_MAGIC(16384 / 8, 0)
-	},
-	{
-		.compatible = "atmel,24c32",
-		.data = (void *)AT24_DEVICE_MAGIC(32768 / 8, AT24_FLAG_ADDR16)
-	},
-	{
-		.compatible = "atmel,24c64",
-		.data = (void *)AT24_DEVICE_MAGIC(65536 / 8, AT24_FLAG_ADDR16)
-	},
-	{
-		.compatible = "atmel,24c128",
-		.data = (void *)AT24_DEVICE_MAGIC(131072 / 8, AT24_FLAG_ADDR16)
-	},
-	{
-		.compatible = "atmel,24c256",
-		.data = (void *)AT24_DEVICE_MAGIC(262144 / 8, AT24_FLAG_ADDR16)
-	},
-	{
-		.compatible = "atmel,24c512",
-		.data = (void *)AT24_DEVICE_MAGIC(524288 / 8, AT24_FLAG_ADDR16)
-	},
-	{
-		.compatible = "atmel,24c1024",
-		.data = (void *)AT24_DEVICE_MAGIC(1048576 / 8, AT24_FLAG_ADDR16)
-	},
-	{ },
+	{ .compatible = "atmel,24c00",		.data = &at24_data_24c00 },
+	{ .compatible = "atmel,24c01",		.data = &at24_data_24c01 },
+	{ .compatible = "atmel,24cs01",		.data = &at24_data_24cs01 },
+	{ .compatible = "atmel,24c02",		.data = &at24_data_24c02 },
+	{ .compatible = "atmel,24cs02",		.data = &at24_data_24cs02 },
+	{ .compatible = "atmel,24mac402",	.data = &at24_data_24mac402 },
+	{ .compatible = "atmel,24mac602",	.data = &at24_data_24mac602 },
+	{ .compatible = "atmel,spd",		.data = &at24_data_spd },
+	{ .compatible = "atmel,24c04",		.data = &at24_data_24c04 },
+	{ .compatible = "atmel,24cs04",		.data = &at24_data_24cs04 },
+	{ .compatible = "atmel,24c08",		.data = &at24_data_24c08 },
+	{ .compatible = "atmel,24cs08",		.data = &at24_data_24cs08 },
+	{ .compatible = "atmel,24c16",		.data = &at24_data_24c16 },
+	{ .compatible = "atmel,24cs16",		.data = &at24_data_24cs16 },
+	{ .compatible = "atmel,24c32",		.data = &at24_data_24c32 },
+	{ .compatible = "atmel,24cs32",		.data = &at24_data_24cs32 },
+	{ .compatible = "atmel,24c64",		.data = &at24_data_24c64 },
+	{ .compatible = "atmel,24cs64",		.data = &at24_data_24cs64 },
+	{ .compatible = "atmel,24c128",		.data = &at24_data_24c128 },
+	{ .compatible = "atmel,24c256",		.data = &at24_data_24c256 },
+	{ .compatible = "atmel,24c512",		.data = &at24_data_24c512 },
+	{ .compatible = "atmel,24c1024",	.data = &at24_data_24c1024 },
+	{ /* END OF LIST */ },
 };
 MODULE_DEVICE_TABLE(of, at24_of_match);
 
 static const struct acpi_device_id at24_acpi_ids[] = {
-	{ "INT3499", AT24_DEVICE_MAGIC(8192 / 8, 0) },
-	{ }
+	{ "INT3499", (kernel_ulong_t)&at24_data_INT3499 },
+	{ /* END OF LIST */ }
 };
 MODULE_DEVICE_TABLE(acpi, at24_acpi_ids);
 
@@ -251,20 +249,11 @@ MODULE_DEVICE_TABLE(acpi, at24_acpi_ids);
 * Slave address and byte offset derive from the offset. Always
 * set the byte address; on a multi-master board, another master
 * may have changed the chip's "current" address pointer.
- *
- * REVISIT some multi-address chips don't rollover page reads to
- * the next slave address, so we may need to truncate the count.
- * Those chips might need another quirk flag.
- *
- * If the real hardware used four adjacent 24c02 chips and that
- * were misconfigured as one 24c08, that would be a similar effect:
- * one "eeprom" file not four, but larger reads would fail when
- * they crossed certain pages.
 */
-static struct i2c_client *at24_translate_offset(struct at24_data *at24,
-						unsigned int *offset)
+static struct at24_client *at24_translate_offset(struct at24_data *at24,
+						 unsigned int *offset)
 {
-	unsigned i;
+	unsigned int i;
 
 	if (at24->chip.flags & AT24_FLAG_ADDR16) {
 		i = *offset >> 16;
@@ -274,168 +263,55 @@ static struct i2c_client *at24_translate_offset(struct at24_data *at24,
 		*offset &= 0xff;
 	}
 
-	return at24->client[i];
+	return &at24->client[i];
 }
 
-static ssize_t at24_eeprom_read_smbus(struct at24_data *at24, char *buf,
+static size_t at24_adjust_read_count(struct at24_data *at24,
 				      unsigned int offset, size_t count)
 {
-	unsigned long timeout, read_time;
-	struct i2c_client *client;
-	int status;
-
-	client = at24_translate_offset(at24, &offset);
-
-	if (count > io_limit)
-		count = io_limit;
-
-	/* Smaller eeproms can work given some SMBus extension calls */
-	if (count > I2C_SMBUS_BLOCK_MAX)
-		count = I2C_SMBUS_BLOCK_MAX;
-
-	loop_until_timeout(timeout, read_time) {
-		status = i2c_smbus_read_i2c_block_data_or_emulated(client,
-								   offset,
-								   count, buf);
-
-		dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n",
-			count, offset, status, jiffies);
-
-		if (status == count)
-			return count;
-	}
-
-	return -ETIMEDOUT;
-}
-
-static ssize_t at24_eeprom_read_i2c(struct at24_data *at24, char *buf,
-				    unsigned int offset, size_t count)
-{
-	unsigned long timeout, read_time;
-	struct i2c_client *client;
-	struct i2c_msg msg[2];
-	int status, i;
-	u8 msgbuf[2];
-
-	memset(msg, 0, sizeof(msg));
-	client = at24_translate_offset(at24, &offset);
-
-	if (count > io_limit)
-		count = io_limit;
+	unsigned int bits;
+	size_t remainder;
 
 	/*
-	 * When we have a better choice than SMBus calls, use a combined I2C
-	 * message. Write address; then read up to io_limit data bytes. Note
-	 * that read page rollover helps us here (unlike writes). msgbuf is
-	 * u8 and will cast to our needs.
+	 * In case of multi-address chips that don't rollover reads to
+	 * the next slave address: truncate the count to the slave boundary,
+	 * so that the read never straddles slaves.
	 */
-	i = 0;
-	if (at24->chip.flags & AT24_FLAG_ADDR16)
-		msgbuf[i++] = offset >> 8;
-	msgbuf[i++] = offset;
-
-	msg[0].addr = client->addr;
-	msg[0].buf = msgbuf;
-	msg[0].len = i;
-
-	msg[1].addr = client->addr;
-	msg[1].flags = I2C_M_RD;
-	msg[1].buf = buf;
-	msg[1].len = count;
-
-	loop_until_timeout(timeout, read_time) {
-		status = i2c_transfer(client->adapter, msg, 2);
-		if (status == 2)
-			status = count;
-
-		dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n",
-			count, offset, status, jiffies);
-
-		if (status == count)
-			return count;
+	if (at24->chip.flags & AT24_FLAG_NO_RDROL) {
+		bits = (at24->chip.flags & AT24_FLAG_ADDR16) ? 16 : 8;
+		remainder = BIT(bits) - offset;
+		if (count > remainder)
+			count = remainder;
 	}
 
-	return -ETIMEDOUT;
+	if (count > at24_io_limit)
+		count = at24_io_limit;
+
+	return count;
 }
 
-static ssize_t at24_eeprom_read_serial(struct at24_data *at24, char *buf,
-				       unsigned int offset, size_t count)
+static ssize_t at24_regmap_read(struct at24_data *at24, char *buf,
+				unsigned int offset, size_t count)
 {
 	unsigned long timeout, read_time;
+	struct at24_client *at24_client;
 	struct i2c_client *client;
-	struct i2c_msg msg[2];
-	u8 addrbuf[2];
-	int status;
-
-	client = at24_translate_offset(at24, &offset);
-
-	memset(msg, 0, sizeof(msg));
-	msg[0].addr = client->addr;
-	msg[0].buf = addrbuf;
-
-	/*
-	 * The address pointer of the device is shared between the regular
-	 * EEPROM array and the serial number block. The dummy write (part of
-	 * the sequential read protocol) ensures the address pointer is reset
-	 * to the desired position.
-	 */
-	if (at24->chip.flags & AT24_FLAG_ADDR16) {
-		/*
-		 * For 16 bit address pointers, the word address must contain
-		 * a '10' sequence in bits 11 and 10 regardless of the
-		 * intended position of the address pointer.
-		 */
-		addrbuf[0] = 0x08;
-		addrbuf[1] = offset;
-		msg[0].len = 2;
-	} else {
-		/*
-		 * Otherwise the word address must begin with a '10' sequence,
-		 * regardless of the intended address.
-		 */
-		addrbuf[0] = 0x80 + offset;
-		msg[0].len = 1;
-	}
-
-	msg[1].addr = client->addr;
-	msg[1].flags = I2C_M_RD;
-	msg[1].buf = buf;
-	msg[1].len = count;
+	struct regmap *regmap;
+	int ret;
 
-	loop_until_timeout(timeout, read_time) {
-		status = i2c_transfer(client->adapter, msg, 2);
-		if (status == 2)
-			return count;
-	}
+	at24_client = at24_translate_offset(at24, &offset);
+	regmap = at24_client->regmap;
+	client = at24_client->client;
+	count = at24_adjust_read_count(at24, offset, count);
 
-	return -ETIMEDOUT;
-}
+	/* adjust offset for mac and serial read ops */
+	offset += at24->offset_adj;
 
-static ssize_t at24_eeprom_read_mac(struct at24_data *at24, char *buf,
-				    unsigned int offset, size_t count)
-{
-	unsigned long timeout, read_time;
-	struct i2c_client *client;
-	struct i2c_msg msg[2];
-	u8 addrbuf[2];
-	int status;
-
-	client = at24_translate_offset(at24, &offset);
-
-	memset(msg, 0, sizeof(msg));
-	msg[0].addr = client->addr;
-	msg[0].buf = addrbuf;
-	/* EUI-48 starts from 0x9a, EUI-64 from 0x98 */
-	addrbuf[0] = 0xa0 - at24->chip.byte_len + offset;
-	msg[0].len = 1;
-	msg[1].addr = client->addr;
-	msg[1].flags = I2C_M_RD;
-	msg[1].buf = buf;
-	msg[1].len = count;
-
-	loop_until_timeout(timeout, read_time) {
-		status = i2c_transfer(client->adapter, msg, 2);
-		if (status == 2)
+	at24_loop_until_timeout(timeout, read_time) {
+		ret = regmap_bulk_read(regmap, offset, buf, count);
+		dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n",
+			count, offset, ret, jiffies);
+		if (!ret)
 			return count;
 	}
 
 	return -ETIMEDOUT;
}
@@ -454,7 +330,7 @@ static ssize_t at24_eeprom_read_mac(struct at24_data *at24, char *buf,
 static size_t at24_adjust_write_count(struct at24_data *at24,
 				      unsigned int offset, size_t count)
 {
-	unsigned next_page;
+	unsigned int next_page;
 
 	/* write_max is at most a page */
 	if (count > at24->write_max)
@@ -468,91 +344,25 @@ static size_t at24_adjust_write_count(struct at24_data *at24,
 	return count;
 }
 
-static ssize_t at24_eeprom_write_smbus_block(struct at24_data *at24,
-					     const char *buf,
-					     unsigned int offset, size_t count)
+static ssize_t at24_regmap_write(struct at24_data *at24, const char *buf,
+				 unsigned int offset, size_t count)
 {
 	unsigned long timeout, write_time;
+	struct at24_client *at24_client;
 	struct i2c_client *client;
-	ssize_t status = 0;
-
-	client = at24_translate_offset(at24, &offset);
-	count = at24_adjust_write_count(at24, offset, count);
-
-	loop_until_timeout(timeout, write_time) {
-		status = i2c_smbus_write_i2c_block_data(client,
-							offset, count, buf);
-		if (status == 0)
-			status = count;
-
-		dev_dbg(&client->dev, "write %zu@%d --> %zd (%ld)\n",
-			count, offset, status, jiffies);
-
-		if (status == count)
-			return count;
-	}
-
-	return -ETIMEDOUT;
-}
-
-static ssize_t at24_eeprom_write_smbus_byte(struct at24_data *at24,
-					    const char *buf,
-					    unsigned int offset, size_t count)
-{
-	unsigned long timeout, write_time;
-	struct i2c_client *client;
-	ssize_t status = 0;
-
-	client = at24_translate_offset(at24, &offset);
-
-	loop_until_timeout(timeout, write_time) {
-		status = i2c_smbus_write_byte_data(client, offset, buf[0]);
-		if (status == 0)
-			status = count;
-
-		dev_dbg(&client->dev, "write %zu@%d --> %zd (%ld)\n",
-			count, offset, status, jiffies);
-
-		if (status == count)
-			return count;
-	}
-
-	return -ETIMEDOUT;
-}
-
-static ssize_t at24_eeprom_write_i2c(struct at24_data *at24, const char *buf,
-				     unsigned int offset, size_t count)
-{
-	unsigned long timeout, write_time;
-	struct i2c_client *client;
-	struct i2c_msg msg;
-	ssize_t status = 0;
-	int i = 0;
+	struct regmap *regmap;
+	int ret;
 
-	client = at24_translate_offset(at24, &offset);
+	at24_client = at24_translate_offset(at24, &offset);
+	regmap = at24_client->regmap;
+	client = at24_client->client;
 	count = at24_adjust_write_count(at24, offset, count);
 
-	msg.addr = client->addr;
-	msg.flags = 0;
-
-	/* msg.buf is u8 and casts will mask the values */
-	msg.buf = at24->writebuf;
-	if (at24->chip.flags & AT24_FLAG_ADDR16)
-		msg.buf[i++] = offset >> 8;
-
-	msg.buf[i++] = offset;
-	memcpy(&msg.buf[i], buf, count);
-	msg.len = i + count;
-
-	loop_until_timeout(timeout, write_time) {
-		status = i2c_transfer(client->adapter, &msg, 1);
-		if (status == 1)
-			status = count;
-
-		dev_dbg(&client->dev, "write %zu@%d --> %zd (%ld)\n",
-			count, offset, status, jiffies);
-
-		if (status == count)
+	at24_loop_until_timeout(timeout, write_time) {
+		ret = regmap_bulk_write(regmap, offset, buf, count);
+		dev_dbg(&client->dev, "write %zu@%d --> %d (%ld)\n",
			count, offset, ret, jiffies);
+		if (!ret)
 			return count;
 	}
 
@@ -562,7 +372,7 @@ static ssize_t at24_eeprom_write_i2c(struct at24_data *at24, const char *buf,
 static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 {
 	struct at24_data *at24 = priv;
-	struct device *dev = &at24->client[0]->dev;
+	struct device *dev = &at24->client[0].client->dev;
 	char *buf = val;
 	int ret;
 
@@ -587,7 +397,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 	while (count) {
 		int status;
 
-		status = at24->read_func(at24, buf, off, count);
+		status = at24_regmap_read(at24, buf, off, count);
 		if (status < 0) {
 			mutex_unlock(&at24->lock);
 			pm_runtime_put(dev);
@@ -608,7 +418,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 static int at24_write(void *priv, unsigned int off, void *val, size_t count)
 {
 	struct at24_data *at24 = priv;
-	struct device *dev = &at24->client[0]->dev;
+	struct device *dev = &at24->client[0].client->dev;
 	char *buf = val;
 	int ret;
 
@@ -629,12 +439,14 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
	 * from this host, but not from other I2C masters.
	 */
 	mutex_lock(&at24->lock);
+	gpiod_set_value_cansleep(at24->wp_gpio, 0);
 
 	while (count) {
 		int status;
 
-		status = at24->write_func(at24, buf, off, count);
+		status = at24_regmap_write(at24, buf, off, count);
 		if (status < 0) {
+			gpiod_set_value_cansleep(at24->wp_gpio, 1);
 			mutex_unlock(&at24->lock);
 			pm_runtime_put(dev);
 			return status;
@@ -644,6 +456,7 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
 		count -= status;
 	}
 
+	gpiod_set_value_cansleep(at24->wp_gpio, 1);
 	mutex_unlock(&at24->lock);
 	pm_runtime_put(dev);
 
@@ -658,6 +471,8 @@ static void at24_get_pdata(struct device *dev, struct at24_platform_data *chip)
 
 	if (device_property_present(dev, "read-only"))
 		chip->flags |= AT24_FLAG_READONLY;
+	if (device_property_present(dev, "no-read-rollover"))
+		chip->flags |= AT24_FLAG_NO_RDROL;
 
 	err = device_property_read_u32(dev, "size", &val);
 	if (!err)
@@ -676,16 +491,38 @@ static void at24_get_pdata(struct device *dev, struct at24_platform_data *chip)
 	}
 }
 
+static unsigned int at24_get_offset_adj(u8 flags, unsigned int byte_len)
+{
+	if (flags & AT24_FLAG_MAC) {
+		/* EUI-48 starts from 0x9a, EUI-64 from 0x98 */
+		return 0xa0 - byte_len;
+	} else if (flags & AT24_FLAG_SERIAL && flags & AT24_FLAG_ADDR16) {
+		/*
+		 * For 16 bit address pointers, the word address must contain
+		 * a '10' sequence in bits 11 and 10 regardless of the
+		 * intended position of the address pointer.
+		 */
+		return 0x0800;
+	} else if (flags & AT24_FLAG_SERIAL) {
+		/*
+		 * Otherwise the word address must begin with a '10' sequence,
+		 * regardless of the intended address.
+		 */
+		return 0x0080;
+	} else {
+		return 0;
+	}
+}
+
 static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
-	struct at24_platform_data chip;
-	kernel_ulong_t magic = 0;
+	struct at24_platform_data chip = { 0 };
+	const struct at24_chip_data *cd = NULL;
 	bool writable;
-	int use_smbus = 0;
-	int use_smbus_write = 0;
 	struct at24_data *at24;
 	int err;
-	unsigned i, num_addresses;
+	unsigned int i, num_addresses;
+	struct regmap_config regmap_config = { };
 	u8 test_byte;
 
 	if (client->dev.platform_data) {
@@ -698,28 +535,22 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
		 */
 		if (client->dev.of_node &&
		    of_match_device(at24_of_match, &client->dev)) {
-			magic = (kernel_ulong_t)
-				of_device_get_match_data(&client->dev);
+			cd = of_device_get_match_data(&client->dev);
 		} else if (id) {
-			magic = id->driver_data;
+			cd = (void *)id->driver_data;
 		} else {
 			const struct acpi_device_id *aid;
 
 			aid = acpi_match_device(at24_acpi_ids, &client->dev);
 			if (aid)
-				magic = aid->driver_data;
+				cd = (void *)aid->driver_data;
 		}
-		if (!magic)
+		if (!cd)
 			return -ENODEV;
 
-		chip.byte_len = BIT(magic & AT24_BITMASK(AT24_SIZE_BYTELEN));
-		magic >>= AT24_SIZE_BYTELEN;
-		chip.flags = magic & AT24_BITMASK(AT24_SIZE_FLAGS);
-
+		chip.byte_len = cd->byte_len;
+		chip.flags = cd->flags;
 		at24_get_pdata(&client->dev, &chip);
-
-		chip.setup = NULL;
-		chip.context = NULL;
 	}
 
 	if (!is_power_of_2(chip.byte_len))
@@ -733,43 +564,10 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		dev_warn(&client->dev,
 			"page_size looks suspicious (no power of 2)!\n");
 
-	/*
-	 * REVISIT: the size of the EUI-48 byte array is 6 in at24mac402, while
-	 * the call to ilog2() in AT24_DEVICE_MAGIC() rounds it down to 4.
-	 *
-	 * Eventually we'll get rid of the magic values altoghether in favor of
-	 * real structs, but for now just manually set the right size.
-	 */
-	if (chip.flags & AT24_FLAG_MAC && chip.byte_len == 4)
-		chip.byte_len = 6;
-
-	/* Use I2C operations unless we're stuck with SMBus extensions. */
-	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
-		if (chip.flags & AT24_FLAG_ADDR16)
-			return -EPFNOSUPPORT;
-
-		if (i2c_check_functionality(client->adapter,
-					    I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
-			use_smbus = I2C_SMBUS_I2C_BLOCK_DATA;
-		} else if (i2c_check_functionality(client->adapter,
-						   I2C_FUNC_SMBUS_READ_WORD_DATA)) {
-			use_smbus = I2C_SMBUS_WORD_DATA;
-		} else if (i2c_check_functionality(client->adapter,
-						   I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
-			use_smbus = I2C_SMBUS_BYTE_DATA;
-		} else {
-			return -EPFNOSUPPORT;
-		}
-
-		if (i2c_check_functionality(client->adapter,
-					    I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) {
-			use_smbus_write = I2C_SMBUS_I2C_BLOCK_DATA;
-		} else if (i2c_check_functionality(client->adapter,
-						   I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) {
-			use_smbus_write = I2C_SMBUS_BYTE_DATA;
-			chip.page_size = 1;
-		}
-	}
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C) &&
+	    !i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_WRITE_I2C_BLOCK))
+		chip.page_size = 1;
 
 	if (chip.flags & AT24_FLAG_TAKE8ADDR)
 		num_addresses = 8;
@@ -777,16 +575,28 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		num_addresses =	DIV_ROUND_UP(chip.byte_len,
 			(chip.flags & AT24_FLAG_ADDR16) ? 65536 : 256);
 
+	regmap_config.val_bits = 8;
+	regmap_config.reg_bits = (chip.flags & AT24_FLAG_ADDR16) ? 16 : 8;
+
 	at24 = devm_kzalloc(&client->dev, sizeof(struct at24_data) +
-		num_addresses * sizeof(struct i2c_client *), GFP_KERNEL);
+		num_addresses * sizeof(struct at24_client), GFP_KERNEL);
 	if (!at24)
 		return -ENOMEM;
 
 	mutex_init(&at24->lock);
-	at24->use_smbus = use_smbus;
-	at24->use_smbus_write = use_smbus_write;
 	at24->chip = chip;
 	at24->num_addresses = num_addresses;
+	at24->offset_adj = at24_get_offset_adj(chip.flags, chip.byte_len);
+
+	at24->wp_gpio = devm_gpiod_get_optional(&client->dev,
+						"wp", GPIOD_OUT_HIGH);
+	if (IS_ERR(at24->wp_gpio))
+		return PTR_ERR(at24->wp_gpio);
+
+	at24->client[0].client = client;
+	at24->client[0].regmap = devm_regmap_init_i2c(client, &regmap_config);
+	if (IS_ERR(at24->client[0].regmap))
+		return PTR_ERR(at24->client[0].regmap);
 
 	if ((chip.flags & AT24_FLAG_SERIAL) && (chip.flags & AT24_FLAG_MAC)) {
 		dev_err(&client->dev,
@@ -794,59 +604,32 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		return -EINVAL;
 	}
 
-	if (chip.flags & AT24_FLAG_SERIAL) {
-		at24->read_func = at24_eeprom_read_serial;
-	} else if (chip.flags & AT24_FLAG_MAC) {
-		at24->read_func = at24_eeprom_read_mac;
-	} else {
-		at24->read_func = at24->use_smbus ? at24_eeprom_read_smbus
-						  : at24_eeprom_read_i2c;
-	}
-
-	if (at24->use_smbus) {
-		if (at24->use_smbus_write == I2C_SMBUS_I2C_BLOCK_DATA)
-			at24->write_func = at24_eeprom_write_smbus_block;
-		else
-			at24->write_func = at24_eeprom_write_smbus_byte;
-	} else {
-		at24->write_func = at24_eeprom_write_i2c;
-	}
-
 	writable = !(chip.flags & AT24_FLAG_READONLY);
 	if (writable) {
-		if (!use_smbus || use_smbus_write) {
-
-			unsigned write_max = chip.page_size;
-
-			if (write_max > io_limit)
-				write_max = io_limit;
-			if (use_smbus && write_max > I2C_SMBUS_BLOCK_MAX)
-				write_max = I2C_SMBUS_BLOCK_MAX;
-			at24->write_max = write_max;
-
-			/* buffer (data + address at the beginning) */
-			at24->writebuf = devm_kzalloc(&client->dev,
-						      write_max + 2, GFP_KERNEL);
-			if (!at24->writebuf)
-				return -ENOMEM;
-		} else {
-			dev_warn(&client->dev,
-				"cannot write due to controller restrictions.");
-		}
+		at24->write_max = min_t(unsigned int,
+					chip.page_size, at24_io_limit);
+		if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C) &&
+		    at24->write_max > I2C_SMBUS_BLOCK_MAX)
+			at24->write_max = I2C_SMBUS_BLOCK_MAX;
 	}
 
-	at24->client[0] = client;
-
 	/* use dummy devices for multiple-address chips */
 	for (i = 1; i < num_addresses; i++) {
-		at24->client[i] = i2c_new_dummy(client->adapter,
-					client->addr + i);
-		if (!at24->client[i]) {
+		at24->client[i].client = i2c_new_dummy(client->adapter,
+						       client->addr + i);
+		if (!at24->client[i].client) {
 			dev_err(&client->dev, "address 0x%02x unavailable\n",
 				client->addr + i);
 			err = -EADDRINUSE;
 			goto err_clients;
 		}
+		at24->client[i].regmap = devm_regmap_init_i2c(
+						at24->client[i].client,
+						&regmap_config);
+		if (IS_ERR(at24->client[i].regmap)) {
+			err = PTR_ERR(at24->client[i].regmap);
+			goto err_clients;
+		}
 	}
 
 	i2c_set_clientdata(client, at24);
@@ -890,12 +673,6 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	dev_info(&client->dev, "%u byte %s EEPROM, %s, %u bytes/write\n",
 		chip.byte_len, client->name,
 		writable ? "writable" : "read-only", at24->write_max);
-	if (use_smbus == I2C_SMBUS_WORD_DATA ||
-	    use_smbus == I2C_SMBUS_BYTE_DATA) {
-		dev_notice(&client->dev, "Falling back to %s reads, "
-			   "performance will suffer\n", use_smbus ==
-			   I2C_SMBUS_WORD_DATA ? "word" : "byte");
-	}
 
 	/* export data to kernel code */
 	if (chip.setup)
@@ -905,8 +682,8 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 
 err_clients:
 	for (i = 1; i < num_addresses; i++)
-		if (at24->client[i])
-			i2c_unregister_device(at24->client[i]);
+		if (at24->client[i].client)
+			i2c_unregister_device(at24->client[i].client);
 
 	pm_runtime_disable(&client->dev);
 
@@ -923,7 +700,7 @@ static int at24_remove(struct i2c_client *client)
 	nvmem_unregister(at24->nvmem);
 
 	for (i = 1; i < at24->num_addresses; i++)
-		i2c_unregister_device(at24->client[i]);
+		i2c_unregister_device(at24->client[i].client);
 
 	pm_runtime_disable(&client->dev);
 	pm_runtime_set_suspended(&client->dev);
@@ -946,12 +723,12 @@ static struct i2c_driver at24_driver = {
 
 static int __init at24_init(void)
 {
-	if (!io_limit) {
-		pr_err("at24: io_limit must not be 0!\n");
+	if (!at24_io_limit) {
+		pr_err("at24: at24_io_limit must not be 0!\n");
 		return -EINVAL;
 	}
 
-	io_limit = rounddown_pow_of_two(io_limit);
+	at24_io_limit = rounddown_pow_of_two(at24_io_limit);
 	return i2c_add_driver(&at24_driver);
 }
 module_init(at24_init);
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index 5afe4cd16569..9282ffd607ff 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -276,6 +276,9 @@ static int at25_fw_to_chip(struct device *dev, struct spi_eeprom *chip)
 		return -ENODEV;
 	}
 	switch (val) {
+	case 9:
+		chip->flags |= EE_INSTR_BIT3_IS_ADDR;
+		/* fall through */
 	case 8:
 		chip->flags |= EE_ADDR1;
 		break;
diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
index eb29113e0bac..5a17bfeb80d3 100644
--- a/drivers/misc/enclosure.c
+++ b/drivers/misc/enclosure.c
@@ -468,7 +468,7 @@ static struct class enclosure_class = {
 	.dev_groups		= enclosure_class_groups,
 };
 
-static const char *const enclosure_status [] = {
+static const char *const enclosure_status[] = {
 	[ENCLOSURE_STATUS_UNSUPPORTED] = "unsupported",
 	[ENCLOSURE_STATUS_OK] = "OK",
 	[ENCLOSURE_STATUS_CRITICAL] = "critical",
@@ -480,7 +480,7 @@ static const char *const enclosure_status [] = {
 	[ENCLOSURE_STATUS_MAX] = NULL,
 };
 
-static const char *const enclosure_type [] = {
+static const char *const enclosure_type[] = {
 	[ENCLOSURE_COMPONENT_DEVICE] = "device",
 	[ENCLOSURE_COMPONENT_ARRAY_DEVICE] = "array device",
 };
@@ -680,13 +680,7 @@ ATTRIBUTE_GROUPS(enclosure_component);
 
 static int __init enclosure_init(void)
 {
-	int err;
-
-	err = class_register(&enclosure_class);
-	if (err)
-		return err;
-
-	return 0;
+	return class_register(&enclosure_class);
 }
 
 static void __exit enclosure_exit(void)
diff --git a/drivers/misc/fsa9480.c b/drivers/misc/fsa9480.c
index 71d2793b372c..607b489a6501 100644
--- a/drivers/misc/fsa9480.c
+++ b/drivers/misc/fsa9480.c
@@ -465,6 +465,7 @@ fail1:
 static int fsa9480_remove(struct i2c_client *client)
 {
 	struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
+
 	if (client->irq)
 		free_irq(client->irq, usbsw);
 
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
index 4fd21e86ad56..c7cd3675bcd1 100644
--- a/drivers/misc/genwqe/card_base.c
+++ b/drivers/misc/genwqe/card_base.c
@@ -153,11 +153,11 @@ static struct genwqe_dev *genwqe_dev_alloc(void)
 	cd->card_state = GENWQE_CARD_UNUSED;
 	spin_lock_init(&cd->print_lock);
 
-	cd->ddcb_software_timeout = genwqe_ddcb_software_timeout;
-	cd->kill_timeout = genwqe_kill_timeout;
+	cd->ddcb_software_timeout = GENWQE_DDCB_SOFTWARE_TIMEOUT;
+	cd->kill_timeout = GENWQE_KILL_TIMEOUT;
 
 	for (j = 0; j < GENWQE_MAX_VFS; j++)
-		cd->vf_jobtimeout_msec[j] = genwqe_vf_jobtimeout_msec;
+		cd->vf_jobtimeout_msec[j] = GENWQE_VF_JOBTIMEOUT_MSEC;
 
 	genwqe_devices[i] = cd;
 	return cd;
@@ -324,11 +324,11 @@ static bool genwqe_setup_pf_jtimer(struct genwqe_dev *cd)
 	u32 T = genwqe_T_psec(cd);
 	u64 x;
 
-	if (genwqe_pf_jobtimeout_msec == 0)
+	if (GENWQE_PF_JOBTIMEOUT_MSEC == 0)
 		return false;
 
 	/* PF: large value needed, flash update 2sec per block */
-	x = ilog2(genwqe_pf_jobtimeout_msec *
+	x = ilog2(GENWQE_PF_JOBTIMEOUT_MSEC *
 		  16000000000uL/(T * 15)) - 10;
 
 	genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
@@ -904,7 +904,7 @@ static int genwqe_reload_bistream(struct genwqe_dev *cd)
 *   b) a critical GFIR occured
 *
 * Informational GFIRs are checked and potentially printed in
- * health_check_interval seconds.
+ * GENWQE_HEALTH_CHECK_INTERVAL seconds.
 */
 static int genwqe_health_thread(void *data)
 {
@@ -918,7 +918,7 @@ static int genwqe_health_thread(void *data)
 		rc = wait_event_interruptible_timeout(cd->health_waitq,
 			 (genwqe_health_check_cond(cd, &gfir) ||
			  (should_stop = kthread_should_stop())),
-				 genwqe_health_check_interval * HZ);
+				 GENWQE_HEALTH_CHECK_INTERVAL * HZ);
 
 		if (should_stop)
 			break;
@@ -1028,7 +1028,7 @@ static int genwqe_health_check_start(struct genwqe_dev *cd)
 {
 	int rc;
 
-	if (genwqe_health_check_interval <= 0)
+	if (GENWQE_HEALTH_CHECK_INTERVAL <= 0)
 		return 0;	/* valid for disabling the service */
 
 	/* moved before request_irq() */
diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h
index 3743c87f8ab9..1c3967f10f55 100644
--- a/drivers/misc/genwqe/card_base.h
+++ b/drivers/misc/genwqe/card_base.h
@@ -47,13 +47,13 @@
 #define GENWQE_CARD_NO_MAX		(16 * GENWQE_MAX_FUNCS)
 
 /* Compile parameters, some of them appear in debugfs for later adjustment */
-#define genwqe_ddcb_max			32 /* DDCBs on the work-queue */
-#define genwqe_polling_enabled		0  /* in case of irqs not working */
-#define genwqe_ddcb_software_timeout	10 /* timeout per DDCB in seconds */
-#define genwqe_kill_timeout		8  /* time until process gets killed */
-#define genwqe_vf_jobtimeout_msec	250  /* 250 msec */
-#define genwqe_pf_jobtimeout_msec	8000 /* 8 sec should be ok */
-#define genwqe_health_check_interval	4 /* <= 0: disabled */
+#define GENWQE_DDCB_MAX			32 /* DDCBs on the work-queue */
+#define GENWQE_POLLING_ENABLED		0  /* in case of irqs not working */
+#define GENWQE_DDCB_SOFTWARE_TIMEOUT	10 /* timeout per DDCB in seconds */
+#define GENWQE_KILL_TIMEOUT		8  /* time until process gets killed */
+#define GENWQE_VF_JOBTIMEOUT_MSEC	250  /* 250 msec */
+#define GENWQE_PF_JOBTIMEOUT_MSEC	8000 /* 8 sec should be ok */
+#define GENWQE_HEALTH_CHECK_INTERVAL	4 /* <= 0: disabled */
 
 /* Sysfs attribute groups used when we create the genwqe device */
 extern const struct attribute_group *genwqe_attribute_groups[];
@@ -490,11 +490,9 @@ int  genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len);
 
 /* Memory allocation/deallocation; dma address handling */
 int  genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m,
-		      void *uaddr, unsigned long size,
-		      struct ddcb_requ *req);
+		      void *uaddr, unsigned long size);
 
-int  genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m,
-			struct ddcb_requ *req);
+int  genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m);
 
 static inline bool dma_mapping_used(struct dma_mapping *m)
 {
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
index ddfeefe39540..b7f8d35c17a9 100644
--- a/drivers/misc/genwqe/card_ddcb.c
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -500,7 +500,7 @@ int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
 	rc = wait_event_interruptible_timeout(queue->ddcb_waitqs[ddcb_no],
 				ddcb_requ_finished(cd, req),
-				genwqe_ddcb_software_timeout * HZ);
+				GENWQE_DDCB_SOFTWARE_TIMEOUT * HZ);
 
 	/*
	 * We need to distinguish 3 cases here:
@@ -633,7 +633,7 @@ int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
 	__be32 old, new;
 
 	/* unsigned long flags; */
-	if (genwqe_ddcb_software_timeout <= 0) {
+	if (GENWQE_DDCB_SOFTWARE_TIMEOUT <= 0) {
 		dev_err(&pci_dev->dev,
 			"[%s] err: software timeout is not set!\n", __func__);
 		return -EFAULT;
@@ -641,7 +641,7 @@ int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
 
 	pddcb = &queue->ddcb_vaddr[req->num];
 
-	for (t = 0; t < genwqe_ddcb_software_timeout * 10; t++) {
+	for (t = 0; t < GENWQE_DDCB_SOFTWARE_TIMEOUT * 10; t++) {
 
 		spin_lock_irqsave(&queue->ddcb_lock, flags);
 
@@ -718,7 +718,7 @@ go_home:
 
 	dev_err(&pci_dev->dev,
 		"[%s] err: DDCB#%d not purged and not completed after %d seconds QSTAT=%016llx!!\n",
-		__func__, req->num, genwqe_ddcb_software_timeout,
+		__func__, req->num, GENWQE_DDCB_SOFTWARE_TIMEOUT,
 		queue_status);
 
 	print_ddcb_info(cd, req->queue);
@@ -778,7 +778,7 @@ int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req,
 
 	/* FIXME circumvention to improve performance when no irq is
	 * there.
	 */
-	if (genwqe_polling_enabled)
+	if (GENWQE_POLLING_ENABLED)
 		genwqe_check_ddcb_queue(cd, queue);
 
 	/*
@@ -878,7 +878,7 @@ int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req,
 	pddcb->icrc_hsi_shi_32 = cpu_to_be32((u32)icrc << 16);
 
 	/* enable DDCB completion irq */
-	if (!genwqe_polling_enabled)
+	if (!GENWQE_POLLING_ENABLED)
 		pddcb->icrc_hsi_shi_32 |= DDCB_INTR_BE32;
 
 	dev_dbg(&pci_dev->dev, "INPUT DDCB#%d\n", req->num);
@@ -1028,10 +1028,10 @@ static int setup_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
 	unsigned int queue_size;
 	struct pci_dev *pci_dev = cd->pci_dev;
 
-	if (genwqe_ddcb_max < 2)
+	if (GENWQE_DDCB_MAX < 2)
 		return -EINVAL;
 
-	queue_size = roundup(genwqe_ddcb_max * sizeof(struct ddcb), PAGE_SIZE);
+	queue_size = roundup(GENWQE_DDCB_MAX * sizeof(struct ddcb), PAGE_SIZE);
 
 	queue->ddcbs_in_flight = 0;  /* statistics */
 	queue->ddcbs_max_in_flight = 0;
@@ -1040,7 +1040,7 @@ static int setup_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
 	queue->wait_on_busy = 0;
 
 	queue->ddcb_seq	  = 0x100; /* start sequence number */
-	queue->ddcb_max	  = genwqe_ddcb_max; /* module parameter */
+	queue->ddcb_max	  = GENWQE_DDCB_MAX;
 	queue->ddcb_vaddr = __genwqe_alloc_consistent(cd, queue_size,
 						      &queue->ddcb_daddr);
 	if (queue->ddcb_vaddr == NULL) {
@@ -1194,7 +1194,7 @@ static int genwqe_card_thread(void *data)
 
 		genwqe_check_ddcb_queue(cd, &cd->queue);
 
-		if (genwqe_polling_enabled) {
+		if (GENWQE_POLLING_ENABLED) {
 			rc = wait_event_interruptible_timeout(
 				cd->queue_waitq,
 				genwqe_ddcbs_in_flight(cd) ||
@@ -1340,7 +1340,7 @@ static int queue_wake_up_all(struct genwqe_dev *cd)
 int genwqe_finish_queue(struct genwqe_dev *cd)
 {
 	int i, rc = 0, in_flight;
-	int waitmax = genwqe_ddcb_software_timeout;
+	int waitmax = GENWQE_DDCB_SOFTWARE_TIMEOUT;
 	struct pci_dev *pci_dev = cd->pci_dev;
 	struct ddcb_queue *queue = &cd->queue;
%d msec\n", jtimer, - genwqe_pf_jobtimeout_msec); + GENWQE_PF_JOBTIMEOUT_MSEC); for (vf_num = 0; vf_num < cd->num_vfs; vf_num++) { jtimer = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index 3ecfa35457e0..0dd6b5ef314a 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -226,7 +226,7 @@ static void genwqe_remove_mappings(struct genwqe_file *cfile) kfree(dma_map); } else if (dma_map->type == GENWQE_MAPPING_SGL_TEMP) { /* we use dma_map statically from the request */ - genwqe_user_vunmap(cd, dma_map, NULL); + genwqe_user_vunmap(cd, dma_map); } } } @@ -249,7 +249,7 @@ static void genwqe_remove_pinnings(struct genwqe_file *cfile) * deleted. */ list_del_init(&dma_map->pin_list); - genwqe_user_vunmap(cd, dma_map, NULL); + genwqe_user_vunmap(cd, dma_map); kfree(dma_map); } } @@ -790,7 +790,7 @@ static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m) return -ENOMEM; genwqe_mapping_init(dma_map, GENWQE_MAPPING_SGL_PINNED); - rc = genwqe_user_vmap(cd, dma_map, (void *)map_addr, map_size, NULL); + rc = genwqe_user_vmap(cd, dma_map, (void *)map_addr, map_size); if (rc != 0) { dev_err(&pci_dev->dev, "[%s] genwqe_user_vmap rc=%d\n", __func__, rc); @@ -820,7 +820,7 @@ static int genwqe_unpin_mem(struct genwqe_file *cfile, struct genwqe_mem *m) return -ENOENT; genwqe_del_pin(cfile, dma_map); - genwqe_user_vunmap(cd, dma_map, NULL); + genwqe_user_vunmap(cd, dma_map); kfree(dma_map); return 0; } @@ -841,7 +841,7 @@ static int ddcb_cmd_cleanup(struct genwqe_file *cfile, struct ddcb_requ *req) if (dma_mapping_used(dma_map)) { __genwqe_del_mapping(cfile, dma_map); - genwqe_user_vunmap(cd, dma_map, req); + genwqe_user_vunmap(cd, dma_map); } if (req->sgls[i].sgl != NULL) genwqe_free_sync_sgl(cd, &req->sgls[i]); @@ -947,7 +947,7 @@ static int ddcb_cmd_fixups(struct genwqe_file *cfile, struct ddcb_requ *req) m->write = 0; rc = genwqe_user_vmap(cd, m, (void *)u_addr, - u_size, req); + u_size); if (rc != 0) goto err_out; @@ -1011,7 +1011,6 @@ static int do_execute_ddcb(struct genwqe_file *cfile, { int rc; struct genwqe_ddcb_cmd *cmd; - struct ddcb_requ *req; struct genwqe_dev *cd = cfile->cd; struct file *filp = cfile->filp; @@ -1019,8 +1018,6 @@ static int do_execute_ddcb(struct genwqe_file *cfile, if (cmd == NULL) return -ENOMEM; - req = container_of(cmd, struct ddcb_requ, cmd); - if (copy_from_user(cmd, (void __user *)arg, sizeof(*cmd))) { ddcb_requ_free(cmd); return -EFAULT; @@ -1345,7 +1342,7 @@ static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd) rc = genwqe_kill_fasync(cd, SIGIO); if (rc > 0) { /* give kill_timeout seconds to close file descriptors ... 
*/ - for (i = 0; (i < genwqe_kill_timeout) && + for (i = 0; (i < GENWQE_KILL_TIMEOUT) && genwqe_open_files(cd); i++) { dev_info(&pci_dev->dev, " %d sec ...", i); @@ -1363,7 +1360,7 @@ static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd) rc = genwqe_force_sig(cd, SIGKILL); /* force terminate */ if (rc) { /* Give kill_timeout more seconds to end processes */ - for (i = 0; (i < genwqe_kill_timeout) && + for (i = 0; (i < GENWQE_KILL_TIMEOUT) && genwqe_open_files(cd); i++) { dev_warn(&pci_dev->dev, " %d sec ...", i); diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index 5c0d917636f7..8f2e6442d88b 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c @@ -524,22 +524,16 @@ int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl) } /** - * free_user_pages() - Give pinned pages back + * genwqe_free_user_pages() - Give pinned pages back * - * Documentation of get_user_pages is in mm/memory.c: + * Documentation of get_user_pages is in mm/gup.c: * * If the page is written to, set_page_dirty (or set_page_dirty_lock, * as appropriate) must be called after the page is finished with, and * before put_page is called. - * - * FIXME Could be of use to others and might belong in the generic - * code, if others agree. E.g. - * ll_free_user_pages in drivers/staging/lustre/lustre/llite/rw26.c - * ceph_put_page_vector in net/ceph/pagevec.c - * maybe more? */ -static int free_user_pages(struct page **page_list, unsigned int nr_pages, - int dirty) +static int genwqe_free_user_pages(struct page **page_list, + unsigned int nr_pages, int dirty) { unsigned int i; @@ -577,7 +571,7 @@ static int free_user_pages(struct page **page_list, unsigned int nr_pages, * Return: 0 if success */ int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, - unsigned long size, struct ddcb_requ *req) + unsigned long size) { int rc = -EINVAL; unsigned long data, offs; @@ -617,7 +611,7 @@ int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, /* assumption: get_user_pages can be killed by signals. */ if (rc < m->nr_pages) { - free_user_pages(m->page_list, rc, m->write); + genwqe_free_user_pages(m->page_list, rc, m->write); rc = -EFAULT; goto fail_get_user_pages; } @@ -629,7 +623,7 @@ int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, return 0; fail_free_user_pages: - free_user_pages(m->page_list, m->nr_pages, m->write); + genwqe_free_user_pages(m->page_list, m->nr_pages, m->write); fail_get_user_pages: kfree(m->page_list); @@ -647,8 +641,7 @@ int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, * @cd: pointer to genwqe device * @m: mapping params */ -int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m, - struct ddcb_requ *req) +int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m) { struct pci_dev *pci_dev = cd->pci_dev; @@ -662,7 +655,7 @@ int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m, genwqe_unmap_pages(cd, m->dma_list, m->nr_pages); if (m->page_list) { - free_user_pages(m->page_list, m->nr_pages, m->write); + genwqe_free_user_pages(m->page_list, m->nr_pages, m->write); kfree(m->page_list); m->page_list = NULL; diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c index 95ce3e891b1b..35693c0a78e2 100644 --- a/drivers/misc/hpilo.c +++ b/drivers/misc/hpilo.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Driver for the HP iLO management processor.
* * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. * David Altobelli <david.altobelli@hpe.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/kernel.h> #include <linux/types.h> diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h index b97672e0cf90..94dfb9e40e29 100644 --- a/drivers/misc/hpilo.h +++ b/drivers/misc/hpilo.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/drivers/char/hpilo.h * * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. * David Altobelli <david.altobelli@hp.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __HPILO_H #define __HPILO_H diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c index 28f51e01fd2b..81a0541ef3ac 100644 --- a/drivers/misc/ics932s401.c +++ b/drivers/misc/ics932s401.c @@ -33,7 +33,7 @@ static const unsigned short normal_i2c[] = { 0x69, I2C_CLIENT_END }; /* ICS932S401 registers */ #define ICS932S401_REG_CFG2 0x01 -#define ICS932S401_CFG1_SPREAD 0x01 +#define ICS932S401_CFG1_SPREAD 0x01 #define ICS932S401_REG_CFG7 0x06 #define ICS932S401_FS_MASK 0x07 #define ICS932S401_REG_VENDOR_REV 0x07 @@ -58,7 +58,7 @@ static const unsigned short normal_i2c[] = { 0x69, I2C_CLIENT_END }; #define ICS932S401_REG_SRC_SPREAD1 0x11 #define ICS932S401_REG_SRC_SPREAD2 0x12 #define ICS932S401_REG_CPU_DIVISOR 0x13 -#define ICS932S401_CPU_DIVISOR_SHIFT 4 +#define ICS932S401_CPU_DIVISOR_SHIFT 4 #define ICS932S401_REG_PCISRC_DIVISOR 0x14 #define ICS932S401_SRC_DIVISOR_MASK 0x0F #define ICS932S401_PCI_DIVISOR_SHIFT 4 @@ -225,6 +225,7 @@ static ssize_t show_cpu_clock_sel(struct device *dev, else { /* Freq is neatly wrapped up for us */ int fid = data->regs[ICS932S401_REG_CFG7] & ICS932S401_FS_MASK; + freq = fs_speeds[fid]; if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT) { switch (freq) { @@ -352,8 +353,7 @@ static DEVICE_ATTR(ref_clock, S_IRUGO, show_value, NULL); static DEVICE_ATTR(cpu_spread, S_IRUGO, show_spread, NULL); static DEVICE_ATTR(src_spread, S_IRUGO, show_spread, NULL); -static struct attribute *ics932s401_attr[] = -{ +static struct attribute *ics932s401_attr[] = { &dev_attr_spread_enabled.attr, &dev_attr_cpu_clock_selection.attr, &dev_attr_cpu_clock.attr, diff --git a/drivers/misc/isl29003.c b/drivers/misc/isl29003.c index 976df0013633..b8032882c865 100644 --- a/drivers/misc/isl29003.c +++ b/drivers/misc/isl29003.c @@ -78,6 +78,7 @@ static int __isl29003_read_reg(struct i2c_client *client, u32 reg, u8 mask, u8 shift) { struct isl29003_data *data = i2c_get_clientdata(client); + return (data->reg_cache[reg] & mask) >> shift; } @@ -160,6 +161,7 @@ static int isl29003_get_power_state(struct i2c_client *client) { struct isl29003_data *data = i2c_get_clientdata(client); u8 cmdreg = data->reg_cache[ISL29003_REG_COMMAND]; + return ~cmdreg & ISL29003_ADC_PD; } @@ -196,6 +198,7 @@ static ssize_t isl29003_show_range(struct device *dev, struct device_attribute *attr, char *buf) { struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%i\n", isl29003_get_range(client)); } @@ -231,6 +234,7 @@ static ssize_t isl29003_show_resolution(struct device *dev, char *buf) { struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%d\n", isl29003_get_resolution(client)); 
} @@ -264,6 +268,7 @@ static ssize_t isl29003_show_mode(struct device *dev, struct device_attribute *attr, char *buf) { struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%d\n", isl29003_get_mode(client)); } @@ -298,6 +303,7 @@ static ssize_t isl29003_show_power_state(struct device *dev, char *buf) { struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%d\n", isl29003_get_power_state(client)); } @@ -361,6 +367,7 @@ static int isl29003_init_client(struct i2c_client *client) * if one of the reads fails, we consider the init failed */ for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) { int v = i2c_smbus_read_byte_data(client, i); + if (v < 0) return -ENODEV; diff --git a/drivers/misc/lkdtm.h b/drivers/misc/lkdtm.h index 687a0dbbe199..9e513dcfd809 100644 --- a/drivers/misc/lkdtm.h +++ b/drivers/misc/lkdtm.h @@ -76,8 +76,8 @@ void __init lkdtm_usercopy_init(void); void __exit lkdtm_usercopy_exit(void); void lkdtm_USERCOPY_HEAP_SIZE_TO(void); void lkdtm_USERCOPY_HEAP_SIZE_FROM(void); -void lkdtm_USERCOPY_HEAP_FLAG_TO(void); -void lkdtm_USERCOPY_HEAP_FLAG_FROM(void); +void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void); +void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void); void lkdtm_USERCOPY_STACK_FRAME_TO(void); void lkdtm_USERCOPY_STACK_FRAME_FROM(void); void lkdtm_USERCOPY_STACK_BEYOND(void); diff --git a/drivers/misc/lkdtm_core.c b/drivers/misc/lkdtm_core.c index ba92291508dc..2154d1bfd18b 100644 --- a/drivers/misc/lkdtm_core.c +++ b/drivers/misc/lkdtm_core.c @@ -96,7 +96,7 @@ static struct crashpoint crashpoints[] = { CRASHPOINT("DIRECT", NULL), #ifdef CONFIG_KPROBES CRASHPOINT("INT_HARDWARE_ENTRY", "do_IRQ"), - CRASHPOINT("INT_HW_IRQ_EN", "handle_IRQ_event"), + CRASHPOINT("INT_HW_IRQ_EN", "handle_irq_event"), CRASHPOINT("INT_TASKLET_ENTRY", "tasklet_action"), CRASHPOINT("FS_DEVRW", "ll_rw_block"), CRASHPOINT("MEM_SWAPOUT", "shrink_inactive_list"), @@ -177,8 +177,8 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(ATOMIC_TIMING), CRASHTYPE(USERCOPY_HEAP_SIZE_TO), CRASHTYPE(USERCOPY_HEAP_SIZE_FROM), - CRASHTYPE(USERCOPY_HEAP_FLAG_TO), - CRASHTYPE(USERCOPY_HEAP_FLAG_FROM), + CRASHTYPE(USERCOPY_HEAP_WHITELIST_TO), + CRASHTYPE(USERCOPY_HEAP_WHITELIST_FROM), CRASHTYPE(USERCOPY_STACK_FRAME_TO), CRASHTYPE(USERCOPY_STACK_FRAME_FROM), CRASHTYPE(USERCOPY_STACK_BEYOND), diff --git a/drivers/misc/lkdtm_heap.c b/drivers/misc/lkdtm_heap.c index f5494a6d4be5..65026d7de130 100644 --- a/drivers/misc/lkdtm_heap.c +++ b/drivers/misc/lkdtm_heap.c @@ -16,6 +16,8 @@ void lkdtm_OVERWRITE_ALLOCATION(void) { size_t len = 1020; u32 *data = kmalloc(len, GFP_KERNEL); + if (!data) + return; data[1024 / sizeof(u32)] = 0x12345678; kfree(data); @@ -33,6 +35,8 @@ void lkdtm_WRITE_AFTER_FREE(void) size_t offset = (len / sizeof(*base)) / 2; base = kmalloc(len, GFP_KERNEL); + if (!base) + return; pr_info("Allocated memory %p-%p\n", base, &base[offset * 2]); pr_info("Attempting bad write to freed memory at %p\n", &base[offset]); diff --git a/drivers/misc/lkdtm_usercopy.c b/drivers/misc/lkdtm_usercopy.c index a64372cc148d..9725aed305bb 100644 --- a/drivers/misc/lkdtm_usercopy.c +++ b/drivers/misc/lkdtm_usercopy.c @@ -20,7 +20,7 @@ */ static volatile size_t unconst = 0; static volatile size_t cache_size = 1024; -static struct kmem_cache *bad_cache; +static struct kmem_cache *whitelist_cache; static const unsigned char test_text[] = "This is a test.\n"; @@ -115,10 +115,16 @@ free_user: vm_munmap(user_addr, PAGE_SIZE); } +/* + * This checks for whole-object size validation with hardened usercopy, 
+ * with or without usercopy whitelisting. + */ static void do_usercopy_heap_size(bool to_user) { unsigned long user_addr; unsigned char *one, *two; + void __user *test_user_addr; + void *test_kern_addr; size_t size = unconst + 1024; one = kmalloc(size, GFP_KERNEL); @@ -139,27 +145,30 @@ static void do_usercopy_heap_size(bool to_user) memset(one, 'A', size); memset(two, 'B', size); + test_user_addr = (void __user *)(user_addr + 16); + test_kern_addr = one + 16; + if (to_user) { pr_info("attempting good copy_to_user of correct size\n"); - if (copy_to_user((void __user *)user_addr, one, size)) { + if (copy_to_user(test_user_addr, test_kern_addr, size / 2)) { pr_warn("copy_to_user failed unexpectedly?!\n"); goto free_user; } pr_info("attempting bad copy_to_user of too large size\n"); - if (copy_to_user((void __user *)user_addr, one, 2 * size)) { + if (copy_to_user(test_user_addr, test_kern_addr, size)) { pr_warn("copy_to_user failed, but lacked Oops\n"); goto free_user; } } else { pr_info("attempting good copy_from_user of correct size\n"); - if (copy_from_user(one, (void __user *)user_addr, size)) { + if (copy_from_user(test_kern_addr, test_user_addr, size / 2)) { pr_warn("copy_from_user failed unexpectedly?!\n"); goto free_user; } pr_info("attempting bad copy_from_user of too large size\n"); - if (copy_from_user(one, (void __user *)user_addr, 2 * size)) { + if (copy_from_user(test_kern_addr, test_user_addr, size)) { pr_warn("copy_from_user failed, but lacked Oops\n"); goto free_user; } @@ -172,77 +181,79 @@ free_kernel: kfree(two); } -static void do_usercopy_heap_flag(bool to_user) +/* + * This checks for the specific whitelist window within an object. If this + * test passes, then do_usercopy_heap_size() tests will pass too. + */ +static void do_usercopy_heap_whitelist(bool to_user) { - unsigned long user_addr; - unsigned char *good_buf = NULL; - unsigned char *bad_buf = NULL; + unsigned long user_alloc; + unsigned char *buf = NULL; + unsigned char __user *user_addr; + size_t offset, size; /* Make sure cache was prepared. */ - if (!bad_cache) { + if (!whitelist_cache) { pr_warn("Failed to allocate kernel cache\n"); return; } /* - * Allocate one buffer from each cache (kmalloc will have the - * SLAB_USERCOPY flag already, but "bad_cache" won't). + * Allocate a buffer with a whitelisted window in the buffer. */ - good_buf = kmalloc(cache_size, GFP_KERNEL); - bad_buf = kmem_cache_alloc(bad_cache, GFP_KERNEL); - if (!good_buf || !bad_buf) { - pr_warn("Failed to allocate buffers from caches\n"); + buf = kmem_cache_alloc(whitelist_cache, GFP_KERNEL); + if (!buf) { + pr_warn("Failed to allocate buffer from whitelist cache\n"); goto free_alloc; } /* Allocate user memory we'll poke at. */ - user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + user_alloc = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, 0); - if (user_addr >= TASK_SIZE) { + if (user_alloc >= TASK_SIZE) { pr_warn("Failed to allocate user memory\n"); goto free_alloc; } + user_addr = (void __user *)user_alloc; - memset(good_buf, 'A', cache_size); - memset(bad_buf, 'B', cache_size); + memset(buf, 'B', cache_size); + + /* Whitelisted window in buffer, from kmem_cache_create_usercopy. 
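+	 * The window is [cache_size/4, cache_size/4 + cache_size/16),
+	 * matching the useroffset/usersize arguments passed to
+	 * kmem_cache_create_usercopy() in lkdtm_usercopy_init() below.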
*/ + offset = (cache_size / 4) + unconst; + size = (cache_size / 16) + unconst; if (to_user) { - pr_info("attempting good copy_to_user with SLAB_USERCOPY\n"); - if (copy_to_user((void __user *)user_addr, good_buf, - cache_size)) { + pr_info("attempting good copy_to_user inside whitelist\n"); + if (copy_to_user(user_addr, buf + offset, size)) { pr_warn("copy_to_user failed unexpectedly?!\n"); goto free_user; } - pr_info("attempting bad copy_to_user w/o SLAB_USERCOPY\n"); - if (copy_to_user((void __user *)user_addr, bad_buf, - cache_size)) { + pr_info("attempting bad copy_to_user outside whitelist\n"); + if (copy_to_user(user_addr, buf + offset - 1, size)) { pr_warn("copy_to_user failed, but lacked Oops\n"); goto free_user; } } else { - pr_info("attempting good copy_from_user with SLAB_USERCOPY\n"); - if (copy_from_user(good_buf, (void __user *)user_addr, - cache_size)) { + pr_info("attempting good copy_from_user inside whitelist\n"); + if (copy_from_user(buf + offset, user_addr, size)) { pr_warn("copy_from_user failed unexpectedly?!\n"); goto free_user; } - pr_info("attempting bad copy_from_user w/o SLAB_USERCOPY\n"); - if (copy_from_user(bad_buf, (void __user *)user_addr, - cache_size)) { + pr_info("attempting bad copy_from_user outside whitelist\n"); + if (copy_from_user(buf + offset - 1, user_addr, size)) { pr_warn("copy_from_user failed, but lacked Oops\n"); goto free_user; } } free_user: - vm_munmap(user_addr, PAGE_SIZE); + vm_munmap(user_alloc, PAGE_SIZE); free_alloc: - if (bad_buf) - kmem_cache_free(bad_cache, bad_buf); - kfree(good_buf); + if (buf) + kmem_cache_free(whitelist_cache, buf); } /* Callable tests. */ @@ -256,14 +267,14 @@ void lkdtm_USERCOPY_HEAP_SIZE_FROM(void) do_usercopy_heap_size(false); } -void lkdtm_USERCOPY_HEAP_FLAG_TO(void) +void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void) { - do_usercopy_heap_flag(true); + do_usercopy_heap_whitelist(true); } -void lkdtm_USERCOPY_HEAP_FLAG_FROM(void) +void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void) { - do_usercopy_heap_flag(false); + do_usercopy_heap_whitelist(false); } void lkdtm_USERCOPY_STACK_FRAME_TO(void) @@ -314,11 +325,15 @@ free_user: void __init lkdtm_usercopy_init(void) { /* Prepare cache that lacks SLAB_USERCOPY flag. 
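 * With whitelisting, the cache is instead created with an explicit
 * user-copy window via kmem_cache_create_usercopy() below.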
*/ - bad_cache = kmem_cache_create("lkdtm-no-usercopy", cache_size, 0, - 0, NULL); + whitelist_cache = + kmem_cache_create_usercopy("lkdtm-usercopy", cache_size, + 0, 0, + cache_size / 4, + cache_size / 16, + NULL); } void __exit lkdtm_usercopy_exit(void) { - kmem_cache_destroy(bad_cache); + kmem_cache_destroy(whitelist_cache); } diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 1ac10cb64d6e..3e5eabdae8d9 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -543,14 +543,20 @@ int mei_cldev_disable(struct mei_cl_device *cldev) mutex_lock(&bus->device_lock); if (!mei_cl_is_connected(cl)) { - dev_dbg(bus->dev, "Already disconnected"); + dev_dbg(bus->dev, "Already disconnected\n"); + err = 0; + goto out; + } + + if (bus->dev_state == MEI_DEV_POWER_DOWN) { + dev_dbg(bus->dev, "Device is powering down, don't bother with disconnection\n"); err = 0; goto out; } err = mei_cl_disconnect(cl); if (err < 0) - dev_err(bus->dev, "Could not disconnect from the ME client"); + dev_err(bus->dev, "Could not disconnect from the ME client\n"); out: /* Flush queues and remove any pending read */ diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 10dcf4ff99a5..334ab02e1de2 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1260,7 +1260,9 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) if (rets == -ENODATA) break; - if (rets && dev->dev_state != MEI_DEV_RESETTING) { + if (rets && + (dev->dev_state != MEI_DEV_RESETTING && + dev->dev_state != MEI_DEV_POWER_DOWN)) { dev_err(dev->dev, "mei_irq_read_handler ret = %d.\n", rets); schedule_work(&dev->reset_work); diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c index 24e4a4c96606..c2c8993e2a51 100644 --- a/drivers/misc/mei/hw-txe.c +++ b/drivers/misc/mei/hw-txe.c @@ -1127,7 +1127,9 @@ irqreturn_t mei_txe_irq_thread_handler(int irq, void *dev_id) if (test_and_clear_bit(TXE_INTR_OUT_DB_BIT, &hw->intr_cause)) { /* Read from TXE */ rets = mei_irq_read_handler(dev, &cmpl_list, &slots); - if (rets && dev->dev_state != MEI_DEV_RESETTING) { + if (rets && + (dev->dev_state != MEI_DEV_RESETTING && + dev->dev_state != MEI_DEV_POWER_DOWN)) { dev_err(dev->dev, "mei_irq_read_handler ret = %d.\n", rets); diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index d2f691424dd1..c46f6e99a55e 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -310,6 +310,9 @@ void mei_stop(struct mei_device *dev) { dev_dbg(dev->dev, "stopping the device.\n"); + mutex_lock(&dev->device_lock); + dev->dev_state = MEI_DEV_POWER_DOWN; + mutex_unlock(&dev->device_lock); mei_cl_bus_remove_devices(dev); mei_cancel_work(dev); @@ -319,7 +322,6 @@ void mei_stop(struct mei_device *dev) mutex_lock(&dev->device_lock); - dev->dev_state = MEI_DEV_POWER_DOWN; mei_reset(dev); /* move device to disabled state unconditionally */ dev->dev_state = MEI_DEV_DISABLED; diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index f4f17552c9b8..4a0ccda4d04b 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -238,8 +238,11 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) */ mei_me_set_pm_domain(dev); - if (mei_pg_is_enabled(dev)) + if (mei_pg_is_enabled(dev)) { pm_runtime_put_noidle(&pdev->dev); + if (hw->d0i3_supported) + pm_runtime_allow(&pdev->dev); + } dev_dbg(&pdev->dev, "initialization successful.\n"); diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c index a341938c7e2c..3633202e18f4 100644 --- 
a/drivers/misc/mic/vop/vop_main.c +++ b/drivers/misc/mic/vop/vop_main.c @@ -452,10 +452,12 @@ static irqreturn_t vop_virtio_intr_handler(int irq, void *data) static void vop_virtio_release_dev(struct device *_d) { - /* - * No need for a release method similar to virtio PCI. - * Provide an empty one to avoid getting a warning from core. - */ + struct virtio_device *vdev = + container_of(_d, struct virtio_device, dev); + struct _vop_vdev *vop_vdev = + container_of(vdev, struct _vop_vdev, vdev); + + kfree(vop_vdev); } /* @@ -466,7 +468,7 @@ static int _vop_add_device(struct mic_device_desc __iomem *d, unsigned int offset, struct vop_device *vpdev, int dnode) { - struct _vop_vdev *vdev; + struct _vop_vdev *vdev, *reg_dev = NULL; int ret; u8 type = ioread8(&d->type); @@ -497,6 +499,7 @@ static int _vop_add_device(struct mic_device_desc __iomem *d, vdev->c2h_vdev_db = ioread8(&vdev->dc->c2h_vdev_db); ret = register_virtio_device(&vdev->vdev); + reg_dev = vdev; if (ret) { dev_err(_vop_dev(vdev), "Failed to register vop device %u type %u\n", @@ -512,7 +515,10 @@ static int _vop_add_device(struct mic_device_desc __iomem *d, free_irq: vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev); kfree: - kfree(vdev); + if (reg_dev) + put_device(&vdev->vdev.dev); + else + kfree(vdev); return ret; } @@ -568,7 +574,7 @@ static int _vop_remove_device(struct mic_device_desc __iomem *d, iowrite8(-1, &dc->h2c_vdev_db); if (status & VIRTIO_CONFIG_S_DRIVER_OK) wait_for_completion(&vdev->reset_done); - kfree(vdev); + put_device(&vdev->vdev.dev); iowrite8(1, &dc->guest_ack); dev_dbg(&vpdev->dev, "%s %d guest_ack %d\n", __func__, __LINE__, ioread8(&dc->guest_ack)); diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c index 4120ed8f0cae..01d1f2ba7bb8 100644 --- a/drivers/misc/mic/vop/vop_vringh.c +++ b/drivers/misc/mic/vop/vop_vringh.c @@ -937,13 +937,10 @@ static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg) dd.num_vq > MIC_MAX_VRINGS) return -EINVAL; - dd_config = kzalloc(mic_desc_size(&dd), GFP_KERNEL); - if (!dd_config) - return -ENOMEM; - if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) { - ret = -EFAULT; - goto free_ret; - } + dd_config = memdup_user(argp, mic_desc_size(&dd)); + if (IS_ERR(dd_config)) + return PTR_ERR(dd_config); + /* Ensure desc has not changed between the two reads */ if (memcmp(&dd, dd_config, sizeof(dd))) { ret = -EINVAL; @@ -995,17 +992,12 @@ _unlock_ret: ret = vop_vdev_inited(vdev); if (ret) goto __unlock_ret; - buf = kzalloc(vdev->dd->config_len, GFP_KERNEL); - if (!buf) { - ret = -ENOMEM; + buf = memdup_user(argp, vdev->dd->config_len); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); goto __unlock_ret; } - if (copy_from_user(buf, argp, vdev->dd->config_len)) { - ret = -EFAULT; - goto done; - } ret = vop_virtio_config_change(vdev, buf); -done: kfree(buf); __unlock_ret: mutex_unlock(&vdev->vdev_mutex); diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig new file mode 100644 index 000000000000..4bbdb0d3c8ee --- /dev/null +++ b/drivers/misc/ocxl/Kconfig @@ -0,0 +1,31 @@ +# +# Open Coherent Accelerator (OCXL) compatible devices +# + +config OCXL_BASE + bool + default n + select PPC_COPRO_BASE + +config OCXL + tristate "OpenCAPI coherent accelerator support" + depends on PPC_POWERNV && PCI && EEH + select OCXL_BASE + default m + help + Select this option to enable the ocxl driver for Open + Coherent Accelerator Processor Interface (OpenCAPI) devices. 
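The vop_vringh.c hunks above fold the open-coded kzalloc() + copy_from_user() pair into memdup_user(), which allocates, copies, and reports failure through ERR_PTR() in a single call. A minimal sketch of the same idiom; the helper and its arguments are hypothetical, not part of this patch:

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>

/* Hypothetical ioctl-style helper showing the memdup_user() pattern */
static long copy_desc_from_user(void __user *argp, size_t size)
{
	void *desc;

	desc = memdup_user(argp, size);
	if (IS_ERR(desc))
		return PTR_ERR(desc);	/* -ENOMEM or -EFAULT */

	/* ... validate and consume the descriptor ... */

	kfree(desc);
	return 0;
}

Note the caller still releases the buffer with kfree(), exactly as vop_ioctl() does after the conversion.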
+ + OpenCAPI allows FPGA and ASIC accelerators to be coherently + attached to a CPU over an OpenCAPI link. + + The ocxl driver enables userspace programs to access these + accelerators through devices in /dev/ocxl/. + + For more information, see http://opencapi.org. + + This is not to be confused with the support for IBM CAPI + accelerators (CONFIG_CXL), which are PCI-based instead of a + dedicated OpenCAPI link, and don't follow the same protocol. + + If unsure, say N. diff --git a/drivers/misc/ocxl/Makefile b/drivers/misc/ocxl/Makefile new file mode 100644 index 000000000000..5229dcda8297 --- /dev/null +++ b/drivers/misc/ocxl/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0+ +ccflags-$(CONFIG_PPC_WERROR) += -Werror + +ocxl-y += main.o pci.o config.o file.o pasid.o +ocxl-y += link.o context.o afu_irq.o sysfs.o trace.o +obj-$(CONFIG_OCXL) += ocxl.o + +# For tracepoints to include our trace.h from tracepoint infrastructure: +CFLAGS_trace.o := -I$(src) + +# ccflags-y += -DDEBUG diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c new file mode 100644 index 000000000000..e70cfa24577f --- /dev/null +++ b/drivers/misc/ocxl/afu_irq.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include <linux/interrupt.h> +#include <linux/eventfd.h> +#include <asm/pnv-ocxl.h> +#include "ocxl_internal.h" +#include "trace.h" + +struct afu_irq { + int id; + int hw_irq; + unsigned int virq; + char *name; + u64 trigger_page; + struct eventfd_ctx *ev_ctx; +}; + +static int irq_offset_to_id(struct ocxl_context *ctx, u64 offset) +{ + return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT; +} + +static u64 irq_id_to_offset(struct ocxl_context *ctx, int id) +{ + return ctx->afu->irq_base_offset + (id << PAGE_SHIFT); +} + +static irqreturn_t afu_irq_handler(int virq, void *data) +{ + struct afu_irq *irq = (struct afu_irq *) data; + + trace_ocxl_afu_irq_receive(virq); + if (irq->ev_ctx) + eventfd_signal(irq->ev_ctx, 1); + return IRQ_HANDLED; +} + +static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq) +{ + int rc; + + irq->virq = irq_create_mapping(NULL, irq->hw_irq); + if (!irq->virq) { + pr_err("irq_create_mapping failed\n"); + return -ENOMEM; + } + pr_debug("hw_irq %d mapped to virq %u\n", irq->hw_irq, irq->virq); + + irq->name = kasprintf(GFP_KERNEL, "ocxl-afu-%u", irq->virq); + if (!irq->name) { + irq_dispose_mapping(irq->virq); + return -ENOMEM; + } + + rc = request_irq(irq->virq, afu_irq_handler, 0, irq->name, irq); + if (rc) { + kfree(irq->name); + irq->name = NULL; + irq_dispose_mapping(irq->virq); + pr_err("request_irq failed: %d\n", rc); + return rc; + } + return 0; +} + +static void release_afu_irq(struct afu_irq *irq) +{ + free_irq(irq->virq, irq); + irq_dispose_mapping(irq->virq); + kfree(irq->name); +} + +int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset) +{ + struct afu_irq *irq; + int rc; + + irq = kzalloc(sizeof(struct afu_irq), GFP_KERNEL); + if (!irq) + return -ENOMEM; + + /* + * We limit the number of afu irqs per context and per link to + * avoid a single process or user depleting the pool of IPIs + */ + + mutex_lock(&ctx->irq_lock); + + irq->id = idr_alloc(&ctx->irq_idr, irq, 0, MAX_IRQ_PER_CONTEXT, + GFP_KERNEL); + if (irq->id < 0) { + rc = -ENOSPC; + goto err_unlock; + } + + rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq, + &irq->trigger_page); + if (rc) + goto err_idr; + + rc = setup_afu_irq(ctx, irq); + if (rc) + goto err_alloc; + + *irq_offset = irq_id_to_offset(ctx, irq->id); + 
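+	/*
+	 * The offset handed back is page-aligned and encodes the IRQ id
+	 * (see irq_id_to_offset() above); userspace passes it back to
+	 * mmap() and ocxl_mmap_fault() resolves it to the trigger page.
+	 */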
+ trace_ocxl_afu_irq_alloc(ctx->pasid, irq->id, irq->virq, irq->hw_irq, + *irq_offset); + mutex_unlock(&ctx->irq_lock); + return 0; + +err_alloc: + ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq); +err_idr: + idr_remove(&ctx->irq_idr, irq->id); +err_unlock: + mutex_unlock(&ctx->irq_lock); + kfree(irq); + return rc; +} + +static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx) +{ + trace_ocxl_afu_irq_free(ctx->pasid, irq->id); + if (ctx->mapping) + unmap_mapping_range(ctx->mapping, + irq_id_to_offset(ctx, irq->id), + 1 << PAGE_SHIFT, 1); + release_afu_irq(irq); + if (irq->ev_ctx) + eventfd_ctx_put(irq->ev_ctx); + ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq); + kfree(irq); +} + +int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset) +{ + struct afu_irq *irq; + int id = irq_offset_to_id(ctx, irq_offset); + + mutex_lock(&ctx->irq_lock); + + irq = idr_find(&ctx->irq_idr, id); + if (!irq) { + mutex_unlock(&ctx->irq_lock); + return -EINVAL; + } + idr_remove(&ctx->irq_idr, irq->id); + afu_irq_free(irq, ctx); + mutex_unlock(&ctx->irq_lock); + return 0; +} + +void ocxl_afu_irq_free_all(struct ocxl_context *ctx) +{ + struct afu_irq *irq; + int id; + + mutex_lock(&ctx->irq_lock); + idr_for_each_entry(&ctx->irq_idr, irq, id) + afu_irq_free(irq, ctx); + mutex_unlock(&ctx->irq_lock); +} + +int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, int eventfd) +{ + struct afu_irq *irq; + struct eventfd_ctx *ev_ctx; + int rc = 0, id = irq_offset_to_id(ctx, irq_offset); + + mutex_lock(&ctx->irq_lock); + irq = idr_find(&ctx->irq_idr, id); + if (!irq) { + rc = -EINVAL; + goto unlock; + } + + ev_ctx = eventfd_ctx_fdget(eventfd); + if (IS_ERR(ev_ctx)) { + rc = -EINVAL; + goto unlock; + } + + irq->ev_ctx = ev_ctx; +unlock: + mutex_unlock(&ctx->irq_lock); + return rc; +} + +u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset) +{ + struct afu_irq *irq; + int id = irq_offset_to_id(ctx, irq_offset); + u64 addr = 0; + + mutex_lock(&ctx->irq_lock); + irq = idr_find(&ctx->irq_idr, id); + if (irq) + addr = irq->trigger_page; + mutex_unlock(&ctx->irq_lock); + return addr; +} diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c new file mode 100644 index 000000000000..2e30de9c694a --- /dev/null +++ b/drivers/misc/ocxl/config.c @@ -0,0 +1,723 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. 
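afu_irq.c above routes each AFU interrupt to an eventfd: afu_irq_handler() does nothing but signal the context userspace registered. Combined with the ioctls defined later in file.c, a userspace consumer might look like this sketch (the uapi header path is assumed, error handling trimmed):

#include <stdint.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <misc/ocxl.h>		/* assumed installed uapi header */

/* Sketch: allocate an AFU IRQ, bind an eventfd, wait for it to fire */
static int wait_for_afu_irq(int afu_fd)
{
	struct ocxl_ioctl_irq_fd fd_arg = { 0 };
	uint64_t irq_offset, count;
	uint8_t *trigger;
	int efd = eventfd(0, 0);

	ioctl(afu_fd, OCXL_IOCTL_IRQ_ALLOC, &irq_offset);

	fd_arg.irq_offset = irq_offset;
	fd_arg.eventfd = efd;
	ioctl(afu_fd, OCXL_IOCTL_IRQ_SET_FD, &fd_arg);

	/*
	 * The trigger page must be mapped write-only:
	 * check_mmap_afu_irq() rejects readable or executable mappings.
	 */
	trigger = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_WRITE, MAP_SHARED,
		       afu_fd, (off_t)irq_offset);

	/* 'trigger' is handed to the AFU, which writes it to raise the IRQ */
	(void)trigger;

	/* blocks until afu_irq_handler() calls eventfd_signal() */
	read(efd, &count, sizeof(count));
	return 0;
}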
+#include <linux/pci.h> +#include <asm/pnv-ocxl.h> +#include <misc/ocxl.h> +#include <misc/ocxl-config.h> + +#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit))) +#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s) + +#define OCXL_DVSEC_AFU_IDX_MASK GENMASK(5, 0) +#define OCXL_DVSEC_ACTAG_MASK GENMASK(11, 0) +#define OCXL_DVSEC_PASID_MASK GENMASK(19, 0) +#define OCXL_DVSEC_PASID_LOG_MASK GENMASK(4, 0) + +#define OCXL_DVSEC_TEMPL_VERSION 0x0 +#define OCXL_DVSEC_TEMPL_NAME 0x4 +#define OCXL_DVSEC_TEMPL_AFU_VERSION 0x1C +#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL 0x20 +#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ 0x28 +#define OCXL_DVSEC_TEMPL_MMIO_PP 0x30 +#define OCXL_DVSEC_TEMPL_MMIO_PP_SZ 0x38 +#define OCXL_DVSEC_TEMPL_MEM_SZ 0x3C +#define OCXL_DVSEC_TEMPL_WWID 0x40 + +#define OCXL_MAX_AFU_PER_FUNCTION 64 +#define OCXL_TEMPL_LEN 0x58 +#define OCXL_TEMPL_NAME_LEN 24 +#define OCXL_CFG_TIMEOUT 3 + +static int find_dvsec(struct pci_dev *dev, int dvsec_id) +{ + int vsec = 0; + u16 vendor, id; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, + OCXL_EXT_CAP_ID_DVSEC))) { + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, + &vendor); + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); + if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id) + return vsec; + } + return 0; +} + +static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) +{ + int vsec = 0; + u16 vendor, id; + u8 idx; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, + OCXL_EXT_CAP_ID_DVSEC))) { + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, + &vendor); + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); + + if (vendor == PCI_VENDOR_ID_IBM && + id == OCXL_DVSEC_AFU_CTRL_ID) { + pci_read_config_byte(dev, + vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, + &idx); + if (idx == afu_idx) + return vsec; + } + } + return 0; +} + +static int read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + u16 val; + int pos; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PASID); + if (!pos) { + /* + * PASID capability is not mandatory, but there + * shouldn't be any AFU + */ + dev_dbg(&dev->dev, "Function doesn't require any PASID\n"); + fn->max_pasid_log = -1; + goto out; + } + pci_read_config_word(dev, pos + PCI_PASID_CAP, &val); + fn->max_pasid_log = EXTRACT_BITS(val, 8, 12); + +out: + dev_dbg(&dev->dev, "PASID capability:\n"); + dev_dbg(&dev->dev, " Max PASID log = %d\n", fn->max_pasid_log); + return 0; +} + +static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int pos; + + pos = find_dvsec(dev, OCXL_DVSEC_TL_ID); + if (!pos && PCI_FUNC(dev->devfn) == 0) { + dev_err(&dev->dev, "Can't find TL DVSEC\n"); + return -ENODEV; + } + if (pos && PCI_FUNC(dev->devfn) != 0) { + dev_err(&dev->dev, "TL DVSEC is only allowed on function 0\n"); + return -ENODEV; + } + fn->dvsec_tl_pos = pos; + return 0; +} + +static int read_dvsec_function(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int pos, afu_present; + u32 val; + + pos = find_dvsec(dev, OCXL_DVSEC_FUNC_ID); + if (!pos) { + dev_err(&dev->dev, "Can't find function DVSEC\n"); + return -ENODEV; + } + fn->dvsec_function_pos = pos; + + pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val); + afu_present = EXTRACT_BIT(val, 31); + if (!afu_present) { + fn->max_afu_index = -1; + dev_dbg(&dev->dev, "Function doesn't define any AFU\n"); + goto out; + } + fn->max_afu_index = EXTRACT_BITS(val, 24, 29); + +out: + dev_dbg(&dev->dev, "Function DVSEC:\n"); + dev_dbg(&dev->dev, " Max AFU index = %d\n", 
fn->max_afu_index); + return 0; +} + +static int read_dvsec_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int pos; + + if (fn->max_afu_index < 0) { + fn->dvsec_afu_info_pos = -1; + return 0; + } + + pos = find_dvsec(dev, OCXL_DVSEC_AFU_INFO_ID); + if (!pos) { + dev_err(&dev->dev, "Can't find AFU information DVSEC\n"); + return -ENODEV; + } + fn->dvsec_afu_info_pos = pos; + return 0; +} + +static int read_dvsec_vendor(struct pci_dev *dev) +{ + int pos; + u32 cfg, tlx, dlx; + + /* + * vendor specific DVSEC is optional + * + * It's currently only used on function 0 to specify the + * version of some logic blocks. Some older images may not + * even have it so we ignore any errors + */ + if (PCI_FUNC(dev->devfn) != 0) + return 0; + + pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID); + if (!pos) + return 0; + + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg); + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx); + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx); + + dev_dbg(&dev->dev, "Vendor specific DVSEC:\n"); + dev_dbg(&dev->dev, " CFG version = 0x%x\n", cfg); + dev_dbg(&dev->dev, " TLX version = 0x%x\n", tlx); + dev_dbg(&dev->dev, " DLX version = 0x%x\n", dlx); + return 0; +} + +static int validate_function(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + if (fn->max_pasid_log == -1 && fn->max_afu_index >= 0) { + dev_err(&dev->dev, + "AFUs are defined but no PASIDs are requested\n"); + return -EINVAL; + } + + if (fn->max_afu_index > OCXL_MAX_AFU_PER_FUNCTION) { + dev_err(&dev->dev, + "Max AFU index out of architectural limit (%d vs %d)\n", + fn->max_afu_index, OCXL_MAX_AFU_PER_FUNCTION); + return -EINVAL; + } + return 0; +} + +int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int rc; + + rc = read_pasid(dev, fn); + if (rc) { + dev_err(&dev->dev, "Invalid PASID configuration: %d\n", rc); + return -ENODEV; + } + + rc = read_dvsec_tl(dev, fn); + if (rc) { + dev_err(&dev->dev, + "Invalid Transaction Layer DVSEC configuration: %d\n", + rc); + return -ENODEV; + } + + rc = read_dvsec_function(dev, fn); + if (rc) { + dev_err(&dev->dev, + "Invalid Function DVSEC configuration: %d\n", rc); + return -ENODEV; + } + + rc = read_dvsec_afu_info(dev, fn); + if (rc) { + dev_err(&dev->dev, "Invalid AFU configuration: %d\n", rc); + return -ENODEV; + } + + rc = read_dvsec_vendor(dev); + if (rc) { + dev_err(&dev->dev, + "Invalid vendor specific DVSEC configuration: %d\n", + rc); + return -ENODEV; + } + + rc = validate_function(dev, fn); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_config_read_function); + +static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn, + int offset, u32 *data) +{ + u32 val; + unsigned long timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT); + int pos = fn->dvsec_afu_info_pos; + + /* Protect 'data valid' bit */ + if (EXTRACT_BIT(offset, 31)) { + dev_err(&dev->dev, "Invalid offset in AFU info DVSEC\n"); + return -EINVAL; + } + + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, offset); + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val); + while (!EXTRACT_BIT(val, 31)) { + if (time_after_eq(jiffies, timeout)) { + dev_err(&dev->dev, + "Timeout while reading AFU info DVSEC (offset=%d)\n", + offset); + return -EBUSY; + } + cpu_relax(); + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val); + } + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_DATA, data); + return 0; +} + +int ocxl_config_check_afu_index(struct pci_dev *dev, + struct 
ocxl_fn_config *fn, int afu_idx) +{ + u32 val; + int rc, templ_major, templ_minor, len; + + pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx); + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val); + if (rc) + return rc; + + /* AFU index map can have holes */ + if (!val) + return 0; + + templ_major = EXTRACT_BITS(val, 8, 15); + templ_minor = EXTRACT_BITS(val, 0, 7); + dev_dbg(&dev->dev, "AFU descriptor template version %d.%d\n", + templ_major, templ_minor); + + len = EXTRACT_BITS(val, 16, 31); + if (len != OCXL_TEMPL_LEN) { + dev_warn(&dev->dev, + "Unexpected template length in AFU information (%#x)\n", + len); + } + return 1; +} +EXPORT_SYMBOL_GPL(ocxl_config_check_afu_index); + +static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu) +{ + int i, rc; + u32 val, *ptr; + + BUILD_BUG_ON(OCXL_AFU_NAME_SZ < OCXL_TEMPL_NAME_LEN); + for (i = 0; i < OCXL_TEMPL_NAME_LEN; i += 4) { + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_NAME + i, &val); + if (rc) + return rc; + ptr = (u32 *) &afu->name[i]; + *ptr = val; + } + afu->name[OCXL_AFU_NAME_SZ - 1] = '\0'; /* play safe */ + return 0; +} + +static int read_afu_mmio(struct pci_dev *dev, struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu) +{ + int rc; + u32 val; + + /* + * Global MMIO + */ + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL, &val); + if (rc) + return rc; + afu->global_mmio_bar = EXTRACT_BITS(val, 0, 2); + afu->global_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL + 4, &val); + if (rc) + return rc; + afu->global_mmio_offset += (u64) val << 32; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ, &val); + if (rc) + return rc; + afu->global_mmio_size = val; + + /* + * Per-process MMIO + */ + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP, &val); + if (rc) + return rc; + afu->pp_mmio_bar = EXTRACT_BITS(val, 0, 2); + afu->pp_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP + 4, &val); + if (rc) + return rc; + afu->pp_mmio_offset += (u64) val << 32; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP_SZ, &val); + if (rc) + return rc; + afu->pp_mmio_stride = val; + + return 0; +} + +static int read_afu_control(struct pci_dev *dev, struct ocxl_afu_config *afu) +{ + int pos; + u8 val8; + u16 val16; + + pos = find_dvsec_afu_ctrl(dev, afu->idx); + if (!pos) { + dev_err(&dev->dev, "Can't find AFU control DVSEC for AFU %d\n", + afu->idx); + return -ENODEV; + } + afu->dvsec_afu_control_pos = pos; + + pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_SUP, &val8); + afu->pasid_supported_log = EXTRACT_BITS(val8, 0, 4); + + pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, &val16); + afu->actag_supported = EXTRACT_BITS(val16, 0, 11); + return 0; +} + +static bool char_allowed(int c) +{ + /* + * Permitted Characters : Alphanumeric, hyphen, underscore, comma + */ + if ((c >= 0x30 && c <= 0x39) /* digits */ || + (c >= 0x41 && c <= 0x5A) /* upper case */ || + (c >= 0x61 && c <= 0x7A) /* lower case */ || + c == 0 /* NULL */ || + c == 0x2D /* - */ || + c == 0x5F /* _ */ || + c == 0x2C /* , */) + return true; + return false; +} + +static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu) +{ + int i; + + if (!afu->name[0]) { + dev_err(&dev->dev, "Empty AFU name\n"); + return -EINVAL; + } + for (i = 0; i < OCXL_TEMPL_NAME_LEN; i++) { + if (!char_allowed(afu->name[i])) { + dev_err(&dev->dev, + "Invalid character 
in AFU name\n"); + return -EINVAL; + } + } + + if (afu->global_mmio_bar != 0 && + afu->global_mmio_bar != 2 && + afu->global_mmio_bar != 4) { + dev_err(&dev->dev, "Invalid global MMIO bar number\n"); + return -EINVAL; + } + if (afu->pp_mmio_bar != 0 && + afu->pp_mmio_bar != 2 && + afu->pp_mmio_bar != 4) { + dev_err(&dev->dev, "Invalid per-process MMIO bar number\n"); + return -EINVAL; + } + return 0; +} + +int ocxl_config_read_afu(struct pci_dev *dev, struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu, u8 afu_idx) +{ + int rc; + u32 val32; + + /* + * First, we need to write the AFU idx for the AFU we want to + * access. + */ + WARN_ON((afu_idx & OCXL_DVSEC_AFU_IDX_MASK) != afu_idx); + afu->idx = afu_idx; + pci_write_config_byte(dev, + fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX, + afu->idx); + + rc = read_afu_name(dev, fn, afu); + if (rc) + return rc; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_AFU_VERSION, &val32); + if (rc) + return rc; + afu->version_major = EXTRACT_BITS(val32, 24, 31); + afu->version_minor = EXTRACT_BITS(val32, 16, 23); + afu->afuc_type = EXTRACT_BITS(val32, 14, 15); + afu->afum_type = EXTRACT_BITS(val32, 12, 13); + afu->profile = EXTRACT_BITS(val32, 0, 7); + + rc = read_afu_mmio(dev, fn, afu); + if (rc) + return rc; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MEM_SZ, &val32); + if (rc) + return rc; + afu->log_mem_size = EXTRACT_BITS(val32, 0, 7); + + rc = read_afu_control(dev, afu); + if (rc) + return rc; + + dev_dbg(&dev->dev, "AFU configuration:\n"); + dev_dbg(&dev->dev, " name = %s\n", afu->name); + dev_dbg(&dev->dev, " version = %d.%d\n", afu->version_major, + afu->version_minor); + dev_dbg(&dev->dev, " global mmio bar = %hhu\n", afu->global_mmio_bar); + dev_dbg(&dev->dev, " global mmio offset = %#llx\n", + afu->global_mmio_offset); + dev_dbg(&dev->dev, " global mmio size = %#x\n", afu->global_mmio_size); + dev_dbg(&dev->dev, " pp mmio bar = %hhu\n", afu->pp_mmio_bar); + dev_dbg(&dev->dev, " pp mmio offset = %#llx\n", afu->pp_mmio_offset); + dev_dbg(&dev->dev, " pp mmio stride = %#x\n", afu->pp_mmio_stride); + dev_dbg(&dev->dev, " mem size (log) = %hhu\n", afu->log_mem_size); + dev_dbg(&dev->dev, " pasid supported (log) = %u\n", + afu->pasid_supported_log); + dev_dbg(&dev->dev, " actag supported = %u\n", + afu->actag_supported); + + rc = validate_afu(dev, afu); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_config_read_afu); + +int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled, + u16 *supported) +{ + int rc; + + /* + * This is really a simple wrapper for the kernel API, to + * avoid an external driver using ocxl as a library to call + * platform-dependent code + */ + rc = pnv_ocxl_get_actag(dev, base, enabled, supported); + if (rc) { + dev_err(&dev->dev, "Can't get actag for device: %d\n", rc); + return rc; + } + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_config_get_actag_info); + +void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base, + int actag_count) +{ + u16 val; + + val = actag_count & OCXL_DVSEC_ACTAG_MASK; + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_EN, val); + + val = actag_base & OCXL_DVSEC_ACTAG_MASK; + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_BASE, val); +} +EXPORT_SYMBOL_GPL(ocxl_config_set_afu_actag); + +int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count) +{ + return pnv_ocxl_get_pasid_count(dev, count); +} +EXPORT_SYMBOL_GPL(ocxl_config_get_pasid_info); + +void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base, + u32 
pasid_count_log) +{ + u8 val8; + u32 val32; + + val8 = pasid_count_log & OCXL_DVSEC_PASID_LOG_MASK; + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_EN, val8); + + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE, + &val32); + val32 &= ~OCXL_DVSEC_PASID_MASK; + val32 |= pasid_base & OCXL_DVSEC_PASID_MASK; + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE, + val32); +} +EXPORT_SYMBOL_GPL(ocxl_config_set_afu_pasid); + +void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable) +{ + u8 val; + + pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, &val); + if (enable) + val |= 1; + else + val &= 0xFE; + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, val); +} +EXPORT_SYMBOL_GPL(ocxl_config_set_afu_state); + +int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec) +{ + u32 val; + __be32 *be32ptr; + u8 timers; + int i, rc; + long recv_cap; + char *recv_rate; + + /* + * Skip on function != 0, as the TL can only be defined on 0 + */ + if (PCI_FUNC(dev->devfn) != 0) + return 0; + + recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL); + if (!recv_rate) + return -ENOMEM; + /* + * The spec defines 64 templates for messages in the + * Transaction Layer (TL). + * + * The host and device each support a subset, so we need to + * configure the transmitters on each side to send only + * templates the receiver understands, at a rate the receiver + * can process. Per the spec, template 0 must be supported by + * everybody. That's the template which has been used by the + * host and device so far. + * + * The sending rate limit must be set before the template is + * enabled. + */ + + /* + * Device -> host + */ + rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate, + PNV_OCXL_TL_RATE_BUF_SIZE); + if (rc) + goto out; + + for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) { + be32ptr = (__be32 *) &recv_rate[i]; + pci_write_config_dword(dev, + tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i, + be32_to_cpu(*be32ptr)); + } + val = recv_cap >> 32; + pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val); + val = recv_cap & GENMASK(31, 0); + pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val); + + /* + * Host -> device + */ + for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) { + pci_read_config_dword(dev, + tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i, + &val); + be32ptr = (__be32 *) &recv_rate[i]; + *be32ptr = cpu_to_be32(val); + } + pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val); + recv_cap = (long) val << 32; + pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val); + recv_cap |= val; + + rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate), + PNV_OCXL_TL_RATE_BUF_SIZE); + if (rc) + goto out; + + /* + * Opencapi commands needing to be retried are classified per + * the TL in 2 groups: short and long commands. + * + * The short back off timer is not used for now. It will be + * for opencapi 4.0. + * + * The long back off timer is typically used when an AFU hits + * a page fault but the NPU is already processing one. So the + * AFU needs to wait before it can resubmit. Having a value + * too low doesn't break anything, but can generate extra + * traffic on the link. + * We set it to 1.6 us for now. It's shorter than, but in the + * same order of magnitude as the time spent to process a page + * fault.
+ */ + timers = 0x2 << 4; /* long timer = 1.6 us */ + pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS, + timers); + + rc = 0; +out: + kfree(recv_rate); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_config_set_TL); + +int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid) +{ + u32 val; + unsigned long timeout; + + pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + &val); + if (EXTRACT_BIT(val, 20)) { + dev_err(&dev->dev, + "Can't terminate PASID %#x, previous termination didn't complete\n", + pasid); + return -EBUSY; + } + + val &= ~OCXL_DVSEC_PASID_MASK; + val |= pasid & OCXL_DVSEC_PASID_MASK; + val |= BIT(20); + pci_write_config_dword(dev, + afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + val); + + timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT); + pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + &val); + while (EXTRACT_BIT(val, 20)) { + if (time_after_eq(jiffies, timeout)) { + dev_err(&dev->dev, + "Timeout while waiting for AFU to terminate PASID %#x\n", + pasid); + return -EBUSY; + } + cpu_relax(); + pci_read_config_dword(dev, + afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + &val); + } + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_config_terminate_pasid); + +void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first, + u32 tag_count) +{ + u32 val; + + val = (tag_first & OCXL_DVSEC_ACTAG_MASK) << 16; + val |= tag_count & OCXL_DVSEC_ACTAG_MASK; + pci_write_config_dword(dev, func_dvsec + OCXL_DVSEC_FUNC_OFF_ACTAG, + val); +} +EXPORT_SYMBOL_GPL(ocxl_config_set_actag); diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c new file mode 100644 index 000000000000..909e8807824a --- /dev/null +++ b/drivers/misc/ocxl/context.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. 
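Both read_afu_info() and ocxl_config_terminate_pasid() above use the same bounded busy-wait: kick off the operation, then spin on the status bit with cpu_relax() until a jiffies deadline derived from OCXL_CFG_TIMEOUT. The bare shape of that idiom, with a hypothetical stub standing in for the config-space read:

#include <linux/jiffies.h>
#include <linux/pci.h>
#include <asm/processor.h>	/* cpu_relax() */

/* Stand-in for the real 'busy' bit read; hypothetical */
static bool hw_is_busy(struct pci_dev *dev)
{
	return false;
}

static int wait_until_idle(struct pci_dev *dev, unsigned int timeout_s)
{
	unsigned long timeout = jiffies + timeout_s * HZ;

	while (hw_is_busy(dev)) {
		if (time_after_eq(jiffies, timeout))
			return -EBUSY;	/* same rc the ocxl helpers return */
		cpu_relax();
	}
	return 0;
}

time_after_eq() is safe across jiffies wraparound, which is why the helpers compare against a precomputed deadline rather than counting iterations.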
+#include <linux/sched/mm.h> +#include "trace.h" +#include "ocxl_internal.h" + +struct ocxl_context *ocxl_context_alloc(void) +{ + return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL); +} + +int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, + struct address_space *mapping) +{ + int pasid; + + ctx->afu = afu; + mutex_lock(&afu->contexts_lock); + pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base, + afu->pasid_base + afu->pasid_max, GFP_KERNEL); + if (pasid < 0) { + mutex_unlock(&afu->contexts_lock); + return pasid; + } + afu->pasid_count++; + mutex_unlock(&afu->contexts_lock); + + ctx->pasid = pasid; + ctx->status = OPENED; + mutex_init(&ctx->status_mutex); + ctx->mapping = mapping; + mutex_init(&ctx->mapping_lock); + init_waitqueue_head(&ctx->events_wq); + mutex_init(&ctx->xsl_error_lock); + mutex_init(&ctx->irq_lock); + idr_init(&ctx->irq_idr); + /* + * Keep a reference on the AFU to make sure it's valid for the + * duration of the life of the context + */ + ocxl_afu_get(afu); + return 0; +} + +/* + * Callback for when a translation fault triggers an error + * data: a pointer to the context which triggered the fault + * addr: the address that triggered the error + * dsisr: the value of the PPC64 dsisr register + */ +static void xsl_fault_error(void *data, u64 addr, u64 dsisr) +{ + struct ocxl_context *ctx = (struct ocxl_context *) data; + + mutex_lock(&ctx->xsl_error_lock); + ctx->xsl_error.addr = addr; + ctx->xsl_error.dsisr = dsisr; + ctx->xsl_error.count++; + mutex_unlock(&ctx->xsl_error_lock); + + wake_up_all(&ctx->events_wq); +} + +int ocxl_context_attach(struct ocxl_context *ctx, u64 amr) +{ + int rc; + + mutex_lock(&ctx->status_mutex); + if (ctx->status != OPENED) { + rc = -EIO; + goto out; + } + + rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, + current->mm->context.id, 0, amr, current->mm, + xsl_fault_error, ctx); + if (rc) + goto out; + + ctx->status = ATTACHED; +out: + mutex_unlock(&ctx->status_mutex); + return rc; +} + +static int map_afu_irq(struct vm_area_struct *vma, unsigned long address, + u64 offset, struct ocxl_context *ctx) +{ + u64 trigger_addr; + + trigger_addr = ocxl_afu_irq_get_addr(ctx, offset); + if (!trigger_addr) + return VM_FAULT_SIGBUS; + + vm_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT); + return VM_FAULT_NOPAGE; +} + +static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address, + u64 offset, struct ocxl_context *ctx) +{ + u64 pp_mmio_addr; + int pasid_off; + + if (offset >= ctx->afu->config.pp_mmio_stride) + return VM_FAULT_SIGBUS; + + mutex_lock(&ctx->status_mutex); + if (ctx->status != ATTACHED) { + mutex_unlock(&ctx->status_mutex); + pr_debug("%s: Context not attached, failing mmio mmap\n", + __func__); + return VM_FAULT_SIGBUS; + } + + pasid_off = ctx->pasid - ctx->afu->pasid_base; + pp_mmio_addr = ctx->afu->pp_mmio_start + + pasid_off * ctx->afu->config.pp_mmio_stride + + offset; + + vm_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT); + mutex_unlock(&ctx->status_mutex); + return VM_FAULT_NOPAGE; +} + +static int ocxl_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct ocxl_context *ctx = vma->vm_file->private_data; + u64 offset; + int rc; + + offset = vmf->pgoff << PAGE_SHIFT; + pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__, + ctx->pasid, vmf->address, offset); + + if (offset < ctx->afu->irq_base_offset) + rc = map_pp_mmio(vma, vmf->address, offset, ctx); + else + rc = map_afu_irq(vma, vmf->address, offset, ctx); + return rc; +} + +static 
const struct vm_operations_struct ocxl_vmops = { + .fault = ocxl_mmap_fault, +}; + +static int check_mmap_afu_irq(struct ocxl_context *ctx, + struct vm_area_struct *vma) +{ + /* only one page */ + if (vma_pages(vma) != 1) + return -EINVAL; + + /* check offset validity */ + if (!ocxl_afu_irq_get_addr(ctx, vma->vm_pgoff << PAGE_SHIFT)) + return -EINVAL; + + /* + * trigger page should only be accessible in write mode. + * + * It's a bit theoretical, as a page mmapped with only + * PROT_WRITE is currently readable, but it doesn't hurt. + */ + if ((vma->vm_flags & VM_READ) || (vma->vm_flags & VM_EXEC) || + !(vma->vm_flags & VM_WRITE)) + return -EINVAL; + vma->vm_flags &= ~(VM_MAYREAD | VM_MAYEXEC); + return 0; +} + +static int check_mmap_mmio(struct ocxl_context *ctx, + struct vm_area_struct *vma) +{ + if ((vma_pages(vma) + vma->vm_pgoff) > + (ctx->afu->config.pp_mmio_stride >> PAGE_SHIFT)) + return -EINVAL; + return 0; +} + +int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma) +{ + int rc; + + if ((vma->vm_pgoff << PAGE_SHIFT) < ctx->afu->irq_base_offset) + rc = check_mmap_mmio(ctx, vma); + else + rc = check_mmap_afu_irq(ctx, vma); + if (rc) + return rc; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_ops = &ocxl_vmops; + return 0; +} + +int ocxl_context_detach(struct ocxl_context *ctx) +{ + struct pci_dev *dev; + int afu_control_pos; + enum ocxl_context_status status; + int rc; + + mutex_lock(&ctx->status_mutex); + status = ctx->status; + ctx->status = CLOSED; + mutex_unlock(&ctx->status_mutex); + if (status != ATTACHED) + return 0; + + dev = to_pci_dev(ctx->afu->fn->dev.parent); + afu_control_pos = ctx->afu->config.dvsec_afu_control_pos; + + mutex_lock(&ctx->afu->afu_control_lock); + rc = ocxl_config_terminate_pasid(dev, afu_control_pos, ctx->pasid); + mutex_unlock(&ctx->afu->afu_control_lock); + trace_ocxl_terminate_pasid(ctx->pasid, rc); + if (rc) { + /* + * If we timeout waiting for the AFU to terminate the + * pasid, then it's dangerous to clean up the Process + * Element entry in the SPA, as it may be referenced + * in the future by the AFU. In which case, we would + * checkstop because of an invalid PE access (FIR + * register 2, bit 42). So leave the PE + * defined. Caller shouldn't free the context so that + * PASID remains allocated. + * + * A link reset will be required to cleanup the AFU + * and the SPA. + */ + if (rc == -EBUSY) + return rc; + } + rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid); + if (rc) { + dev_warn(&ctx->afu->dev, + "Couldn't remove PE entry cleanly: %d\n", rc); + } + return 0; +} + +void ocxl_context_detach_all(struct ocxl_afu *afu) +{ + struct ocxl_context *ctx; + int tmp; + + mutex_lock(&afu->contexts_lock); + idr_for_each_entry(&afu->contexts_idr, ctx, tmp) { + ocxl_context_detach(ctx); + /* + * We are force detaching - remove any active mmio + * mappings so userspace cannot interfere with the + * card if it comes back. Easiest way to exercise + * this is to unbind and rebind the driver via sysfs + * while it is in use.
+ */ + mutex_lock(&ctx->mapping_lock); + if (ctx->mapping) + unmap_mapping_range(ctx->mapping, 0, 0, 1); + mutex_unlock(&ctx->mapping_lock); + } + mutex_unlock(&afu->contexts_lock); +} + +void ocxl_context_free(struct ocxl_context *ctx) +{ + mutex_lock(&ctx->afu->contexts_lock); + ctx->afu->pasid_count--; + idr_remove(&ctx->afu->contexts_idr, ctx->pasid); + mutex_unlock(&ctx->afu->contexts_lock); + + ocxl_afu_irq_free_all(ctx); + idr_destroy(&ctx->irq_idr); + /* reference to the AFU taken in ocxl_context_init */ + ocxl_afu_put(ctx->afu); + kfree(ctx); +} diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c new file mode 100644 index 000000000000..c90c1a578d2f --- /dev/null +++ b/drivers/misc/ocxl/file.c @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/sched/signal.h> +#include <linux/uaccess.h> +#include <uapi/misc/ocxl.h> +#include "ocxl_internal.h" + + +#define OCXL_NUM_MINORS 256 /* Total to reserve */ + +static dev_t ocxl_dev; +static struct class *ocxl_class; +static struct mutex minors_idr_lock; +static struct idr minors_idr; + +static struct ocxl_afu *find_and_get_afu(dev_t devno) +{ + struct ocxl_afu *afu; + int afu_minor; + + afu_minor = MINOR(devno); + /* + * We don't declare an RCU critical section here, as our AFU + * is protected by a reference counter on the device. By the time the + * minor number of a device is removed from the idr, the ref count of + * the device is already at 0, so no user API will access that AFU and + * this function can't return it. + */ + afu = idr_find(&minors_idr, afu_minor); + if (afu) + ocxl_afu_get(afu); + return afu; +} + +static int allocate_afu_minor(struct ocxl_afu *afu) +{ + int minor; + + mutex_lock(&minors_idr_lock); + minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL); + mutex_unlock(&minors_idr_lock); + return minor; +} + +static void free_afu_minor(struct ocxl_afu *afu) +{ + mutex_lock(&minors_idr_lock); + idr_remove(&minors_idr, MINOR(afu->dev.devt)); + mutex_unlock(&minors_idr_lock); +} + +static int afu_open(struct inode *inode, struct file *file) +{ + struct ocxl_afu *afu; + struct ocxl_context *ctx; + int rc; + + pr_debug("%s for device %x\n", __func__, inode->i_rdev); + + afu = find_and_get_afu(inode->i_rdev); + if (!afu) + return -ENODEV; + + ctx = ocxl_context_alloc(); + if (!ctx) { + rc = -ENOMEM; + goto put_afu; + } + + rc = ocxl_context_init(ctx, afu, inode->i_mapping); + if (rc) + goto put_afu; + file->private_data = ctx; + ocxl_afu_put(afu); + return 0; + +put_afu: + ocxl_afu_put(afu); + return rc; +} + +static long afu_ioctl_attach(struct ocxl_context *ctx, + struct ocxl_ioctl_attach __user *uarg) +{ + struct ocxl_ioctl_attach arg; + u64 amr = 0; + int rc; + + pr_debug("%s for context %d\n", __func__, ctx->pasid); + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + /* Make sure reserved fields are not set for forward compatibility */ + if (arg.reserved1 || arg.reserved2 || arg.reserved3) + return -EINVAL; + + amr = arg.amr & mfspr(SPRN_UAMOR); + rc = ocxl_context_attach(ctx, amr); + return rc; +} + +#define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \ + x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" : \ + x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" : \ + x == OCXL_IOCTL_IRQ_SET_FD ? 
"IRQ_SET_FD" : \ + "UNKNOWN") + +static long afu_ioctl(struct file *file, unsigned int cmd, + unsigned long args) +{ + struct ocxl_context *ctx = file->private_data; + struct ocxl_ioctl_irq_fd irq_fd; + u64 irq_offset; + long rc; + + pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid, + CMD_STR(cmd)); + + if (ctx->status == CLOSED) + return -EIO; + + switch (cmd) { + case OCXL_IOCTL_ATTACH: + rc = afu_ioctl_attach(ctx, + (struct ocxl_ioctl_attach __user *) args); + break; + + case OCXL_IOCTL_IRQ_ALLOC: + rc = ocxl_afu_irq_alloc(ctx, &irq_offset); + if (!rc) { + rc = copy_to_user((u64 __user *) args, &irq_offset, + sizeof(irq_offset)); + if (rc) + ocxl_afu_irq_free(ctx, irq_offset); + } + break; + + case OCXL_IOCTL_IRQ_FREE: + rc = copy_from_user(&irq_offset, (u64 __user *) args, + sizeof(irq_offset)); + if (rc) + return -EFAULT; + rc = ocxl_afu_irq_free(ctx, irq_offset); + break; + + case OCXL_IOCTL_IRQ_SET_FD: + rc = copy_from_user(&irq_fd, (u64 __user *) args, + sizeof(irq_fd)); + if (rc) + return -EFAULT; + if (irq_fd.reserved) + return -EINVAL; + rc = ocxl_afu_irq_set_fd(ctx, irq_fd.irq_offset, + irq_fd.eventfd); + break; + + default: + rc = -EINVAL; + } + return rc; +} + +static long afu_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long args) +{ + return afu_ioctl(file, cmd, args); +} + +static int afu_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct ocxl_context *ctx = file->private_data; + + pr_debug("%s for context %d\n", __func__, ctx->pasid); + return ocxl_context_mmap(ctx, vma); +} + +static bool has_xsl_error(struct ocxl_context *ctx) +{ + bool ret; + + mutex_lock(&ctx->xsl_error_lock); + ret = !!ctx->xsl_error.addr; + mutex_unlock(&ctx->xsl_error_lock); + + return ret; +} + +/* + * Are there any events pending on the AFU + * ctx: The AFU context + * Returns: true if there are events pending + */ +static bool afu_events_pending(struct ocxl_context *ctx) +{ + if (has_xsl_error(ctx)) + return true; + return false; +} + +static unsigned int afu_poll(struct file *file, struct poll_table_struct *wait) +{ + struct ocxl_context *ctx = file->private_data; + unsigned int mask = 0; + bool closed; + + pr_debug("%s for context %d\n", __func__, ctx->pasid); + + poll_wait(file, &ctx->events_wq, wait); + + mutex_lock(&ctx->status_mutex); + closed = (ctx->status == CLOSED); + mutex_unlock(&ctx->status_mutex); + + if (afu_events_pending(ctx)) + mask = POLLIN | POLLRDNORM; + else if (closed) + mask = POLLERR; + + return mask; +} + +/* + * Populate the supplied buffer with a single XSL error + * ctx: The AFU context to report the error from + * header: the event header to populate + * buf: The buffer to write the body into (should be at least + * AFU_EVENT_BODY_XSL_ERROR_SIZE) + * Return: the amount of buffer that was populated + */ +static ssize_t append_xsl_error(struct ocxl_context *ctx, + struct ocxl_kernel_event_header *header, + char __user *buf) +{ + struct ocxl_kernel_event_xsl_fault_error body; + + memset(&body, 0, sizeof(body)); + + mutex_lock(&ctx->xsl_error_lock); + if (!ctx->xsl_error.addr) { + mutex_unlock(&ctx->xsl_error_lock); + return 0; + } + + body.addr = ctx->xsl_error.addr; + body.dsisr = ctx->xsl_error.dsisr; + body.count = ctx->xsl_error.count; + + ctx->xsl_error.addr = 0; + ctx->xsl_error.dsisr = 0; + ctx->xsl_error.count = 0; + + mutex_unlock(&ctx->xsl_error_lock); + + header->type = OCXL_AFU_EVENT_XSL_FAULT_ERROR; + + if (copy_to_user(buf, &body, sizeof(body))) + return -EFAULT; + + return sizeof(body); +} + +#define 
AFU_EVENT_BODY_MAX_SIZE	sizeof(struct ocxl_kernel_event_xsl_fault_error)
+
+/*
+ * Reports events on the AFU
+ * Format:
+ *	Header (struct ocxl_kernel_event_header)
+ *	Body (struct ocxl_kernel_event_*)
+ *	Header...
+ */
+static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
+		loff_t *off)
+{
+	struct ocxl_context *ctx = file->private_data;
+	struct ocxl_kernel_event_header header;
+	ssize_t used = 0;
+	DEFINE_WAIT(event_wait);
+
+	memset(&header, 0, sizeof(header));
+
+	/* Require offset to be 0 */
+	if (*off != 0)
+		return -EINVAL;
+
+	if (count < (sizeof(struct ocxl_kernel_event_header) +
+			AFU_EVENT_BODY_MAX_SIZE))
+		return -EINVAL;
+
+	for (;;) {
+		prepare_to_wait(&ctx->events_wq, &event_wait,
+				TASK_INTERRUPTIBLE);
+
+		if (afu_events_pending(ctx))
+			break;
+
+		if (ctx->status == CLOSED)
+			break;
+
+		if (file->f_flags & O_NONBLOCK) {
+			finish_wait(&ctx->events_wq, &event_wait);
+			return -EAGAIN;
+		}
+
+		if (signal_pending(current)) {
+			finish_wait(&ctx->events_wq, &event_wait);
+			return -ERESTARTSYS;
+		}
+
+		schedule();
+	}
+
+	finish_wait(&ctx->events_wq, &event_wait);
+
+	if (has_xsl_error(ctx)) {
+		/* used must be signed so this error check can work */
+		used = append_xsl_error(ctx, &header, buf + sizeof(header));
+		if (used < 0)
+			return used;
+	}
+
+	if (!afu_events_pending(ctx))
+		header.flags |= OCXL_KERNEL_EVENT_FLAG_LAST;
+
+	if (copy_to_user(buf, &header, sizeof(header)))
+		return -EFAULT;
+
+	used += sizeof(header);
+	return used;
+}
+
+static int afu_release(struct inode *inode, struct file *file)
+{
+	struct ocxl_context *ctx = file->private_data;
+	int rc;
+
+	pr_debug("%s for device %x\n", __func__, inode->i_rdev);
+	rc = ocxl_context_detach(ctx);
+	mutex_lock(&ctx->mapping_lock);
+	ctx->mapping = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	wake_up_all(&ctx->events_wq);
+	if (rc != -EBUSY)
+		ocxl_context_free(ctx);
+	return 0;
+}
+
+static const struct file_operations ocxl_afu_fops = {
+	.owner		= THIS_MODULE,
+	.open           = afu_open,
+	.unlocked_ioctl = afu_ioctl,
+	.compat_ioctl   = afu_compat_ioctl,
+	.mmap           = afu_mmap,
+	.poll           = afu_poll,
+	.read           = afu_read,
+	.release        = afu_release,
+};
+
+int ocxl_create_cdev(struct ocxl_afu *afu)
+{
+	int rc;
+
+	cdev_init(&afu->cdev, &ocxl_afu_fops);
+	rc = cdev_add(&afu->cdev, afu->dev.devt, 1);
+	if (rc) {
+		dev_err(&afu->dev, "Unable to add afu char device: %d\n", rc);
+		return rc;
+	}
+	return 0;
+}
+
+void ocxl_destroy_cdev(struct ocxl_afu *afu)
+{
+	cdev_del(&afu->cdev);
+}
+
+int ocxl_register_afu(struct ocxl_afu *afu)
+{
+	int minor;
+
+	minor = allocate_afu_minor(afu);
+	if (minor < 0)
+		return minor;
+	afu->dev.devt = MKDEV(MAJOR(ocxl_dev), minor);
+	afu->dev.class = ocxl_class;
+	return device_register(&afu->dev);
+}
+
+void ocxl_unregister_afu(struct ocxl_afu *afu)
+{
+	free_afu_minor(afu);
+}
+
+static char *ocxl_devnode(struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "ocxl/%s", dev_name(dev));
+}
+
+int ocxl_file_init(void)
+{
+	int rc;
+
+	mutex_init(&minors_idr_lock);
+	idr_init(&minors_idr);
+
+	rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl");
+	if (rc) {
+		pr_err("Unable to allocate ocxl major number: %d\n", rc);
+		return rc;
+	}
+
+	ocxl_class = class_create(THIS_MODULE, "ocxl");
+	if (IS_ERR(ocxl_class)) {
+		pr_err("Unable to create ocxl class\n");
+		unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
+		return PTR_ERR(ocxl_class);
+	}
+
+	ocxl_class->devnode = ocxl_devnode;
+	return 0;
+}
+
+void ocxl_file_exit(void)
+{
+	class_destroy(ocxl_class);
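+	/* undo alloc_chrdev_region() and the minors idr from ocxl_file_init() */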
+	unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
+	idr_destroy(&minors_idr);
+}
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
new file mode 100644
index 000000000000..f30790582dc0
--- /dev/null
+++ b/drivers/misc/ocxl/link.c
@@ -0,0 +1,647 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sched/mm.h>
+#include <linux/mutex.h>
+#include <linux/mmu_context.h>
+#include <asm/copro.h>
+#include <asm/pnv-ocxl.h>
+#include <misc/ocxl.h>
+#include "ocxl_internal.h"
+#include "trace.h"
+
+
+#define SPA_PASID_BITS		15
+#define SPA_PASID_MAX		((1 << SPA_PASID_BITS) - 1)
+#define SPA_PE_MASK		SPA_PASID_MAX
+#define SPA_SPA_SIZE_LOG	22 /* Each SPA is 4 MB */
+
+#define SPA_CFG_SF		(1ull << (63-0))
+#define SPA_CFG_TA		(1ull << (63-1))
+#define SPA_CFG_HV		(1ull << (63-3))
+#define SPA_CFG_UV		(1ull << (63-4))
+#define SPA_CFG_XLAT_hpt	(0ull << (63-6)) /* Hashed page table (HPT) mode */
+#define SPA_CFG_XLAT_roh	(2ull << (63-6)) /* Radix on HPT mode */
+#define SPA_CFG_XLAT_ror	(3ull << (63-6)) /* Radix on Radix mode */
+#define SPA_CFG_PR		(1ull << (63-49))
+#define SPA_CFG_TC		(1ull << (63-54))
+#define SPA_CFG_DR		(1ull << (63-59))
+
+#define SPA_XSL_TF		(1ull << (63-3))  /* Translation fault */
+#define SPA_XSL_S		(1ull << (63-38)) /* Store operation */
+
+#define SPA_PE_VALID		0x80000000
+
+
+struct pe_data {
+	struct mm_struct *mm;
+	/* callback to trigger when a translation fault occurs */
+	void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr);
+	/* opaque pointer to be passed to the above callback */
+	void *xsl_err_data;
+	struct rcu_head rcu;
+};
+
+struct spa {
+	struct ocxl_process_element *spa_mem;
+	int spa_order;
+	struct mutex spa_lock;
+	struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */
+	char *irq_name;
+	int virq;
+	void __iomem *reg_dsisr;
+	void __iomem *reg_dar;
+	void __iomem *reg_tfc;
+	void __iomem *reg_pe_handle;
+	/*
+	 * The following fields are used by the memory fault
+	 * interrupt handler. We can only have one interrupt at a
+	 * time. The NPU won't raise another interrupt until the
+	 * previous one has been ack'd by writing to the TFC register.
+	 */
+	struct xsl_fault {
+		struct work_struct fault_work;
+		u64 pe;
+		u64 dsisr;
+		u64 dar;
+		struct pe_data pe_data;
+	} xsl_fault;
+};
+
+/*
+ * An opencapi link can be used by several PCI functions. We have
+ * one link per device slot.
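+ * A slot is identified by the (domain, bus, device) triple kept in
+ * struct link below, which is why all functions of a device end up
+ * sharing the same link.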
+ * + * A linked list of opencapi links should suffice, as there's a + * limited number of opencapi slots on a system and lookup is only + * done when the device is probed + */ +struct link { + struct list_head list; + struct kref ref; + int domain; + int bus; + int dev; + atomic_t irq_available; + struct spa *spa; + void *platform_data; +}; +static struct list_head links_list = LIST_HEAD_INIT(links_list); +static DEFINE_MUTEX(links_list_lock); + +enum xsl_response { + CONTINUE, + ADDRESS_ERROR, + RESTART, +}; + + +static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe) +{ + u64 reg; + + *dsisr = in_be64(spa->reg_dsisr); + *dar = in_be64(spa->reg_dar); + reg = in_be64(spa->reg_pe_handle); + *pe = reg & SPA_PE_MASK; +} + +static void ack_irq(struct spa *spa, enum xsl_response r) +{ + u64 reg = 0; + + /* continue is not supported */ + if (r == RESTART) + reg = PPC_BIT(31); + else if (r == ADDRESS_ERROR) + reg = PPC_BIT(30); + else + WARN(1, "Invalid irq response %d\n", r); + + if (reg) { + trace_ocxl_fault_ack(spa->spa_mem, spa->xsl_fault.pe, + spa->xsl_fault.dsisr, spa->xsl_fault.dar, reg); + out_be64(spa->reg_tfc, reg); + } +} + +static void xsl_fault_handler_bh(struct work_struct *fault_work) +{ + unsigned int flt = 0; + unsigned long access, flags, inv_flags = 0; + enum xsl_response r; + struct xsl_fault *fault = container_of(fault_work, struct xsl_fault, + fault_work); + struct spa *spa = container_of(fault, struct spa, xsl_fault); + + int rc; + + /* + * We need to release a reference on the mm whenever exiting this + * function (taken in the memory fault interrupt handler) + */ + rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr, + &flt); + if (rc) { + pr_debug("copro_handle_mm_fault failed: %d\n", rc); + if (fault->pe_data.xsl_err_cb) { + fault->pe_data.xsl_err_cb( + fault->pe_data.xsl_err_data, + fault->dar, fault->dsisr); + } + r = ADDRESS_ERROR; + goto ack; + } + + if (!radix_enabled()) { + /* + * update_mmu_cache() will not have loaded the hash + * since current->trap is not a 0x400 or 0x300, so + * just call hash_page_mm() here. + */ + access = _PAGE_PRESENT | _PAGE_READ; + if (fault->dsisr & SPA_XSL_S) + access |= _PAGE_WRITE; + + if (REGION_ID(fault->dar) != USER_REGION_ID) + access |= _PAGE_PRIVILEGED; + + local_irq_save(flags); + hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300, + inv_flags); + local_irq_restore(flags); + } + r = RESTART; +ack: + mmdrop(fault->pe_data.mm); + ack_irq(spa, r); +} + +static irqreturn_t xsl_fault_handler(int irq, void *data) +{ + struct link *link = (struct link *) data; + struct spa *spa = link->spa; + u64 dsisr, dar, pe_handle; + struct pe_data *pe_data; + struct ocxl_process_element *pe; + int lpid, pid, tid; + + read_irq(spa, &dsisr, &dar, &pe_handle); + trace_ocxl_fault(spa->spa_mem, pe_handle, dsisr, dar, -1); + + WARN_ON(pe_handle > SPA_PE_MASK); + pe = spa->spa_mem + pe_handle; + lpid = be32_to_cpu(pe->lpid); + pid = be32_to_cpu(pe->pid); + tid = be32_to_cpu(pe->tid); + /* We could be reading all null values here if the PE is being + * removed while an interrupt kicks in. It's not supposed to + * happen if the driver notified the AFU to terminate the + * PASID, and the AFU waited for pending operations before + * acknowledging. But even if it happens, we won't find a + * memory context below and fail silently, so it should be ok. 
+ */ + if (!(dsisr & SPA_XSL_TF)) { + WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr); + ack_irq(spa, ADDRESS_ERROR); + return IRQ_HANDLED; + } + + rcu_read_lock(); + pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle); + if (!pe_data) { + /* + * Could only happen if the driver didn't notify the + * AFU about PASID termination before removing the PE, + * or the AFU didn't wait for all memory access to + * have completed. + * + * Either way, we fail early, but we shouldn't log an + * error message, as it is a valid (if unexpected) + * scenario + */ + rcu_read_unlock(); + pr_debug("Unknown mm context for xsl interrupt\n"); + ack_irq(spa, ADDRESS_ERROR); + return IRQ_HANDLED; + } + WARN_ON(pe_data->mm->context.id != pid); + + spa->xsl_fault.pe = pe_handle; + spa->xsl_fault.dar = dar; + spa->xsl_fault.dsisr = dsisr; + spa->xsl_fault.pe_data = *pe_data; + mmgrab(pe_data->mm); /* mm count is released by bottom half */ + + rcu_read_unlock(); + schedule_work(&spa->xsl_fault.fault_work); + return IRQ_HANDLED; +} + +static void unmap_irq_registers(struct spa *spa) +{ + pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc, + spa->reg_pe_handle); +} + +static int map_irq_registers(struct pci_dev *dev, struct spa *spa) +{ + return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar, + &spa->reg_tfc, &spa->reg_pe_handle); +} + +static int setup_xsl_irq(struct pci_dev *dev, struct link *link) +{ + struct spa *spa = link->spa; + int rc; + int hwirq; + + rc = pnv_ocxl_get_xsl_irq(dev, &hwirq); + if (rc) + return rc; + + rc = map_irq_registers(dev, spa); + if (rc) + return rc; + + spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x", + link->domain, link->bus, link->dev); + if (!spa->irq_name) { + unmap_irq_registers(spa); + dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n"); + return -ENOMEM; + } + /* + * At some point, we'll need to look into allowing a higher + * number of interrupts. Could we have an IRQ domain per link? 
+ */ + spa->virq = irq_create_mapping(NULL, hwirq); + if (!spa->virq) { + kfree(spa->irq_name); + unmap_irq_registers(spa); + dev_err(&dev->dev, + "irq_create_mapping failed for translation interrupt\n"); + return -EINVAL; + } + + dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq); + + rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name, + link); + if (rc) { + irq_dispose_mapping(spa->virq); + kfree(spa->irq_name); + unmap_irq_registers(spa); + dev_err(&dev->dev, + "request_irq failed for translation interrupt: %d\n", + rc); + return -EINVAL; + } + return 0; +} + +static void release_xsl_irq(struct link *link) +{ + struct spa *spa = link->spa; + + if (spa->virq) { + free_irq(spa->virq, link); + irq_dispose_mapping(spa->virq); + } + kfree(spa->irq_name); + unmap_irq_registers(spa); +} + +static int alloc_spa(struct pci_dev *dev, struct link *link) +{ + struct spa *spa; + + spa = kzalloc(sizeof(struct spa), GFP_KERNEL); + if (!spa) + return -ENOMEM; + + mutex_init(&spa->spa_lock); + INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL); + INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh); + + spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT; + spa->spa_mem = (struct ocxl_process_element *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order); + if (!spa->spa_mem) { + dev_err(&dev->dev, "Can't allocate Shared Process Area\n"); + kfree(spa); + return -ENOMEM; + } + pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus, + link->dev, spa->spa_mem); + + link->spa = spa; + return 0; +} + +static void free_spa(struct link *link) +{ + struct spa *spa = link->spa; + + pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus, + link->dev); + + if (spa && spa->spa_mem) { + free_pages((unsigned long) spa->spa_mem, spa->spa_order); + kfree(spa); + link->spa = NULL; + } +} + +static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link) +{ + struct link *link; + int rc; + + link = kzalloc(sizeof(struct link), GFP_KERNEL); + if (!link) + return -ENOMEM; + + kref_init(&link->ref); + link->domain = pci_domain_nr(dev->bus); + link->bus = dev->bus->number; + link->dev = PCI_SLOT(dev->devfn); + atomic_set(&link->irq_available, MAX_IRQ_PER_LINK); + + rc = alloc_spa(dev, link); + if (rc) + goto err_free; + + rc = setup_xsl_irq(dev, link); + if (rc) + goto err_spa; + + /* platform specific hook */ + rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask, + &link->platform_data); + if (rc) + goto err_xsl_irq; + + *out_link = link; + return 0; + +err_xsl_irq: + release_xsl_irq(link); +err_spa: + free_spa(link); +err_free: + kfree(link); + return rc; +} + +static void free_link(struct link *link) +{ + release_xsl_irq(link); + free_spa(link); + kfree(link); +} + +int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle) +{ + int rc = 0; + struct link *link; + + mutex_lock(&links_list_lock); + list_for_each_entry(link, &links_list, list) { + /* The functions of a device all share the same link */ + if (link->domain == pci_domain_nr(dev->bus) && + link->bus == dev->bus->number && + link->dev == PCI_SLOT(dev->devfn)) { + kref_get(&link->ref); + *link_handle = link; + goto unlock; + } + } + rc = alloc_link(dev, PE_mask, &link); + if (rc) + goto unlock; + + list_add(&link->list, &links_list); + *link_handle = link; +unlock: + mutex_unlock(&links_list_lock); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_link_setup); + +static void release_xsl(struct kref *ref) +{ + struct link *link = container_of(ref, struct link, ref); + + 
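	/* called via kref_put() with links_list_lock held, so list_del() is safe */
+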
list_del(&link->list); + /* call platform code before releasing data */ + pnv_ocxl_spa_release(link->platform_data); + free_link(link); +} + +void ocxl_link_release(struct pci_dev *dev, void *link_handle) +{ + struct link *link = (struct link *) link_handle; + + mutex_lock(&links_list_lock); + kref_put(&link->ref, release_xsl); + mutex_unlock(&links_list_lock); +} +EXPORT_SYMBOL_GPL(ocxl_link_release); + +static u64 calculate_cfg_state(bool kernel) +{ + u64 state; + + state = SPA_CFG_DR; + if (mfspr(SPRN_LPCR) & LPCR_TC) + state |= SPA_CFG_TC; + if (radix_enabled()) + state |= SPA_CFG_XLAT_ror; + else + state |= SPA_CFG_XLAT_hpt; + state |= SPA_CFG_HV; + if (kernel) { + if (mfmsr() & MSR_SF) + state |= SPA_CFG_SF; + } else { + state |= SPA_CFG_PR; + if (!test_tsk_thread_flag(current, TIF_32BIT)) + state |= SPA_CFG_SF; + } + return state; +} + +int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, + u64 amr, struct mm_struct *mm, + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), + void *xsl_err_data) +{ + struct link *link = (struct link *) link_handle; + struct spa *spa = link->spa; + struct ocxl_process_element *pe; + int pe_handle, rc = 0; + struct pe_data *pe_data; + + BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128); + if (pasid > SPA_PASID_MAX) + return -EINVAL; + + mutex_lock(&spa->spa_lock); + pe_handle = pasid & SPA_PE_MASK; + pe = spa->spa_mem + pe_handle; + + if (pe->software_state) { + rc = -EBUSY; + goto unlock; + } + + pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL); + if (!pe_data) { + rc = -ENOMEM; + goto unlock; + } + + pe_data->mm = mm; + pe_data->xsl_err_cb = xsl_err_cb; + pe_data->xsl_err_data = xsl_err_data; + + memset(pe, 0, sizeof(struct ocxl_process_element)); + pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0)); + pe->lpid = cpu_to_be32(mfspr(SPRN_LPID)); + pe->pid = cpu_to_be32(pidr); + pe->tid = cpu_to_be32(tidr); + pe->amr = cpu_to_be64(amr); + pe->software_state = cpu_to_be32(SPA_PE_VALID); + + mm_context_add_copro(mm); + /* + * Barrier is to make sure PE is visible in the SPA before it + * is used by the device. It also helps with the global TLBI + * invalidation + */ + mb(); + radix_tree_insert(&spa->pe_tree, pe_handle, pe_data); + + /* + * The mm must stay valid for as long as the device uses it. We + * lower the count when the context is removed from the SPA. + * + * We grab mm_count (and not mm_users), as we don't want to + * end up in a circular dependency if a process mmaps its + * mmio, therefore incrementing the file ref count when + * calling mmap(), and forgets to unmap before exiting. In + * that scenario, when the kernel handles the death of the + * process, the file is not cleaned because unmap was not + * called, and the mm wouldn't be freed because we would still + * have a reference on mm_users. Incrementing mm_count solves + * the problem. 
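+	 *
+	 * For reference, both pairs are standard mm API:
+	 *   mmget()/mmput()    pin mm_users and thus the address space;
+	 *   mmgrab()/mmdrop()  pin mm_count and only the mm_struct itself.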
+ */ + mmgrab(mm); + trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr); +unlock: + mutex_unlock(&spa->spa_lock); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_link_add_pe); + +int ocxl_link_remove_pe(void *link_handle, int pasid) +{ + struct link *link = (struct link *) link_handle; + struct spa *spa = link->spa; + struct ocxl_process_element *pe; + struct pe_data *pe_data; + int pe_handle, rc; + + if (pasid > SPA_PASID_MAX) + return -EINVAL; + + /* + * About synchronization with our memory fault handler: + * + * Before removing the PE, the driver is supposed to have + * notified the AFU, which should have cleaned up and make + * sure the PASID is no longer in use, including pending + * interrupts. However, there's no way to be sure... + * + * We clear the PE and remove the context from our radix + * tree. From that point on, any new interrupt for that + * context will fail silently, which is ok. As mentioned + * above, that's not expected, but it could happen if the + * driver or AFU didn't do the right thing. + * + * There could still be a bottom half running, but we don't + * need to wait/flush, as it is managing a reference count on + * the mm it reads from the radix tree. + */ + pe_handle = pasid & SPA_PE_MASK; + pe = spa->spa_mem + pe_handle; + + mutex_lock(&spa->spa_lock); + + if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) { + rc = -EINVAL; + goto unlock; + } + + trace_ocxl_context_remove(current->pid, spa->spa_mem, pasid, + be32_to_cpu(pe->pid), be32_to_cpu(pe->tid)); + + memset(pe, 0, sizeof(struct ocxl_process_element)); + /* + * The barrier makes sure the PE is removed from the SPA + * before we clear the NPU context cache below, so that the + * old PE cannot be reloaded erroneously. + */ + mb(); + + /* + * hook to platform code + * On powerpc, the entry needs to be cleared from the context + * cache of the NPU. + */ + rc = pnv_ocxl_spa_remove_pe(link->platform_data, pe_handle); + WARN_ON(rc); + + pe_data = radix_tree_delete(&spa->pe_tree, pe_handle); + if (!pe_data) { + WARN(1, "Couldn't find pe data when removing PE\n"); + } else { + mm_context_remove_copro(pe_data->mm); + mmdrop(pe_data->mm); + kfree_rcu(pe_data, rcu); + } +unlock: + mutex_unlock(&spa->spa_lock); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_link_remove_pe); + +int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr) +{ + struct link *link = (struct link *) link_handle; + int rc, irq; + u64 addr; + + if (atomic_dec_if_positive(&link->irq_available) < 0) + return -ENOSPC; + + rc = pnv_ocxl_alloc_xive_irq(&irq, &addr); + if (rc) { + atomic_inc(&link->irq_available); + return rc; + } + + *hw_irq = irq; + *trigger_addr = addr; + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc); + +void ocxl_link_free_irq(void *link_handle, int hw_irq) +{ + struct link *link = (struct link *) link_handle; + + pnv_ocxl_free_xive_irq(hw_irq); + atomic_inc(&link->irq_available); +} +EXPORT_SYMBOL_GPL(ocxl_link_free_irq); diff --git a/drivers/misc/ocxl/main.c b/drivers/misc/ocxl/main.c new file mode 100644 index 000000000000..7210d9e059be --- /dev/null +++ b/drivers/misc/ocxl/main.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. 
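Worth noting from ocxl_link_irq_alloc() above: atomic_dec_if_positive() only performs the decrement when the result stays non-negative, so concurrent allocators can never overdraw the per-link budget of MAX_IRQ_PER_LINK interrupts. A minimal self-contained sketch of that pattern; the budget value matches MAX_IRQ_PER_LINK, while the function names are illustrative:

	/* Sketch: race-free allocation against a fixed budget. */
	static atomic_t irq_budget = ATOMIC_INIT(2000);	/* cf. MAX_IRQ_PER_LINK */

	static int budget_take(void)
	{
		/* decrements only if the result stays >= 0; otherwise returns -1 */
		if (atomic_dec_if_positive(&irq_budget) < 0)
			return -ENOSPC;	/* exhausted, nothing was consumed */
		return 0;
	}

	static void budget_put(void)
	{
		atomic_inc(&irq_budget);
	}

Because the decrement and the bound check are one atomic operation, no lock is needed around the allocation fast path.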
+#include <linux/module.h> +#include <linux/pci.h> +#include "ocxl_internal.h" + +static int __init init_ocxl(void) +{ + int rc = 0; + + rc = ocxl_file_init(); + if (rc) + return rc; + + rc = pci_register_driver(&ocxl_pci_driver); + if (rc) { + ocxl_file_exit(); + return rc; + } + return 0; +} + +static void exit_ocxl(void) +{ + pci_unregister_driver(&ocxl_pci_driver); + ocxl_file_exit(); +} + +module_init(init_ocxl); +module_exit(exit_ocxl); + +MODULE_DESCRIPTION("Open Coherent Accelerator"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h new file mode 100644 index 000000000000..5d421824afd9 --- /dev/null +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#ifndef _OCXL_INTERNAL_H_ +#define _OCXL_INTERNAL_H_ + +#include <linux/pci.h> +#include <linux/cdev.h> +#include <linux/list.h> +#include <misc/ocxl.h> + +#define MAX_IRQ_PER_LINK 2000 +#define MAX_IRQ_PER_CONTEXT MAX_IRQ_PER_LINK + +#define to_ocxl_function(d) container_of(d, struct ocxl_fn, dev) +#define to_ocxl_afu(d) container_of(d, struct ocxl_afu, dev) + +extern struct pci_driver ocxl_pci_driver; + + +struct ocxl_fn { + struct device dev; + int bar_used[3]; + struct ocxl_fn_config config; + struct list_head afu_list; + int pasid_base; + int actag_base; + int actag_enabled; + int actag_supported; + struct list_head pasid_list; + struct list_head actag_list; + void *link; +}; + +struct ocxl_afu { + struct ocxl_fn *fn; + struct list_head list; + struct device dev; + struct cdev cdev; + struct ocxl_afu_config config; + int pasid_base; + int pasid_count; /* opened contexts */ + int pasid_max; /* maximum number of contexts */ + int actag_base; + int actag_enabled; + struct mutex contexts_lock; + struct idr contexts_idr; + struct mutex afu_control_lock; + u64 global_mmio_start; + u64 irq_base_offset; + void __iomem *global_mmio_ptr; + u64 pp_mmio_start; + struct bin_attribute attr_global_mmio; +}; + +enum ocxl_context_status { + CLOSED, + OPENED, + ATTACHED, +}; + +// Contains metadata about a translation fault +struct ocxl_xsl_error { + u64 addr; // The address that triggered the fault + u64 dsisr; // the value of the dsisr register + u64 count; // The number of times this fault has been triggered +}; + +struct ocxl_context { + struct ocxl_afu *afu; + int pasid; + struct mutex status_mutex; + enum ocxl_context_status status; + struct address_space *mapping; + struct mutex mapping_lock; + wait_queue_head_t events_wq; + struct mutex xsl_error_lock; + struct ocxl_xsl_error xsl_error; + struct mutex irq_lock; + struct idr irq_idr; +}; + +struct ocxl_process_element { + __be64 config_state; + __be32 reserved1[11]; + __be32 lpid; + __be32 tid; + __be32 pid; + __be32 reserved2[10]; + __be64 amr; + __be32 reserved3[3]; + __be32 software_state; +}; + + +extern struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu); +extern void ocxl_afu_put(struct ocxl_afu *afu); + +extern int ocxl_create_cdev(struct ocxl_afu *afu); +extern void ocxl_destroy_cdev(struct ocxl_afu *afu); +extern int ocxl_register_afu(struct ocxl_afu *afu); +extern void ocxl_unregister_afu(struct ocxl_afu *afu); + +extern int ocxl_file_init(void); +extern void ocxl_file_exit(void); + +extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size); +extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size); +extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size); +extern void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 
size); + +extern struct ocxl_context *ocxl_context_alloc(void); +extern int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, + struct address_space *mapping); +extern int ocxl_context_attach(struct ocxl_context *ctx, u64 amr); +extern int ocxl_context_mmap(struct ocxl_context *ctx, + struct vm_area_struct *vma); +extern int ocxl_context_detach(struct ocxl_context *ctx); +extern void ocxl_context_detach_all(struct ocxl_afu *afu); +extern void ocxl_context_free(struct ocxl_context *ctx); + +extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu); +extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu); + +extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset); +extern int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset); +extern void ocxl_afu_irq_free_all(struct ocxl_context *ctx); +extern int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, + int eventfd); +extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset); + +#endif /* _OCXL_INTERNAL_H_ */ diff --git a/drivers/misc/ocxl/pasid.c b/drivers/misc/ocxl/pasid.c new file mode 100644 index 000000000000..d14cb56e6920 --- /dev/null +++ b/drivers/misc/ocxl/pasid.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include "ocxl_internal.h" + + +struct id_range { + struct list_head list; + u32 start; + u32 end; +}; + +#ifdef DEBUG +static void dump_list(struct list_head *head, char *type_str) +{ + struct id_range *cur; + + pr_debug("%s ranges allocated:\n", type_str); + list_for_each_entry(cur, head, list) { + pr_debug("Range %d->%d\n", cur->start, cur->end); + } +} +#endif + +static int range_alloc(struct list_head *head, u32 size, int max_id, + char *type_str) +{ + struct list_head *pos; + struct id_range *cur, *new; + int rc, last_end; + + new = kmalloc(sizeof(struct id_range), GFP_KERNEL); + if (!new) + return -ENOMEM; + + pos = head; + last_end = -1; + list_for_each_entry(cur, head, list) { + if ((cur->start - last_end) > size) + break; + last_end = cur->end; + pos = &cur->list; + } + + new->start = last_end + 1; + new->end = new->start + size - 1; + + if (new->end > max_id) { + kfree(new); + rc = -ENOSPC; + } else { + list_add(&new->list, pos); + rc = new->start; + } + +#ifdef DEBUG + dump_list(head, type_str); +#endif + return rc; +} + +static void range_free(struct list_head *head, u32 start, u32 size, + char *type_str) +{ + bool found = false; + struct id_range *cur, *tmp; + + list_for_each_entry_safe(cur, tmp, head, list) { + if (cur->start == start && cur->end == (start + size - 1)) { + found = true; + list_del(&cur->list); + kfree(cur); + break; + } + } + WARN_ON(!found); +#ifdef DEBUG + dump_list(head, type_str); +#endif +} + +int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size) +{ + int max_pasid; + + if (fn->config.max_pasid_log < 0) + return -ENOSPC; + max_pasid = 1 << fn->config.max_pasid_log; + return range_alloc(&fn->pasid_list, size, max_pasid, "afu pasid"); +} + +void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size) +{ + return range_free(&fn->pasid_list, start, size, "afu pasid"); +} + +int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size) +{ + int max_actag; + + max_actag = fn->actag_enabled; + return range_alloc(&fn->actag_list, size, max_actag, "afu actag"); +} + +void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size) +{ + return range_free(&fn->actag_list, start, size, "afu actag"); +} diff --git a/drivers/misc/ocxl/pci.c b/drivers/misc/ocxl/pci.c new file mode 100644 index 
000000000000..0051d9ec76cc --- /dev/null +++ b/drivers/misc/ocxl/pci.c @@ -0,0 +1,585 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/idr.h> +#include <asm/pnv-ocxl.h> +#include "ocxl_internal.h" + +/* + * Any opencapi device which wants to use this 'generic' driver should + * use the 0x062B device ID. Vendors should define the subsystem + * vendor/device ID to help differentiate devices. + */ +static const struct pci_device_id ocxl_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x062B), }, + { } +}; +MODULE_DEVICE_TABLE(pci, ocxl_pci_tbl); + + +static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn) +{ + return (get_device(&fn->dev) == NULL) ? NULL : fn; +} + +static void ocxl_fn_put(struct ocxl_fn *fn) +{ + put_device(&fn->dev); +} + +struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu) +{ + return (get_device(&afu->dev) == NULL) ? NULL : afu; +} + +void ocxl_afu_put(struct ocxl_afu *afu) +{ + put_device(&afu->dev); +} + +static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn) +{ + struct ocxl_afu *afu; + + afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL); + if (!afu) + return NULL; + + mutex_init(&afu->contexts_lock); + mutex_init(&afu->afu_control_lock); + idr_init(&afu->contexts_idr); + afu->fn = fn; + ocxl_fn_get(fn); + return afu; +} + +static void free_afu(struct ocxl_afu *afu) +{ + idr_destroy(&afu->contexts_idr); + ocxl_fn_put(afu->fn); + kfree(afu); +} + +static void free_afu_dev(struct device *dev) +{ + struct ocxl_afu *afu = to_ocxl_afu(dev); + + ocxl_unregister_afu(afu); + free_afu(afu); +} + +static int set_afu_device(struct ocxl_afu *afu, const char *location) +{ + struct ocxl_fn *fn = afu->fn; + int rc; + + afu->dev.parent = &fn->dev; + afu->dev.release = free_afu_dev; + rc = dev_set_name(&afu->dev, "%s.%s.%hhu", afu->config.name, location, + afu->config.idx); + return rc; +} + +static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev) +{ + struct ocxl_fn *fn = afu->fn; + int actag_count, actag_offset; + + /* + * if there were not enough actags for the function, each afu + * reduces its count as well + */ + actag_count = afu->config.actag_supported * + fn->actag_enabled / fn->actag_supported; + actag_offset = ocxl_actag_afu_alloc(fn, actag_count); + if (actag_offset < 0) { + dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n", + actag_count, actag_offset); + return actag_offset; + } + afu->actag_base = fn->actag_base + actag_offset; + afu->actag_enabled = actag_count; + + ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos, + afu->actag_base, afu->actag_enabled); + dev_dbg(&afu->dev, "actag base=%d enabled=%d\n", + afu->actag_base, afu->actag_enabled); + return 0; +} + +static void reclaim_afu_actag(struct ocxl_afu *afu) +{ + struct ocxl_fn *fn = afu->fn; + int start_offset, size; + + start_offset = afu->actag_base - fn->actag_base; + size = afu->actag_enabled; + ocxl_actag_afu_free(afu->fn, start_offset, size); +} + +static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev) +{ + struct ocxl_fn *fn = afu->fn; + int pasid_count, pasid_offset; + + /* + * We only support the case where the function configuration + * requested enough PASIDs to cover all AFUs. 
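+	 * For example, an AFU reporting pasid_supported_log == 9 is
+	 * assigned a contiguous range of 1 << 9 = 512 PASIDs below.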
+ */ + pasid_count = 1 << afu->config.pasid_supported_log; + pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count); + if (pasid_offset < 0) { + dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n", + pasid_count, pasid_offset); + return pasid_offset; + } + afu->pasid_base = fn->pasid_base + pasid_offset; + afu->pasid_count = 0; + afu->pasid_max = pasid_count; + + ocxl_config_set_afu_pasid(dev, afu->config.dvsec_afu_control_pos, + afu->pasid_base, + afu->config.pasid_supported_log); + dev_dbg(&afu->dev, "PASID base=%d, enabled=%d\n", + afu->pasid_base, pasid_count); + return 0; +} + +static void reclaim_afu_pasid(struct ocxl_afu *afu) +{ + struct ocxl_fn *fn = afu->fn; + int start_offset, size; + + start_offset = afu->pasid_base - fn->pasid_base; + size = 1 << afu->config.pasid_supported_log; + ocxl_pasid_afu_free(afu->fn, start_offset, size); +} + +static int reserve_fn_bar(struct ocxl_fn *fn, int bar) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + int rc, idx; + + if (bar != 0 && bar != 2 && bar != 4) + return -EINVAL; + + idx = bar >> 1; + if (fn->bar_used[idx]++ == 0) { + rc = pci_request_region(dev, bar, "ocxl"); + if (rc) + return rc; + } + return 0; +} + +static void release_fn_bar(struct ocxl_fn *fn, int bar) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + int idx; + + if (bar != 0 && bar != 2 && bar != 4) + return; + + idx = bar >> 1; + if (--fn->bar_used[idx] == 0) + pci_release_region(dev, bar); + WARN_ON(fn->bar_used[idx] < 0); +} + +static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev) +{ + int rc; + + rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar); + if (rc) + return rc; + + rc = reserve_fn_bar(afu->fn, afu->config.pp_mmio_bar); + if (rc) { + release_fn_bar(afu->fn, afu->config.global_mmio_bar); + return rc; + } + + afu->global_mmio_start = + pci_resource_start(dev, afu->config.global_mmio_bar) + + afu->config.global_mmio_offset; + afu->pp_mmio_start = + pci_resource_start(dev, afu->config.pp_mmio_bar) + + afu->config.pp_mmio_offset; + + afu->global_mmio_ptr = ioremap(afu->global_mmio_start, + afu->config.global_mmio_size); + if (!afu->global_mmio_ptr) { + release_fn_bar(afu->fn, afu->config.pp_mmio_bar); + release_fn_bar(afu->fn, afu->config.global_mmio_bar); + dev_err(&dev->dev, "Error mapping global mmio area\n"); + return -ENOMEM; + } + + /* + * Leave an empty page between the per-process mmio area and + * the AFU interrupt mappings + */ + afu->irq_base_offset = afu->config.pp_mmio_stride + PAGE_SIZE; + return 0; +} + +static void unmap_mmio_areas(struct ocxl_afu *afu) +{ + if (afu->global_mmio_ptr) { + iounmap(afu->global_mmio_ptr); + afu->global_mmio_ptr = NULL; + } + afu->global_mmio_start = 0; + afu->pp_mmio_start = 0; + release_fn_bar(afu->fn, afu->config.pp_mmio_bar); + release_fn_bar(afu->fn, afu->config.global_mmio_bar); +} + +static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev) +{ + int rc; + + rc = ocxl_config_read_afu(dev, &afu->fn->config, &afu->config, afu_idx); + if (rc) + return rc; + + rc = set_afu_device(afu, dev_name(&dev->dev)); + if (rc) + return rc; + + rc = assign_afu_actag(afu, dev); + if (rc) + return rc; + + rc = assign_afu_pasid(afu, dev); + if (rc) { + reclaim_afu_actag(afu); + return rc; + } + + rc = map_mmio_areas(afu, dev); + if (rc) { + reclaim_afu_pasid(afu); + reclaim_afu_actag(afu); + return rc; + } + return 0; +} + +static void deconfigure_afu(struct ocxl_afu *afu) +{ + unmap_mmio_areas(afu); + reclaim_afu_pasid(afu); + reclaim_afu_actag(afu); +} + +static int 
activate_afu(struct pci_dev *dev, struct ocxl_afu *afu) +{ + int rc; + + ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 1); + /* + * Char device creation is the last step, as processes can + * call our driver immediately, so all our inits must be finished. + */ + rc = ocxl_create_cdev(afu); + if (rc) + return rc; + return 0; +} + +static void deactivate_afu(struct ocxl_afu *afu) +{ + struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent); + + ocxl_destroy_cdev(afu); + ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 0); +} + +static int init_afu(struct pci_dev *dev, struct ocxl_fn *fn, u8 afu_idx) +{ + int rc; + struct ocxl_afu *afu; + + afu = alloc_afu(fn); + if (!afu) + return -ENOMEM; + + rc = configure_afu(afu, afu_idx, dev); + if (rc) { + free_afu(afu); + return rc; + } + + rc = ocxl_register_afu(afu); + if (rc) + goto err; + + rc = ocxl_sysfs_add_afu(afu); + if (rc) + goto err; + + rc = activate_afu(dev, afu); + if (rc) + goto err_sys; + + list_add_tail(&afu->list, &fn->afu_list); + return 0; + +err_sys: + ocxl_sysfs_remove_afu(afu); +err: + deconfigure_afu(afu); + device_unregister(&afu->dev); + return rc; +} + +static void remove_afu(struct ocxl_afu *afu) +{ + list_del(&afu->list); + ocxl_context_detach_all(afu); + deactivate_afu(afu); + ocxl_sysfs_remove_afu(afu); + deconfigure_afu(afu); + device_unregister(&afu->dev); +} + +static struct ocxl_fn *alloc_function(struct pci_dev *dev) +{ + struct ocxl_fn *fn; + + fn = kzalloc(sizeof(struct ocxl_fn), GFP_KERNEL); + if (!fn) + return NULL; + + INIT_LIST_HEAD(&fn->afu_list); + INIT_LIST_HEAD(&fn->pasid_list); + INIT_LIST_HEAD(&fn->actag_list); + return fn; +} + +static void free_function(struct ocxl_fn *fn) +{ + WARN_ON(!list_empty(&fn->afu_list)); + WARN_ON(!list_empty(&fn->pasid_list)); + kfree(fn); +} + +static void free_function_dev(struct device *dev) +{ + struct ocxl_fn *fn = to_ocxl_function(dev); + + free_function(fn); +} + +static int set_function_device(struct ocxl_fn *fn, struct pci_dev *dev) +{ + int rc; + + fn->dev.parent = &dev->dev; + fn->dev.release = free_function_dev; + rc = dev_set_name(&fn->dev, "ocxlfn.%s", dev_name(&dev->dev)); + if (rc) + return rc; + pci_set_drvdata(dev, fn); + return 0; +} + +static int assign_function_actag(struct ocxl_fn *fn) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + u16 base, enabled, supported; + int rc; + + rc = ocxl_config_get_actag_info(dev, &base, &enabled, &supported); + if (rc) + return rc; + + fn->actag_base = base; + fn->actag_enabled = enabled; + fn->actag_supported = supported; + + ocxl_config_set_actag(dev, fn->config.dvsec_function_pos, + fn->actag_base, fn->actag_enabled); + dev_dbg(&fn->dev, "actag range starting at %d, enabled %d\n", + fn->actag_base, fn->actag_enabled); + return 0; +} + +static int set_function_pasid(struct ocxl_fn *fn) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + int rc, desired_count, max_count; + + /* A function may not require any PASID */ + if (fn->config.max_pasid_log < 0) + return 0; + + rc = ocxl_config_get_pasid_info(dev, &max_count); + if (rc) + return rc; + + desired_count = 1 << fn->config.max_pasid_log; + + if (desired_count > max_count) { + dev_err(&fn->dev, + "Function requires more PASIDs than is available (%d vs. 
%d)\n", + desired_count, max_count); + return -ENOSPC; + } + + fn->pasid_base = 0; + return 0; +} + +static int configure_function(struct ocxl_fn *fn, struct pci_dev *dev) +{ + int rc; + + rc = pci_enable_device(dev); + if (rc) { + dev_err(&dev->dev, "pci_enable_device failed: %d\n", rc); + return rc; + } + + /* + * Once it has been confirmed to work on our hardware, we + * should reset the function, to force the adapter to restart + * from scratch. + * A function reset would also reset all its AFUs. + * + * Some hints for implementation: + * + * - there's not status bit to know when the reset is done. We + * should try reading the config space to know when it's + * done. + * - probably something like: + * Reset + * wait 100ms + * issue config read + * allow device up to 1 sec to return success on config + * read before declaring it broken + * + * Some shared logic on the card (CFG, TLX) won't be reset, so + * there's no guarantee that it will be enough. + */ + rc = ocxl_config_read_function(dev, &fn->config); + if (rc) + return rc; + + rc = set_function_device(fn, dev); + if (rc) + return rc; + + rc = assign_function_actag(fn); + if (rc) + return rc; + + rc = set_function_pasid(fn); + if (rc) + return rc; + + rc = ocxl_link_setup(dev, 0, &fn->link); + if (rc) + return rc; + + rc = ocxl_config_set_TL(dev, fn->config.dvsec_tl_pos); + if (rc) { + ocxl_link_release(dev, fn->link); + return rc; + } + return 0; +} + +static void deconfigure_function(struct ocxl_fn *fn) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + + ocxl_link_release(dev, fn->link); + pci_disable_device(dev); +} + +static struct ocxl_fn *init_function(struct pci_dev *dev) +{ + struct ocxl_fn *fn; + int rc; + + fn = alloc_function(dev); + if (!fn) + return ERR_PTR(-ENOMEM); + + rc = configure_function(fn, dev); + if (rc) { + free_function(fn); + return ERR_PTR(rc); + } + + rc = device_register(&fn->dev); + if (rc) { + deconfigure_function(fn); + device_unregister(&fn->dev); + return ERR_PTR(rc); + } + return fn; +} + +static void remove_function(struct ocxl_fn *fn) +{ + deconfigure_function(fn); + device_unregister(&fn->dev); +} + +static int ocxl_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + int rc, afu_count = 0; + u8 afu; + struct ocxl_fn *fn; + + if (!radix_enabled()) { + dev_err(&dev->dev, "Unsupported memory model (hash)\n"); + return -ENODEV; + } + + fn = init_function(dev); + if (IS_ERR(fn)) { + dev_err(&dev->dev, "function init failed: %li\n", + PTR_ERR(fn)); + return PTR_ERR(fn); + } + + for (afu = 0; afu <= fn->config.max_afu_index; afu++) { + rc = ocxl_config_check_afu_index(dev, &fn->config, afu); + if (rc > 0) { + rc = init_afu(dev, fn, afu); + if (rc) { + dev_err(&dev->dev, + "Can't initialize AFU index %d\n", afu); + continue; + } + afu_count++; + } + } + dev_info(&dev->dev, "%d AFU(s) configured\n", afu_count); + return 0; +} + +static void ocxl_remove(struct pci_dev *dev) +{ + struct ocxl_afu *afu, *tmp; + struct ocxl_fn *fn = pci_get_drvdata(dev); + + list_for_each_entry_safe(afu, tmp, &fn->afu_list, list) { + remove_afu(afu); + } + remove_function(fn); +} + +struct pci_driver ocxl_pci_driver = { + .name = "ocxl", + .id_table = ocxl_pci_tbl, + .probe = ocxl_probe, + .remove = ocxl_remove, + .shutdown = ocxl_remove, +}; diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c new file mode 100644 index 000000000000..d9753a1db14b --- /dev/null +++ b/drivers/misc/ocxl/sysfs.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. 
+#include <linux/sysfs.h> +#include "ocxl_internal.h" + +static ssize_t global_mmio_size_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_ocxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + afu->config.global_mmio_size); +} + +static ssize_t pp_mmio_size_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_ocxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + afu->config.pp_mmio_stride); +} + +static ssize_t afu_version_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_ocxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%hhu:%hhu\n", + afu->config.version_major, + afu->config.version_minor); +} + +static ssize_t contexts_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_ocxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%d/%d\n", + afu->pasid_count, afu->pasid_max); +} + +static struct device_attribute afu_attrs[] = { + __ATTR_RO(global_mmio_size), + __ATTR_RO(pp_mmio_size), + __ATTR_RO(afu_version), + __ATTR_RO(contexts), +}; + +static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj)); + + if (count == 0 || off < 0 || + off >= afu->config.global_mmio_size) + return 0; + memcpy_fromio(buf, afu->global_mmio_ptr + off, count); + return count; +} + +static int global_mmio_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct ocxl_afu *afu = vma->vm_private_data; + unsigned long offset; + + if (vmf->pgoff >= (afu->config.global_mmio_size >> PAGE_SHIFT)) + return VM_FAULT_SIGBUS; + + offset = vmf->pgoff; + offset += (afu->global_mmio_start >> PAGE_SHIFT); + vm_insert_pfn(vma, vmf->address, offset); + return VM_FAULT_NOPAGE; +} + +static const struct vm_operations_struct global_mmio_vmops = { + .fault = global_mmio_fault, +}; + +static int global_mmio_mmap(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + struct vm_area_struct *vma) +{ + struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj)); + + if ((vma_pages(vma) + vma->vm_pgoff) > + (afu->config.global_mmio_size >> PAGE_SHIFT)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_ops = &global_mmio_vmops; + vma->vm_private_data = afu; + return 0; +} + +int ocxl_sysfs_add_afu(struct ocxl_afu *afu) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { + rc = device_create_file(&afu->dev, &afu_attrs[i]); + if (rc) + goto err; + } + + sysfs_attr_init(&afu->attr_global_mmio.attr); + afu->attr_global_mmio.attr.name = "global_mmio_area"; + afu->attr_global_mmio.attr.mode = 0600; + afu->attr_global_mmio.size = afu->config.global_mmio_size; + afu->attr_global_mmio.read = global_mmio_read; + afu->attr_global_mmio.mmap = global_mmio_mmap; + rc = device_create_bin_file(&afu->dev, &afu->attr_global_mmio); + if (rc) { + dev_err(&afu->dev, + "Unable to create global mmio attr for afu: %d\n", + rc); + goto err; + } + + return 0; + +err: + for (i--; i >= 0; i--) + device_remove_file(&afu->dev, &afu_attrs[i]); + return rc; +} + +void ocxl_sysfs_remove_afu(struct ocxl_afu *afu) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) + device_remove_file(&afu->dev, &afu_attrs[i]); + device_remove_bin_file(&afu->dev, 
&afu->attr_global_mmio); +} diff --git a/drivers/misc/ocxl/trace.c b/drivers/misc/ocxl/trace.c new file mode 100644 index 000000000000..1e6947049697 --- /dev/null +++ b/drivers/misc/ocxl/trace.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "trace.h" +#endif diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h new file mode 100644 index 000000000000..bcb7ff330c1e --- /dev/null +++ b/drivers/misc/ocxl/trace.h @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ocxl + +#if !defined(_TRACE_OCXL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_OCXL_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(ocxl_context, + TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr), + TP_ARGS(pid, spa, pasid, pidr, tidr), + + TP_STRUCT__entry( + __field(pid_t, pid) + __field(void*, spa) + __field(int, pasid) + __field(u32, pidr) + __field(u32, tidr) + ), + + TP_fast_assign( + __entry->pid = pid; + __entry->spa = spa; + __entry->pasid = pasid; + __entry->pidr = pidr; + __entry->tidr = tidr; + ), + + TP_printk("linux pid=%d spa=0x%p pasid=0x%x pidr=0x%x tidr=0x%x", + __entry->pid, + __entry->spa, + __entry->pasid, + __entry->pidr, + __entry->tidr + ) +); + +DEFINE_EVENT(ocxl_context, ocxl_context_add, + TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr), + TP_ARGS(pid, spa, pasid, pidr, tidr) +); + +DEFINE_EVENT(ocxl_context, ocxl_context_remove, + TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr), + TP_ARGS(pid, spa, pasid, pidr, tidr) +); + +TRACE_EVENT(ocxl_terminate_pasid, + TP_PROTO(int pasid, int rc), + TP_ARGS(pasid, rc), + + TP_STRUCT__entry( + __field(int, pasid) + __field(int, rc) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->rc = rc; + ), + + TP_printk("pasid=0x%x rc=%d", + __entry->pasid, + __entry->rc + ) +); + +DECLARE_EVENT_CLASS(ocxl_fault_handler, + TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc), + TP_ARGS(spa, pe, dsisr, dar, tfc), + + TP_STRUCT__entry( + __field(void *, spa) + __field(u64, pe) + __field(u64, dsisr) + __field(u64, dar) + __field(u64, tfc) + ), + + TP_fast_assign( + __entry->spa = spa; + __entry->pe = pe; + __entry->dsisr = dsisr; + __entry->dar = dar; + __entry->tfc = tfc; + ), + + TP_printk("spa=%p pe=0x%llx dsisr=0x%llx dar=0x%llx tfc=0x%llx", + __entry->spa, + __entry->pe, + __entry->dsisr, + __entry->dar, + __entry->tfc + ) +); + +DEFINE_EVENT(ocxl_fault_handler, ocxl_fault, + TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc), + TP_ARGS(spa, pe, dsisr, dar, tfc) +); + +DEFINE_EVENT(ocxl_fault_handler, ocxl_fault_ack, + TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc), + TP_ARGS(spa, pe, dsisr, dar, tfc) +); + +TRACE_EVENT(ocxl_afu_irq_alloc, + TP_PROTO(int pasid, int irq_id, unsigned int virq, int hw_irq, + u64 irq_offset), + TP_ARGS(pasid, irq_id, virq, hw_irq, irq_offset), + + TP_STRUCT__entry( + __field(int, pasid) + __field(int, irq_id) + __field(unsigned int, virq) + __field(int, hw_irq) + __field(u64, irq_offset) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->irq_id = irq_id; + __entry->virq = virq; + __entry->hw_irq = hw_irq; + __entry->irq_offset = irq_offset; + ), + + TP_printk("pasid=0x%x irq_id=%d virq=%u hw_irq=%d irq_offset=0x%llx", + __entry->pasid, + __entry->irq_id, + __entry->virq, + __entry->hw_irq, + __entry->irq_offset + ) +); + +TRACE_EVENT(ocxl_afu_irq_free, + 
TP_PROTO(int pasid, int irq_id), + TP_ARGS(pasid, irq_id), + + TP_STRUCT__entry( + __field(int, pasid) + __field(int, irq_id) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->irq_id = irq_id; + ), + + TP_printk("pasid=0x%x irq_id=%d", + __entry->pasid, + __entry->irq_id + ) +); + +TRACE_EVENT(ocxl_afu_irq_receive, + TP_PROTO(int virq), + TP_ARGS(virq), + + TP_STRUCT__entry( + __field(int, virq) + ), + + TP_fast_assign( + __entry->virq = virq; + ), + + TP_printk("virq=%d", + __entry->virq + ) +); + +#endif /* _TRACE_OCXL_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace +#include <trace/define_trace.h> diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c index 9918eda0e05f..a3454eb56fbf 100644 --- a/drivers/misc/sgi-gru/grutlbpurge.c +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -258,6 +258,7 @@ static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm) static const struct mmu_notifier_ops gru_mmuops = { + .flags = MMU_INVALIDATE_DOES_NOT_BLOCK, .invalidate_range_start = gru_invalidate_range_start, .invalidate_range_end = gru_invalidate_range_end, .release = gru_release, diff --git a/drivers/misc/vexpress-syscfg.c b/drivers/misc/vexpress-syscfg.c index 2cde80c7bb93..9eea30f54fd6 100644 --- a/drivers/misc/vexpress-syscfg.c +++ b/drivers/misc/vexpress-syscfg.c @@ -270,10 +270,8 @@ static int vexpress_syscfg_probe(struct platform_device *pdev) /* Must use dev.parent (MFD), as that's where DT phandle points at... */ bridge = vexpress_config_bridge_register(pdev->dev.parent, &vexpress_syscfg_bridge_ops, syscfg); - if (IS_ERR(bridge)) - return PTR_ERR(bridge); - return 0; + return PTR_ERR_OR_ZERO(bridge); } static const struct platform_device_id vexpress_syscfg_id_table[] = { diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index 8af5c2672f71..0339538c182d 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -129,23 +129,6 @@ * *_MEM state, and vice versa. */ -/* - * VMCIMemcpy{To,From}QueueFunc() prototypes. Functions of these - * types are passed around to enqueue and dequeue routines. Note that - * often the functions passed are simply wrappers around memcpy - * itself. - * - * Note: In order for the memcpy typedefs to be compatible with the VMKernel, - * there's an unused last parameter for the hosted side. In - * ESX, that parameter holds a buffer type. - */ -typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue, - u64 queue_offset, const void *src, - size_t src_offset, size_t size); -typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset, - const struct vmci_queue *queue, - u64 queue_offset, size_t size); - /* The Kernel specific component of the struct vmci_queue structure. */ struct vmci_queue_kern_if { struct mutex __mutex; /* Protects the queue. */ @@ -351,11 +334,10 @@ static void *qp_alloc_queue(u64 size, u32 flags) * by traversing the offset -> page translation structure for the queue. * Assumes that offset + size does not wrap around in the queue. 
*/ -static int __qp_memcpy_to_queue(struct vmci_queue *queue, - u64 queue_offset, - const void *src, - size_t size, - bool is_iovec) +static int qp_memcpy_to_queue_iter(struct vmci_queue *queue, + u64 queue_offset, + struct iov_iter *from, + size_t size) { struct vmci_queue_kern_if *kernel_if = queue->kernel_if; size_t bytes_copied = 0; @@ -380,23 +362,12 @@ static int __qp_memcpy_to_queue(struct vmci_queue *queue, else to_copy = size - bytes_copied; - if (is_iovec) { - struct msghdr *msg = (struct msghdr *)src; - int err; - - /* The iovec will track bytes_copied internally. */ - err = memcpy_from_msg((u8 *)va + page_offset, - msg, to_copy); - if (err != 0) { - if (kernel_if->host) - kunmap(kernel_if->u.h.page[page_index]); - return VMCI_ERROR_INVALID_ARGS; - } - } else { - memcpy((u8 *)va + page_offset, - (u8 *)src + bytes_copied, to_copy); + if (!copy_from_iter_full((u8 *)va + page_offset, to_copy, + from)) { + if (kernel_if->host) + kunmap(kernel_if->u.h.page[page_index]); + return VMCI_ERROR_INVALID_ARGS; } - bytes_copied += to_copy; if (kernel_if->host) kunmap(kernel_if->u.h.page[page_index]); @@ -411,11 +382,9 @@ static int __qp_memcpy_to_queue(struct vmci_queue *queue, * by traversing the offset -> page translation structure for the queue. * Assumes that offset + size does not wrap around in the queue. */ -static int __qp_memcpy_from_queue(void *dest, - const struct vmci_queue *queue, - u64 queue_offset, - size_t size, - bool is_iovec) +static int qp_memcpy_from_queue_iter(struct iov_iter *to, + const struct vmci_queue *queue, + u64 queue_offset, size_t size) { struct vmci_queue_kern_if *kernel_if = queue->kernel_if; size_t bytes_copied = 0; @@ -427,6 +396,7 @@ static int __qp_memcpy_from_queue(void *dest, (queue_offset + bytes_copied) & (PAGE_SIZE - 1); void *va; size_t to_copy; + int err; if (kernel_if->host) va = kmap(kernel_if->u.h.page[page_index]); @@ -440,23 +410,12 @@ static int __qp_memcpy_from_queue(void *dest, else to_copy = size - bytes_copied; - if (is_iovec) { - struct msghdr *msg = dest; - int err; - - /* The iovec will track bytes_copied internally. */ - err = memcpy_to_msg(msg, (u8 *)va + page_offset, - to_copy); - if (err != 0) { - if (kernel_if->host) - kunmap(kernel_if->u.h.page[page_index]); - return VMCI_ERROR_INVALID_ARGS; - } - } else { - memcpy((u8 *)dest + bytes_copied, - (u8 *)va + page_offset, to_copy); + err = copy_to_iter((u8 *)va + page_offset, to_copy, to); + if (err != to_copy) { + if (kernel_if->host) + kunmap(kernel_if->u.h.page[page_index]); + return VMCI_ERROR_INVALID_ARGS; } - bytes_copied += to_copy; if (kernel_if->host) kunmap(kernel_if->u.h.page[page_index]); @@ -569,54 +528,6 @@ static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set) return VMCI_SUCCESS; } -static int qp_memcpy_to_queue(struct vmci_queue *queue, - u64 queue_offset, - const void *src, size_t src_offset, size_t size) -{ - return __qp_memcpy_to_queue(queue, queue_offset, - (u8 *)src + src_offset, size, false); -} - -static int qp_memcpy_from_queue(void *dest, - size_t dest_offset, - const struct vmci_queue *queue, - u64 queue_offset, size_t size) -{ - return __qp_memcpy_from_queue((u8 *)dest + dest_offset, - queue, queue_offset, size, false); -} - -/* - * Copies from a given iovec from a VMCI Queue. - */ -static int qp_memcpy_to_queue_iov(struct vmci_queue *queue, - u64 queue_offset, - const void *msg, - size_t src_offset, size_t size) -{ - - /* - * We ignore src_offset because src is really a struct iovec * and will - * maintain offset internally. 
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index 8af5c2672f71..0339538c182d 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -129,23 +129,6 @@
  * *_MEM state, and vice versa.
  */
 
-/*
- * VMCIMemcpy{To,From}QueueFunc() prototypes.  Functions of these
- * types are passed around to enqueue and dequeue routines.  Note that
- * often the functions passed are simply wrappers around memcpy
- * itself.
- *
- * Note: In order for the memcpy typedefs to be compatible with the VMKernel,
- * there's an unused last parameter for the hosted side.  In
- * ESX, that parameter holds a buffer type.
- */
-typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue,
-				      u64 queue_offset, const void *src,
-				      size_t src_offset, size_t size);
-typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset,
-					const struct vmci_queue *queue,
-					u64 queue_offset, size_t size);
-
 /* The Kernel specific component of the struct vmci_queue structure. */
 struct vmci_queue_kern_if {
 	struct mutex __mutex;	/* Protects the queue. */
@@ -351,11 +334,10 @@ static void *qp_alloc_queue(u64 size, u32 flags)
  * by traversing the offset -> page translation structure for the queue.
  * Assumes that offset + size does not wrap around in the queue.
  */
-static int __qp_memcpy_to_queue(struct vmci_queue *queue,
-				u64 queue_offset,
-				const void *src,
-				size_t size,
-				bool is_iovec)
+static int qp_memcpy_to_queue_iter(struct vmci_queue *queue,
+				   u64 queue_offset,
+				   struct iov_iter *from,
+				   size_t size)
 {
 	struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
 	size_t bytes_copied = 0;
@@ -380,23 +362,12 @@ static int __qp_memcpy_to_queue(struct vmci_queue *queue,
 		else
 			to_copy = size - bytes_copied;
 
-		if (is_iovec) {
-			struct msghdr *msg = (struct msghdr *)src;
-			int err;
-
-			/* The iovec will track bytes_copied internally. */
-			err = memcpy_from_msg((u8 *)va + page_offset,
-					      msg, to_copy);
-			if (err != 0) {
-				if (kernel_if->host)
-					kunmap(kernel_if->u.h.page[page_index]);
-				return VMCI_ERROR_INVALID_ARGS;
-			}
-		} else {
-			memcpy((u8 *)va + page_offset,
-			       (u8 *)src + bytes_copied, to_copy);
+		if (!copy_from_iter_full((u8 *)va + page_offset, to_copy,
+					 from)) {
+			if (kernel_if->host)
+				kunmap(kernel_if->u.h.page[page_index]);
+			return VMCI_ERROR_INVALID_ARGS;
 		}
-
 		bytes_copied += to_copy;
 		if (kernel_if->host)
 			kunmap(kernel_if->u.h.page[page_index]);
@@ -411,11 +382,9 @@ static int __qp_memcpy_to_queue(struct vmci_queue *queue,
  * by traversing the offset -> page translation structure for the queue.
  * Assumes that offset + size does not wrap around in the queue.
  */
-static int __qp_memcpy_from_queue(void *dest,
-				  const struct vmci_queue *queue,
-				  u64 queue_offset,
-				  size_t size,
-				  bool is_iovec)
+static int qp_memcpy_from_queue_iter(struct iov_iter *to,
+				     const struct vmci_queue *queue,
+				     u64 queue_offset, size_t size)
 {
 	struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
 	size_t bytes_copied = 0;
@@ -427,6 +396,7 @@ static int __qp_memcpy_from_queue(void *dest,
 			(queue_offset + bytes_copied) & (PAGE_SIZE - 1);
 		void *va;
 		size_t to_copy;
+		int err;
 
 		if (kernel_if->host)
 			va = kmap(kernel_if->u.h.page[page_index]);
@@ -440,23 +410,12 @@ static int __qp_memcpy_from_queue(void *dest,
 		else
 			to_copy = size - bytes_copied;
 
-		if (is_iovec) {
-			struct msghdr *msg = dest;
-			int err;
-
-			/* The iovec will track bytes_copied internally. */
-			err = memcpy_to_msg(msg, (u8 *)va + page_offset,
-					    to_copy);
-			if (err != 0) {
-				if (kernel_if->host)
-					kunmap(kernel_if->u.h.page[page_index]);
-				return VMCI_ERROR_INVALID_ARGS;
-			}
-		} else {
-			memcpy((u8 *)dest + bytes_copied,
-			       (u8 *)va + page_offset, to_copy);
+		err = copy_to_iter((u8 *)va + page_offset, to_copy, to);
+		if (err != to_copy) {
+			if (kernel_if->host)
+				kunmap(kernel_if->u.h.page[page_index]);
+			return VMCI_ERROR_INVALID_ARGS;
 		}
-
 		bytes_copied += to_copy;
 		if (kernel_if->host)
 			kunmap(kernel_if->u.h.page[page_index]);
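Both helpers now reduce to the same page-at-a-time walk, with the iov_iter primitives doing the copy and the position bookkeeping that the old is_iovec/bytes_copied juggling did by hand; copy_from_iter_full() also reverts the iterator on a short copy, which is why no manual rollback remains. A condensed sketch of the copy-in pattern under simplified assumptions (pages already pinned, no host/guest kmap split, hypothetical names):

#include <linux/highmem.h>
#include <linux/uio.h>

/* Condensed sketch of the qp_memcpy_to_queue_iter() pattern. */
static int copy_in_sketch(struct page **pages, u64 offset,
			  struct iov_iter *from, size_t size)
{
	size_t copied = 0;

	while (copied < size) {
		u64 pos = offset + copied;
		struct page *page = pages[pos / PAGE_SIZE];
		size_t page_off = pos & (PAGE_SIZE - 1);
		size_t chunk = min_t(size_t, size - copied,
				     PAGE_SIZE - page_off);
		void *va = kmap(page);

		/* The iterator advances itself; no explicit source offset. */
		if (!copy_from_iter_full((u8 *)va + page_off, chunk, from)) {
			kunmap(page);
			return -EFAULT;	/* the driver returns VMCI_ERROR_INVALID_ARGS here */
		}
		kunmap(page);
		copied += chunk;
	}
	return 0;
}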
@@ -569,54 +528,6 @@ static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set)
 	return VMCI_SUCCESS;
 }
 
-static int qp_memcpy_to_queue(struct vmci_queue *queue,
-			      u64 queue_offset,
-			      const void *src, size_t src_offset, size_t size)
-{
-	return __qp_memcpy_to_queue(queue, queue_offset,
-				    (u8 *)src + src_offset, size, false);
-}
-
-static int qp_memcpy_from_queue(void *dest,
-				size_t dest_offset,
-				const struct vmci_queue *queue,
-				u64 queue_offset, size_t size)
-{
-	return __qp_memcpy_from_queue((u8 *)dest + dest_offset,
-				      queue, queue_offset, size, false);
-}
-
-/*
- * Copies from a given iovec from a VMCI Queue.
- */
-static int qp_memcpy_to_queue_iov(struct vmci_queue *queue,
-				  u64 queue_offset,
-				  const void *msg,
-				  size_t src_offset, size_t size)
-{
-
-	/*
-	 * We ignore src_offset because src is really a struct iovec * and will
-	 * maintain offset internally.
-	 */
-	return __qp_memcpy_to_queue(queue, queue_offset, msg, size, true);
-}
-
-/*
- * Copies to a given iovec from a VMCI Queue.
- */
-static int qp_memcpy_from_queue_iov(void *dest,
-				    size_t dest_offset,
-				    const struct vmci_queue *queue,
-				    u64 queue_offset, size_t size)
-{
-	/*
-	 * We ignore dest_offset because dest is really a struct iovec * and
-	 * will maintain offset internally.
-	 */
-	return __qp_memcpy_from_queue(dest, queue, queue_offset, size, true);
-}
-
 /*
  * Allocates kernel VA space of specified size plus space for the queue
  * and kernel interface.  This is different from the guest queue allocator,
@@ -2629,12 +2540,11 @@ static bool qp_wait_for_ready_queue(struct vmci_qp *qpair)
 static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
 				 struct vmci_queue *consume_q,
 				 const u64 produce_q_size,
-				 const void *buf,
-				 size_t buf_size,
-				 vmci_memcpy_to_queue_func memcpy_to_queue)
+				 struct iov_iter *from)
 {
 	s64 free_space;
 	u64 tail;
+	size_t buf_size = iov_iter_count(from);
 	size_t written;
 	ssize_t result;
 
@@ -2654,15 +2564,15 @@ static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
 	written = (size_t) (free_space > buf_size ? buf_size : free_space);
 	tail = vmci_q_header_producer_tail(produce_q->q_header);
 	if (likely(tail + written < produce_q_size)) {
-		result = memcpy_to_queue(produce_q, tail, buf, 0, written);
+		result = qp_memcpy_to_queue_iter(produce_q, tail, from, written);
 	} else {
 		/* Tail pointer wraps around. */
 
 		const size_t tmp = (size_t) (produce_q_size - tail);
 
-		result = memcpy_to_queue(produce_q, tail, buf, 0, tmp);
+		result = qp_memcpy_to_queue_iter(produce_q, tail, from, tmp);
 		if (result >= VMCI_SUCCESS)
-			result = memcpy_to_queue(produce_q, 0, buf, tmp,
+			result = qp_memcpy_to_queue_iter(produce_q, 0, from,
 						 written - tmp);
 	}
 
@@ -2690,11 +2600,10 @@ static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
 static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q,
 				 struct vmci_queue *consume_q,
 				 const u64 consume_q_size,
-				 void *buf,
-				 size_t buf_size,
-				 vmci_memcpy_from_queue_func memcpy_from_queue,
+				 struct iov_iter *to,
 				 bool update_consumer)
 {
+	size_t buf_size = iov_iter_count(to);
 	s64 buf_ready;
 	u64 head;
 	size_t read;
@@ -2716,15 +2625,15 @@ static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q,
 	read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready);
 	head = vmci_q_header_consumer_head(produce_q->q_header);
 	if (likely(head + read < consume_q_size)) {
-		result = memcpy_from_queue(buf, 0, consume_q, head, read);
+		result = qp_memcpy_from_queue_iter(to, consume_q, head, read);
 	} else {
 		/* Head pointer wraps around. */
 
 		const size_t tmp = (size_t) (consume_q_size - head);
 
-		result = memcpy_from_queue(buf, 0, consume_q, head, tmp);
+		result = qp_memcpy_from_queue_iter(to, consume_q, head, tmp);
 		if (result >= VMCI_SUCCESS)
-			result = memcpy_from_queue(buf, tmp, consume_q, 0,
+			result = qp_memcpy_from_queue_iter(to, consume_q, 0,
 						   read - tmp);
 	}
 
@@ -3118,18 +3027,21 @@ ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
 			   int buf_type)
 {
 	ssize_t result;
+	struct iov_iter from;
+	struct kvec v = {.iov_base = (void *)buf, .iov_len = buf_size};
 
 	if (!qpair || !buf)
 		return VMCI_ERROR_INVALID_ARGS;
 
+	iov_iter_kvec(&from, WRITE | ITER_KVEC, &v, 1, buf_size);
+
 	qp_lock(qpair);
 
 	do {
 		result = qp_enqueue_locked(qpair->produce_q,
 					   qpair->consume_q,
 					   qpair->produce_q_size,
-					   buf, buf_size,
-					   qp_memcpy_to_queue);
+					   &from);
 
 		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
 		    !qp_wait_for_ready_queue(qpair))
@@ -3159,18 +3071,21 @@ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
 			   int buf_type)
 {
 	ssize_t result;
+	struct iov_iter to;
+	struct kvec v = {.iov_base = buf, .iov_len = buf_size};
 
 	if (!qpair || !buf)
 		return VMCI_ERROR_INVALID_ARGS;
 
+	iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+
 	qp_lock(qpair);
 
 	do {
 		result = qp_dequeue_locked(qpair->produce_q,
 					   qpair->consume_q,
 					   qpair->consume_q_size,
-					   buf, buf_size,
-					   qp_memcpy_from_queue, true);
+					   &to, true);
 
 		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
 		    !qp_wait_for_ready_queue(qpair))
@@ -3200,19 +3115,22 @@ ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
 			size_t buf_size,
 			int buf_type)
 {
+	struct iov_iter to;
+	struct kvec v = {.iov_base = buf, .iov_len = buf_size};
 	ssize_t result;
 
 	if (!qpair || !buf)
 		return VMCI_ERROR_INVALID_ARGS;
 
+	iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+
 	qp_lock(qpair);
 
 	do {
 		result = qp_dequeue_locked(qpair->produce_q,
 					   qpair->consume_q,
 					   qpair->consume_q_size,
-					   buf, buf_size,
-					   qp_memcpy_from_queue, false);
+					   &to, false);
 
 		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
 		    !qp_wait_for_ready_queue(qpair))
@@ -3253,8 +3171,7 @@ ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
 		result = qp_enqueue_locked(qpair->produce_q,
 					   qpair->consume_q,
 					   qpair->produce_q_size,
-					   msg, iov_size,
-					   qp_memcpy_to_queue_iov);
+					   &msg->msg_iter);
 
 		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
 		    !qp_wait_for_ready_queue(qpair))
@@ -3295,9 +3212,7 @@ ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
 		result = qp_dequeue_locked(qpair->produce_q,
 					   qpair->consume_q,
 					   qpair->consume_q_size,
-					   msg, iov_size,
-					   qp_memcpy_from_queue_iov,
-					   true);
+					   &msg->msg_iter, true);
 
 		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
 		    !qp_wait_for_ready_queue(qpair))
@@ -3339,9 +3254,7 @@ ssize_t vmci_qpair_peekv(struct vmci_qp *qpair,
 		result = qp_dequeue_locked(qpair->produce_q,
 					   qpair->consume_q,
 					   qpair->consume_q_size,
-					   msg, iov_size,
-					   qp_memcpy_from_queue_iov,
-					   false);
+					   &msg->msg_iter, false);
 
 		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
 		    !qp_wait_for_ready_queue(qpair))
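With the memcpy callbacks gone, flat kernel buffers and msghdr callers share one path: vmci_qpair_enqueue() wraps its buffer in a single-segment kvec iterator, while vmci_qpair_enquev() simply hands qp_enqueue_locked() the socket's &msg->msg_iter. A caller-side sketch at this kernel's iov_iter API level, where the direction is still passed as WRITE | ITER_KVEC (consume() is a hypothetical stand-in for the locked enqueue path):

#include <linux/uio.h>

/* Sketch: feeding a flat kernel buffer to an iov_iter-based writer. */
static ssize_t send_buf_sketch(const void *buf, size_t len,
			       ssize_t (*consume)(struct iov_iter *))
{
	struct kvec v = { .iov_base = (void *)buf, .iov_len = len };
	struct iov_iter from;

	iov_iter_kvec(&from, WRITE | ITER_KVEC, &v, 1, len);
	/* iov_iter_count(&from) == len, replacing the old buf_size argument */
	return consume(&from);
}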