diff options
Diffstat (limited to 'drivers')
499 files changed, 28131 insertions, 9673 deletions
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 83e5f7e1a20d..18f3036fcb82 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -256,7 +256,7 @@ config ACPI_PROCESSOR config ACPI_IPMI tristate "IPMI" - depends on IPMI_SI + depends on IPMI_HANDLER default n help This driver enables the ACPI to access the BMC controller. And it @@ -440,7 +440,7 @@ config ACPI_CUSTOM_METHOD config ACPI_BGRT bool "Boottime Graphics Resource Table support" - depends on EFI && X86 + depends on EFI && (X86 || ARM64) help This driver adds support for exposing the ACPI Boottime Graphics Resource Table, which allows the operating system to obtain @@ -469,9 +469,8 @@ config ACPI_WATCHDOG config ACPI_EXTLOG tristate "Extended Error Log support" - depends on X86_MCE && X86_LOCAL_APIC + depends on X86_MCE && X86_LOCAL_APIC && EDAC select UEFI_CPER - select RAS default n help Certain usages such as Predictive Failure Analysis (PFA) require @@ -506,7 +505,7 @@ config CRC_PMIC_OPREGION config XPOWER_PMIC_OPREGION bool "ACPI operation region support for XPower AXP288 PMIC" - depends on AXP288_ADC = y + depends on MFD_AXP20X_I2C help This config adds ACPI operation region support for XPower AXP288 PMIC. @@ -516,6 +515,12 @@ config BXT_WC_PMIC_OPREGION help This config adds ACPI operation region support for BXT WhiskeyCove PMIC. +config CHT_WC_PMIC_OPREGION + bool "ACPI operation region support for CHT Whiskey Cove PMIC" + depends on INTEL_SOC_PMIC_CHTWC + help + This config adds ACPI operation region support for CHT Whiskey Cove PMIC. 
+ endif config ACPI_CONFIGFS diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index d94f92f88ca1..d78065cc9324 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -101,6 +101,7 @@ obj-$(CONFIG_PMIC_OPREGION) += pmic/intel_pmic.o obj-$(CONFIG_CRC_PMIC_OPREGION) += pmic/intel_pmic_crc.o obj-$(CONFIG_XPOWER_PMIC_OPREGION) += pmic/intel_pmic_xpower.o obj-$(CONFIG_BXT_WC_PMIC_OPREGION) += pmic/intel_pmic_bxtwc.o +obj-$(CONFIG_CHT_WC_PMIC_OPREGION) += pmic/intel_pmic_chtwc.o obj-$(CONFIG_ACPI_CONFIGFS) += acpi_configfs.o diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index f71b756b05c4..8f52483219ba 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -57,12 +57,23 @@ static int acpi_ac_add(struct acpi_device *device); static int acpi_ac_remove(struct acpi_device *device); static void acpi_ac_notify(struct acpi_device *device, u32 event); +struct acpi_ac_bl { + const char *hid; + int hrv; +}; + static const struct acpi_device_id ac_device_ids[] = { {"ACPI0003", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, ac_device_ids); +/* Lists of PMIC ACPI HIDs with an (often better) native charger driver */ +static const struct acpi_ac_bl acpi_ac_blacklist[] = { + { "INT33F4", -1 }, /* X-Powers AXP288 PMIC */ + { "INT34D3", 3 }, /* Intel Cherrytrail Whiskey Cove PMIC */ +}; + #ifdef CONFIG_PM_SLEEP static int acpi_ac_resume(struct device *dev); #endif @@ -424,11 +435,20 @@ static int acpi_ac_remove(struct acpi_device *device) static int __init acpi_ac_init(void) { + unsigned int i; int result; if (acpi_disabled) return -ENODEV; + for (i = 0; i < ARRAY_SIZE(acpi_ac_blacklist); i++) + if (acpi_dev_present(acpi_ac_blacklist[i].hid, "1", + acpi_ac_blacklist[i].hrv)) { + pr_info(PREFIX "AC: found native %s PMIC, not loading\n", + acpi_ac_blacklist[i].hid); + return -ENODEV; + } + #ifdef CONFIG_ACPI_PROCFS_POWER acpi_ac_dir = acpi_lock_ac_dir(); if (!acpi_ac_dir) diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 
a15270a806fc..502ea4dc2080 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -229,7 +229,7 @@ static int __init extlog_init(void) if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr()) return -ENODEV; - if (get_edac_report_status() == EDAC_REPORTING_FORCE) { + if (edac_get_report_status() == EDAC_REPORTING_FORCE) { pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n"); return -EPERM; } @@ -285,8 +285,8 @@ static int __init extlog_init(void) * eMCA event report method has higher priority than EDAC method, * unless EDAC event report method is mandatory. */ - old_edac_report_status = get_edac_report_status(); - set_edac_report_status(EDAC_REPORTING_DISABLED); + old_edac_report_status = edac_get_report_status(); + edac_set_report_status(EDAC_REPORTING_DISABLED); mce_register_decode_chain(&extlog_mce_dec); /* enable OS to be involved to take over management from BIOS */ ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; @@ -308,7 +308,7 @@ err: static void __exit extlog_exit(void) { - set_edac_report_status(old_edac_report_status); + edac_set_report_status(old_edac_report_status); mce_unregister_decode_chain(&extlog_mce_dec); ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; if (extlog_l1_addr) diff --git a/drivers/acpi/acpi_ipmi.c b/drivers/acpi/acpi_ipmi.c index 747c2ba98534..1b64419e2fec 100644 --- a/drivers/acpi/acpi_ipmi.c +++ b/drivers/acpi/acpi_ipmi.c @@ -429,8 +429,7 @@ static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data) if (msg->recv_type == IPMI_RESPONSE_RECV_TYPE && msg->msg.data_len == 1) { if (msg->msg.data[0] == IPMI_TIMEOUT_COMPLETION_CODE) { - dev_WARN_ONCE(dev, true, - "Unexpected response (timeout).\n"); + dev_dbg_once(dev, "Unexpected response (timeout).\n"); tx_msg->msg_done = ACPI_IPMI_TIMEOUT; } goto out_comp; diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index 03250e1f1103..88cd949003f3 100644 --- 
a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -121,11 +121,14 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev, if (IS_ERR(pdev)) dev_err(&adev->dev, "platform device creation failed: %ld\n", PTR_ERR(pdev)); - else + else { + set_dev_node(&pdev->dev, acpi_get_node(adev->handle)); dev_dbg(&adev->dev, "created platform device %s\n", dev_name(&pdev->dev)); + } kfree(resources); + return pdev; } EXPORT_SYMBOL_GPL(acpi_create_platform_device); diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 0143135b3abe..f098e25b6b41 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -388,11 +388,6 @@ static int acpi_processor_add(struct acpi_device *device, if (result) /* Processor is not physically present or unavailable */ return 0; -#ifdef CONFIG_SMP - if (pr->id >= setup_max_cpus && pr->id != 0) - return 0; -#endif - BUG_ON(pr->id >= nr_cpu_ids); /* diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index d00bc0ef87a6..e88fe3632dd6 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -73,6 +73,10 @@ module_param(report_key_events, int, 0644); MODULE_PARM_DESC(report_key_events, "0: none, 1: output changes, 2: brightness changes, 3: all"); +/* + * Whether the struct acpi_video_device_attrib::device_id_scheme bit should be + * assumed even if not actually set. + */ static bool device_id_scheme = false; module_param(device_id_scheme, bool, 0444); @@ -88,6 +92,18 @@ static int acpi_video_bus_remove(struct acpi_device *device); static void acpi_video_bus_notify(struct acpi_device *device, u32 event); void acpi_video_detect_exit(void); +/* + * Indices in the _BCL method response: the first two items are special, + * the rest are all supported levels. 
+ * + * See page 575 of the ACPI spec 3.0 + */ +enum acpi_video_level_idx { + ACPI_VIDEO_AC_LEVEL, /* level when machine has full power */ + ACPI_VIDEO_BATTERY_LEVEL, /* level when machine is on batteries */ + ACPI_VIDEO_FIRST_LEVEL, /* actual supported levels begin here */ +}; + static const struct acpi_device_id video_device_ids[] = { {ACPI_VIDEO_HID, 0}, {"", 0}, @@ -132,7 +148,15 @@ struct acpi_video_device_attrib { the VGA device. */ u32 pipe_id:3; /* For VGA multiple-head devices. */ u32 reserved:10; /* Must be 0 */ - u32 device_id_scheme:1; /* Device ID Scheme */ + + /* + * The device ID might not actually follow the scheme described by this + * struct acpi_video_device_attrib. If it does, then this bit + * device_id_scheme is set; otherwise, other fields should be ignored. + * + * (but also see the global flag device_id_scheme) + */ + u32 device_id_scheme:1; }; struct acpi_video_enumerated_device { @@ -217,20 +241,16 @@ static int acpi_video_get_brightness(struct backlight_device *bd) if (acpi_video_device_lcd_get_level_current(vd, &cur_level, false)) return -EINVAL; - for (i = 2; i < vd->brightness->count; i++) { + for (i = ACPI_VIDEO_FIRST_LEVEL; i < vd->brightness->count; i++) { if (vd->brightness->levels[i] == cur_level) - /* - * The first two entries are special - see page 575 - * of the ACPI spec 3.0 - */ - return i - 2; + return i - ACPI_VIDEO_FIRST_LEVEL; } return 0; } static int acpi_video_set_brightness(struct backlight_device *bd) { - int request_level = bd->props.brightness + 2; + int request_level = bd->props.brightness + ACPI_VIDEO_FIRST_LEVEL; struct acpi_video_device *vd = bl_get_data(bd); cancel_delayed_work(&vd->switch_brightness_work); @@ -244,18 +264,18 @@ static const struct backlight_ops acpi_backlight_ops = { }; /* thermal cooling device callbacks */ -static int video_get_max_state(struct thermal_cooling_device *cooling_dev, unsigned - long *state) +static int video_get_max_state(struct thermal_cooling_device *cooling_dev, + unsigned 
long *state) { struct acpi_device *device = cooling_dev->devdata; struct acpi_video_device *video = acpi_driver_data(device); - *state = video->brightness->count - 3; + *state = video->brightness->count - ACPI_VIDEO_FIRST_LEVEL - 1; return 0; } -static int video_get_cur_state(struct thermal_cooling_device *cooling_dev, unsigned - long *state) +static int video_get_cur_state(struct thermal_cooling_device *cooling_dev, + unsigned long *state) { struct acpi_device *device = cooling_dev->devdata; struct acpi_video_device *video = acpi_driver_data(device); @@ -264,7 +284,8 @@ static int video_get_cur_state(struct thermal_cooling_device *cooling_dev, unsig if (acpi_video_device_lcd_get_level_current(video, &level, false)) return -EINVAL; - for (offset = 2; offset < video->brightness->count; offset++) + for (offset = ACPI_VIDEO_FIRST_LEVEL; offset < video->brightness->count; + offset++) if (level == video->brightness->levels[offset]) { *state = video->brightness->count - offset - 1; return 0; @@ -280,7 +301,7 @@ video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long st struct acpi_video_device *video = acpi_driver_data(device); int level; - if (state >= video->brightness->count - 2) + if (state >= video->brightness->count - ACPI_VIDEO_FIRST_LEVEL) return -EINVAL; state = video->brightness->count - state; @@ -345,10 +366,12 @@ acpi_video_device_lcd_set_level(struct acpi_video_device *device, int level) } device->brightness->curr = level; - for (state = 2; state < device->brightness->count; state++) + for (state = ACPI_VIDEO_FIRST_LEVEL; state < device->brightness->count; + state++) if (level == device->brightness->levels[state]) { if (device->backlight) - device->backlight->props.brightness = state - 2; + device->backlight->props.brightness = + state - ACPI_VIDEO_FIRST_LEVEL; return 0; } @@ -530,14 +553,16 @@ acpi_video_bqc_value_to_level(struct acpi_video_device *device, if (device->brightness->flags._BQC_use_index) { /* - * _BQC returns an index 
that doesn't account for - * the first 2 items with special meaning, so we need - * to compensate for that by offsetting ourselves + * _BQC returns an index that doesn't account for the first 2 + * items with special meaning (see enum acpi_video_level_idx), + * so we need to compensate for that by offsetting ourselves */ if (device->brightness->flags._BCL_reversed) - bqc_value = device->brightness->count - 3 - bqc_value; + bqc_value = device->brightness->count - + ACPI_VIDEO_FIRST_LEVEL - 1 - bqc_value; - level = device->brightness->levels[bqc_value + 2]; + level = device->brightness->levels[bqc_value + + ACPI_VIDEO_FIRST_LEVEL]; } else { level = bqc_value; } @@ -571,7 +596,8 @@ acpi_video_device_lcd_get_level_current(struct acpi_video_device *device, *level = acpi_video_bqc_value_to_level(device, *level); - for (i = 2; i < device->brightness->count; i++) + for (i = ACPI_VIDEO_FIRST_LEVEL; + i < device->brightness->count; i++) if (device->brightness->levels[i] == *level) { device->brightness->curr = *level; return 0; @@ -714,9 +740,37 @@ static int acpi_video_bqc_quirk(struct acpi_video_device *device, /* * Some systems always report current brightness level as maximum - * through _BQC, we need to test another value for them. + * through _BQC, we need to test another value for them. However, + * there is a subtlety: + * + * If the _BCL package ordering is descending, the first level + * (br->levels[2]) is likely to be 0, and if the number of levels + * matches the number of steps, we might confuse a returned level to + * mean the index. + * + * For example: + * + * current_level = max_level = 100 + * test_level = 0 + * returned level = 100 + * + * In this case 100 means the level, not the index, and _BCM failed. + * Still, if the _BCL package ordering is descending, the index of + * level 0 is also 100, so we assume _BQC is indexed, when it's not. + * + * This causes all _BQC calls to return bogus values causing weird + * behavior from the user's perspective. 
For example: + * + * xbacklight -set 10; xbacklight -set 20; + * + * would flash to 90% and then slowly down to the desired level (20). + * + * The solution is simple; test anything other than the first level + * (e.g. 1). */ - test_level = current_level == max_level ? br->levels[3] : max_level; + test_level = current_level == max_level + ? br->levels[ACPI_VIDEO_FIRST_LEVEL + 1] + : max_level; result = acpi_video_device_lcd_set_level(device, test_level); if (result) @@ -730,8 +784,8 @@ static int acpi_video_bqc_quirk(struct acpi_video_device *device, /* buggy _BQC found, need to find out if it uses index */ if (level < br->count) { if (br->flags._BCL_reversed) - level = br->count - 3 - level; - if (br->levels[level + 2] == test_level) + level = br->count - ACPI_VIDEO_FIRST_LEVEL - 1 - level; + if (br->levels[level + ACPI_VIDEO_FIRST_LEVEL] == test_level) br->flags._BQC_use_index = 1; } @@ -761,7 +815,7 @@ int acpi_video_get_levels(struct acpi_device *device, goto out; } - if (obj->package.count < 2) { + if (obj->package.count < ACPI_VIDEO_FIRST_LEVEL) { result = -EINVAL; goto out; } @@ -773,8 +827,13 @@ int acpi_video_get_levels(struct acpi_device *device, goto out; } - br->levels = kmalloc((obj->package.count + 2) * sizeof *(br->levels), - GFP_KERNEL); + /* + * Note that we have to reserve 2 extra items (ACPI_VIDEO_FIRST_LEVEL), + * in order to account for buggy BIOS which don't export the first two + * special levels (see below) + */ + br->levels = kmalloc((obj->package.count + ACPI_VIDEO_FIRST_LEVEL) * + sizeof(*br->levels), GFP_KERNEL); if (!br->levels) { result = -ENOMEM; goto out_free; @@ -788,7 +847,8 @@ int acpi_video_get_levels(struct acpi_device *device, } value = (u32) o->integer.value; /* Skip duplicate entries */ - if (count > 2 && br->levels[count - 1] == value) + if (count > ACPI_VIDEO_FIRST_LEVEL + && br->levels[count - 1] == value) continue; br->levels[count] = value; @@ -804,27 +864,30 @@ int acpi_video_get_levels(struct acpi_device *device, * In 
this case, the first two elements in _BCL packages * are also supported brightness levels that OS should take care of. */ - for (i = 2; i < count; i++) { - if (br->levels[i] == br->levels[0]) + for (i = ACPI_VIDEO_FIRST_LEVEL; i < count; i++) { + if (br->levels[i] == br->levels[ACPI_VIDEO_AC_LEVEL]) level_ac_battery++; - if (br->levels[i] == br->levels[1]) + if (br->levels[i] == br->levels[ACPI_VIDEO_BATTERY_LEVEL]) level_ac_battery++; } - if (level_ac_battery < 2) { - level_ac_battery = 2 - level_ac_battery; + if (level_ac_battery < ACPI_VIDEO_FIRST_LEVEL) { + level_ac_battery = ACPI_VIDEO_FIRST_LEVEL - level_ac_battery; br->flags._BCL_no_ac_battery_levels = 1; - for (i = (count - 1 + level_ac_battery); i >= 2; i--) + for (i = (count - 1 + level_ac_battery); + i >= ACPI_VIDEO_FIRST_LEVEL; i--) br->levels[i] = br->levels[i - level_ac_battery]; count += level_ac_battery; - } else if (level_ac_battery > 2) + } else if (level_ac_battery > ACPI_VIDEO_FIRST_LEVEL) ACPI_ERROR((AE_INFO, "Too many duplicates in _BCL package")); /* Check if the _BCL package is in a reversed order */ - if (max_level == br->levels[2]) { + if (max_level == br->levels[ACPI_VIDEO_FIRST_LEVEL]) { br->flags._BCL_reversed = 1; - sort(&br->levels[2], count - 2, sizeof(br->levels[2]), - acpi_video_cmp_level, NULL); + sort(&br->levels[ACPI_VIDEO_FIRST_LEVEL], + count - ACPI_VIDEO_FIRST_LEVEL, + sizeof(br->levels[ACPI_VIDEO_FIRST_LEVEL]), + acpi_video_cmp_level, NULL); } else if (max_level != br->levels[count - 1]) ACPI_ERROR((AE_INFO, "Found unordered _BCL package")); @@ -894,7 +957,7 @@ acpi_video_init_brightness(struct acpi_video_device *device) * level_old is invalid (no matter whether it's a level * or an index). Set the backlight to max_level in this case. 
*/ - for (i = 2; i < br->count; i++) + for (i = ACPI_VIDEO_FIRST_LEVEL; i < br->count; i++) if (level == br->levels[i]) break; if (i == br->count || !level) @@ -906,7 +969,8 @@ set_level: goto out_free_levels; ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "found %d brightness levels\n", br->count - 2)); + "found %d brightness levels\n", + br->count - ACPI_VIDEO_FIRST_LEVEL)); return 0; out_free_levels: @@ -1297,7 +1361,7 @@ acpi_video_get_next_level(struct acpi_video_device *device, max = max_below = 0; min = min_above = 255; /* Find closest level to level_current */ - for (i = 2; i < device->brightness->count; i++) { + for (i = ACPI_VIDEO_FIRST_LEVEL; i < device->brightness->count; i++) { l = device->brightness->levels[i]; if (abs(l - level_current) < abs(delta)) { delta = l - level_current; @@ -1307,7 +1371,7 @@ acpi_video_get_next_level(struct acpi_video_device *device, } /* Ajust level_current to closest available level */ level_current += delta; - for (i = 2; i < device->brightness->count; i++) { + for (i = ACPI_VIDEO_FIRST_LEVEL; i < device->brightness->count; i++) { l = device->brightness->levels[i]; if (l < min) min = l; @@ -1680,7 +1744,8 @@ static void acpi_video_dev_register_backlight(struct acpi_video_device *device) memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_FIRMWARE; - props.max_brightness = device->brightness->count - 3; + props.max_brightness = + device->brightness->count - ACPI_VIDEO_FIRST_LEVEL - 1; device->backlight = backlight_device_register(name, parent, device, diff --git a/drivers/acpi/acpica/utresrc.c b/drivers/acpi/acpica/utresrc.c index c86bae7b1d0f..ff096d9755b9 100644 --- a/drivers/acpi/acpica/utresrc.c +++ b/drivers/acpi/acpica/utresrc.c @@ -421,10 +421,8 @@ acpi_ut_walk_aml_resources(struct acpi_walk_state *walk_state, ACPI_FUNCTION_TRACE(ut_walk_aml_resources); - /* - * The absolute minimum resource template is one end_tag descriptor. - * However, we will treat a lone end_tag as just a simple buffer. 
- */ + /* The absolute minimum resource template is one end_tag descriptor */ + if (aml_length < sizeof(struct aml_resource_end_tag)) { return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG); } @@ -456,8 +454,9 @@ acpi_ut_walk_aml_resources(struct acpi_walk_state *walk_state, /* Invoke the user function */ if (user_function) { - status = user_function(aml, length, offset, - resource_index, context); + status = + user_function(aml, length, offset, resource_index, + context); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); } @@ -481,12 +480,6 @@ acpi_ut_walk_aml_resources(struct acpi_walk_state *walk_state, *context = aml; } - /* Check if buffer is defined to be longer than the resource length */ - - if (aml_length > (offset + length)) { - return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG); - } - /* Normal exit */ return_ACPI_STATUS(AE_OK); diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig index 4616da4c15be..5a6f80fce0d6 100644 --- a/drivers/acpi/arm64/Kconfig +++ b/drivers/acpi/arm64/Kconfig @@ -4,3 +4,6 @@ config ACPI_IORT bool + +config ACPI_GTDT + bool diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 72331f2ce0e9..1017def2ea12 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_ACPI_IORT) += iort.o +obj-$(CONFIG_ACPI_GTDT) += gtdt.o diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c new file mode 100644 index 000000000000..597a737d538f --- /dev/null +++ b/drivers/acpi/arm64/gtdt.c @@ -0,0 +1,417 @@ +/* + * ARM Specific GTDT table Support + * + * Copyright (C) 2016, Linaro Ltd. + * Author: Daniel Lezcano <daniel.lezcano@linaro.org> + * Fu Wei <fu.wei@linaro.org> + * Hanjun Guo <hanjun.guo@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/acpi.h> +#include <linux/init.h> +#include <linux/irqdomain.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> + +#include <clocksource/arm_arch_timer.h> + +#undef pr_fmt +#define pr_fmt(fmt) "ACPI GTDT: " fmt + +/** + * struct acpi_gtdt_descriptor - Store the key info of GTDT for all functions + * @gtdt: The pointer to the struct acpi_table_gtdt of GTDT table. + * @gtdt_end: The pointer to the end of GTDT table. + * @platform_timer: The pointer to the start of Platform Timer Structure + * + * The struct store the key info of GTDT table, it should be initialized by + * acpi_gtdt_init. + */ +struct acpi_gtdt_descriptor { + struct acpi_table_gtdt *gtdt; + void *gtdt_end; + void *platform_timer; +}; + +static struct acpi_gtdt_descriptor acpi_gtdt_desc __initdata; + +static inline void *next_platform_timer(void *platform_timer) +{ + struct acpi_gtdt_header *gh = platform_timer; + + platform_timer += gh->length; + if (platform_timer < acpi_gtdt_desc.gtdt_end) + return platform_timer; + + return NULL; +} + +#define for_each_platform_timer(_g) \ + for (_g = acpi_gtdt_desc.platform_timer; _g; \ + _g = next_platform_timer(_g)) + +static inline bool is_timer_block(void *platform_timer) +{ + struct acpi_gtdt_header *gh = platform_timer; + + return gh->type == ACPI_GTDT_TYPE_TIMER_BLOCK; +} + +static inline bool is_non_secure_watchdog(void *platform_timer) +{ + struct acpi_gtdt_header *gh = platform_timer; + struct acpi_gtdt_watchdog *wd = platform_timer; + + if (gh->type != ACPI_GTDT_TYPE_WATCHDOG) + return false; + + return !(wd->timer_flags & ACPI_GTDT_WATCHDOG_SECURE); +} + +static int __init map_gt_gsi(u32 interrupt, u32 flags) +{ + int trigger, polarity; + + trigger = (flags & ACPI_GTDT_INTERRUPT_MODE) ? ACPI_EDGE_SENSITIVE + : ACPI_LEVEL_SENSITIVE; + + polarity = (flags & ACPI_GTDT_INTERRUPT_POLARITY) ? 
ACPI_ACTIVE_LOW + : ACPI_ACTIVE_HIGH; + + return acpi_register_gsi(NULL, interrupt, trigger, polarity); +} + +/** + * acpi_gtdt_map_ppi() - Map the PPIs of per-cpu arch_timer. + * @type: the type of PPI. + * + * Note: Secure state is not managed by the kernel on ARM64 systems. + * So we only handle the non-secure timer PPIs, + * ARCH_TIMER_PHYS_SECURE_PPI is treated as invalid type. + * + * Return: the mapped PPI value, 0 if error. + */ +int __init acpi_gtdt_map_ppi(int type) +{ + struct acpi_table_gtdt *gtdt = acpi_gtdt_desc.gtdt; + + switch (type) { + case ARCH_TIMER_PHYS_NONSECURE_PPI: + return map_gt_gsi(gtdt->non_secure_el1_interrupt, + gtdt->non_secure_el1_flags); + case ARCH_TIMER_VIRT_PPI: + return map_gt_gsi(gtdt->virtual_timer_interrupt, + gtdt->virtual_timer_flags); + + case ARCH_TIMER_HYP_PPI: + return map_gt_gsi(gtdt->non_secure_el2_interrupt, + gtdt->non_secure_el2_flags); + default: + pr_err("Failed to map timer interrupt: invalid type.\n"); + } + + return 0; +} + +/** + * acpi_gtdt_c3stop() - Got c3stop info from GTDT according to the type of PPI. + * @type: the type of PPI. + * + * Return: true if the timer HW state is lost when a CPU enters an idle state, + * false otherwise + */ +bool __init acpi_gtdt_c3stop(int type) +{ + struct acpi_table_gtdt *gtdt = acpi_gtdt_desc.gtdt; + + switch (type) { + case ARCH_TIMER_PHYS_NONSECURE_PPI: + return !(gtdt->non_secure_el1_flags & ACPI_GTDT_ALWAYS_ON); + + case ARCH_TIMER_VIRT_PPI: + return !(gtdt->virtual_timer_flags & ACPI_GTDT_ALWAYS_ON); + + case ARCH_TIMER_HYP_PPI: + return !(gtdt->non_secure_el2_flags & ACPI_GTDT_ALWAYS_ON); + + default: + pr_err("Failed to get c3stop info: invalid type.\n"); + } + + return false; +} + +/** + * acpi_gtdt_init() - Get the info of GTDT table to prepare for further init. + * @table: The pointer to GTDT table. + * @platform_timer_count: It points to a integer variable which is used + * for storing the number of platform timers. 
+ * This pointer could be NULL, if the caller + * doesn't need this info. + * + * Return: 0 if success, -EINVAL if error. + */ +int __init acpi_gtdt_init(struct acpi_table_header *table, + int *platform_timer_count) +{ + void *platform_timer; + struct acpi_table_gtdt *gtdt; + + gtdt = container_of(table, struct acpi_table_gtdt, header); + acpi_gtdt_desc.gtdt = gtdt; + acpi_gtdt_desc.gtdt_end = (void *)table + table->length; + acpi_gtdt_desc.platform_timer = NULL; + if (platform_timer_count) + *platform_timer_count = 0; + + if (table->revision < 2) { + pr_warn("Revision:%d doesn't support Platform Timers.\n", + table->revision); + return 0; + } + + if (!gtdt->platform_timer_count) { + pr_debug("No Platform Timer.\n"); + return 0; + } + + platform_timer = (void *)gtdt + gtdt->platform_timer_offset; + if (platform_timer < (void *)table + sizeof(struct acpi_table_gtdt)) { + pr_err(FW_BUG "invalid timer data.\n"); + return -EINVAL; + } + acpi_gtdt_desc.platform_timer = platform_timer; + if (platform_timer_count) + *platform_timer_count = gtdt->platform_timer_count; + + return 0; +} + +static int __init gtdt_parse_timer_block(struct acpi_gtdt_timer_block *block, + struct arch_timer_mem *timer_mem) +{ + int i; + struct arch_timer_mem_frame *frame; + struct acpi_gtdt_timer_entry *gtdt_frame; + + if (!block->timer_count) { + pr_err(FW_BUG "GT block present, but frame count is zero."); + return -ENODEV; + } + + if (block->timer_count > ARCH_TIMER_MEM_MAX_FRAMES) { + pr_err(FW_BUG "GT block lists %d frames, ACPI spec only allows 8\n", + block->timer_count); + return -EINVAL; + } + + timer_mem->cntctlbase = (phys_addr_t)block->block_address; + /* + * The CNTCTLBase frame is 4KB (register offsets 0x000 - 0xFFC). + * See ARM DDI 0487A.k_iss10775, page I1-5129, Table I1-3 + * "CNTCTLBase memory map". 
+ */ + timer_mem->size = SZ_4K; + + gtdt_frame = (void *)block + block->timer_offset; + if (gtdt_frame + block->timer_count != (void *)block + block->header.length) + return -EINVAL; + + /* + * Get the GT timer Frame data for every GT Block Timer + */ + for (i = 0; i < block->timer_count; i++, gtdt_frame++) { + if (gtdt_frame->common_flags & ACPI_GTDT_GT_IS_SECURE_TIMER) + continue; + if (gtdt_frame->frame_number >= ARCH_TIMER_MEM_MAX_FRAMES || + !gtdt_frame->base_address || !gtdt_frame->timer_interrupt) + goto error; + + frame = &timer_mem->frame[gtdt_frame->frame_number]; + + /* duplicate frame */ + if (frame->valid) + goto error; + + frame->phys_irq = map_gt_gsi(gtdt_frame->timer_interrupt, + gtdt_frame->timer_flags); + if (frame->phys_irq <= 0) { + pr_warn("failed to map physical timer irq in frame %d.\n", + gtdt_frame->frame_number); + goto error; + } + + if (gtdt_frame->virtual_timer_interrupt) { + frame->virt_irq = + map_gt_gsi(gtdt_frame->virtual_timer_interrupt, + gtdt_frame->virtual_timer_flags); + if (frame->virt_irq <= 0) { + pr_warn("failed to map virtual timer irq in frame %d.\n", + gtdt_frame->frame_number); + goto error; + } + } else { + pr_debug("virtual timer in frame %d not implemented.\n", + gtdt_frame->frame_number); + } + + frame->cntbase = gtdt_frame->base_address; + /* + * The CNTBaseN frame is 4KB (register offsets 0x000 - 0xFFC). + * See ARM DDI 0487A.k_iss10775, page I1-5130, Table I1-4 + * "CNTBaseN memory map". 
+ */ + frame->size = SZ_4K; + frame->valid = true; + } + + return 0; + +error: + do { + if (gtdt_frame->common_flags & ACPI_GTDT_GT_IS_SECURE_TIMER || + gtdt_frame->frame_number >= ARCH_TIMER_MEM_MAX_FRAMES) + continue; + + frame = &timer_mem->frame[gtdt_frame->frame_number]; + + if (frame->phys_irq > 0) + acpi_unregister_gsi(gtdt_frame->timer_interrupt); + frame->phys_irq = 0; + + if (frame->virt_irq > 0) + acpi_unregister_gsi(gtdt_frame->virtual_timer_interrupt); + frame->virt_irq = 0; + } while (i-- >= 0 && gtdt_frame--); + + return -EINVAL; +} + +/** + * acpi_arch_timer_mem_init() - Get the info of all GT blocks in GTDT table. + * @timer_mem: The pointer to the array of struct arch_timer_mem for returning + * the result of parsing. The element number of this array should + * be platform_timer_count(the total number of platform timers). + * @timer_count: It points to a integer variable which is used for storing the + * number of GT blocks we have parsed. + * + * Return: 0 if success, -EINVAL/-ENODEV if error. + */ +int __init acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, + int *timer_count) +{ + int ret; + void *platform_timer; + + *timer_count = 0; + for_each_platform_timer(platform_timer) { + if (is_timer_block(platform_timer)) { + ret = gtdt_parse_timer_block(platform_timer, timer_mem); + if (ret) + return ret; + timer_mem++; + (*timer_count)++; + } + } + + if (*timer_count) + pr_info("found %d memory-mapped timer block(s).\n", + *timer_count); + + return 0; +} + +/* + * Initialize a SBSA generic Watchdog platform device info from GTDT + */ +static int __init gtdt_import_sbsa_gwdt(struct acpi_gtdt_watchdog *wd, + int index) +{ + struct platform_device *pdev; + int irq = map_gt_gsi(wd->timer_interrupt, wd->timer_flags); + + /* + * According to SBSA specification the size of refresh and control + * frames of SBSA Generic Watchdog is SZ_4K(Offset 0x000 – 0xFFF). 
+ */ + struct resource res[] = { + DEFINE_RES_MEM(wd->control_frame_address, SZ_4K), + DEFINE_RES_MEM(wd->refresh_frame_address, SZ_4K), + DEFINE_RES_IRQ(irq), + }; + int nr_res = ARRAY_SIZE(res); + + pr_debug("found a Watchdog (0x%llx/0x%llx gsi:%u flags:0x%x).\n", + wd->refresh_frame_address, wd->control_frame_address, + wd->timer_interrupt, wd->timer_flags); + + if (!(wd->refresh_frame_address && wd->control_frame_address)) { + pr_err(FW_BUG "failed to get the Watchdog base address.\n"); + acpi_unregister_gsi(wd->timer_interrupt); + return -EINVAL; + } + + if (irq <= 0) { + pr_warn("failed to map the Watchdog interrupt.\n"); + nr_res--; + } + + /* + * Add a platform device named "sbsa-gwdt" to match the platform driver. + * "sbsa-gwdt": SBSA(Server Base System Architecture) Generic Watchdog + * The platform driver can get device info below by matching this name. + */ + pdev = platform_device_register_simple("sbsa-gwdt", index, res, nr_res); + if (IS_ERR(pdev)) { + acpi_unregister_gsi(wd->timer_interrupt); + return PTR_ERR(pdev); + } + + return 0; +} + +static int __init gtdt_sbsa_gwdt_init(void) +{ + void *platform_timer; + struct acpi_table_header *table; + int ret, timer_count, gwdt_count = 0; + + if (acpi_disabled) + return 0; + + if (ACPI_FAILURE(acpi_get_table(ACPI_SIG_GTDT, 0, &table))) + return -EINVAL; + + /* + * Note: Even though the global variable acpi_gtdt_desc has been + * initialized by acpi_gtdt_init() while initializing the arch timers, + * when we call this function to get SBSA watchdogs info from GTDT, the + * pointers stashed in it are stale (since they are early temporary + * mappings carried out before acpi_permanent_mmap is set) and we need + * to re-initialize them with permanent mapped pointer values to let the + * GTDT parsing possible. 
+ */ + ret = acpi_gtdt_init(table, &timer_count); + if (ret || !timer_count) + return ret; + + for_each_platform_timer(platform_timer) { + if (is_non_secure_watchdog(platform_timer)) { + ret = gtdt_import_sbsa_gwdt(platform_timer, gwdt_count); + if (ret) + break; + gwdt_count++; + } + } + + if (gwdt_count) + pr_info("found %d SBSA generic Watchdog(s).\n", gwdt_count); + + return ret; +} + +device_initcall(gtdt_sbsa_gwdt_init); diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 4ef1e4624b2b..d42eeef9d928 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -67,6 +67,7 @@ MODULE_DESCRIPTION("ACPI Battery Driver"); MODULE_LICENSE("GPL"); static async_cookie_t async_cookie; +static bool battery_driver_registered; static int battery_bix_broken_package; static int battery_notification_delay_ms; static unsigned int cache_time = 1000; @@ -93,6 +94,11 @@ static const struct acpi_device_id battery_device_ids[] = { MODULE_DEVICE_TABLE(acpi, battery_device_ids); +/* Lists of PMIC ACPI HIDs with an (often better) native battery driver */ +static const char * const acpi_battery_blacklist[] = { + "INT33F4", /* X-Powers AXP288 PMIC */ +}; + enum { ACPI_BATTERY_ALARM_PRESENT, ACPI_BATTERY_XINFO_PRESENT, @@ -1315,8 +1321,17 @@ static struct acpi_driver acpi_battery_driver = { static void __init acpi_battery_init_async(void *unused, async_cookie_t cookie) { + unsigned int i; int result; + for (i = 0; i < ARRAY_SIZE(acpi_battery_blacklist); i++) + if (acpi_dev_present(acpi_battery_blacklist[i], "1", -1)) { + pr_info(PREFIX ACPI_BATTERY_DEVICE_NAME + ": found native %s PMIC, not loading\n", + acpi_battery_blacklist[i]); + return; + } + dmi_check_system(bat_dmi_table); #ifdef CONFIG_ACPI_PROCFS_POWER @@ -1329,6 +1344,7 @@ static void __init acpi_battery_init_async(void *unused, async_cookie_t cookie) if (result < 0) acpi_unlock_battery_dir(acpi_battery_dir); #endif + battery_driver_registered = (result == 0); } static int __init acpi_battery_init(void) @@ 
-1343,9 +1359,11 @@ static int __init acpi_battery_init(void) static void __exit acpi_battery_exit(void) { async_synchronize_cookie(async_cookie + 1); - acpi_bus_unregister_driver(&acpi_battery_driver); + if (battery_driver_registered) + acpi_bus_unregister_driver(&acpi_battery_driver); #ifdef CONFIG_ACPI_PROCFS_POWER - acpi_unlock_battery_dir(acpi_battery_dir); + if (acpi_battery_dir) + acpi_unlock_battery_dir(acpi_battery_dir); #endif } diff --git a/drivers/acpi/bgrt.c b/drivers/acpi/bgrt.c index ca28aa572aa9..df1c629205e7 100644 --- a/drivers/acpi/bgrt.c +++ b/drivers/acpi/bgrt.c @@ -81,6 +81,12 @@ static struct attribute_group bgrt_attribute_group = { .bin_attrs = bgrt_bin_attributes, }; +int __init acpi_parse_bgrt(struct acpi_table_header *table) +{ + efi_bgrt_init(table); + return 0; +} + static int __init bgrt_init(void) { int ret; diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index 4421f7c9981c..bb542acc0574 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -188,6 +188,14 @@ static struct dmi_system_id acpi_rev_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "Latitude 3350"), }, }, + { + .callback = dmi_enable_rev_override, + .ident = "DELL Inspiron 7537", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7537"), + }, + }, #endif {} }; diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 3ca0729f7e0e..6cbe6036da99 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -132,49 +132,54 @@ __ATTR(_name, 0444, show_##_name, NULL) #define to_cpc_desc(a) container_of(a, struct cpc_desc, kobj) +#define show_cppc_data(access_fn, struct_name, member_name) \ + static ssize_t show_##member_name(struct kobject *kobj, \ + struct attribute *attr, char *buf) \ + { \ + struct cpc_desc *cpc_ptr = to_cpc_desc(kobj); \ + struct struct_name st_name = {0}; \ + int ret; \ + \ + ret = access_fn(cpc_ptr->cpu_id, &st_name); \ + if (ret) \ + return ret; \ 
+ \ + return scnprintf(buf, PAGE_SIZE, "%llu\n", \ + (u64)st_name.member_name); \ + } \ + define_one_cppc_ro(member_name) + +show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, highest_perf); +show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_perf); +show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, nominal_perf); +show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_nonlinear_perf); +show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, reference_perf); +show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time); + static ssize_t show_feedback_ctrs(struct kobject *kobj, struct attribute *attr, char *buf) { struct cpc_desc *cpc_ptr = to_cpc_desc(kobj); struct cppc_perf_fb_ctrs fb_ctrs = {0}; + int ret; - cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs); + ret = cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs); + if (ret) + return ret; return scnprintf(buf, PAGE_SIZE, "ref:%llu del:%llu\n", fb_ctrs.reference, fb_ctrs.delivered); } define_one_cppc_ro(feedback_ctrs); -static ssize_t show_reference_perf(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct cpc_desc *cpc_ptr = to_cpc_desc(kobj); - struct cppc_perf_fb_ctrs fb_ctrs = {0}; - - cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs); - - return scnprintf(buf, PAGE_SIZE, "%llu\n", - fb_ctrs.reference_perf); -} -define_one_cppc_ro(reference_perf); - -static ssize_t show_wraparound_time(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct cpc_desc *cpc_ptr = to_cpc_desc(kobj); - struct cppc_perf_fb_ctrs fb_ctrs = {0}; - - cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs); - - return scnprintf(buf, PAGE_SIZE, "%llu\n", fb_ctrs.ctr_wrap_time); - -} -define_one_cppc_ro(wraparound_time); - static struct attribute *cppc_attrs[] = { &feedback_ctrs.attr, &reference_perf.attr, &wraparound_time.attr, + &highest_perf.attr, + &lowest_perf.attr, + &lowest_nonlinear_perf.attr, + &nominal_perf.attr, NULL }; @@ -972,9 +977,9 @@ static int cpc_write(int cpu, struct cpc_register_resource 
*reg_res, u64 val) int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); - struct cpc_register_resource *highest_reg, *lowest_reg, *ref_perf, - *nom_perf; - u64 high, low, nom; + struct cpc_register_resource *highest_reg, *lowest_reg, + *lowest_non_linear_reg, *nominal_reg; + u64 high, low, nom, min_nonlinear; int ret = 0, regs_in_pcc = 0; if (!cpc_desc) { @@ -984,12 +989,12 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) highest_reg = &cpc_desc->cpc_regs[HIGHEST_PERF]; lowest_reg = &cpc_desc->cpc_regs[LOWEST_PERF]; - ref_perf = &cpc_desc->cpc_regs[REFERENCE_PERF]; - nom_perf = &cpc_desc->cpc_regs[NOMINAL_PERF]; + lowest_non_linear_reg = &cpc_desc->cpc_regs[LOW_NON_LINEAR_PERF]; + nominal_reg = &cpc_desc->cpc_regs[NOMINAL_PERF]; /* Are any of the regs PCC ?*/ if (CPC_IN_PCC(highest_reg) || CPC_IN_PCC(lowest_reg) || - CPC_IN_PCC(ref_perf) || CPC_IN_PCC(nom_perf)) { + CPC_IN_PCC(lowest_non_linear_reg) || CPC_IN_PCC(nominal_reg)) { regs_in_pcc = 1; down_write(&pcc_data.pcc_lock); /* Ring doorbell once to update PCC subspace */ @@ -1005,10 +1010,13 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) cpc_read(cpunum, lowest_reg, &low); perf_caps->lowest_perf = low; - cpc_read(cpunum, nom_perf, &nom); + cpc_read(cpunum, nominal_reg, &nom); perf_caps->nominal_perf = nom; - if (!high || !low || !nom) + cpc_read(cpunum, lowest_non_linear_reg, &min_nonlinear); + perf_caps->lowest_nonlinear_perf = min_nonlinear; + + if (!high || !low || !nom || !min_nonlinear) ret = -EFAULT; out_err: @@ -1083,7 +1091,7 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) perf_fb_ctrs->delivered = delivered; perf_fb_ctrs->reference = reference; perf_fb_ctrs->reference_perf = ref_perf; - perf_fb_ctrs->ctr_wrap_time = ctr_wrap_time; + perf_fb_ctrs->wraparound_time = ctr_wrap_time; out_err: if (regs_in_pcc) up_write(&pcc_data.pcc_lock); diff --git 
a/drivers/acpi/internal.h b/drivers/acpi/internal.h index f15900132912..66229ffa909b 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -65,8 +65,6 @@ static inline void acpi_cmos_rtc_init(void) {} #endif int acpi_rev_override_setup(char *str); -extern bool acpi_force_hot_remove; - void acpi_sysfs_add_hotplug_profile(struct acpi_hotplug_profile *hotplug, const char *name); int acpi_scan_add_handler_with_hotplug(struct acpi_scan_handler *handler, diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 662036bdc65e..c8ea9d698cd0 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1617,7 +1617,11 @@ static int cmp_map(const void *m0, const void *m1) const struct nfit_set_info_map *map0 = m0; const struct nfit_set_info_map *map1 = m1; - return map0->region_offset - map1->region_offset; + if (map0->region_offset < map1->region_offset) + return -1; + else if (map0->region_offset > map1->region_offset) + return 1; + return 0; } /* Retrieve the nth entry referencing this spa */ diff --git a/drivers/acpi/pmic/intel_pmic_chtwc.c b/drivers/acpi/pmic/intel_pmic_chtwc.c new file mode 100644 index 000000000000..85636d7a9d39 --- /dev/null +++ b/drivers/acpi/pmic/intel_pmic_chtwc.c @@ -0,0 +1,280 @@ +/* + * Intel CHT Whiskey Cove PMIC operation region driver + * Copyright (C) 2017 Hans de Goede <hdegoede@redhat.com> + * + * Based on various non upstream patches to support the CHT Whiskey Cove PMIC: + * Copyright (C) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/acpi.h> +#include <linux/init.h> +#include <linux/mfd/intel_soc_pmic.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include "intel_pmic.h" + +#define CHT_WC_V1P05A_CTRL 0x6e3b +#define CHT_WC_V1P15_CTRL 0x6e3c +#define CHT_WC_V1P05A_VSEL 0x6e3d +#define CHT_WC_V1P15_VSEL 0x6e3e +#define CHT_WC_V1P8A_CTRL 0x6e56 +#define CHT_WC_V1P8SX_CTRL 0x6e57 +#define CHT_WC_VDDQ_CTRL 0x6e58 +#define CHT_WC_V1P2A_CTRL 0x6e59 +#define CHT_WC_V1P2SX_CTRL 0x6e5a +#define CHT_WC_V1P8A_VSEL 0x6e5b +#define CHT_WC_VDDQ_VSEL 0x6e5c +#define CHT_WC_V2P8SX_CTRL 0x6e5d +#define CHT_WC_V3P3A_CTRL 0x6e5e +#define CHT_WC_V3P3SD_CTRL 0x6e5f +#define CHT_WC_VSDIO_CTRL 0x6e67 +#define CHT_WC_V3P3A_VSEL 0x6e68 +#define CHT_WC_VPROG1A_CTRL 0x6e90 +#define CHT_WC_VPROG1B_CTRL 0x6e91 +#define CHT_WC_VPROG1F_CTRL 0x6e95 +#define CHT_WC_VPROG2D_CTRL 0x6e99 +#define CHT_WC_VPROG3A_CTRL 0x6e9a +#define CHT_WC_VPROG3B_CTRL 0x6e9b +#define CHT_WC_VPROG4A_CTRL 0x6e9c +#define CHT_WC_VPROG4B_CTRL 0x6e9d +#define CHT_WC_VPROG4C_CTRL 0x6e9e +#define CHT_WC_VPROG4D_CTRL 0x6e9f +#define CHT_WC_VPROG5A_CTRL 0x6ea0 +#define CHT_WC_VPROG5B_CTRL 0x6ea1 +#define CHT_WC_VPROG6A_CTRL 0x6ea2 +#define CHT_WC_VPROG6B_CTRL 0x6ea3 +#define CHT_WC_VPROG1A_VSEL 0x6ec0 +#define CHT_WC_VPROG1B_VSEL 0x6ec1 +#define CHT_WC_V1P8SX_VSEL 0x6ec2 +#define CHT_WC_V1P2SX_VSEL 0x6ec3 +#define CHT_WC_V1P2A_VSEL 0x6ec4 +#define CHT_WC_VPROG1F_VSEL 0x6ec5 +#define CHT_WC_VSDIO_VSEL 0x6ec6 +#define CHT_WC_V2P8SX_VSEL 0x6ec7 +#define CHT_WC_V3P3SD_VSEL 0x6ec8 +#define CHT_WC_VPROG2D_VSEL 0x6ec9 +#define CHT_WC_VPROG3A_VSEL 0x6eca +#define CHT_WC_VPROG3B_VSEL 0x6ecb +#define CHT_WC_VPROG4A_VSEL 0x6ecc +#define CHT_WC_VPROG4B_VSEL 0x6ecd +#define CHT_WC_VPROG4C_VSEL 0x6ece +#define CHT_WC_VPROG4D_VSEL 0x6ecf +#define CHT_WC_VPROG5A_VSEL 0x6ed0 +#define CHT_WC_VPROG5B_VSEL 0x6ed1 +#define CHT_WC_VPROG6A_VSEL 0x6ed2 +#define CHT_WC_VPROG6B_VSEL 0x6ed3 + +/* + * Regulator support is based on the non 
upstream patch: + * "regulator: whiskey_cove: implements Whiskey Cove pmic VRF support" + * https://github.com/intel-aero/meta-intel-aero/blob/master/recipes-kernel/linux/linux-yocto/0019-regulator-whiskey_cove-implements-WhiskeyCove-pmic-V.patch + */ +static struct pmic_table power_table[] = { + { + .address = 0x0, + .reg = CHT_WC_V1P8A_CTRL, + .bit = 0x01, + }, /* V18A */ + { + .address = 0x04, + .reg = CHT_WC_V1P8SX_CTRL, + .bit = 0x07, + }, /* V18X */ + { + .address = 0x08, + .reg = CHT_WC_VDDQ_CTRL, + .bit = 0x01, + }, /* VDDQ */ + { + .address = 0x0c, + .reg = CHT_WC_V1P2A_CTRL, + .bit = 0x07, + }, /* V12A */ + { + .address = 0x10, + .reg = CHT_WC_V1P2SX_CTRL, + .bit = 0x07, + }, /* V12X */ + { + .address = 0x14, + .reg = CHT_WC_V2P8SX_CTRL, + .bit = 0x07, + }, /* V28X */ + { + .address = 0x18, + .reg = CHT_WC_V3P3A_CTRL, + .bit = 0x01, + }, /* V33A */ + { + .address = 0x1c, + .reg = CHT_WC_V3P3SD_CTRL, + .bit = 0x07, + }, /* V3SD */ + { + .address = 0x20, + .reg = CHT_WC_VSDIO_CTRL, + .bit = 0x07, + }, /* VSD */ +/* { + .address = 0x24, + .reg = ??, + .bit = ??, + }, ** VSW2 */ +/* { + .address = 0x28, + .reg = ??, + .bit = ??, + }, ** VSW1 */ +/* { + .address = 0x2c, + .reg = ??, + .bit = ??, + }, ** VUPY */ +/* { + .address = 0x30, + .reg = ??, + .bit = ??, + }, ** VRSO */ + { + .address = 0x34, + .reg = CHT_WC_VPROG1A_CTRL, + .bit = 0x07, + }, /* VP1A */ + { + .address = 0x38, + .reg = CHT_WC_VPROG1B_CTRL, + .bit = 0x07, + }, /* VP1B */ + { + .address = 0x3c, + .reg = CHT_WC_VPROG1F_CTRL, + .bit = 0x07, + }, /* VP1F */ + { + .address = 0x40, + .reg = CHT_WC_VPROG2D_CTRL, + .bit = 0x07, + }, /* VP2D */ + { + .address = 0x44, + .reg = CHT_WC_VPROG3A_CTRL, + .bit = 0x07, + }, /* VP3A */ + { + .address = 0x48, + .reg = CHT_WC_VPROG3B_CTRL, + .bit = 0x07, + }, /* VP3B */ + { + .address = 0x4c, + .reg = CHT_WC_VPROG4A_CTRL, + .bit = 0x07, + }, /* VP4A */ + { + .address = 0x50, + .reg = CHT_WC_VPROG4B_CTRL, + .bit = 0x07, + }, /* VP4B */ + { + .address = 0x54, + 
.reg = CHT_WC_VPROG4C_CTRL, + .bit = 0x07, + }, /* VP4C */ + { + .address = 0x58, + .reg = CHT_WC_VPROG4D_CTRL, + .bit = 0x07, + }, /* VP4D */ + { + .address = 0x5c, + .reg = CHT_WC_VPROG5A_CTRL, + .bit = 0x07, + }, /* VP5A */ + { + .address = 0x60, + .reg = CHT_WC_VPROG5B_CTRL, + .bit = 0x07, + }, /* VP5B */ + { + .address = 0x64, + .reg = CHT_WC_VPROG6A_CTRL, + .bit = 0x07, + }, /* VP6A */ + { + .address = 0x68, + .reg = CHT_WC_VPROG6B_CTRL, + .bit = 0x07, + }, /* VP6B */ +/* { + .address = 0x6c, + .reg = ??, + .bit = ??, + } ** VP7A */ +}; + +static int intel_cht_wc_pmic_get_power(struct regmap *regmap, int reg, + int bit, u64 *value) +{ + int data; + + if (regmap_read(regmap, reg, &data)) + return -EIO; + + *value = (data & bit) ? 1 : 0; + return 0; +} + +static int intel_cht_wc_pmic_update_power(struct regmap *regmap, int reg, + int bitmask, bool on) +{ + return regmap_update_bits(regmap, reg, bitmask, on ? 1 : 0); +} + +/* + * The thermal table and ops are empty, we do not support the Thermal opregion + * (DPTF) due to lacking documentation. 
+ */ +static struct intel_pmic_opregion_data intel_cht_wc_pmic_opregion_data = { + .get_power = intel_cht_wc_pmic_get_power, + .update_power = intel_cht_wc_pmic_update_power, + .power_table = power_table, + .power_table_count = ARRAY_SIZE(power_table), +}; + +static int intel_cht_wc_pmic_opregion_probe(struct platform_device *pdev) +{ + struct intel_soc_pmic *pmic = dev_get_drvdata(pdev->dev.parent); + + return intel_pmic_install_opregion_handler(&pdev->dev, + ACPI_HANDLE(pdev->dev.parent), + pmic->regmap, + &intel_cht_wc_pmic_opregion_data); +} + +static struct platform_device_id cht_wc_opregion_id_table[] = { + { .name = "cht_wcove_region" }, + {}, +}; +MODULE_DEVICE_TABLE(platform, cht_wc_opregion_id_table); + +static struct platform_driver intel_cht_wc_pmic_opregion_driver = { + .probe = intel_cht_wc_pmic_opregion_probe, + .driver = { + .name = "cht_whiskey_cove_pmic", + }, + .id_table = cht_wc_opregion_id_table, +}; +module_platform_driver(intel_cht_wc_pmic_opregion_driver); + +MODULE_DESCRIPTION("Intel CHT Whiskey Cove PMIC operation region driver"); +MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/acpi/pmic/intel_pmic_xpower.c b/drivers/acpi/pmic/intel_pmic_xpower.c index e6e991ac20f3..55f51115f016 100644 --- a/drivers/acpi/pmic/intel_pmic_xpower.c +++ b/drivers/acpi/pmic/intel_pmic_xpower.c @@ -18,7 +18,6 @@ #include <linux/mfd/axp20x.h> #include <linux/regmap.h> #include <linux/platform_device.h> -#include <linux/iio/consumer.h> #include "intel_pmic.h" #define XPOWER_GPADC_LOW 0x5b @@ -186,28 +185,16 @@ static int intel_xpower_pmic_update_power(struct regmap *regmap, int reg, * @regmap: regmap of the PMIC device * @reg: register to get the reading * - * We could get the sensor value by manipulating the HW regs here, but since - * the axp288 IIO driver may also access the same regs at the same time, the - * APIs provided by IIO subsystem are used here instead to avoid problems. 
As - * a result, the two passed in params are of no actual use. - * * Return a positive value on success, errno on failure. */ static int intel_xpower_pmic_get_raw_temp(struct regmap *regmap, int reg) { - struct iio_channel *gpadc_chan; - int ret, val; - - gpadc_chan = iio_channel_get(NULL, "axp288-system-temp"); - if (IS_ERR_OR_NULL(gpadc_chan)) - return -EACCES; + u8 buf[2]; - ret = iio_read_channel_raw(gpadc_chan, &val); - if (ret < 0) - val = ret; + if (regmap_bulk_read(regmap, AXP288_GP_ADC_H, buf, 2)) + return -EIO; - iio_channel_release(gpadc_chan); - return val; + return (buf[0] << 4) + ((buf[1] >> 4) & 0x0F); } static struct intel_pmic_opregion_data intel_xpower_pmic_opregion_data = { diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index fcd4ce6f78d5..1c2b846c5776 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -200,6 +200,7 @@ static int acpi_power_get_list_state(struct list_head *list, int *state) return -EINVAL; /* The state of the list is 'on' IFF all resources are 'on'. 
*/ + cur_state = 0; list_for_each_entry(entry, list, node) { struct acpi_power_resource *resource = entry->resource; acpi_handle handle = resource->device.handle; diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 3afddcd834ef..9364398204e9 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -37,14 +37,16 @@ static const u8 ads_uuid[16] = { static bool acpi_enumerate_nondev_subnodes(acpi_handle scope, const union acpi_object *desc, - struct acpi_device_data *data); + struct acpi_device_data *data, + struct fwnode_handle *parent); static bool acpi_extract_properties(const union acpi_object *desc, struct acpi_device_data *data); static bool acpi_nondev_subnode_extract(const union acpi_object *desc, acpi_handle handle, const union acpi_object *link, - struct list_head *list) + struct list_head *list, + struct fwnode_handle *parent) { struct acpi_data_node *dn; bool result; @@ -55,6 +57,7 @@ static bool acpi_nondev_subnode_extract(const union acpi_object *desc, dn->name = link->package.elements[0].string.pointer; dn->fwnode.type = FWNODE_ACPI_DATA; + dn->parent = parent; INIT_LIST_HEAD(&dn->data.subnodes); result = acpi_extract_properties(desc, &dn->data); @@ -71,9 +74,11 @@ static bool acpi_nondev_subnode_extract(const union acpi_object *desc, */ status = acpi_get_parent(handle, &scope); if (ACPI_SUCCESS(status) - && acpi_enumerate_nondev_subnodes(scope, desc, &dn->data)) + && acpi_enumerate_nondev_subnodes(scope, desc, &dn->data, + &dn->fwnode)) result = true; - } else if (acpi_enumerate_nondev_subnodes(NULL, desc, &dn->data)) { + } else if (acpi_enumerate_nondev_subnodes(NULL, desc, &dn->data, + &dn->fwnode)) { result = true; } @@ -91,7 +96,8 @@ static bool acpi_nondev_subnode_extract(const union acpi_object *desc, static bool acpi_nondev_subnode_data_ok(acpi_handle handle, const union acpi_object *link, - struct list_head *list) + struct list_head *list, + struct fwnode_handle *parent) { struct acpi_buffer buf = { 
ACPI_ALLOCATE_BUFFER }; acpi_status status; @@ -101,7 +107,8 @@ static bool acpi_nondev_subnode_data_ok(acpi_handle handle, if (ACPI_FAILURE(status)) return false; - if (acpi_nondev_subnode_extract(buf.pointer, handle, link, list)) + if (acpi_nondev_subnode_extract(buf.pointer, handle, link, list, + parent)) return true; ACPI_FREE(buf.pointer); @@ -110,7 +117,8 @@ static bool acpi_nondev_subnode_data_ok(acpi_handle handle, static bool acpi_nondev_subnode_ok(acpi_handle scope, const union acpi_object *link, - struct list_head *list) + struct list_head *list, + struct fwnode_handle *parent) { acpi_handle handle; acpi_status status; @@ -123,12 +131,13 @@ static bool acpi_nondev_subnode_ok(acpi_handle scope, if (ACPI_FAILURE(status)) return false; - return acpi_nondev_subnode_data_ok(handle, link, list); + return acpi_nondev_subnode_data_ok(handle, link, list, parent); } static int acpi_add_nondev_subnodes(acpi_handle scope, const union acpi_object *links, - struct list_head *list) + struct list_head *list, + struct fwnode_handle *parent) { bool ret = false; int i; @@ -150,15 +159,18 @@ static int acpi_add_nondev_subnodes(acpi_handle scope, /* The second one may be a string, a reference or a package. 
*/ switch (link->package.elements[1].type) { case ACPI_TYPE_STRING: - result = acpi_nondev_subnode_ok(scope, link, list); + result = acpi_nondev_subnode_ok(scope, link, list, + parent); break; case ACPI_TYPE_LOCAL_REFERENCE: handle = link->package.elements[1].reference.handle; - result = acpi_nondev_subnode_data_ok(handle, link, list); + result = acpi_nondev_subnode_data_ok(handle, link, list, + parent); break; case ACPI_TYPE_PACKAGE: desc = &link->package.elements[1]; - result = acpi_nondev_subnode_extract(desc, NULL, link, list); + result = acpi_nondev_subnode_extract(desc, NULL, link, + list, parent); break; default: result = false; @@ -172,7 +184,8 @@ static int acpi_add_nondev_subnodes(acpi_handle scope, static bool acpi_enumerate_nondev_subnodes(acpi_handle scope, const union acpi_object *desc, - struct acpi_device_data *data) + struct acpi_device_data *data, + struct fwnode_handle *parent) { int i; @@ -194,7 +207,8 @@ static bool acpi_enumerate_nondev_subnodes(acpi_handle scope, if (memcmp(uuid->buffer.pointer, ads_uuid, sizeof(ads_uuid))) continue; - return acpi_add_nondev_subnodes(scope, links, &data->subnodes); + return acpi_add_nondev_subnodes(scope, links, &data->subnodes, + parent); } return false; @@ -345,7 +359,8 @@ void acpi_init_properties(struct acpi_device *adev) if (acpi_of) acpi_init_of_compatible(adev); } - if (acpi_enumerate_nondev_subnodes(adev->handle, buf.pointer, &adev->data)) + if (acpi_enumerate_nondev_subnodes(adev->handle, buf.pointer, + &adev->data, acpi_fwnode_handle(adev))) adev->data.pointer = buf.pointer; if (!adev->data.pointer) { @@ -699,6 +714,8 @@ static int acpi_data_prop_read_single(struct acpi_device_data *data, return ret; *(char **)val = obj->string.pointer; + + return 1; } else { ret = -EINVAL; } @@ -708,7 +725,15 @@ static int acpi_data_prop_read_single(struct acpi_device_data *data, int acpi_dev_prop_read_single(struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val) { - return adev ? 
acpi_data_prop_read_single(&adev->data, propname, proptype, val) : -EINVAL; + int ret; + + if (!adev) + return -EINVAL; + + ret = acpi_data_prop_read_single(&adev->data, propname, proptype, val); + if (ret < 0 || proptype != DEV_PROP_STRING) + return ret; + return 0; } static int acpi_copy_property_array_u8(const union acpi_object *items, u8 *val, @@ -784,7 +809,7 @@ static int acpi_copy_property_array_string(const union acpi_object *items, val[i] = items[i].string.pointer; } - return 0; + return nval; } static int acpi_data_prop_read(struct acpi_device_data *data, @@ -798,7 +823,7 @@ static int acpi_data_prop_read(struct acpi_device_data *data, if (val && nval == 1) { ret = acpi_data_prop_read_single(data, propname, proptype, val); - if (!ret) + if (ret >= 0) return ret; } @@ -809,7 +834,7 @@ static int acpi_data_prop_read(struct acpi_device_data *data, if (!val) return obj->package.count; - if (nval > obj->package.count) + if (proptype != DEV_PROP_STRING && nval > obj->package.count) return -EOVERFLOW; else if (nval <= 0) return -EINVAL; @@ -830,7 +855,9 @@ static int acpi_data_prop_read(struct acpi_device_data *data, ret = acpi_copy_property_array_u64(items, (u64 *)val, nval); break; case DEV_PROP_STRING: - ret = acpi_copy_property_array_string(items, (char **)val, nval); + ret = acpi_copy_property_array_string( + items, (char **)val, + min_t(u32, nval, obj->package.count)); break; default: ret = -EINVAL; @@ -865,21 +892,22 @@ int acpi_node_prop_read(struct fwnode_handle *fwnode, const char *propname, } /** - * acpi_get_next_subnode - Return the next child node handle for a device. - * @dev: Device to find the next child node for. + * acpi_get_next_subnode - Return the next child node handle for a fwnode + * @fwnode: Firmware node to find the next child node for. * @child: Handle to one of the device's child nodes or a null handle.
*/ -struct fwnode_handle *acpi_get_next_subnode(struct device *dev, +struct fwnode_handle *acpi_get_next_subnode(struct fwnode_handle *fwnode, struct fwnode_handle *child) { - struct acpi_device *adev = ACPI_COMPANION(dev); + struct acpi_device *adev = to_acpi_device_node(fwnode); struct list_head *head, *next; - if (!adev) - return NULL; - if (!child || child->type == FWNODE_ACPI) { - head = &adev->children; + if (adev) + head = &adev->children; + else + goto nondev; + if (list_empty(head)) goto nondev; @@ -888,7 +916,6 @@ struct fwnode_handle *acpi_get_next_subnode(struct device *dev, next = adev->node.next; if (next == head) { child = NULL; - adev = ACPI_COMPANION(dev); goto nondev; } adev = list_entry(next, struct acpi_device, node); @@ -900,9 +927,16 @@ struct fwnode_handle *acpi_get_next_subnode(struct device *dev, nondev: if (!child || child->type == FWNODE_ACPI_DATA) { + struct acpi_data_node *data = to_acpi_data_node(fwnode); struct acpi_data_node *dn; - head = &adev->data.subnodes; + if (adev) + head = &adev->data.subnodes; + else if (data) + head = &data->data.subnodes; + else + return NULL; + if (list_empty(head)) return NULL; @@ -920,3 +954,168 @@ struct fwnode_handle *acpi_get_next_subnode(struct device *dev, } return NULL; } + +/** + * acpi_node_get_parent - Return parent fwnode of this fwnode + * @fwnode: Firmware node whose parent to get + * + * Returns parent node of an ACPI device or data firmware node or %NULL if + * not available. 
+ */ +struct fwnode_handle *acpi_node_get_parent(struct fwnode_handle *fwnode) +{ + if (is_acpi_data_node(fwnode)) { + /* All data nodes have parent pointer so just return that */ + return to_acpi_data_node(fwnode)->parent; + } else if (is_acpi_device_node(fwnode)) { + acpi_handle handle, parent_handle; + + handle = to_acpi_device_node(fwnode)->handle; + if (ACPI_SUCCESS(acpi_get_parent(handle, &parent_handle))) { + struct acpi_device *adev; + + if (!acpi_bus_get_device(parent_handle, &adev)) + return acpi_fwnode_handle(adev); + } + } + + return NULL; +} + +/** + * acpi_graph_get_next_endpoint - Get next endpoint ACPI firmware node + * @fwnode: Pointer to the parent firmware node + * @prev: Previous endpoint node or %NULL to get the first + * + * Looks up next endpoint ACPI firmware node below a given @fwnode. Returns + * %NULL if there is no next endpoint, ERR_PTR() in case of error. In case + * of success the next endpoint is returned. + */ +struct fwnode_handle *acpi_graph_get_next_endpoint(struct fwnode_handle *fwnode, + struct fwnode_handle *prev) +{ + struct fwnode_handle *port = NULL; + struct fwnode_handle *endpoint; + + if (!prev) { + do { + port = fwnode_get_next_child_node(fwnode, port); + /* Ports must have port property */ + if (fwnode_property_present(port, "port")) + break; + } while (port); + } else { + port = fwnode_get_parent(prev); + } + + if (!port) + return NULL; + + endpoint = fwnode_get_next_child_node(port, prev); + while (!endpoint) { + port = fwnode_get_next_child_node(fwnode, port); + if (!port) + break; + if (fwnode_property_present(port, "port")) + endpoint = fwnode_get_next_child_node(port, NULL); + } + + if (endpoint) { + /* Endpoints must have "endpoint" property */ + if (!fwnode_property_present(endpoint, "endpoint")) + return ERR_PTR(-EPROTO); + } + + return endpoint; +} + +/** + * acpi_graph_get_child_prop_value - Return a child with a given property value + * @fwnode: device fwnode + * @prop_name: The name of the property to look 
for + * @val: the desired property value + * + * Return the child of @fwnode whose @prop_name property equals @val. Returns + * the child node on success, NULL otherwise. + */ +static struct fwnode_handle *acpi_graph_get_child_prop_value( + struct fwnode_handle *fwnode, const char *prop_name, unsigned int val) +{ + struct fwnode_handle *child; + + fwnode_for_each_child_node(fwnode, child) { + u32 nr; + + if (fwnode_property_read_u32(child, prop_name, &nr)) + continue; + + if (val == nr) + return child; + } + + return NULL; +} + + +/** + * acpi_graph_get_remote_endpoint - Parses and returns remote end of an endpoint + * @fwnode: Endpoint firmware node pointing to a remote device + * @parent: Firmware node of remote port parent is filled here if not %NULL + * @port: Firmware node of remote port is filled here if not %NULL + * @endpoint: Firmware node of remote endpoint is filled here if not %NULL + * + * Function parses remote end of ACPI firmware remote endpoint and fills in + * fields requested by the caller. Returns %0 in case of success and + * negative errno otherwise. + */ +int acpi_graph_get_remote_endpoint(struct fwnode_handle *fwnode, + struct fwnode_handle **parent, + struct fwnode_handle **port, + struct fwnode_handle **endpoint) +{ + unsigned int port_nr, endpoint_nr; + struct acpi_reference_args args; + int ret; + + memset(&args, 0, sizeof(args)); + ret = acpi_node_get_property_reference(fwnode, "remote-endpoint", 0, + &args); + if (ret) + return ret; + + /* + * Always require two arguments with the reference: port and + * endpoint indices.
+ */ + if (args.nargs != 2) + return -EPROTO; + + fwnode = acpi_fwnode_handle(args.adev); + port_nr = args.args[0]; + endpoint_nr = args.args[1]; + + if (parent) + *parent = fwnode; + + if (!port && !endpoint) + return 0; + + fwnode = acpi_graph_get_child_prop_value(fwnode, "port", port_nr); + if (!fwnode) + return -EPROTO; + + if (port) + *port = fwnode; + + if (!endpoint) + return 0; + + fwnode = acpi_graph_get_child_prop_value(fwnode, "endpoint", + endpoint_nr); + if (!fwnode) + return -EPROTO; + + *endpoint = fwnode; + + return 0; +} diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 192691880d55..c26931067415 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -30,12 +30,6 @@ extern struct acpi_device *acpi_root; #define INVALID_ACPI_HANDLE ((acpi_handle)empty_zero_page) -/* - * If set, devices will be hot-removed even if they cannot be put offline - * gracefully (from the kernel's standpoint). - */ -bool acpi_force_hot_remove; - static const char *dummy_hid = "device"; static LIST_HEAD(acpi_dep_list); @@ -170,9 +164,6 @@ static acpi_status acpi_bus_offline(acpi_handle handle, u32 lvl, void *data, pn->put_online = false; } ret = device_offline(pn->dev); - if (acpi_force_hot_remove) - continue; - if (ret >= 0) { pn->put_online = !ret; } else { @@ -241,11 +232,11 @@ static int acpi_scan_try_to_offline(struct acpi_device *device) acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX, NULL, acpi_bus_offline, (void *)true, (void **)&errdev); - if (!errdev || acpi_force_hot_remove) + if (!errdev) acpi_bus_offline(handle, 0, (void *)true, (void **)&errdev); - if (errdev && !acpi_force_hot_remove) { + if (errdev) { dev_warn(errdev, "Offline failed.\n"); acpi_bus_online(handle, 0, NULL, NULL); acpi_walk_namespace(ACPI_TYPE_ANY, handle, @@ -263,8 +254,7 @@ static int acpi_scan_hot_remove(struct acpi_device *device) unsigned long long sta; acpi_status status; - if (device->handler && device->handler->hotplug.demand_offline - && 
!acpi_force_hot_remove) { + if (device->handler && device->handler->hotplug.demand_offline) { if (!acpi_scan_is_offline(device, true)) return -EBUSY; } else { @@ -1850,6 +1840,8 @@ static void acpi_bus_attach(struct acpi_device *device) device->flags.power_manageable = 0; device->flags.initialized = true; + } else if (device->flags.visited) { + goto ok; } ret = acpi_scan_attach_handler(device); @@ -1857,15 +1849,20 @@ static void acpi_bus_attach(struct acpi_device *device) return; device->flags.match_driver = true; - if (!ret) { - ret = device_attach(&device->dev); - if (ret < 0) - return; - - if (!ret && device->pnp.type.platform_id) - acpi_default_enumeration(device); + if (ret > 0) { + acpi_device_set_enumerated(device); + goto ok; } + ret = device_attach(&device->dev); + if (ret < 0) + return; + + if (device->pnp.type.platform_id) + acpi_default_enumeration(device); + else + acpi_device_set_enumerated(device); + ok: list_for_each_entry(child, &device->children, node) acpi_bus_attach(child); diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index cf05ae973381..1b5ee1e0e5a3 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -921,7 +921,7 @@ void acpi_sysfs_add_hotplug_profile(struct acpi_hotplug_profile *hotplug, static ssize_t force_remove_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", !!acpi_force_hot_remove); + return sprintf(buf, "%d\n", 0); } static ssize_t force_remove_store(struct kobject *kobj, @@ -935,9 +935,10 @@ static ssize_t force_remove_store(struct kobject *kobj, if (ret < 0) return ret; - lock_device_hotplug(); - acpi_force_hot_remove = val; - unlock_device_hotplug(); + if (val) { + pr_err("Enabling force_remove is not supported anymore. 
Please report to linux-acpi@vger.kernel.org if you depend on this functionality\n"); + return -EINVAL; + } return size; } diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 2604189d6cd1..0dae722ab2ec 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -311,22 +311,6 @@ acpi_parse_entries_array(char *id, unsigned long table_size, } int __init -acpi_parse_entries(char *id, - unsigned long table_size, - acpi_tbl_entry_handler handler, - struct acpi_table_header *table_header, - int entry_id, unsigned int max_entries) -{ - struct acpi_subtable_proc proc = { - .id = entry_id, - .handler = handler, - }; - - return acpi_parse_entries_array(id, table_size, table_header, - &proc, 1, max_entries); -} - -int __init acpi_table_parse_entries_array(char *id, unsigned long table_size, struct acpi_subtable_proc *proc, int proc_num, diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 22c09952e177..27d0dcfcf47d 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -736,6 +736,72 @@ bool acpi_dev_found(const char *hid) } EXPORT_SYMBOL(acpi_dev_found); +struct acpi_dev_present_info { + struct acpi_device_id hid[2]; + const char *uid; + s64 hrv; +}; + +static int acpi_dev_present_cb(struct device *dev, void *data) +{ + struct acpi_device *adev = to_acpi_device(dev); + struct acpi_dev_present_info *match = data; + unsigned long long hrv; + acpi_status status; + + if (acpi_match_device_ids(adev, match->hid)) + return 0; + + if (match->uid && (!adev->pnp.unique_id || + strcmp(adev->pnp.unique_id, match->uid))) + return 0; + + if (match->hrv == -1) + return 1; + + status = acpi_evaluate_integer(adev->handle, "_HRV", NULL, &hrv); + if (ACPI_FAILURE(status)) + return 0; + + return hrv == match->hrv; +} + +/** + * acpi_dev_present - Detect that a given ACPI device is present + * @hid: Hardware ID of the device. 
+ * @uid: Unique ID of the device, pass NULL to not check _UID + * @hrv: Hardware Revision of the device, pass -1 to not check _HRV + * + * Return %true if a matching device was present at the moment of invocation. + * Note that if the device is pluggable, it may since have disappeared. + * + * Note that unlike acpi_dev_found() this function checks the status + * of the device. So for devices which are present in the dsdt, but + * which are disabled (their _STA callback returns 0) this function + * will return false. + * + * For this function to work, acpi_bus_scan() must have been executed + * which happens in the subsys_initcall() subsection. Hence, do not + * call from a subsys_initcall() or earlier (use acpi_get_devices() + * instead). Calling from module_init() is fine (which is synonymous + * with device_initcall()). + */ +bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) +{ + struct acpi_dev_present_info match = {}; + struct device *dev; + + strlcpy(match.hid[0].id, hid, sizeof(match.hid[0].id)); + match.uid = uid; + match.hrv = hrv; + + dev = bus_find_device(&acpi_bus_type, NULL, &match, + acpi_dev_present_cb); + + return !!dev; +} +EXPORT_SYMBOL(acpi_dev_present); + /* * acpi_backlight= handling, this is done here rather then in video_detect.c * because __setup cannot be used in modules. diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 70b57d2229d6..ff6cb9e4c381 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -14,7 +14,6 @@ menuconfig ATA tristate "Serial ATA and Parallel ATA drivers (libata)" depends on HAS_IOMEM depends on BLOCK - depends on !(M32R || S390) || BROKEN select SCSI select GLOB ---help--- @@ -118,6 +117,15 @@ config AHCI_DA850 If unsure, say N. +config AHCI_DM816 + tristate "DaVinci DM816 AHCI SATA support" + depends on ARCH_OMAP2PLUS + help + This option enables support for the DaVinci DM816 SoC's + onboard AHCI SATA controller. + + If unsure, say N. 
+ config AHCI_ST tristate "ST AHCI SATA support" depends on ARCH_STI @@ -885,14 +893,6 @@ config PATA_AT32 If unsure, say N. -config PATA_AT91 - tristate "PATA support for AT91SAM9260" - depends on ARM && SOC_AT91SAM9 - help - This option enables support for IDE devices on the Atmel AT91SAM9260 SoC. - - If unsure, say N. - config PATA_CMD640_PCI tristate "CMD640 PCI PATA support (Experimental)" depends on PCI diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 89a0a1915d36..3048cc100a46 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_SATA_HIGHBANK) += sata_highbank.o libahci.o obj-$(CONFIG_AHCI_BRCM) += ahci_brcm.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_CEVA) += ahci_ceva.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_DA850) += ahci_da850.o libahci.o libahci_platform.o +obj-$(CONFIG_AHCI_DM816) += ahci_dm816.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_IMX) += ahci_imx.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_MVEBU) += ahci_mvebu.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_OCTEON) += ahci_octeon.o @@ -91,7 +92,6 @@ obj-$(CONFIG_PATA_WINBOND) += pata_sl82c105.o # SFF PIO only obj-$(CONFIG_PATA_AT32) += pata_at32.o -obj-$(CONFIG_PATA_AT91) += pata_at91.o obj-$(CONFIG_PATA_CMD640_PCI) += pata_cmd640.o obj-$(CONFIG_PATA_FALCON) += pata_falcon.o obj-$(CONFIG_PATA_ISAPNP) += pata_isapnp.o diff --git a/drivers/ata/ahci_dm816.c b/drivers/ata/ahci_dm816.c new file mode 100644 index 000000000000..fbd827c3a75c --- /dev/null +++ b/drivers/ata/ahci_dm816.c @@ -0,0 +1,200 @@ +/* + * DaVinci DM816 AHCI SATA platform driver + * + * Copyright (C) 2017 BayLibre SAS + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/pm.h> +#include <linux/platform_device.h> +#include <linux/libata.h> +#include <linux/ahci_platform.h> + +#include "ahci.h" + +#define AHCI_DM816_DRV_NAME "ahci-dm816" + +#define AHCI_DM816_PHY_ENPLL(x) ((x) << 0) +#define AHCI_DM816_PHY_MPY(x) ((x) << 1) +#define AHCI_DM816_PHY_LOS(x) ((x) << 12) +#define AHCI_DM816_PHY_RXCDR(x) ((x) << 13) +#define AHCI_DM816_PHY_RXEQ(x) ((x) << 16) +#define AHCI_DM816_PHY_TXSWING(x) ((x) << 23) + +#define AHCI_DM816_P0PHYCR_REG 0x178 +#define AHCI_DM816_P1PHYCR_REG 0x1f8 + +#define AHCI_DM816_PLL_OUT 1500000000LU + +static const unsigned long pll_mpy_table[] = { + 400, 500, 600, 800, 825, 1000, 1200, + 1250, 1500, 1600, 1650, 2000, 2200, 2500 +}; + +static int ahci_dm816_get_mpy_bits(unsigned long refclk_rate) +{ + unsigned long pll_multiplier; + int i; + + /* + * We need to determine the value of the multiplier (MPY) bits. + * In order to include the 8.25 multiplier we need to first divide + * the refclk rate by 100. + */ + pll_multiplier = AHCI_DM816_PLL_OUT / (refclk_rate / 100); + + for (i = 0; i < ARRAY_SIZE(pll_mpy_table); i++) { + if (pll_mpy_table[i] == pll_multiplier) + return i; + } + + /* + * We should have divided evenly - if not, return an invalid + * value. + */ + return -1; +} + +static int ahci_dm816_phy_init(struct ahci_host_priv *hpriv, struct device *dev) +{ + unsigned long refclk_rate; + int mpy; + u32 val; + + /* + * We should have been supplied two clocks: the functional and + * keep-alive clock and the external reference clock. We need the + * rate of the latter to calculate the correct value of MPY bits. 
+ */ + if (!hpriv->clks[1]) { + dev_err(dev, "reference clock not supplied\n"); + return -EINVAL; + } + + refclk_rate = clk_get_rate(hpriv->clks[1]); + if ((refclk_rate % 100) != 0) { + dev_err(dev, "reference clock rate must be divisible by 100\n"); + return -EINVAL; + } + + mpy = ahci_dm816_get_mpy_bits(refclk_rate); + if (mpy < 0) { + dev_err(dev, "can't calculate the MPY bits value\n"); + return -EINVAL; + } + + /* Enable the PHY and configure the first HBA port. */ + val = AHCI_DM816_PHY_MPY(mpy) | AHCI_DM816_PHY_LOS(1) | + AHCI_DM816_PHY_RXCDR(4) | AHCI_DM816_PHY_RXEQ(1) | + AHCI_DM816_PHY_TXSWING(3) | AHCI_DM816_PHY_ENPLL(1); + writel(val, hpriv->mmio + AHCI_DM816_P0PHYCR_REG); + + /* Configure the second HBA port. */ + val = AHCI_DM816_PHY_LOS(1) | AHCI_DM816_PHY_RXCDR(4) | + AHCI_DM816_PHY_RXEQ(1) | AHCI_DM816_PHY_TXSWING(3); + writel(val, hpriv->mmio + AHCI_DM816_P1PHYCR_REG); + + return 0; +} + +static int ahci_dm816_softreset(struct ata_link *link, + unsigned int *class, unsigned long deadline) +{ + int pmp, ret; + + pmp = sata_srst_pmp(link); + + /* + * There's an issue with the SATA controller on DM816 SoC: if we + * enable Port Multiplier support, but the drive is connected directly + * to the board, it can't be detected. As a workaround: if PMP is + * enabled, we first call ahci_do_softreset() and pass it the result of + * sata_srst_pmp(). If this call fails, we retry with pmp = 0. 
+ */ + ret = ahci_do_softreset(link, class, pmp, deadline, ahci_check_ready); + if (pmp && ret == -EBUSY) + return ahci_do_softreset(link, class, 0, + deadline, ahci_check_ready); + + return ret; +} + +static struct ata_port_operations ahci_dm816_port_ops = { + .inherits = &ahci_platform_ops, + .softreset = ahci_dm816_softreset, +}; + +static const struct ata_port_info ahci_dm816_port_info = { + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_dm816_port_ops, +}; + +static struct scsi_host_template ahci_dm816_platform_sht = { + AHCI_SHT(AHCI_DM816_DRV_NAME), +}; + +static int ahci_dm816_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ahci_host_priv *hpriv; + int rc; + + hpriv = ahci_platform_get_resources(pdev); + if (IS_ERR(hpriv)) + return PTR_ERR(hpriv); + + rc = ahci_platform_enable_resources(hpriv); + if (rc) + return rc; + + rc = ahci_dm816_phy_init(hpriv, dev); + if (rc) + goto disable_resources; + + rc = ahci_platform_init_host(pdev, hpriv, + &ahci_dm816_port_info, + &ahci_dm816_platform_sht); + if (rc) + goto disable_resources; + + return 0; + +disable_resources: + ahci_platform_disable_resources(hpriv); + + return rc; +} + +static SIMPLE_DEV_PM_OPS(ahci_dm816_pm_ops, + ahci_platform_suspend, + ahci_platform_resume); + +static const struct of_device_id ahci_dm816_of_match[] = { + { .compatible = "ti,dm816-ahci", }, + { }, +}; +MODULE_DEVICE_TABLE(of, ahci_dm816_of_match); + +static struct platform_driver ahci_dm816_driver = { + .probe = ahci_dm816_probe, + .remove = ata_platform_remove_one, + .driver = { + .name = AHCI_DM816_DRV_NAME, + .of_match_table = ahci_dm816_of_match, + .pm = &ahci_dm816_pm_ops, + }, +}; +module_platform_driver(ahci_dm816_driver); + +MODULE_DESCRIPTION("DaVinci DM816 AHCI SATA platform driver"); +MODULE_AUTHOR("Bartosz Golaszewski <bgolaszewski@baylibre.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/ata/ahci_octeon.c b/drivers/ata/ahci_octeon.c 
index ea865fe953e1..5a44e089c6bb 100644 --- a/drivers/ata/ahci_octeon.c +++ b/drivers/ata/ahci_octeon.c @@ -38,11 +38,6 @@ static int ahci_octeon_probe(struct platform_device *pdev) int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "Platform resource[0] is missing\n"); - return -ENODEV; - } - base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(base)) return PTR_ERR(base); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index ca75823697dd..2d83b8c75965 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4910,7 +4910,7 @@ void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, * LOCKING: * spin_lock_irqsave(host lock) */ -void ata_sg_clean(struct ata_queued_cmd *qc) +static void ata_sg_clean(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; struct scatterlist *sg = qc->sg; @@ -5902,9 +5902,9 @@ struct ata_port *ata_port_alloc(struct ata_host *host) INIT_LIST_HEAD(&ap->eh_done_q); init_waitqueue_head(&ap->eh_wait_q); init_completion(&ap->park_req_pending); - init_timer_deferrable(&ap->fastdrain_timer); - ap->fastdrain_timer.function = ata_eh_fastdrain_timerfn; - ap->fastdrain_timer.data = (unsigned long)ap; + setup_deferrable_timer(&ap->fastdrain_timer, + ata_eh_fastdrain_timerfn, + (unsigned long)ap); ap->cbl = ATA_CBL_NONE; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 1ac70744ae7b..49ba9834c715 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3393,46 +3393,6 @@ static size_t ata_format_dsm_trim_descr(struct scsi_cmnd *cmd, u32 trmax, } /** - * ata_format_dsm_trim_descr() - SATL Write Same to ATA SCT Write Same - * @cmd: SCSI command being translated - * @lba: Starting sector - * @num: Number of sectors to be zero'd. - * - * Rewrite the WRITE SAME payload to be an SCT Write Same formatted - * descriptor. - * NOTE: Writes a pattern (0's) in the foreground. 
- * - * Return: Number of bytes copied into sglist. - */ -static size_t ata_format_sct_write_same(struct scsi_cmnd *cmd, u64 lba, u64 num) -{ - struct scsi_device *sdp = cmd->device; - size_t len = sdp->sector_size; - size_t r; - u16 *buf; - unsigned long flags; - - spin_lock_irqsave(&ata_scsi_rbuf_lock, flags); - buf = ((void *)ata_scsi_rbuf); - - put_unaligned_le16(0x0002, &buf[0]); /* SCT_ACT_WRITE_SAME */ - put_unaligned_le16(0x0101, &buf[1]); /* WRITE PTRN FG */ - put_unaligned_le64(lba, &buf[2]); - put_unaligned_le64(num, &buf[6]); - put_unaligned_le32(0u, &buf[10]); /* pattern */ - - WARN_ON(len > ATA_SCSI_RBUF_SIZE); - - if (len > ATA_SCSI_RBUF_SIZE) - len = ATA_SCSI_RBUF_SIZE; - - r = sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, len); - spin_unlock_irqrestore(&ata_scsi_rbuf_lock, flags); - - return r; -} - -/** * ata_scsi_write_same_xlat() - SATL Write Same to ATA SCT Write Same * @qc: Command to be translated * @@ -3462,32 +3422,31 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc) if (unlikely(!dev->dma_mode)) goto invalid_opcode; + /* + * We only allow sending this command through the block layer, + * as it modifies the DATA OUT buffer, which would corrupt user + * memory for SG_IO commands. + */ + if (unlikely(blk_rq_is_passthrough(scmd->request))) + goto invalid_opcode; + if (unlikely(scmd->cmd_len < 16)) { fp = 15; goto invalid_fld; } scsi_16_lba_len(cdb, &block, &n_block); - if (unmap) { - /* If trim is not enabled the cmd is invalid. 
*/ - if ((dev->horkage & ATA_HORKAGE_NOTRIM) || - !ata_id_has_trim(dev->id)) { - fp = 1; - bp = 3; - goto invalid_fld; - } - /* If the request is too large the cmd is invalid */ - if (n_block > 0xffff * trmax) { - fp = 2; - goto invalid_fld; - } - } else { - /* If write same is not available the cmd is invalid */ - if (!ata_id_sct_write_same(dev->id)) { - fp = 1; - bp = 3; - goto invalid_fld; - } + if (!unmap || + (dev->horkage & ATA_HORKAGE_NOTRIM) || + !ata_id_has_trim(dev->id)) { + fp = 1; + bp = 3; + goto invalid_fld; + } + /* If the request is too large the cmd is invalid */ + if (n_block > 0xffff * trmax) { + fp = 2; + goto invalid_fld; } /* @@ -3502,49 +3461,28 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc) * For DATA SET MANAGEMENT TRIM in ACS-2 nsect (aka count) * is defined as number of 512 byte blocks to be transferred. */ - if (unmap) { - size = ata_format_dsm_trim_descr(scmd, trmax, block, n_block); - if (size != len) - goto invalid_param_len; - if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) { - /* Newer devices support queued TRIM commands */ - tf->protocol = ATA_PROT_NCQ; - tf->command = ATA_CMD_FPDMA_SEND; - tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f; - tf->nsect = qc->tag << 3; - tf->hob_feature = (size / 512) >> 8; - tf->feature = size / 512; + size = ata_format_dsm_trim_descr(scmd, trmax, block, n_block); + if (size != len) + goto invalid_param_len; - tf->auxiliary = 1; - } else { - tf->protocol = ATA_PROT_DMA; - tf->hob_feature = 0; - tf->feature = ATA_DSM_TRIM; - tf->hob_nsect = (size / 512) >> 8; - tf->nsect = size / 512; - tf->command = ATA_CMD_DSM; - } - } else { - size = ata_format_sct_write_same(scmd, block, n_block); - if (size != len) - goto invalid_param_len; + if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) { + /* Newer devices support queued TRIM commands */ + tf->protocol = ATA_PROT_NCQ; + tf->command = ATA_CMD_FPDMA_SEND; + tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f; + 
tf->nsect = qc->tag << 3; + tf->hob_feature = (size / 512) >> 8; + tf->feature = size / 512; - tf->hob_feature = 0; - tf->feature = 0; - tf->hob_nsect = 0; - tf->nsect = 1; - tf->lbah = 0; - tf->lbam = 0; - tf->lbal = ATA_CMD_STANDBYNOW1; - tf->hob_lbah = 0; - tf->hob_lbam = 0; - tf->hob_lbal = 0; - tf->device = ATA_CMD_STANDBYNOW1; + tf->auxiliary = 1; + } else { tf->protocol = ATA_PROT_DMA; - tf->command = ATA_CMD_WRITE_LOG_DMA_EXT; - if (unlikely(dev->flags & ATA_DFLAG_PIO)) - tf->command = ATA_CMD_WRITE_LOG_EXT; + tf->hob_feature = 0; + tf->feature = ATA_DSM_TRIM; + tf->hob_nsect = (size / 512) >> 8; + tf->nsect = size / 512; + tf->command = ATA_CMD_DSM; } tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48 | @@ -3619,10 +3557,6 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf) case START_STOP: supported = 3; break; - case WRITE_SAME_16: - if (!ata_id_sct_write_same(dev->id)) - break; - /* fallthrough: if SCT ... only enable for ZBC */ case ZBC_IN: case ZBC_OUT: if (ata_id_zoned_cap(dev->id) || diff --git a/drivers/ata/pata_at91.c b/drivers/ata/pata_at91.c deleted file mode 100644 index fd5b34f0d007..000000000000 --- a/drivers/ata/pata_at91.c +++ /dev/null @@ -1,503 +0,0 @@ -/* - * PATA driver for AT91SAM9260 Static Memory Controller - * with CompactFlash interface in True IDE mode - * - * Copyright (C) 2009 Matyukevich Sergey - * 2011 Igor Plyatov - * - * Based on: - * * generic platform driver by Paul Mundt: drivers/ata/pata_platform.c - * * pata_at32 driver by Kristoffer Nyborg Gregertsen - * * at91_ide driver by Stanislaw Gruszka - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. 
- * - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/blkdev.h> -#include <linux/gfp.h> -#include <scsi/scsi_host.h> -#include <linux/ata.h> -#include <linux/clk.h> -#include <linux/libata.h> -#include <linux/mfd/syscon.h> -#include <linux/mfd/syscon/atmel-smc.h> -#include <linux/platform_device.h> -#include <linux/ata_platform.h> -#include <linux/platform_data/atmel.h> -#include <linux/regmap.h> -#include <linux/gpio.h> - -#define DRV_NAME "pata_at91" -#define DRV_VERSION "0.3" - -#define CF_IDE_OFFSET 0x00c00000 -#define CF_ALT_IDE_OFFSET 0x00e00000 -#define CF_IDE_RES_SIZE 0x08 -#define CS_PULSE_MAXIMUM 319 -#define ER_SMC_CALC 1 -#define ER_SMC_RECALC 2 - -struct at91_ide_info { - unsigned long mode; - unsigned int cs; - struct clk *mck; - void __iomem *ide_addr; - void __iomem *alt_addr; -}; - -/** - * struct smc_range - range of valid values for SMC register. - */ -struct smc_range { - int min; - int max; -}; - -struct regmap *smc; - -struct at91sam9_smc_generic_fields { - struct regmap_field *setup; - struct regmap_field *pulse; - struct regmap_field *cycle; - struct regmap_field *mode; -} fields; - -/** - * adjust_smc_value - adjust value for one of SMC registers. - * @value: adjusted value - * @range: array of SMC ranges with valid values - * @size: SMC ranges array size - * - * This returns the difference between input and output value or negative - * in case of invalid input value. - * If negative returned, then output value = maximal possible from ranges. 
- */ -static int adjust_smc_value(int *value, struct smc_range *range, int size) -{ - int maximum = (range + size - 1)->max; - int remainder; - - do { - if (*value < range->min) { - remainder = range->min - *value; - *value = range->min; /* nearest valid value */ - return remainder; - } else if ((range->min <= *value) && (*value <= range->max)) - return 0; - - range++; - } while (--size); - *value = maximum; - - return -1; /* invalid value */ -} - -/** - * calc_smc_vals - calculate SMC register values - * @dev: ATA device - * @setup: SMC_SETUP register value - * @pulse: SMC_PULSE register value - * @cycle: SMC_CYCLE register value - * - * This returns negative in case of invalid values for SMC registers: - * -ER_SMC_RECALC - recalculation required for SMC values, - * -ER_SMC_CALC - calculation failed (invalid input values). - * - * SMC use special coding scheme, see "Coding and Range of Timing - * Parameters" table from AT91SAM9 datasheets. - * - * SMC_SETUP = 128*setup[5] + setup[4:0] - * SMC_PULSE = 256*pulse[6] + pulse[5:0] - * SMC_CYCLE = 256*cycle[8:7] + cycle[6:0] - */ -static int calc_smc_vals(struct device *dev, - int *setup, int *pulse, int *cycle, int *cs_pulse) -{ - int ret_val; - int err = 0; - struct smc_range range_setup[] = { /* SMC_SETUP valid values */ - {.min = 0, .max = 31}, /* first range */ - {.min = 128, .max = 159} /* second range */ - }; - struct smc_range range_pulse[] = { /* SMC_PULSE valid values */ - {.min = 0, .max = 63}, /* first range */ - {.min = 256, .max = 319} /* second range */ - }; - struct smc_range range_cycle[] = { /* SMC_CYCLE valid values */ - {.min = 0, .max = 127}, /* first range */ - {.min = 256, .max = 383}, /* second range */ - {.min = 512, .max = 639}, /* third range */ - {.min = 768, .max = 895} /* fourth range */ - }; - - ret_val = adjust_smc_value(setup, range_setup, ARRAY_SIZE(range_setup)); - if (ret_val < 0) - dev_warn(dev, "maximal SMC Setup value\n"); - else - *cycle += ret_val; - - ret_val = 
adjust_smc_value(pulse, range_pulse, ARRAY_SIZE(range_pulse)); - if (ret_val < 0) - dev_warn(dev, "maximal SMC Pulse value\n"); - else - *cycle += ret_val; - - ret_val = adjust_smc_value(cycle, range_cycle, ARRAY_SIZE(range_cycle)); - if (ret_val < 0) - dev_warn(dev, "maximal SMC Cycle value\n"); - - *cs_pulse = *cycle; - if (*cs_pulse > CS_PULSE_MAXIMUM) { - dev_err(dev, "unable to calculate valid SMC settings\n"); - return -ER_SMC_CALC; - } - - ret_val = adjust_smc_value(cs_pulse, range_pulse, - ARRAY_SIZE(range_pulse)); - if (ret_val < 0) { - dev_warn(dev, "maximal SMC CS Pulse value\n"); - } else if (ret_val != 0) { - *cycle = *cs_pulse; - dev_warn(dev, "SMC Cycle extended\n"); - err = -ER_SMC_RECALC; - } - - return err; -} - -/** - * to_smc_format - convert values into SMC format - * @setup: SETUP value of SMC Setup Register - * @pulse: PULSE value of SMC Pulse Register - * @cycle: CYCLE value of SMC Cycle Register - * @cs_pulse: NCS_PULSE value of SMC Pulse Register - */ -static void to_smc_format(int *setup, int *pulse, int *cycle, int *cs_pulse) -{ - *setup = (*setup & 0x1f) | ((*setup & 0x80) >> 2); - *pulse = (*pulse & 0x3f) | ((*pulse & 0x100) >> 2); - *cycle = (*cycle & 0x7f) | ((*cycle & 0x300) >> 1); - *cs_pulse = (*cs_pulse & 0x3f) | ((*cs_pulse & 0x100) >> 2); -} - -static unsigned long calc_mck_cycles(unsigned long ns, unsigned long mck_hz) -{ - unsigned long mul; - - /* - * cycles = x [nsec] * f [Hz] / 10^9 [ns in sec] = - * x * (f / 1_000_000_000) = - * x * ((f * 65536) / 1_000_000_000) / 65536 = - * x * (((f / 10_000) * 65536) / 100_000) / 65536 = - */ - - mul = (mck_hz / 10000) << 16; - mul /= 100000; - - return (ns * mul + 65536) >> 16; /* rounding */ -} - -/** - * set_smc_timing - SMC timings setup. - * @dev: device - * @info: AT91 IDE info - * @ata: ATA timings - * - * Its assumed that write timings are same as read timings, - * cs_setup = 0 and cs_pulse = cycle. 
- */ -static void set_smc_timing(struct device *dev, struct ata_device *adev, - struct at91_ide_info *info, const struct ata_timing *ata) -{ - int ret = 0; - int use_iordy; - unsigned int t6z; /* data tristate time in ns */ - unsigned int cycle; /* SMC Cycle width in MCK ticks */ - unsigned int setup; /* SMC Setup width in MCK ticks */ - unsigned int pulse; /* CFIOR and CFIOW pulse width in MCK ticks */ - unsigned int cs_pulse; /* CS4 or CS5 pulse width in MCK ticks*/ - unsigned int tdf_cycles; /* SMC TDF MCK ticks */ - unsigned long mck_hz; /* MCK frequency in Hz */ - - t6z = (ata->mode < XFER_PIO_5) ? 30 : 20; - mck_hz = clk_get_rate(info->mck); - cycle = calc_mck_cycles(ata->cyc8b, mck_hz); - setup = calc_mck_cycles(ata->setup, mck_hz); - pulse = calc_mck_cycles(ata->act8b, mck_hz); - tdf_cycles = calc_mck_cycles(t6z, mck_hz); - - do { - ret = calc_smc_vals(dev, &setup, &pulse, &cycle, &cs_pulse); - } while (ret == -ER_SMC_RECALC); - - if (ret == -ER_SMC_CALC) - dev_err(dev, "Interface may not operate correctly\n"); - - dev_dbg(dev, "SMC Setup=%u, Pulse=%u, Cycle=%u, CS Pulse=%u\n", - setup, pulse, cycle, cs_pulse); - to_smc_format(&setup, &pulse, &cycle, &cs_pulse); - /* disable or enable waiting for IORDY signal */ - use_iordy = ata_pio_need_iordy(adev); - if (use_iordy) - info->mode |= AT91_SMC_EXNWMODE_READY; - - if (tdf_cycles > 15) { - tdf_cycles = 15; - dev_warn(dev, "maximal SMC TDF Cycles value\n"); - } - - dev_dbg(dev, "Use IORDY=%u, TDF Cycles=%u\n", use_iordy, tdf_cycles); - - regmap_fields_write(fields.setup, info->cs, - AT91SAM9_SMC_NRDSETUP(setup) | - AT91SAM9_SMC_NWESETUP(setup) | - AT91SAM9_SMC_NCS_NRDSETUP(0) | - AT91SAM9_SMC_NCS_WRSETUP(0)); - regmap_fields_write(fields.pulse, info->cs, - AT91SAM9_SMC_NRDPULSE(pulse) | - AT91SAM9_SMC_NWEPULSE(pulse) | - AT91SAM9_SMC_NCS_NRDPULSE(cs_pulse) | - AT91SAM9_SMC_NCS_WRPULSE(cs_pulse)); - regmap_fields_write(fields.cycle, info->cs, - AT91SAM9_SMC_NRDCYCLE(cycle) | - AT91SAM9_SMC_NWECYCLE(cycle)); - 
regmap_fields_write(fields.mode, info->cs, info->mode | - AT91_SMC_TDF_(tdf_cycles)); -} - -static void pata_at91_set_piomode(struct ata_port *ap, struct ata_device *adev) -{ - struct at91_ide_info *info = ap->host->private_data; - struct ata_timing timing; - int ret; - - /* Compute ATA timing and set it to SMC */ - ret = ata_timing_compute(adev, adev->pio_mode, &timing, 1000, 0); - if (ret) { - dev_warn(ap->dev, "Failed to compute ATA timing %d, " - "set PIO_0 timing\n", ret); - timing = *ata_timing_find_mode(XFER_PIO_0); - } - set_smc_timing(ap->dev, adev, info, &timing); -} - -static unsigned int pata_at91_data_xfer_noirq(struct ata_queued_cmd *qc, - unsigned char *buf, unsigned int buflen, int rw) -{ - struct at91_ide_info *info = qc->dev->link->ap->host->private_data; - unsigned int consumed; - unsigned int mode; - unsigned long flags; - - local_irq_save(flags); - regmap_fields_read(fields.mode, info->cs, &mode); - - /* set 16bit mode before writing data */ - regmap_fields_write(fields.mode, info->cs, (mode & ~AT91_SMC_DBW) | - AT91_SMC_DBW_16); - - consumed = ata_sff_data_xfer(qc, buf, buflen, rw); - - /* restore 8bit mode after data is written */ - regmap_fields_write(fields.mode, info->cs, (mode & ~AT91_SMC_DBW) | - AT91_SMC_DBW_8); - - local_irq_restore(flags); - return consumed; -} - -static struct scsi_host_template pata_at91_sht = { - ATA_PIO_SHT(DRV_NAME), -}; - -static struct ata_port_operations pata_at91_port_ops = { - .inherits = &ata_sff_port_ops, - - .sff_data_xfer = pata_at91_data_xfer_noirq, - .set_piomode = pata_at91_set_piomode, - .cable_detect = ata_cable_40wire, -}; - -static int at91sam9_smc_fields_init(struct device *dev) -{ - struct reg_field field = REG_FIELD(0, 0, 31); - - field.id_size = 8; - field.id_offset = AT91SAM9_SMC_GENERIC_BLK_SZ; - - field.reg = AT91SAM9_SMC_SETUP(AT91SAM9_SMC_GENERIC); - fields.setup = devm_regmap_field_alloc(dev, smc, field); - if (IS_ERR(fields.setup)) - return PTR_ERR(fields.setup); - - field.reg = 
AT91SAM9_SMC_PULSE(AT91SAM9_SMC_GENERIC); - fields.pulse = devm_regmap_field_alloc(dev, smc, field); - if (IS_ERR(fields.pulse)) - return PTR_ERR(fields.pulse); - - field.reg = AT91SAM9_SMC_CYCLE(AT91SAM9_SMC_GENERIC); - fields.cycle = devm_regmap_field_alloc(dev, smc, field); - if (IS_ERR(fields.cycle)) - return PTR_ERR(fields.cycle); - - field.reg = AT91SAM9_SMC_MODE(AT91SAM9_SMC_GENERIC); - fields.mode = devm_regmap_field_alloc(dev, smc, field); - - return PTR_ERR_OR_ZERO(fields.mode); -} - -static int pata_at91_probe(struct platform_device *pdev) -{ - struct at91_cf_data *board = dev_get_platdata(&pdev->dev); - struct device *dev = &pdev->dev; - struct at91_ide_info *info; - struct resource *mem_res; - struct ata_host *host; - struct ata_port *ap; - - int irq_flags = 0; - int irq = 0; - int ret; - - /* get platform resources: IO/CTL memories and irq/rst pins */ - - if (pdev->num_resources != 1) { - dev_err(&pdev->dev, "invalid number of resources\n"); - return -EINVAL; - } - - mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - - if (!mem_res) { - dev_err(dev, "failed to get mem resource\n"); - return -EINVAL; - } - - irq = board->irq_pin; - - smc = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "atmel,smc"); - if (IS_ERR(smc)) - return PTR_ERR(smc); - - ret = at91sam9_smc_fields_init(dev); - if (ret < 0) - return ret; - - /* init ata host */ - - host = ata_host_alloc(dev, 1); - - if (!host) - return -ENOMEM; - - ap = host->ports[0]; - ap->ops = &pata_at91_port_ops; - ap->flags |= ATA_FLAG_SLAVE_POSS; - ap->pio_mask = ATA_PIO4; - - if (!gpio_is_valid(irq)) { - ap->flags |= ATA_FLAG_PIO_POLLING; - ata_port_desc(ap, "no IRQ, using PIO polling"); - } - - info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); - - if (!info) { - dev_err(dev, "failed to allocate memory for private data\n"); - return -ENOMEM; - } - - info->mck = clk_get(NULL, "mck"); - - if (IS_ERR(info->mck)) { - dev_err(dev, "failed to get access to mck clock\n"); - return -ENODEV; - } - - 
info->cs = board->chipselect; - info->mode = AT91_SMC_READMODE | AT91_SMC_WRITEMODE | - AT91_SMC_EXNWMODE_READY | AT91_SMC_BAT_SELECT | - AT91_SMC_DBW_8 | AT91_SMC_TDF_(0); - - info->ide_addr = devm_ioremap(dev, - mem_res->start + CF_IDE_OFFSET, CF_IDE_RES_SIZE); - - if (!info->ide_addr) { - dev_err(dev, "failed to map IO base\n"); - ret = -ENOMEM; - goto err_put; - } - - info->alt_addr = devm_ioremap(dev, - mem_res->start + CF_ALT_IDE_OFFSET, CF_IDE_RES_SIZE); - - if (!info->alt_addr) { - dev_err(dev, "failed to map CTL base\n"); - ret = -ENOMEM; - goto err_put; - } - - ap->ioaddr.cmd_addr = info->ide_addr; - ap->ioaddr.ctl_addr = info->alt_addr + 0x06; - ap->ioaddr.altstatus_addr = ap->ioaddr.ctl_addr; - - ata_sff_std_ports(&ap->ioaddr); - - ata_port_desc(ap, "mmio cmd 0x%llx ctl 0x%llx", - (unsigned long long)mem_res->start + CF_IDE_OFFSET, - (unsigned long long)mem_res->start + CF_ALT_IDE_OFFSET); - - host->private_data = info; - - ret = ata_host_activate(host, gpio_is_valid(irq) ? gpio_to_irq(irq) : 0, - gpio_is_valid(irq) ? 
ata_sff_interrupt : NULL, - irq_flags, &pata_at91_sht); - if (ret) - goto err_put; - - return 0; - -err_put: - clk_put(info->mck); - return ret; -} - -static int pata_at91_remove(struct platform_device *pdev) -{ - struct ata_host *host = platform_get_drvdata(pdev); - struct at91_ide_info *info; - - if (!host) - return 0; - info = host->private_data; - - ata_host_detach(host); - - if (!info) - return 0; - - clk_put(info->mck); - - return 0; -} - -static struct platform_driver pata_at91_driver = { - .probe = pata_at91_probe, - .remove = pata_at91_remove, - .driver = { - .name = DRV_NAME, - }, -}; - -module_platform_driver(pata_at91_driver); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Driver for CF in True IDE mode on AT91SAM9260 SoC"); -MODULE_AUTHOR("Matyukevich Sergey"); -MODULE_VERSION(DRV_VERSION); - diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c index 6c9aa95a9a05..49d705c9f0f7 100644 --- a/drivers/ata/pata_atiixp.c +++ b/drivers/ata/pata_atiixp.c @@ -278,11 +278,6 @@ static int atiixp_init_one(struct pci_dev *pdev, const struct pci_device_id *id) }; const struct ata_port_info *ppi[] = { &info, &info }; - /* SB600/700 don't have secondary port wired */ - if ((pdev->device == PCI_DEVICE_ID_ATI_IXP600_IDE) || - (pdev->device == PCI_DEVICE_ID_ATI_IXP700_IDE)) - ppi[1] = &ata_dummy_port_info; - return ata_pci_bmdma_init_one(pdev, ppi, &atiixp_sht, NULL, ATA_HOST_PARALLEL_SCAN); } diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index e347e7acd8ed..0adcb40d2794 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -1328,7 +1328,7 @@ static int pata_macio_pci_resume(struct pci_dev *pdev) } #endif /* CONFIG_PM_SLEEP */ -static struct of_device_id pata_macio_match[] = +static const struct of_device_id pata_macio_match[] = { { .name = "IDE", diff --git a/drivers/ata/pata_mpc52xx.c b/drivers/ata/pata_mpc52xx.c index 252ba27fa63b..9730125530f6 100644 --- a/drivers/ata/pata_mpc52xx.c +++ b/drivers/ata/pata_mpc52xx.c 
@@ -847,7 +847,7 @@ mpc52xx_ata_resume(struct platform_device *op) } #endif -static struct of_device_id mpc52xx_ata_of_match[] = { +static const struct of_device_id mpc52xx_ata_of_match[] = { { .compatible = "fsl,mpc5200-ata", }, { .compatible = "mpc5200-ata", }, {}, diff --git a/drivers/ata/pata_of_platform.c b/drivers/ata/pata_of_platform.c index 201a32d0627f..01161c1aef4d 100644 --- a/drivers/ata/pata_of_platform.c +++ b/drivers/ata/pata_of_platform.c @@ -67,7 +67,7 @@ static int pata_of_platform_probe(struct platform_device *ofdev) reg_shift, pio_mask, &pata_platform_sht); } -static struct of_device_id pata_of_platform_match[] = { +static const struct of_device_id pata_of_platform_match[] = { { .compatible = "ata-generic", }, { }, }; diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index a723ae929783..01734d54c69c 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1612,7 +1612,7 @@ static int sata_fsl_resume(struct platform_device *op) } #endif -static struct of_device_id fsl_sata_match[] = { +static const struct of_device_id fsl_sata_match[] = { { .compatible = "fsl,pq-sata", }, diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 00ce26d0c047..b66bcda88320 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -4286,7 +4286,7 @@ static int mv_platform_resume(struct platform_device *pdev) #endif #ifdef CONFIG_OF -static struct of_device_id mv_sata_dt_ids[] = { +static const struct of_device_id mv_sata_dt_ids[] = { { .compatible = "marvell,armada-370-sata", }, { .compatible = "marvell,orion-sata", }, {}, diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c index 0636d84fbefe..f3f538eec7b3 100644 --- a/drivers/ata/sata_via.c +++ b/drivers/ata/sata_via.c @@ -644,14 +644,16 @@ static void svia_configure(struct pci_dev *pdev, int board_id, pci_write_config_byte(pdev, SATA_NATIVE_MODE, tmp8); } - /* enable IRQ on hotplug */ - pci_read_config_byte(pdev, SVIA_MISC_3, &tmp8); - if ((tmp8 & SATA_HOTPLUG) != 
SATA_HOTPLUG) { - dev_dbg(&pdev->dev, - "enabling SATA hotplug (0x%x)\n", - (int) tmp8); - tmp8 |= SATA_HOTPLUG; - pci_write_config_byte(pdev, SVIA_MISC_3, tmp8); + if (board_id == vt6421) { + /* enable IRQ on hotplug */ + pci_read_config_byte(pdev, SVIA_MISC_3, &tmp8); + if ((tmp8 & SATA_HOTPLUG) != SATA_HOTPLUG) { + dev_dbg(&pdev->dev, + "enabling SATA hotplug (0x%x)\n", + (int) tmp8); + tmp8 |= SATA_HOTPLUG; + pci_write_config_byte(pdev, SVIA_MISC_3, tmp8); + } } /* diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index 0fc7c4da7756..d35e9a20caf7 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -345,8 +345,7 @@ platform_msi_create_device_domain(struct device *dev, data->host_data = host_data; domain = irq_domain_create_hierarchy(dev->msi_domain, 0, nvec, - of_node_to_fwnode(dev->of_node), - ops, data); + dev->fwnode, ops, data); if (!domain) goto free_priv; diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e697dec9d25b..ad196427b4f2 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -121,7 +121,9 @@ static const struct genpd_lock_ops genpd_spin_ops = { #define genpd_lock_interruptible(p) p->lock_ops->lock_interruptible(p) #define genpd_unlock(p) p->lock_ops->unlock(p) +#define genpd_status_on(genpd) (genpd->status == GPD_STATE_ACTIVE) #define genpd_is_irq_safe(genpd) (genpd->flags & GENPD_FLAG_IRQ_SAFE) +#define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON) static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, struct generic_pm_domain *genpd) @@ -130,8 +132,12 @@ static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, ret = pm_runtime_is_irq_safe(dev) && !genpd_is_irq_safe(genpd); - /* Warn once if IRQ safe dev in no sleep domain */ - if (ret) + /* + * Warn once if an IRQ safe device is attached to a no sleep domain, as + * to indicate a suboptimal configuration for PM. 
For an always on + * domain this isn't case, thus don't warn. + */ + if (ret && !genpd_is_always_on(genpd)) dev_warn_once(dev, "PM domain %s will not be powered off\n", genpd->name); @@ -296,11 +302,15 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, * (1) The domain is already in the "power off" state. * (2) System suspend is in progress. */ - if (genpd->status == GPD_STATE_POWER_OFF - || genpd->prepared_count > 0) + if (!genpd_status_on(genpd) || genpd->prepared_count > 0) return 0; - if (atomic_read(&genpd->sd_count) > 0) + /* + * Abort power off for the PM domain in the following situations: + * (1) The domain is configured as always on. + * (2) When the domain has a subdomain being powered on. + */ + if (genpd_is_always_on(genpd) || atomic_read(&genpd->sd_count) > 0) return -EBUSY; list_for_each_entry(pdd, &genpd->dev_list, list_node) { @@ -373,7 +383,7 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth) struct gpd_link *link; int ret = 0; - if (genpd->status == GPD_STATE_ACTIVE) + if (genpd_status_on(genpd)) return 0; /* @@ -752,7 +762,7 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock, { struct gpd_link *link; - if (genpd->status == GPD_STATE_POWER_OFF) + if (!genpd_status_on(genpd) || genpd_is_always_on(genpd)) return; if (genpd->suspended_count != genpd->device_count @@ -761,7 +771,8 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock, /* Choose the deepest state when suspending */ genpd->state_idx = genpd->state_count - 1; - _genpd_power_off(genpd, false); + if (_genpd_power_off(genpd, false)) + return; genpd->status = GPD_STATE_POWER_OFF; @@ -793,7 +804,7 @@ static void genpd_sync_power_on(struct generic_pm_domain *genpd, bool use_lock, { struct gpd_link *link; - if (genpd->status == GPD_STATE_ACTIVE) + if (genpd_status_on(genpd)) return; list_for_each_entry(link, &genpd->slave_links, slave_node) { @@ -1329,8 +1340,7 @@ static int 
genpd_add_subdomain(struct generic_pm_domain *genpd, genpd_lock(subdomain); genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING); - if (genpd->status == GPD_STATE_POWER_OFF - && subdomain->status != GPD_STATE_POWER_OFF) { + if (!genpd_status_on(genpd) && genpd_status_on(subdomain)) { ret = -EINVAL; goto out; } @@ -1346,7 +1356,7 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd, list_add_tail(&link->master_node, &genpd->master_links); link->slave = subdomain; list_add_tail(&link->slave_node, &subdomain->slave_links); - if (subdomain->status != GPD_STATE_POWER_OFF) + if (genpd_status_on(subdomain)) genpd_sd_counter_inc(genpd); out: @@ -1406,7 +1416,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, list_del(&link->master_node); list_del(&link->slave_node); kfree(link); - if (subdomain->status != GPD_STATE_POWER_OFF) + if (genpd_status_on(subdomain)) genpd_sd_counter_dec(genpd); ret = 0; @@ -1492,6 +1502,10 @@ int pm_genpd_init(struct generic_pm_domain *genpd, genpd->dev_ops.start = pm_clk_resume; } + /* Always-on domains must be powered on at initialization. 
*/ + if (genpd_is_always_on(genpd) && !genpd_status_on(genpd)) + return -EINVAL; + /* Use only one "off" state if there were no states declared */ if (genpd->state_count == 0) { ret = genpd_set_default_power_state(genpd); @@ -1700,12 +1714,12 @@ int of_genpd_add_provider_simple(struct device_node *np, mutex_lock(&gpd_list_lock); - if (pm_genpd_present(genpd)) + if (pm_genpd_present(genpd)) { ret = genpd_add_provider(np, genpd_xlate_simple, genpd); - - if (!ret) { - genpd->provider = &np->fwnode; - genpd->has_provider = true; + if (!ret) { + genpd->provider = &np->fwnode; + genpd->has_provider = true; + } } mutex_unlock(&gpd_list_lock); @@ -2079,11 +2093,6 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state, int err; u32 residency; u32 entry_latency, exit_latency; - const struct of_device_id *match_id; - - match_id = of_match_node(idle_state_match, state_node); - if (!match_id) - return -EINVAL; err = of_property_read_u32(state_node, "entry-latency-us", &entry_latency); @@ -2132,6 +2141,7 @@ int of_genpd_parse_idle_states(struct device_node *dn, int err, ret; int count; struct of_phandle_iterator it; + const struct of_device_id *match_id; count = of_count_phandle_with_args(dn, "domain-idle-states", NULL); if (count <= 0) @@ -2144,6 +2154,9 @@ int of_genpd_parse_idle_states(struct device_node *dn, /* Loop over the phandles until all the requested entry is found */ of_for_each_phandle(&it, err, dn, "domain-idle-states", NULL, 0) { np = it.node; + match_id = of_match_node(idle_state_match, np); + if (!match_id) + continue; ret = genpd_parse_state(&st[i++], np); if (ret) { pr_err @@ -2155,8 +2168,11 @@ int of_genpd_parse_idle_states(struct device_node *dn, } } - *n = count; - *states = st; + *n = i; + if (!i) + kfree(st); + else + *states = st; return 0; } @@ -2221,7 +2237,7 @@ static int pm_genpd_summary_one(struct seq_file *s, if (WARN_ON(genpd->status >= ARRAY_SIZE(status_lookup))) goto exit; - if (genpd->status == GPD_STATE_POWER_OFF) + if 
(!genpd_status_on(genpd)) snprintf(state, sizeof(state), "%s-%u", status_lookup[genpd->status], genpd->state_idx); else diff --git a/drivers/base/property.c b/drivers/base/property.c index c458c63e353f..149de311a10e 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/of.h> #include <linux/of_address.h> +#include <linux/of_graph.h> #include <linux/property.h> #include <linux/etherdevice.h> #include <linux/phy.h> @@ -146,47 +147,45 @@ static int pset_prop_read_string_array(struct property_set *pset, const char *propname, const char **strings, size_t nval) { + const struct property_entry *prop; const void *pointer; - size_t length = nval * sizeof(*strings); + size_t array_len, length; + + /* Find out the array length. */ + prop = pset_prop_get(pset, propname); + if (!prop) + return -EINVAL; + + if (!prop->is_array) + /* The array length for a non-array string property is 1. */ + array_len = 1; + else + /* Find the length of an array. */ + array_len = pset_prop_count_elems_of_size(pset, propname, + sizeof(const char *)); + + /* Return how many there are if strings is NULL. 
*/ + if (!strings) + return array_len; + + array_len = min(nval, array_len); + length = array_len * sizeof(*strings); pointer = pset_prop_find(pset, propname, length); if (IS_ERR(pointer)) return PTR_ERR(pointer); memcpy(strings, pointer, length); - return 0; -} -static int pset_prop_read_string(struct property_set *pset, - const char *propname, const char **strings) -{ - const struct property_entry *prop; - const char * const *pointer; - - prop = pset_prop_get(pset, propname); - if (!prop) - return -EINVAL; - if (!prop->is_string) - return -EILSEQ; - if (prop->is_array) { - pointer = prop->pointer.str; - if (!pointer) - return -ENODATA; - } else { - pointer = &prop->value.str; - if (*pointer && strnlen(*pointer, prop->length) >= prop->length) - return -EILSEQ; - } - - *strings = *pointer; - return 0; + return array_len; } -static inline struct fwnode_handle *dev_fwnode(struct device *dev) +struct fwnode_handle *dev_fwnode(struct device *dev) { return IS_ENABLED(CONFIG_OF) && dev->of_node ? &dev->of_node->fwnode : dev->fwnode; } +EXPORT_SYMBOL_GPL(dev_fwnode); /** * device_property_present - check if a property of a device is present @@ -340,8 +339,8 @@ EXPORT_SYMBOL_GPL(device_property_read_u64_array); * Function reads an array of string properties with @propname from the device * firmware description and stores them to @val if found. * - * Return: number of values if @val was %NULL, - * %0 if the property was found (success), + * Return: number of values read on success if @val is non-NULL, + * number of values available on success if @val is NULL, * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property is not an array of strings, @@ -553,25 +552,8 @@ static int __fwnode_property_read_string_array(struct fwnode_handle *fwnode, return acpi_node_prop_read(fwnode, propname, DEV_PROP_STRING, val, nval); else if (is_pset_node(fwnode)) - return val ? 
- pset_prop_read_string_array(to_pset_node(fwnode), - propname, val, nval) : - pset_prop_count_elems_of_size(to_pset_node(fwnode), - propname, - sizeof(const char *)); - return -ENXIO; -} - -static int __fwnode_property_read_string(struct fwnode_handle *fwnode, - const char *propname, const char **val) -{ - if (is_of_node(fwnode)) - return of_property_read_string(to_of_node(fwnode), propname, val); - else if (is_acpi_node(fwnode)) - return acpi_node_prop_read(fwnode, propname, DEV_PROP_STRING, - val, 1); - else if (is_pset_node(fwnode)) - return pset_prop_read_string(to_pset_node(fwnode), propname, val); + return pset_prop_read_string_array(to_pset_node(fwnode), + propname, val, nval); return -ENXIO; } @@ -585,11 +567,11 @@ static int __fwnode_property_read_string(struct fwnode_handle *fwnode, * Read an string list property @propname from the given firmware node and store * them to @val if found. * - * Return: number of values if @val was %NULL, - * %0 if the property was found (success), + * Return: number of values read on success if @val is non-NULL, + * number of values available on success if @val is NULL, * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, - * %-EPROTO if the property is not an array of strings, + * %-EPROTO or %-EILSEQ if the property is not an array of strings, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. */ @@ -626,14 +608,9 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_string_array); int fwnode_property_read_string(struct fwnode_handle *fwnode, const char *propname, const char **val) { - int ret; + int ret = fwnode_property_read_string_array(fwnode, propname, val, 1); - ret = __fwnode_property_read_string(fwnode, propname, val); - if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) && - !IS_ERR_OR_NULL(fwnode->secondary)) - ret = __fwnode_property_read_string(fwnode->secondary, - propname, val); - return ret; + return ret < 0 ? 
ret : 0; } EXPORT_SYMBOL_GPL(fwnode_property_read_string); @@ -932,41 +909,109 @@ int device_add_properties(struct device *dev, EXPORT_SYMBOL_GPL(device_add_properties); /** - * device_get_next_child_node - Return the next child node handle for a device - * @dev: Device to find the next child node for. - * @child: Handle to one of the device's child nodes or a null handle. + * fwnode_get_next_parent - Iterate to the node's parent + * @fwnode: Firmware whose parent is retrieved + * + * This is like fwnode_get_parent() except that it drops the refcount + * on the passed node, making it suitable for iterating through a + * node's parents. + * + * Returns a node pointer with refcount incremented, use + * fwnode_handle_node() on it when done. */ -struct fwnode_handle *device_get_next_child_node(struct device *dev, +struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode) +{ + struct fwnode_handle *parent = fwnode_get_parent(fwnode); + + fwnode_handle_put(fwnode); + + return parent; +} +EXPORT_SYMBOL_GPL(fwnode_get_next_parent); + +/** + * fwnode_get_parent - Return parent firwmare node + * @fwnode: Firmware whose parent is retrieved + * + * Return parent firmware node of the given node if possible or %NULL if no + * parent was available. + */ +struct fwnode_handle *fwnode_get_parent(struct fwnode_handle *fwnode) +{ + struct fwnode_handle *parent = NULL; + + if (is_of_node(fwnode)) { + struct device_node *node; + + node = of_get_parent(to_of_node(fwnode)); + if (node) + parent = &node->fwnode; + } else if (is_acpi_node(fwnode)) { + parent = acpi_node_get_parent(fwnode); + } + + return parent; +} +EXPORT_SYMBOL_GPL(fwnode_get_parent); + +/** + * fwnode_get_next_child_node - Return the next child node handle for a node + * @fwnode: Firmware node to find the next child node for. + * @child: Handle to one of the node's child nodes or a %NULL handle. 
+ */ +struct fwnode_handle *fwnode_get_next_child_node(struct fwnode_handle *fwnode, struct fwnode_handle *child) { - if (IS_ENABLED(CONFIG_OF) && dev->of_node) { + if (is_of_node(fwnode)) { struct device_node *node; - node = of_get_next_available_child(dev->of_node, to_of_node(child)); + node = of_get_next_available_child(to_of_node(fwnode), + to_of_node(child)); if (node) return &node->fwnode; - } else if (IS_ENABLED(CONFIG_ACPI)) { - return acpi_get_next_subnode(dev, child); + } else if (is_acpi_node(fwnode)) { + return acpi_get_next_subnode(fwnode, child); } + return NULL; } +EXPORT_SYMBOL_GPL(fwnode_get_next_child_node); + +/** + * device_get_next_child_node - Return the next child node handle for a device + * @dev: Device to find the next child node for. + * @child: Handle to one of the device's child nodes or a null handle. + */ +struct fwnode_handle *device_get_next_child_node(struct device *dev, + struct fwnode_handle *child) +{ + struct acpi_device *adev = ACPI_COMPANION(dev); + struct fwnode_handle *fwnode = NULL; + + if (dev->of_node) + fwnode = &dev->of_node->fwnode; + else if (adev) + fwnode = acpi_fwnode_handle(adev); + + return fwnode_get_next_child_node(fwnode, child); +} EXPORT_SYMBOL_GPL(device_get_next_child_node); /** - * device_get_named_child_node - Return first matching named child node handle - * @dev: Device to find the named child node for. + * fwnode_get_named_child_node - Return first matching named child node handle + * @fwnode: Firmware node to find the named child node for. * @childname: String to match child node name against. */ -struct fwnode_handle *device_get_named_child_node(struct device *dev, +struct fwnode_handle *fwnode_get_named_child_node(struct fwnode_handle *fwnode, const char *childname) { struct fwnode_handle *child; /* - * Find first matching named child node of this device. + * Find first matching named child node of this fwnode. * For ACPI this will be a data only sub-node. 
*/ - device_for_each_child_node(dev, child) { + fwnode_for_each_child_node(fwnode, child) { if (is_of_node(child)) { if (!of_node_cmp(to_of_node(child)->name, childname)) return child; @@ -978,9 +1023,32 @@ struct fwnode_handle *device_get_named_child_node(struct device *dev, return NULL; } +EXPORT_SYMBOL_GPL(fwnode_get_named_child_node); + +/** + * device_get_named_child_node - Return first matching named child node handle + * @dev: Device to find the named child node for. + * @childname: String to match child node name against. + */ +struct fwnode_handle *device_get_named_child_node(struct device *dev, + const char *childname) +{ + return fwnode_get_named_child_node(dev_fwnode(dev), childname); +} EXPORT_SYMBOL_GPL(device_get_named_child_node); /** + * fwnode_handle_get - Obtain a reference to a device node + * @fwnode: Pointer to the device node to obtain the reference to. + */ +void fwnode_handle_get(struct fwnode_handle *fwnode) +{ + if (is_of_node(fwnode)) + of_node_get(to_of_node(fwnode)); +} +EXPORT_SYMBOL_GPL(fwnode_handle_get); + +/** * fwnode_handle_put - Drop reference to a device node * @fwnode: Pointer to the device node to drop the reference to. * @@ -1117,3 +1185,157 @@ void *device_get_mac_address(struct device *dev, char *addr, int alen) return device_get_mac_addr(dev, "address", addr, alen); } EXPORT_SYMBOL(device_get_mac_address); + +/** + * device_graph_get_next_endpoint - Get next endpoint firmware node + * @fwnode: Pointer to the parent firmware node + * @prev: Previous endpoint node or %NULL to get the first + * + * Returns an endpoint firmware node pointer or %NULL if no more endpoints + * are available. 
+ */ +struct fwnode_handle * +fwnode_graph_get_next_endpoint(struct fwnode_handle *fwnode, + struct fwnode_handle *prev) +{ + struct fwnode_handle *endpoint = NULL; + + if (is_of_node(fwnode)) { + struct device_node *node; + + node = of_graph_get_next_endpoint(to_of_node(fwnode), + to_of_node(prev)); + + if (node) + endpoint = &node->fwnode; + } else if (is_acpi_node(fwnode)) { + endpoint = acpi_graph_get_next_endpoint(fwnode, prev); + if (IS_ERR(endpoint)) + endpoint = NULL; + } + + return endpoint; + +} +EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint); + +/** + * fwnode_graph_get_remote_port_parent - Return fwnode of a remote device + * @fwnode: Endpoint firmware node pointing to the remote endpoint + * + * Extracts firmware node of a remote device the @fwnode points to. + */ +struct fwnode_handle * +fwnode_graph_get_remote_port_parent(struct fwnode_handle *fwnode) +{ + struct fwnode_handle *parent = NULL; + + if (is_of_node(fwnode)) { + struct device_node *node; + + node = of_graph_get_remote_port_parent(to_of_node(fwnode)); + if (node) + parent = &node->fwnode; + } else if (is_acpi_node(fwnode)) { + int ret; + + ret = acpi_graph_get_remote_endpoint(fwnode, &parent, NULL, + NULL); + if (ret) + return NULL; + } + + return parent; +} +EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port_parent); + +/** + * fwnode_graph_get_remote_port - Return fwnode of a remote port + * @fwnode: Endpoint firmware node pointing to the remote endpoint + * + * Extracts firmware node of a remote port the @fwnode points to. 
+ */ +struct fwnode_handle *fwnode_graph_get_remote_port(struct fwnode_handle *fwnode) +{ + struct fwnode_handle *port = NULL; + + if (is_of_node(fwnode)) { + struct device_node *node; + + node = of_graph_get_remote_port(to_of_node(fwnode)); + if (node) + port = &node->fwnode; + } else if (is_acpi_node(fwnode)) { + int ret; + + ret = acpi_graph_get_remote_endpoint(fwnode, NULL, &port, NULL); + if (ret) + return NULL; + } + + return port; +} +EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port); + +/** + * fwnode_graph_get_remote_endpoint - Return fwnode of a remote endpoint + * @fwnode: Endpoint firmware node pointing to the remote endpoint + * + * Extracts firmware node of a remote endpoint the @fwnode points to. + */ +struct fwnode_handle * +fwnode_graph_get_remote_endpoint(struct fwnode_handle *fwnode) +{ + struct fwnode_handle *endpoint = NULL; + + if (is_of_node(fwnode)) { + struct device_node *node; + + node = of_parse_phandle(to_of_node(fwnode), "remote-endpoint", + 0); + if (node) + endpoint = &node->fwnode; + } else if (is_acpi_node(fwnode)) { + int ret; + + ret = acpi_graph_get_remote_endpoint(fwnode, NULL, NULL, + &endpoint); + if (ret) + return NULL; + } + + return endpoint; +} +EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_endpoint); + +/** + * fwnode_graph_parse_endpoint - parse common endpoint node properties + * @fwnode: pointer to endpoint fwnode_handle + * @endpoint: pointer to the fwnode endpoint data structure + * + * Parse @fwnode representing a graph endpoint node and store the + * information in @endpoint. The caller must hold a reference to + * @fwnode. 
+ */ +int fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode, + struct fwnode_endpoint *endpoint) +{ + struct fwnode_handle *port_fwnode = fwnode_get_parent(fwnode); + + memset(endpoint, 0, sizeof(*endpoint)); + + endpoint->local_fwnode = fwnode; + + if (is_acpi_node(port_fwnode)) { + fwnode_property_read_u32(port_fwnode, "port", &endpoint->port); + fwnode_property_read_u32(fwnode, "endpoint", &endpoint->id); + } else { + fwnode_property_read_u32(port_fwnode, "reg", &endpoint->port); + fwnode_property_read_u32(fwnode, "reg", &endpoint->id); + } + + fwnode_handle_put(port_fwnode); + + return 0; +} +EXPORT_SYMBOL(fwnode_graph_parse_endpoint); diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index f744de7a0f9b..19df4918e37e 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -312,22 +312,6 @@ config BLK_DEV_SKD Use device /dev/skd$N amd /dev/skd$Np$M. -config BLK_DEV_OSD - tristate "OSD object-as-blkdev support" - depends on SCSI_OSD_ULD - ---help--- - Saying Y or M here will allow the exporting of a single SCSI - OSD (object-based storage) object as a Linux block device. - - For example, if you create a 2G object on an OSD device, - you can then use this module to present that 2G object as - a Linux block device. - - To compile this driver as a module, choose M here: the - module will be called osdblk. - - If unsure, say N. - config BLK_DEV_SX8 tristate "Promise SATA SX8 support" depends on PCI @@ -434,23 +418,6 @@ config ATA_OVER_ETH This driver provides Support for ATA over Ethernet block devices like the Coraid EtherDrive (R) Storage Blade. -config MG_DISK - tristate "mGine mflash, gflash support" - depends on ARM && GPIOLIB - help - mGine mFlash(gFlash) block device driver - -config MG_DISK_RES - int "Size of reserved area before MBR" - depends on MG_DISK - default 0 - help - Define size of reserved area that usually used for boot. Unit is KB. 
- All of the block device operation will be taken this value as start - offset - Examples: - 1024 => 1 MB - config SUNVDC tristate "Sun Virtual Disk Client support" depends on SUN_LDOMS @@ -512,19 +479,7 @@ config VIRTIO_BLK_SCSI Enable support for SCSI passthrough (e.g. the SG_IO ioctl) on virtio-blk devices. This is only supported for the legacy virtio protocol and not enabled by default by any hypervisor. - Your probably want to virtio-scsi instead. - -config BLK_DEV_HD - bool "Very old hard disk (MFM/RLL/IDE) driver" - depends on HAVE_IDE - depends on !ARM || ARCH_RPC || BROKEN - help - This is a very old hard disk driver that lacks the enhanced - functionality of the newer ones. - - It is required for systems with ancient MFM/RLL/ESDI drives. - - If unsure, say N. + You probably want to use virtio-scsi instead. config BLK_DEV_RBD tristate "Rados block device (RBD)" diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 1e9661e26f29..ec8c36897b75 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -19,10 +19,8 @@ obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o obj-$(CONFIG_XILINX_SYSACE) += xsysace.o obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o -obj-$(CONFIG_MG_DISK) += mg_disk.o obj-$(CONFIG_SUNVDC) += sunvdc.o obj-$(CONFIG_BLK_DEV_SKD) += skd.o -obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o obj-$(CONFIG_BLK_DEV_UMEM) += umem.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o @@ -30,7 +28,6 @@ obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o -obj-$(CONFIG_BLK_DEV_HD) += hd.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/ diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 2104b1b4ccda..fa69ecd52cb5 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -617,12 +617,12 @@ static void fd_error( void ) if (!fd_request) return; - fd_request->errors++; - if 
(fd_request->errors >= MAX_ERRORS) { + fd_request->error_count++; + if (fd_request->error_count >= MAX_ERRORS) { printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive ); fd_end_request_cur(-EIO); } - else if (fd_request->errors == RECALIBRATE_ERRORS) { + else if (fd_request->error_count == RECALIBRATE_ERRORS) { printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive ); if (SelectedDrive != -1) SUD.track = -1; @@ -1386,7 +1386,7 @@ static void setup_req_params( int drive ) ReqData = ReqBuffer + 512 * ReqCnt; if (UseTrackbuffer) - read_track = (ReqCmd == READ && fd_request->errors == 0); + read_track = (ReqCmd == READ && fd_request->error_count == 0); else read_track = 0; @@ -1409,8 +1409,10 @@ static struct request *set_next_request(void) fdc_queue = 0; if (q) { rq = blk_fetch_request(q); - if (rq) + if (rq) { + rq->error_count = 0; break; + } } } while (fdc_queue != old_pos); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 3adc32a3153b..4ec84d504780 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -134,28 +134,6 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) return page; } -static void brd_free_page(struct brd_device *brd, sector_t sector) -{ - struct page *page; - pgoff_t idx; - - spin_lock(&brd->brd_lock); - idx = sector >> PAGE_SECTORS_SHIFT; - page = radix_tree_delete(&brd->brd_pages, idx); - spin_unlock(&brd->brd_lock); - if (page) - __free_page(page); -} - -static void brd_zero_page(struct brd_device *brd, sector_t sector) -{ - struct page *page; - - page = brd_lookup_page(brd, sector); - if (page) - clear_highpage(page); -} - /* * Free all backing store pages and radix tree. This must only be called when * there are no other users of the device. 
@@ -212,24 +190,6 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) return 0; } -static void discard_from_brd(struct brd_device *brd, - sector_t sector, size_t n) -{ - while (n >= PAGE_SIZE) { - /* - * Don't want to actually discard pages here because - * re-allocating the pages can result in writeback - * deadlocks under heavy load. - */ - if (0) - brd_free_page(brd, sector); - else - brd_zero_page(brd, sector); - sector += PAGE_SIZE >> SECTOR_SHIFT; - n -= PAGE_SIZE; - } -} - /* * Copy n bytes from src to the brd starting at sector. Does not sleep. */ @@ -338,14 +298,6 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) goto io_error; - if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { - if (sector & ((PAGE_SIZE >> SECTOR_SHIFT) - 1) || - bio->bi_iter.bi_size & ~PAGE_MASK) - goto io_error; - discard_from_brd(brd, sector, bio->bi_iter.bi_size); - goto out; - } - bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; int err; @@ -357,7 +309,6 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) sector += len >> SECTOR_SHIFT; } -out: bio_endio(bio); return BLK_QC_T_NONE; io_error: @@ -464,11 +415,6 @@ static struct brd_device *brd_alloc(int i) * is harmless) */ blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE); - - brd->brd_queue->limits.discard_granularity = PAGE_SIZE; - blk_queue_max_discard_sectors(brd->brd_queue, UINT_MAX); - brd->brd_queue->limits.discard_zeroes_data = 1; - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue); #ifdef CONFIG_BLK_DEV_RAM_DAX queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue); #endif diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 8e1a4554951c..cd375503f7b0 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1864,8 +1864,7 @@ static void cciss_softirq_done(struct request *rq) /* set the residual count for pc requests */ if 
(blk_rq_is_passthrough(rq)) scsi_req(rq)->resid_len = c->err_info->ResidualCnt; - - blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO); + blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0); spin_lock_irqsave(&h->lock, flags); cmd_free(h, c); @@ -3140,18 +3139,19 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, { int retry_cmd = 0; struct request *rq = cmd->rq; + struct scsi_request *sreq = scsi_req(rq); - rq->errors = 0; + sreq->result = 0; if (timeout) - rq->errors = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT); + sreq->result = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT); if (cmd->err_info->CommandStatus == 0) /* no error has occurred */ goto after_error_processing; switch (cmd->err_info->CommandStatus) { case CMD_TARGET_STATUS: - rq->errors = evaluate_target_status(h, cmd, &retry_cmd); + sreq->result = evaluate_target_status(h, cmd, &retry_cmd); break; case CMD_DATA_UNDERRUN: if (!blk_rq_is_passthrough(cmd->rq)) { @@ -3169,7 +3169,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_INVALID: dev_warn(&h->pdev->dev, "cciss: cmd %p is " "reported invalid\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3177,7 +3177,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_PROTOCOL_ERR: dev_warn(&h->pdev->dev, "cciss: cmd %p has " "protocol error\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? 
DID_PASSTHROUGH : DID_ERROR); @@ -3185,7 +3185,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_HARDWARE_ERR: dev_warn(&h->pdev->dev, "cciss: cmd %p had " " hardware error\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3193,7 +3193,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_CONNECTION_LOST: dev_warn(&h->pdev->dev, "cciss: cmd %p had " "connection lost\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3201,7 +3201,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_ABORTED: dev_warn(&h->pdev->dev, "cciss: cmd %p was " "aborted\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT); @@ -3209,7 +3209,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_ABORT_FAILED: dev_warn(&h->pdev->dev, "cciss: cmd %p reports " "abort failed\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3224,21 +3224,21 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, } else dev_warn(&h->pdev->dev, "%p retried too many times\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? 
DID_PASSTHROUGH : DID_ABORT); break; case CMD_TIMEOUT: dev_warn(&h->pdev->dev, "cmd %p timedout\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_UNABORTABLE: dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3247,7 +3247,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, dev_warn(&h->pdev->dev, "cmd %p returned " "unknown status %x\n", cmd, cmd->err_info->CommandStatus); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3380,9 +3380,9 @@ static void do_cciss_request(struct request_queue *q) if (dma_mapping_error(&h->pdev->dev, temp64.val)) { dev_warn(&h->pdev->dev, "%s: error mapping page for DMA\n", __func__); - creq->errors = make_status_bytes(SAM_STAT_GOOD, - 0, DRIVER_OK, - DID_SOFT_ERROR); + scsi_req(creq)->result = + make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK, + DID_SOFT_ERROR); cmd_free(h, c); return; } @@ -3395,9 +3395,9 @@ static void do_cciss_request(struct request_queue *q) if (cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex], (seg - (h->max_cmd_sgentries - 1)) * sizeof(SGDescriptor_struct))) { - creq->errors = make_status_bytes(SAM_STAT_GOOD, - 0, DRIVER_OK, - DID_SOFT_ERROR); + scsi_req(creq)->result = + make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK, + DID_SOFT_ERROR); cmd_free(h, c); return; } diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index de5c3ee8a790..494837e59f23 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ 
b/drivers/block/drbd/drbd_debugfs.c @@ -236,9 +236,6 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL"); seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C"); seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync"); - - if (f & EE_IS_TRIM) - __seq_print_rq_state_bit(m, f & EE_IS_TRIM_USE_ZEROOUT, &sep, "zero-out", "trim"); seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same"); seq_putc(m, '\n'); } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 724d1c50fc52..d5da45bb03a6 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -437,9 +437,6 @@ enum { /* is this a TRIM aka REQ_DISCARD? */ __EE_IS_TRIM, - /* our lower level cannot handle trim, - * and we want to fall back to zeroout instead */ - __EE_IS_TRIM_USE_ZEROOUT, /* In case a barrier failed, * we need to resubmit without the barrier flag. */ @@ -482,7 +479,6 @@ enum { #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) #define EE_IS_TRIM (1<<__EE_IS_TRIM) -#define EE_IS_TRIM_USE_ZEROOUT (1<<__EE_IS_TRIM_USE_ZEROOUT) #define EE_RESUBMITTED (1<<__EE_RESUBMITTED) #define EE_WAS_ERROR (1<<__EE_WAS_ERROR) #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) @@ -1561,8 +1557,6 @@ extern void start_resync_timer_fn(unsigned long data); extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req); /* drbd_receiver.c */ -extern int drbd_issue_discard_or_zero_out(struct drbd_device *device, - sector_t start, unsigned int nr_sectors, bool discard); extern int drbd_receiver(struct drbd_thread *thi); extern int drbd_ack_receiver(struct drbd_thread *thi); extern void drbd_send_ping_wf(struct work_struct *ws); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 92c60cbd04ee..84455c365f57 100644 --- a/drivers/block/drbd/drbd_main.c +++ 
b/drivers/block/drbd/drbd_main.c @@ -931,7 +931,6 @@ void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct r p->qlim->io_min = cpu_to_be32(queue_io_min(q)); p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); p->qlim->discard_enabled = blk_queue_discard(q); - p->qlim->discard_zeroes_data = queue_discard_zeroes_data(q); p->qlim->write_same_capable = !!q->limits.max_write_same_sectors; } else { q = device->rq_queue; @@ -941,7 +940,6 @@ void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct r p->qlim->io_min = cpu_to_be32(queue_io_min(q)); p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); p->qlim->discard_enabled = 0; - p->qlim->discard_zeroes_data = 0; p->qlim->write_same_capable = 0; } } @@ -1668,7 +1666,8 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection, (bio->bi_opf & REQ_FUA ? DP_FUA : 0) | (bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) | (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) | - (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0); + (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) | + (bio_op(bio) == REQ_OP_WRITE_ZEROES ? DP_DISCARD : 0); else return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 908c704e20aa..02255a0d68b9 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1199,10 +1199,6 @@ static void decide_on_discard_support(struct drbd_device *device, struct drbd_connection *connection = first_peer_device(device)->connection; bool can_do = b ? 
blk_queue_discard(b) : true; - if (can_do && b && !b->limits.discard_zeroes_data && !discard_zeroes_if_aligned) { - can_do = false; - drbd_info(device, "discard_zeroes_data=0 and discard_zeroes_if_aligned=no: disabling discards\n"); - } if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) { can_do = false; drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n"); @@ -1217,10 +1213,12 @@ static void decide_on_discard_support(struct drbd_device *device, blk_queue_discard_granularity(q, 512); q->limits.max_discard_sectors = drbd_max_discard_sectors(connection); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection); } else { queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); blk_queue_discard_granularity(q, 0); q->limits.max_discard_sectors = 0; + q->limits.max_write_zeroes_sectors = 0; } } @@ -1482,8 +1480,7 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis if (disk_conf->al_extents > drbd_al_extents_max(nbc)) disk_conf->al_extents = drbd_al_extents_max(nbc); - if (!blk_queue_discard(q) - || (!q->limits.discard_zeroes_data && !disk_conf->discard_zeroes_if_aligned)) { + if (!blk_queue_discard(q)) { if (disk_conf->rs_discard_granularity) { disk_conf->rs_discard_granularity = 0; /* disable feature */ drbd_info(device, "rs_discard_granularity feature disabled\n"); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index aa6bf9692eff..1b0a2be24f39 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1448,105 +1448,14 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); } -/* - * We *may* ignore the discard-zeroes-data setting, if so configured. 
- * - * Assumption is that it "discard_zeroes_data=0" is only because the backend - * may ignore partial unaligned discards. - * - * LVM/DM thin as of at least - * LVM version: 2.02.115(2)-RHEL7 (2015-01-28) - * Library version: 1.02.93-RHEL7 (2015-01-28) - * Driver version: 4.29.0 - * still behaves this way. - * - * For unaligned (wrt. alignment and granularity) or too small discards, - * we zero-out the initial (and/or) trailing unaligned partial chunks, - * but discard all the aligned full chunks. - * - * At least for LVM/DM thin, the result is effectively "discard_zeroes_data=1". - */ -int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, bool discard) -{ - struct block_device *bdev = device->ldev->backing_bdev; - struct request_queue *q = bdev_get_queue(bdev); - sector_t tmp, nr; - unsigned int max_discard_sectors, granularity; - int alignment; - int err = 0; - - if (!discard) - goto zero_out; - - /* Zero-sector (unknown) and one-sector granularities are the same. 
*/ - granularity = max(q->limits.discard_granularity >> 9, 1U); - alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; - - max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22)); - max_discard_sectors -= max_discard_sectors % granularity; - if (unlikely(!max_discard_sectors)) - goto zero_out; - - if (nr_sectors < granularity) - goto zero_out; - - tmp = start; - if (sector_div(tmp, granularity) != alignment) { - if (nr_sectors < 2*granularity) - goto zero_out; - /* start + gran - (start + gran - align) % gran */ - tmp = start + granularity - alignment; - tmp = start + granularity - sector_div(tmp, granularity); - - nr = tmp - start; - err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0); - nr_sectors -= nr; - start = tmp; - } - while (nr_sectors >= granularity) { - nr = min_t(sector_t, nr_sectors, max_discard_sectors); - err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0); - nr_sectors -= nr; - start += nr; - } - zero_out: - if (nr_sectors) { - err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0); - } - return err != 0; -} - -static bool can_do_reliable_discards(struct drbd_device *device) -{ - struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); - struct disk_conf *dc; - bool can_do; - - if (!blk_queue_discard(q)) - return false; - - if (q->limits.discard_zeroes_data) - return true; - - rcu_read_lock(); - dc = rcu_dereference(device->ldev->disk_conf); - can_do = dc->discard_zeroes_if_aligned; - rcu_read_unlock(); - return can_do; -} - static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req) { - /* If the backend cannot discard, or does not guarantee - * read-back zeroes in discarded ranges, we fall back to - * zero-out. Unless configuration specifically requested - * otherwise. 
*/ - if (!can_do_reliable_discards(device)) - peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT; + struct block_device *bdev = device->ldev->backing_bdev; - if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector, - peer_req->i.size >> 9, !(peer_req->flags & EE_IS_TRIM_USE_ZEROOUT))) + if (blkdev_issue_zeroout(bdev, peer_req->i.sector, peer_req->i.size >> 9, + GFP_NOIO, 0)) peer_req->flags |= EE_WAS_ERROR; + drbd_endio_write_sec_final(peer_req); } @@ -2376,7 +2285,7 @@ static unsigned long wire_flags_to_bio_flags(u32 dpf) static unsigned long wire_flags_to_bio_op(u32 dpf) { if (dpf & DP_DISCARD) - return REQ_OP_DISCARD; + return REQ_OP_WRITE_ZEROES; else return REQ_OP_WRITE; } @@ -2567,7 +2476,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * op_flags = wire_flags_to_bio_flags(dp_flags); if (pi->cmd == P_TRIM) { D_ASSERT(peer_device, peer_req->i.size > 0); - D_ASSERT(peer_device, op == REQ_OP_DISCARD); + D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES); D_ASSERT(peer_device, peer_req->pages == NULL); } else if (peer_req->pages == NULL) { D_ASSERT(device, peer_req->i.size == 0); @@ -4880,7 +4789,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac if (get_ldev(device)) { struct drbd_peer_request *peer_req; - const int op = REQ_OP_DISCARD; + const int op = REQ_OP_WRITE_ZEROES; peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector, size, 0, GFP_NOIO); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 652114ae1a8a..b5730e17b455 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -59,6 +59,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio drbd_req_make_private_bio(req, bio_src); req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0) | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0) + | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_UNMAP : 0) | (bio_op(bio_src) == REQ_OP_DISCARD ? 
RQ_UNMAP : 0); req->device = device; req->master_bio = bio_src; @@ -1148,10 +1149,10 @@ static int drbd_process_write_request(struct drbd_request *req) static void drbd_process_discard_req(struct drbd_request *req) { - int err = drbd_issue_discard_or_zero_out(req->device, - req->i.sector, req->i.size >> 9, true); + struct block_device *bdev = req->device->ldev->backing_bdev; - if (err) + if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9, + GFP_NOIO, 0)) req->private_bio->bi_error = -EIO; bio_endio(req->private_bio); } @@ -1180,7 +1181,8 @@ drbd_submit_req_private_bio(struct drbd_request *req) if (get_ldev(device)) { if (drbd_insert_fault(device, type)) bio_io_error(bio); - else if (bio_op(bio) == REQ_OP_DISCARD) + else if (bio_op(bio) == REQ_OP_WRITE_ZEROES || + bio_op(bio) == REQ_OP_DISCARD) drbd_process_discard_req(req); else generic_make_request(bio); @@ -1234,7 +1236,8 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long _drbd_start_io_acct(device, req); /* process discards always from our submitter thread */ - if (bio_op(bio) & REQ_OP_DISCARD) + if ((bio_op(bio) & REQ_OP_WRITE_ZEROES) || + (bio_op(bio) & REQ_OP_DISCARD)) goto queue_for_submitter_thread; if (rw == WRITE && req->private_bio && req->i.size diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 3bff33f21435..1afcb4e02d8d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -174,7 +174,8 @@ void drbd_peer_request_endio(struct bio *bio) struct drbd_peer_request *peer_req = bio->bi_private; struct drbd_device *device = peer_req->peer_device->device; bool is_write = bio_data_dir(bio) == WRITE; - bool is_discard = !!(bio_op(bio) == REQ_OP_DISCARD); + bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES || + bio_op(bio) == REQ_OP_DISCARD; if (bio->bi_error && __ratelimit(&drbd_ratelimit_state)) drbd_warn(device, "%s: error=%d s=%llus\n", @@ -249,6 +250,7 @@ void drbd_request_endio(struct bio 
*bio) /* to avoid recursion in __req_mod */ if (unlikely(bio->bi_error)) { switch (bio_op(bio)) { + case REQ_OP_WRITE_ZEROES: case REQ_OP_DISCARD: if (bio->bi_error == -EOPNOTSUPP) what = DISCARD_COMPLETED_NOTSUPP; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 45b4384f650c..60d4c7653178 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2805,8 +2805,10 @@ static int set_next_request(void) fdc_queue = 0; if (q) { current_req = blk_fetch_request(q); - if (current_req) + if (current_req) { + current_req->error_count = 0; break; + } } } while (fdc_queue != old_pos); @@ -2866,7 +2868,7 @@ do_request: _floppy = floppy_type + DP->autodetect[DRS->probed_format]; } else probing = 0; - errors = &(current_req->errors); + errors = &(current_req->error_count); tmp = make_raw_rw_request(); if (tmp < 2) { request_done(tmp); @@ -4207,9 +4209,7 @@ static int __init do_floppy_init(void) disks[drive]->fops = &floppy_fops; sprintf(disks[drive]->disk_name, "fd%d", drive); - init_timer(&motor_off_timer[drive]); - motor_off_timer[drive].data = drive; - motor_off_timer[drive].function = motor_off_callback; + setup_timer(&motor_off_timer[drive], motor_off_callback, drive); } err = register_blkdev(FLOPPY_MAJOR, "fd"); diff --git a/drivers/block/hd.c b/drivers/block/hd.c deleted file mode 100644 index 6043648da1e8..000000000000 --- a/drivers/block/hd.c +++ /dev/null @@ -1,803 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * - * This is the low-level hd interrupt support. It traverses the - * request-list, using interrupts to jump between functions. As - * all the functions are called within interrupts, we may not - * sleep. Special care is recommended. - * - * modified by Drew Eckhardt to check nr of hd's from the CMOS. 
- * - * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug - * in the early extended-partition checks and added DM partitions - * - * IRQ-unmask, drive-id, multiple-mode, support for ">16 heads", - * and general streamlining by Mark Lord. - * - * Removed 99% of above. Use Mark's ide driver for those options. - * This is now a lightweight ST-506 driver. (Paul Gortmaker) - * - * Modified 1995 Russell King for ARM processor. - * - * Bugfix: max_sectors must be <= 255 or the wheels tend to come - * off in a hurry once you queue things up - Paul G. 02/2001 - */ - -/* Uncomment the following if you want verbose error reports. */ -/* #define VERBOSE_ERRORS */ - -#include <linux/blkdev.h> -#include <linux/errno.h> -#include <linux/signal.h> -#include <linux/interrupt.h> -#include <linux/timer.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/genhd.h> -#include <linux/string.h> -#include <linux/ioport.h> -#include <linux/init.h> -#include <linux/blkpg.h> -#include <linux/ata.h> -#include <linux/hdreg.h> - -#define HD_IRQ 14 - -#define REALLY_SLOW_IO -#include <asm/io.h> -#include <linux/uaccess.h> - -#ifdef __arm__ -#undef HD_IRQ -#endif -#include <asm/irq.h> -#ifdef __arm__ -#define HD_IRQ IRQ_HARDDISK -#endif - -/* Hd controller regster ports */ - -#define HD_DATA 0x1f0 /* _CTL when writing */ -#define HD_ERROR 0x1f1 /* see err-bits */ -#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */ -#define HD_SECTOR 0x1f3 /* starting sector */ -#define HD_LCYL 0x1f4 /* starting cylinder */ -#define HD_HCYL 0x1f5 /* high byte of starting cyl */ -#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */ -#define HD_STATUS 0x1f7 /* see status-bits */ -#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */ -#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */ -#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */ - -#define HD_CMD 0x3f6 /* used for resets */ -#define 
HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */ - -/* Bits of HD_STATUS */ -#define ERR_STAT 0x01 -#define INDEX_STAT 0x02 -#define ECC_STAT 0x04 /* Corrected error */ -#define DRQ_STAT 0x08 -#define SEEK_STAT 0x10 -#define SERVICE_STAT SEEK_STAT -#define WRERR_STAT 0x20 -#define READY_STAT 0x40 -#define BUSY_STAT 0x80 - -/* Bits for HD_ERROR */ -#define MARK_ERR 0x01 /* Bad address mark */ -#define TRK0_ERR 0x02 /* couldn't find track 0 */ -#define ABRT_ERR 0x04 /* Command aborted */ -#define MCR_ERR 0x08 /* media change request */ -#define ID_ERR 0x10 /* ID field not found */ -#define MC_ERR 0x20 /* media changed */ -#define ECC_ERR 0x40 /* Uncorrectable ECC error */ -#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */ -#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */ - -static DEFINE_SPINLOCK(hd_lock); -static struct request_queue *hd_queue; -static struct request *hd_req; - -#define TIMEOUT_VALUE (6*HZ) -#define HD_DELAY 0 - -#define MAX_ERRORS 16 /* Max read/write errors/sector */ -#define RESET_FREQ 8 /* Reset controller every 8th retry */ -#define RECAL_FREQ 4 /* Recalibrate every 4th retry */ -#define MAX_HD 2 - -#define STAT_OK (READY_STAT|SEEK_STAT) -#define OK_STATUS(s) (((s)&(STAT_OK|(BUSY_STAT|WRERR_STAT|ERR_STAT)))==STAT_OK) - -static void recal_intr(void); -static void bad_rw_intr(void); - -static int reset; -static int hd_error; - -/* - * This struct defines the HD's and their types. 
- */ -struct hd_i_struct { - unsigned int head, sect, cyl, wpcom, lzone, ctl; - int unit; - int recalibrate; - int special_op; -}; - -#ifdef HD_TYPE -static struct hd_i_struct hd_info[] = { HD_TYPE }; -static int NR_HD = ARRAY_SIZE(hd_info); -#else -static struct hd_i_struct hd_info[MAX_HD]; -static int NR_HD; -#endif - -static struct gendisk *hd_gendisk[MAX_HD]; - -static struct timer_list device_timer; - -#define TIMEOUT_VALUE (6*HZ) - -#define SET_TIMER \ - do { \ - mod_timer(&device_timer, jiffies + TIMEOUT_VALUE); \ - } while (0) - -static void (*do_hd)(void) = NULL; -#define SET_HANDLER(x) \ -if ((do_hd = (x)) != NULL) \ - SET_TIMER; \ -else \ - del_timer(&device_timer); - - -#if (HD_DELAY > 0) - -#include <linux/i8253.h> - -unsigned long last_req; - -unsigned long read_timer(void) -{ - unsigned long t, flags; - int i; - - raw_spin_lock_irqsave(&i8253_lock, flags); - t = jiffies * 11932; - outb_p(0, 0x43); - i = inb_p(0x40); - i |= inb(0x40) << 8; - raw_spin_unlock_irqrestore(&i8253_lock, flags); - return(t - i); -} -#endif - -static void __init hd_setup(char *str, int *ints) -{ - int hdind = 0; - - if (ints[0] != 3) - return; - if (hd_info[0].head != 0) - hdind = 1; - hd_info[hdind].head = ints[2]; - hd_info[hdind].sect = ints[3]; - hd_info[hdind].cyl = ints[1]; - hd_info[hdind].wpcom = 0; - hd_info[hdind].lzone = ints[1]; - hd_info[hdind].ctl = (ints[2] > 8 ? 
8 : 0); - NR_HD = hdind+1; -} - -static bool hd_end_request(int err, unsigned int bytes) -{ - if (__blk_end_request(hd_req, err, bytes)) - return true; - hd_req = NULL; - return false; -} - -static bool hd_end_request_cur(int err) -{ - return hd_end_request(err, blk_rq_cur_bytes(hd_req)); -} - -static void dump_status(const char *msg, unsigned int stat) -{ - char *name = "hd?"; - if (hd_req) - name = hd_req->rq_disk->disk_name; - -#ifdef VERBOSE_ERRORS - printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff); - if (stat & BUSY_STAT) printk("Busy "); - if (stat & READY_STAT) printk("DriveReady "); - if (stat & WRERR_STAT) printk("WriteFault "); - if (stat & SEEK_STAT) printk("SeekComplete "); - if (stat & DRQ_STAT) printk("DataRequest "); - if (stat & ECC_STAT) printk("CorrectedError "); - if (stat & INDEX_STAT) printk("Index "); - if (stat & ERR_STAT) printk("Error "); - printk("}\n"); - if ((stat & ERR_STAT) == 0) { - hd_error = 0; - } else { - hd_error = inb(HD_ERROR); - printk("%s: %s: error=0x%02x { ", name, msg, hd_error & 0xff); - if (hd_error & BBD_ERR) printk("BadSector "); - if (hd_error & ECC_ERR) printk("UncorrectableError "); - if (hd_error & ID_ERR) printk("SectorIdNotFound "); - if (hd_error & ABRT_ERR) printk("DriveStatusError "); - if (hd_error & TRK0_ERR) printk("TrackZeroNotFound "); - if (hd_error & MARK_ERR) printk("AddrMarkNotFound "); - printk("}"); - if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) { - printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL), - inb(HD_CURRENT) & 0xf, inb(HD_SECTOR)); - if (hd_req) - printk(", sector=%ld", blk_rq_pos(hd_req)); - } - printk("\n"); - } -#else - printk("%s: %s: status=0x%02x.\n", name, msg, stat & 0xff); - if ((stat & ERR_STAT) == 0) { - hd_error = 0; - } else { - hd_error = inb(HD_ERROR); - printk("%s: %s: error=0x%02x.\n", name, msg, hd_error & 0xff); - } -#endif -} - -static void check_status(void) -{ - int i = inb_p(HD_STATUS); - - if (!OK_STATUS(i)) { - dump_status("check_status", i); 
- bad_rw_intr(); - } -} - -static int controller_busy(void) -{ - int retries = 100000; - unsigned char status; - - do { - status = inb_p(HD_STATUS); - } while ((status & BUSY_STAT) && --retries); - return status; -} - -static int status_ok(void) -{ - unsigned char status = inb_p(HD_STATUS); - - if (status & BUSY_STAT) - return 1; /* Ancient, but does it make sense??? */ - if (status & WRERR_STAT) - return 0; - if (!(status & READY_STAT)) - return 0; - if (!(status & SEEK_STAT)) - return 0; - return 1; -} - -static int controller_ready(unsigned int drive, unsigned int head) -{ - int retry = 100; - - do { - if (controller_busy() & BUSY_STAT) - return 0; - outb_p(0xA0 | (drive<<4) | head, HD_CURRENT); - if (status_ok()) - return 1; - } while (--retry); - return 0; -} - -static void hd_out(struct hd_i_struct *disk, - unsigned int nsect, - unsigned int sect, - unsigned int head, - unsigned int cyl, - unsigned int cmd, - void (*intr_addr)(void)) -{ - unsigned short port; - -#if (HD_DELAY > 0) - while (read_timer() - last_req < HD_DELAY) - /* nothing */; -#endif - if (reset) - return; - if (!controller_ready(disk->unit, head)) { - reset = 1; - return; - } - SET_HANDLER(intr_addr); - outb_p(disk->ctl, HD_CMD); - port = HD_DATA; - outb_p(disk->wpcom >> 2, ++port); - outb_p(nsect, ++port); - outb_p(sect, ++port); - outb_p(cyl, ++port); - outb_p(cyl >> 8, ++port); - outb_p(0xA0 | (disk->unit << 4) | head, ++port); - outb_p(cmd, ++port); -} - -static void hd_request (void); - -static int drive_busy(void) -{ - unsigned int i; - unsigned char c; - - for (i = 0; i < 500000 ; i++) { - c = inb_p(HD_STATUS); - if ((c & (BUSY_STAT | READY_STAT | SEEK_STAT)) == STAT_OK) - return 0; - } - dump_status("reset timed out", c); - return 1; -} - -static void reset_controller(void) -{ - int i; - - outb_p(4, HD_CMD); - for (i = 0; i < 1000; i++) barrier(); - outb_p(hd_info[0].ctl & 0x0f, HD_CMD); - for (i = 0; i < 1000; i++) barrier(); - if (drive_busy()) - printk("hd: controller still 
busy\n"); - else if ((hd_error = inb(HD_ERROR)) != 1) - printk("hd: controller reset failed: %02x\n", hd_error); -} - -static void reset_hd(void) -{ - static int i; - -repeat: - if (reset) { - reset = 0; - i = -1; - reset_controller(); - } else { - check_status(); - if (reset) - goto repeat; - } - if (++i < NR_HD) { - struct hd_i_struct *disk = &hd_info[i]; - disk->special_op = disk->recalibrate = 1; - hd_out(disk, disk->sect, disk->sect, disk->head-1, - disk->cyl, ATA_CMD_INIT_DEV_PARAMS, &reset_hd); - if (reset) - goto repeat; - } else - hd_request(); -} - -/* - * Ok, don't know what to do with the unexpected interrupts: on some machines - * doing a reset and a retry seems to result in an eternal loop. Right now I - * ignore it, and just set the timeout. - * - * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the - * drive enters "idle", "standby", or "sleep" mode, so if the status looks - * "good", we just ignore the interrupt completely. - */ -static void unexpected_hd_interrupt(void) -{ - unsigned int stat = inb_p(HD_STATUS); - - if (stat & (BUSY_STAT|DRQ_STAT|ECC_STAT|ERR_STAT)) { - dump_status("unexpected interrupt", stat); - SET_TIMER; - } -} - -/* - * bad_rw_intr() now tries to be a bit smarter and does things - * according to the error returned by the controller. 
- * -Mika Liljeberg (liljeber@cs.Helsinki.FI) - */ -static void bad_rw_intr(void) -{ - struct request *req = hd_req; - - if (req != NULL) { - struct hd_i_struct *disk = req->rq_disk->private_data; - if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) { - hd_end_request_cur(-EIO); - disk->special_op = disk->recalibrate = 1; - } else if (req->errors % RESET_FREQ == 0) - reset = 1; - else if ((hd_error & TRK0_ERR) || req->errors % RECAL_FREQ == 0) - disk->special_op = disk->recalibrate = 1; - /* Otherwise just retry */ - } -} - -static inline int wait_DRQ(void) -{ - int retries; - int stat; - - for (retries = 0; retries < 100000; retries++) { - stat = inb_p(HD_STATUS); - if (stat & DRQ_STAT) - return 0; - } - dump_status("wait_DRQ", stat); - return -1; -} - -static void read_intr(void) -{ - struct request *req; - int i, retries = 100000; - - do { - i = (unsigned) inb_p(HD_STATUS); - if (i & BUSY_STAT) - continue; - if (!OK_STATUS(i)) - break; - if (i & DRQ_STAT) - goto ok_to_read; - } while (--retries > 0); - dump_status("read_intr", i); - bad_rw_intr(); - hd_request(); - return; - -ok_to_read: - req = hd_req; - insw(HD_DATA, bio_data(req->bio), 256); -#ifdef DEBUG - printk("%s: read: sector %ld, remaining = %u, buffer=%p\n", - req->rq_disk->disk_name, blk_rq_pos(req) + 1, - blk_rq_sectors(req) - 1, bio_data(req->bio)+512); -#endif - if (hd_end_request(0, 512)) { - SET_HANDLER(&read_intr); - return; - } - - (void) inb_p(HD_STATUS); -#if (HD_DELAY > 0) - last_req = read_timer(); -#endif - hd_request(); -} - -static void write_intr(void) -{ - struct request *req = hd_req; - int i; - int retries = 100000; - - do { - i = (unsigned) inb_p(HD_STATUS); - if (i & BUSY_STAT) - continue; - if (!OK_STATUS(i)) - break; - if ((blk_rq_sectors(req) <= 1) || (i & DRQ_STAT)) - goto ok_to_write; - } while (--retries > 0); - dump_status("write_intr", i); - bad_rw_intr(); - hd_request(); - return; - -ok_to_write: - if (hd_end_request(0, 512)) { - SET_HANDLER(&write_intr); - 
outsw(HD_DATA, bio_data(req->bio), 256); - return; - } - -#if (HD_DELAY > 0) - last_req = read_timer(); -#endif - hd_request(); -} - -static void recal_intr(void) -{ - check_status(); -#if (HD_DELAY > 0) - last_req = read_timer(); -#endif - hd_request(); -} - -/* - * This is another of the error-routines I don't know what to do with. The - * best idea seems to just set reset, and start all over again. - */ -static void hd_times_out(unsigned long dummy) -{ - char *name; - - do_hd = NULL; - - if (!hd_req) - return; - - spin_lock_irq(hd_queue->queue_lock); - reset = 1; - name = hd_req->rq_disk->disk_name; - printk("%s: timeout\n", name); - if (++hd_req->errors >= MAX_ERRORS) { -#ifdef DEBUG - printk("%s: too many errors\n", name); -#endif - hd_end_request_cur(-EIO); - } - hd_request(); - spin_unlock_irq(hd_queue->queue_lock); -} - -static int do_special_op(struct hd_i_struct *disk, struct request *req) -{ - if (disk->recalibrate) { - disk->recalibrate = 0; - hd_out(disk, disk->sect, 0, 0, 0, ATA_CMD_RESTORE, &recal_intr); - return reset; - } - if (disk->head > 16) { - printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name); - hd_end_request_cur(-EIO); - } - disk->special_op = 0; - return 1; -} - -/* - * The driver enables interrupts as much as possible. In order to do this, - * (a) the device-interrupt is disabled before entering hd_request(), - * and (b) the timeout-interrupt is disabled before the sti(). - * - * Interrupts are still masked (by default) whenever we are exchanging - * data/cmds with a drive, because some drives seem to have very poor - * tolerance for latency during I/O. The IDE driver has support to unmask - * interrupts for non-broken hardware, so use that driver if required. 
- */ -static void hd_request(void) -{ - unsigned int block, nsect, sec, track, head, cyl; - struct hd_i_struct *disk; - struct request *req; - - if (do_hd) - return; -repeat: - del_timer(&device_timer); - - if (!hd_req) { - hd_req = blk_fetch_request(hd_queue); - if (!hd_req) { - do_hd = NULL; - return; - } - } - req = hd_req; - - if (reset) { - reset_hd(); - return; - } - disk = req->rq_disk->private_data; - block = blk_rq_pos(req); - nsect = blk_rq_sectors(req); - if (block >= get_capacity(req->rq_disk) || - ((block+nsect) > get_capacity(req->rq_disk))) { - printk("%s: bad access: block=%d, count=%d\n", - req->rq_disk->disk_name, block, nsect); - hd_end_request_cur(-EIO); - goto repeat; - } - - if (disk->special_op) { - if (do_special_op(disk, req)) - goto repeat; - return; - } - sec = block % disk->sect + 1; - track = block / disk->sect; - head = track % disk->head; - cyl = track / disk->head; -#ifdef DEBUG - printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n", - req->rq_disk->disk_name, - req_data_dir(req) == READ ? 
"read" : "writ", - cyl, head, sec, nsect, bio_data(req->bio)); -#endif - - switch (req_op(req)) { - case REQ_OP_READ: - hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ, - &read_intr); - if (reset) - goto repeat; - break; - case REQ_OP_WRITE: - hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE, - &write_intr); - if (reset) - goto repeat; - if (wait_DRQ()) { - bad_rw_intr(); - goto repeat; - } - outsw(HD_DATA, bio_data(req->bio), 256); - break; - default: - printk("unknown hd-command\n"); - hd_end_request_cur(-EIO); - break; - } -} - -static void do_hd_request(struct request_queue *q) -{ - hd_request(); -} - -static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct hd_i_struct *disk = bdev->bd_disk->private_data; - - geo->heads = disk->head; - geo->sectors = disk->sect; - geo->cylinders = disk->cyl; - return 0; -} - -/* - * Releasing a block device means we sync() it, so that it can safely - * be forgotten about... - */ - -static irqreturn_t hd_interrupt(int irq, void *dev_id) -{ - void (*handler)(void) = do_hd; - - spin_lock(hd_queue->queue_lock); - - do_hd = NULL; - del_timer(&device_timer); - if (!handler) - handler = unexpected_hd_interrupt; - handler(); - - spin_unlock(hd_queue->queue_lock); - - return IRQ_HANDLED; -} - -static const struct block_device_operations hd_fops = { - .getgeo = hd_getgeo, -}; - -static int __init hd_init(void) -{ - int drive; - - if (register_blkdev(HD_MAJOR, "hd")) - return -1; - - hd_queue = blk_init_queue(do_hd_request, &hd_lock); - if (!hd_queue) { - unregister_blkdev(HD_MAJOR, "hd"); - return -ENOMEM; - } - - blk_queue_max_hw_sectors(hd_queue, 255); - init_timer(&device_timer); - device_timer.function = hd_times_out; - blk_queue_logical_block_size(hd_queue, 512); - - if (!NR_HD) { - /* - * We don't know anything about the drive. This means - * that you *MUST* specify the drive parameters to the - * kernel yourself. 
- * - * If we were on an i386, we used to read this info from - * the BIOS or CMOS. This doesn't work all that well, - * since this assumes that this is a primary or secondary - * drive, and if we're using this legacy driver, it's - * probably an auxiliary controller added to recover - * legacy data off an ST-506 drive. Either way, it's - * definitely safest to have the user explicitly specify - * the information. - */ - printk("hd: no drives specified - use hd=cyl,head,sectors" - " on kernel command line\n"); - goto out; - } - - for (drive = 0 ; drive < NR_HD ; drive++) { - struct gendisk *disk = alloc_disk(64); - struct hd_i_struct *p = &hd_info[drive]; - if (!disk) - goto Enomem; - disk->major = HD_MAJOR; - disk->first_minor = drive << 6; - disk->fops = &hd_fops; - sprintf(disk->disk_name, "hd%c", 'a'+drive); - disk->private_data = p; - set_capacity(disk, p->head * p->sect * p->cyl); - disk->queue = hd_queue; - p->unit = drive; - hd_gendisk[drive] = disk; - printk("%s: %luMB, CHS=%d/%d/%d\n", - disk->disk_name, (unsigned long)get_capacity(disk)/2048, - p->cyl, p->head, p->sect); - } - - if (request_irq(HD_IRQ, hd_interrupt, 0, "hd", NULL)) { - printk("hd: unable to get IRQ%d for the hard disk driver\n", - HD_IRQ); - goto out1; - } - if (!request_region(HD_DATA, 8, "hd")) { - printk(KERN_WARNING "hd: port 0x%x busy\n", HD_DATA); - goto out2; - } - if (!request_region(HD_CMD, 1, "hd(cmd)")) { - printk(KERN_WARNING "hd: port 0x%x busy\n", HD_CMD); - goto out3; - } - - /* Let them fly */ - for (drive = 0; drive < NR_HD; drive++) - add_disk(hd_gendisk[drive]); - - return 0; - -out3: - release_region(HD_DATA, 8); -out2: - free_irq(HD_IRQ, NULL); -out1: - for (drive = 0; drive < NR_HD; drive++) - put_disk(hd_gendisk[drive]); - NR_HD = 0; -out: - del_timer(&device_timer); - unregister_blkdev(HD_MAJOR, "hd"); - blk_cleanup_queue(hd_queue); - return -1; -Enomem: - while (drive--) - put_disk(hd_gendisk[drive]); - goto out; -} - -static int __init parse_hd_setup(char *line) 
-{ - int ints[6]; - - (void) get_options(line, ARRAY_SIZE(ints), ints); - hd_setup(NULL, ints); - - return 1; -} -__setup("hd=", parse_hd_setup); - -late_initcall(hd_init); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 0ecb6461ed81..994403efee19 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -445,32 +445,27 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq) return ret; } -static inline void handle_partial_read(struct loop_cmd *cmd, long bytes) +static void lo_complete_rq(struct request *rq) { - if (bytes < 0 || op_is_write(req_op(cmd->rq))) - return; + struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); - if (unlikely(bytes < blk_rq_bytes(cmd->rq))) { + if (unlikely(req_op(cmd->rq) == REQ_OP_READ && cmd->use_aio && + cmd->ret >= 0 && cmd->ret < blk_rq_bytes(cmd->rq))) { struct bio *bio = cmd->rq->bio; - bio_advance(bio, bytes); + bio_advance(bio, cmd->ret); zero_fill_bio(bio); } + + blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0); } static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) { struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); - struct request *rq = cmd->rq; - - handle_partial_read(cmd, ret); - if (ret > 0) - ret = 0; - else if (ret < 0) - ret = -EIO; - - blk_mq_complete_request(rq, ret); + cmd->ret = ret; + blk_mq_complete_request(cmd->rq); } static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, @@ -528,6 +523,7 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) case REQ_OP_FLUSH: return lo_req_flush(lo, rq); case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: return lo_discard(lo, rq, pos); case REQ_OP_WRITE: if (lo->transfer) @@ -826,7 +822,7 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_granularity = 0; q->limits.discard_alignment = 0; blk_queue_max_discard_sectors(q, 0); - q->limits.discard_zeroes_data = 0; + blk_queue_max_write_zeroes_sectors(q, 0); queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); 
return; } @@ -834,7 +830,7 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_granularity = inode->i_sb->s_blocksize; q->limits.discard_alignment = 0; blk_queue_max_discard_sectors(q, UINT_MAX >> 9); - q->limits.discard_zeroes_data = 1; + blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); } @@ -1660,6 +1656,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, switch (req_op(cmd->rq)) { case REQ_OP_FLUSH: case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: cmd->use_aio = false; break; default: @@ -1686,8 +1683,10 @@ static void loop_handle_cmd(struct loop_cmd *cmd) ret = do_req_filebacked(lo, cmd->rq); failed: /* complete non-aio request */ - if (!cmd->use_aio || ret) - blk_mq_complete_request(cmd->rq, ret ? -EIO : 0); + if (!cmd->use_aio || ret) { + cmd->ret = ret ? -EIO : 0; + blk_mq_complete_request(cmd->rq); + } } static void loop_queue_work(struct kthread_work *work) @@ -1710,9 +1709,10 @@ static int loop_init_request(void *data, struct request *rq, return 0; } -static struct blk_mq_ops loop_mq_ops = { +static const struct blk_mq_ops loop_mq_ops = { .queue_rq = loop_queue_rq, .init_request = loop_init_request, + .complete = lo_complete_rq, }; static int loop_add(struct loop_device **l, int i) diff --git a/drivers/block/loop.h b/drivers/block/loop.h index fb2237c73e61..fecd3f97ef8c 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -70,6 +70,7 @@ struct loop_cmd { struct request *rq; struct list_head list; bool use_aio; /* use AIO interface to handle I/O */ + long ret; struct kiocb iocb; }; diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c deleted file mode 100644 index 286f276f586e..000000000000 --- a/drivers/block/mg_disk.c +++ /dev/null @@ -1,1112 +0,0 @@ -/* - * drivers/block/mg_disk.c - * - * Support for the mGine m[g]flash IO mode. 
- * Based on legacy hd.c - * - * (c) 2008 mGine Co.,LTD - * (c) 2008 unsik Kim <donari75@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/blkdev.h> -#include <linux/hdreg.h> -#include <linux/ata.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/platform_device.h> -#include <linux/gpio.h> -#include <linux/mg_disk.h> -#include <linux/slab.h> - -#define MG_RES_SEC (CONFIG_MG_DISK_RES << 1) - -/* name for block device */ -#define MG_DISK_NAME "mgd" - -#define MG_DISK_MAJ 0 -#define MG_DISK_MAX_PART 16 -#define MG_SECTOR_SIZE 512 -#define MG_MAX_SECTS 256 - -/* Register offsets */ -#define MG_BUFF_OFFSET 0x8000 -#define MG_REG_OFFSET 0xC000 -#define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */ -#define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */ -#define MG_REG_SECT_CNT (MG_REG_OFFSET + 4) -#define MG_REG_SECT_NUM (MG_REG_OFFSET + 6) -#define MG_REG_CYL_LOW (MG_REG_OFFSET + 8) -#define MG_REG_CYL_HIGH (MG_REG_OFFSET + 0xA) -#define MG_REG_DRV_HEAD (MG_REG_OFFSET + 0xC) -#define MG_REG_COMMAND (MG_REG_OFFSET + 0xE) /* write case */ -#define MG_REG_STATUS (MG_REG_OFFSET + 0xE) /* read case */ -#define MG_REG_DRV_CTRL (MG_REG_OFFSET + 0x10) -#define MG_REG_BURST_CTRL (MG_REG_OFFSET + 0x12) - -/* handy status */ -#define MG_STAT_READY (ATA_DRDY | ATA_DSC) -#define MG_READY_OK(s) (((s) & (MG_STAT_READY | (ATA_BUSY | ATA_DF | \ - ATA_ERR))) == MG_STAT_READY) - -/* error code for others */ -#define MG_ERR_NONE 0 -#define MG_ERR_TIMEOUT 0x100 -#define MG_ERR_INIT_STAT 0x101 -#define MG_ERR_TRANSLATION 0x102 -#define MG_ERR_CTRL_RST 0x103 -#define MG_ERR_INV_STAT 0x104 -#define MG_ERR_RSTOUT 0x105 - -#define MG_MAX_ERRORS 6 /* Max read/write errors */ - -/* command */ -#define MG_CMD_RD 
0x20 -#define MG_CMD_WR 0x30 -#define MG_CMD_SLEEP 0x99 -#define MG_CMD_WAKEUP 0xC3 -#define MG_CMD_ID 0xEC -#define MG_CMD_WR_CONF 0x3C -#define MG_CMD_RD_CONF 0x40 - -/* operation mode */ -#define MG_OP_CASCADE (1 << 0) -#define MG_OP_CASCADE_SYNC_RD (1 << 1) -#define MG_OP_CASCADE_SYNC_WR (1 << 2) -#define MG_OP_INTERLEAVE (1 << 3) - -/* synchronous */ -#define MG_BURST_LAT_4 (3 << 4) -#define MG_BURST_LAT_5 (4 << 4) -#define MG_BURST_LAT_6 (5 << 4) -#define MG_BURST_LAT_7 (6 << 4) -#define MG_BURST_LAT_8 (7 << 4) -#define MG_BURST_LEN_4 (1 << 1) -#define MG_BURST_LEN_8 (2 << 1) -#define MG_BURST_LEN_16 (3 << 1) -#define MG_BURST_LEN_32 (4 << 1) -#define MG_BURST_LEN_CONT (0 << 1) - -/* timeout value (unit: ms) */ -#define MG_TMAX_CONF_TO_CMD 1 -#define MG_TMAX_WAIT_RD_DRQ 10 -#define MG_TMAX_WAIT_WR_DRQ 500 -#define MG_TMAX_RST_TO_BUSY 10 -#define MG_TMAX_HDRST_TO_RDY 500 -#define MG_TMAX_SWRST_TO_RDY 500 -#define MG_TMAX_RSTOUT 3000 - -#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST) - -/* main structure for mflash driver */ -struct mg_host { - struct device *dev; - - struct request_queue *breq; - struct request *req; - spinlock_t lock; - struct gendisk *gd; - - struct timer_list timer; - void (*mg_do_intr) (struct mg_host *); - - u16 id[ATA_ID_WORDS]; - - u16 cyls; - u16 heads; - u16 sectors; - u32 n_sectors; - u32 nres_sectors; - - void __iomem *dev_base; - unsigned int irq; - unsigned int rst; - unsigned int rstout; - - u32 major; - u32 error; -}; - -/* - * Debugging macro and defines - */ -#undef DO_MG_DEBUG -#ifdef DO_MG_DEBUG -# define MG_DBG(fmt, args...) \ - printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args) -#else /* CONFIG_MG_DEBUG */ -# define MG_DBG(fmt, args...) 
do { } while (0) -#endif /* CONFIG_MG_DEBUG */ - -static void mg_request(struct request_queue *); - -static bool mg_end_request(struct mg_host *host, int err, unsigned int nr_bytes) -{ - if (__blk_end_request(host->req, err, nr_bytes)) - return true; - - host->req = NULL; - return false; -} - -static bool mg_end_request_cur(struct mg_host *host, int err) -{ - return mg_end_request(host, err, blk_rq_cur_bytes(host->req)); -} - -static void mg_dump_status(const char *msg, unsigned int stat, - struct mg_host *host) -{ - char *name = MG_DISK_NAME; - - if (host->req) - name = host->req->rq_disk->disk_name; - - printk(KERN_ERR "%s: %s: status=0x%02x { ", name, msg, stat & 0xff); - if (stat & ATA_BUSY) - printk("Busy "); - if (stat & ATA_DRDY) - printk("DriveReady "); - if (stat & ATA_DF) - printk("WriteFault "); - if (stat & ATA_DSC) - printk("SeekComplete "); - if (stat & ATA_DRQ) - printk("DataRequest "); - if (stat & ATA_CORR) - printk("CorrectedError "); - if (stat & ATA_ERR) - printk("Error "); - printk("}\n"); - if ((stat & ATA_ERR) == 0) { - host->error = 0; - } else { - host->error = inb((unsigned long)host->dev_base + MG_REG_ERROR); - printk(KERN_ERR "%s: %s: error=0x%02x { ", name, msg, - host->error & 0xff); - if (host->error & ATA_BBK) - printk("BadSector "); - if (host->error & ATA_UNC) - printk("UncorrectableError "); - if (host->error & ATA_IDNF) - printk("SectorIdNotFound "); - if (host->error & ATA_ABORTED) - printk("DriveStatusError "); - if (host->error & ATA_AMNF) - printk("AddrMarkNotFound "); - printk("}"); - if (host->error & (ATA_BBK | ATA_UNC | ATA_IDNF | ATA_AMNF)) { - if (host->req) - printk(", sector=%u", - (unsigned int)blk_rq_pos(host->req)); - } - printk("\n"); - } -} - -static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec) -{ - u8 status; - unsigned long expire, cur_jiffies; - struct mg_drv_data *prv_data = host->dev->platform_data; - - host->error = MG_ERR_NONE; - expire = jiffies + msecs_to_jiffies(msec); - - /* These 2 
times dummy status read prevents reading invalid - * status. A very little time (3 times of mflash operating clk) - * is required for busy bit is set. Use dummy read instead of - * busy wait, because mflash's PLL is machine dependent. - */ - if (prv_data->use_polling) { - status = inb((unsigned long)host->dev_base + MG_REG_STATUS); - status = inb((unsigned long)host->dev_base + MG_REG_STATUS); - } - - status = inb((unsigned long)host->dev_base + MG_REG_STATUS); - - do { - cur_jiffies = jiffies; - if (status & ATA_BUSY) { - if (expect == ATA_BUSY) - break; - } else { - /* Check the error condition! */ - if (status & ATA_ERR) { - mg_dump_status("mg_wait", status, host); - break; - } - - if (expect == MG_STAT_READY) - if (MG_READY_OK(status)) - break; - - if (expect == ATA_DRQ) - if (status & ATA_DRQ) - break; - } - if (!msec) { - mg_dump_status("not ready", status, host); - return MG_ERR_INV_STAT; - } - - status = inb((unsigned long)host->dev_base + MG_REG_STATUS); - } while (time_before(cur_jiffies, expire)); - - if (time_after_eq(cur_jiffies, expire) && msec) - host->error = MG_ERR_TIMEOUT; - - return host->error; -} - -static unsigned int mg_wait_rstout(u32 rstout, u32 msec) -{ - unsigned long expire; - - expire = jiffies + msecs_to_jiffies(msec); - while (time_before(jiffies, expire)) { - if (gpio_get_value(rstout) == 1) - return MG_ERR_NONE; - msleep(10); - } - - return MG_ERR_RSTOUT; -} - -static void mg_unexpected_intr(struct mg_host *host) -{ - u32 status = inb((unsigned long)host->dev_base + MG_REG_STATUS); - - mg_dump_status("mg_unexpected_intr", status, host); -} - -static irqreturn_t mg_irq(int irq, void *dev_id) -{ - struct mg_host *host = dev_id; - void (*handler)(struct mg_host *) = host->mg_do_intr; - - spin_lock(&host->lock); - - host->mg_do_intr = NULL; - del_timer(&host->timer); - if (!handler) - handler = mg_unexpected_intr; - handler(host); - - spin_unlock(&host->lock); - - return IRQ_HANDLED; -} - -/* local copy of ata_id_string() */ -static 
void mg_id_string(const u16 *id, unsigned char *s, - unsigned int ofs, unsigned int len) -{ - unsigned int c; - - BUG_ON(len & 1); - - while (len > 0) { - c = id[ofs] >> 8; - *s = c; - s++; - - c = id[ofs] & 0xff; - *s = c; - s++; - - ofs++; - len -= 2; - } -} - -/* local copy of ata_id_c_string() */ -static void mg_id_c_string(const u16 *id, unsigned char *s, - unsigned int ofs, unsigned int len) -{ - unsigned char *p; - - mg_id_string(id, s, ofs, len - 1); - - p = s + strnlen(s, len - 1); - while (p > s && p[-1] == ' ') - p--; - *p = '\0'; -} - -static int mg_get_disk_id(struct mg_host *host) -{ - u32 i; - s32 err; - const u16 *id = host->id; - struct mg_drv_data *prv_data = host->dev->platform_data; - char fwrev[ATA_ID_FW_REV_LEN + 1]; - char model[ATA_ID_PROD_LEN + 1]; - char serial[ATA_ID_SERNO_LEN + 1]; - - if (!prv_data->use_polling) - outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - - outb(MG_CMD_ID, (unsigned long)host->dev_base + MG_REG_COMMAND); - err = mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_RD_DRQ); - if (err) - return err; - - for (i = 0; i < (MG_SECTOR_SIZE >> 1); i++) - host->id[i] = le16_to_cpu(inw((unsigned long)host->dev_base + - MG_BUFF_OFFSET + i * 2)); - - outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); - err = mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD); - if (err) - return err; - - if ((id[ATA_ID_FIELD_VALID] & 1) == 0) - return MG_ERR_TRANSLATION; - - host->n_sectors = ata_id_u32(id, ATA_ID_LBA_CAPACITY); - host->cyls = id[ATA_ID_CYLS]; - host->heads = id[ATA_ID_HEADS]; - host->sectors = id[ATA_ID_SECTORS]; - - if (MG_RES_SEC && host->heads && host->sectors) { - /* modify cyls, n_sectors */ - host->cyls = (host->n_sectors - MG_RES_SEC) / - host->heads / host->sectors; - host->nres_sectors = host->n_sectors - host->cyls * - host->heads * host->sectors; - host->n_sectors -= host->nres_sectors; - } - - mg_id_c_string(id, fwrev, ATA_ID_FW_REV, sizeof(fwrev)); - mg_id_c_string(id, model, ATA_ID_PROD, 
sizeof(model)); - mg_id_c_string(id, serial, ATA_ID_SERNO, sizeof(serial)); - printk(KERN_INFO "mg_disk: model: %s\n", model); - printk(KERN_INFO "mg_disk: firm: %.8s\n", fwrev); - printk(KERN_INFO "mg_disk: serial: %s\n", serial); - printk(KERN_INFO "mg_disk: %d + reserved %d sectors\n", - host->n_sectors, host->nres_sectors); - - if (!prv_data->use_polling) - outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - - return err; -} - - -static int mg_disk_init(struct mg_host *host) -{ - struct mg_drv_data *prv_data = host->dev->platform_data; - s32 err; - u8 init_status; - - /* hdd rst low */ - gpio_set_value(host->rst, 0); - err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY); - if (err) - return err; - - /* hdd rst high */ - gpio_set_value(host->rst, 1); - err = mg_wait(host, MG_STAT_READY, MG_TMAX_HDRST_TO_RDY); - if (err) - return err; - - /* soft reset on */ - outb(ATA_SRST | (prv_data->use_polling ? ATA_NIEN : 0), - (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY); - if (err) - return err; - - /* soft reset off */ - outb(prv_data->use_polling ? 
ATA_NIEN : 0, - (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - err = mg_wait(host, MG_STAT_READY, MG_TMAX_SWRST_TO_RDY); - if (err) - return err; - - init_status = inb((unsigned long)host->dev_base + MG_REG_STATUS) & 0xf; - - if (init_status == 0xf) - return MG_ERR_INIT_STAT; - - return err; -} - -static void mg_bad_rw_intr(struct mg_host *host) -{ - if (host->req) - if (++host->req->errors >= MG_MAX_ERRORS || - host->error == MG_ERR_TIMEOUT) - mg_end_request_cur(host, -EIO); -} - -static unsigned int mg_out(struct mg_host *host, - unsigned int sect_num, - unsigned int sect_cnt, - unsigned int cmd, - void (*intr_addr)(struct mg_host *)) -{ - struct mg_drv_data *prv_data = host->dev->platform_data; - - if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) - return host->error; - - if (!prv_data->use_polling) { - host->mg_do_intr = intr_addr; - mod_timer(&host->timer, jiffies + 3 * HZ); - } - if (MG_RES_SEC) - sect_num += MG_RES_SEC; - outb((u8)sect_cnt, (unsigned long)host->dev_base + MG_REG_SECT_CNT); - outb((u8)sect_num, (unsigned long)host->dev_base + MG_REG_SECT_NUM); - outb((u8)(sect_num >> 8), (unsigned long)host->dev_base + - MG_REG_CYL_LOW); - outb((u8)(sect_num >> 16), (unsigned long)host->dev_base + - MG_REG_CYL_HIGH); - outb((u8)((sect_num >> 24) | ATA_LBA | ATA_DEVICE_OBS), - (unsigned long)host->dev_base + MG_REG_DRV_HEAD); - outb(cmd, (unsigned long)host->dev_base + MG_REG_COMMAND); - return MG_ERR_NONE; -} - -static void mg_read_one(struct mg_host *host, struct request *req) -{ - u16 *buff = (u16 *)bio_data(req->bio); - u32 i; - - for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) - *buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET + - (i << 1)); -} - -static void mg_read(struct request *req) -{ - struct mg_host *host = req->rq_disk->private_data; - - if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req), - MG_CMD_RD, NULL) != MG_ERR_NONE) - mg_bad_rw_intr(host); - - MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", - blk_rq_sectors(req), 
blk_rq_pos(req), bio_data(req->bio)); - - do { - if (mg_wait(host, ATA_DRQ, - MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return; - } - - mg_read_one(host, req); - - outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + - MG_REG_COMMAND); - } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); -} - -static void mg_write_one(struct mg_host *host, struct request *req) -{ - u16 *buff = (u16 *)bio_data(req->bio); - u32 i; - - for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) - outw(*buff++, (unsigned long)host->dev_base + MG_BUFF_OFFSET + - (i << 1)); -} - -static void mg_write(struct request *req) -{ - struct mg_host *host = req->rq_disk->private_data; - unsigned int rem = blk_rq_sectors(req); - - if (mg_out(host, blk_rq_pos(req), rem, - MG_CMD_WR, NULL) != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return; - } - - MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", - rem, blk_rq_pos(req), bio_data(req->bio)); - - if (mg_wait(host, ATA_DRQ, - MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return; - } - - do { - mg_write_one(host, req); - - outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + - MG_REG_COMMAND); - - rem--; - if (rem > 1 && mg_wait(host, ATA_DRQ, - MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return; - } else if (mg_wait(host, MG_STAT_READY, - MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return; - } - } while (mg_end_request(host, 0, MG_SECTOR_SIZE)); -} - -static void mg_read_intr(struct mg_host *host) -{ - struct request *req = host->req; - u32 i; - - /* check status */ - do { - i = inb((unsigned long)host->dev_base + MG_REG_STATUS); - if (i & ATA_BUSY) - break; - if (!MG_READY_OK(i)) - break; - if (i & ATA_DRQ) - goto ok_to_read; - } while (0); - mg_dump_status("mg_read_intr", i, host); - mg_bad_rw_intr(host); - mg_request(host->breq); - return; - -ok_to_read: - mg_read_one(host, req); - - MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", - blk_rq_pos(req), blk_rq_sectors(req) - 1, 
bio_data(req->bio)); - - /* send read confirm */ - outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); - - if (mg_end_request(host, 0, MG_SECTOR_SIZE)) { - /* set handler if read remains */ - host->mg_do_intr = mg_read_intr; - mod_timer(&host->timer, jiffies + 3 * HZ); - } else /* goto next request */ - mg_request(host->breq); -} - -static void mg_write_intr(struct mg_host *host) -{ - struct request *req = host->req; - u32 i; - bool rem; - - /* check status */ - do { - i = inb((unsigned long)host->dev_base + MG_REG_STATUS); - if (i & ATA_BUSY) - break; - if (!MG_READY_OK(i)) - break; - if ((blk_rq_sectors(req) <= 1) || (i & ATA_DRQ)) - goto ok_to_write; - } while (0); - mg_dump_status("mg_write_intr", i, host); - mg_bad_rw_intr(host); - mg_request(host->breq); - return; - -ok_to_write: - if ((rem = mg_end_request(host, 0, MG_SECTOR_SIZE))) { - /* write 1 sector and set handler if remains */ - mg_write_one(host, req); - MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", - blk_rq_pos(req), blk_rq_sectors(req), bio_data(req->bio)); - host->mg_do_intr = mg_write_intr; - mod_timer(&host->timer, jiffies + 3 * HZ); - } - - /* send write confirm */ - outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); - - if (!rem) - mg_request(host->breq); -} - -static void mg_times_out(unsigned long data) -{ - struct mg_host *host = (struct mg_host *)data; - char *name; - - spin_lock_irq(&host->lock); - - if (!host->req) - goto out_unlock; - - host->mg_do_intr = NULL; - - name = host->req->rq_disk->disk_name; - printk(KERN_DEBUG "%s: timeout\n", name); - - host->error = MG_ERR_TIMEOUT; - mg_bad_rw_intr(host); - -out_unlock: - mg_request(host->breq); - spin_unlock_irq(&host->lock); -} - -static void mg_request_poll(struct request_queue *q) -{ - struct mg_host *host = q->queuedata; - - while (1) { - if (!host->req) { - host->req = blk_fetch_request(q); - if (!host->req) - break; - } - - switch (req_op(host->req)) { - case REQ_OP_READ: - 
mg_read(host->req); - break; - case REQ_OP_WRITE: - mg_write(host->req); - break; - default: - mg_end_request_cur(host, -EIO); - break; - } - } -} - -static unsigned int mg_issue_req(struct request *req, - struct mg_host *host, - unsigned int sect_num, - unsigned int sect_cnt) -{ - switch (req_op(host->req)) { - case REQ_OP_READ: - if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr) - != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return host->error; - } - break; - case REQ_OP_WRITE: - /* TODO : handler */ - outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr) - != MG_ERR_NONE) { - mg_bad_rw_intr(host); - return host->error; - } - del_timer(&host->timer); - mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ); - outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - if (host->error) { - mg_bad_rw_intr(host); - return host->error; - } - mg_write_one(host, req); - mod_timer(&host->timer, jiffies + 3 * HZ); - outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + - MG_REG_COMMAND); - break; - default: - mg_end_request_cur(host, -EIO); - break; - } - return MG_ERR_NONE; -} - -/* This function also called from IRQ context */ -static void mg_request(struct request_queue *q) -{ - struct mg_host *host = q->queuedata; - struct request *req; - u32 sect_num, sect_cnt; - - while (1) { - if (!host->req) { - host->req = blk_fetch_request(q); - if (!host->req) - break; - } - req = host->req; - - /* check unwanted request call */ - if (host->mg_do_intr) - return; - - del_timer(&host->timer); - - sect_num = blk_rq_pos(req); - /* deal whole segments */ - sect_cnt = blk_rq_sectors(req); - - /* sanity check */ - if (sect_num >= get_capacity(req->rq_disk) || - ((sect_num + sect_cnt) > - get_capacity(req->rq_disk))) { - printk(KERN_WARNING - "%s: bad access: sector=%d, count=%d\n", - req->rq_disk->disk_name, - sect_num, sect_cnt); - mg_end_request_cur(host, -EIO); - continue; - } - - if (!mg_issue_req(req, 
host, sect_num, sect_cnt)) - return; - } -} - -static int mg_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct mg_host *host = bdev->bd_disk->private_data; - - geo->cylinders = (unsigned short)host->cyls; - geo->heads = (unsigned char)host->heads; - geo->sectors = (unsigned char)host->sectors; - return 0; -} - -static const struct block_device_operations mg_disk_ops = { - .getgeo = mg_getgeo -}; - -#ifdef CONFIG_PM_SLEEP -static int mg_suspend(struct device *dev) -{ - struct mg_drv_data *prv_data = dev->platform_data; - struct mg_host *host = prv_data->host; - - if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) - return -EIO; - - if (!prv_data->use_polling) - outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - - outb(MG_CMD_SLEEP, (unsigned long)host->dev_base + MG_REG_COMMAND); - /* wait until mflash deep sleep */ - msleep(1); - - if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) { - if (!prv_data->use_polling) - outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - return -EIO; - } - - return 0; -} - -static int mg_resume(struct device *dev) -{ - struct mg_drv_data *prv_data = dev->platform_data; - struct mg_host *host = prv_data->host; - - if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) - return -EIO; - - outb(MG_CMD_WAKEUP, (unsigned long)host->dev_base + MG_REG_COMMAND); - /* wait until mflash wakeup */ - msleep(1); - - if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) - return -EIO; - - if (!prv_data->use_polling) - outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - - return 0; -} -#endif - -static SIMPLE_DEV_PM_OPS(mg_pm, mg_suspend, mg_resume); - -static int mg_probe(struct platform_device *plat_dev) -{ - struct mg_host *host; - struct resource *rsc; - struct mg_drv_data *prv_data = plat_dev->dev.platform_data; - int err = 0; - - if (!prv_data) { - printk(KERN_ERR "%s:%d fail (no driver_data)\n", - __func__, __LINE__); - err = -EINVAL; - goto probe_err; - } - - /* alloc mg_host */ - 
host = kzalloc(sizeof(struct mg_host), GFP_KERNEL); - if (!host) { - printk(KERN_ERR "%s:%d fail (no memory for mg_host)\n", - __func__, __LINE__); - err = -ENOMEM; - goto probe_err; - } - host->major = MG_DISK_MAJ; - - /* link each other */ - prv_data->host = host; - host->dev = &plat_dev->dev; - - /* io remap */ - rsc = platform_get_resource(plat_dev, IORESOURCE_MEM, 0); - if (!rsc) { - printk(KERN_ERR "%s:%d platform_get_resource fail\n", - __func__, __LINE__); - err = -EINVAL; - goto probe_err_2; - } - host->dev_base = ioremap(rsc->start, resource_size(rsc)); - if (!host->dev_base) { - printk(KERN_ERR "%s:%d ioremap fail\n", - __func__, __LINE__); - err = -EIO; - goto probe_err_2; - } - MG_DBG("dev_base = 0x%x\n", (u32)host->dev_base); - - /* get reset pin */ - rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO, - MG_RST_PIN); - if (!rsc) { - printk(KERN_ERR "%s:%d get reset pin fail\n", - __func__, __LINE__); - err = -EIO; - goto probe_err_3; - } - host->rst = rsc->start; - - /* init rst pin */ - err = gpio_request(host->rst, MG_RST_PIN); - if (err) - goto probe_err_3; - gpio_direction_output(host->rst, 1); - - /* reset out pin */ - if (!(prv_data->dev_attr & MG_DEV_MASK)) { - err = -EINVAL; - goto probe_err_3a; - } - - if (prv_data->dev_attr != MG_BOOT_DEV) { - rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO, - MG_RSTOUT_PIN); - if (!rsc) { - printk(KERN_ERR "%s:%d get reset-out pin fail\n", - __func__, __LINE__); - err = -EIO; - goto probe_err_3a; - } - host->rstout = rsc->start; - err = gpio_request(host->rstout, MG_RSTOUT_PIN); - if (err) - goto probe_err_3a; - gpio_direction_input(host->rstout); - } - - /* disk reset */ - if (prv_data->dev_attr == MG_STORAGE_DEV) { - /* If POR seq. 
not yet finished, wait */ - err = mg_wait_rstout(host->rstout, MG_TMAX_RSTOUT); - if (err) - goto probe_err_3b; - err = mg_disk_init(host); - if (err) { - printk(KERN_ERR "%s:%d fail (err code : %d)\n", - __func__, __LINE__, err); - err = -EIO; - goto probe_err_3b; - } - } - - /* get irq resource */ - if (!prv_data->use_polling) { - host->irq = platform_get_irq(plat_dev, 0); - if (host->irq == -ENXIO) { - err = host->irq; - goto probe_err_3b; - } - err = request_irq(host->irq, mg_irq, - IRQF_TRIGGER_RISING, - MG_DEV_NAME, host); - if (err) { - printk(KERN_ERR "%s:%d fail (request_irq err=%d)\n", - __func__, __LINE__, err); - goto probe_err_3b; - } - - } - - /* get disk id */ - err = mg_get_disk_id(host); - if (err) { - printk(KERN_ERR "%s:%d fail (err code : %d)\n", - __func__, __LINE__, err); - err = -EIO; - goto probe_err_4; - } - - err = register_blkdev(host->major, MG_DISK_NAME); - if (err < 0) { - printk(KERN_ERR "%s:%d register_blkdev fail (err code : %d)\n", - __func__, __LINE__, err); - goto probe_err_4; - } - if (!host->major) - host->major = err; - - spin_lock_init(&host->lock); - - if (prv_data->use_polling) - host->breq = blk_init_queue(mg_request_poll, &host->lock); - else - host->breq = blk_init_queue(mg_request, &host->lock); - - if (!host->breq) { - err = -ENOMEM; - printk(KERN_ERR "%s:%d (blk_init_queue) fail\n", - __func__, __LINE__); - goto probe_err_5; - } - host->breq->queuedata = host; - - /* mflash is random device, thanx for the noop */ - err = elevator_change(host->breq, "noop"); - if (err) { - printk(KERN_ERR "%s:%d (elevator_init) fail\n", - __func__, __LINE__); - goto probe_err_6; - } - blk_queue_max_hw_sectors(host->breq, MG_MAX_SECTS); - blk_queue_logical_block_size(host->breq, MG_SECTOR_SIZE); - - init_timer(&host->timer); - host->timer.function = mg_times_out; - host->timer.data = (unsigned long)host; - - host->gd = alloc_disk(MG_DISK_MAX_PART); - if (!host->gd) { - printk(KERN_ERR "%s:%d (alloc_disk) fail\n", - __func__, __LINE__); 
- err = -ENOMEM; - goto probe_err_7; - } - host->gd->major = host->major; - host->gd->first_minor = 0; - host->gd->fops = &mg_disk_ops; - host->gd->queue = host->breq; - host->gd->private_data = host; - sprintf(host->gd->disk_name, MG_DISK_NAME"a"); - - set_capacity(host->gd, host->n_sectors); - - add_disk(host->gd); - - return err; - -probe_err_7: - del_timer_sync(&host->timer); -probe_err_6: - blk_cleanup_queue(host->breq); -probe_err_5: - unregister_blkdev(host->major, MG_DISK_NAME); -probe_err_4: - if (!prv_data->use_polling) - free_irq(host->irq, host); -probe_err_3b: - gpio_free(host->rstout); -probe_err_3a: - gpio_free(host->rst); -probe_err_3: - iounmap(host->dev_base); -probe_err_2: - kfree(host); -probe_err: - return err; -} - -static int mg_remove(struct platform_device *plat_dev) -{ - struct mg_drv_data *prv_data = plat_dev->dev.platform_data; - struct mg_host *host = prv_data->host; - int err = 0; - - /* delete timer */ - del_timer_sync(&host->timer); - - /* remove disk */ - if (host->gd) { - del_gendisk(host->gd); - put_disk(host->gd); - } - /* remove queue */ - if (host->breq) - blk_cleanup_queue(host->breq); - - /* unregister blk device */ - unregister_blkdev(host->major, MG_DISK_NAME); - - /* free irq */ - if (!prv_data->use_polling) - free_irq(host->irq, host); - - /* free reset-out pin */ - if (prv_data->dev_attr != MG_BOOT_DEV) - gpio_free(host->rstout); - - /* free rst pin */ - if (host->rst) - gpio_free(host->rst); - - /* unmap io */ - if (host->dev_base) - iounmap(host->dev_base); - - /* free mg_host */ - kfree(host); - - return err; -} - -static struct platform_driver mg_disk_driver = { - .probe = mg_probe, - .remove = mg_remove, - .driver = { - .name = MG_DEV_NAME, - .pm = &mg_pm, - } -}; - -/**************************************************************************** - * - * Module stuff - * - ****************************************************************************/ - -static int __init mg_init(void) -{ - printk(KERN_INFO "mGine mflash 
driver, (c) 2008 mGine Co.\n"); - return platform_driver_register(&mg_disk_driver); -} - -static void __exit mg_exit(void) -{ - printk(KERN_INFO "mflash driver : bye bye\n"); - platform_driver_unregister(&mg_disk_driver); -} - -module_init(mg_init); -module_exit(mg_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("unsik Kim <donari75@gmail.com>"); -MODULE_DESCRIPTION("mGine m[g]flash device driver"); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index f96ab717534c..02804cc79d82 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -169,6 +169,25 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev) return false; /* device present */ } +/* we have to use runtime tag to setup command header */ +static void mtip_init_cmd_header(struct request *rq) +{ + struct driver_data *dd = rq->q->queuedata; + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64; + + /* Point the command headers at the command tables. 
*/ + cmd->command_header = dd->port->command_list + + (sizeof(struct mtip_cmd_hdr) * rq->tag); + cmd->command_header_dma = dd->port->command_list_dma + + (sizeof(struct mtip_cmd_hdr) * rq->tag); + + if (host_cap_64) + cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16); + + cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF); +} + static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) { struct request *rq; @@ -180,6 +199,9 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) if (IS_ERR(rq)) return NULL; + /* Internal cmd isn't submitted via .queue_rq */ + mtip_init_cmd_header(rq); + return blk_mq_rq_to_pdu(rq); } @@ -241,7 +263,8 @@ static void mtip_async_complete(struct mtip_port *port, rq = mtip_rq_from_tag(dd, tag); - blk_mq_complete_request(rq, status); + cmd->status = status; + blk_mq_complete_request(rq); } /* @@ -2910,18 +2933,19 @@ static void mtip_softirq_done_fn(struct request *rq) if (unlikely(cmd->unaligned)) up(&dd->port->cmd_slot_unal); - blk_mq_end_request(rq, rq->errors); + blk_mq_end_request(rq, cmd->status); } static void mtip_abort_cmd(struct request *req, void *data, bool reserved) { + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); struct driver_data *dd = data; dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag); clear_bit(req->tag, dd->port->cmds_to_issue); - req->errors = -EIO; + cmd->status = -EIO; mtip_softirq_done_fn(req); } @@ -3807,6 +3831,8 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq = bd->rq; int ret; + mtip_init_cmd_header(rq); + if (unlikely(mtip_check_unal_depth(hctx, rq))) return BLK_MQ_RQ_QUEUE_BUSY; @@ -3816,7 +3842,6 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, if (likely(!ret)) return BLK_MQ_RQ_QUEUE_OK; - rq->errors = ret; return BLK_MQ_RQ_QUEUE_ERROR; } @@ -3838,7 +3863,6 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx, { struct 
driver_data *dd = data; struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64; /* * For flush requests, request_idx starts at the end of the @@ -3855,17 +3879,6 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx, memset(cmd->command, 0, CMD_DMA_ALLOC_SZ); - /* Point the command headers at the command tables. */ - cmd->command_header = dd->port->command_list + - (sizeof(struct mtip_cmd_hdr) * request_idx); - cmd->command_header_dma = dd->port->command_list_dma + - (sizeof(struct mtip_cmd_hdr) * request_idx); - - if (host_cap_64) - cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16); - - cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF); - sg_init_table(cmd->sg, MTIP_MAX_SG); return 0; } @@ -3889,7 +3902,7 @@ exit_handler: return BLK_EH_RESET_TIMER; } -static struct blk_mq_ops mtip_mq_ops = { +static const struct blk_mq_ops mtip_mq_ops = { .queue_rq = mtip_queue_rq, .init_request = mtip_init_cmd, .exit_request = mtip_free_cmd, @@ -3969,7 +3982,7 @@ static int mtip_block_initialize(struct driver_data *dd) dd->tags.reserved_tags = 1; dd->tags.cmd_size = sizeof(struct mtip_cmd); dd->tags.numa_node = dd->numa_node; - dd->tags.flags = BLK_MQ_F_SHOULD_MERGE; + dd->tags.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_NO_SCHED; dd->tags.driver_data = dd; dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS; @@ -4025,7 +4038,6 @@ skip_create_disk: dd->queue->limits.discard_granularity = 4096; blk_queue_max_discard_sectors(dd->queue, MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES); - dd->queue->limits.discard_zeroes_data = 0; } /* Set the capacity of the device in 512 byte sectors. 
*/ @@ -4107,9 +4119,11 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) struct driver_data *dd = (struct driver_data *)data; struct mtip_cmd *cmd; - if (likely(!reserv)) - blk_mq_complete_request(rq, -ENODEV); - else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) { + if (likely(!reserv)) { + cmd = blk_mq_rq_to_pdu(rq); + cmd->status = -ENODEV; + blk_mq_complete_request(rq); + } else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) { cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); if (cmd->comp_func) @@ -4162,7 +4176,7 @@ static int mtip_block_remove(struct driver_data *dd) dev_info(&dd->pdev->dev, "device %s surprise removal\n", dd->disk->disk_name); - blk_mq_freeze_queue_start(dd->queue); + blk_freeze_queue_start(dd->queue); blk_mq_stop_hw_queues(dd->queue); blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 7617888f7944..57b41528a824 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -352,6 +352,7 @@ struct mtip_cmd { int retries; /* The number of retries left for this command. */ int direction; /* Data transfer direction */ + int status; }; /* Structure used to describe a port. 
*/ diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 3c9052bf2327..56efb0444b4d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -40,49 +40,82 @@ #include <asm/types.h> #include <linux/nbd.h> +#include <linux/nbd-netlink.h> +#include <net/genetlink.h> static DEFINE_IDR(nbd_index_idr); static DEFINE_MUTEX(nbd_index_mutex); +static int nbd_total_devices = 0; struct nbd_sock { struct socket *sock; struct mutex tx_lock; struct request *pending; int sent; + bool dead; + int fallback_index; + int cookie; +}; + +struct recv_thread_args { + struct work_struct work; + struct nbd_device *nbd; + int index; +}; + +struct link_dead_args { + struct work_struct work; + int index; }; #define NBD_TIMEDOUT 0 #define NBD_DISCONNECT_REQUESTED 1 #define NBD_DISCONNECTED 2 -#define NBD_RUNNING 3 +#define NBD_HAS_PID_FILE 3 +#define NBD_HAS_CONFIG_REF 4 +#define NBD_BOUND 5 +#define NBD_DESTROY_ON_DISCONNECT 6 -struct nbd_device { +struct nbd_config { u32 flags; unsigned long runtime_flags; - struct nbd_sock **socks; - int magic; + u64 dead_conn_timeout; - struct blk_mq_tag_set tag_set; - - struct mutex config_lock; - struct gendisk *disk; + struct nbd_sock **socks; int num_connections; + atomic_t live_connections; + wait_queue_head_t conn_wait; + atomic_t recv_threads; wait_queue_head_t recv_wq; loff_t blksize; loff_t bytesize; - - struct task_struct *task_recv; - struct task_struct *task_setup; - #if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *dbg_dir; #endif }; +struct nbd_device { + struct blk_mq_tag_set tag_set; + + int index; + refcount_t config_refs; + refcount_t refs; + struct nbd_config *config; + struct mutex config_lock; + struct gendisk *disk; + + struct list_head list; + struct task_struct *task_recv; + struct task_struct *task_setup; +}; + struct nbd_cmd { struct nbd_device *nbd; + int index; + int cookie; struct completion send_complete; + int status; }; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -100,18 +133,16 @@ static int part_shift; static int 
nbd_dev_dbg_init(struct nbd_device *nbd); static void nbd_dev_dbg_close(struct nbd_device *nbd); - +static void nbd_config_put(struct nbd_device *nbd); +static void nbd_connect_reply(struct genl_info *info, int index); +static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info); +static void nbd_dead_link_work(struct work_struct *work); static inline struct device *nbd_to_dev(struct nbd_device *nbd) { return disk_to_dev(nbd->disk); } -static bool nbd_is_connected(struct nbd_device *nbd) -{ - return !!nbd->task_recv; -} - static const char *nbdcmd_to_ascii(int cmd) { switch (cmd) { @@ -124,44 +155,104 @@ static const char *nbdcmd_to_ascii(int cmd) return "invalid"; } -static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev) +static ssize_t pid_show(struct device *dev, + struct device_attribute *attr, char *buf) { - if (bdev->bd_openers <= 1) - bd_set_size(bdev, 0); - set_capacity(nbd->disk, 0); - kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); + struct gendisk *disk = dev_to_disk(dev); + struct nbd_device *nbd = (struct nbd_device *)disk->private_data; - return 0; + return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv)); +} + +static struct device_attribute pid_attr = { + .attr = { .name = "pid", .mode = S_IRUGO}, + .show = pid_show, +}; + +static void nbd_dev_remove(struct nbd_device *nbd) +{ + struct gendisk *disk = nbd->disk; + if (disk) { + del_gendisk(disk); + blk_cleanup_queue(disk->queue); + blk_mq_free_tag_set(&nbd->tag_set); + disk->private_data = NULL; + put_disk(disk); + } + kfree(nbd); +} + +static void nbd_put(struct nbd_device *nbd) +{ + if (refcount_dec_and_mutex_lock(&nbd->refs, + &nbd_index_mutex)) { + idr_remove(&nbd_index_idr, nbd->index); + mutex_unlock(&nbd_index_mutex); + nbd_dev_remove(nbd); + } +} + +static int nbd_disconnected(struct nbd_config *config) +{ + return test_bit(NBD_DISCONNECTED, &config->runtime_flags) || + test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags); +} + +static void 
nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, + int notify) +{ + if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) { + struct link_dead_args *args; + args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO); + if (args) { + INIT_WORK(&args->work, nbd_dead_link_work); + args->index = nbd->index; + queue_work(system_wq, &args->work); + } + } + if (!nsock->dead) { + kernel_sock_shutdown(nsock->sock, SHUT_RDWR); + atomic_dec(&nbd->config->live_connections); + } + nsock->dead = true; + nsock->pending = NULL; + nsock->sent = 0; +} + +static void nbd_size_clear(struct nbd_device *nbd) +{ + if (nbd->config->bytesize) { + set_capacity(nbd->disk, 0); + kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); + } } -static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev) +static void nbd_size_update(struct nbd_device *nbd) { - blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize); - blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize); - bd_set_size(bdev, nbd->bytesize); - set_capacity(nbd->disk, nbd->bytesize >> 9); + struct nbd_config *config = nbd->config; + blk_queue_logical_block_size(nbd->disk->queue, config->blksize); + blk_queue_physical_block_size(nbd->disk->queue, config->blksize); + set_capacity(nbd->disk, config->bytesize >> 9); kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); } -static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev, - loff_t blocksize, loff_t nr_blocks) +static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, + loff_t nr_blocks) { - nbd->blksize = blocksize; - nbd->bytesize = blocksize * nr_blocks; - if (nbd_is_connected(nbd)) - nbd_size_update(nbd, bdev); + struct nbd_config *config = nbd->config; + config->blksize = blocksize; + config->bytesize = blocksize * nr_blocks; + nbd_size_update(nbd); } -static void nbd_end_request(struct nbd_cmd *cmd) +static void nbd_complete_rq(struct request *req) { - struct nbd_device *nbd = cmd->nbd; - struct 
request *req = blk_mq_rq_from_pdu(cmd); - int error = req->errors ? -EIO : 0; + struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); - dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd, - error ? "failed" : "done"); + dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", cmd, + cmd->status ? "failed" : "done"); - blk_mq_complete_request(req, error); + blk_mq_end_request(req, cmd->status); } /* @@ -169,17 +260,18 @@ static void nbd_end_request(struct nbd_cmd *cmd) */ static void sock_shutdown(struct nbd_device *nbd) { + struct nbd_config *config = nbd->config; int i; - if (nbd->num_connections == 0) + if (config->num_connections == 0) return; - if (test_and_set_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) + if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags)) return; - for (i = 0; i < nbd->num_connections; i++) { - struct nbd_sock *nsock = nbd->socks[i]; + for (i = 0; i < config->num_connections; i++) { + struct nbd_sock *nsock = config->socks[i]; mutex_lock(&nsock->tx_lock); - kernel_sock_shutdown(nsock->sock, SHUT_RDWR); + nbd_mark_nsock_dead(nbd, nsock, 0); mutex_unlock(&nsock->tx_lock); } dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n"); @@ -190,14 +282,58 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); struct nbd_device *nbd = cmd->nbd; + struct nbd_config *config; - dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); - set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); - req->errors = -EIO; + if (!refcount_inc_not_zero(&nbd->config_refs)) { + cmd->status = -EIO; + return BLK_EH_HANDLED; + } - mutex_lock(&nbd->config_lock); + /* If we are waiting on our dead timer then we could get timeout + * callbacks for our request. For this we just want to reset the timer + * and let the queue side take care of everything. 
+ */ + if (!completion_done(&cmd->send_complete)) { + nbd_config_put(nbd); + return BLK_EH_RESET_TIMER; + } + config = nbd->config; + + if (config->num_connections > 1) { + dev_err_ratelimited(nbd_to_dev(nbd), + "Connection timed out, retrying\n"); + /* + * Hooray we have more connections, requeue this IO, the submit + * path will put it on a real connection. + */ + if (config->socks && config->num_connections > 1) { + if (cmd->index < config->num_connections) { + struct nbd_sock *nsock = + config->socks[cmd->index]; + mutex_lock(&nsock->tx_lock); + /* We can have multiple outstanding requests, so + * we don't want to mark the nsock dead if we've + * already reconnected with a new socket, so + * only mark it dead if its the same socket we + * were sent out on. + */ + if (cmd->cookie == nsock->cookie) + nbd_mark_nsock_dead(nbd, nsock, 1); + mutex_unlock(&nsock->tx_lock); + } + blk_mq_requeue_request(req, true); + nbd_config_put(nbd); + return BLK_EH_NOT_HANDLED; + } + } else { + dev_err_ratelimited(nbd_to_dev(nbd), + "Connection timed out\n"); + } + set_bit(NBD_TIMEDOUT, &config->runtime_flags); + cmd->status = -EIO; sock_shutdown(nbd); - mutex_unlock(&nbd->config_lock); + nbd_config_put(nbd); + return BLK_EH_HANDLED; } @@ -207,7 +343,8 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, static int sock_xmit(struct nbd_device *nbd, int index, int send, struct iov_iter *iter, int msg_flags, int *sent) { - struct socket *sock = nbd->socks[index]->sock; + struct nbd_config *config = nbd->config; + struct socket *sock = config->socks[index]->sock; int result; struct msghdr msg; unsigned long pflags = current->flags; @@ -253,7 +390,8 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); - struct nbd_sock *nsock = nbd->socks[index]; + struct nbd_config *config = nbd->config; + struct nbd_sock *nsock = 
config->socks[index]; int result; struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; @@ -284,7 +422,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) } if (rq_data_dir(req) == WRITE && - (nbd->flags & NBD_FLAG_READ_ONLY)) { + (config->flags & NBD_FLAG_READ_ONLY)) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Write on read-only\n"); return -EIO; @@ -301,6 +439,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) } iov_iter_advance(&from, sent); } + cmd->index = index; + cmd->cookie = nsock->cookie; request.type = htonl(type); if (type != NBD_CMD_FLUSH) { request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); @@ -328,7 +468,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) } dev_err_ratelimited(disk_to_dev(nbd->disk), "Send control failed (result %d)\n", result); - return -EIO; + return -EAGAIN; } send_pages: if (type != NBD_CMD_WRITE) @@ -370,7 +510,7 @@ send_pages: dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", result); - return -EIO; + return -EAGAIN; } /* * The completion might already have come in, @@ -392,6 +532,7 @@ out: /* NULL returned = something went wrong, inform userspace */ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) { + struct nbd_config *config = nbd->config; int result; struct nbd_reply reply; struct nbd_cmd *cmd; @@ -405,8 +546,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result <= 0) { - if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) && - !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) + if (!nbd_disconnected(config)) dev_err(disk_to_dev(nbd->disk), "Receive control failed (result %d)\n", result); return ERR_PTR(result); @@ -433,7 +573,7 @@ static 
struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (ntohl(reply.error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", ntohl(reply.error)); - req->errors = -EIO; + cmd->status = -EIO; return cmd; } @@ -449,8 +589,19 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); - req->errors = -EIO; - return cmd; + /* + * If we've disconnected or we only have 1 + * connection then we need to make sure we + * complete this request, otherwise error out + * and let the timeout stuff handle resubmitting + * this request onto another connection. + */ + if (nbd_disconnected(config) || + config->num_connections <= 1) { + cmd->status = -EIO; + return cmd; + } + return ERR_PTR(-EIO); } dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", cmd, bvec.bv_len); @@ -462,54 +613,34 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) return cmd; } -static ssize_t pid_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gendisk *disk = dev_to_disk(dev); - struct nbd_device *nbd = (struct nbd_device *)disk->private_data; - - return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv)); -} - -static struct device_attribute pid_attr = { - .attr = { .name = "pid", .mode = S_IRUGO}, - .show = pid_show, -}; - -struct recv_thread_args { - struct work_struct work; - struct nbd_device *nbd; - int index; -}; - static void recv_work(struct work_struct *work) { struct recv_thread_args *args = container_of(work, struct recv_thread_args, work); struct nbd_device *nbd = args->nbd; + struct nbd_config *config = nbd->config; struct nbd_cmd *cmd; int ret = 0; - BUG_ON(nbd->magic != NBD_MAGIC); while (1) { cmd = nbd_read_stat(nbd, args->index); if (IS_ERR(cmd)) { + struct nbd_sock *nsock = config->socks[args->index]; + + mutex_lock(&nsock->tx_lock); + nbd_mark_nsock_dead(nbd, nsock, 1); + 
mutex_unlock(&nsock->tx_lock); ret = PTR_ERR(cmd); break; } - nbd_end_request(cmd); + blk_mq_complete_request(blk_mq_rq_from_pdu(cmd)); } - - /* - * We got an error, shut everybody down if this wasn't the result of a - * disconnect request. - */ - if (ret && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) - sock_shutdown(nbd); - atomic_dec(&nbd->recv_threads); - wake_up(&nbd->recv_wq); + atomic_dec(&config->recv_threads); + wake_up(&config->recv_wq); + nbd_config_put(nbd); + kfree(args); } static void nbd_clear_req(struct request *req, void *data, bool reserved) @@ -519,47 +650,119 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved) if (!blk_mq_request_started(req)) return; cmd = blk_mq_rq_to_pdu(req); - req->errors = -EIO; - nbd_end_request(cmd); + cmd->status = -EIO; + blk_mq_complete_request(req); } static void nbd_clear_que(struct nbd_device *nbd) { - BUG_ON(nbd->magic != NBD_MAGIC); - + blk_mq_stop_hw_queues(nbd->disk->queue); blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL); + blk_mq_start_hw_queues(nbd->disk->queue); dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n"); } +static int find_fallback(struct nbd_device *nbd, int index) +{ + struct nbd_config *config = nbd->config; + int new_index = -1; + struct nbd_sock *nsock = config->socks[index]; + int fallback = nsock->fallback_index; + + if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) + return new_index; + + if (config->num_connections <= 1) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Attempted send on invalid socket\n"); + return new_index; + } + + if (fallback >= 0 && fallback < config->num_connections && + !config->socks[fallback]->dead) + return fallback; + + if (nsock->fallback_index < 0 || + nsock->fallback_index >= config->num_connections || + config->socks[nsock->fallback_index]->dead) { + int i; + for (i = 0; i < config->num_connections; i++) { + if (i == index) + continue; + if (!config->socks[i]->dead) { + new_index = i; + break; + } + } 
+ nsock->fallback_index = new_index; + if (new_index < 0) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Dead connection, failed to find a fallback\n"); + return new_index; + } + } + new_index = nsock->fallback_index; + return new_index; +} + +static int wait_for_reconnect(struct nbd_device *nbd) +{ + struct nbd_config *config = nbd->config; + if (!config->dead_conn_timeout) + return 0; + if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) + return 0; + wait_event_interruptible_timeout(config->conn_wait, + atomic_read(&config->live_connections), + config->dead_conn_timeout); + return atomic_read(&config->live_connections); +} static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); struct nbd_device *nbd = cmd->nbd; + struct nbd_config *config; struct nbd_sock *nsock; int ret; - if (index >= nbd->num_connections) { + if (!refcount_inc_not_zero(&nbd->config_refs)) { dev_err_ratelimited(disk_to_dev(nbd->disk), - "Attempted send on invalid socket\n"); + "Socks array is empty\n"); return -EINVAL; } + config = nbd->config; - if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) { + if (index >= config->num_connections) { dev_err_ratelimited(disk_to_dev(nbd->disk), - "Attempted send on closed socket\n"); + "Attempted send on invalid socket\n"); + nbd_config_put(nbd); return -EINVAL; } - - req->errors = 0; - - nsock = nbd->socks[index]; + cmd->status = 0; +again: + nsock = config->socks[index]; mutex_lock(&nsock->tx_lock); - if (unlikely(!nsock->sock)) { + if (nsock->dead) { + int old_index = index; + index = find_fallback(nbd, index); mutex_unlock(&nsock->tx_lock); - dev_err_ratelimited(disk_to_dev(nbd->disk), - "Attempted send on closed socket\n"); - return -EINVAL; + if (index < 0) { + if (wait_for_reconnect(nbd)) { + index = old_index; + goto again; + } + /* All the sockets should already be down at this point, + * we just want to make sure that DISCONNECTED is set so + * any requests that come in that were 
queue'ed waiting + * for the reconnect timer don't trigger the timer again + * and instead just error out. + */ + sock_shutdown(nbd); + nbd_config_put(nbd); + return -EIO; + } + goto again; } /* Handle the case that we have a pending request that was partially @@ -572,9 +775,21 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) ret = 0; goto out; } + /* + * Some failures are related to the link going down, so anything that + * returns EAGAIN can be retried on a different socket. + */ ret = nbd_send_cmd(nbd, cmd, index); + if (ret == -EAGAIN) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Request send failed trying another connection\n"); + nbd_mark_nsock_dead(nbd, nsock, 1); + mutex_unlock(&nsock->tx_lock); + goto again; + } out: mutex_unlock(&nsock->tx_lock); + nbd_config_put(nbd); return ret; } @@ -611,9 +826,10 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, return ret; } -static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev, - unsigned long arg) +static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, + bool netlink) { + struct nbd_config *config = nbd->config; struct socket *sock; struct nbd_sock **socks; struct nbd_sock *nsock; @@ -623,43 +839,107 @@ static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev, if (!sock) return err; - if (!nbd->task_setup) + if (!netlink && !nbd->task_setup && + !test_bit(NBD_BOUND, &config->runtime_flags)) nbd->task_setup = current; - if (nbd->task_setup != current) { + + if (!netlink && + (nbd->task_setup != current || + test_bit(NBD_BOUND, &config->runtime_flags))) { dev_err(disk_to_dev(nbd->disk), "Device being setup by another task"); - return -EINVAL; + sockfd_put(sock); + return -EBUSY; } - socks = krealloc(nbd->socks, (nbd->num_connections + 1) * + socks = krealloc(config->socks, (config->num_connections + 1) * sizeof(struct nbd_sock *), GFP_KERNEL); - if (!socks) + if (!socks) { + sockfd_put(sock); return -ENOMEM; + } nsock = 
kzalloc(sizeof(struct nbd_sock), GFP_KERNEL); - if (!nsock) + if (!nsock) { + sockfd_put(sock); return -ENOMEM; + } - nbd->socks = socks; + config->socks = socks; + nsock->fallback_index = -1; + nsock->dead = false; mutex_init(&nsock->tx_lock); nsock->sock = sock; nsock->pending = NULL; nsock->sent = 0; - socks[nbd->num_connections++] = nsock; + nsock->cookie = 0; + socks[config->num_connections++] = nsock; + atomic_inc(&config->live_connections); - if (max_part) - bdev->bd_invalidated = 1; return 0; } +static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) +{ + struct nbd_config *config = nbd->config; + struct socket *sock, *old; + struct recv_thread_args *args; + int i; + int err; + + sock = sockfd_lookup(arg, &err); + if (!sock) + return err; + + args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!args) { + sockfd_put(sock); + return -ENOMEM; + } + + for (i = 0; i < config->num_connections; i++) { + struct nbd_sock *nsock = config->socks[i]; + + if (!nsock->dead) + continue; + + mutex_lock(&nsock->tx_lock); + if (!nsock->dead) { + mutex_unlock(&nsock->tx_lock); + continue; + } + sk_set_memalloc(sock->sk); + atomic_inc(&config->recv_threads); + refcount_inc(&nbd->config_refs); + old = nsock->sock; + nsock->fallback_index = -1; + nsock->sock = sock; + nsock->dead = false; + INIT_WORK(&args->work, recv_work); + args->index = i; + args->nbd = nbd; + nsock->cookie++; + mutex_unlock(&nsock->tx_lock); + sockfd_put(old); + + /* We take the tx_mutex in an error path in the recv_work, so we + * need to queue_work outside of the tx_mutex. 
+ */ + queue_work(recv_workqueue, &args->work); + + atomic_inc(&config->live_connections); + wake_up(&config->conn_wait); + return 0; + } + sockfd_put(sock); + kfree(args); + return -ENOSPC; +} + /* Reset all properties of an NBD device */ static void nbd_reset(struct nbd_device *nbd) { - nbd->runtime_flags = 0; - nbd->blksize = 1024; - nbd->bytesize = 0; - set_capacity(nbd->disk, 0); - nbd->flags = 0; + nbd->config = NULL; nbd->tag_set.timeout = 0; queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); } @@ -668,21 +948,23 @@ static void nbd_bdev_reset(struct block_device *bdev) { if (bdev->bd_openers > 1) return; - set_device_ro(bdev, false); - bdev->bd_inode->i_size = 0; + bd_set_size(bdev, 0); if (max_part > 0) { blkdev_reread_part(bdev); bdev->bd_invalidated = 1; } } -static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev) +static void nbd_parse_flags(struct nbd_device *nbd) { - if (nbd->flags & NBD_FLAG_READ_ONLY) - set_device_ro(bdev, true); - if (nbd->flags & NBD_FLAG_SEND_TRIM) + struct nbd_config *config = nbd->config; + if (config->flags & NBD_FLAG_READ_ONLY) + set_disk_ro(nbd->disk, true); + else + set_disk_ro(nbd->disk, false); + if (config->flags & NBD_FLAG_SEND_TRIM) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); - if (nbd->flags & NBD_FLAG_SEND_FLUSH) + if (config->flags & NBD_FLAG_SEND_FLUSH) blk_queue_write_cache(nbd->disk->queue, true, false); else blk_queue_write_cache(nbd->disk->queue, false, false); @@ -690,6 +972,7 @@ static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev) static void send_disconnects(struct nbd_device *nbd) { + struct nbd_config *config = nbd->config; struct nbd_request request = { .magic = htonl(NBD_REQUEST_MAGIC), .type = htonl(NBD_CMD_DISC), @@ -698,7 +981,7 @@ static void send_disconnects(struct nbd_device *nbd) struct iov_iter from; int i, ret; - for (i = 0; i < nbd->num_connections; i++) { + for (i = 0; i < config->num_connections; i++) { 
iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); ret = sock_xmit(nbd, i, 1, &from, 0, NULL); if (ret <= 0) @@ -707,145 +990,162 @@ static void send_disconnects(struct nbd_device *nbd) } } -static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev) +static int nbd_disconnect(struct nbd_device *nbd) { - dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); - if (!nbd->socks) - return -EINVAL; - - mutex_unlock(&nbd->config_lock); - fsync_bdev(bdev); - mutex_lock(&nbd->config_lock); - - /* Check again after getting mutex back. */ - if (!nbd->socks) - return -EINVAL; + struct nbd_config *config = nbd->config; + dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED, - &nbd->runtime_flags)) + &config->runtime_flags)) send_disconnects(nbd); return 0; } -static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev) +static void nbd_clear_sock(struct nbd_device *nbd) { sock_shutdown(nbd); nbd_clear_que(nbd); + nbd->task_setup = NULL; +} - __invalidate_device(bdev, true); - nbd_bdev_reset(bdev); - /* - * We want to give the run thread a chance to wait for everybody - * to clean up and then do it's own cleanup. 
- */ - if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) && - nbd->num_connections) { - int i; - - for (i = 0; i < nbd->num_connections; i++) { - sockfd_put(nbd->socks[i]->sock); - kfree(nbd->socks[i]); +static void nbd_config_put(struct nbd_device *nbd) +{ + if (refcount_dec_and_mutex_lock(&nbd->config_refs, + &nbd->config_lock)) { + struct nbd_config *config = nbd->config; + nbd_dev_dbg_close(nbd); + nbd_size_clear(nbd); + if (test_and_clear_bit(NBD_HAS_PID_FILE, + &config->runtime_flags)) + device_remove_file(disk_to_dev(nbd->disk), &pid_attr); + nbd->task_recv = NULL; + nbd_clear_sock(nbd); + if (config->num_connections) { + int i; + for (i = 0; i < config->num_connections; i++) { + sockfd_put(config->socks[i]->sock); + kfree(config->socks[i]); + } + kfree(config->socks); } - kfree(nbd->socks); - nbd->socks = NULL; - nbd->num_connections = 0; - } - nbd->task_setup = NULL; + nbd_reset(nbd); - return 0; + mutex_unlock(&nbd->config_lock); + nbd_put(nbd); + module_put(THIS_MODULE); + } } -static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev) +static int nbd_start_device(struct nbd_device *nbd) { - struct recv_thread_args *args; - int num_connections = nbd->num_connections; + struct nbd_config *config = nbd->config; + int num_connections = config->num_connections; int error = 0, i; if (nbd->task_recv) return -EBUSY; - if (!nbd->socks) + if (!config->socks) return -EINVAL; if (num_connections > 1 && - !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) { + !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) { dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); - error = -EINVAL; - goto out_err; + return -EINVAL; } - set_bit(NBD_RUNNING, &nbd->runtime_flags); - blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections); - args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL); - if (!args) { - error = -ENOMEM; - goto out_err; - } + blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections); nbd->task_recv 
= current; - mutex_unlock(&nbd->config_lock); - nbd_parse_flags(nbd, bdev); + nbd_parse_flags(nbd); error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); if (error) { dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); - goto out_recv; + return error; } - - nbd_size_update(nbd, bdev); + set_bit(NBD_HAS_PID_FILE, &config->runtime_flags); nbd_dev_dbg_init(nbd); for (i = 0; i < num_connections; i++) { - sk_set_memalloc(nbd->socks[i]->sock->sk); - atomic_inc(&nbd->recv_threads); - INIT_WORK(&args[i].work, recv_work); - args[i].nbd = nbd; - args[i].index = i; - queue_work(recv_workqueue, &args[i].work); - } - wait_event_interruptible(nbd->recv_wq, - atomic_read(&nbd->recv_threads) == 0); - for (i = 0; i < num_connections; i++) - flush_work(&args[i].work); - nbd_dev_dbg_close(nbd); - nbd_size_clear(nbd, bdev); - device_remove_file(disk_to_dev(nbd->disk), &pid_attr); -out_recv: - mutex_lock(&nbd->config_lock); - nbd->task_recv = NULL; -out_err: - clear_bit(NBD_RUNNING, &nbd->runtime_flags); - nbd_clear_sock(nbd, bdev); + struct recv_thread_args *args; + args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!args) { + sock_shutdown(nbd); + return -ENOMEM; + } + sk_set_memalloc(config->socks[i]->sock->sk); + atomic_inc(&config->recv_threads); + refcount_inc(&nbd->config_refs); + INIT_WORK(&args->work, recv_work); + args->nbd = nbd; + args->index = i; + queue_work(recv_workqueue, &args->work); + } + return error; +} + +static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev) +{ + struct nbd_config *config = nbd->config; + int ret; + + ret = nbd_start_device(nbd); + if (ret) + return ret; + + bd_set_size(bdev, config->bytesize); + if (max_part) + bdev->bd_invalidated = 1; + mutex_unlock(&nbd->config_lock); + ret = wait_event_interruptible(config->recv_wq, + atomic_read(&config->recv_threads) == 0); + if (ret) + sock_shutdown(nbd); + mutex_lock(&nbd->config_lock); + bd_set_size(bdev, 0); /* user requested, ignore socket errors */ - 
if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) - error = 0; - if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags)) - error = -ETIMEDOUT; + if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags)) + ret = 0; + if (test_bit(NBD_TIMEDOUT, &config->runtime_flags)) + ret = -ETIMEDOUT; + return ret; +} - nbd_reset(nbd); - return error; +static void nbd_clear_sock_ioctl(struct nbd_device *nbd, + struct block_device *bdev) +{ + sock_shutdown(nbd); + kill_bdev(bdev); + nbd_bdev_reset(bdev); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) + nbd_config_put(nbd); } /* Must be called with config_lock held */ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, unsigned int cmd, unsigned long arg) { + struct nbd_config *config = nbd->config; + switch (cmd) { case NBD_DISCONNECT: - return nbd_disconnect(nbd, bdev); + return nbd_disconnect(nbd); case NBD_CLEAR_SOCK: - return nbd_clear_sock(nbd, bdev); + nbd_clear_sock_ioctl(nbd, bdev); + return 0; case NBD_SET_SOCK: - return nbd_add_socket(nbd, bdev, arg); + return nbd_add_socket(nbd, arg, false); case NBD_SET_BLKSIZE: - nbd_size_set(nbd, bdev, arg, - div_s64(nbd->bytesize, arg)); + nbd_size_set(nbd, arg, + div_s64(config->bytesize, arg)); return 0; case NBD_SET_SIZE: - nbd_size_set(nbd, bdev, nbd->blksize, - div_s64(arg, nbd->blksize)); + nbd_size_set(nbd, config->blksize, + div_s64(arg, config->blksize)); return 0; case NBD_SET_SIZE_BLOCKS: - nbd_size_set(nbd, bdev, nbd->blksize, arg); + nbd_size_set(nbd, config->blksize, arg); return 0; case NBD_SET_TIMEOUT: if (arg) { @@ -855,10 +1155,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, return 0; case NBD_SET_FLAGS: - nbd->flags = arg; + config->flags = arg; return 0; case NBD_DO_IT: - return nbd_start_device(nbd, bdev); + return nbd_start_device_ioctl(nbd, bdev); case NBD_CLEAR_QUE: /* * This is for compatibility only. 
The queue is always cleared @@ -879,23 +1179,92 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct nbd_device *nbd = bdev->bd_disk->private_data; - int error; + struct nbd_config *config = nbd->config; + int error = -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - BUG_ON(nbd->magic != NBD_MAGIC); - mutex_lock(&nbd->config_lock); - error = __nbd_ioctl(bdev, nbd, cmd, arg); - mutex_unlock(&nbd->config_lock); + /* Don't allow ioctl operations on a nbd device that was created with + * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine. + */ + if (!test_bit(NBD_BOUND, &config->runtime_flags) || + (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK)) + error = __nbd_ioctl(bdev, nbd, cmd, arg); + else + dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n"); + mutex_unlock(&nbd->config_lock); return error; } +static struct nbd_config *nbd_alloc_config(void) +{ + struct nbd_config *config; + + config = kzalloc(sizeof(struct nbd_config), GFP_NOFS); + if (!config) + return NULL; + atomic_set(&config->recv_threads, 0); + init_waitqueue_head(&config->recv_wq); + init_waitqueue_head(&config->conn_wait); + config->blksize = 1024; + atomic_set(&config->live_connections, 0); + try_module_get(THIS_MODULE); + return config; +} + +static int nbd_open(struct block_device *bdev, fmode_t mode) +{ + struct nbd_device *nbd; + int ret = 0; + + mutex_lock(&nbd_index_mutex); + nbd = bdev->bd_disk->private_data; + if (!nbd) { + ret = -ENXIO; + goto out; + } + if (!refcount_inc_not_zero(&nbd->refs)) { + ret = -ENXIO; + goto out; + } + if (!refcount_inc_not_zero(&nbd->config_refs)) { + struct nbd_config *config; + + mutex_lock(&nbd->config_lock); + if (refcount_inc_not_zero(&nbd->config_refs)) { + mutex_unlock(&nbd->config_lock); + goto out; + } + config = nbd->config = nbd_alloc_config(); + if (!config) { + ret = -ENOMEM; + mutex_unlock(&nbd->config_lock); + goto out; + } + 
refcount_set(&nbd->config_refs, 1); + refcount_inc(&nbd->refs); + mutex_unlock(&nbd->config_lock); + } +out: + mutex_unlock(&nbd_index_mutex); + return ret; +} + +static void nbd_release(struct gendisk *disk, fmode_t mode) +{ + struct nbd_device *nbd = disk->private_data; + nbd_config_put(nbd); + nbd_put(nbd); +} + static const struct block_device_operations nbd_fops = { .owner = THIS_MODULE, + .open = nbd_open, + .release = nbd_release, .ioctl = nbd_ioctl, .compat_ioctl = nbd_ioctl, }; @@ -927,7 +1296,7 @@ static const struct file_operations nbd_dbg_tasks_ops = { static int nbd_dbg_flags_show(struct seq_file *s, void *unused) { struct nbd_device *nbd = s->private; - u32 flags = nbd->flags; + u32 flags = nbd->config->flags; seq_printf(s, "Hex: 0x%08x\n\n", flags); @@ -960,6 +1329,7 @@ static const struct file_operations nbd_dbg_flags_ops = { static int nbd_dev_dbg_init(struct nbd_device *nbd) { struct dentry *dir; + struct nbd_config *config = nbd->config; if (!nbd_dbg_dir) return -EIO; @@ -970,12 +1340,12 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd) nbd_name(nbd)); return -EIO; } - nbd->dbg_dir = dir; + config->dbg_dir = dir; debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops); - debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize); + debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize); debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout); - debugfs_create_u64("blocksize", 0444, dir, &nbd->blksize); + debugfs_create_u64("blocksize", 0444, dir, &config->blksize); debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops); return 0; @@ -983,7 +1353,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd) static void nbd_dev_dbg_close(struct nbd_device *nbd) { - debugfs_remove_recursive(nbd->dbg_dir); + debugfs_remove_recursive(nbd->config->dbg_dir); } static int nbd_dbg_init(void) @@ -1035,25 +1405,13 @@ static int nbd_init_request(void *data, struct request *rq, return 0; } -static struct blk_mq_ops 
nbd_mq_ops = { +static const struct blk_mq_ops nbd_mq_ops = { .queue_rq = nbd_queue_rq, + .complete = nbd_complete_rq, .init_request = nbd_init_request, .timeout = nbd_xmit_timeout, }; -static void nbd_dev_remove(struct nbd_device *nbd) -{ - struct gendisk *disk = nbd->disk; - nbd->magic = 0; - if (disk) { - del_gendisk(disk); - blk_cleanup_queue(disk->queue); - blk_mq_free_tag_set(&nbd->tag_set); - put_disk(disk); - } - kfree(nbd); -} - static int nbd_dev_add(int index) { struct nbd_device *nbd; @@ -1082,6 +1440,7 @@ static int nbd_dev_add(int index) if (err < 0) goto out_free_disk; + nbd->index = index; nbd->disk = disk; nbd->tag_set.ops = &nbd_mq_ops; nbd->tag_set.nr_hw_queues = 1; @@ -1110,20 +1469,23 @@ static int nbd_dev_add(int index) queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); disk->queue->limits.discard_granularity = 512; blk_queue_max_discard_sectors(disk->queue, UINT_MAX); - disk->queue->limits.discard_zeroes_data = 0; + blk_queue_max_segment_size(disk->queue, UINT_MAX); + blk_queue_max_segments(disk->queue, USHRT_MAX); blk_queue_max_hw_sectors(disk->queue, 65536); disk->queue->limits.max_sectors = 256; - nbd->magic = NBD_MAGIC; mutex_init(&nbd->config_lock); + refcount_set(&nbd->config_refs, 0); + refcount_set(&nbd->refs, 1); + INIT_LIST_HEAD(&nbd->list); disk->major = NBD_MAJOR; disk->first_minor = index << part_shift; disk->fops = &nbd_fops; disk->private_data = nbd; sprintf(disk->disk_name, "nbd%d", index); - init_waitqueue_head(&nbd->recv_wq); nbd_reset(nbd); add_disk(disk); + nbd_total_devices++; return index; out_free_tags: @@ -1138,10 +1500,535 @@ out: return err; } -/* - * And here should be modules and kernel interface - * (Just smiley confuses emacs :-) +static int find_free_cb(int id, void *ptr, void *data) +{ + struct nbd_device *nbd = ptr; + struct nbd_device **found = data; + + if (!refcount_read(&nbd->config_refs)) { + *found = nbd; + return 1; + } + return 0; +} + +/* Netlink interface. 
*/ +static struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = { + [NBD_ATTR_INDEX] = { .type = NLA_U32 }, + [NBD_ATTR_SIZE_BYTES] = { .type = NLA_U64 }, + [NBD_ATTR_BLOCK_SIZE_BYTES] = { .type = NLA_U64 }, + [NBD_ATTR_TIMEOUT] = { .type = NLA_U64 }, + [NBD_ATTR_SERVER_FLAGS] = { .type = NLA_U64 }, + [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 }, + [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED}, + [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 }, + [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED}, +}; + +static struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = { + [NBD_SOCK_FD] = { .type = NLA_U32 }, +}; + +/* We don't use this right now since we don't parse the incoming list, but we + * still want it here so userspace knows what to expect. */ +static struct nla_policy __attribute__((unused)) +nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = { + [NBD_DEVICE_INDEX] = { .type = NLA_U32 }, + [NBD_DEVICE_CONNECTED] = { .type = NLA_U8 }, +}; + +static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) +{ + struct nbd_device *nbd = NULL; + struct nbd_config *config; + int index = -1; + int ret; + bool put_dev = false; + + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + + if (info->attrs[NBD_ATTR_INDEX]) + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + if (!info->attrs[NBD_ATTR_SOCKETS]) { + printk(KERN_ERR "nbd: must specify at least one socket\n"); + return -EINVAL; + } + if (!info->attrs[NBD_ATTR_SIZE_BYTES]) { + printk(KERN_ERR "nbd: must specify a size in bytes for the device\n"); + return -EINVAL; + } +again: + mutex_lock(&nbd_index_mutex); + if (index == -1) { + ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd); + if (ret == 0) { + int new_index; + new_index = nbd_dev_add(-1); + if (new_index < 0) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: failed to add new device\n"); + return ret; + } + nbd = idr_find(&nbd_index_idr, new_index); + } + } else { + nbd = idr_find(&nbd_index_idr, index); + } + if (!nbd) { + 
printk(KERN_ERR "nbd: couldn't find device at index %d\n", + index); + mutex_unlock(&nbd_index_mutex); + return -EINVAL; + } + if (!refcount_inc_not_zero(&nbd->refs)) { + mutex_unlock(&nbd_index_mutex); + if (index == -1) + goto again; + printk(KERN_ERR "nbd: device at index %d is going down\n", + index); + return -EINVAL; + } + mutex_unlock(&nbd_index_mutex); + + mutex_lock(&nbd->config_lock); + if (refcount_read(&nbd->config_refs)) { + mutex_unlock(&nbd->config_lock); + nbd_put(nbd); + if (index == -1) + goto again; + printk(KERN_ERR "nbd: nbd%d already in use\n", index); + return -EBUSY; + } + if (WARN_ON(nbd->config)) { + mutex_unlock(&nbd->config_lock); + nbd_put(nbd); + return -EINVAL; + } + config = nbd->config = nbd_alloc_config(); + if (!nbd->config) { + mutex_unlock(&nbd->config_lock); + nbd_put(nbd); + printk(KERN_ERR "nbd: couldn't allocate config\n"); + return -ENOMEM; + } + refcount_set(&nbd->config_refs, 1); + set_bit(NBD_BOUND, &config->runtime_flags); + + if (info->attrs[NBD_ATTR_SIZE_BYTES]) { + u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]); + nbd_size_set(nbd, config->blksize, + div64_u64(bytes, config->blksize)); + } + if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) { + u64 bsize = + nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); + nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize)); + } + if (info->attrs[NBD_ATTR_TIMEOUT]) { + u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); + nbd->tag_set.timeout = timeout * HZ; + blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); + } + if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { + config->dead_conn_timeout = + nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); + config->dead_conn_timeout *= HZ; + } + if (info->attrs[NBD_ATTR_SERVER_FLAGS]) + config->flags = + nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]); + if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { + u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); + if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { + 
set_bit(NBD_DESTROY_ON_DISCONNECT, + &config->runtime_flags); + put_dev = true; + } + } + + if (info->attrs[NBD_ATTR_SOCKETS]) { + struct nlattr *attr; + int rem, fd; + + nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], + rem) { + struct nlattr *socks[NBD_SOCK_MAX+1]; + + if (nla_type(attr) != NBD_SOCK_ITEM) { + printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); + ret = -EINVAL; + goto out; + } + ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, + nbd_sock_policy); + if (ret != 0) { + printk(KERN_ERR "nbd: error processing sock list\n"); + ret = -EINVAL; + goto out; + } + if (!socks[NBD_SOCK_FD]) + continue; + fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); + ret = nbd_add_socket(nbd, fd, true); + if (ret) + goto out; + } + } + ret = nbd_start_device(nbd); +out: + mutex_unlock(&nbd->config_lock); + if (!ret) { + set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags); + refcount_inc(&nbd->config_refs); + nbd_connect_reply(info, nbd->index); + } + nbd_config_put(nbd); + if (put_dev) + nbd_put(nbd); + return ret; +} + +static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) +{ + struct nbd_device *nbd; + int index; + + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + + if (!info->attrs[NBD_ATTR_INDEX]) { + printk(KERN_ERR "nbd: must specify an index to disconnect\n"); + return -EINVAL; + } + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + mutex_lock(&nbd_index_mutex); + nbd = idr_find(&nbd_index_idr, index); + if (!nbd) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: couldn't find device at index %d\n", + index); + return -EINVAL; + } + if (!refcount_inc_not_zero(&nbd->refs)) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: device at index %d is going down\n", + index); + return -EINVAL; + } + mutex_unlock(&nbd_index_mutex); + if (!refcount_inc_not_zero(&nbd->config_refs)) { + nbd_put(nbd); + return 0; + } + mutex_lock(&nbd->config_lock); + nbd_disconnect(nbd); + 
mutex_unlock(&nbd->config_lock); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) + nbd_config_put(nbd); + nbd_config_put(nbd); + nbd_put(nbd); + return 0; +} + +static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) +{ + struct nbd_device *nbd = NULL; + struct nbd_config *config; + int index; + int ret = -EINVAL; + bool put_dev = false; + + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + + if (!info->attrs[NBD_ATTR_INDEX]) { + printk(KERN_ERR "nbd: must specify a device to reconfigure\n"); + return -EINVAL; + } + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + mutex_lock(&nbd_index_mutex); + nbd = idr_find(&nbd_index_idr, index); + if (!nbd) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: couldn't find a device at index %d\n", + index); + return -EINVAL; + } + if (!refcount_inc_not_zero(&nbd->refs)) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: device at index %d is going down\n", + index); + return -EINVAL; + } + mutex_unlock(&nbd_index_mutex); + + if (!refcount_inc_not_zero(&nbd->config_refs)) { + dev_err(nbd_to_dev(nbd), + "not configured, cannot reconfigure\n"); + nbd_put(nbd); + return -EINVAL; + } + + mutex_lock(&nbd->config_lock); + config = nbd->config; + if (!test_bit(NBD_BOUND, &config->runtime_flags) || + !nbd->task_recv) { + dev_err(nbd_to_dev(nbd), + "not configured, cannot reconfigure\n"); + goto out; + } + + if (info->attrs[NBD_ATTR_TIMEOUT]) { + u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); + nbd->tag_set.timeout = timeout * HZ; + blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); + } + if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { + config->dead_conn_timeout = + nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); + config->dead_conn_timeout *= HZ; + } + if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { + u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); + if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { + if 
(!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT, + &config->runtime_flags)) + put_dev = true; + } else { + if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT, + &config->runtime_flags)) + refcount_inc(&nbd->refs); + } + } + + if (info->attrs[NBD_ATTR_SOCKETS]) { + struct nlattr *attr; + int rem, fd; + + nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], + rem) { + struct nlattr *socks[NBD_SOCK_MAX+1]; + + if (nla_type(attr) != NBD_SOCK_ITEM) { + printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); + ret = -EINVAL; + goto out; + } + ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, + nbd_sock_policy); + if (ret != 0) { + printk(KERN_ERR "nbd: error processing sock list\n"); + ret = -EINVAL; + goto out; + } + if (!socks[NBD_SOCK_FD]) + continue; + fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); + ret = nbd_reconnect_socket(nbd, fd); + if (ret) { + if (ret == -ENOSPC) + ret = 0; + goto out; + } + dev_info(nbd_to_dev(nbd), "reconnected socket\n"); + } + } +out: + mutex_unlock(&nbd->config_lock); + nbd_config_put(nbd); + nbd_put(nbd); + if (put_dev) + nbd_put(nbd); + return ret; +} + +static const struct genl_ops nbd_connect_genl_ops[] = { + { + .cmd = NBD_CMD_CONNECT, + .policy = nbd_attr_policy, + .doit = nbd_genl_connect, + }, + { + .cmd = NBD_CMD_DISCONNECT, + .policy = nbd_attr_policy, + .doit = nbd_genl_disconnect, + }, + { + .cmd = NBD_CMD_RECONFIGURE, + .policy = nbd_attr_policy, + .doit = nbd_genl_reconfigure, + }, + { + .cmd = NBD_CMD_STATUS, + .policy = nbd_attr_policy, + .doit = nbd_genl_status, + }, +}; + +static const struct genl_multicast_group nbd_mcast_grps[] = { + { .name = NBD_GENL_MCAST_GROUP_NAME, }, +}; + +static struct genl_family nbd_genl_family __ro_after_init = { + .hdrsize = 0, + .name = NBD_GENL_FAMILY_NAME, + .version = NBD_GENL_VERSION, + .module = THIS_MODULE, + .ops = nbd_connect_genl_ops, + .n_ops = ARRAY_SIZE(nbd_connect_genl_ops), + .maxattr = NBD_ATTR_MAX, + .mcgrps = nbd_mcast_grps, + .n_mcgrps = 
ARRAY_SIZE(nbd_mcast_grps), +}; + +static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply) +{ + struct nlattr *dev_opt; + u8 connected = 0; + int ret; + + /* This is a little racey, but for status it's ok. The + * reason we don't take a ref here is because we can't + * take a ref in the index == -1 case as we would need + * to put under the nbd_index_mutex, which could + * deadlock if we are configured to remove ourselves + * once we're disconnected. + */ + if (refcount_read(&nbd->config_refs)) + connected = 1; + dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM); + if (!dev_opt) + return -EMSGSIZE; + ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index); + if (ret) + return -EMSGSIZE; + ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED, + connected); + if (ret) + return -EMSGSIZE; + nla_nest_end(reply, dev_opt); + return 0; +} + +static int status_cb(int id, void *ptr, void *data) +{ + struct nbd_device *nbd = ptr; + return populate_nbd_status(nbd, (struct sk_buff *)data); +} + +static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *dev_list; + struct sk_buff *reply; + void *reply_head; + size_t msg_size; + int index = -1; + int ret = -ENOMEM; + + if (info->attrs[NBD_ATTR_INDEX]) + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + + mutex_lock(&nbd_index_mutex); + + msg_size = nla_total_size(nla_attr_size(sizeof(u32)) + + nla_attr_size(sizeof(u8))); + msg_size *= (index == -1) ? 
nbd_total_devices : 1; + + reply = genlmsg_new(msg_size, GFP_KERNEL); + if (!reply) + goto out; + reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0, + NBD_CMD_STATUS); + if (!reply_head) { + nlmsg_free(reply); + goto out; + } + + dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST); + if (index == -1) { + ret = idr_for_each(&nbd_index_idr, &status_cb, reply); + if (ret) { + nlmsg_free(reply); + goto out; + } + } else { + struct nbd_device *nbd; + nbd = idr_find(&nbd_index_idr, index); + if (nbd) { + ret = populate_nbd_status(nbd, reply); + if (ret) { + nlmsg_free(reply); + goto out; + } + } + } + nla_nest_end(reply, dev_list); + genlmsg_end(reply, reply_head); + genlmsg_reply(reply, info); + ret = 0; +out: + mutex_unlock(&nbd_index_mutex); + return ret; +} + +static void nbd_connect_reply(struct genl_info *info, int index) +{ + struct sk_buff *skb; + void *msg_head; + int ret; + + skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); + if (!skb) + return; + msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0, + NBD_CMD_CONNECT); + if (!msg_head) { + nlmsg_free(skb); + return; + } + ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); + if (ret) { + nlmsg_free(skb); + return; + } + genlmsg_end(skb, msg_head); + genlmsg_reply(skb, info); +} + +static void nbd_mcast_index(int index) +{ + struct sk_buff *skb; + void *msg_head; + int ret; + + skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); + if (!skb) + return; + msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0, + NBD_CMD_LINK_DEAD); + if (!msg_head) { + nlmsg_free(skb); + return; + } + ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); + if (ret) { + nlmsg_free(skb); + return; + } + genlmsg_end(skb, msg_head); + genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL); +} + +static void nbd_dead_link_work(struct work_struct *work) +{ + struct link_dead_args *args = container_of(work, struct link_dead_args, + work); + nbd_mcast_index(args->index); + kfree(args); +} static int 
__init nbd_init(void) { @@ -1184,6 +2071,11 @@ static int __init nbd_init(void) return -EIO; } + if (genl_register_family(&nbd_genl_family)) { + unregister_blkdev(NBD_MAJOR, "nbd"); + destroy_workqueue(recv_workqueue); + return -EINVAL; + } nbd_dbg_init(); mutex_lock(&nbd_index_mutex); @@ -1195,17 +2087,34 @@ static int __init nbd_init(void) static int nbd_exit_cb(int id, void *ptr, void *data) { + struct list_head *list = (struct list_head *)data; struct nbd_device *nbd = ptr; - nbd_dev_remove(nbd); + + list_add_tail(&nbd->list, list); return 0; } static void __exit nbd_cleanup(void) { + struct nbd_device *nbd; + LIST_HEAD(del_list); + nbd_dbg_close(); - idr_for_each(&nbd_index_idr, &nbd_exit_cb, NULL); + mutex_lock(&nbd_index_mutex); + idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list); + mutex_unlock(&nbd_index_mutex); + + while (!list_empty(&del_list)) { + nbd = list_first_entry(&del_list, struct nbd_device, list); + list_del_init(&nbd->list); + if (refcount_read(&nbd->refs) != 1) + printk(KERN_ERR "nbd: possibly leaking a device\n"); + nbd_put(nbd); + } + idr_destroy(&nbd_index_idr); + genl_unregister_family(&nbd_genl_family); destroy_workqueue(recv_workqueue); unregister_blkdev(NBD_MAJOR, "nbd"); } diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 6f2e565bccc5..d946e1eeac8e 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -117,6 +117,10 @@ static bool use_lightnvm; module_param(use_lightnvm, bool, S_IRUGO); MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device"); +static bool blocking; +module_param(blocking, bool, S_IRUGO); +MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); + static int irqmode = NULL_IRQ_SOFTIRQ; static int null_set_irqmode(const char *str, const struct kernel_param *kp) @@ -277,7 +281,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd) case NULL_IRQ_SOFTIRQ: switch (queue_mode) { case NULL_Q_MQ: - blk_mq_complete_request(cmd->rq, cmd->rq->errors); + 
blk_mq_complete_request(cmd->rq); break; case NULL_Q_RQ: blk_complete_request(cmd->rq); @@ -357,6 +361,8 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, { struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); + if (irqmode == NULL_IRQ_TIMER) { hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cmd->timer.function = null_cmd_timer_expired; @@ -392,7 +398,7 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } -static struct blk_mq_ops null_mq_ops = { +static const struct blk_mq_ops null_mq_ops = { .queue_rq = null_queue_rq, .init_hctx = null_init_hctx, .complete = null_softirq_done_fn, @@ -437,14 +443,7 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) if (IS_ERR(rq)) return -ENOMEM; - rq->__sector = bio->bi_iter.bi_sector; - rq->ioprio = bio_prio(bio); - - if (bio_has_data(bio)) - rq->nr_phys_segments = bio_phys_segments(q, bio); - - rq->__data_len = bio->bi_iter.bi_size; - rq->bio = rq->biotail = bio; + blk_init_request_from_bio(rq, bio); rq->end_io_data = rqd; @@ -724,6 +723,9 @@ static int null_add_dev(void) nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; nullb->tag_set.driver_data = nullb; + if (blocking) + nullb->tag_set.flags |= BLK_MQ_F_BLOCKING; + rv = blk_mq_alloc_tag_set(&nullb->tag_set); if (rv) goto out_cleanup_queues; diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c deleted file mode 100644 index 8127b8201a01..000000000000 --- a/drivers/block/osdblk.c +++ /dev/null @@ -1,693 +0,0 @@ - -/* - osdblk.c -- Export a single SCSI OSD object as a Linux block device - - - Copyright 2009 Red Hat, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - - - Instructions for use - -------------------- - - 1) Map a Linux block device to an existing OSD object. - - In this example, we will use partition id 1234, object id 5678, - OSD device /dev/osd1. - - $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add - - - 2) List all active blkdev<->object mappings. - - In this example, we have performed step #1 twice, creating two blkdevs, - mapped to two separate OSD objects. - - $ cat /sys/class/osdblk/list - 0 174 1234 5678 /dev/osd1 - 1 179 1994 897123 /dev/osd0 - - The columns, in order, are: - - blkdev unique id - - blkdev assigned major - - OSD object partition id - - OSD object id - - OSD device - - - 3) Remove an active blkdev<->object mapping. - - In this example, we remove the mapping with blkdev unique id 1. - - $ echo 1 > /sys/class/osdblk/remove - - - NOTE: The actual creation and deletion of OSD objects is outside the scope - of this driver. - - */ - -#include <linux/kernel.h> -#include <linux/device.h> -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include <scsi/osd_initiator.h> -#include <scsi/osd_attributes.h> -#include <scsi/osd_sec.h> -#include <scsi/scsi_device.h> - -#define DRV_NAME "osdblk" -#define PFX DRV_NAME ": " - -/* #define _OSDBLK_DEBUG */ -#ifdef _OSDBLK_DEBUG -#define OSDBLK_DEBUG(fmt, a...) \ - printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a) -#else -#define OSDBLK_DEBUG(fmt, a...) 
\ - do { if (0) printk(fmt, ##a); } while (0) -#endif - -MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); -MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko"); -MODULE_LICENSE("GPL"); - -struct osdblk_device; - -enum { - OSDBLK_MINORS_PER_MAJOR = 256, /* max minors per blkdev */ - OSDBLK_MAX_REQ = 32, /* max parallel requests */ - OSDBLK_OP_TIMEOUT = 4 * 60, /* sync OSD req timeout */ -}; - -struct osdblk_request { - struct request *rq; /* blk layer request */ - struct bio *bio; /* cloned bio */ - struct osdblk_device *osdev; /* associated blkdev */ -}; - -struct osdblk_device { - int id; /* blkdev unique id */ - - int major; /* blkdev assigned major */ - struct gendisk *disk; /* blkdev's gendisk and rq */ - struct request_queue *q; - - struct osd_dev *osd; /* associated OSD */ - - char name[32]; /* blkdev name, e.g. osdblk34 */ - - spinlock_t lock; /* queue lock */ - - struct osd_obj_id obj; /* OSD partition, obj id */ - uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */ - - struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */ - - struct list_head node; - - char osd_path[0]; /* OSD device path */ -}; - -static struct class *class_osdblk; /* /sys/class/osdblk */ -static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ -static LIST_HEAD(osdblkdev_list); - -static const struct block_device_operations osdblk_bd_ops = { - .owner = THIS_MODULE, -}; - -static const struct osd_attr g_attr_logical_length = ATTR_DEF( - OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); - -static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN], - const struct osd_obj_id *obj) -{ - osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); -} - -/* copied from exofs; move to libosd? */ -/* - * Perform a synchronous OSD operation. copied from exofs; move to libosd? 
- */ -static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential) -{ - int ret; - - or->timeout = timeout; - ret = osd_finalize_request(or, 0, credential, NULL); - if (ret) - return ret; - - ret = osd_execute_request(or); - - /* osd_req_decode_sense(or, ret); */ - return ret; -} - -/* - * Perform an asynchronous OSD operation. copied from exofs; move to libosd? - */ -static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done, - void *caller_context, u8 *cred) -{ - int ret; - - ret = osd_finalize_request(or, 0, cred, NULL); - if (ret) - return ret; - - ret = osd_execute_request_async(or, async_done, caller_context); - - return ret; -} - -/* copied from exofs; move to libosd? */ -static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) -{ - struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ - void *iter = NULL; - int nelem; - - do { - nelem = 1; - osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); - if ((cur_attr.attr_page == attr->attr_page) && - (cur_attr.attr_id == attr->attr_id)) { - attr->len = cur_attr.len; - attr->val_ptr = cur_attr.val_ptr; - return 0; - } - } while (iter); - - return -EIO; -} - -static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out) -{ - struct osd_request *or; - struct osd_attr attr; - int ret; - - /* start request */ - or = osd_start_request(osdev->osd, GFP_KERNEL); - if (!or) - return -ENOMEM; - - /* create a get-attributes(length) request */ - osd_req_get_attributes(or, &osdev->obj); - - osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); - - /* execute op synchronously */ - ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred); - if (ret) - goto out; - - /* extract length from returned attribute info */ - attr = g_attr_logical_length; - ret = extract_attr_from_req(or, &attr); - if (ret) - goto out; - - *size_out = get_unaligned_be64(attr.val_ptr); - -out: - osd_end_request(or); - return ret; - -} - -static void 
osdblk_osd_complete(struct osd_request *or, void *private) -{ - struct osdblk_request *orq = private; - struct osd_sense_info osi; - int ret = osd_req_decode_sense(or, &osi); - - if (ret) { - ret = -EIO; - OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret); - } - - /* complete OSD request */ - osd_end_request(or); - - /* complete request passed to osdblk by block layer */ - __blk_end_request_all(orq->rq, ret); -} - -static void bio_chain_put(struct bio *chain) -{ - struct bio *tmp; - - while (chain) { - tmp = chain; - chain = chain->bi_next; - - bio_put(tmp); - } -} - -static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask) -{ - struct bio *tmp, *new_chain = NULL, *tail = NULL; - - while (old_chain) { - tmp = bio_clone_kmalloc(old_chain, gfpmask); - if (!tmp) - goto err_out; - - tmp->bi_bdev = NULL; - gfpmask &= ~__GFP_DIRECT_RECLAIM; - tmp->bi_next = NULL; - - if (!new_chain) - new_chain = tail = tmp; - else { - tail->bi_next = tmp; - tail = tmp; - } - - old_chain = old_chain->bi_next; - } - - return new_chain; - -err_out: - OSDBLK_DEBUG("bio_chain_clone with err\n"); - bio_chain_put(new_chain); - return NULL; -} - -static void osdblk_rq_fn(struct request_queue *q) -{ - struct osdblk_device *osdev = q->queuedata; - - while (1) { - struct request *rq; - struct osdblk_request *orq; - struct osd_request *or; - struct bio *bio; - bool do_write, do_flush; - - /* peek at request from block layer */ - rq = blk_fetch_request(q); - if (!rq) - break; - - /* deduce our operation (read, write, flush) */ - /* I wish the block layer simplified cmd_type/cmd_flags/cmd[] - * into a clearly defined set of RPC commands: - * read, write, flush, scsi command, power mgmt req, - * driver-specific, etc. 
- */ - - do_flush = (req_op(rq) == REQ_OP_FLUSH); - do_write = (rq_data_dir(rq) == WRITE); - - if (!do_flush) { /* osd_flush does not use a bio */ - /* a bio clone to be passed down to OSD request */ - bio = bio_chain_clone(rq->bio, GFP_ATOMIC); - if (!bio) - break; - } else - bio = NULL; - - /* alloc internal OSD request, for OSD command execution */ - or = osd_start_request(osdev->osd, GFP_ATOMIC); - if (!or) { - bio_chain_put(bio); - OSDBLK_DEBUG("osd_start_request with err\n"); - break; - } - - orq = &osdev->req[rq->tag]; - orq->rq = rq; - orq->bio = bio; - orq->osdev = osdev; - - /* init OSD command: flush, write or read */ - if (do_flush) - osd_req_flush_object(or, &osdev->obj, - OSD_CDB_FLUSH_ALL, 0, 0); - else if (do_write) - osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, - bio, blk_rq_bytes(rq)); - else - osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, - bio, blk_rq_bytes(rq)); - - OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n", - do_flush ? "flush" : do_write ? - "write" : "read", blk_rq_bytes(rq), - blk_rq_pos(rq) * 512ULL); - - /* begin OSD command execution */ - if (osd_async_op(or, osdblk_osd_complete, orq, - osdev->obj_cred)) { - osd_end_request(or); - blk_requeue_request(q, rq); - bio_chain_put(bio); - OSDBLK_DEBUG("osd_execute_request_async with err\n"); - break; - } - - /* remove the special 'flush' marker, now that the command - * is executing - */ - rq->special = NULL; - } -} - -static void osdblk_free_disk(struct osdblk_device *osdev) -{ - struct gendisk *disk = osdev->disk; - - if (!disk) - return; - - if (disk->flags & GENHD_FL_UP) - del_gendisk(disk); - if (disk->queue) - blk_cleanup_queue(disk->queue); - put_disk(disk); -} - -static int osdblk_init_disk(struct osdblk_device *osdev) -{ - struct gendisk *disk; - struct request_queue *q; - int rc; - u64 obj_size = 0; - - /* contact OSD, request size info about the object being mapped */ - rc = osdblk_get_obj_size(osdev, &obj_size); - if (rc) - return rc; - - /* create gendisk info */ 
- disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR); - if (!disk) - return -ENOMEM; - - sprintf(disk->disk_name, DRV_NAME "%d", osdev->id); - disk->major = osdev->major; - disk->first_minor = 0; - disk->fops = &osdblk_bd_ops; - disk->private_data = osdev; - - /* init rq */ - q = blk_init_queue(osdblk_rq_fn, &osdev->lock); - if (!q) { - put_disk(disk); - return -ENOMEM; - } - - /* switch queue to TCQ mode; allocate tag map */ - rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL, BLK_TAG_ALLOC_FIFO); - if (rc) { - blk_cleanup_queue(q); - put_disk(disk); - return rc; - } - - /* Set our limits to the lower device limits, because osdblk cannot - * sleep when allocating a lower-request and therefore cannot be - * bouncing. - */ - blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); - - blk_queue_prep_rq(q, blk_queue_start_tag); - blk_queue_write_cache(q, true, false); - - disk->queue = q; - - q->queuedata = osdev; - - osdev->disk = disk; - osdev->q = q; - - /* finally, announce the disk to the world */ - set_capacity(disk, obj_size / 512ULL); - add_disk(disk); - - printk(KERN_INFO "%s: Added of size 0x%llx\n", - disk->disk_name, (unsigned long long)obj_size); - - return 0; -} - -/******************************************************************** - * /sys/class/osdblk/ - * add map OSD object to blkdev - * remove unmap OSD object - * list show mappings - *******************************************************************/ - -static void class_osdblk_release(struct class *cls) -{ - kfree(cls); -} - -static ssize_t class_osdblk_list(struct class *c, - struct class_attribute *attr, - char *data) -{ - int n = 0; - struct list_head *tmp; - - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - list_for_each(tmp, &osdblkdev_list) { - struct osdblk_device *osdev; - - osdev = list_entry(tmp, struct osdblk_device, node); - - n += sprintf(data+n, "%d %d %llu %llu %s\n", - osdev->id, - osdev->major, - osdev->obj.partition, - osdev->obj.id, - osdev->osd_path); - } - - 
mutex_unlock(&ctl_mutex); - return n; -} - -static ssize_t class_osdblk_add(struct class *c, - struct class_attribute *attr, - const char *buf, size_t count) -{ - struct osdblk_device *osdev; - ssize_t rc; - int irc, new_id = 0; - struct list_head *tmp; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - - /* new osdblk_device object */ - osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL); - if (!osdev) { - rc = -ENOMEM; - goto err_out_mod; - } - - /* static osdblk_device initialization */ - spin_lock_init(&osdev->lock); - INIT_LIST_HEAD(&osdev->node); - - /* generate unique id: find highest unique id, add one */ - - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - list_for_each(tmp, &osdblkdev_list) { - struct osdblk_device *osdev; - - osdev = list_entry(tmp, struct osdblk_device, node); - if (osdev->id > new_id) - new_id = osdev->id + 1; - } - - osdev->id = new_id; - - /* add to global list */ - list_add_tail(&osdev->node, &osdblkdev_list); - - mutex_unlock(&ctl_mutex); - - /* parse add command */ - if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id, - osdev->osd_path) != 3) { - rc = -EINVAL; - goto err_out_slot; - } - - /* initialize rest of new object */ - sprintf(osdev->name, DRV_NAME "%d", osdev->id); - - /* contact requested OSD */ - osdev->osd = osduld_path_lookup(osdev->osd_path); - if (IS_ERR(osdev->osd)) { - rc = PTR_ERR(osdev->osd); - goto err_out_slot; - } - - /* build OSD credential */ - osdblk_make_credential(osdev->obj_cred, &osdev->obj); - - /* register our block device */ - irc = register_blkdev(0, osdev->name); - if (irc < 0) { - rc = irc; - goto err_out_osd; - } - - osdev->major = irc; - - /* set up and announce blkdev mapping */ - rc = osdblk_init_disk(osdev); - if (rc) - goto err_out_blkdev; - - return count; - -err_out_blkdev: - unregister_blkdev(osdev->major, osdev->name); -err_out_osd: - osduld_put_device(osdev->osd); -err_out_slot: - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - 
list_del_init(&osdev->node); - mutex_unlock(&ctl_mutex); - - kfree(osdev); -err_out_mod: - OSDBLK_DEBUG("Error adding device %s\n", buf); - module_put(THIS_MODULE); - return rc; -} - -static ssize_t class_osdblk_remove(struct class *c, - struct class_attribute *attr, - const char *buf, - size_t count) -{ - struct osdblk_device *osdev = NULL; - int target_id, rc; - unsigned long ul; - struct list_head *tmp; - - rc = kstrtoul(buf, 10, &ul); - if (rc) - return rc; - - /* convert to int; abort if we lost anything in the conversion */ - target_id = (int) ul; - if (target_id != ul) - return -EINVAL; - - /* remove object from list immediately */ - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - list_for_each(tmp, &osdblkdev_list) { - osdev = list_entry(tmp, struct osdblk_device, node); - if (osdev->id == target_id) { - list_del_init(&osdev->node); - break; - } - osdev = NULL; - } - - mutex_unlock(&ctl_mutex); - - if (!osdev) - return -ENOENT; - - /* clean up and free blkdev and associated OSD connection */ - osdblk_free_disk(osdev); - unregister_blkdev(osdev->major, osdev->name); - osduld_put_device(osdev->osd); - kfree(osdev); - - /* release module ref */ - module_put(THIS_MODULE); - - return count; -} - -static struct class_attribute class_osdblk_attrs[] = { - __ATTR(add, 0200, NULL, class_osdblk_add), - __ATTR(remove, 0200, NULL, class_osdblk_remove), - __ATTR(list, 0444, class_osdblk_list, NULL), - __ATTR_NULL -}; - -static int osdblk_sysfs_init(void) -{ - int ret = 0; - - /* - * create control files in sysfs - * /sys/class/osdblk/... 
- */ - class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL); - if (!class_osdblk) - return -ENOMEM; - - class_osdblk->name = DRV_NAME; - class_osdblk->owner = THIS_MODULE; - class_osdblk->class_release = class_osdblk_release; - class_osdblk->class_attrs = class_osdblk_attrs; - - ret = class_register(class_osdblk); - if (ret) { - kfree(class_osdblk); - class_osdblk = NULL; - printk(PFX "failed to create class osdblk\n"); - return ret; - } - - return 0; -} - -static void osdblk_sysfs_cleanup(void) -{ - if (class_osdblk) - class_destroy(class_osdblk); - class_osdblk = NULL; -} - -static int __init osdblk_init(void) -{ - int rc; - - rc = osdblk_sysfs_init(); - if (rc) - return rc; - - return 0; -} - -static void __exit osdblk_exit(void) -{ - osdblk_sysfs_cleanup(); -} - -module_init(osdblk_init); -module_exit(osdblk_exit); - diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 939641d6e262..b1267ef34d5a 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -300,6 +300,11 @@ static void pcd_init_units(void) struct gendisk *disk = alloc_disk(1); if (!disk) continue; + disk->queue = blk_init_queue(do_pcd_request, &pcd_lock); + if (!disk->queue) { + put_disk(disk); + continue; + } cd->disk = disk; cd->pi = &cd->pia; cd->present = 0; @@ -735,18 +740,36 @@ static int pcd_detect(void) } /* I/O request processing */ -static struct request_queue *pcd_queue; +static int pcd_queue; + +static int set_next_request(void) +{ + struct pcd_unit *cd; + struct request_queue *q; + int old_pos = pcd_queue; + + do { + cd = &pcd[pcd_queue]; + q = cd->present ? 
cd->disk->queue : NULL; + if (++pcd_queue == PCD_UNITS) + pcd_queue = 0; + if (q) { + pcd_req = blk_fetch_request(q); + if (pcd_req) + break; + } + } while (pcd_queue != old_pos); + + return pcd_req != NULL; +} -static void do_pcd_request(struct request_queue * q) +static void pcd_request(void) { if (pcd_busy) return; while (1) { - if (!pcd_req) { - pcd_req = blk_fetch_request(q); - if (!pcd_req) - return; - } + if (!pcd_req && !set_next_request()) + return; if (rq_data_dir(pcd_req) == READ) { struct pcd_unit *cd = pcd_req->rq_disk->private_data; @@ -766,6 +789,11 @@ static void do_pcd_request(struct request_queue * q) } } +static void do_pcd_request(struct request_queue *q) +{ + pcd_request(); +} + static inline void next_request(int err) { unsigned long saved_flags; @@ -774,7 +802,7 @@ static inline void next_request(int err) if (!__blk_end_request_cur(pcd_req, err)) pcd_req = NULL; pcd_busy = 0; - do_pcd_request(pcd_queue); + pcd_request(); spin_unlock_irqrestore(&pcd_lock, saved_flags); } @@ -849,7 +877,7 @@ static void do_pcd_read_drq(void) do_pcd_read(); spin_lock_irqsave(&pcd_lock, saved_flags); - do_pcd_request(pcd_queue); + pcd_request(); spin_unlock_irqrestore(&pcd_lock, saved_flags); } @@ -957,19 +985,10 @@ static int __init pcd_init(void) return -EBUSY; } - pcd_queue = blk_init_queue(do_pcd_request, &pcd_lock); - if (!pcd_queue) { - unregister_blkdev(major, name); - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) - put_disk(cd->disk); - return -ENOMEM; - } - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { if (cd->present) { register_cdrom(&cd->info); cd->disk->private_data = cd; - cd->disk->queue = pcd_queue; add_disk(cd->disk); } } @@ -988,9 +1007,9 @@ static void __exit pcd_exit(void) pi_release(cd->pi); unregister_cdrom(&cd->info); } + blk_cleanup_queue(cd->disk->queue); put_disk(cd->disk); } - blk_cleanup_queue(pcd_queue); unregister_blkdev(major, name); pi_unregister_driver(par_drv); } diff --git a/drivers/block/paride/pd.c 
b/drivers/block/paride/pd.c index 9cfd2e06a649..7d2402f90978 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -381,12 +381,33 @@ static enum action do_pd_write_start(void); static enum action do_pd_read_drq(void); static enum action do_pd_write_done(void); -static struct request_queue *pd_queue; +static int pd_queue; static int pd_claimed; static struct pd_unit *pd_current; /* current request's drive */ static PIA *pi_current; /* current request's PIA */ +static int set_next_request(void) +{ + struct gendisk *disk; + struct request_queue *q; + int old_pos = pd_queue; + + do { + disk = pd[pd_queue].gd; + q = disk ? disk->queue : NULL; + if (++pd_queue == PD_UNITS) + pd_queue = 0; + if (q) { + pd_req = blk_fetch_request(q); + if (pd_req) + break; + } + } while (pd_queue != old_pos); + + return pd_req != NULL; +} + static void run_fsm(void) { while (1) { @@ -418,8 +439,7 @@ static void run_fsm(void) spin_lock_irqsave(&pd_lock, saved_flags); if (!__blk_end_request_cur(pd_req, res == Ok ? 
0 : -EIO)) { - pd_req = blk_fetch_request(pd_queue); - if (!pd_req) + if (!set_next_request()) stop = 1; } spin_unlock_irqrestore(&pd_lock, saved_flags); @@ -719,18 +739,15 @@ static int pd_special_command(struct pd_unit *disk, enum action (*func)(struct pd_unit *disk)) { struct request *rq; - int err = 0; rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); rq->special = func; - - err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0); - + blk_execute_rq(disk->gd->queue, disk->gd, rq, 0); blk_put_request(rq); - return err; + return 0; } /* kernel glue structures */ @@ -839,7 +856,13 @@ static void pd_probe_drive(struct pd_unit *disk) p->first_minor = (disk - pd) << PD_BITS; disk->gd = p; p->private_data = disk; - p->queue = pd_queue; + p->queue = blk_init_queue(do_pd_request, &pd_lock); + if (!p->queue) { + disk->gd = NULL; + put_disk(p); + return; + } + blk_queue_max_hw_sectors(p->queue, cluster); if (disk->drive == -1) { for (disk->drive = 0; disk->drive <= 1; disk->drive++) @@ -919,26 +942,18 @@ static int __init pd_init(void) if (disable) goto out1; - pd_queue = blk_init_queue(do_pd_request, &pd_lock); - if (!pd_queue) - goto out1; - - blk_queue_max_hw_sectors(pd_queue, cluster); - if (register_blkdev(major, name)) - goto out2; + goto out1; printk("%s: %s version %s, major %d, cluster %d, nice %d\n", name, name, PD_VERSION, major, cluster, nice); if (!pd_detect()) - goto out3; + goto out2; return 0; -out3: - unregister_blkdev(major, name); out2: - blk_cleanup_queue(pd_queue); + unregister_blkdev(major, name); out1: return -ENODEV; } @@ -953,11 +968,11 @@ static void __exit pd_exit(void) if (p) { disk->gd = NULL; del_gendisk(p); + blk_cleanup_queue(p->queue); put_disk(p); pi_release(disk->pi); } } - blk_cleanup_queue(pd_queue); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 14c5d32f5d8b..f24ca7315ddc 100644 --- a/drivers/block/paride/pf.c +++ 
b/drivers/block/paride/pf.c @@ -287,6 +287,12 @@ static void __init pf_init_units(void) struct gendisk *disk = alloc_disk(1); if (!disk) continue; + disk->queue = blk_init_queue(do_pf_request, &pf_spin_lock); + if (!disk->queue) { + put_disk(disk); + return; + } + blk_queue_max_segments(disk->queue, cluster); pf->disk = disk; pf->pi = &pf->pia; pf->media_status = PF_NM; @@ -772,7 +778,28 @@ static int pf_ready(void) return (((status_reg(pf_current) & (STAT_BUSY | pf_mask)) == pf_mask)); } -static struct request_queue *pf_queue; +static int pf_queue; + +static int set_next_request(void) +{ + struct pf_unit *pf; + struct request_queue *q; + int old_pos = pf_queue; + + do { + pf = &units[pf_queue]; + q = pf->present ? pf->disk->queue : NULL; + if (++pf_queue == PF_UNITS) + pf_queue = 0; + if (q) { + pf_req = blk_fetch_request(q); + if (pf_req) + break; + } + } while (pf_queue != old_pos); + + return pf_req != NULL; +} static void pf_end_request(int err) { @@ -780,16 +807,13 @@ static void pf_end_request(int err) pf_req = NULL; } -static void do_pf_request(struct request_queue * q) +static void pf_request(void) { if (pf_busy) return; repeat: - if (!pf_req) { - pf_req = blk_fetch_request(q); - if (!pf_req) - return; - } + if (!pf_req && !set_next_request()) + return; pf_current = pf_req->rq_disk->private_data; pf_block = blk_rq_pos(pf_req); @@ -817,6 +841,11 @@ repeat: } } +static void do_pf_request(struct request_queue *q) +{ + pf_request(); +} + static int pf_next_buf(void) { unsigned long saved_flags; @@ -846,7 +875,7 @@ static inline void next_request(int err) spin_lock_irqsave(&pf_spin_lock, saved_flags); pf_end_request(err); pf_busy = 0; - do_pf_request(pf_queue); + pf_request(); spin_unlock_irqrestore(&pf_spin_lock, saved_flags); } @@ -972,15 +1001,6 @@ static int __init pf_init(void) put_disk(pf->disk); return -EBUSY; } - pf_queue = blk_init_queue(do_pf_request, &pf_spin_lock); - if (!pf_queue) { - unregister_blkdev(major, name); - for (pf = units, unit = 0; 
unit < PF_UNITS; pf++, unit++) - put_disk(pf->disk); - return -ENOMEM; - } - - blk_queue_max_segments(pf_queue, cluster); for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { struct gendisk *disk = pf->disk; @@ -988,7 +1008,6 @@ static int __init pf_init(void) if (!pf->present) continue; disk->private_data = pf; - disk->queue = pf_queue; add_disk(disk); } return 0; @@ -1003,10 +1022,10 @@ static void __exit pf_exit(void) if (!pf->present) continue; del_gendisk(pf->disk); + blk_cleanup_queue(pf->disk->queue); put_disk(pf->disk); pi_release(pf->pi); } - blk_cleanup_queue(pf_queue); } MODULE_LICENSE("GPL"); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 66d846ba85a9..205b865ebeb9 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -724,7 +724,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->rq_flags |= RQF_QUIET; blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0); - if (rq->errors) + if (scsi_req(rq)->result) ret = -EIO; out: blk_put_request(rq); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 517838b65964..089ac4179919 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4317,7 +4317,7 @@ static int rbd_init_request(void *data, struct request *rq, return 0; } -static struct blk_mq_ops rbd_mq_ops = { +static const struct blk_mq_ops rbd_mq_ops = { .queue_rq = rbd_queue_rq, .init_request = rbd_init_request, }; @@ -4380,7 +4380,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) q->limits.discard_granularity = segment_size; q->limits.discard_alignment = segment_size; blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); - q->limits.discard_zeroes_data = 1; if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index f81d70b39d10..9c566364ac9c 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -300,7 +300,6 
@@ int rsxx_setup_dev(struct rsxx_cardinfo *card) RSXX_HW_BLK_SIZE >> 9); card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE; card->queue->limits.discard_alignment = RSXX_HW_BLK_SIZE; - card->queue->limits.discard_zeroes_data = 1; } card->queue->queuedata = card; diff --git a/drivers/block/swim.c b/drivers/block/swim.c index b5afd495d482..3064be6cf375 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -211,7 +211,7 @@ enum head { struct swim_priv { struct swim __iomem *base; spinlock_t lock; - struct request_queue *queue; + int fdc_queue; int floppy_count; struct floppy_state unit[FD_MAX_UNIT]; }; @@ -525,12 +525,33 @@ static int floppy_read_sectors(struct floppy_state *fs, return 0; } -static void redo_fd_request(struct request_queue *q) +static struct request *swim_next_request(struct swim_priv *swd) { + struct request_queue *q; + struct request *rq; + int old_pos = swd->fdc_queue; + + do { + q = swd->unit[swd->fdc_queue].disk->queue; + if (++swd->fdc_queue == swd->floppy_count) + swd->fdc_queue = 0; + if (q) { + rq = blk_fetch_request(q); + if (rq) + return rq; + } + } while (swd->fdc_queue != old_pos); + + return NULL; +} + +static void do_fd_request(struct request_queue *q) +{ + struct swim_priv *swd = q->queuedata; struct request *req; struct floppy_state *fs; - req = blk_fetch_request(q); + req = swim_next_request(swd); while (req) { int err = -EIO; @@ -554,15 +575,10 @@ static void redo_fd_request(struct request_queue *q) } done: if (!__blk_end_request_cur(req, err)) - req = blk_fetch_request(q); + req = swim_next_request(swd); } } -static void do_fd_request(struct request_queue *q) -{ - redo_fd_request(q); -} - static struct floppy_struct floppy_type[4] = { { 0, 0, 0, 0, 0, 0x00, 0x00, 0x00, 0x00, NULL }, /* no testing */ { 720, 9, 1, 80, 0, 0x2A, 0x02, 0xDF, 0x50, NULL }, /* 360KB SS 3.5"*/ @@ -833,22 +849,25 @@ static int swim_floppy_init(struct swim_priv *swd) return -EBUSY; } + spin_lock_init(&swd->lock); + for (drive = 0; drive < 
swd->floppy_count; drive++) { swd->unit[drive].disk = alloc_disk(1); if (swd->unit[drive].disk == NULL) { err = -ENOMEM; goto exit_put_disks; } + swd->unit[drive].disk->queue = blk_init_queue(do_fd_request, + &swd->lock); + if (!swd->unit[drive].disk->queue) { + err = -ENOMEM; + put_disk(swd->unit[drive].disk); + goto exit_put_disks; + } + swd->unit[drive].disk->queue->queuedata = swd; swd->unit[drive].swd = swd; } - spin_lock_init(&swd->lock); - swd->queue = blk_init_queue(do_fd_request, &swd->lock); - if (!swd->queue) { - err = -ENOMEM; - goto exit_put_disks; - } - for (drive = 0; drive < swd->floppy_count; drive++) { swd->unit[drive].disk->flags = GENHD_FL_REMOVABLE; swd->unit[drive].disk->major = FLOPPY_MAJOR; @@ -856,7 +875,6 @@ static int swim_floppy_init(struct swim_priv *swd) sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive); swd->unit[drive].disk->fops = &floppy_fops; swd->unit[drive].disk->private_data = &swd->unit[drive]; - swd->unit[drive].disk->queue = swd->queue; set_capacity(swd->unit[drive].disk, 2880); add_disk(swd->unit[drive].disk); } @@ -943,13 +961,12 @@ static int swim_remove(struct platform_device *dev) for (drive = 0; drive < swd->floppy_count; drive++) { del_gendisk(swd->unit[drive].disk); + blk_cleanup_queue(swd->unit[drive].disk->queue); put_disk(swd->unit[drive].disk); } unregister_blkdev(FLOPPY_MAJOR, "fd"); - blk_cleanup_queue(swd->queue); - /* eject floppies */ for (drive = 0; drive < swd->floppy_count; drive++) diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 61b3ffa4f458..ba4809c9bdba 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -343,8 +343,8 @@ static void start_request(struct floppy_state *fs) req->rq_disk->disk_name, req->cmd, (long)blk_rq_pos(req), blk_rq_sectors(req), bio_data(req->bio)); - swim3_dbg(" errors=%d current_nr_sectors=%u\n", - req->errors, blk_rq_cur_sectors(req)); + swim3_dbg(" current_nr_sectors=%u\n", + blk_rq_cur_sectors(req)); #endif if (blk_rq_pos(req) >= 
fs->total_secs) { diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1d4c9f8bc1e1..f94614257462 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -111,7 +111,7 @@ static int virtblk_add_req_scsi(struct virtqueue *vq, struct virtblk_req *vbr, return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); } -static inline void virtblk_scsi_reques_done(struct request *req) +static inline void virtblk_scsi_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); struct virtio_blk *vblk = req->q->queuedata; @@ -119,7 +119,7 @@ static inline void virtblk_scsi_reques_done(struct request *req) sreq->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual); sreq->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len); - req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); + sreq->result = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); } static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, @@ -144,7 +144,7 @@ static inline int virtblk_add_req_scsi(struct virtqueue *vq, { return -EIO; } -static inline void virtblk_scsi_reques_done(struct request *req) +static inline void virtblk_scsi_request_done(struct request *req) { } #define virtblk_ioctl NULL @@ -175,19 +175,15 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); - int error = virtblk_result(vbr); switch (req_op(req)) { case REQ_OP_SCSI_IN: case REQ_OP_SCSI_OUT: - virtblk_scsi_reques_done(req); - break; - case REQ_OP_DRV_IN: - req->errors = (error != 0); + virtblk_scsi_request_done(req); break; } - blk_mq_end_request(req, error); + blk_mq_end_request(req, virtblk_result(vbr)); } static void virtblk_done(struct virtqueue *vq) @@ -205,7 +201,7 @@ static void virtblk_done(struct virtqueue *vq) while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { struct 
request *req = blk_mq_rq_from_pdu(vbr); - blk_mq_complete_request(req, req->errors); + blk_mq_complete_request(req); req_done = true; } if (unlikely(virtqueue_is_broken(vq))) @@ -310,7 +306,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) if (err) goto out; - err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); + blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); + err = virtblk_result(blk_mq_rq_to_pdu(req)); out: blk_put_request(req); return err; @@ -597,7 +594,7 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set) return blk_mq_virtio_map_queues(set, vblk->vdev, 0); } -static struct blk_mq_ops virtio_mq_ops = { +static const struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .complete = virtblk_request_done, .init_request = virtblk_init_request, diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 5067a0a952cb..39459631667c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -115,6 +115,15 @@ struct split_bio { atomic_t pending; }; +struct blkif_req { + int error; +}; + +static inline struct blkif_req *blkif_req(struct request *rq) +{ + return blk_mq_rq_to_pdu(rq); +} + static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; @@ -907,8 +916,14 @@ out_busy: return BLK_MQ_RQ_QUEUE_BUSY; } -static struct blk_mq_ops blkfront_mq_ops = { +static void blkif_complete_rq(struct request *rq) +{ + blk_mq_end_request(rq, blkif_req(rq)->error); +} + +static const struct blk_mq_ops blkfront_mq_ops = { .queue_rq = blkif_queue_rq, + .complete = blkif_complete_rq, }; static void blkif_set_queue_limits(struct blkfront_info *info) @@ -969,7 +984,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, info->tag_set.queue_depth = BLK_RING_SIZE(info); info->tag_set.numa_node = NUMA_NO_NODE; info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; - info->tag_set.cmd_size = 0; + info->tag_set.cmd_size = 
sizeof(struct blkif_req); info->tag_set.driver_data = info; if (blk_mq_alloc_tag_set(&info->tag_set)) @@ -1543,7 +1558,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) unsigned long flags; struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; struct blkfront_info *info = rinfo->dev_info; - int error; if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) return IRQ_HANDLED; @@ -1587,37 +1601,36 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) continue; } - error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; + blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; switch (bret->operation) { case BLKIF_OP_DISCARD: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; printk(KERN_WARNING "blkfront: %s: %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - error = -EOPNOTSUPP; + blkif_req(req)->error = -EOPNOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; queue_flag_clear(QUEUE_FLAG_DISCARD, rq); queue_flag_clear(QUEUE_FLAG_SECERASE, rq); } - blk_mq_complete_request(req, error); break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { printk(KERN_WARNING "blkfront: %s: %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - error = -EOPNOTSUPP; + blkif_req(req)->error = -EOPNOTSUPP; } if (unlikely(bret->status == BLKIF_RSP_ERROR && rinfo->shadow[id].req.u.rw.nr_segments == 0)) { printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - error = -EOPNOTSUPP; + blkif_req(req)->error = -EOPNOTSUPP; } - if (unlikely(error)) { - if (error == -EOPNOTSUPP) - error = 0; + if (unlikely(blkif_req(req)->error)) { + if (blkif_req(req)->error == -EOPNOTSUPP) + blkif_req(req)->error = 0; info->feature_fua = 0; info->feature_flush = 0; xlvbd_flush(info); @@ -1629,11 +1642,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) 
dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " "request: %x\n", bret->status); - blk_mq_complete_request(req, error); break; default: BUG(); } + + blk_mq_complete_request(req); } rinfo->ring.rsp_cons = i; @@ -2345,6 +2359,7 @@ static void blkfront_connect(struct blkfront_info *info) unsigned long sector_size; unsigned int physical_sector_size; unsigned int binfo; + char *envp[] = { "RESIZE=1", NULL }; int err, i; switch (info->connected) { @@ -2361,6 +2376,8 @@ static void blkfront_connect(struct blkfront_info *info) sectors); set_capacity(info->gd, sectors); revalidate_disk(info->gd); + kobject_uevent_env(&disk_to_dev(info->gd)->kobj, + KOBJ_CHANGE, envp); return; case BLKIF_STATE_SUSPENDED: diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index dceb5edd1e54..6fac5fedd610 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -523,7 +523,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) { - copy_page(mem, cmem); + memcpy(mem, cmem, PAGE_SIZE); } else { struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); @@ -717,7 +717,7 @@ compress_again: if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { src = kmap_atomic(page); - copy_page(cmem, src); + memcpy(cmem, src, PAGE_SIZE); kunmap_atomic(src); } else { memcpy(cmem, src, clen); @@ -829,10 +829,14 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; - if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: zram_bio_discard(zram, index, offset, bio); bio_endio(bio); return; + default: + break; } bio_for_each_segment(bvec, bio, iter) { @@ -928,7 +932,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, } index = sector >> SECTORS_PER_PAGE_SHIFT; - offset = sector & 
(SECTORS_PER_PAGE - 1) << SECTOR_SHIFT; + offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; bv.bv_page = page; bv.bv_len = PAGE_SIZE; @@ -1192,6 +1196,8 @@ static int zram_add(void) zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE; zram->disk->queue->limits.chunk_sectors = 0; blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); + /* * zram_bio_discard() will clear all logical blocks if logical block * size is identical with physical block size(PAGE_SIZE). But if it is @@ -1201,10 +1207,7 @@ static int zram_add(void) * zeroed. */ if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) - zram->disk->queue->limits.discard_zeroes_data = 1; - else - zram->disk->queue->limits.discard_zeroes_data = 0; - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); + blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); add_disk(zram->disk); diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 87739649eac2..76c952fd9ab9 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2218,7 +2218,8 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, rq->timeout = 60 * HZ; bio = rq->bio; - if (blk_execute_rq(q, cdi->disk, rq, 0)) { + blk_execute_rq(q, cdi->disk, rq, 0); + if (scsi_req(rq)->result) { struct request_sense *s = req->sense; ret = -EIO; cdi->last_sense = s->sense_key; diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c index d6f5d9eb102d..70d434bc1cbf 100644 --- a/drivers/char/ipmi/bt-bmc.c +++ b/drivers/char/ipmi/bt-bmc.c @@ -523,6 +523,7 @@ static int bt_bmc_remove(struct platform_device *pdev) static const struct of_device_id bt_bmc_match[] = { { .compatible = "aspeed,ast2400-ibt-bmc" }, + { .compatible = "aspeed,ast2500-ibt-bmc" }, { }, }; diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 2a7c425ddfa7..b2b618f066e0 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ 
b/drivers/char/ipmi/ipmi_si_intf.c @@ -1954,7 +1954,9 @@ static int hotmod_handler(const char *val, struct kernel_param *kp) kfree(info); goto out; } + mutex_lock(&smi_infos_lock); rv = try_smi_init(info); + mutex_unlock(&smi_infos_lock); if (rv) { cleanup_one_si(info); goto out; @@ -2042,8 +2044,10 @@ static int hardcode_find_bmc(void) info->slave_addr = slave_addrs[i]; if (!add_smi(info)) { + mutex_lock(&smi_infos_lock); if (try_smi_init(info)) cleanup_one_si(info); + mutex_unlock(&smi_infos_lock); ret = 0; } else { kfree(info); @@ -3492,6 +3496,11 @@ out_err: return rv; } +/* + * Try to start up an interface. Must be called with smi_infos_lock + * held, primarily to keep smi_num consistent; we only want to do these + * one at a time. + */ static int try_smi_init(struct smi_info *new_smi) { int rv = 0; @@ -3524,9 +3533,12 @@ static int try_smi_init(struct smi_info *new_smi) goto out_err; } + new_smi->intf_num = smi_num; + /* Do this early so it's available for logs. */ if (!new_smi->dev) { - init_name = kasprintf(GFP_KERNEL, "ipmi_si.%d", 0); + init_name = kasprintf(GFP_KERNEL, "ipmi_si.%d", + new_smi->intf_num); /* * If we don't already have a device from something @@ -3593,8 +3605,6 @@ static int try_smi_init(struct smi_info *new_smi) new_smi->interrupt_disabled = true; atomic_set(&new_smi->need_watch, 0); - new_smi->intf_num = smi_num; - smi_num++; rv = try_enable_event_buffer(new_smi); if (rv == 0) @@ -3661,6 +3671,9 @@ static int try_smi_init(struct smi_info *new_smi) goto out_err_stop_timer; } + /* Don't increment till we know we have succeeded. 
*/ + smi_num++; + dev_info(new_smi->dev, "IPMI %s interface initialized\n", si_to_str[new_smi->si_type]); diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index cca6e5bc1cea..0b22a9be5029 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -891,6 +891,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, * for details on the intricacies of this. */ int left; + unsigned char *data_to_send; ssif_inc_stat(ssif_info, sent_messages_parts); @@ -899,6 +900,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, left = 32; /* Length byte. */ ssif_info->multi_data[ssif_info->multi_pos] = left; + data_to_send = ssif_info->multi_data + ssif_info->multi_pos; ssif_info->multi_pos += left; if (left < 32) /* @@ -912,7 +914,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, rv = ssif_i2c_send(ssif_info, msg_written_handler, I2C_SMBUS_WRITE, SSIF_IPMI_MULTI_PART_REQUEST_MIDDLE, - ssif_info->multi_data + ssif_info->multi_pos, + data_to_send, I2C_SMBUS_BLOCK_DATA); if (rv < 0) { /* request failed, just return the error. 
*/ @@ -1642,9 +1644,8 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) spin_lock_init(&ssif_info->lock); ssif_info->ssif_state = SSIF_NORMAL; - init_timer(&ssif_info->retry_timer); - ssif_info->retry_timer.data = (unsigned long) ssif_info; - ssif_info->retry_timer.function = retry_timeout; + setup_timer(&ssif_info->retry_timer, retry_timeout, + (unsigned long)ssif_info); for (i = 0; i < SSIF_NUM_STATS; i++) atomic_set(&ssif_info->stats[i], 0); diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 5ca24d9b101b..d165af8abe36 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -516,7 +516,7 @@ static void panic_halt_ipmi_heartbeat(void) msg.cmd = IPMI_WDOG_RESET_TIMER; msg.data = NULL; msg.data_len = 0; - atomic_add(2, &panic_done_count); + atomic_add(1, &panic_done_count); rv = ipmi_request_supply_msgs(watchdog_user, (struct ipmi_addr *) &addr, 0, @@ -526,7 +526,7 @@ static void panic_halt_ipmi_heartbeat(void) &panic_halt_heartbeat_recv_msg, 1); if (rv) - atomic_sub(2, &panic_done_count); + atomic_sub(1, &panic_done_count); } static struct ipmi_smi_msg panic_halt_smi_msg = { @@ -550,12 +550,12 @@ static void panic_halt_ipmi_set_timeout(void) /* Wait for the messages to be free. 
*/ while (atomic_read(&panic_done_count) != 0) ipmi_poll_interface(watchdog_user); - atomic_add(2, &panic_done_count); + atomic_add(1, &panic_done_count); rv = i_ipmi_set_timeout(&panic_halt_smi_msg, &panic_halt_recv_msg, &send_heartbeat_now); if (rv) { - atomic_sub(2, &panic_done_count); + atomic_sub(1, &panic_done_count); printk(KERN_WARNING PFX "Unable to extend the watchdog timeout."); } else { diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 6d9cc2d39d22..7e4a9d1296bb 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -60,6 +60,10 @@ static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) #endif #ifdef CONFIG_STRICT_DEVMEM +static inline int page_is_allowed(unsigned long pfn) +{ + return devmem_is_allowed(pfn); +} static inline int range_is_allowed(unsigned long pfn, unsigned long size) { u64 from = ((u64)pfn) << PAGE_SHIFT; @@ -75,6 +79,10 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) return 1; } #else +static inline int page_is_allowed(unsigned long pfn) +{ + return 1; +} static inline int range_is_allowed(unsigned long pfn, unsigned long size) { return 1; @@ -122,23 +130,31 @@ static ssize_t read_mem(struct file *file, char __user *buf, while (count > 0) { unsigned long remaining; + int allowed; sz = size_inside_page(p, count); - if (!range_is_allowed(p >> PAGE_SHIFT, count)) + allowed = page_is_allowed(p >> PAGE_SHIFT); + if (!allowed) return -EPERM; + if (allowed == 2) { + /* Show zeros for restricted memory. */ + remaining = clear_user(buf, sz); + } else { + /* + * On ia64 if a page has been mapped somewhere as + * uncached, then it must also be accessed uncached + * by the kernel or data corruption may occur. + */ + ptr = xlate_dev_mem_ptr(p); + if (!ptr) + return -EFAULT; - /* - * On ia64 if a page has been mapped somewhere as uncached, then - * it must also be accessed uncached by the kernel or data - * corruption may occur. 
- */ - ptr = xlate_dev_mem_ptr(p); - if (!ptr) - return -EFAULT; + remaining = copy_to_user(buf, ptr, sz); + + unxlate_dev_mem_ptr(p, ptr); + } - remaining = copy_to_user(buf, ptr, sz); - unxlate_dev_mem_ptr(p, ptr); if (remaining) return -EFAULT; @@ -181,30 +197,36 @@ static ssize_t write_mem(struct file *file, const char __user *buf, #endif while (count > 0) { + int allowed; + sz = size_inside_page(p, count); - if (!range_is_allowed(p >> PAGE_SHIFT, sz)) + allowed = page_is_allowed(p >> PAGE_SHIFT); + if (!allowed) return -EPERM; - /* - * On ia64 if a page has been mapped somewhere as uncached, then - * it must also be accessed uncached by the kernel or data - * corruption may occur. - */ - ptr = xlate_dev_mem_ptr(p); - if (!ptr) { - if (written) - break; - return -EFAULT; - } + /* Skip actual writing when a page is marked as restricted. */ + if (allowed == 1) { + /* + * On ia64 if a page has been mapped somewhere as + * uncached, then it must also be accessed uncached + * by the kernel or data corruption may occur. 
+ */ + ptr = xlate_dev_mem_ptr(p); + if (!ptr) { + if (written) + break; + return -EFAULT; + } - copied = copy_from_user(ptr, buf, sz); - unxlate_dev_mem_ptr(p, ptr); - if (copied) { - written += sz - copied; - if (written) - break; - return -EFAULT; + copied = copy_from_user(ptr, buf, sz); + unxlate_dev_mem_ptr(p, ptr); + if (copied) { + written += sz - copied; + if (written) + break; + return -EFAULT; + } } buf += sz; diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c index b708c85dc9c1..0e7fcb04f01e 100644 --- a/drivers/char/mmtimer.c +++ b/drivers/char/mmtimer.c @@ -478,18 +478,18 @@ static int sgi_clock_period; static struct timespec sgi_clock_offset; static int sgi_clock_period; -static int sgi_clock_get(clockid_t clockid, struct timespec *tp) +static int sgi_clock_get(clockid_t clockid, struct timespec64 *tp) { u64 nsec; nsec = rtc_time() * sgi_clock_period + sgi_clock_offset.tv_nsec; - *tp = ns_to_timespec(nsec); + *tp = ns_to_timespec64(nsec); tp->tv_sec += sgi_clock_offset.tv_sec; return 0; }; -static int sgi_clock_set(const clockid_t clockid, const struct timespec *tp) +static int sgi_clock_set(const clockid_t clockid, const struct timespec64 *tp) { u64 nsec; @@ -657,7 +657,7 @@ static int sgi_timer_del(struct k_itimer *timr) } /* Assumption: it_lock is already held with irq's disabled */ -static void sgi_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) +static void sgi_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) { if (timr->it.mmtimer.clock == TIMER_OFF) { @@ -668,14 +668,14 @@ static void sgi_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) return; } - cur_setting->it_interval = ns_to_timespec(timr->it.mmtimer.incr * sgi_clock_period); - cur_setting->it_value = ns_to_timespec((timr->it.mmtimer.expires - rtc_time()) * sgi_clock_period); + cur_setting->it_interval = ns_to_timespec64(timr->it.mmtimer.incr * sgi_clock_period); + cur_setting->it_value = 
ns_to_timespec64((timr->it.mmtimer.expires - rtc_time()) * sgi_clock_period); } static int sgi_timer_set(struct k_itimer *timr, int flags, - struct itimerspec * new_setting, - struct itimerspec * old_setting) + struct itimerspec64 *new_setting, + struct itimerspec64 *old_setting) { unsigned long when, period, irqflags; int err = 0; @@ -687,8 +687,8 @@ static int sgi_timer_set(struct k_itimer *timr, int flags, sgi_timer_get(timr, old_setting); sgi_timer_del(timr); - when = timespec_to_ns(&new_setting->it_value); - period = timespec_to_ns(&new_setting->it_interval); + when = timespec64_to_ns(&new_setting->it_value); + period = timespec64_to_ns(&new_setting->it_interval); if (when == 0) /* Clear timer */ @@ -699,11 +699,11 @@ static int sgi_timer_set(struct k_itimer *timr, int flags, return -ENOMEM; if (flags & TIMER_ABSTIME) { - struct timespec n; + struct timespec64 n; unsigned long now; - getnstimeofday(&n); - now = timespec_to_ns(&n); + getnstimeofday64(&n); + now = timespec64_to_ns(&n); if (when > now) when -= now; else @@ -765,7 +765,7 @@ static int sgi_timer_set(struct k_itimer *timr, int flags, return err; } -static int sgi_clock_getres(const clockid_t which_clock, struct timespec *tp) +static int sgi_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { tp->tv_sec = 0; tp->tv_nsec = sgi_clock_period; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index e9b7e0b3cabe..87fe111d0be6 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -2202,14 +2202,16 @@ static int virtcons_freeze(struct virtio_device *vdev) vdev->config->reset(vdev); - virtqueue_disable_cb(portdev->c_ivq); + if (use_multiport(portdev)) + virtqueue_disable_cb(portdev->c_ivq); cancel_work_sync(&portdev->control_work); cancel_work_sync(&portdev->config_work); /* * Once more: if control_work_handler() was running, it would * enable the cb as the last step. 
*/ - virtqueue_disable_cb(portdev->c_ivq); + if (use_multiport(portdev)) + virtqueue_disable_cb(portdev->c_ivq); remove_controlq_data(portdev); list_for_each_entry(port, &portdev->ports, list) { diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c index ab609a76706f..cf9449b3dbd9 100644 --- a/drivers/clk/clk-stm32f4.c +++ b/drivers/clk/clk-stm32f4.c @@ -429,6 +429,13 @@ static const struct clk_div_table pll_divp_table[] = { { 0, 2 }, { 1, 4 }, { 2, 6 }, { 3, 8 }, { 0 } }; +static const struct clk_div_table pll_divq_table[] = { + { 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 }, + { 8, 8 }, { 9, 9 }, { 10, 10 }, { 11, 11 }, { 12, 12 }, { 13, 13 }, + { 14, 14 }, { 15, 15 }, + { 0 } +}; + static const struct clk_div_table pll_divr_table[] = { { 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 }, { 0 } }; @@ -496,9 +503,9 @@ struct stm32f4_div_data { #define MAX_PLL_DIV 3 static const struct stm32f4_div_data div_data[MAX_PLL_DIV] = { - { 16, 2, 0, pll_divp_table }, - { 24, 4, CLK_DIVIDER_ONE_BASED, NULL }, - { 28, 3, 0, pll_divr_table }, + { 16, 2, 0, pll_divp_table }, + { 24, 4, 0, pll_divq_table }, + { 28, 3, 0, pll_divr_table }, }; struct stm32f4_pll_data { diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig index 72109d2cf41b..a077ab6edffa 100644 --- a/drivers/clk/sunxi-ng/Kconfig +++ b/drivers/clk/sunxi-ng/Kconfig @@ -1,6 +1,7 @@ config SUNXI_CCU bool "Clock support for Allwinner SoCs" depends on ARCH_SUNXI || COMPILE_TEST + select RESET_CONTROLLER default ARCH_SUNXI if SUNXI_CCU @@ -15,7 +16,7 @@ config SUNXI_CCU_FRAC bool config SUNXI_CCU_GATE - bool + def_bool y config SUNXI_CCU_MUX bool @@ -135,6 +136,7 @@ config SUN8I_V3S_CCU config SUN9I_A80_CCU bool "Support for the Allwinner A80 CCU" select SUNXI_CCU_DIV + select SUNXI_CCU_MULT select SUNXI_CCU_GATE select SUNXI_CCU_NKMP select SUNXI_CCU_NM diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c index 
a7b3c08ed0e2..2c69b631967a 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c @@ -752,6 +752,13 @@ static const struct sunxi_ccu_desc sun8i_a33_ccu_desc = { .num_resets = ARRAY_SIZE(sun8i_a33_ccu_resets), }; +static struct ccu_pll_nb sun8i_a33_pll_cpu_nb = { + .common = &pll_cpux_clk.common, + /* copy from pll_cpux_clk */ + .enable = BIT(31), + .lock = BIT(28), +}; + static struct ccu_mux_nb sun8i_a33_cpu_nb = { .common = &cpux_clk.common, .cm = &cpux_clk.mux, @@ -783,6 +790,10 @@ static void __init sun8i_a33_ccu_setup(struct device_node *node) sunxi_ccu_probe(node, reg, &sun8i_a33_ccu_desc); + /* Gate then ungate PLL CPU after any rate changes */ + ccu_pll_notifier_register(&sun8i_a33_pll_cpu_nb); + + /* Reparent CPU during PLL CPU rate changes */ ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk, &sun8i_a33_cpu_nb); } diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c index 8a47bafd7890..9d8724715a43 100644 --- a/drivers/clk/sunxi-ng/ccu_common.c +++ b/drivers/clk/sunxi-ng/ccu_common.c @@ -14,11 +14,13 @@ * GNU General Public License for more details. */ +#include <linux/clk.h> #include <linux/clk-provider.h> #include <linux/iopoll.h> #include <linux/slab.h> #include "ccu_common.h" +#include "ccu_gate.h" #include "ccu_reset.h" static DEFINE_SPINLOCK(ccu_lock); @@ -39,6 +41,53 @@ void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock) WARN_ON(readl_relaxed_poll_timeout(addr, reg, reg & lock, 100, 70000)); } +/* + * This clock notifier is called when the frequency of a PLL clock is + * changed. In common PLL designs, changes to the dividers take effect + * almost immediately, while changes to the multipliers (implemented + * as dividers in the feedback loop) take a few cycles to work into + * the feedback loop for the PLL to stabilize. + * + * Sometimes when the PLL clock rate is changed, the decrease in the + * divider is too much for the decrease in the multiplier to catch up. 
+ * The PLL clock rate will spike, and in some cases, might lock up + * completely. + * + * This notifier callback will gate and then ungate the clock, + * effectively resetting it, so it proceeds to work. Care must be + * taken to reparent consumers to other temporary clocks during the + * rate change, and that this notifier callback must be the first + * to be registered. + */ +static int ccu_pll_notifier_cb(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct ccu_pll_nb *pll = to_ccu_pll_nb(nb); + int ret = 0; + + if (event != POST_RATE_CHANGE) + goto out; + + ccu_gate_helper_disable(pll->common, pll->enable); + + ret = ccu_gate_helper_enable(pll->common, pll->enable); + if (ret) + goto out; + + ccu_helper_wait_for_lock(pll->common, pll->lock); + +out: + return notifier_from_errno(ret); +} + +int ccu_pll_notifier_register(struct ccu_pll_nb *pll_nb) +{ + pll_nb->clk_nb.notifier_call = ccu_pll_notifier_cb; + + return clk_notifier_register(pll_nb->common->hw.clk, + &pll_nb->clk_nb); +} + int sunxi_ccu_probe(struct device_node *node, void __iomem *reg, const struct sunxi_ccu_desc *desc) { diff --git a/drivers/clk/sunxi-ng/ccu_common.h b/drivers/clk/sunxi-ng/ccu_common.h index 73d81dc58fc5..d6fdd7a789aa 100644 --- a/drivers/clk/sunxi-ng/ccu_common.h +++ b/drivers/clk/sunxi-ng/ccu_common.h @@ -83,6 +83,18 @@ struct sunxi_ccu_desc { void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock); +struct ccu_pll_nb { + struct notifier_block clk_nb; + struct ccu_common *common; + + u32 enable; + u32 lock; +}; + +#define to_ccu_pll_nb(_nb) container_of(_nb, struct ccu_pll_nb, clk_nb) + +int ccu_pll_notifier_register(struct ccu_pll_nb *pll_nb); + int sunxi_ccu_probe(struct device_node *node, void __iomem *reg, const struct sunxi_ccu_desc *desc); diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 3356ab821624..545d541ae20e 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -67,20 +67,22 @@ config 
DW_APB_TIMER_OF select DW_APB_TIMER select CLKSRC_OF -config GEMINI_TIMER - bool "Cortina Gemini timer driver" if COMPILE_TEST +config FTTMR010_TIMER + bool "Faraday Technology timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS depends on HAS_IOMEM select CLKSRC_MMIO select CLKSRC_OF select MFD_SYSCON help - Enables support for the Gemini timer + Enables support for the Faraday Technology timer block + FTTMR010. config ROCKCHIP_TIMER bool "Rockchip timer driver" if COMPILE_TEST depends on ARM || ARM64 select CLKSRC_OF + select CLKSRC_MMIO help Enables the support for the rockchip timer driver. @@ -366,6 +368,17 @@ config HISILICON_ERRATUM_161010101 161010101. The workaround will be active if the hisilicon,erratum-161010101 property is found in the timer node. +config ARM64_ERRATUM_858921 + bool "Workaround for Cortex-A73 erratum 858921" + default y + select ARM_ARCH_TIMER_OOL_WORKAROUND + depends on ARM_ARCH_TIMER && ARM64 + help + This option enables a workaround applicable to Cortex-A73 + (all versions), whose counter may return incorrect values. + The workaround will be dynamically enabled when an affected + core is detected. 
+ config ARM_GLOBAL_TIMER bool "Support for the ARM global timer" if COMPILE_TEST select CLKSRC_OF if OF diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index d227d1314f14..2b5b56a6f00f 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -17,7 +17,7 @@ obj-$(CONFIG_CLKSRC_MMIO) += mmio.o obj-$(CONFIG_DIGICOLOR_TIMER) += timer-digicolor.o obj-$(CONFIG_DW_APB_TIMER) += dw_apb_timer.o obj-$(CONFIG_DW_APB_TIMER_OF) += dw_apb_timer_of.o -obj-$(CONFIG_GEMINI_TIMER) += timer-gemini.o +obj-$(CONFIG_FTTMR010_TIMER) += timer-fttmr010.o obj-$(CONFIG_ROCKCHIP_TIMER) += rockchip_timer.o obj-$(CONFIG_CLKSRC_NOMADIK_MTU) += nomadik-mtu.o obj-$(CONFIG_CLKSRC_DBX500_PRCMU) += clksrc-dbx500-prcmu.o diff --git a/drivers/clocksource/arc_timer.c b/drivers/clocksource/arc_timer.c index 7517f959cba7..21649733827d 100644 --- a/drivers/clocksource/arc_timer.c +++ b/drivers/clocksource/arc_timer.c @@ -37,7 +37,7 @@ static int noinline arc_get_timer_clk(struct device_node *node) clk = of_clk_get(node, 0); if (IS_ERR(clk)) { - pr_err("timer missing clk"); + pr_err("timer missing clk\n"); return PTR_ERR(clk); } @@ -89,7 +89,7 @@ static int __init arc_cs_setup_gfrc(struct device_node *node) READ_BCR(ARC_REG_MCIP_BCR, mp); if (!mp.gfrc) { - pr_warn("Global-64-bit-Ctr clocksource not detected"); + pr_warn("Global-64-bit-Ctr clocksource not detected\n"); return -ENXIO; } @@ -140,13 +140,13 @@ static int __init arc_cs_setup_rtc(struct device_node *node) READ_BCR(ARC_REG_TIMERS_BCR, timer); if (!timer.rtc) { - pr_warn("Local-64-bit-Ctr clocksource not detected"); + pr_warn("Local-64-bit-Ctr clocksource not detected\n"); return -ENXIO; } /* Local to CPU hence not usable in SMP */ if (IS_ENABLED(CONFIG_SMP)) { - pr_warn("Local-64-bit-Ctr not usable in SMP"); + pr_warn("Local-64-bit-Ctr not usable in SMP\n"); return -EINVAL; } @@ -290,13 +290,13 @@ static int __init arc_clockevent_setup(struct device_node *node) arc_timer_irq = 
irq_of_parse_and_map(node, 0); if (arc_timer_irq <= 0) { - pr_err("clockevent: missing irq"); + pr_err("clockevent: missing irq\n"); return -EINVAL; } ret = arc_get_timer_clk(node); if (ret) { - pr_err("clockevent: missing clk"); + pr_err("clockevent: missing clk\n"); return ret; } @@ -313,7 +313,7 @@ static int __init arc_clockevent_setup(struct device_node *node) arc_timer_starting_cpu, arc_timer_dying_cpu); if (ret) { - pr_err("Failed to setup hotplug state"); + pr_err("Failed to setup hotplug state\n"); return ret; } return 0; diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 7a8a4117f123..a1fb918b8021 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -33,6 +33,9 @@ #include <clocksource/arm_arch_timer.h> +#undef pr_fmt +#define pr_fmt(fmt) "arch_timer: " fmt + #define CNTTIDR 0x08 #define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4)) @@ -52,8 +55,6 @@ #define CNTV_TVAL 0x38 #define CNTV_CTL 0x3c -#define ARCH_CP15_TIMER BIT(0) -#define ARCH_MEM_TIMER BIT(1) static unsigned arch_timers_present __initdata; static void __iomem *arch_counter_base; @@ -66,23 +67,15 @@ struct arch_timer { #define to_arch_timer(e) container_of(e, struct arch_timer, evt) static u32 arch_timer_rate; - -enum ppi_nr { - PHYS_SECURE_PPI, - PHYS_NONSECURE_PPI, - VIRT_PPI, - HYP_PPI, - MAX_TIMER_PPI -}; - -static int arch_timer_ppi[MAX_TIMER_PPI]; +static int arch_timer_ppi[ARCH_TIMER_MAX_TIMER_PPI]; static struct clock_event_device __percpu *arch_timer_evt; -static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI; +static enum arch_timer_ppi_nr arch_timer_uses_ppi = ARCH_TIMER_VIRT_PPI; static bool arch_timer_c3stop; static bool arch_timer_mem_use_virtual; static bool arch_counter_suspend_stop; +static bool vdso_default = true; static bool evtstrm_enable = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM); @@ -96,6 +89,105 @@ early_param("clocksource.arm_arch_timer.evtstrm", early_evtstrm_cfg); * Architected system 
timer support. */ +static __always_inline +void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val, + struct clock_event_device *clk) +{ + if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { + struct arch_timer *timer = to_arch_timer(clk); + switch (reg) { + case ARCH_TIMER_REG_CTRL: + writel_relaxed(val, timer->base + CNTP_CTL); + break; + case ARCH_TIMER_REG_TVAL: + writel_relaxed(val, timer->base + CNTP_TVAL); + break; + } + } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { + struct arch_timer *timer = to_arch_timer(clk); + switch (reg) { + case ARCH_TIMER_REG_CTRL: + writel_relaxed(val, timer->base + CNTV_CTL); + break; + case ARCH_TIMER_REG_TVAL: + writel_relaxed(val, timer->base + CNTV_TVAL); + break; + } + } else { + arch_timer_reg_write_cp15(access, reg, val); + } +} + +static __always_inline +u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, + struct clock_event_device *clk) +{ + u32 val; + + if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { + struct arch_timer *timer = to_arch_timer(clk); + switch (reg) { + case ARCH_TIMER_REG_CTRL: + val = readl_relaxed(timer->base + CNTP_CTL); + break; + case ARCH_TIMER_REG_TVAL: + val = readl_relaxed(timer->base + CNTP_TVAL); + break; + } + } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { + struct arch_timer *timer = to_arch_timer(clk); + switch (reg) { + case ARCH_TIMER_REG_CTRL: + val = readl_relaxed(timer->base + CNTV_CTL); + break; + case ARCH_TIMER_REG_TVAL: + val = readl_relaxed(timer->base + CNTV_TVAL); + break; + } + } else { + val = arch_timer_reg_read_cp15(access, reg); + } + + return val; +} + +/* + * Default to cp15 based access because arm64 uses this function for + * sched_clock() before DT is probed and the cp15 method is guaranteed + * to exist on arm64. arm doesn't use this before DT is probed so even + * if we don't have the cp15 accessors we won't have a problem. 
+ */ +u64 (*arch_timer_read_counter)(void) = arch_counter_get_cntvct; + +static u64 arch_counter_read(struct clocksource *cs) +{ + return arch_timer_read_counter(); +} + +static u64 arch_counter_read_cc(const struct cyclecounter *cc) +{ + return arch_timer_read_counter(); +} + +static struct clocksource clocksource_counter = { + .name = "arch_sys_counter", + .rating = 400, + .read = arch_counter_read, + .mask = CLOCKSOURCE_MASK(56), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static struct cyclecounter cyclecounter __ro_after_init = { + .read = arch_counter_read_cc, + .mask = CLOCKSOURCE_MASK(56), +}; + +struct ate_acpi_oem_info { + char oem_id[ACPI_OEM_ID_SIZE + 1]; + char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; + u32 oem_revision; +}; + #ifdef CONFIG_FSL_ERRATUM_A008585 /* * The number of retries is an arbitrary value well beyond the highest number @@ -170,97 +262,289 @@ static u64 notrace hisi_161010101_read_cntvct_el0(void) { return __hisi_161010101_read_reg(cntvct_el0); } + +static struct ate_acpi_oem_info hisi_161010101_oem_info[] = { + /* + * Note that trailing spaces are required to properly match + * the OEM table information. + */ + { + .oem_id = "HISI ", + .oem_table_id = "HIP05 ", + .oem_revision = 0, + }, + { + .oem_id = "HISI ", + .oem_table_id = "HIP06 ", + .oem_revision = 0, + }, + { + .oem_id = "HISI ", + .oem_table_id = "HIP07 ", + .oem_revision = 0, + }, + { /* Sentinel indicating the end of the OEM array */ }, +}; +#endif + +#ifdef CONFIG_ARM64_ERRATUM_858921 +static u64 notrace arm64_858921_read_cntvct_el0(void) +{ + u64 old, new; + + old = read_sysreg(cntvct_el0); + new = read_sysreg(cntvct_el0); + return (((old ^ new) >> 32) & 1) ? 
old : new; +} #endif #ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND -const struct arch_timer_erratum_workaround *timer_unstable_counter_workaround = NULL; +DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, + timer_unstable_counter_workaround); EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled); EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled); +static void erratum_set_next_event_tval_generic(const int access, unsigned long evt, + struct clock_event_device *clk) +{ + unsigned long ctrl; + u64 cval = evt + arch_counter_get_cntvct(); + + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); + ctrl |= ARCH_TIMER_CTRL_ENABLE; + ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; + + if (access == ARCH_TIMER_PHYS_ACCESS) + write_sysreg(cval, cntp_cval_el0); + else + write_sysreg(cval, cntv_cval_el0); + + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); +} + +static __maybe_unused int erratum_set_next_event_tval_virt(unsigned long evt, + struct clock_event_device *clk) +{ + erratum_set_next_event_tval_generic(ARCH_TIMER_VIRT_ACCESS, evt, clk); + return 0; +} + +static __maybe_unused int erratum_set_next_event_tval_phys(unsigned long evt, + struct clock_event_device *clk) +{ + erratum_set_next_event_tval_generic(ARCH_TIMER_PHYS_ACCESS, evt, clk); + return 0; +} + static const struct arch_timer_erratum_workaround ool_workarounds[] = { #ifdef CONFIG_FSL_ERRATUM_A008585 { + .match_type = ate_match_dt, .id = "fsl,erratum-a008585", + .desc = "Freescale erratum a005858", .read_cntp_tval_el0 = fsl_a008585_read_cntp_tval_el0, .read_cntv_tval_el0 = fsl_a008585_read_cntv_tval_el0, .read_cntvct_el0 = fsl_a008585_read_cntvct_el0, + .set_next_event_phys = erratum_set_next_event_tval_phys, + .set_next_event_virt = erratum_set_next_event_tval_virt, }, #endif #ifdef CONFIG_HISILICON_ERRATUM_161010101 { + .match_type = ate_match_dt, .id = "hisilicon,erratum-161010101", + .desc = "HiSilicon erratum 161010101", 
.read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0, .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0, .read_cntvct_el0 = hisi_161010101_read_cntvct_el0, + .set_next_event_phys = erratum_set_next_event_tval_phys, + .set_next_event_virt = erratum_set_next_event_tval_virt, + }, + { + .match_type = ate_match_acpi_oem_info, + .id = hisi_161010101_oem_info, + .desc = "HiSilicon erratum 161010101", + .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0, + .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0, + .read_cntvct_el0 = hisi_161010101_read_cntvct_el0, + .set_next_event_phys = erratum_set_next_event_tval_phys, + .set_next_event_virt = erratum_set_next_event_tval_virt, + }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_858921 + { + .match_type = ate_match_local_cap_id, + .id = (void *)ARM64_WORKAROUND_858921, + .desc = "ARM erratum 858921", + .read_cntvct_el0 = arm64_858921_read_cntvct_el0, }, #endif }; -#endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ -static __always_inline -void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val, - struct clock_event_device *clk) +typedef bool (*ate_match_fn_t)(const struct arch_timer_erratum_workaround *, + const void *); + +static +bool arch_timer_check_dt_erratum(const struct arch_timer_erratum_workaround *wa, + const void *arg) { - if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { - struct arch_timer *timer = to_arch_timer(clk); - switch (reg) { - case ARCH_TIMER_REG_CTRL: - writel_relaxed(val, timer->base + CNTP_CTL); - break; - case ARCH_TIMER_REG_TVAL: - writel_relaxed(val, timer->base + CNTP_TVAL); - break; - } - } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { - struct arch_timer *timer = to_arch_timer(clk); - switch (reg) { - case ARCH_TIMER_REG_CTRL: - writel_relaxed(val, timer->base + CNTV_CTL); - break; - case ARCH_TIMER_REG_TVAL: - writel_relaxed(val, timer->base + CNTV_TVAL); - break; - } - } else { - arch_timer_reg_write_cp15(access, reg, val); + const struct device_node *np = arg; + + return 
of_property_read_bool(np, wa->id); +} + +static +bool arch_timer_check_local_cap_erratum(const struct arch_timer_erratum_workaround *wa, + const void *arg) +{ + return this_cpu_has_cap((uintptr_t)wa->id); +} + + +static +bool arch_timer_check_acpi_oem_erratum(const struct arch_timer_erratum_workaround *wa, + const void *arg) +{ + static const struct ate_acpi_oem_info empty_oem_info = {}; + const struct ate_acpi_oem_info *info = wa->id; + const struct acpi_table_header *table = arg; + + /* Iterate over the ACPI OEM info array, looking for a match */ + while (memcmp(info, &empty_oem_info, sizeof(*info))) { + if (!memcmp(info->oem_id, table->oem_id, ACPI_OEM_ID_SIZE) && + !memcmp(info->oem_table_id, table->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) && + info->oem_revision == table->oem_revision) + return true; + + info++; } + + return false; } -static __always_inline -u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, - struct clock_event_device *clk) +static const struct arch_timer_erratum_workaround * +arch_timer_iterate_errata(enum arch_timer_erratum_match_type type, + ate_match_fn_t match_fn, + void *arg) { - u32 val; + int i; - if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { - struct arch_timer *timer = to_arch_timer(clk); - switch (reg) { - case ARCH_TIMER_REG_CTRL: - val = readl_relaxed(timer->base + CNTP_CTL); - break; - case ARCH_TIMER_REG_TVAL: - val = readl_relaxed(timer->base + CNTP_TVAL); - break; - } - } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { - struct arch_timer *timer = to_arch_timer(clk); - switch (reg) { - case ARCH_TIMER_REG_CTRL: - val = readl_relaxed(timer->base + CNTV_CTL); - break; - case ARCH_TIMER_REG_TVAL: - val = readl_relaxed(timer->base + CNTV_TVAL); - break; - } + for (i = 0; i < ARRAY_SIZE(ool_workarounds); i++) { + if (ool_workarounds[i].match_type != type) + continue; + + if (match_fn(&ool_workarounds[i], arg)) + return &ool_workarounds[i]; + } + + return NULL; +} + +static +void arch_timer_enable_workaround(const struct 
arch_timer_erratum_workaround *wa, + bool local) +{ + int i; + + if (local) { + __this_cpu_write(timer_unstable_counter_workaround, wa); } else { - val = arch_timer_reg_read_cp15(access, reg); + for_each_possible_cpu(i) + per_cpu(timer_unstable_counter_workaround, i) = wa; } - return val; + static_branch_enable(&arch_timer_read_ool_enabled); + + /* + * Don't use the vdso fastpath if errata require using the + * out-of-line counter accessor. We may change our mind pretty + * late in the game (with a per-CPU erratum, for example), so + * change both the default value and the vdso itself. + */ + if (wa->read_cntvct_el0) { + clocksource_counter.archdata.vdso_direct = false; + vdso_default = false; + } +} + +static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type type, + void *arg) +{ + const struct arch_timer_erratum_workaround *wa; + ate_match_fn_t match_fn = NULL; + bool local = false; + + switch (type) { + case ate_match_dt: + match_fn = arch_timer_check_dt_erratum; + break; + case ate_match_local_cap_id: + match_fn = arch_timer_check_local_cap_erratum; + local = true; + break; + case ate_match_acpi_oem_info: + match_fn = arch_timer_check_acpi_oem_erratum; + break; + default: + WARN_ON(1); + return; + } + + wa = arch_timer_iterate_errata(type, match_fn, arg); + if (!wa) + return; + + if (needs_unstable_timer_counter_workaround()) { + const struct arch_timer_erratum_workaround *__wa; + __wa = __this_cpu_read(timer_unstable_counter_workaround); + if (__wa && wa != __wa) + pr_warn("Can't enable workaround for %s (clashes with %s)\n", + wa->desc, __wa->desc); + + if (__wa) + return; /* keep the workaround already installed on this CPU */ + } + + arch_timer_enable_workaround(wa, local); + pr_info("Enabling %s workaround for %s\n", + local ? "local" : "global", wa->desc); } +#define erratum_handler(fn, r, ...) 
\ +({ \ + bool __val; \ + if (needs_unstable_timer_counter_workaround()) { \ + const struct arch_timer_erratum_workaround *__wa; \ + __wa = __this_cpu_read(timer_unstable_counter_workaround); \ + if (__wa && __wa->fn) { \ + r = __wa->fn(__VA_ARGS__); \ + __val = true; \ + } else { \ + __val = false; \ + } \ + } else { \ + __val = false; \ + } \ + __val; \ +}) + +static bool arch_timer_this_cpu_has_cntvct_wa(void) +{ + const struct arch_timer_erratum_workaround *wa; + + wa = __this_cpu_read(timer_unstable_counter_workaround); + return wa && wa->read_cntvct_el0; +} +#else +#define arch_timer_check_ool_workaround(t,a) do { } while(0) +#define erratum_set_next_event_tval_virt(...) ({BUG(); 0;}) +#define erratum_set_next_event_tval_phys(...) ({BUG(); 0;}) +#define erratum_handler(fn, r, ...) ({false;}) +#define arch_timer_this_cpu_has_cntvct_wa() ({false;}) +#endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ + static __always_inline irqreturn_t timer_handler(const int access, struct clock_event_device *evt) { @@ -348,43 +632,14 @@ static __always_inline void set_next_event(const int access, unsigned long evt, arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); } -#ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND -static __always_inline void erratum_set_next_event_generic(const int access, - unsigned long evt, struct clock_event_device *clk) -{ - unsigned long ctrl; - u64 cval = evt + arch_counter_get_cntvct(); - - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); - ctrl |= ARCH_TIMER_CTRL_ENABLE; - ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; - - if (access == ARCH_TIMER_PHYS_ACCESS) - write_sysreg(cval, cntp_cval_el0); - else if (access == ARCH_TIMER_VIRT_ACCESS) - write_sysreg(cval, cntv_cval_el0); - - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); -} - -static int erratum_set_next_event_virt(unsigned long evt, - struct clock_event_device *clk) -{ - erratum_set_next_event_generic(ARCH_TIMER_VIRT_ACCESS, evt, clk); - return 0; -} - -static int 
erratum_set_next_event_phys(unsigned long evt, - struct clock_event_device *clk) -{ - erratum_set_next_event_generic(ARCH_TIMER_PHYS_ACCESS, evt, clk); - return 0; -} -#endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ - static int arch_timer_set_next_event_virt(unsigned long evt, struct clock_event_device *clk) { + int ret; + + if (erratum_handler(set_next_event_virt, ret, evt, clk)) + return ret; + set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk); return 0; } @@ -392,6 +647,11 @@ static int arch_timer_set_next_event_virt(unsigned long evt, static int arch_timer_set_next_event_phys(unsigned long evt, struct clock_event_device *clk) { + int ret; + + if (erratum_handler(set_next_event_phys, ret, evt, clk)) + return ret; + set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk); return 0; } @@ -410,25 +670,12 @@ static int arch_timer_set_next_event_phys_mem(unsigned long evt, return 0; } -static void erratum_workaround_set_sne(struct clock_event_device *clk) -{ -#ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND - if (!static_branch_unlikely(&arch_timer_read_ool_enabled)) - return; - - if (arch_timer_uses_ppi == VIRT_PPI) - clk->set_next_event = erratum_set_next_event_virt; - else - clk->set_next_event = erratum_set_next_event_phys; -#endif -} - static void __arch_timer_setup(unsigned type, struct clock_event_device *clk) { clk->features = CLOCK_EVT_FEAT_ONESHOT; - if (type == ARCH_CP15_TIMER) { + if (type == ARCH_TIMER_TYPE_CP15) { if (arch_timer_c3stop) clk->features |= CLOCK_EVT_FEAT_C3STOP; clk->name = "arch_sys_timer"; @@ -436,14 +683,14 @@ static void __arch_timer_setup(unsigned type, clk->cpumask = cpumask_of(smp_processor_id()); clk->irq = arch_timer_ppi[arch_timer_uses_ppi]; switch (arch_timer_uses_ppi) { - case VIRT_PPI: + case ARCH_TIMER_VIRT_PPI: clk->set_state_shutdown = arch_timer_shutdown_virt; clk->set_state_oneshot_stopped = arch_timer_shutdown_virt; clk->set_next_event = arch_timer_set_next_event_virt; break; - case PHYS_SECURE_PPI: - case PHYS_NONSECURE_PPI: - 
case HYP_PPI: + case ARCH_TIMER_PHYS_SECURE_PPI: + case ARCH_TIMER_PHYS_NONSECURE_PPI: + case ARCH_TIMER_HYP_PPI: clk->set_state_shutdown = arch_timer_shutdown_phys; clk->set_state_oneshot_stopped = arch_timer_shutdown_phys; clk->set_next_event = arch_timer_set_next_event_phys; @@ -452,7 +699,7 @@ static void __arch_timer_setup(unsigned type, BUG(); } - erratum_workaround_set_sne(clk); + arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); } else { clk->features |= CLOCK_EVT_FEAT_DYNIRQ; clk->name = "arch_mem_timer"; @@ -508,23 +755,31 @@ static void arch_counter_set_user_access(void) { u32 cntkctl = arch_timer_get_cntkctl(); - /* Disable user access to the timers and the physical counter */ + /* Disable user access to the timers and both counters */ /* Also disable virtual event stream */ cntkctl &= ~(ARCH_TIMER_USR_PT_ACCESS_EN | ARCH_TIMER_USR_VT_ACCESS_EN + | ARCH_TIMER_USR_VCT_ACCESS_EN | ARCH_TIMER_VIRT_EVT_EN | ARCH_TIMER_USR_PCT_ACCESS_EN); - /* Enable user access to the virtual counter */ - cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; + /* + * Enable user access to the virtual counter if it doesn't + * need to be workaround. The vdso may have been already + * disabled though. 
+ */ + if (arch_timer_this_cpu_has_cntvct_wa()) + pr_info("CPU%d: Trapping CNTVCT access\n", smp_processor_id()); + else + cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; arch_timer_set_cntkctl(cntkctl); } static bool arch_timer_has_nonsecure_ppi(void) { - return (arch_timer_uses_ppi == PHYS_SECURE_PPI && - arch_timer_ppi[PHYS_NONSECURE_PPI]); + return (arch_timer_uses_ppi == ARCH_TIMER_PHYS_SECURE_PPI && + arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]); } static u32 check_ppi_trigger(int irq) @@ -545,14 +800,15 @@ static int arch_timer_starting_cpu(unsigned int cpu) struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt); u32 flags; - __arch_timer_setup(ARCH_CP15_TIMER, clk); + __arch_timer_setup(ARCH_TIMER_TYPE_CP15, clk); flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]); enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags); if (arch_timer_has_nonsecure_ppi()) { - flags = check_ppi_trigger(arch_timer_ppi[PHYS_NONSECURE_PPI]); - enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], flags); + flags = check_ppi_trigger(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]); + enable_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI], + flags); } arch_counter_set_user_access(); @@ -562,43 +818,39 @@ static int arch_timer_starting_cpu(unsigned int cpu) return 0; } -static void -arch_timer_detect_rate(void __iomem *cntbase, struct device_node *np) +/* + * For historical reasons, when probing with DT we use whichever (non-zero) + * rate was probed first, and don't verify that others match. If the first node + * probed has a clock-frequency property, this overrides the HW register. + */ +static void arch_timer_of_configure_rate(u32 rate, struct device_node *np) { /* Who has more than one independent system counter? */ if (arch_timer_rate) return; - /* - * Try to determine the frequency from the device tree or CNTFRQ, - * if ACPI is enabled, get the frequency from CNTFRQ ONLY. 
- */ - if (!acpi_disabled || - of_property_read_u32(np, "clock-frequency", &arch_timer_rate)) { - if (cntbase) - arch_timer_rate = readl_relaxed(cntbase + CNTFRQ); - else - arch_timer_rate = arch_timer_get_cntfrq(); - } + if (of_property_read_u32(np, "clock-frequency", &arch_timer_rate)) + arch_timer_rate = rate; /* Check the timer frequency. */ if (arch_timer_rate == 0) - pr_warn("Architected timer frequency not available\n"); + pr_warn("frequency not available\n"); } static void arch_timer_banner(unsigned type) { - pr_info("Architected %s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n", - type & ARCH_CP15_TIMER ? "cp15" : "", - type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? " and " : "", - type & ARCH_MEM_TIMER ? "mmio" : "", - (unsigned long)arch_timer_rate / 1000000, - (unsigned long)(arch_timer_rate / 10000) % 100, - type & ARCH_CP15_TIMER ? - (arch_timer_uses_ppi == VIRT_PPI) ? "virt" : "phys" : + pr_info("%s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n", + type & ARCH_TIMER_TYPE_CP15 ? "cp15" : "", + type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ? + " and " : "", + type & ARCH_TIMER_TYPE_MEM ? "mmio" : "", + (unsigned long)arch_timer_rate / 1000000, + (unsigned long)(arch_timer_rate / 10000) % 100, + type & ARCH_TIMER_TYPE_CP15 ? + (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ? "virt" : "phys" : "", - type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? "/" : "", - type & ARCH_MEM_TIMER ? + type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ? "/" : "", + type & ARCH_TIMER_TYPE_MEM ? arch_timer_mem_use_virtual ? "virt" : "phys" : ""); } @@ -621,37 +873,6 @@ static u64 arch_counter_get_cntvct_mem(void) return ((u64) vct_hi << 32) | vct_lo; } -/* - * Default to cp15 based access because arm64 uses this function for - * sched_clock() before DT is probed and the cp15 method is guaranteed - * to exist on arm64. arm doesn't use this before DT is probed so even - * if we don't have the cp15 accessors we won't have a problem. 
- */ -u64 (*arch_timer_read_counter)(void) = arch_counter_get_cntvct; - -static u64 arch_counter_read(struct clocksource *cs) -{ - return arch_timer_read_counter(); -} - -static u64 arch_counter_read_cc(const struct cyclecounter *cc) -{ - return arch_timer_read_counter(); -} - -static struct clocksource clocksource_counter = { - .name = "arch_sys_counter", - .rating = 400, - .read = arch_counter_read, - .mask = CLOCKSOURCE_MASK(56), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -static struct cyclecounter cyclecounter __ro_after_init = { - .read = arch_counter_read_cc, - .mask = CLOCKSOURCE_MASK(56), -}; - static struct arch_timer_kvm_info arch_timer_kvm_info; struct arch_timer_kvm_info *arch_timer_get_kvm_info(void) @@ -664,22 +885,14 @@ static void __init arch_counter_register(unsigned type) u64 start_count; /* Register the CP15 based counter if we have one */ - if (type & ARCH_CP15_TIMER) { - if (IS_ENABLED(CONFIG_ARM64) || arch_timer_uses_ppi == VIRT_PPI) + if (type & ARCH_TIMER_TYPE_CP15) { + if (IS_ENABLED(CONFIG_ARM64) || + arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) arch_timer_read_counter = arch_counter_get_cntvct; else arch_timer_read_counter = arch_counter_get_cntpct; - clocksource_counter.archdata.vdso_direct = true; - -#ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND - /* - * Don't use the vdso fastpath if errata require using - * the out-of-line counter accessor. 
- */ - if (static_branch_unlikely(&arch_timer_read_ool_enabled)) - clocksource_counter.archdata.vdso_direct = false; -#endif + clocksource_counter.archdata.vdso_direct = vdso_default; } else { arch_timer_read_counter = arch_counter_get_cntvct_mem; } @@ -699,12 +912,11 @@ static void __init arch_counter_register(unsigned type) static void arch_timer_stop(struct clock_event_device *clk) { - pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n", - clk->irq, smp_processor_id()); + pr_debug("disable IRQ%d cpu #%d\n", clk->irq, smp_processor_id()); disable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi]); if (arch_timer_has_nonsecure_ppi()) - disable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI]); + disable_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]); clk->set_state_shutdown(clk); } @@ -718,14 +930,14 @@ static int arch_timer_dying_cpu(unsigned int cpu) } #ifdef CONFIG_CPU_PM -static unsigned int saved_cntkctl; +static DEFINE_PER_CPU(unsigned long, saved_cntkctl); static int arch_timer_cpu_pm_notify(struct notifier_block *self, unsigned long action, void *hcpu) { if (action == CPU_PM_ENTER) - saved_cntkctl = arch_timer_get_cntkctl(); + __this_cpu_write(saved_cntkctl, arch_timer_get_cntkctl()); else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) - arch_timer_set_cntkctl(saved_cntkctl); + arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl)); return NOTIFY_OK; } @@ -767,24 +979,24 @@ static int __init arch_timer_register(void) ppi = arch_timer_ppi[arch_timer_uses_ppi]; switch (arch_timer_uses_ppi) { - case VIRT_PPI: + case ARCH_TIMER_VIRT_PPI: err = request_percpu_irq(ppi, arch_timer_handler_virt, "arch_timer", arch_timer_evt); break; - case PHYS_SECURE_PPI: - case PHYS_NONSECURE_PPI: + case ARCH_TIMER_PHYS_SECURE_PPI: + case ARCH_TIMER_PHYS_NONSECURE_PPI: err = request_percpu_irq(ppi, arch_timer_handler_phys, "arch_timer", arch_timer_evt); - if (!err && arch_timer_ppi[PHYS_NONSECURE_PPI]) { - ppi = arch_timer_ppi[PHYS_NONSECURE_PPI]; + if (!err 
&& arch_timer_has_nonsecure_ppi()) { + ppi = arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]; err = request_percpu_irq(ppi, arch_timer_handler_phys, "arch_timer", arch_timer_evt); if (err) - free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], + free_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_SECURE_PPI], arch_timer_evt); } break; - case HYP_PPI: + case ARCH_TIMER_HYP_PPI: err = request_percpu_irq(ppi, arch_timer_handler_phys, "arch_timer", arch_timer_evt); break; @@ -793,8 +1005,7 @@ static int __init arch_timer_register(void) } if (err) { - pr_err("arch_timer: can't register interrupt %d (%d)\n", - ppi, err); + pr_err("can't register interrupt %d (%d)\n", ppi, err); goto out_free; } @@ -817,7 +1028,7 @@ out_unreg_cpupm: out_unreg_notify: free_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], arch_timer_evt); if (arch_timer_has_nonsecure_ppi()) - free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], + free_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI], arch_timer_evt); out_free: @@ -838,7 +1049,7 @@ static int __init arch_timer_mem_register(void __iomem *base, unsigned int irq) t->base = base; t->evt.irq = irq; - __arch_timer_setup(ARCH_MEM_TIMER, &t->evt); + __arch_timer_setup(ARCH_TIMER_TYPE_MEM, &t->evt); if (arch_timer_mem_use_virtual) func = arch_timer_handler_virt_mem; @@ -847,7 +1058,7 @@ static int __init arch_timer_mem_register(void __iomem *base, unsigned int irq) ret = request_irq(irq, func, IRQF_TIMER, "arch_mem_timer", &t->evt); if (ret) { - pr_err("arch_timer: Failed to request mem timer irq\n"); + pr_err("Failed to request mem timer irq\n"); kfree(t); } @@ -865,15 +1076,28 @@ static const struct of_device_id arch_timer_mem_of_match[] __initconst = { {}, }; -static bool __init -arch_timer_needs_probing(int type, const struct of_device_id *matches) +static bool __init arch_timer_needs_of_probing(void) { struct device_node *dn; bool needs_probing = false; + unsigned int mask = ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM; + + /* We have two timers, and 
both device-tree nodes are probed. */ + if ((arch_timers_present & mask) == mask) + return false; - dn = of_find_matching_node(NULL, matches); - if (dn && of_device_is_available(dn) && !(arch_timers_present & type)) + /* + * Only one type of timer is probed, + * check if we have another type of timer node in device-tree. + */ + if (arch_timers_present & ARCH_TIMER_TYPE_CP15) + dn = of_find_matching_node(NULL, arch_timer_mem_of_match); + else + dn = of_find_matching_node(NULL, arch_timer_of_match); + + if (dn && of_device_is_available(dn)) needs_probing = true; + of_node_put(dn); return needs_probing; @@ -881,96 +1105,66 @@ arch_timer_needs_probing(int type, const struct of_device_id *matches) static int __init arch_timer_common_init(void) { - unsigned mask = ARCH_CP15_TIMER | ARCH_MEM_TIMER; - - /* Wait until both nodes are probed if we have two timers */ - if ((arch_timers_present & mask) != mask) { - if (arch_timer_needs_probing(ARCH_MEM_TIMER, arch_timer_mem_of_match)) - return 0; - if (arch_timer_needs_probing(ARCH_CP15_TIMER, arch_timer_of_match)) - return 0; - } - arch_timer_banner(arch_timers_present); arch_counter_register(arch_timers_present); return arch_timer_arch_init(); } -static int __init arch_timer_init(void) +/** + * arch_timer_select_ppi() - Select suitable PPI for the current system. + * + * If HYP mode is available, we know that the physical timer + * has been configured to be accessible from PL1. Use it, so + * that a guest can use the virtual timer instead. + * + * On ARMv8.1 with VH extensions, the kernel runs in HYP. VHE + * accesses to CNTP_*_EL1 registers are silently redirected to + * their CNTHP_*_EL2 counterparts, and use a different PPI + * number. + * + * If no interrupt provided for virtual timer, we'll have to + * stick to the physical timer. It'd better be accessible... + * For arm64 we never use the secure interrupt. + * + * Return: a suitable PPI type for the current system. 
+ */ +static enum arch_timer_ppi_nr __init arch_timer_select_ppi(void) { - int ret; - /* - * If HYP mode is available, we know that the physical timer - * has been configured to be accessible from PL1. Use it, so - * that a guest can use the virtual timer instead. - * - * If no interrupt provided for virtual timer, we'll have to - * stick to the physical timer. It'd better be accessible... - * - * On ARMv8.1 with VH extensions, the kernel runs in HYP. VHE - * accesses to CNTP_*_EL1 registers are silently redirected to - * their CNTHP_*_EL2 counterparts, and use a different PPI - * number. - */ - if (is_hyp_mode_available() || !arch_timer_ppi[VIRT_PPI]) { - bool has_ppi; - - if (is_kernel_in_hyp_mode()) { - arch_timer_uses_ppi = HYP_PPI; - has_ppi = !!arch_timer_ppi[HYP_PPI]; - } else { - arch_timer_uses_ppi = PHYS_SECURE_PPI; - has_ppi = (!!arch_timer_ppi[PHYS_SECURE_PPI] || - !!arch_timer_ppi[PHYS_NONSECURE_PPI]); - } - - if (!has_ppi) { - pr_warn("arch_timer: No interrupt available, giving up\n"); - return -EINVAL; - } - } + if (is_kernel_in_hyp_mode()) + return ARCH_TIMER_HYP_PPI; - ret = arch_timer_register(); - if (ret) - return ret; + if (!is_hyp_mode_available() && arch_timer_ppi[ARCH_TIMER_VIRT_PPI]) + return ARCH_TIMER_VIRT_PPI; - ret = arch_timer_common_init(); - if (ret) - return ret; + if (IS_ENABLED(CONFIG_ARM64)) + return ARCH_TIMER_PHYS_NONSECURE_PPI; - arch_timer_kvm_info.virtual_irq = arch_timer_ppi[VIRT_PPI]; - - return 0; + return ARCH_TIMER_PHYS_SECURE_PPI; } static int __init arch_timer_of_init(struct device_node *np) { - int i; + int i, ret; + u32 rate; - if (arch_timers_present & ARCH_CP15_TIMER) { - pr_warn("arch_timer: multiple nodes in dt, skipping\n"); + if (arch_timers_present & ARCH_TIMER_TYPE_CP15) { + pr_warn("multiple nodes in dt, skipping\n"); return 0; } - arch_timers_present |= ARCH_CP15_TIMER; - for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++) + arch_timers_present |= ARCH_TIMER_TYPE_CP15; + for (i = ARCH_TIMER_PHYS_SECURE_PPI; 
i < ARCH_TIMER_MAX_TIMER_PPI; i++) arch_timer_ppi[i] = irq_of_parse_and_map(np, i); - arch_timer_detect_rate(NULL, np); + arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; + + rate = arch_timer_get_cntfrq(); + arch_timer_of_configure_rate(rate, np); arch_timer_c3stop = !of_property_read_bool(np, "always-on"); -#ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND - for (i = 0; i < ARRAY_SIZE(ool_workarounds); i++) { - if (of_property_read_bool(np, ool_workarounds[i].id)) { - timer_unstable_counter_workaround = &ool_workarounds[i]; - static_branch_enable(&arch_timer_read_ool_enabled); - pr_info("arch_timer: Enabling workaround for %s\n", - timer_unstable_counter_workaround->id); - break; - } - } -#endif + /* Check for globally applicable workarounds */ + arch_timer_check_ool_workaround(ate_match_dt, np); /* * If we cannot rely on firmware initializing the timer registers then @@ -978,29 +1172,63 @@ static int __init arch_timer_of_init(struct device_node *np) */ if (IS_ENABLED(CONFIG_ARM) && of_property_read_bool(np, "arm,cpu-registers-not-fw-configured")) - arch_timer_uses_ppi = PHYS_SECURE_PPI; + arch_timer_uses_ppi = ARCH_TIMER_PHYS_SECURE_PPI; + else + arch_timer_uses_ppi = arch_timer_select_ppi(); + + if (!arch_timer_ppi[arch_timer_uses_ppi]) { + pr_err("No interrupt available, giving up\n"); + return -EINVAL; + } /* On some systems, the counter stops ticking when in suspend. 
*/ arch_counter_suspend_stop = of_property_read_bool(np, "arm,no-tick-in-suspend"); - return arch_timer_init(); + ret = arch_timer_register(); + if (ret) + return ret; + + if (arch_timer_needs_of_probing()) + return 0; + + return arch_timer_common_init(); } CLOCKSOURCE_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init); CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init); -static int __init arch_timer_mem_init(struct device_node *np) +static u32 __init +arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame) { - struct device_node *frame, *best_frame = NULL; - void __iomem *cntctlbase, *base; - unsigned int irq, ret = -EINVAL; + void __iomem *base; + u32 rate; + + base = ioremap(frame->cntbase, frame->size); + if (!base) { + pr_err("Unable to map frame @ %pa\n", &frame->cntbase); + return 0; + } + + rate = readl_relaxed(base + CNTFRQ); /* read through the mapping, not the frame descriptor */ + + iounmap(base); /* release the temporary mapping created above */ + + return rate; +} + +static struct arch_timer_mem_frame * __init +arch_timer_mem_find_best_frame(struct arch_timer_mem *timer_mem) +{ + struct arch_timer_mem_frame *frame, *best_frame = NULL; + void __iomem *cntctlbase; u32 cnttidr; + int i; - arch_timers_present |= ARCH_MEM_TIMER; - cntctlbase = of_iomap(np, 0); + cntctlbase = ioremap(timer_mem->cntctlbase, timer_mem->size); if (!cntctlbase) { - pr_err("arch_timer: Can't find CNTCTLBase\n"); - return -ENXIO; + pr_err("Can't map CNTCTLBase @ %pa\n", + &timer_mem->cntctlbase); + return NULL; } cnttidr = readl_relaxed(cntctlbase + CNTTIDR); @@ -1009,25 +1237,20 @@ static int __init arch_timer_mem_init(struct device_node *np) * Try to find a virtual capable frame. Otherwise fall back to a * physical capable frame. 
*/ - for_each_available_child_of_node(np, frame) { - int n; - u32 cntacr; + for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) { + u32 cntacr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT | + CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT; - if (of_property_read_u32(frame, "frame-number", &n)) { - pr_err("arch_timer: Missing frame-number\n"); - of_node_put(frame); - goto out; - } + frame = &timer_mem->frame[i]; + if (!frame->valid) + continue; /* Try enabling everything, and see what sticks */ - cntacr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT | - CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT; - writel_relaxed(cntacr, cntctlbase + CNTACR(n)); - cntacr = readl_relaxed(cntctlbase + CNTACR(n)); + writel_relaxed(cntacr, cntctlbase + CNTACR(i)); + cntacr = readl_relaxed(cntctlbase + CNTACR(i)); - if ((cnttidr & CNTTIDR_VIRT(n)) && + if ((cnttidr & CNTTIDR_VIRT(i)) && !(~cntacr & (CNTACR_RWVT | CNTACR_RVCT))) { - of_node_put(best_frame); best_frame = frame; arch_timer_mem_use_virtual = true; break; @@ -1036,99 +1259,262 @@ static int __init arch_timer_mem_init(struct device_node *np) if (~cntacr & (CNTACR_RWPT | CNTACR_RPCT)) continue; - of_node_put(best_frame); - best_frame = of_node_get(frame); + best_frame = frame; } - ret= -ENXIO; - base = arch_counter_base = of_io_request_and_map(best_frame, 0, - "arch_mem_timer"); - if (IS_ERR(base)) { - pr_err("arch_timer: Can't map frame's registers\n"); - goto out; - } + iounmap(cntctlbase); + + if (!best_frame) + pr_err("Unable to find a suitable frame in timer @ %pa\n", + &timer_mem->cntctlbase); + + return best_frame; /* the selected frame, or NULL when none qualified; not the loop cursor */ +} + +static int __init +arch_timer_mem_frame_register(struct arch_timer_mem_frame *frame) +{ + void __iomem *base; + int ret, irq = 0; if (arch_timer_mem_use_virtual) - irq = irq_of_parse_and_map(best_frame, 1); + irq = frame->virt_irq; else - irq = irq_of_parse_and_map(best_frame, 0); + irq = frame->phys_irq; - ret = -EINVAL; if (!irq) { - pr_err("arch_timer: Frame missing %s irq", + pr_err("Frame missing %s irq.\n", 
arch_timer_mem_use_virtual ? "virt" : "phys"); - goto out; + return -EINVAL; + } + + if (!request_mem_region(frame->cntbase, frame->size, + "arch_mem_timer")) + return -EBUSY; + + base = ioremap(frame->cntbase, frame->size); + if (!base) { + pr_err("Can't map frame's registers\n"); + return -ENXIO; } - arch_timer_detect_rate(base, np); ret = arch_timer_mem_register(base, irq); - if (ret) + if (ret) { + iounmap(base); + return ret; + } + + arch_counter_base = base; + arch_timers_present |= ARCH_TIMER_TYPE_MEM; + + return 0; +} + +static int __init arch_timer_mem_of_init(struct device_node *np) +{ + struct arch_timer_mem *timer_mem; + struct arch_timer_mem_frame *frame; + struct device_node *frame_node; + struct resource res; + int ret = -EINVAL; + u32 rate; + + timer_mem = kzalloc(sizeof(*timer_mem), GFP_KERNEL); + if (!timer_mem) + return -ENOMEM; + + if (of_address_to_resource(np, 0, &res)) goto out; + timer_mem->cntctlbase = res.start; + timer_mem->size = resource_size(&res); - return arch_timer_common_init(); + for_each_available_child_of_node(np, frame_node) { + u32 n; + struct arch_timer_mem_frame *frame; + + if (of_property_read_u32(frame_node, "frame-number", &n)) { + pr_err(FW_BUG "Missing frame-number.\n"); + of_node_put(frame_node); + goto out; + } + if (n >= ARCH_TIMER_MEM_MAX_FRAMES) { + pr_err(FW_BUG "Wrong frame-number, only 0-%u are permitted.\n", + ARCH_TIMER_MEM_MAX_FRAMES - 1); + of_node_put(frame_node); + goto out; + } + frame = &timer_mem->frame[n]; + + if (frame->valid) { + pr_err(FW_BUG "Duplicated frame-number.\n"); + of_node_put(frame_node); + goto out; + } + + if (of_address_to_resource(frame_node, 0, &res)) { + of_node_put(frame_node); + goto out; + } + frame->cntbase = res.start; + frame->size = resource_size(&res); + + frame->virt_irq = irq_of_parse_and_map(frame_node, + ARCH_TIMER_VIRT_SPI); + frame->phys_irq = irq_of_parse_and_map(frame_node, + ARCH_TIMER_PHYS_SPI); + + frame->valid = true; + } + + frame = 
arch_timer_mem_find_best_frame(timer_mem); + if (!frame) { + ret = -EINVAL; + goto out; + } + + rate = arch_timer_mem_frame_get_cntfrq(frame); + arch_timer_of_configure_rate(rate, np); + + ret = arch_timer_mem_frame_register(frame); + if (!ret && !arch_timer_needs_of_probing()) + ret = arch_timer_common_init(); out: - iounmap(cntctlbase); - of_node_put(best_frame); + kfree(timer_mem); return ret; } CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem", - arch_timer_mem_init); + arch_timer_mem_of_init); -#ifdef CONFIG_ACPI -static int __init map_generic_timer_interrupt(u32 interrupt, u32 flags) +#ifdef CONFIG_ACPI_GTDT +static int __init +arch_timer_mem_verify_cntfrq(struct arch_timer_mem *timer_mem) { - int trigger, polarity; + struct arch_timer_mem_frame *frame; + u32 rate; + int i; - if (!interrupt) - return 0; + for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) { + frame = &timer_mem->frame[i]; - trigger = (flags & ACPI_GTDT_INTERRUPT_MODE) ? ACPI_EDGE_SENSITIVE - : ACPI_LEVEL_SENSITIVE; + if (!frame->valid) + continue; + + rate = arch_timer_mem_frame_get_cntfrq(frame); + if (rate == arch_timer_rate) + continue; + + pr_err(FW_BUG "CNTFRQ mismatch: frame @ %pa: (0x%08lx), CPU: (0x%08lx)\n", + &frame->cntbase, + (unsigned long)rate, (unsigned long)arch_timer_rate); - polarity = (flags & ACPI_GTDT_INTERRUPT_POLARITY) ? 
ACPI_ACTIVE_LOW - : ACPI_ACTIVE_HIGH; + return -EINVAL; + } - return acpi_register_gsi(NULL, interrupt, trigger, polarity); + return 0; } -/* Initialize per-processor generic timer */ +static int __init arch_timer_mem_acpi_init(int platform_timer_count) +{ + struct arch_timer_mem *timers, *timer; + struct arch_timer_mem_frame *frame = NULL; + int timer_count, i, ret = 0; + + timers = kcalloc(platform_timer_count, sizeof(*timers), + GFP_KERNEL); + if (!timers) + return -ENOMEM; + + ret = acpi_arch_timer_mem_init(timers, &timer_count); + if (ret || !timer_count) + goto out; + + for (i = 0; i < timer_count; i++) { + ret = arch_timer_mem_verify_cntfrq(&timers[i]); + if (ret) { + pr_err("Disabling MMIO timers due to CNTFRQ mismatch\n"); + goto out; + } + } + + /* + * While unlikely, it's theoretically possible that none of the frames + * in a timer expose the combination of feature we want. + */ + for (i = 0; i < timer_count; i++) { /* rescan from the first timer; i was left at timer_count above */ + timer = &timers[i]; + + frame = arch_timer_mem_find_best_frame(timer); + if (frame) + break; + } + + if (frame) + ret = arch_timer_mem_frame_register(frame); +out: + kfree(timers); + return ret; +} + +/* Initialize per-processor generic timer and memory-mapped timer(if present) */ static int __init arch_timer_acpi_init(struct acpi_table_header *table) { - struct acpi_table_gtdt *gtdt; + int ret, platform_timer_count; - if (arch_timers_present & ARCH_CP15_TIMER) { - pr_warn("arch_timer: already initialized, skipping\n"); + if (arch_timers_present & ARCH_TIMER_TYPE_CP15) { + pr_warn("already initialized, skipping\n"); return -EINVAL; } - gtdt = container_of(table, struct acpi_table_gtdt, header); + arch_timers_present |= ARCH_TIMER_TYPE_CP15; + + ret = acpi_gtdt_init(table, &platform_timer_count); + if (ret) { + pr_err("Failed to init GTDT table.\n"); + return ret; + } - arch_timers_present |= ARCH_CP15_TIMER; + arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI] = + acpi_gtdt_map_ppi(ARCH_TIMER_PHYS_NONSECURE_PPI); - arch_timer_ppi[PHYS_SECURE_PPI] = - 
map_generic_timer_interrupt(gtdt->secure_el1_interrupt, - gtdt->secure_el1_flags); + arch_timer_ppi[ARCH_TIMER_VIRT_PPI] = + acpi_gtdt_map_ppi(ARCH_TIMER_VIRT_PPI); - arch_timer_ppi[PHYS_NONSECURE_PPI] = - map_generic_timer_interrupt(gtdt->non_secure_el1_interrupt, - gtdt->non_secure_el1_flags); + arch_timer_ppi[ARCH_TIMER_HYP_PPI] = + acpi_gtdt_map_ppi(ARCH_TIMER_HYP_PPI); - arch_timer_ppi[VIRT_PPI] = - map_generic_timer_interrupt(gtdt->virtual_timer_interrupt, - gtdt->virtual_timer_flags); + arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; - arch_timer_ppi[HYP_PPI] = - map_generic_timer_interrupt(gtdt->non_secure_el2_interrupt, - gtdt->non_secure_el2_flags); + /* + * When probing via ACPI, we have no mechanism to override the sysreg + * CNTFRQ value. This *must* be correct. + */ + arch_timer_rate = arch_timer_get_cntfrq(); + if (!arch_timer_rate) { + pr_err(FW_BUG "frequency not available.\n"); + return -EINVAL; + } - /* Get the frequency from CNTFRQ */ - arch_timer_detect_rate(NULL, NULL); + arch_timer_uses_ppi = arch_timer_select_ppi(); + if (!arch_timer_ppi[arch_timer_uses_ppi]) { + pr_err("No interrupt available, giving up\n"); + return -EINVAL; + } /* Always-on capability */ - arch_timer_c3stop = !(gtdt->non_secure_el1_flags & ACPI_GTDT_ALWAYS_ON); + arch_timer_c3stop = acpi_gtdt_c3stop(arch_timer_uses_ppi); - arch_timer_init(); - return 0; + /* Check for globally applicable workarounds */ + arch_timer_check_ool_workaround(ate_match_acpi_oem_info, table); + + ret = arch_timer_register(); + if (ret) + return ret; + + if (platform_timer_count && + arch_timer_mem_acpi_init(platform_timer_count)) + pr_err("Failed to initialize memory-mapped timer.\n"); + + return arch_timer_common_init(); } CLOCKSOURCE_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init); #endif diff --git a/drivers/clocksource/asm9260_timer.c b/drivers/clocksource/asm9260_timer.c index 1ba871b7fe11..c6780830b8ac 100644 --- a/drivers/clocksource/asm9260_timer.c +++ 
b/drivers/clocksource/asm9260_timer.c @@ -193,7 +193,7 @@ static int __init asm9260_timer_init(struct device_node *np) priv.base = of_io_request_and_map(np, 0, np->name); if (IS_ERR(priv.base)) { - pr_err("%s: unable to map resource", np->name); + pr_err("%s: unable to map resource\n", np->name); return PTR_ERR(priv.base); } diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c index f2f29d2be1cf..dce44307469e 100644 --- a/drivers/clocksource/bcm2835_timer.c +++ b/drivers/clocksource/bcm2835_timer.c @@ -89,13 +89,13 @@ static int __init bcm2835_timer_init(struct device_node *node) base = of_iomap(node, 0); if (!base) { - pr_err("Can't remap registers"); + pr_err("Can't remap registers\n"); return -ENXIO; } ret = of_property_read_u32(node, "clock-frequency", &freq); if (ret) { - pr_err("Can't read clock-frequency"); + pr_err("Can't read clock-frequency\n"); goto err_iounmap; } @@ -107,7 +107,7 @@ static int __init bcm2835_timer_init(struct device_node *node) irq = irq_of_parse_and_map(node, DEFAULT_TIMER); if (irq <= 0) { - pr_err("Can't parse IRQ"); + pr_err("Can't parse IRQ\n"); ret = -EINVAL; goto err_iounmap; } diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c index 92f6e4deee74..fda5e1476638 100644 --- a/drivers/clocksource/bcm_kona_timer.c +++ b/drivers/clocksource/bcm_kona_timer.c @@ -179,7 +179,7 @@ static int __init kona_timer_init(struct device_node *node) } else if (!of_property_read_u32(node, "clock-frequency", &freq)) { arch_timer_rate = freq; } else { - pr_err("Kona Timer v1 unable to determine clock-frequency"); + pr_err("Kona Timer v1 unable to determine clock-frequency\n"); return -EINVAL; } diff --git a/drivers/clocksource/clksrc-probe.c b/drivers/clocksource/clksrc-probe.c index bc62be97f0a8..ac701ffb8d59 100644 --- a/drivers/clocksource/clksrc-probe.c +++ b/drivers/clocksource/clksrc-probe.c @@ -40,7 +40,7 @@ void __init clocksource_probe(void) ret = init_func_ret(np); if 
(ret) { - pr_err("Failed to initialize '%s': %d", + pr_err("Failed to initialize '%s': %d\n", of_node_full_name(np), ret); continue; } diff --git a/drivers/clocksource/dw_apb_timer.c b/drivers/clocksource/dw_apb_timer.c index 63e4f5519577..1f5f734e4919 100644 --- a/drivers/clocksource/dw_apb_timer.c +++ b/drivers/clocksource/dw_apb_timer.c @@ -101,7 +101,7 @@ static irqreturn_t dw_apb_clockevent_irq(int irq, void *data) struct dw_apb_clock_event_device *dw_ced = ced_to_dw_apb_ced(evt); if (!evt->event_handler) { - pr_info("Spurious APBT timer interrupt %d", irq); + pr_info("Spurious APBT timer interrupt %d\n", irq); return IRQ_NONE; } @@ -257,7 +257,9 @@ dw_apb_clockevent_init(int cpu, const char *name, unsigned rating, clockevents_calc_mult_shift(&dw_ced->ced, freq, APBT_MIN_PERIOD); dw_ced->ced.max_delta_ns = clockevent_delta2ns(0x7fffffff, &dw_ced->ced); + dw_ced->ced.max_delta_ticks = 0x7fffffff; dw_ced->ced.min_delta_ns = clockevent_delta2ns(5000, &dw_ced->ced); + dw_ced->ced.min_delta_ticks = 5000; dw_ced->ced.cpumask = cpumask_of(cpu); dw_ced->ced.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ; diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c index aff87df07449..bc48cbf6a795 100644 --- a/drivers/clocksource/em_sti.c +++ b/drivers/clocksource/em_sti.c @@ -78,15 +78,12 @@ static int em_sti_enable(struct em_sti_priv *p) int ret; /* enable clock */ - ret = clk_prepare_enable(p->clk); + ret = clk_enable(p->clk); if (ret) { dev_err(&p->pdev->dev, "cannot enable clock\n"); return ret; } - /* configure channel, periodic mode and maximum timeout */ - p->rate = clk_get_rate(p->clk); - /* reset the counter */ em_sti_write(p, STI_SET_H, 0x40000000); em_sti_write(p, STI_SET_L, 0x00000000); @@ -107,7 +104,7 @@ static void em_sti_disable(struct em_sti_priv *p) em_sti_write(p, STI_INTENCLR, 3); /* stop clock */ - clk_disable_unprepare(p->clk); + clk_disable(p->clk); } static u64 em_sti_count(struct em_sti_priv *p) 
@@ -205,13 +202,9 @@ static u64 em_sti_clocksource_read(struct clocksource *cs) static int em_sti_clocksource_enable(struct clocksource *cs) { - int ret; struct em_sti_priv *p = cs_to_em_sti(cs); - ret = em_sti_start(p, USER_CLOCKSOURCE); - if (!ret) - __clocksource_update_freq_hz(cs, p->rate); - return ret; + return em_sti_start(p, USER_CLOCKSOURCE); } static void em_sti_clocksource_disable(struct clocksource *cs) @@ -240,8 +233,7 @@ static int em_sti_register_clocksource(struct em_sti_priv *p) dev_info(&p->pdev->dev, "used as clock source\n"); - /* Register with dummy 1 Hz value, gets updated in ->enable() */ - clocksource_register_hz(cs, 1); + clocksource_register_hz(cs, p->rate); return 0; } @@ -263,7 +255,6 @@ static int em_sti_clock_event_set_oneshot(struct clock_event_device *ced) dev_info(&p->pdev->dev, "used for oneshot clock events\n"); em_sti_start(p, USER_CLOCKEVENT); - clockevents_config(&p->ced, p->rate); return 0; } @@ -294,8 +285,7 @@ static void em_sti_register_clockevent(struct em_sti_priv *p) dev_info(&p->pdev->dev, "used for clock events\n"); - /* Register with dummy 1 Hz value, gets updated in ->set_state_oneshot() */ - clockevents_config_and_register(ced, 1, 2, 0xffffffff); + clockevents_config_and_register(ced, p->rate, 2, 0xffffffff); } static int em_sti_probe(struct platform_device *pdev) @@ -303,6 +293,7 @@ static int em_sti_probe(struct platform_device *pdev) struct em_sti_priv *p; struct resource *res; int irq; + int ret; p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL); if (p == NULL) @@ -323,6 +314,13 @@ static int em_sti_probe(struct platform_device *pdev) if (IS_ERR(p->base)) return PTR_ERR(p->base); + if (devm_request_irq(&pdev->dev, irq, em_sti_interrupt, + IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING, + dev_name(&pdev->dev), p)) { + dev_err(&pdev->dev, "failed to request low IRQ\n"); + return -ENOENT; + } + /* get hold of clock */ p->clk = devm_clk_get(&pdev->dev, "sclk"); if (IS_ERR(p->clk)) { @@ -330,12 +328,20 @@ static int 
em_sti_probe(struct platform_device *pdev) return PTR_ERR(p->clk); } - if (devm_request_irq(&pdev->dev, irq, em_sti_interrupt, - IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING, - dev_name(&pdev->dev), p)) { - dev_err(&pdev->dev, "failed to request low IRQ\n"); - return -ENOENT; + ret = clk_prepare(p->clk); + if (ret < 0) { + dev_err(&pdev->dev, "cannot prepare clock\n"); + return ret; + } + + ret = clk_enable(p->clk); + if (ret < 0) { + dev_err(&p->pdev->dev, "cannot enable clock\n"); + clk_unprepare(p->clk); + return ret; } + p->rate = clk_get_rate(p->clk); + clk_disable(p->clk); raw_spin_lock_init(&p->lock); em_sti_register_clockevent(p); diff --git a/drivers/clocksource/h8300_timer8.c b/drivers/clocksource/h8300_timer8.c index 546bb180f5a4..804c489531d6 100644 --- a/drivers/clocksource/h8300_timer8.c +++ b/drivers/clocksource/h8300_timer8.c @@ -101,15 +101,7 @@ static inline struct timer8_priv *ced_to_priv(struct clock_event_device *ced) static void timer8_clock_event_start(struct timer8_priv *p, unsigned long delta) { - struct clock_event_device *ced = &p->ced; - timer8_start(p); - - ced->shift = 32; - ced->mult = div_sc(p->rate, NSEC_PER_SEC, ced->shift); - ced->max_delta_ns = clockevent_delta2ns(0xffff, ced); - ced->min_delta_ns = clockevent_delta2ns(0x0001, ced); - timer8_set_next(p, delta); } diff --git a/drivers/clocksource/meson6_timer.c b/drivers/clocksource/meson6_timer.c index 52af591a9fc7..39d21f693a33 100644 --- a/drivers/clocksource/meson6_timer.c +++ b/drivers/clocksource/meson6_timer.c @@ -133,13 +133,13 @@ static int __init meson6_timer_init(struct device_node *node) timer_base = of_io_request_and_map(node, 0, "meson6-timer"); if (IS_ERR(timer_base)) { - pr_err("Can't map registers"); + pr_err("Can't map registers\n"); return -ENXIO; } irq = irq_of_parse_and_map(node, 0); if (irq <= 0) { - pr_err("Can't parse IRQ"); + pr_err("Can't parse IRQ\n"); return -EINVAL; } diff --git a/drivers/clocksource/metag_generic.c 
b/drivers/clocksource/metag_generic.c index 6fcf96540631..3e5fa2f62d5f 100644 --- a/drivers/clocksource/metag_generic.c +++ b/drivers/clocksource/metag_generic.c @@ -114,7 +114,9 @@ static int arch_timer_starting_cpu(unsigned int cpu) clk->mult = div_sc(hwtimer_freq, NSEC_PER_SEC, clk->shift); clk->max_delta_ns = clockevent_delta2ns(0x7fffffff, clk); + clk->max_delta_ticks = 0x7fffffff; clk->min_delta_ns = clockevent_delta2ns(0xf, clk); + clk->min_delta_ticks = 0xf; clk->cpumask = cpumask_of(cpu); clockevents_register_device(clk); diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c index d9ef7a61e093..3f52ee219923 100644 --- a/drivers/clocksource/mips-gic-timer.c +++ b/drivers/clocksource/mips-gic-timer.c @@ -154,19 +154,6 @@ static int __init __gic_clocksource_init(void) return ret; } -void __init gic_clocksource_init(unsigned int frequency) -{ - gic_frequency = frequency; - gic_timer_irq = MIPS_GIC_IRQ_BASE + - GIC_LOCAL_TO_HWIRQ(GIC_LOCAL_INT_COMPARE); - - __gic_clocksource_init(); - gic_clockevent_init(); - - /* And finally start the counter */ - gic_start_count(); -} - static int __init gic_clocksource_of_init(struct device_node *node) { struct clk *clk; @@ -174,7 +161,7 @@ static int __init gic_clocksource_of_init(struct device_node *node) if (!gic_present || !node->parent || !of_device_is_compatible(node->parent, "mti,gic")) { - pr_warn("No DT definition for the mips gic driver"); + pr_warn("No DT definition for the mips gic driver\n"); return -ENXIO; } diff --git a/drivers/clocksource/nomadik-mtu.c b/drivers/clocksource/nomadik-mtu.c index 3c124d1ca600..7d44de304f37 100644 --- a/drivers/clocksource/nomadik-mtu.c +++ b/drivers/clocksource/nomadik-mtu.c @@ -260,25 +260,25 @@ static int __init nmdk_timer_of_init(struct device_node *node) base = of_iomap(node, 0); if (!base) { - pr_err("Can't remap registers"); + pr_err("Can't remap registers\n"); return -ENXIO; } pclk = of_clk_get_by_name(node, "apb_pclk"); if 
(IS_ERR(pclk)) { - pr_err("could not get apb_pclk"); + pr_err("could not get apb_pclk\n"); return PTR_ERR(pclk); } clk = of_clk_get_by_name(node, "timclk"); if (IS_ERR(clk)) { - pr_err("could not get timclk"); + pr_err("could not get timclk\n"); return PTR_ERR(clk); } irq = irq_of_parse_and_map(node, 0); if (irq <= 0) { - pr_err("Can't parse IRQ"); + pr_err("Can't parse IRQ\n"); return -EINVAL; } diff --git a/drivers/clocksource/numachip.c b/drivers/clocksource/numachip.c index 4e0f11fd2617..6a20dc8b253f 100644 --- a/drivers/clocksource/numachip.c +++ b/drivers/clocksource/numachip.c @@ -51,7 +51,9 @@ static struct clock_event_device numachip2_clockevent = { .mult = 1, .shift = 0, .min_delta_ns = 1250, + .min_delta_ticks = 1250, .max_delta_ns = LONG_MAX, + .max_delta_ticks = LONG_MAX, }; static void numachip_timer_interrupt(void) diff --git a/drivers/clocksource/pxa_timer.c b/drivers/clocksource/pxa_timer.c index 1c24de215c14..a10fa667325f 100644 --- a/drivers/clocksource/pxa_timer.c +++ b/drivers/clocksource/pxa_timer.c @@ -166,14 +166,14 @@ static int __init pxa_timer_common_init(int irq, unsigned long clock_tick_rate) ret = setup_irq(irq, &pxa_ost0_irq); if (ret) { - pr_err("Failed to setup irq"); + pr_err("Failed to setup irq\n"); return ret; } ret = clocksource_mmio_init(timer_base + OSCR, "oscr0", clock_tick_rate, 200, 32, clocksource_mmio_readl_up); if (ret) { - pr_err("Failed to init clocksource"); + pr_err("Failed to init clocksource\n"); return ret; } @@ -203,7 +203,7 @@ static int __init pxa_timer_dt_init(struct device_node *np) ret = clk_prepare_enable(clk); if (ret) { - pr_crit("Failed to prepare clock"); + pr_crit("Failed to prepare clock\n"); return ret; } diff --git a/drivers/clocksource/rockchip_timer.c b/drivers/clocksource/rockchip_timer.c index 23e267acba25..49c02be50eca 100644 --- a/drivers/clocksource/rockchip_timer.c +++ b/drivers/clocksource/rockchip_timer.c @@ -11,6 +11,8 @@ #include <linux/clockchips.h> #include <linux/init.h> #include 
<linux/interrupt.h> +#include <linux/sched_clock.h> +#include <linux/slab.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> @@ -19,6 +21,8 @@ #define TIMER_LOAD_COUNT0 0x00 #define TIMER_LOAD_COUNT1 0x04 +#define TIMER_CURRENT_VALUE0 0x08 +#define TIMER_CURRENT_VALUE1 0x0C #define TIMER_CONTROL_REG3288 0x10 #define TIMER_CONTROL_REG3399 0x1c #define TIMER_INT_STATUS 0x18 @@ -29,103 +33,118 @@ #define TIMER_MODE_USER_DEFINED_COUNT (1 << 1) #define TIMER_INT_UNMASK (1 << 2) -struct bc_timer { - struct clock_event_device ce; +struct rk_timer { void __iomem *base; void __iomem *ctrl; + struct clk *clk; + struct clk *pclk; u32 freq; + int irq; }; -static struct bc_timer bc_timer; - -static inline struct bc_timer *rk_timer(struct clock_event_device *ce) -{ - return container_of(ce, struct bc_timer, ce); -} +struct rk_clkevt { + struct clock_event_device ce; + struct rk_timer timer; +}; -static inline void __iomem *rk_base(struct clock_event_device *ce) -{ - return rk_timer(ce)->base; -} +static struct rk_clkevt *rk_clkevt; +static struct rk_timer *rk_clksrc; -static inline void __iomem *rk_ctrl(struct clock_event_device *ce) +static inline struct rk_timer *rk_timer(struct clock_event_device *ce) { - return rk_timer(ce)->ctrl; + return &container_of(ce, struct rk_clkevt, ce)->timer; } -static inline void rk_timer_disable(struct clock_event_device *ce) +static inline void rk_timer_disable(struct rk_timer *timer) { - writel_relaxed(TIMER_DISABLE, rk_ctrl(ce)); + writel_relaxed(TIMER_DISABLE, timer->ctrl); } -static inline void rk_timer_enable(struct clock_event_device *ce, u32 flags) +static inline void rk_timer_enable(struct rk_timer *timer, u32 flags) { - writel_relaxed(TIMER_ENABLE | TIMER_INT_UNMASK | flags, - rk_ctrl(ce)); + writel_relaxed(TIMER_ENABLE | flags, timer->ctrl); } static void rk_timer_update_counter(unsigned long cycles, - struct clock_event_device *ce) + struct rk_timer *timer) { - writel_relaxed(cycles, rk_base(ce) + 
TIMER_LOAD_COUNT0); - writel_relaxed(0, rk_base(ce) + TIMER_LOAD_COUNT1); + writel_relaxed(cycles, timer->base + TIMER_LOAD_COUNT0); + writel_relaxed(0, timer->base + TIMER_LOAD_COUNT1); } -static void rk_timer_interrupt_clear(struct clock_event_device *ce) +static void rk_timer_interrupt_clear(struct rk_timer *timer) { - writel_relaxed(1, rk_base(ce) + TIMER_INT_STATUS); + writel_relaxed(1, timer->base + TIMER_INT_STATUS); } static inline int rk_timer_set_next_event(unsigned long cycles, struct clock_event_device *ce) { - rk_timer_disable(ce); - rk_timer_update_counter(cycles, ce); - rk_timer_enable(ce, TIMER_MODE_USER_DEFINED_COUNT); + struct rk_timer *timer = rk_timer(ce); + + rk_timer_disable(timer); + rk_timer_update_counter(cycles, timer); + rk_timer_enable(timer, TIMER_MODE_USER_DEFINED_COUNT | + TIMER_INT_UNMASK); return 0; } static int rk_timer_shutdown(struct clock_event_device *ce) { - rk_timer_disable(ce); + struct rk_timer *timer = rk_timer(ce); + + rk_timer_disable(timer); return 0; } static int rk_timer_set_periodic(struct clock_event_device *ce) { - rk_timer_disable(ce); - rk_timer_update_counter(rk_timer(ce)->freq / HZ - 1, ce); - rk_timer_enable(ce, TIMER_MODE_FREE_RUNNING); + struct rk_timer *timer = rk_timer(ce); + + rk_timer_disable(timer); + rk_timer_update_counter(timer->freq / HZ - 1, timer); + rk_timer_enable(timer, TIMER_MODE_FREE_RUNNING | TIMER_INT_UNMASK); return 0; } static irqreturn_t rk_timer_interrupt(int irq, void *dev_id) { struct clock_event_device *ce = dev_id; + struct rk_timer *timer = rk_timer(ce); - rk_timer_interrupt_clear(ce); + rk_timer_interrupt_clear(timer); if (clockevent_state_oneshot(ce)) - rk_timer_disable(ce); + rk_timer_disable(timer); ce->event_handler(ce); return IRQ_HANDLED; } -static int __init rk_timer_init(struct device_node *np, u32 ctrl_reg) +static u64 notrace rk_timer_sched_read(void) +{ + return ~readl_relaxed(rk_clksrc->base + TIMER_CURRENT_VALUE0); +} + +static int __init +rk_timer_probe(struct 
rk_timer *timer, struct device_node *np) { - struct clock_event_device *ce = &bc_timer.ce; struct clk *timer_clk; struct clk *pclk; int ret = -EINVAL, irq; + u32 ctrl_reg = TIMER_CONTROL_REG3288; - bc_timer.base = of_iomap(np, 0); - if (!bc_timer.base) { + timer->base = of_iomap(np, 0); + if (!timer->base) { pr_err("Failed to get base address for '%s'\n", TIMER_NAME); return -ENXIO; } - bc_timer.ctrl = bc_timer.base + ctrl_reg; + + if (of_device_is_compatible(np, "rockchip,rk3399-timer")) + ctrl_reg = TIMER_CONTROL_REG3399; + + timer->ctrl = timer->base + ctrl_reg; pclk = of_clk_get_by_name(np, "pclk"); if (IS_ERR(pclk)) { @@ -139,6 +158,7 @@ static int __init rk_timer_init(struct device_node *np, u32 ctrl_reg) pr_err("Failed to enable pclk for '%s'\n", TIMER_NAME); goto out_unmap; } + timer->pclk = pclk; timer_clk = of_clk_get_by_name(np, "timer"); if (IS_ERR(timer_clk)) { @@ -152,8 +172,9 @@ static int __init rk_timer_init(struct device_node *np, u32 ctrl_reg) pr_err("Failed to enable timer clock\n"); goto out_timer_clk; } + timer->clk = timer_clk; - bc_timer.freq = clk_get_rate(timer_clk); + timer->freq = clk_get_rate(timer_clk); irq = irq_of_parse_and_map(np, 0); if (!irq) { @@ -161,51 +182,126 @@ static int __init rk_timer_init(struct device_node *np, u32 ctrl_reg) pr_err("Failed to map interrupts for '%s'\n", TIMER_NAME); goto out_irq; } + timer->irq = irq; + + rk_timer_interrupt_clear(timer); + rk_timer_disable(timer); + return 0; + +out_irq: + clk_disable_unprepare(timer_clk); +out_timer_clk: + clk_disable_unprepare(pclk); +out_unmap: + iounmap(timer->base); + + return ret; +} + +static void __init rk_timer_cleanup(struct rk_timer *timer) +{ + clk_disable_unprepare(timer->clk); + clk_disable_unprepare(timer->pclk); + iounmap(timer->base); +} + +static int __init rk_clkevt_init(struct device_node *np) +{ + struct clock_event_device *ce; + int ret = -EINVAL; + + rk_clkevt = kzalloc(sizeof(struct rk_clkevt), GFP_KERNEL); + if (!rk_clkevt) { + ret = -ENOMEM; + 
goto out; + } + ret = rk_timer_probe(&rk_clkevt->timer, np); + if (ret) + goto out_probe; + + ce = &rk_clkevt->ce; ce->name = TIMER_NAME; ce->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ; ce->set_next_event = rk_timer_set_next_event; ce->set_state_shutdown = rk_timer_shutdown; ce->set_state_periodic = rk_timer_set_periodic; - ce->irq = irq; + ce->irq = rk_clkevt->timer.irq; ce->cpumask = cpu_possible_mask; ce->rating = 250; - rk_timer_interrupt_clear(ce); - rk_timer_disable(ce); - - ret = request_irq(irq, rk_timer_interrupt, IRQF_TIMER, TIMER_NAME, ce); + ret = request_irq(rk_clkevt->timer.irq, rk_timer_interrupt, IRQF_TIMER, + TIMER_NAME, ce); if (ret) { - pr_err("Failed to initialize '%s': %d\n", TIMER_NAME, ret); + pr_err("Failed to initialize '%s': %d\n", + TIMER_NAME, ret); goto out_irq; } - clockevents_config_and_register(ce, bc_timer.freq, 1, UINT_MAX); - + clockevents_config_and_register(&rk_clkevt->ce, + rk_clkevt->timer.freq, 1, UINT_MAX); return 0; out_irq: - clk_disable_unprepare(timer_clk); -out_timer_clk: - clk_disable_unprepare(pclk); -out_unmap: - iounmap(bc_timer.base); - + rk_timer_cleanup(&rk_clkevt->timer); +out_probe: + kfree(rk_clkevt); +out: + /* Leave rk_clkevt not NULL to prevent future init */ + rk_clkevt = ERR_PTR(ret); return ret; } -static int __init rk3288_timer_init(struct device_node *np) +static int __init rk_clksrc_init(struct device_node *np) { - return rk_timer_init(np, TIMER_CONTROL_REG3288); + int ret = -EINVAL; + + rk_clksrc = kzalloc(sizeof(struct rk_timer), GFP_KERNEL); + if (!rk_clksrc) { + ret = -ENOMEM; + goto out; + } + + ret = rk_timer_probe(rk_clksrc, np); + if (ret) + goto out_probe; + + rk_timer_update_counter(UINT_MAX, rk_clksrc); + rk_timer_enable(rk_clksrc, 0); + + ret = clocksource_mmio_init(rk_clksrc->base + TIMER_CURRENT_VALUE0, + TIMER_NAME, rk_clksrc->freq, 250, 32, + clocksource_mmio_readl_down); + if (ret) { + pr_err("Failed to register clocksource\n"); + goto
out_clocksource; + } + + sched_clock_register(rk_timer_sched_read, 32, rk_clksrc->freq); + return 0; + +out_clocksource: + rk_timer_cleanup(rk_clksrc); +out_probe: + kfree(rk_clksrc); +out: + /* Leave rk_clksrc not NULL to prevent future init */ + rk_clksrc = ERR_PTR(ret); + return ret; } -static int __init rk3399_timer_init(struct device_node *np) +static int __init rk_timer_init(struct device_node *np) { - return rk_timer_init(np, TIMER_CONTROL_REG3399); + if (!rk_clkevt) + return rk_clkevt_init(np); + + if (!rk_clksrc) + return rk_clksrc_init(np); + + pr_err("Too many timer definitions for '%s'\n", TIMER_NAME); + return -EINVAL; } -CLOCKSOURCE_OF_DECLARE(rk3288_timer, "rockchip,rk3288-timer", - rk3288_timer_init); -CLOCKSOURCE_OF_DECLARE(rk3399_timer, "rockchip,rk3399-timer", - rk3399_timer_init); +CLOCKSOURCE_OF_DECLARE(rk3288_timer, "rockchip,rk3288-timer", rk_timer_init); +CLOCKSOURCE_OF_DECLARE(rk3399_timer, "rockchip,rk3399-timer", rk_timer_init); diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c index 0093ece661fe..a68e6538c809 100644 --- a/drivers/clocksource/samsung_pwm_timer.c +++ b/drivers/clocksource/samsung_pwm_timer.c @@ -385,7 +385,7 @@ static int __init _samsung_pwm_clocksource_init(void) mask = ~pwm.variant.output_mask & ((1 << SAMSUNG_PWM_NUM) - 1); channel = fls(mask) - 1; if (channel < 0) { - pr_crit("failed to find PWM channel for clocksource"); + pr_crit("failed to find PWM channel for clocksource\n"); return -EINVAL; } pwm.source_id = channel; @@ -393,7 +393,7 @@ static int __init _samsung_pwm_clocksource_init(void) mask &= ~(1 << channel); channel = fls(mask) - 1; if (channel < 0) { - pr_crit("failed to find PWM channel for clock event"); + pr_crit("failed to find PWM channel for clock event\n"); return -EINVAL; } pwm.event_id = channel; @@ -448,7 +448,7 @@ static int __init samsung_pwm_alloc(struct device_node *np, pwm.timerclk = of_clk_get_by_name(np, "timers"); if (IS_ERR(pwm.timerclk)) { - 
pr_crit("failed to get timers clock for timer"); + pr_crit("failed to get timers clock for timer\n"); return PTR_ERR(pwm.timerclk); } diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 28757edf6aca..e09e8bf0bb9b 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -103,7 +103,6 @@ struct sh_cmt_channel { unsigned long match_value; unsigned long next_match_value; unsigned long max_match_value; - unsigned long rate; raw_spinlock_t lock; struct clock_event_device ced; struct clocksource cs; @@ -118,6 +117,7 @@ struct sh_cmt_device { void __iomem *mapbase; struct clk *clk; + unsigned long rate; raw_spinlock_t lock; /* Protect the shared start/stop register */ @@ -320,7 +320,7 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_channel *ch, int start) raw_spin_unlock_irqrestore(&ch->cmt->lock, flags); } -static int sh_cmt_enable(struct sh_cmt_channel *ch, unsigned long *rate) +static int sh_cmt_enable(struct sh_cmt_channel *ch) { int k, ret; @@ -340,11 +340,9 @@ static int sh_cmt_enable(struct sh_cmt_channel *ch, unsigned long *rate) /* configure channel, periodic mode and maximum timeout */ if (ch->cmt->info->width == 16) { - *rate = clk_get_rate(ch->cmt->clk) / 512; sh_cmt_write_cmcsr(ch, SH_CMT16_CMCSR_CMIE | SH_CMT16_CMCSR_CKS512); } else { - *rate = clk_get_rate(ch->cmt->clk) / 8; sh_cmt_write_cmcsr(ch, SH_CMT32_CMCSR_CMM | SH_CMT32_CMCSR_CMTOUT_IE | SH_CMT32_CMCSR_CMR_IRQ | @@ -572,7 +570,7 @@ static int sh_cmt_start(struct sh_cmt_channel *ch, unsigned long flag) raw_spin_lock_irqsave(&ch->lock, flags); if (!(ch->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE))) - ret = sh_cmt_enable(ch, &ch->rate); + ret = sh_cmt_enable(ch); if (ret) goto out; @@ -640,10 +638,9 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs) ch->total_cycles = 0; ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE); - if (!ret) { - __clocksource_update_freq_hz(cs, ch->rate); + if (!ret) ch->cs_enabled = true; - } + return ret; } @@ -697,8 
+694,7 @@ static int sh_cmt_register_clocksource(struct sh_cmt_channel *ch, dev_info(&ch->cmt->pdev->dev, "ch%u: used as clock source\n", ch->index); - /* Register with dummy 1 Hz value, gets updated in ->enable() */ - clocksource_register_hz(cs, 1); + clocksource_register_hz(cs, ch->cmt->rate); return 0; } @@ -709,19 +705,10 @@ static struct sh_cmt_channel *ced_to_sh_cmt(struct clock_event_device *ced) static void sh_cmt_clock_event_start(struct sh_cmt_channel *ch, int periodic) { - struct clock_event_device *ced = &ch->ced; - sh_cmt_start(ch, FLAG_CLOCKEVENT); - /* TODO: calculate good shift from rate and counter bit width */ - - ced->shift = 32; - ced->mult = div_sc(ch->rate, NSEC_PER_SEC, ced->shift); - ced->max_delta_ns = clockevent_delta2ns(ch->max_match_value, ced); - ced->min_delta_ns = clockevent_delta2ns(0x1f, ced); - if (periodic) - sh_cmt_set_next(ch, ((ch->rate + HZ/2) / HZ) - 1); + sh_cmt_set_next(ch, ((ch->cmt->rate + HZ/2) / HZ) - 1); else sh_cmt_set_next(ch, ch->max_match_value); } @@ -824,6 +811,14 @@ static int sh_cmt_register_clockevent(struct sh_cmt_channel *ch, ced->suspend = sh_cmt_clock_event_suspend; ced->resume = sh_cmt_clock_event_resume; + /* TODO: calculate good shift from rate and counter bit width */ + ced->shift = 32; + ced->mult = div_sc(ch->cmt->rate, NSEC_PER_SEC, ced->shift); + ced->max_delta_ns = clockevent_delta2ns(ch->max_match_value, ced); + ced->max_delta_ticks = ch->max_match_value; + ced->min_delta_ns = clockevent_delta2ns(0x1f, ced); + ced->min_delta_ticks = 0x1f; + dev_info(&ch->cmt->pdev->dev, "ch%u: used for clock events\n", ch->index); clockevents_register_device(ced); @@ -996,6 +991,18 @@ static int sh_cmt_setup(struct sh_cmt_device *cmt, struct platform_device *pdev) if (ret < 0) goto err_clk_put; + /* Determine clock rate. 
*/ + ret = clk_enable(cmt->clk); + if (ret < 0) + goto err_clk_unprepare; + + if (cmt->info->width == 16) + cmt->rate = clk_get_rate(cmt->clk) / 512; + else + cmt->rate = clk_get_rate(cmt->clk) / 8; + + clk_disable(cmt->clk); + /* Map the memory resource(s). */ ret = sh_cmt_map_memory(cmt); if (ret < 0) diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index 1fbf2aadcfd4..31d881621e41 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -46,7 +46,6 @@ struct sh_tmu_channel { void __iomem *base; int irq; - unsigned long rate; unsigned long periodic; struct clock_event_device ced; struct clocksource cs; @@ -59,6 +58,7 @@ struct sh_tmu_device { void __iomem *mapbase; struct clk *clk; + unsigned long rate; enum sh_tmu_model model; @@ -165,7 +165,6 @@ static int __sh_tmu_enable(struct sh_tmu_channel *ch) sh_tmu_write(ch, TCNT, 0xffffffff); /* configure channel to parent clock / 4, irq off */ - ch->rate = clk_get_rate(ch->tmu->clk) / 4; sh_tmu_write(ch, TCR, TCR_TPSC_CLK4); /* enable channel */ @@ -271,10 +270,8 @@ static int sh_tmu_clocksource_enable(struct clocksource *cs) return 0; ret = sh_tmu_enable(ch); - if (!ret) { - __clocksource_update_freq_hz(cs, ch->rate); + if (!ret) ch->cs_enabled = true; - } return ret; } @@ -334,8 +331,7 @@ static int sh_tmu_register_clocksource(struct sh_tmu_channel *ch, dev_info(&ch->tmu->pdev->dev, "ch%u: used as clock source\n", ch->index); - /* Register with dummy 1 Hz value, gets updated in ->enable() */ - clocksource_register_hz(cs, 1); + clocksource_register_hz(cs, ch->tmu->rate); return 0; } @@ -346,14 +342,10 @@ static struct sh_tmu_channel *ced_to_sh_tmu(struct clock_event_device *ced) static void sh_tmu_clock_event_start(struct sh_tmu_channel *ch, int periodic) { - struct clock_event_device *ced = &ch->ced; - sh_tmu_enable(ch); - clockevents_config(ced, ch->rate); - if (periodic) { - ch->periodic = (ch->rate + HZ/2) / HZ; + ch->periodic = (ch->tmu->rate + HZ/2) / HZ; 
sh_tmu_set_next(ch, ch->periodic, 1); } } @@ -435,7 +427,7 @@ static void sh_tmu_register_clockevent(struct sh_tmu_channel *ch, dev_info(&ch->tmu->pdev->dev, "ch%u: used for clock events\n", ch->index); - clockevents_config_and_register(ced, 1, 0x300, 0xffffffff); + clockevents_config_and_register(ced, ch->tmu->rate, 0x300, 0xffffffff); ret = request_irq(ch->irq, sh_tmu_interrupt, IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING, @@ -561,6 +553,14 @@ static int sh_tmu_setup(struct sh_tmu_device *tmu, struct platform_device *pdev) if (ret < 0) goto err_clk_put; + /* Determine clock rate. */ + ret = clk_enable(tmu->clk); + if (ret < 0) + goto err_clk_unprepare; + + tmu->rate = clk_get_rate(tmu->clk) / 4; + clk_disable(tmu->clk); + /* Map the memory resource. */ ret = sh_tmu_map_memory(tmu); if (ret < 0) { diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index c83452cacb41..4452d5c8f304 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -159,25 +159,25 @@ static int __init sun4i_timer_init(struct device_node *node) timer_base = of_iomap(node, 0); if (!timer_base) { - pr_crit("Can't map registers"); + pr_crit("Can't map registers\n"); return -ENXIO; } irq = irq_of_parse_and_map(node, 0); if (irq <= 0) { - pr_crit("Can't parse IRQ"); + pr_crit("Can't parse IRQ\n"); return -EINVAL; } clk = of_clk_get(node, 0); if (IS_ERR(clk)) { - pr_crit("Can't get timer clock"); + pr_crit("Can't get timer clock\n"); return PTR_ERR(clk); } ret = clk_prepare_enable(clk); if (ret) { - pr_err("Failed to prepare clock"); + pr_err("Failed to prepare clock\n"); return ret; } @@ -200,7 +200,7 @@ static int __init sun4i_timer_init(struct device_node *node) ret = clocksource_mmio_init(timer_base + TIMER_CNTVAL_REG(1), node->name, rate, 350, 32, clocksource_mmio_readl_down); if (ret) { - pr_err("Failed to register clocksource"); + pr_err("Failed to register clocksource\n"); return ret; } diff --git 
a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c index f960891aa04e..b9990b9c98c5 100644 --- a/drivers/clocksource/tegra20_timer.c +++ b/drivers/clocksource/tegra20_timer.c @@ -245,7 +245,7 @@ static int __init tegra20_init_rtc(struct device_node *np) rtc_base = of_iomap(np, 0); if (!rtc_base) { - pr_err("Can't map RTC registers"); + pr_err("Can't map RTC registers\n"); return -ENXIO; } diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index 4440aefc59cd..aea4380129ea 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -247,13 +247,13 @@ static int __init armada_370_xp_timer_common_init(struct device_node *np) timer_base = of_iomap(np, 0); if (!timer_base) { - pr_err("Failed to iomap"); + pr_err("Failed to iomap\n"); return -ENXIO; } local_base = of_iomap(np, 1); if (!local_base) { - pr_err("Failed to iomap"); + pr_err("Failed to iomap\n"); return -ENXIO; } @@ -298,7 +298,7 @@ static int __init armada_370_xp_timer_common_init(struct device_node *np) "armada_370_xp_clocksource", timer_clk, 300, 32, clocksource_mmio_readl_down); if (res) { - pr_err("Failed to initialize clocksource mmio"); + pr_err("Failed to initialize clocksource mmio\n"); return res; } @@ -315,7 +315,7 @@ static int __init armada_370_xp_timer_common_init(struct device_node *np) armada_370_xp_evt); /* Immediately configure the timer on the boot CPU */ if (res) { - pr_err("Failed to request percpu irq"); + pr_err("Failed to request percpu irq\n"); return res; } @@ -324,7 +324,7 @@ static int __init armada_370_xp_timer_common_init(struct device_node *np) armada_370_xp_timer_starting_cpu, armada_370_xp_timer_dying_cpu); if (res) { - pr_err("Failed to setup hotplug state and timer"); + pr_err("Failed to setup hotplug state and timer\n"); return res; } @@ -339,7 +339,7 @@ static int __init armada_xp_timer_init(struct device_node *np) int ret; if (IS_ERR(clk)) { - pr_err("Failed 
to get clock"); + pr_err("Failed to get clock\n"); return PTR_ERR(clk); } @@ -375,7 +375,7 @@ static int __init armada_375_timer_init(struct device_node *np) /* Must have at least a clock */ if (IS_ERR(clk)) { - pr_err("Failed to get clock"); + pr_err("Failed to get clock\n"); return PTR_ERR(clk); } @@ -399,7 +399,7 @@ static int __init armada_370_timer_init(struct device_node *np) clk = of_clk_get(np, 0); if (IS_ERR(clk)) { - pr_err("Failed to get clock"); + pr_err("Failed to get clock\n"); return PTR_ERR(clk); } diff --git a/drivers/clocksource/time-efm32.c b/drivers/clocksource/time-efm32.c index 5ac344b383e1..ce0f97b4e5db 100644 --- a/drivers/clocksource/time-efm32.c +++ b/drivers/clocksource/time-efm32.c @@ -235,7 +235,7 @@ static int __init efm32_clockevent_init(struct device_node *np) ret = setup_irq(irq, &efm32_clock_event_irq); if (ret) { - pr_err("Failed setup irq"); + pr_err("Failed setup irq\n"); goto err_setup_irq; } diff --git a/drivers/clocksource/time-orion.c b/drivers/clocksource/time-orion.c index a28f496e97cf..b9b97f630c4d 100644 --- a/drivers/clocksource/time-orion.c +++ b/drivers/clocksource/time-orion.c @@ -15,6 +15,7 @@ #include <linux/bitops.h> #include <linux/clk.h> #include <linux/clockchips.h> +#include <linux/delay.h> #include <linux/interrupt.h> #include <linux/of_address.h> #include <linux/of_irq.h> @@ -36,6 +37,21 @@ static void __iomem *timer_base; +static unsigned long notrace orion_read_timer(void) +{ + return ~readl(timer_base + TIMER0_VAL); +} + +static struct delay_timer orion_delay_timer = { + .read_current_timer = orion_read_timer, +}; + +static void orion_delay_timer_init(unsigned long rate) +{ + orion_delay_timer.freq = rate; + register_current_timer_delay(&orion_delay_timer); +} + /* * Free-running clocksource handling. 
*/ @@ -106,6 +122,7 @@ static struct irqaction orion_clkevt_irq = { static int __init orion_timer_init(struct device_node *np) { + unsigned long rate; struct clk *clk; int irq, ret; @@ -124,7 +141,7 @@ static int __init orion_timer_init(struct device_node *np) ret = clk_prepare_enable(clk); if (ret) { - pr_err("Failed to prepare clock"); + pr_err("Failed to prepare clock\n"); return ret; } @@ -135,6 +152,8 @@ static int __init orion_timer_init(struct device_node *np) return -EINVAL; } + rate = clk_get_rate(clk); + /* setup timer0 as free-running clocksource */ writel(~0, timer_base + TIMER0_VAL); writel(~0, timer_base + TIMER0_RELOAD); @@ -142,15 +161,15 @@ static int __init orion_timer_init(struct device_node *np) TIMER0_RELOAD_EN | TIMER0_EN, TIMER0_RELOAD_EN | TIMER0_EN); - ret = clocksource_mmio_init(timer_base + TIMER0_VAL, "orion_clocksource", - clk_get_rate(clk), 300, 32, + ret = clocksource_mmio_init(timer_base + TIMER0_VAL, + "orion_clocksource", rate, 300, 32, clocksource_mmio_readl_down); if (ret) { - pr_err("Failed to initialize mmio timer"); + pr_err("Failed to initialize mmio timer\n"); return ret; } - sched_clock_register(orion_read_sched_clock, 32, clk_get_rate(clk)); + sched_clock_register(orion_read_sched_clock, 32, rate); /* setup timer1 as clockevent timer */ ret = setup_irq(irq, &orion_clkevt_irq); @@ -162,9 +181,12 @@ static int __init orion_timer_init(struct device_node *np) ticks_per_jiffy = (clk_get_rate(clk) + HZ/2) / HZ; orion_clkevt.cpumask = cpumask_of(0); orion_clkevt.irq = irq; - clockevents_config_and_register(&orion_clkevt, clk_get_rate(clk), + clockevents_config_and_register(&orion_clkevt, rate, ORION_ONESHOT_MIN, ORION_ONESHOT_MAX); + + orion_delay_timer_init(rate); + return 0; } CLOCKSOURCE_OF_DECLARE(orion_timer, "marvell,orion-timer", orion_timer_init); diff --git a/drivers/clocksource/timer-atlas7.c b/drivers/clocksource/timer-atlas7.c index 3d8a181f0252..50300eec4a39 100644 --- a/drivers/clocksource/timer-atlas7.c +++ 
b/drivers/clocksource/timer-atlas7.c @@ -192,7 +192,9 @@ static int sirfsoc_local_timer_starting_cpu(unsigned int cpu) ce->set_next_event = sirfsoc_timer_set_next_event; clockevents_calc_mult_shift(ce, atlas7_timer_rate, 60); ce->max_delta_ns = clockevent_delta2ns(-2, ce); + ce->max_delta_ticks = (unsigned long)-2; ce->min_delta_ns = clockevent_delta2ns(2, ce); + ce->min_delta_ticks = 2; ce->cpumask = cpumask_of(cpu); action->dev_id = ce; diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index c0b5df3167a0..cc112351dc70 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -226,7 +226,7 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) ret = clocksource_register_hz(&data->clksrc, pit_rate); if (ret) { - pr_err("Failed to register clocksource"); + pr_err("Failed to register clocksource\n"); return ret; } diff --git a/drivers/clocksource/timer-digicolor.c b/drivers/clocksource/timer-digicolor.c index e9f50d289362..94a161eb9cce 100644 --- a/drivers/clocksource/timer-digicolor.c +++ b/drivers/clocksource/timer-digicolor.c @@ -161,19 +161,19 @@ static int __init digicolor_timer_init(struct device_node *node) */ dc_timer_dev.base = of_iomap(node, 0); if (!dc_timer_dev.base) { - pr_err("Can't map registers"); + pr_err("Can't map registers\n"); return -ENXIO; } irq = irq_of_parse_and_map(node, dc_timer_dev.timer_id); if (irq <= 0) { - pr_err("Can't parse IRQ"); + pr_err("Can't parse IRQ\n"); return -EINVAL; } clk = of_clk_get(node, 0); if (IS_ERR(clk)) { - pr_err("Can't get timer clock"); + pr_err("Can't get timer clock\n"); return PTR_ERR(clk); } clk_prepare_enable(clk); diff --git a/drivers/clocksource/timer-gemini.c b/drivers/clocksource/timer-fttmr010.c index dda27b7bf1a1..b4a6f1e4bc54 100644 --- a/drivers/clocksource/timer-gemini.c +++ b/drivers/clocksource/timer-fttmr010.c @@ -1,5 +1,5 @@ /* - * Gemini timer driver + * Faraday Technology FTTMR010 timer driver * 
Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org> * * Based on a rewrite of arch/arm/mach-gemini/timer.c: @@ -16,17 +16,7 @@ #include <linux/clockchips.h> #include <linux/clocksource.h> #include <linux/sched_clock.h> - -/* - * Relevant registers in the global syscon - */ -#define GLOBAL_STATUS 0x04 -#define CPU_AHB_RATIO_MASK (0x3 << 18) -#define CPU_AHB_1_1 (0x0 << 18) -#define CPU_AHB_3_2 (0x1 << 18) -#define CPU_AHB_24_13 (0x2 << 18) -#define CPU_AHB_2_1 (0x3 << 18) -#define REG_TO_AHB_SPEED(reg) ((((reg) >> 15) & 0x7) * 10 + 130) +#include <linux/clk.h> /* * Register definitions for the timers @@ -77,12 +67,12 @@ static unsigned int tick_rate; static void __iomem *base; -static u64 notrace gemini_read_sched_clock(void) +static u64 notrace fttmr010_read_sched_clock(void) { return readl(base + TIMER3_COUNT); } -static int gemini_timer_set_next_event(unsigned long cycles, +static int fttmr010_timer_set_next_event(unsigned long cycles, struct clock_event_device *evt) { u32 cr; @@ -96,7 +86,7 @@ static int gemini_timer_set_next_event(unsigned long cycles, return 0; } -static int gemini_timer_shutdown(struct clock_event_device *evt) +static int fttmr010_timer_shutdown(struct clock_event_device *evt) { u32 cr; @@ -127,7 +117,7 @@ static int gemini_timer_shutdown(struct clock_event_device *evt) return 0; } -static int gemini_timer_set_periodic(struct clock_event_device *evt) +static int fttmr010_timer_set_periodic(struct clock_event_device *evt) { u32 period = DIV_ROUND_CLOSEST(tick_rate, HZ); u32 cr; @@ -158,54 +148,40 @@ static int gemini_timer_set_periodic(struct clock_event_device *evt) } /* Use TIMER1 as clock event */ -static struct clock_event_device gemini_clockevent = { +static struct clock_event_device fttmr010_clockevent = { .name = "TIMER1", /* Reasonably fast and accurate clock event */ .rating = 300, .shift = 32, .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_next_event = gemini_timer_set_next_event, - .set_state_shutdown = 
gemini_timer_shutdown, - .set_state_periodic = gemini_timer_set_periodic, - .set_state_oneshot = gemini_timer_shutdown, - .tick_resume = gemini_timer_shutdown, + .set_next_event = fttmr010_timer_set_next_event, + .set_state_shutdown = fttmr010_timer_shutdown, + .set_state_periodic = fttmr010_timer_set_periodic, + .set_state_oneshot = fttmr010_timer_shutdown, + .tick_resume = fttmr010_timer_shutdown, }; /* * IRQ handler for the timer */ -static irqreturn_t gemini_timer_interrupt(int irq, void *dev_id) +static irqreturn_t fttmr010_timer_interrupt(int irq, void *dev_id) { - struct clock_event_device *evt = &gemini_clockevent; + struct clock_event_device *evt = &fttmr010_clockevent; evt->event_handler(evt); return IRQ_HANDLED; } -static struct irqaction gemini_timer_irq = { - .name = "Gemini Timer Tick", +static struct irqaction fttmr010_timer_irq = { + .name = "Faraday FTTMR010 Timer Tick", .flags = IRQF_TIMER, - .handler = gemini_timer_interrupt, + .handler = fttmr010_timer_interrupt, }; -static int __init gemini_timer_of_init(struct device_node *np) +static int __init fttmr010_timer_common_init(struct device_node *np) { - static struct regmap *map; int irq; - int ret; - u32 val; - - map = syscon_regmap_lookup_by_phandle(np, "syscon"); - if (IS_ERR(map)) { - pr_err("Can't get regmap for syscon handle"); - return -ENODEV; - } - ret = regmap_read(map, GLOBAL_STATUS, &val); - if (ret) { - pr_err("Can't read syscon status register"); - return -ENXIO; - } base = of_iomap(np, 0); if (!base) { @@ -219,26 +195,6 @@ static int __init gemini_timer_of_init(struct device_node *np) return -EINVAL; } - tick_rate = REG_TO_AHB_SPEED(val) * 1000000; - printk(KERN_INFO "Bus: %dMHz", tick_rate / 1000000); - - tick_rate /= 6; /* APB bus run AHB*(1/6) */ - - switch (val & CPU_AHB_RATIO_MASK) { - case CPU_AHB_1_1: - printk(KERN_CONT "(1/1)\n"); - break; - case CPU_AHB_3_2: - printk(KERN_CONT "(3/2)\n"); - break; - case CPU_AHB_24_13: - printk(KERN_CONT "(24/13)\n"); - break; - case 
CPU_AHB_2_1: - printk(KERN_CONT "(2/1)\n"); - break; - } - /* * Reset the interrupt mask and status */ @@ -255,9 +211,9 @@ static int __init gemini_timer_of_init(struct device_node *np) writel(0, base + TIMER3_MATCH1); writel(0, base + TIMER3_MATCH2); clocksource_mmio_init(base + TIMER3_COUNT, - "gemini_clocksource", tick_rate, + "fttmr010_clocksource", tick_rate, 300, 32, clocksource_mmio_readl_up); - sched_clock_register(gemini_read_sched_clock, 32, tick_rate); + sched_clock_register(fttmr010_read_sched_clock, 32, tick_rate); /* * Setup clockevent timer (interrupt-driven.) @@ -266,12 +222,82 @@ static int __init gemini_timer_of_init(struct device_node *np) writel(0, base + TIMER1_LOAD); writel(0, base + TIMER1_MATCH1); writel(0, base + TIMER1_MATCH2); - setup_irq(irq, &gemini_timer_irq); - gemini_clockevent.cpumask = cpumask_of(0); - clockevents_config_and_register(&gemini_clockevent, tick_rate, + setup_irq(irq, &fttmr010_timer_irq); + fttmr010_clockevent.cpumask = cpumask_of(0); + clockevents_config_and_register(&fttmr010_clockevent, tick_rate, 1, 0xffffffff); return 0; } -CLOCKSOURCE_OF_DECLARE(nomadik_mtu, "cortina,gemini-timer", - gemini_timer_of_init); + +static int __init fttmr010_timer_of_init(struct device_node *np) +{ + /* + * These implementations require a clock reference. + * FIXME: we currently only support clocking using PCLK + * and using EXTCLK is not supported in the driver. 
+ */ + struct clk *clk; + + clk = of_clk_get_by_name(np, "PCLK"); + if (IS_ERR(clk)) { + pr_err("could not get PCLK"); + return PTR_ERR(clk); + } + tick_rate = clk_get_rate(clk); + + return fttmr010_timer_common_init(np); +} +CLOCKSOURCE_OF_DECLARE(fttmr010, "faraday,fttmr010", fttmr010_timer_of_init); + +/* + * Gemini-specific: relevant registers in the global syscon + */ +#define GLOBAL_STATUS 0x04 +#define CPU_AHB_RATIO_MASK (0x3 << 18) +#define CPU_AHB_1_1 (0x0 << 18) +#define CPU_AHB_3_2 (0x1 << 18) +#define CPU_AHB_24_13 (0x2 << 18) +#define CPU_AHB_2_1 (0x3 << 18) +#define REG_TO_AHB_SPEED(reg) ((((reg) >> 15) & 0x7) * 10 + 130) + +static int __init gemini_timer_of_init(struct device_node *np) +{ + static struct regmap *map; + int ret; + u32 val; + + map = syscon_regmap_lookup_by_phandle(np, "syscon"); + if (IS_ERR(map)) { + pr_err("Can't get regmap for syscon handle\n"); + return -ENODEV; + } + ret = regmap_read(map, GLOBAL_STATUS, &val); + if (ret) { + pr_err("Can't read syscon status register\n"); + return -ENXIO; + } + + tick_rate = REG_TO_AHB_SPEED(val) * 1000000; + pr_info("Bus: %dMHz ", tick_rate / 1000000); + + tick_rate /= 6; /* APB bus run AHB*(1/6) */ + + switch (val & CPU_AHB_RATIO_MASK) { + case CPU_AHB_1_1: + pr_cont("(1/1)\n"); + break; + case CPU_AHB_3_2: + pr_cont("(3/2)\n"); + break; + case CPU_AHB_24_13: + pr_cont("(24/13)\n"); + break; + case CPU_AHB_2_1: + pr_cont("(2/1)\n"); + break; + } + + return fttmr010_timer_common_init(np); +} +CLOCKSOURCE_OF_DECLARE(gemini, "cortina,gemini-timer", gemini_timer_of_init); diff --git a/drivers/clocksource/timer-integrator-ap.c b/drivers/clocksource/timer-integrator-ap.c index df6e672afc04..04ad3066e190 100644 --- a/drivers/clocksource/timer-integrator-ap.c +++ b/drivers/clocksource/timer-integrator-ap.c @@ -200,7 +200,7 @@ static int __init integrator_ap_timer_init_of(struct device_node *node) err = of_property_read_string(of_aliases, "arm,timer-primary", &path); if (err) { - pr_warn("Failed to read 
property"); + pr_warn("Failed to read property\n"); return err; } @@ -209,7 +209,7 @@ static int __init integrator_ap_timer_init_of(struct device_node *node) err = of_property_read_string(of_aliases, "arm,timer-secondary", &path); if (err) { - pr_warn("Failed to read property"); + pr_warn("Failed to read property\n"); return err; } diff --git a/drivers/clocksource/timer-nps.c b/drivers/clocksource/timer-nps.c index da1f7986e477..e74ea1722ad3 100644 --- a/drivers/clocksource/timer-nps.c +++ b/drivers/clocksource/timer-nps.c @@ -55,7 +55,7 @@ static int __init nps_get_timer_clk(struct device_node *node, *clk = of_clk_get(node, 0); ret = PTR_ERR_OR_ZERO(*clk); if (ret) { - pr_err("timer missing clk"); + pr_err("timer missing clk\n"); return ret; } @@ -247,7 +247,7 @@ static int __init nps_setup_clockevent(struct device_node *node) nps_timer0_irq = irq_of_parse_and_map(node, 0); if (nps_timer0_irq <= 0) { - pr_err("clockevent: missing irq"); + pr_err("clockevent: missing irq\n"); return -EINVAL; } @@ -270,7 +270,7 @@ static int __init nps_setup_clockevent(struct device_node *node) nps_timer_starting_cpu, nps_timer_dying_cpu); if (ret) { - pr_err("Failed to setup hotplug state"); + pr_err("Failed to setup hotplug state\n"); clk_disable_unprepare(clk); free_percpu_irq(nps_timer0_irq, &nps_clockevent_device); return ret; diff --git a/drivers/clocksource/timer-prima2.c b/drivers/clocksource/timer-prima2.c index bfa981ac1eaf..b4122ed1accb 100644 --- a/drivers/clocksource/timer-prima2.c +++ b/drivers/clocksource/timer-prima2.c @@ -196,20 +196,20 @@ static int __init sirfsoc_prima2_timer_init(struct device_node *np) clk = of_clk_get(np, 0); if (IS_ERR(clk)) { - pr_err("Failed to get clock"); + pr_err("Failed to get clock\n"); return PTR_ERR(clk); } ret = clk_prepare_enable(clk); if (ret) { - pr_err("Failed to enable clock"); + pr_err("Failed to enable clock\n"); return ret; } rate = clk_get_rate(clk); if (rate < PRIMA2_CLOCK_FREQ || rate % PRIMA2_CLOCK_FREQ) { - 
pr_err("Invalid clock rate"); + pr_err("Invalid clock rate\n"); return -EINVAL; } @@ -229,7 +229,7 @@ static int __init sirfsoc_prima2_timer_init(struct device_node *np) ret = clocksource_register_hz(&sirfsoc_clocksource, PRIMA2_CLOCK_FREQ); if (ret) { - pr_err("Failed to register clocksource"); + pr_err("Failed to register clocksource\n"); return ret; } @@ -237,7 +237,7 @@ static int __init sirfsoc_prima2_timer_init(struct device_node *np) ret = setup_irq(sirfsoc_timer_irq.irq, &sirfsoc_timer_irq); if (ret) { - pr_err("Failed to setup irq"); + pr_err("Failed to setup irq\n"); return ret; } diff --git a/drivers/clocksource/timer-sp804.c b/drivers/clocksource/timer-sp804.c index d07863388e05..2d575a8c0939 100644 --- a/drivers/clocksource/timer-sp804.c +++ b/drivers/clocksource/timer-sp804.c @@ -299,13 +299,13 @@ static int __init integrator_cp_of_init(struct device_node *np) base = of_iomap(np, 0); if (!base) { - pr_err("Failed to iomap"); + pr_err("Failed to iomap\n"); return -ENXIO; } clk = of_clk_get(np, 0); if (IS_ERR(clk)) { - pr_err("Failed to get clock"); + pr_err("Failed to get clock\n"); return PTR_ERR(clk); } diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index a3e662b15964..2e9c830ae1cd 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -332,19 +332,19 @@ static int __init sun5i_timer_init(struct device_node *node) timer_base = of_io_request_and_map(node, 0, of_node_full_name(node)); if (IS_ERR(timer_base)) { - pr_err("Can't map registers"); + pr_err("Can't map registers\n"); return PTR_ERR(timer_base);; } irq = irq_of_parse_and_map(node, 0); if (irq <= 0) { - pr_err("Can't parse IRQ"); + pr_err("Can't parse IRQ\n"); return -EINVAL; } clk = of_clk_get(node, 0); if (IS_ERR(clk)) { - pr_err("Can't get timer clock"); + pr_err("Can't get timer clock\n"); return PTR_ERR(clk); } diff --git a/drivers/clocksource/vf_pit_timer.c b/drivers/clocksource/vf_pit_timer.c index 
55d8d8402d90..e0849e20a307 100644 --- a/drivers/clocksource/vf_pit_timer.c +++ b/drivers/clocksource/vf_pit_timer.c @@ -165,7 +165,7 @@ static int __init pit_timer_init(struct device_node *np) timer_base = of_iomap(np, 0); if (!timer_base) { - pr_err("Failed to iomap"); + pr_err("Failed to iomap\n"); return -ENXIO; } diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 74fa5c5904d3..74ed7e9a7f27 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -247,6 +247,12 @@ config ARM_TEGRA124_CPUFREQ help This adds the CPUFreq driver support for Tegra124 SOCs. +config ARM_TEGRA186_CPUFREQ + tristate "Tegra186 CPUFreq support" + depends on ARCH_TEGRA && TEGRA_BPMP + help + This adds the CPUFreq driver support for Tegra186 SOCs. + config ARM_TI_CPUFREQ bool "Texas Instruments CPUFreq support" depends on ARCH_OMAP2PLUS diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 9f5a8045f36d..b7e78f063c4f 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o obj-$(CONFIG_ARM_STI_CPUFREQ) += sti-cpufreq.o obj-$(CONFIG_ARM_TEGRA20_CPUFREQ) += tegra20-cpufreq.o obj-$(CONFIG_ARM_TEGRA124_CPUFREQ) += tegra124-cpufreq.o +obj-$(CONFIG_ARM_TEGRA186_CPUFREQ) += tegra186-cpufreq.o obj-$(CONFIG_ARM_TI_CPUFREQ) += ti-cpufreq.o obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bc96d423781a..0e3f6496524d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2398,6 +2398,20 @@ EXPORT_SYMBOL_GPL(cpufreq_boost_enabled); *********************************************************************/ static enum cpuhp_state hp_online; +static int cpuhp_cpufreq_online(unsigned int cpu) +{ + cpufreq_online(cpu); + + return 0; +} + +static int cpuhp_cpufreq_offline(unsigned int cpu) +{ + cpufreq_offline(cpu); 
+ + return 0; +} + /** * cpufreq_register_driver - register a CPU Frequency driver * @driver_data: A struct cpufreq_driver containing the values# @@ -2460,8 +2474,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) } ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "cpufreq:online", - cpufreq_online, - cpufreq_offline); + cpuhp_cpufreq_online, + cpuhp_cpufreq_offline); if (ret < 0) goto err_if_unreg; hp_online = ret; diff --git a/drivers/cpufreq/dbx500-cpufreq.c b/drivers/cpufreq/dbx500-cpufreq.c index 5c3ec1dd4921..3575b82210ba 100644 --- a/drivers/cpufreq/dbx500-cpufreq.c +++ b/drivers/cpufreq/dbx500-cpufreq.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/cpufreq.h> +#include <linux/cpu_cooling.h> #include <linux/delay.h> #include <linux/slab.h> #include <linux/platform_device.h> @@ -18,6 +19,7 @@ static struct cpufreq_frequency_table *freq_table; static struct clk *armss_clk; +static struct thermal_cooling_device *cdev; static int dbx500_cpufreq_target(struct cpufreq_policy *policy, unsigned int index) @@ -32,6 +34,22 @@ static int dbx500_cpufreq_init(struct cpufreq_policy *policy) return cpufreq_generic_init(policy, freq_table, 20 * 1000); } +static int dbx500_cpufreq_exit(struct cpufreq_policy *policy) +{ + if (!IS_ERR(cdev)) + cpufreq_cooling_unregister(cdev); + return 0; +} + +static void dbx500_cpufreq_ready(struct cpufreq_policy *policy) +{ + cdev = cpufreq_cooling_register(policy->cpus); + if (IS_ERR(cdev)) + pr_err("Failed to register cooling device %ld\n", PTR_ERR(cdev)); + else + pr_info("Cooling device registered: %s\n", cdev->type); +} + static struct cpufreq_driver dbx500_cpufreq_driver = { .flags = CPUFREQ_STICKY | CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_INITIAL_FREQ_CHECK, @@ -39,6 +57,8 @@ static struct cpufreq_driver dbx500_cpufreq_driver = { .target_index = dbx500_cpufreq_target, .get = cpufreq_generic_get, .init = dbx500_cpufreq_init, + .exit = dbx500_cpufreq_exit, + .ready = 
dbx500_cpufreq_ready, .name = "DBX500", .attr = cpufreq_generic_attr, }; diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 7719b02e04f5..9c13f097fd8c 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -161,8 +161,13 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) static int imx6q_cpufreq_init(struct cpufreq_policy *policy) { + int ret; + policy->clk = arm_clk; - return cpufreq_generic_init(policy, freq_table, transition_latency); + ret = cpufreq_generic_init(policy, freq_table, transition_latency); + policy->suspend_freq = policy->max; + + return ret; } static struct cpufreq_driver imx6q_cpufreq_driver = { @@ -173,6 +178,7 @@ static struct cpufreq_driver imx6q_cpufreq_driver = { .init = imx6q_cpufreq_init, .name = "imx6q-cpufreq", .attr = cpufreq_generic_attr, + .suspend = cpufreq_generic_suspend, }; static int imx6q_cpufreq_probe(struct platform_device *pdev) @@ -222,6 +228,13 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) arm_reg = regulator_get(cpu_dev, "arm"); pu_reg = regulator_get_optional(cpu_dev, "pu"); soc_reg = regulator_get(cpu_dev, "soc"); + if (PTR_ERR(arm_reg) == -EPROBE_DEFER || + PTR_ERR(soc_reg) == -EPROBE_DEFER || + PTR_ERR(pu_reg) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + dev_dbg(cpu_dev, "regulators not ready, defer\n"); + goto put_reg; + } if (IS_ERR(arm_reg) || IS_ERR(soc_reg)) { dev_err(cpu_dev, "failed to get regulators\n"); ret = -ENOENT; @@ -255,7 +268,7 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); - goto put_reg; + goto out_free_opp; } /* Make imx6_soc_volt array's size same as arm opp number */ diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 283491f742d3..b7de5bd76a31 100644 --- a/drivers/cpufreq/intel_pstate.c +++ 
b/drivers/cpufreq/intel_pstate.c @@ -37,7 +37,11 @@ #include <asm/cpufeature.h> #include <asm/intel-family.h> +#define INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC) +#define INTEL_PSTATE_HWP_SAMPLING_INTERVAL (50 * NSEC_PER_MSEC) + #define INTEL_CPUFREQ_TRANSITION_LATENCY 20000 +#define INTEL_CPUFREQ_TRANSITION_DELAY 500 #ifdef CONFIG_ACPI #include <acpi/processor.h> @@ -74,6 +78,11 @@ static inline int ceiling_fp(int32_t x) return ret; } +static inline int32_t percent_fp(int percent) +{ + return div_fp(percent, 100); +} + static inline u64 mul_ext_fp(u64 x, u64 y) { return (x * y) >> EXT_FRAC_BITS; @@ -186,45 +195,22 @@ struct _pid { }; /** - * struct perf_limits - Store user and policy limits - * @no_turbo: User requested turbo state from intel_pstate sysfs - * @turbo_disabled: Platform turbo status either from msr - * MSR_IA32_MISC_ENABLE or when maximum available pstate - * matches the maximum turbo pstate - * @max_perf_pct: Effective maximum performance limit in percentage, this - * is minimum of either limits enforced by cpufreq policy - * or limits from user set limits via intel_pstate sysfs - * @min_perf_pct: Effective minimum performance limit in percentage, this - * is maximum of either limits enforced by cpufreq policy - * or limits from user set limits via intel_pstate sysfs - * @max_perf: This is a scaled value between 0 to 255 for max_perf_pct - * This value is used to limit max pstate - * @min_perf: This is a scaled value between 0 to 255 for min_perf_pct - * This value is used to limit min pstate - * @max_policy_pct: The maximum performance in percentage enforced by - * cpufreq setpolicy interface - * @max_sysfs_pct: The maximum performance in percentage enforced by - * intel pstate sysfs interface, unused when per cpu - * controls are enforced - * @min_policy_pct: The minimum performance in percentage enforced by - * cpufreq setpolicy interface - * @min_sysfs_pct: The minimum performance in percentage enforced by - * intel pstate 
sysfs interface, unused when per cpu - * controls are enforced - * - * Storage for user and policy defined limits. + * struct global_params - Global parameters, mostly tunable via sysfs. + * @no_turbo: Whether or not to use turbo P-states. + * @turbo_disabled: Whethet or not turbo P-states are available at all, + * based on the MSR_IA32_MISC_ENABLE value and whether or + * not the maximum reported turbo P-state is different from + * the maximum reported non-turbo one. + * @min_perf_pct: Minimum capacity limit in percent of the maximum turbo + * P-state capacity. + * @max_perf_pct: Maximum capacity limit in percent of the maximum turbo + * P-state capacity. */ -struct perf_limits { - int no_turbo; - int turbo_disabled; +struct global_params { + bool no_turbo; + bool turbo_disabled; int max_perf_pct; int min_perf_pct; - int32_t max_perf; - int32_t min_perf; - int max_policy_pct; - int max_sysfs_pct; - int min_policy_pct; - int min_sysfs_pct; }; /** @@ -245,9 +231,10 @@ struct perf_limits { * @prev_cummulative_iowait: IO Wait time difference from last and * current sample * @sample: Storage for storing last Sample data - * @perf_limits: Pointer to perf_limit unique to this CPU - * Not all field in the structure are applicable - * when per cpu controls are enforced + * @min_perf: Minimum capacity limit as a fraction of the maximum + * turbo P-state capacity. + * @max_perf: Maximum capacity limit as a fraction of the maximum + * turbo P-state capacity. 
* @acpi_perf_data: Stores ACPI perf information read from _PSS * @valid_pss_table: Set to true for valid ACPI _PSS entries found * @epp_powersave: Last saved HWP energy performance preference @@ -279,7 +266,8 @@ struct cpudata { u64 prev_tsc; u64 prev_cummulative_iowait; struct sample sample; - struct perf_limits *perf_limits; + int32_t min_perf; + int32_t max_perf; #ifdef CONFIG_ACPI struct acpi_processor_performance acpi_perf_data; bool valid_pss_table; @@ -324,7 +312,7 @@ struct pstate_adjust_policy { * @get_scaling: Callback to get frequency scaling factor * @get_val: Callback to convert P state to actual MSR write value * @get_vid: Callback to get VID data for Atom platforms - * @get_target_pstate: Callback to a function to calculate next P state to use + * @update_util: Active mode utilization update callback. * * Core and Atom CPU models have different way to get P State limits. This * structure is used to store those callbacks. @@ -337,43 +325,31 @@ struct pstate_funcs { int (*get_scaling)(void); u64 (*get_val)(struct cpudata*, int pstate); void (*get_vid)(struct cpudata *); - int32_t (*get_target_pstate)(struct cpudata *); + void (*update_util)(struct update_util_data *data, u64 time, + unsigned int flags); }; -/** - * struct cpu_defaults- Per CPU model default config data - * @pid_policy: PID config data - * @funcs: Callback function data - */ -struct cpu_defaults { - struct pstate_adjust_policy pid_policy; - struct pstate_funcs funcs; +static struct pstate_funcs pstate_funcs __read_mostly; +static struct pstate_adjust_policy pid_params __read_mostly = { + .sample_rate_ms = 10, + .sample_rate_ns = 10 * NSEC_PER_MSEC, + .deadband = 0, + .setpoint = 97, + .p_gain_pct = 20, + .d_gain_pct = 0, + .i_gain_pct = 0, }; -static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu); -static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu); - -static struct pstate_adjust_policy pid_params __read_mostly; -static struct pstate_funcs 
pstate_funcs __read_mostly; static int hwp_active __read_mostly; static bool per_cpu_limits __read_mostly; -static bool driver_registered __read_mostly; +static struct cpufreq_driver *intel_pstate_driver __read_mostly; #ifdef CONFIG_ACPI static bool acpi_ppc; #endif -static struct perf_limits global; - -static void intel_pstate_init_limits(struct perf_limits *limits) -{ - memset(limits, 0, sizeof(*limits)); - limits->max_perf_pct = 100; - limits->max_perf = int_ext_tofp(1); - limits->max_policy_pct = 100; - limits->max_sysfs_pct = 100; -} +static struct global_params global; static DEFINE_MUTEX(intel_pstate_driver_lock); static DEFINE_MUTEX(intel_pstate_limits_lock); @@ -530,29 +506,6 @@ static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) } #endif -static inline void pid_reset(struct _pid *pid, int setpoint, int busy, - int deadband, int integral) { - pid->setpoint = int_tofp(setpoint); - pid->deadband = int_tofp(deadband); - pid->integral = int_tofp(integral); - pid->last_err = int_tofp(setpoint) - int_tofp(busy); -} - -static inline void pid_p_gain_set(struct _pid *pid, int percent) -{ - pid->p_gain = div_fp(percent, 100); -} - -static inline void pid_i_gain_set(struct _pid *pid, int percent) -{ - pid->i_gain = div_fp(percent, 100); -} - -static inline void pid_d_gain_set(struct _pid *pid, int percent) -{ - pid->d_gain = div_fp(percent, 100); -} - static signed int pid_calc(struct _pid *pid, int32_t busy) { signed int result; @@ -590,23 +543,17 @@ static signed int pid_calc(struct _pid *pid, int32_t busy) return (signed int)fp_toint(result); } -static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu) -{ - pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct); - pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct); - pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct); - - pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0); -} - -static inline void intel_pstate_reset_all_pid(void) +static inline void 
intel_pstate_pid_reset(struct cpudata *cpu) { - unsigned int cpu; + struct _pid *pid = &cpu->pid; - for_each_online_cpu(cpu) { - if (all_cpu_data[cpu]) - intel_pstate_busy_pid_reset(all_cpu_data[cpu]); - } + pid->p_gain = percent_fp(pid_params.p_gain_pct); + pid->d_gain = percent_fp(pid_params.d_gain_pct); + pid->i_gain = percent_fp(pid_params.i_gain_pct); + pid->setpoint = int_tofp(pid_params.setpoint); + pid->last_err = pid->setpoint - int_tofp(100); + pid->deadband = int_tofp(pid_params.deadband); + pid->integral = 0; } static inline void update_turbo_state(void) @@ -621,6 +568,14 @@ static inline void update_turbo_state(void) cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); } +static int min_perf_pct_min(void) +{ + struct cpudata *cpu = all_cpu_data[0]; + + return DIV_ROUND_UP(cpu->pstate.min_pstate * 100, + cpu->pstate.turbo_pstate); +} + static s16 intel_pstate_get_epb(struct cpudata *cpu_data) { u64 epb; @@ -838,96 +793,80 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { NULL, }; -static void intel_pstate_hwp_set(struct cpufreq_policy *policy) +static void intel_pstate_hwp_set(unsigned int cpu) { - int min, hw_min, max, hw_max, cpu; - struct perf_limits *perf_limits = &global; + struct cpudata *cpu_data = all_cpu_data[cpu]; + int min, hw_min, max, hw_max; u64 value, cap; + s16 epp; - for_each_cpu(cpu, policy->cpus) { - struct cpudata *cpu_data = all_cpu_data[cpu]; - s16 epp; - - if (per_cpu_limits) - perf_limits = all_cpu_data[cpu]->perf_limits; - - rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); - hw_min = HWP_LOWEST_PERF(cap); - if (global.no_turbo) - hw_max = HWP_GUARANTEED_PERF(cap); - else - hw_max = HWP_HIGHEST_PERF(cap); - - max = fp_ext_toint(hw_max * perf_limits->max_perf); - if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) - min = max; - else - min = fp_ext_toint(hw_max * perf_limits->min_perf); + rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); + hw_min = HWP_LOWEST_PERF(cap); + if (global.no_turbo) + hw_max = HWP_GUARANTEED_PERF(cap); + 
else + hw_max = HWP_HIGHEST_PERF(cap); - rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); + max = fp_ext_toint(hw_max * cpu_data->max_perf); + if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) + min = max; + else + min = fp_ext_toint(hw_max * cpu_data->min_perf); - value &= ~HWP_MIN_PERF(~0L); - value |= HWP_MIN_PERF(min); + rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); - value &= ~HWP_MAX_PERF(~0L); - value |= HWP_MAX_PERF(max); + value &= ~HWP_MIN_PERF(~0L); + value |= HWP_MIN_PERF(min); - if (cpu_data->epp_policy == cpu_data->policy) - goto skip_epp; + value &= ~HWP_MAX_PERF(~0L); + value |= HWP_MAX_PERF(max); - cpu_data->epp_policy = cpu_data->policy; + if (cpu_data->epp_policy == cpu_data->policy) + goto skip_epp; - if (cpu_data->epp_saved >= 0) { - epp = cpu_data->epp_saved; - cpu_data->epp_saved = -EINVAL; - goto update_epp; - } + cpu_data->epp_policy = cpu_data->policy; - if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) { - epp = intel_pstate_get_epp(cpu_data, value); - cpu_data->epp_powersave = epp; - /* If EPP read was failed, then don't try to write */ - if (epp < 0) - goto skip_epp; + if (cpu_data->epp_saved >= 0) { + epp = cpu_data->epp_saved; + cpu_data->epp_saved = -EINVAL; + goto update_epp; + } + if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) { + epp = intel_pstate_get_epp(cpu_data, value); + cpu_data->epp_powersave = epp; + /* If EPP read was failed, then don't try to write */ + if (epp < 0) + goto skip_epp; - epp = 0; - } else { - /* skip setting EPP, when saved value is invalid */ - if (cpu_data->epp_powersave < 0) - goto skip_epp; + epp = 0; + } else { + /* skip setting EPP, when saved value is invalid */ + if (cpu_data->epp_powersave < 0) + goto skip_epp; - /* - * No need to restore EPP when it is not zero. This - * means: - * - Policy is not changed - * - user has manually changed - * - Error reading EPB - */ - epp = intel_pstate_get_epp(cpu_data, value); - if (epp) - goto skip_epp; + /* + * No need to restore EPP when it is not zero. 
This + * means: + * - Policy is not changed + * - user has manually changed + * - Error reading EPB + */ + epp = intel_pstate_get_epp(cpu_data, value); + if (epp) + goto skip_epp; - epp = cpu_data->epp_powersave; - } + epp = cpu_data->epp_powersave; + } update_epp: - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; - } else { - intel_pstate_set_epb(cpu, epp); - } -skip_epp: - wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); + if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + value &= ~GENMASK_ULL(31, 24); + value |= (u64)epp << 24; + } else { + intel_pstate_set_epb(cpu, epp); } -} - -static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy) -{ - if (hwp_active) - intel_pstate_hwp_set(policy); - - return 0; +skip_epp: + wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); } static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy) @@ -944,20 +883,17 @@ static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy) static int intel_pstate_resume(struct cpufreq_policy *policy) { - int ret; - if (!hwp_active) return 0; mutex_lock(&intel_pstate_limits_lock); all_cpu_data[policy->cpu]->epp_policy = 0; - - ret = intel_pstate_hwp_set_policy(policy); + intel_pstate_hwp_set(policy->cpu); mutex_unlock(&intel_pstate_limits_lock); - return ret; + return 0; } static void intel_pstate_update_policies(void) @@ -971,9 +907,14 @@ static void intel_pstate_update_policies(void) /************************** debugfs begin ************************/ static int pid_param_set(void *data, u64 val) { + unsigned int cpu; + *(u32 *)data = val; pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; - intel_pstate_reset_all_pid(); + for_each_possible_cpu(cpu) + if (all_cpu_data[cpu]) + intel_pstate_pid_reset(all_cpu_data[cpu]); + return 0; } @@ -1084,7 +1025,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj, mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { 
mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -1109,7 +1050,7 @@ static ssize_t show_num_pstates(struct kobject *kobj, mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -1129,7 +1070,7 @@ static ssize_t show_no_turbo(struct kobject *kobj, mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -1157,7 +1098,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } @@ -1174,6 +1115,15 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, global.no_turbo = clamp_t(int, input, 0, 1); + if (global.no_turbo) { + struct cpudata *cpu = all_cpu_data[0]; + int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate; + + /* Squash the global minimum into the permitted range. 
*/ + if (global.min_perf_pct > pct) + global.min_perf_pct = pct; + } + mutex_unlock(&intel_pstate_limits_lock); intel_pstate_update_policies(); @@ -1195,18 +1145,14 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } mutex_lock(&intel_pstate_limits_lock); - global.max_sysfs_pct = clamp_t(int, input, 0 , 100); - global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct); - global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct); - global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct); - global.max_perf = percent_ext_fp(global.max_perf_pct); + global.max_perf_pct = clamp_t(int, input, global.min_perf_pct, 100); mutex_unlock(&intel_pstate_limits_lock); @@ -1229,18 +1175,15 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_driver_lock); - if (!driver_registered) { + if (!intel_pstate_driver) { mutex_unlock(&intel_pstate_driver_lock); return -EAGAIN; } mutex_lock(&intel_pstate_limits_lock); - global.min_sysfs_pct = clamp_t(int, input, 0 , 100); - global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct); - global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct); - global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct); - global.min_perf = percent_ext_fp(global.min_perf_pct); + global.min_perf_pct = clamp_t(int, input, + min_perf_pct_min(), global.max_perf_pct); mutex_unlock(&intel_pstate_limits_lock); @@ -1554,132 +1497,10 @@ static int knl_get_turbo_pstate(void) return ret; } -static struct cpu_defaults core_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 97, - .p_gain_pct = 20, - .d_gain_pct = 0, - .i_gain_pct = 0, - }, - .funcs = { - .get_max = core_get_max_pstate, - .get_max_physical = core_get_max_pstate_physical, - 
.get_min = core_get_min_pstate, - .get_turbo = core_get_turbo_pstate, - .get_scaling = core_get_scaling, - .get_val = core_get_val, - .get_target_pstate = get_target_pstate_use_performance, - }, -}; - -static const struct cpu_defaults silvermont_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 60, - .p_gain_pct = 14, - .d_gain_pct = 0, - .i_gain_pct = 4, - }, - .funcs = { - .get_max = atom_get_max_pstate, - .get_max_physical = atom_get_max_pstate, - .get_min = atom_get_min_pstate, - .get_turbo = atom_get_turbo_pstate, - .get_val = atom_get_val, - .get_scaling = silvermont_get_scaling, - .get_vid = atom_get_vid, - .get_target_pstate = get_target_pstate_use_cpu_load, - }, -}; - -static const struct cpu_defaults airmont_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 60, - .p_gain_pct = 14, - .d_gain_pct = 0, - .i_gain_pct = 4, - }, - .funcs = { - .get_max = atom_get_max_pstate, - .get_max_physical = atom_get_max_pstate, - .get_min = atom_get_min_pstate, - .get_turbo = atom_get_turbo_pstate, - .get_val = atom_get_val, - .get_scaling = airmont_get_scaling, - .get_vid = atom_get_vid, - .get_target_pstate = get_target_pstate_use_cpu_load, - }, -}; - -static const struct cpu_defaults knl_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 97, - .p_gain_pct = 20, - .d_gain_pct = 0, - .i_gain_pct = 0, - }, - .funcs = { - .get_max = core_get_max_pstate, - .get_max_physical = core_get_max_pstate_physical, - .get_min = core_get_min_pstate, - .get_turbo = knl_get_turbo_pstate, - .get_scaling = core_get_scaling, - .get_val = core_get_val, - .get_target_pstate = get_target_pstate_use_performance, - }, -}; - -static const struct cpu_defaults bxt_params = { - .pid_policy = { - .sample_rate_ms = 10, - .deadband = 0, - .setpoint = 60, - .p_gain_pct = 14, - .d_gain_pct = 0, - .i_gain_pct = 4, - }, - .funcs = { - .get_max = core_get_max_pstate, - .get_max_physical = 
core_get_max_pstate_physical, - .get_min = core_get_min_pstate, - .get_turbo = core_get_turbo_pstate, - .get_scaling = core_get_scaling, - .get_val = core_get_val, - .get_target_pstate = get_target_pstate_use_cpu_load, - }, -}; - -static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) +static int intel_pstate_get_base_pstate(struct cpudata *cpu) { - int max_perf = cpu->pstate.turbo_pstate; - int max_perf_adj; - int min_perf; - struct perf_limits *perf_limits = &global; - - if (global.no_turbo || global.turbo_disabled) - max_perf = cpu->pstate.max_pstate; - - if (per_cpu_limits) - perf_limits = cpu->perf_limits; - - /* - * performance can be limited by user through sysfs, by cpufreq - * policy, or by cpu specific default values determined through - * experimentation. - */ - max_perf_adj = fp_ext_toint(max_perf * perf_limits->max_perf); - *max = clamp_t(int, max_perf_adj, - cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); - - min_perf = fp_ext_toint(max_perf * perf_limits->min_perf); - *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); + return global.no_turbo || global.turbo_disabled ? 
+ cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; } static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) @@ -1702,11 +1523,13 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu) static void intel_pstate_max_within_limits(struct cpudata *cpu) { - int min_pstate, max_pstate; + int pstate; update_turbo_state(); - intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate); - intel_pstate_set_pstate(cpu, max_pstate); + pstate = intel_pstate_get_base_pstate(cpu); + pstate = max(cpu->pstate.min_pstate, + fp_ext_toint(pstate * cpu->max_perf)); + intel_pstate_set_pstate(cpu, pstate); } static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) @@ -1767,7 +1590,11 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) * that sample.time will always be reset before setting the utilization * update hook and make the caller skip the sample then. */ - return !!cpu->last_sample_time; + if (cpu->last_sample_time) { + intel_pstate_calc_avg_perf(cpu); + return true; + } + return false; } static inline int32_t get_avg_frequency(struct cpudata *cpu) @@ -1788,6 +1615,9 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) int32_t busy_frac, boost; int target, avg_pstate; + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) + return cpu->pstate.turbo_pstate; + busy_frac = div_fp(sample->mperf, sample->tsc); boost = cpu->iowait_boost; @@ -1824,6 +1654,9 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) int32_t perf_scaled, max_pstate, current_pstate, sample_ratio; u64 duration_ns; + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) + return cpu->pstate.turbo_pstate; + /* * perf_scaled is the ratio of the average P-state during the last * sampling period to the P-state requested last time (in percent). 
@@ -1858,11 +1691,13 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate) { - int max_perf, min_perf; + int max_pstate = intel_pstate_get_base_pstate(cpu); + int min_pstate; - intel_pstate_get_min_max(cpu, &min_perf, &max_perf); - pstate = clamp_t(int, pstate, min_perf, max_perf); - return pstate; + min_pstate = max(cpu->pstate.min_pstate, + fp_ext_toint(max_pstate * cpu->min_perf)); + max_pstate = max(min_pstate, fp_ext_toint(max_pstate * cpu->max_perf)); + return clamp_t(int, pstate, min_pstate, max_pstate); } static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) @@ -1874,16 +1709,11 @@ static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); } -static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) +static void intel_pstate_adjust_pstate(struct cpudata *cpu, int target_pstate) { - int from, target_pstate; + int from = cpu->pstate.current_pstate; struct sample *sample; - from = cpu->pstate.current_pstate; - - target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ? 
- cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu); - update_turbo_state(); target_pstate = intel_pstate_prepare_request(cpu, target_pstate); @@ -1902,76 +1732,155 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) fp_toint(cpu->iowait_boost * 100)); } +static void intel_pstate_update_util_hwp(struct update_util_data *data, + u64 time, unsigned int flags) +{ + struct cpudata *cpu = container_of(data, struct cpudata, update_util); + u64 delta_ns = time - cpu->sample.time; + + if ((s64)delta_ns >= INTEL_PSTATE_HWP_SAMPLING_INTERVAL) + intel_pstate_sample(cpu, time); +} + +static void intel_pstate_update_util_pid(struct update_util_data *data, + u64 time, unsigned int flags) +{ + struct cpudata *cpu = container_of(data, struct cpudata, update_util); + u64 delta_ns = time - cpu->sample.time; + + if ((s64)delta_ns < pid_params.sample_rate_ns) + return; + + if (intel_pstate_sample(cpu, time)) { + int target_pstate; + + target_pstate = get_target_pstate_use_performance(cpu); + intel_pstate_adjust_pstate(cpu, target_pstate); + } +} + static void intel_pstate_update_util(struct update_util_data *data, u64 time, unsigned int flags) { struct cpudata *cpu = container_of(data, struct cpudata, update_util); u64 delta_ns; - if (pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) { - if (flags & SCHED_CPUFREQ_IOWAIT) { - cpu->iowait_boost = int_tofp(1); - } else if (cpu->iowait_boost) { - /* Clear iowait_boost if the CPU may have been idle. */ - delta_ns = time - cpu->last_update; - if (delta_ns > TICK_NSEC) - cpu->iowait_boost = 0; - } - cpu->last_update = time; + if (flags & SCHED_CPUFREQ_IOWAIT) { + cpu->iowait_boost = int_tofp(1); + } else if (cpu->iowait_boost) { + /* Clear iowait_boost if the CPU may have been idle. 
*/ + delta_ns = time - cpu->last_update; + if (delta_ns > TICK_NSEC) + cpu->iowait_boost = 0; } - + cpu->last_update = time; delta_ns = time - cpu->sample.time; - if ((s64)delta_ns >= pid_params.sample_rate_ns) { - bool sample_taken = intel_pstate_sample(cpu, time); + if ((s64)delta_ns < INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL) + return; - if (sample_taken) { - intel_pstate_calc_avg_perf(cpu); - if (!hwp_active) - intel_pstate_adjust_busy_pstate(cpu); - } + if (intel_pstate_sample(cpu, time)) { + int target_pstate; + + target_pstate = get_target_pstate_use_cpu_load(cpu); + intel_pstate_adjust_pstate(cpu, target_pstate); } } +static struct pstate_funcs core_funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = core_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .update_util = intel_pstate_update_util_pid, +}; + +static const struct pstate_funcs silvermont_funcs = { + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .get_val = atom_get_val, + .get_scaling = silvermont_get_scaling, + .get_vid = atom_get_vid, + .update_util = intel_pstate_update_util, +}; + +static const struct pstate_funcs airmont_funcs = { + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .get_val = atom_get_val, + .get_scaling = airmont_get_scaling, + .get_vid = atom_get_vid, + .update_util = intel_pstate_update_util, +}; + +static const struct pstate_funcs knl_funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = knl_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .update_util = intel_pstate_update_util_pid, +}; + +static const struct pstate_funcs 
bxt_funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = core_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .update_util = intel_pstate_update_util, +}; + #define ICPU(model, policy) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\ (unsigned long)&policy } static const struct x86_cpu_id intel_pstate_cpu_ids[] = { - ICPU(INTEL_FAM6_SANDYBRIDGE, core_params), - ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_params), - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_params), - ICPU(INTEL_FAM6_IVYBRIDGE, core_params), - ICPU(INTEL_FAM6_HASWELL_CORE, core_params), - ICPU(INTEL_FAM6_BROADWELL_CORE, core_params), - ICPU(INTEL_FAM6_IVYBRIDGE_X, core_params), - ICPU(INTEL_FAM6_HASWELL_X, core_params), - ICPU(INTEL_FAM6_HASWELL_ULT, core_params), - ICPU(INTEL_FAM6_HASWELL_GT3E, core_params), - ICPU(INTEL_FAM6_BROADWELL_GT3E, core_params), - ICPU(INTEL_FAM6_ATOM_AIRMONT, airmont_params), - ICPU(INTEL_FAM6_SKYLAKE_MOBILE, core_params), - ICPU(INTEL_FAM6_BROADWELL_X, core_params), - ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_params), - ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), - ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_params), - ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_params), - ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_params), + ICPU(INTEL_FAM6_SANDYBRIDGE, core_funcs), + ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_funcs), + ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_funcs), + ICPU(INTEL_FAM6_IVYBRIDGE, core_funcs), + ICPU(INTEL_FAM6_HASWELL_CORE, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_CORE, core_funcs), + ICPU(INTEL_FAM6_IVYBRIDGE_X, core_funcs), + ICPU(INTEL_FAM6_HASWELL_X, core_funcs), + ICPU(INTEL_FAM6_HASWELL_ULT, core_funcs), + ICPU(INTEL_FAM6_HASWELL_GT3E, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_GT3E, core_funcs), + ICPU(INTEL_FAM6_ATOM_AIRMONT, airmont_funcs), + ICPU(INTEL_FAM6_SKYLAKE_MOBILE, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_X, core_funcs), + 
ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_funcs), + ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_funcs), + ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_funcs), + ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_funcs), + ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, bxt_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { - ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), - ICPU(INTEL_FAM6_BROADWELL_X, core_params), - ICPU(INTEL_FAM6_SKYLAKE_X, core_params), + ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_funcs), + ICPU(INTEL_FAM6_BROADWELL_X, core_funcs), + ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), {} }; static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { - ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_params), + ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_funcs), {} }; +static bool pid_in_use(void); + static int intel_pstate_init_cpu(unsigned int cpunum) { struct cpudata *cpu; @@ -1979,18 +1888,11 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu = all_cpu_data[cpunum]; if (!cpu) { - unsigned int size = sizeof(struct cpudata); - - if (per_cpu_limits) - size += sizeof(struct perf_limits); - - cpu = kzalloc(size, GFP_KERNEL); + cpu = kzalloc(sizeof(*cpu), GFP_KERNEL); if (!cpu) return -ENOMEM; all_cpu_data[cpunum] = cpu; - if (per_cpu_limits) - cpu->perf_limits = (struct perf_limits *)(cpu + 1); cpu->epp_default = -EINVAL; cpu->epp_powersave = -EINVAL; @@ -2009,14 +1911,12 @@ static int intel_pstate_init_cpu(unsigned int cpunum) intel_pstate_disable_ee(cpunum); intel_pstate_hwp_enable(cpu); - pid_params.sample_rate_ms = 50; - pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC; + } else if (pid_in_use()) { + intel_pstate_pid_reset(cpu); } intel_pstate_get_cpu_pstates(cpu); - intel_pstate_busy_pid_reset(cpu); - pr_debug("controlling: cpu %d\n", cpunum); return 0; @@ -2039,7 +1939,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) /* Prevent 
intel_pstate_update_util() from using stale data. */ cpu->sample.time = 0; cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, - intel_pstate_update_util); + pstate_funcs.update_util); cpu->update_util_set = true; } @@ -2055,46 +1955,68 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu) synchronize_sched(); } +static int intel_pstate_get_max_freq(struct cpudata *cpu) +{ + return global.turbo_disabled || global.no_turbo ? + cpu->pstate.max_freq : cpu->pstate.turbo_freq; +} + static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, - struct perf_limits *limits) + struct cpudata *cpu) { + int max_freq = intel_pstate_get_max_freq(cpu); int32_t max_policy_perf, min_policy_perf; - max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq); + max_policy_perf = div_ext_fp(policy->max, max_freq); max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1)); if (policy->max == policy->min) { min_policy_perf = max_policy_perf; } else { - min_policy_perf = div_ext_fp(policy->min, - policy->cpuinfo.max_freq); + min_policy_perf = div_ext_fp(policy->min, max_freq); min_policy_perf = clamp_t(int32_t, min_policy_perf, 0, max_policy_perf); } /* Normalize user input to [min_perf, max_perf] */ - limits->min_perf = max(min_policy_perf, - percent_ext_fp(limits->min_sysfs_pct)); - limits->min_perf = min(limits->min_perf, max_policy_perf); - limits->max_perf = min(max_policy_perf, - percent_ext_fp(limits->max_sysfs_pct)); - limits->max_perf = max(min_policy_perf, limits->max_perf); + if (per_cpu_limits) { + cpu->min_perf = min_policy_perf; + cpu->max_perf = max_policy_perf; + } else { + int32_t global_min, global_max; + + /* Global limits are in percent of the maximum turbo P-state. 
*/ + global_max = percent_ext_fp(global.max_perf_pct); + global_min = percent_ext_fp(global.min_perf_pct); + if (max_freq != cpu->pstate.turbo_freq) { + int32_t turbo_factor; + + turbo_factor = div_ext_fp(cpu->pstate.turbo_pstate, + cpu->pstate.max_pstate); + global_min = mul_ext_fp(global_min, turbo_factor); + global_max = mul_ext_fp(global_max, turbo_factor); + } + global_min = clamp_t(int32_t, global_min, 0, global_max); + + cpu->min_perf = max(min_policy_perf, global_min); + cpu->min_perf = min(cpu->min_perf, max_policy_perf); + cpu->max_perf = min(max_policy_perf, global_max); + cpu->max_perf = max(min_policy_perf, cpu->max_perf); - /* Make sure min_perf <= max_perf */ - limits->min_perf = min(limits->min_perf, limits->max_perf); + /* Make sure min_perf <= max_perf */ + cpu->min_perf = min(cpu->min_perf, cpu->max_perf); + } - limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS); - limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS); - limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100); - limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100); + cpu->max_perf = round_up(cpu->max_perf, EXT_FRAC_BITS); + cpu->min_perf = round_up(cpu->min_perf, EXT_FRAC_BITS); pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu, - limits->max_perf_pct, limits->min_perf_pct); + fp_ext_toint(cpu->max_perf * 100), + fp_ext_toint(cpu->min_perf * 100)); } static int intel_pstate_set_policy(struct cpufreq_policy *policy) { struct cpudata *cpu; - struct perf_limits *perf_limits = &global; if (!policy->cpuinfo.max_freq) return -ENODEV; @@ -2105,19 +2027,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; cpu->policy = policy->policy; - if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && - policy->max < policy->cpuinfo.max_freq && - policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) { - pr_debug("policy->max > max non turbo frequency\n"); - policy->max = 
policy->cpuinfo.max_freq; - } - - if (per_cpu_limits) - perf_limits = cpu->perf_limits; - mutex_lock(&intel_pstate_limits_lock); - intel_pstate_update_perf_limits(policy, perf_limits); + intel_pstate_update_perf_limits(policy, cpu); if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { /* @@ -2130,38 +2042,38 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) intel_pstate_set_update_util_hook(policy->cpu); - intel_pstate_hwp_set_policy(policy); + if (hwp_active) + intel_pstate_hwp_set(policy->cpu); mutex_unlock(&intel_pstate_limits_lock); return 0; } +static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy, + struct cpudata *cpu) +{ + if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && + policy->max < policy->cpuinfo.max_freq && + policy->max > cpu->pstate.max_freq) { + pr_debug("policy->max > max non turbo frequency\n"); + policy->max = policy->cpuinfo.max_freq; + } +} + static int intel_pstate_verify_policy(struct cpufreq_policy *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; update_turbo_state(); - policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ? 
- cpu->pstate.max_freq : - cpu->pstate.turbo_freq; - - cpufreq_verify_within_cpu_limits(policy); + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, + intel_pstate_get_max_freq(cpu)); if (policy->policy != CPUFREQ_POLICY_POWERSAVE && policy->policy != CPUFREQ_POLICY_PERFORMANCE) return -EINVAL; - /* When per-CPU limits are used, sysfs limits are not used */ - if (!per_cpu_limits) { - unsigned int max_freq, min_freq; - - max_freq = policy->cpuinfo.max_freq * - global.max_sysfs_pct / 100; - min_freq = policy->cpuinfo.max_freq * - global.min_sysfs_pct / 100; - cpufreq_verify_within_limits(policy, min_freq, max_freq); - } + intel_pstate_adjust_policy_max(policy, cpu); return 0; } @@ -2202,8 +2114,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; - if (per_cpu_limits) - intel_pstate_init_limits(cpu->perf_limits); + cpu->max_perf = int_ext_tofp(1); + cpu->min_perf = 0; policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; @@ -2257,10 +2169,12 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) struct cpudata *cpu = all_cpu_data[policy->cpu]; update_turbo_state(); - policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ? - cpu->pstate.max_freq : cpu->pstate.turbo_freq; + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, + intel_pstate_get_max_freq(cpu)); - cpufreq_verify_within_cpu_limits(policy); + intel_pstate_adjust_policy_max(policy, cpu); + + intel_pstate_update_perf_limits(policy, cpu); return 0; } @@ -2324,6 +2238,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) return ret; policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY; + policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY; /* This reflects the intel_pstate_get_cpu_pstates() setting. 
*/ policy->cur = policy->cpuinfo.min_freq; @@ -2341,7 +2256,13 @@ static struct cpufreq_driver intel_cpufreq = { .name = "intel_cpufreq", }; -static struct cpufreq_driver *intel_pstate_driver = &intel_pstate; +static struct cpufreq_driver *default_driver = &intel_pstate; + +static bool pid_in_use(void) +{ + return intel_pstate_driver == &intel_pstate && + pstate_funcs.update_util == intel_pstate_update_util_pid; +} static void intel_pstate_driver_cleanup(void) { @@ -2358,26 +2279,26 @@ static void intel_pstate_driver_cleanup(void) } } put_online_cpus(); + intel_pstate_driver = NULL; } -static int intel_pstate_register_driver(void) +static int intel_pstate_register_driver(struct cpufreq_driver *driver) { int ret; - intel_pstate_init_limits(&global); + memset(&global, 0, sizeof(global)); + global.max_perf_pct = 100; + intel_pstate_driver = driver; ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { intel_pstate_driver_cleanup(); return ret; } - mutex_lock(&intel_pstate_limits_lock); - driver_registered = true; - mutex_unlock(&intel_pstate_limits_lock); + global.min_perf_pct = min_perf_pct_min(); - if (intel_pstate_driver == &intel_pstate && !hwp_active && - pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load) + if (pid_in_use()) intel_pstate_debug_expose_params(); return 0; @@ -2388,14 +2309,9 @@ static int intel_pstate_unregister_driver(void) if (hwp_active) return -EBUSY; - if (intel_pstate_driver == &intel_pstate && !hwp_active && - pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load) + if (pid_in_use()) intel_pstate_debug_hide_params(); - mutex_lock(&intel_pstate_limits_lock); - driver_registered = false; - mutex_unlock(&intel_pstate_limits_lock); - cpufreq_unregister_driver(intel_pstate_driver); intel_pstate_driver_cleanup(); @@ -2404,7 +2320,7 @@ static int intel_pstate_unregister_driver(void) static ssize_t intel_pstate_show_status(char *buf) { - if (!driver_registered) + if (!intel_pstate_driver) return sprintf(buf, 
"off\n"); return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ? @@ -2416,11 +2332,11 @@ static int intel_pstate_update_status(const char *buf, size_t size) int ret; if (size == 3 && !strncmp(buf, "off", size)) - return driver_registered ? + return intel_pstate_driver ? intel_pstate_unregister_driver() : -EINVAL; if (size == 6 && !strncmp(buf, "active", size)) { - if (driver_registered) { + if (intel_pstate_driver) { if (intel_pstate_driver == &intel_pstate) return 0; @@ -2429,13 +2345,12 @@ static int intel_pstate_update_status(const char *buf, size_t size) return ret; } - intel_pstate_driver = &intel_pstate; - return intel_pstate_register_driver(); + return intel_pstate_register_driver(&intel_pstate); } if (size == 7 && !strncmp(buf, "passive", size)) { - if (driver_registered) { - if (intel_pstate_driver != &intel_pstate) + if (intel_pstate_driver) { + if (intel_pstate_driver == &intel_cpufreq) return 0; ret = intel_pstate_unregister_driver(); @@ -2443,8 +2358,7 @@ static int intel_pstate_update_status(const char *buf, size_t size) return ret; } - intel_pstate_driver = &intel_cpufreq; - return intel_pstate_register_driver(); + return intel_pstate_register_driver(&intel_cpufreq); } return -EINVAL; @@ -2465,23 +2379,17 @@ static int __init intel_pstate_msrs_not_valid(void) return 0; } -static void __init copy_pid_params(struct pstate_adjust_policy *policy) -{ - pid_params.sample_rate_ms = policy->sample_rate_ms; - pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; - pid_params.p_gain_pct = policy->p_gain_pct; - pid_params.i_gain_pct = policy->i_gain_pct; - pid_params.d_gain_pct = policy->d_gain_pct; - pid_params.deadband = policy->deadband; - pid_params.setpoint = policy->setpoint; -} - #ifdef CONFIG_ACPI static void intel_pstate_use_acpi_profile(void) { - if (acpi_gbl_FADT.preferred_profile == PM_MOBILE) - pstate_funcs.get_target_pstate = - get_target_pstate_use_cpu_load; + switch (acpi_gbl_FADT.preferred_profile) { + case 
PM_MOBILE: + case PM_TABLET: + case PM_APPLIANCE_PC: + case PM_DESKTOP: + case PM_WORKSTATION: + pstate_funcs.update_util = intel_pstate_update_util; + } } #else static void intel_pstate_use_acpi_profile(void) @@ -2498,7 +2406,7 @@ static void __init copy_cpu_funcs(struct pstate_funcs *funcs) pstate_funcs.get_scaling = funcs->get_scaling; pstate_funcs.get_val = funcs->get_val; pstate_funcs.get_vid = funcs->get_vid; - pstate_funcs.get_target_pstate = funcs->get_target_pstate; + pstate_funcs.update_util = funcs->update_util; intel_pstate_use_acpi_profile(); } @@ -2637,28 +2545,30 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = { static int __init intel_pstate_init(void) { - const struct x86_cpu_id *id; - struct cpu_defaults *cpu_def; - int rc = 0; + int rc; if (no_load) return -ENODEV; - if (x86_match_cpu(hwp_support_ids) && !no_hwp) { - copy_cpu_funcs(&core_params.funcs); - hwp_active++; - intel_pstate.attr = hwp_cpufreq_attrs; - goto hwp_cpu_matched; - } - - id = x86_match_cpu(intel_pstate_cpu_ids); - if (!id) - return -ENODEV; + if (x86_match_cpu(hwp_support_ids)) { + copy_cpu_funcs(&core_funcs); + if (no_hwp) { + pstate_funcs.update_util = intel_pstate_update_util; + } else { + hwp_active++; + intel_pstate.attr = hwp_cpufreq_attrs; + pstate_funcs.update_util = intel_pstate_update_util_hwp; + goto hwp_cpu_matched; + } + } else { + const struct x86_cpu_id *id; - cpu_def = (struct cpu_defaults *)id->driver_data; + id = x86_match_cpu(intel_pstate_cpu_ids); + if (!id) + return -ENODEV; - copy_pid_params(&cpu_def->pid_policy); - copy_cpu_funcs(&cpu_def->funcs); + copy_cpu_funcs((struct pstate_funcs *)id->driver_data); + } if (intel_pstate_msrs_not_valid()) return -ENODEV; @@ -2685,7 +2595,7 @@ hwp_cpu_matched: intel_pstate_sysfs_expose_params(); mutex_lock(&intel_pstate_driver_lock); - rc = intel_pstate_register_driver(); + rc = intel_pstate_register_driver(default_driver); mutex_unlock(&intel_pstate_driver_lock); if (rc) return rc; @@ -2706,7 +2616,7 
@@ static int __init intel_pstate_setup(char *str) no_load = 1; } else if (!strcmp(str, "passive")) { pr_info("Passive mode enabled\n"); - intel_pstate_driver = &intel_cpufreq; + default_driver = &intel_cpufreq; no_hwp = 1; } if (!strcmp(str, "no_hwp")) { diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c index ab25b1235a5e..fd1886faf33a 100644 --- a/drivers/cpufreq/mt8173-cpufreq.c +++ b/drivers/cpufreq/mt8173-cpufreq.c @@ -573,14 +573,33 @@ static struct platform_driver mt8173_cpufreq_platdrv = { .probe = mt8173_cpufreq_probe, }; -static int mt8173_cpufreq_driver_init(void) +/* List of machines supported by this driver */ +static const struct of_device_id mt8173_cpufreq_machines[] __initconst = { + { .compatible = "mediatek,mt817x", }, + { .compatible = "mediatek,mt8173", }, + { .compatible = "mediatek,mt8176", }, + + { } +}; + +static int __init mt8173_cpufreq_driver_init(void) { + struct device_node *np; + const struct of_device_id *match; struct platform_device *pdev; int err; - if (!of_machine_is_compatible("mediatek,mt8173")) + np = of_find_node_by_path("/"); + if (!np) return -ENODEV; + match = of_match_node(mt8173_cpufreq_machines, np); + of_node_put(np); + if (!match) { + pr_warn("Machine is not compatible with mt8173-cpufreq\n"); + return -ENODEV; + } + err = platform_driver_register(&mt8173_cpufreq_platdrv); if (err) return err; diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index bfec1bcd3835..e2ea433a5f9c 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -52,17 +52,27 @@ static u32 get_bus_freq(void) { struct device_node *soc; u32 sysfreq; + struct clk *pltclk; + int ret; + /* get platform freq by searching bus-frequency property */ soc = of_find_node_by_type(NULL, "soc"); - if (!soc) - return 0; - - if (of_property_read_u32(soc, "bus-frequency", &sysfreq)) - sysfreq = 0; + if (soc) { + ret = of_property_read_u32(soc, "bus-frequency", &sysfreq); + 
of_node_put(soc); + if (!ret) + return sysfreq; + } - of_node_put(soc); + /* get platform freq by its clock name */ + pltclk = clk_get(NULL, "cg-pll0-div1"); + if (IS_ERR(pltclk)) { + pr_err("%s: can't get bus frequency %ld\n", + __func__, PTR_ERR(pltclk)); + return PTR_ERR(pltclk); + } - return sysfreq; + return clk_get_rate(pltclk); } static struct clk *cpu_to_clk(int cpu) diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c new file mode 100644 index 000000000000..fe7875311d62 --- /dev/null +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include <linux/cpufreq.h> +#include <linux/dma-mapping.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> + +#include <soc/tegra/bpmp.h> +#include <soc/tegra/bpmp-abi.h> + +#define EDVD_CORE_VOLT_FREQ(core) (0x20 + (core) * 0x4) +#define EDVD_CORE_VOLT_FREQ_F_SHIFT 0 +#define EDVD_CORE_VOLT_FREQ_V_SHIFT 16 + +struct tegra186_cpufreq_cluster_info { + unsigned long offset; + int cpus[4]; + unsigned int bpmp_cluster_id; +}; + +#define NO_CPU -1 +static const struct tegra186_cpufreq_cluster_info tegra186_clusters[] = { + /* Denver cluster */ + { + .offset = SZ_64K * 7, + .cpus = { 1, 2, NO_CPU, NO_CPU }, + .bpmp_cluster_id = 0, + }, + /* A57 cluster */ + { + .offset = SZ_64K * 6, + .cpus = { 0, 3, 4, 5 }, + .bpmp_cluster_id = 1, + }, +}; + +struct tegra186_cpufreq_cluster { + const struct tegra186_cpufreq_cluster_info *info; + struct cpufreq_frequency_table *table; +}; + +struct tegra186_cpufreq_data { + void __iomem *regs; + + size_t num_clusters; + struct tegra186_cpufreq_cluster *clusters; +}; + +static int tegra186_cpufreq_init(struct cpufreq_policy *policy) +{ + struct tegra186_cpufreq_data *data = cpufreq_get_driver_data(); + unsigned int i; + + for (i = 0; i < data->num_clusters; i++) { + struct tegra186_cpufreq_cluster *cluster = &data->clusters[i]; + const struct tegra186_cpufreq_cluster_info *info = + cluster->info; + int core; + + for (core = 0; core < ARRAY_SIZE(info->cpus); core++) { + if (info->cpus[core] == policy->cpu) + break; + } + if (core == ARRAY_SIZE(info->cpus)) + continue; + + policy->driver_data = + data->regs + info->offset + EDVD_CORE_VOLT_FREQ(core); + cpufreq_table_validate_and_show(policy, cluster->table); + } + + policy->cpuinfo.transition_latency = 300 * 1000; + + return 0; +} + +static int tegra186_cpufreq_set_target(struct cpufreq_policy *policy, + unsigned int index) +{ + struct cpufreq_frequency_table *tbl = policy->freq_table + index; + void __iomem *edvd_reg = policy->driver_data; + 
u32 edvd_val = tbl->driver_data; + + writel(edvd_val, edvd_reg); + + return 0; +} + +static struct cpufreq_driver tegra186_cpufreq_driver = { + .name = "tegra186", + .flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY, + .verify = cpufreq_generic_frequency_table_verify, + .target_index = tegra186_cpufreq_set_target, + .init = tegra186_cpufreq_init, + .attr = cpufreq_generic_attr, +}; + +static struct cpufreq_frequency_table *init_vhint_table( + struct platform_device *pdev, struct tegra_bpmp *bpmp, + unsigned int cluster_id) +{ + struct cpufreq_frequency_table *table; + struct mrq_cpu_vhint_request req; + struct tegra_bpmp_message msg; + struct cpu_vhint_data *data; + int err, i, j, num_rates = 0; + dma_addr_t phys; + void *virt; + + virt = dma_alloc_coherent(bpmp->dev, sizeof(*data), &phys, + GFP_KERNEL | GFP_DMA32); + if (!virt) + return ERR_PTR(-ENOMEM); + + data = (struct cpu_vhint_data *)virt; + + memset(&req, 0, sizeof(req)); + req.addr = phys; + req.cluster_id = cluster_id; + + memset(&msg, 0, sizeof(msg)); + msg.mrq = MRQ_CPU_VHINT; + msg.tx.data = &req; + msg.tx.size = sizeof(req); + + err = tegra_bpmp_transfer(bpmp, &msg); + if (err) { + table = ERR_PTR(err); + goto free; + } + + for (i = data->vfloor; i <= data->vceil; i++) { + u16 ndiv = data->ndiv[i]; + + if (ndiv < data->ndiv_min || ndiv > data->ndiv_max) + continue; + + /* Only store lowest voltage index for each rate */ + if (i > 0 && ndiv == data->ndiv[i - 1]) + continue; + + num_rates++; + } + + table = devm_kcalloc(&pdev->dev, num_rates + 1, sizeof(*table), + GFP_KERNEL); + if (!table) { + table = ERR_PTR(-ENOMEM); + goto free; + } + + for (i = data->vfloor, j = 0; i <= data->vceil; i++) { + struct cpufreq_frequency_table *point; + u16 ndiv = data->ndiv[i]; + u32 edvd_val = 0; + + if (ndiv < data->ndiv_min || ndiv > data->ndiv_max) + continue; + + /* Only store lowest voltage index for each rate */ + if (i > 0 && ndiv == data->ndiv[i - 1]) + continue; + + edvd_val |= i << 
EDVD_CORE_VOLT_FREQ_V_SHIFT; + edvd_val |= ndiv << EDVD_CORE_VOLT_FREQ_F_SHIFT; + + point = &table[j++]; + point->driver_data = edvd_val; + point->frequency = data->ref_clk_hz * ndiv / data->pdiv / + data->mdiv / 1000; + } + + table[j].frequency = CPUFREQ_TABLE_END; + +free: + dma_free_coherent(bpmp->dev, sizeof(*data), virt, phys); + + return table; +} + +static int tegra186_cpufreq_probe(struct platform_device *pdev) +{ + struct tegra186_cpufreq_data *data; + struct tegra_bpmp *bpmp; + struct resource *res; + unsigned int i = 0, err; + + data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->clusters = devm_kcalloc(&pdev->dev, ARRAY_SIZE(tegra186_clusters), + sizeof(*data->clusters), GFP_KERNEL); + if (!data->clusters) + return -ENOMEM; + + data->num_clusters = ARRAY_SIZE(tegra186_clusters); + + bpmp = tegra_bpmp_get(&pdev->dev); + if (IS_ERR(bpmp)) + return PTR_ERR(bpmp); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(data->regs)) { + err = PTR_ERR(data->regs); + goto put_bpmp; + } + + for (i = 0; i < data->num_clusters; i++) { + struct tegra186_cpufreq_cluster *cluster = &data->clusters[i]; + + cluster->info = &tegra186_clusters[i]; + cluster->table = init_vhint_table( + pdev, bpmp, cluster->info->bpmp_cluster_id); + if (IS_ERR(cluster->table)) { + err = PTR_ERR(cluster->table); + goto put_bpmp; + } + } + + tegra_bpmp_put(bpmp); + + tegra186_cpufreq_driver.driver_data = data; + + err = cpufreq_register_driver(&tegra186_cpufreq_driver); + if (err) + return err; + + return 0; + +put_bpmp: + tegra_bpmp_put(bpmp); + + return err; +} + +static int tegra186_cpufreq_remove(struct platform_device *pdev) +{ + cpufreq_unregister_driver(&tegra186_cpufreq_driver); + + return 0; +} + +static const struct of_device_id tegra186_cpufreq_of_match[] = { + { .compatible = "nvidia,tegra186-ccplex-cluster", }, + { } +}; +MODULE_DEVICE_TABLE(of, 
tegra186_cpufreq_of_match); + +static struct platform_driver tegra186_cpufreq_platform_driver = { + .driver = { + .name = "tegra186-cpufreq", + .of_match_table = tegra186_cpufreq_of_match, + }, + .probe = tegra186_cpufreq_probe, + .remove = tegra186_cpufreq_remove, +}; +module_platform_driver(tegra186_cpufreq_platform_driver); + +MODULE_AUTHOR("Mikko Perttunen <mperttunen@nvidia.com>"); +MODULE_DESCRIPTION("NVIDIA Tegra186 cpufreq driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/cpuidle/cpuidle-cps.c b/drivers/cpuidle/cpuidle-cps.c index 926ba9871c62..12b9145913de 100644 --- a/drivers/cpuidle/cpuidle-cps.c +++ b/drivers/cpuidle/cpuidle-cps.c @@ -118,7 +118,7 @@ static void __init cps_cpuidle_unregister(void) static int __init cps_cpuidle_init(void) { - int err, cpu, core, i; + int err, cpu, i; struct cpuidle_device *device; /* Detect supported states */ @@ -160,7 +160,6 @@ static int __init cps_cpuidle_init(void) } for_each_possible_cpu(cpu) { - core = cpu_data[cpu].core; device = &per_cpu(cpuidle_dev, cpu); device->cpu = cpu; #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index cda8f62d555b..12409a519cc5 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -56,10 +56,9 @@ static int snooze_loop(struct cpuidle_device *dev, snooze_exit_time = get_tb() + snooze_timeout; ppc64_runlatch_off(); + HMT_very_low(); while (!need_resched()) { - HMT_low(); - HMT_very_low(); - if (snooze_timeout_en && get_tb() > snooze_exit_time) + if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) break; } @@ -215,11 +214,25 @@ static inline void add_powernv_state(int index, const char *name, stop_psscr_table[index].mask = psscr_mask; } +/* + * Returns 0 if prop1_len == prop2_len. 
Else returns -1 + */ +static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len, + const char *prop2, int prop2_len) +{ + if (prop1_len == prop2_len) + return 0; + + pr_warn("cpuidle-powernv: array sizes don't match for %s and %s\n", + prop1, prop2); + return -1; +} + static int powernv_add_idle_states(void) { struct device_node *power_mgt; int nr_idle_states = 1; /* Snooze */ - int dt_idle_states; + int dt_idle_states, count; u32 latency_ns[CPUIDLE_STATE_MAX]; u32 residency_ns[CPUIDLE_STATE_MAX]; u32 flags[CPUIDLE_STATE_MAX]; @@ -244,6 +257,21 @@ static int powernv_add_idle_states(void) goto out; } + count = of_property_count_u32_elems(power_mgt, + "ibm,cpu-idle-state-latencies-ns"); + + if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states, + "ibm,cpu-idle-state-latencies-ns", + count) != 0) + goto out; + + count = of_property_count_strings(power_mgt, + "ibm,cpu-idle-state-names"); + if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states, + "ibm,cpu-idle-state-names", + count) != 0) + goto out; + /* * Since snooze is used as first idle state, max idle states allowed is * CPUIDLE_STATE_MAX -1 @@ -278,6 +306,22 @@ static int powernv_add_idle_states(void) has_stop_states = (flags[0] & (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP)); if (has_stop_states) { + count = of_property_count_u64_elems(power_mgt, + "ibm,cpu-idle-state-psscr"); + if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", + dt_idle_states, + "ibm,cpu-idle-state-psscr", + count) != 0) + goto out; + + count = of_property_count_u64_elems(power_mgt, + "ibm,cpu-idle-state-psscr-mask"); + if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", + dt_idle_states, + "ibm,cpu-idle-state-psscr-mask", + count) != 0) + goto out; + if (of_property_read_u64_array(power_mgt, "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) { pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); @@ -292,8 +336,21 @@ static int powernv_add_idle_states(void) } } 
- rc = of_property_read_u32_array(power_mgt, - "ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states); + count = of_property_count_u32_elems(power_mgt, + "ibm,cpu-idle-state-residency-ns"); + + if (count < 0) { + rc = count; + } else if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", + dt_idle_states, + "ibm,cpu-idle-state-residency-ns", + count) != 0) { + goto out; + } else { + rc = of_property_read_u32_array(power_mgt, + "ibm,cpu-idle-state-residency-ns", + residency_ns, dt_idle_states); + } for (i = 0; i < dt_idle_states; i++) { unsigned int exit_latency, target_residency; diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 32100c4851dd..49cbdcba7883 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -506,7 +506,7 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) ctx->dev = caam_jr_alloc(); if (IS_ERR(ctx->dev)) { - dev_err(ctx->dev, "Job Ring Device allocation for transform failed\n"); + pr_err("Job Ring Device allocation for transform failed\n"); return PTR_ERR(ctx->dev); } diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index fef39f9f41ee..5d7f73d60515 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -281,7 +281,8 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask) /* Try to run it through DECO0 */ ret = run_descriptor_deco0(ctrldev, desc, &status); - if (ret || status) { + if (ret || + (status && status != JRSTA_SSRC_JUMP_HALT_CC)) { dev_err(ctrldev, "Failed to deinstantiate RNG4 SH%d\n", sh_idx); @@ -301,15 +302,13 @@ static int caam_remove(struct platform_device *pdev) struct device *ctrldev; struct caam_drv_private *ctrlpriv; struct caam_ctrl __iomem *ctrl; - int ring; ctrldev = &pdev->dev; ctrlpriv = dev_get_drvdata(ctrldev); ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; - /* Remove platform devices for JobRs */ - for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) - 
of_device_unregister(ctrlpriv->jrpdev[ring]); + /* Remove platform devices under the crypto node */ + of_platform_depopulate(ctrldev); /* De-initialize RNG state handles initialized by this driver. */ if (ctrlpriv->rng4_sh_init) @@ -418,10 +417,21 @@ DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n"); DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n"); #endif +static const struct of_device_id caam_match[] = { + { + .compatible = "fsl,sec-v4.0", + }, + { + .compatible = "fsl,sec4.0", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, caam_match); + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { - int ret, ring, ridx, rspec, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN; + int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN; u64 caam_id; struct device *dev; struct device_node *nprop, *np; @@ -597,47 +607,24 @@ static int caam_probe(struct platform_device *pdev) goto iounmap_ctrl; } - /* - * Detect and enable JobRs - * First, find out how many ring spec'ed, allocate references - * for all, then go probe each one. 
- */ - rspec = 0; - for_each_available_child_of_node(nprop, np) - if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") || - of_device_is_compatible(np, "fsl,sec4.0-job-ring")) - rspec++; - - ctrlpriv->jrpdev = devm_kcalloc(&pdev->dev, rspec, - sizeof(*ctrlpriv->jrpdev), GFP_KERNEL); - if (ctrlpriv->jrpdev == NULL) { - ret = -ENOMEM; + ret = of_platform_populate(nprop, caam_match, NULL, dev); + if (ret) { + dev_err(dev, "JR platform devices creation error\n"); goto iounmap_ctrl; } ring = 0; - ridx = 0; - ctrlpriv->total_jobrs = 0; for_each_available_child_of_node(nprop, np) if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") || of_device_is_compatible(np, "fsl,sec4.0-job-ring")) { - ctrlpriv->jrpdev[ring] = - of_platform_device_create(np, NULL, dev); - if (!ctrlpriv->jrpdev[ring]) { - pr_warn("JR physical index %d: Platform device creation error\n", - ridx); - ridx++; - continue; - } ctrlpriv->jr[ring] = (struct caam_job_ring __iomem __force *) ((__force uint8_t *)ctrl + - (ridx + JR_BLOCK_NUMBER) * + (ring + JR_BLOCK_NUMBER) * BLOCK_OFFSET ); ctrlpriv->total_jobrs++; ring++; - ridx++; - } + } /* Check to see if QI present. 
If so, enable */ ctrlpriv->qi_present = @@ -847,17 +834,6 @@ disable_caam_ipg: return ret; } -static struct of_device_id caam_match[] = { - { - .compatible = "fsl,sec-v4.0", - }, - { - .compatible = "fsl,sec4.0", - }, - {}, -}; -MODULE_DEVICE_TABLE(of, caam_match); - static struct platform_driver caam_driver = { .driver = { .name = "caam", diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index e2bcacc1a921..dbed8baeebe5 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -66,7 +66,6 @@ struct caam_drv_private_jr { struct caam_drv_private { struct device *dev; - struct platform_device **jrpdev; /* Alloc'ed array per sub-device */ struct platform_device *pdev; /* Physical-presence section */ diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index 3e2ab3b14eea..9e95bf94eb13 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -2,6 +2,7 @@ menuconfig DEV_DAX tristate "DAX: direct access to differentiated memory" default m if NVDIMM_DAX depends on TRANSPARENT_HUGEPAGE + select SRCU help Support raw access to differentiated (persistence, bandwidth, latency...) 
memory via an mmap(2) capable character diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 80c6db279ae1..806f180c80d8 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -25,6 +25,7 @@ #include "dax.h" static dev_t dax_devt; +DEFINE_STATIC_SRCU(dax_srcu); static struct class *dax_class; static DEFINE_IDA(dax_minor_ida); static int nr_dax = CONFIG_NR_DEV_DAX; @@ -60,7 +61,7 @@ struct dax_region { * @region - parent region * @dev - device backing the character device * @cdev - core chardev data - * @alive - !alive + rcu grace period == no new mappings can be established + * @alive - !alive + srcu grace period == no new mappings can be established * @id - child id in the region * @num_resources - number of physical address extents in this device * @res - array of physical address ranges @@ -569,7 +570,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) static int dax_dev_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size) { - int rc; + int rc, id; struct file *filp = vmf->vma->vm_file; struct dax_dev *dax_dev = filp->private_data; @@ -578,7 +579,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf, ? "write" : "read", vmf->vma->vm_start, vmf->vma->vm_end); - rcu_read_lock(); + id = srcu_read_lock(&dax_srcu); switch (pe_size) { case PE_SIZE_PTE: rc = __dax_dev_pte_fault(dax_dev, vmf); @@ -592,7 +593,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf, default: return VM_FAULT_FALLBACK; } - rcu_read_unlock(); + srcu_read_unlock(&dax_srcu, id); return rc; } @@ -713,11 +714,11 @@ static void unregister_dax_dev(void *dev) * Note, rcu is not protecting the liveness of dax_dev, rcu is * ensuring that any fault handlers that might have seen * dax_dev->alive == true, have completed. Any fault handlers - * that start after synchronize_rcu() has started will abort + * that start after synchronize_srcu() has started will abort * upon seeing dax_dev->alive == false. 
*/ dax_dev->alive = false; - synchronize_rcu(); + synchronize_srcu(&dax_srcu); unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1); cdev_del(cdev); device_unregister(dev); diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index 71576b8bdfef..a4f2fa1091e4 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -25,6 +25,35 @@ #define DEVFREQ_GOV_SUSPEND 0x4 #define DEVFREQ_GOV_RESUME 0x5 +/** + * struct devfreq_governor - Devfreq policy governor + * @node: list node - contains registered devfreq governors + * @name: Governor's name + * @immutable: Immutable flag for governor. If the value is 1, + * this govenror is never changeable to other governor. + * @get_target_freq: Returns desired operating frequency for the device. + * Basically, get_target_freq will run + * devfreq_dev_profile.get_dev_status() to get the + * status of the device (load = busy_time / total_time). + * If no_central_polling is set, this callback is called + * only with update_devfreq() notified by OPP. + * @event_handler: Callback for devfreq core framework to notify events + * to governors. Events include per device governor + * init and exit, opp changes out of devfreq, suspend + * and resume of per device devfreq during device idle. + * + * Note that the callbacks are called with devfreq->lock locked by devfreq. 
+ */ +struct devfreq_governor { + struct list_head node; + + const char name[DEVFREQ_NAME_LEN]; + const unsigned int immutable; + int (*get_target_freq)(struct devfreq *this, unsigned long *freq); + int (*event_handler)(struct devfreq *devfreq, + unsigned int event, void *data); +}; + /* Caution: devfreq->lock must be locked before calling update_devfreq */ extern int update_devfreq(struct devfreq *devfreq); diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 4773f2867234..96afb2aeed18 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -10,26 +10,16 @@ config EDAC_SUPPORT bool menuconfig EDAC - bool "EDAC (Error Detection And Correction) reporting" - depends on HAS_IOMEM && EDAC_SUPPORT + tristate "EDAC (Error Detection And Correction) reporting" + depends on HAS_IOMEM && EDAC_SUPPORT && RAS help - EDAC is designed to report errors in the core system. - These are low-level errors that are reported in the CPU or - supporting chipset or other subsystems: + EDAC is a subsystem along with hardware-specific drivers designed to + report hardware errors. These are low-level errors that are reported + in the CPU or supporting chipset or other subsystems: memory errors, cache errors, PCI errors, thermal throttling, etc.. If unsure, select 'Y'. - If this code is reporting problems on your system, please - see the EDAC project web pages for more information at: - - <http://bluesmoke.sourceforge.net/> - - and: - - <http://buttersideup.com/edacwiki> - - There is also a mailing list for the EDAC project, which can - be found via the sourceforge page. + The mailing list for the EDAC project is linux-edac@vger.kernel.org. if EDAC @@ -62,21 +52,9 @@ config EDAC_DECODE_MCE which occur really early upon boot, before the module infrastructure has been initialized. -config EDAC_MM_EDAC - tristate "Main Memory EDAC (Error Detection And Correction) reporting" - select RAS - help - Some systems are able to detect and correct errors in main - memory. 
EDAC can report statistics on memory error - detection and correction (EDAC - or commonly referred to ECC - errors). EDAC will also try to decode where these errors - occurred so that a particular failing memory module can be - replaced. If unsure, select 'Y'. - config EDAC_GHES bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" - depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) - default y + depends on ACPI_APEI_GHES && (EDAC=y) help Not all machines support hardware-driven error report. Some of those provide a BIOS-driven error report mechanism via ACPI, using the @@ -98,7 +76,7 @@ config EDAC_GHES config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64)" - depends on EDAC_MM_EDAC && AMD_NB && EDAC_DECODE_MCE + depends on AMD_NB && EDAC_DECODE_MCE help Support for error detection and correction of DRAM ECC errors on the AMD64 families (>= K8) of memory controllers. @@ -124,28 +102,28 @@ config EDAC_AMD64_ERROR_INJECTION config EDAC_AMD76X tristate "AMD 76x (760, 762, 768)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the AMD 76x series of chipsets used with the Athlon processor. config EDAC_E7XXX tristate "Intel e7xxx (e7205, e7500, e7501, e7505)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel E7205, E7500, E7501 and E7505 server chipsets. config EDAC_E752X tristate "Intel e752x (e7520, e7525, e7320) and 3100" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel E7520, E7525, E7320 server chipsets. 
config EDAC_I82443BXGX tristate "Intel 82443BX/GX (440BX/GX)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 depends on BROKEN help Support for error detection and correction on the Intel @@ -153,56 +131,56 @@ config EDAC_I82443BXGX config EDAC_I82875P tristate "Intel 82875p (D82875P, E7210)" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel DP82785P and E7210 server chipsets. config EDAC_I82975X tristate "Intel 82975x (D82975x)" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel DP82975x server chipsets. config EDAC_I3000 tristate "Intel 3000/3010" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel 3000 and 3010 server chipsets. config EDAC_I3200 tristate "Intel 3200" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel 3200 and 3210 server chipsets. config EDAC_IE31200 tristate "Intel e312xx" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel E3-1200 based DRAM controllers. config EDAC_X38 tristate "Intel X38" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction on the Intel X38 server chipsets. config EDAC_I5400 tristate "Intel 5400 (Seaburg) chipsets" - depends on EDAC_MM_EDAC && PCI && X86 + depends on PCI && X86 help Support for error detection and correction the Intel i5400 MCH chipset (Seaburg). 
config EDAC_I7CORE tristate "Intel i7 Core (Nehalem) processors" - depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL + depends on PCI && X86 && X86_MCE_INTEL help Support for error detection and correction the Intel i7 Core (Nehalem) Integrated Memory Controller that exists on @@ -211,58 +189,56 @@ config EDAC_I7CORE config EDAC_I82860 tristate "Intel 82860" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Intel 82860 chipset. config EDAC_R82600 tristate "Radisys 82600 embedded chipset" - depends on EDAC_MM_EDAC && PCI && X86_32 + depends on PCI && X86_32 help Support for error detection and correction on the Radisys 82600 embedded chipset. config EDAC_I5000 tristate "Intel Greencreek/Blackford chipset" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction the Intel Greekcreek/Blackford chipsets. config EDAC_I5100 tristate "Intel San Clemente MCH" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction the Intel San Clemente MCH. config EDAC_I7300 tristate "Intel Clarksboro MCH" - depends on EDAC_MM_EDAC && X86 && PCI + depends on X86 && PCI help Support for error detection and correction the Intel Clarksboro MCH (Intel 7300 chipset). config EDAC_SBRIDGE tristate "Intel Sandy-Bridge/Ivy-Bridge/Haswell Integrated MC" - depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL - depends on PCI_MMCONFIG + depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG help Support for error detection and correction the Intel Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers. 
config EDAC_SKX tristate "Intel Skylake server Integrated MC" - depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL - depends on PCI_MMCONFIG + depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG help Support for error detection and correction the Intel Skylake server Integrated Memory Controllers. config EDAC_PND2 tristate "Intel Pondicherry2" - depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL + depends on PCI && X86_64 && X86_MCE_INTEL help Support for error detection and correction on the Intel Pondicherry2 Integrated Memory Controller. This SoC IP is @@ -271,36 +247,35 @@ config EDAC_PND2 config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" - depends on EDAC_MM_EDAC && FSL_SOC + depends on FSL_SOC help Support for error detection and correction on the Freescale MPC8349, MPC8560, MPC8540, MPC8548, T4240 config EDAC_LAYERSCAPE tristate "Freescale Layerscape DDR" - depends on EDAC_MM_EDAC && ARCH_LAYERSCAPE + depends on ARCH_LAYERSCAPE help Support for error detection and correction on Freescale memory controllers on Layerscape SoCs. config EDAC_MV64X60 tristate "Marvell MV64x60" - depends on EDAC_MM_EDAC && MV64X60 + depends on MV64X60 help Support for error detection and correction on the Marvell MV64360 and MV64460 chipsets. config EDAC_PASEMI tristate "PA Semi PWRficient" - depends on EDAC_MM_EDAC && PCI - depends on PPC_PASEMI + depends on PPC_PASEMI && PCI help Support for error detection and correction on PA Semi PWRficient. 
config EDAC_CELL tristate "Cell Broadband Engine memory controller" - depends on EDAC_MM_EDAC && PPC_CELL_COMMON + depends on PPC_CELL_COMMON help Support for error detection and correction on the Cell Broadband Engine internal memory controller @@ -308,7 +283,7 @@ config EDAC_CELL config EDAC_PPC4XX tristate "PPC4xx IBM DDR2 Memory Controller" - depends on EDAC_MM_EDAC && 4xx + depends on 4xx help This enables support for EDAC on the ECC memory used with the IBM DDR2 memory controller found in various @@ -317,7 +292,7 @@ config EDAC_PPC4XX config EDAC_AMD8131 tristate "AMD8131 HyperTransport PCI-X Tunnel" - depends on EDAC_MM_EDAC && PCI && PPC_MAPLE + depends on PCI && PPC_MAPLE help Support for error detection and correction on the AMD8131 HyperTransport PCI-X Tunnel chip. @@ -326,7 +301,7 @@ config EDAC_AMD8131 config EDAC_AMD8111 tristate "AMD8111 HyperTransport I/O Hub" - depends on EDAC_MM_EDAC && PCI && PPC_MAPLE + depends on PCI && PPC_MAPLE help Support for error detection and correction on the AMD8111 HyperTransport I/O Hub chip. @@ -335,7 +310,7 @@ config EDAC_AMD8111 config EDAC_CPC925 tristate "IBM CPC925 Memory Controller (PPC970FX)" - depends on EDAC_MM_EDAC && PPC64 + depends on PPC64 help Support for error detection and correction on the IBM CPC925 Bridge and Memory Controller, which is @@ -344,7 +319,7 @@ config EDAC_CPC925 config EDAC_TILE tristate "Tilera Memory Controller" - depends on EDAC_MM_EDAC && TILE + depends on TILE default y help Support for error detection and correction on the @@ -352,49 +327,59 @@ config EDAC_TILE config EDAC_HIGHBANK_MC tristate "Highbank Memory Controller" - depends on EDAC_MM_EDAC && ARCH_HIGHBANK + depends on ARCH_HIGHBANK help Support for error detection and correction on the Calxeda Highbank memory controller. 
config EDAC_HIGHBANK_L2 tristate "Highbank L2 Cache" - depends on EDAC_MM_EDAC && ARCH_HIGHBANK + depends on ARCH_HIGHBANK help Support for error detection and correction on the Calxeda Highbank memory controller. config EDAC_OCTEON_PC tristate "Cavium Octeon Primary Caches" - depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + depends on CPU_CAVIUM_OCTEON help Support for error detection and correction on the primary caches of the cnMIPS cores of Cavium Octeon family SOCs. config EDAC_OCTEON_L2C tristate "Cavium Octeon Secondary Caches (L2C)" - depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC + depends on CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. config EDAC_OCTEON_LMC tristate "Cavium Octeon DRAM Memory Controller (LMC)" - depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC + depends on CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. config EDAC_OCTEON_PCI tristate "Cavium Octeon PCI Controller" - depends on EDAC_MM_EDAC && PCI && CAVIUM_OCTEON_SOC + depends on PCI && CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. +config EDAC_THUNDERX + tristate "Cavium ThunderX EDAC" + depends on ARM64 + depends on PCI + help + Support for error detection and correction on the + Cavium ThunderX memory controllers (LMC), Cache + Coherent Processor Interconnect (CCPI) and L2 cache + blocks (TAD, CBC, MCI). + config EDAC_ALTERA bool "Altera SOCFPGA ECC" - depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA + depends on EDAC=y && ARCH_SOCFPGA help Support for error detection and correction on the Altera SOCs. This must be selected for SDRAM ECC. @@ -460,14 +445,14 @@ config EDAC_ALTERA_SDMMC config EDAC_SYNOPSYS tristate "Synopsys DDR Memory Controller" - depends on EDAC_MM_EDAC && ARCH_ZYNQ + depends on ARCH_ZYNQ help Support for error detection and correction on the Synopsys DDR memory controller. 
config EDAC_XGENE tristate "APM X-Gene SoC" - depends on EDAC_MM_EDAC && (ARM64 || COMPILE_TEST) + depends on (ARM64 || COMPILE_TEST) help Support for error detection and correction on the APM X-Gene family of SOCs. diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 587107e90996..0fd9ffa63299 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -6,8 +6,7 @@ # GNU General Public License. # -obj-$(CONFIG_EDAC) := edac_stub.o -obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o +obj-$(CONFIG_EDAC) := edac_core.o edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o wq.o @@ -67,13 +66,14 @@ obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o obj-$(CONFIG_EDAC_TILE) += tile_edac.o -obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o -obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o +obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o +obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o +obj-$(CONFIG_EDAC_THUNDERX) += thunderx_edac.o obj-$(CONFIG_EDAC_ALTERA) += altera_edac.o obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index c5a5b91f37f0..7717b094fabb 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1023,13 +1023,23 @@ out: return ret; } +static int socfpga_is_a10(void) +{ + return of_machine_is_compatible("altr,socfpga-arria10"); +} + static int validate_parent_available(struct device_node *np); static const struct of_device_id altr_edac_a10_device_of_match[]; static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) { int irq; - struct device_node *child, *np = of_find_compatible_node(NULL, NULL, - "altr,socfpga-a10-ecc-manager"); + struct device_node *child, *np; + + if 
(!socfpga_is_a10()) + return -ENODEV; + + np = of_find_compatible_node(NULL, NULL, + "altr,socfpga-a10-ecc-manager"); if (!np) { edac_printk(KERN_ERR, EDAC_DEVICE, "ECC Manager not found\n"); return -ENODEV; @@ -1545,8 +1555,12 @@ static const struct edac_device_prv_data a10_sdmmceccb_data = { static int __init socfpga_init_sdmmc_ecc(void) { int rc = -ENODEV; - struct device_node *child = of_find_compatible_node(NULL, NULL, - "altr,socfpga-sdmmc-ecc"); + struct device_node *child; + + if (!socfpga_is_a10()) + return -ENODEV; + + child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); if (!child) { edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n"); return -ENODEV; diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index e5573c56b15e..480072139b7a 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -40,6 +40,11 @@ #define edac_atomic_scrub(va, size) do { } while (0) #endif +int edac_op_state = EDAC_OPSTATE_INVAL; +EXPORT_SYMBOL_GPL(edac_op_state); + +static int edac_report = EDAC_REPORTING_ENABLED; + /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); @@ -52,6 +57,65 @@ static void const *edac_mc_owner; static struct bus_type mc_bus[EDAC_MAX_MCS]; +int edac_get_report_status(void) +{ + return edac_report; +} +EXPORT_SYMBOL_GPL(edac_get_report_status); + +void edac_set_report_status(int new) +{ + if (new == EDAC_REPORTING_ENABLED || + new == EDAC_REPORTING_DISABLED || + new == EDAC_REPORTING_FORCE) + edac_report = new; +} +EXPORT_SYMBOL_GPL(edac_set_report_status); + +static int edac_report_set(const char *str, const struct kernel_param *kp) +{ + if (!str) + return -EINVAL; + + if (!strncmp(str, "on", 2)) + edac_report = EDAC_REPORTING_ENABLED; + else if (!strncmp(str, "off", 3)) + edac_report = EDAC_REPORTING_DISABLED; + else if (!strncmp(str, "force", 5)) + edac_report = EDAC_REPORTING_FORCE; + + return 0; +} + +static int edac_report_get(char 
*buffer, const struct kernel_param *kp) +{ + int ret = 0; + + switch (edac_report) { + case EDAC_REPORTING_ENABLED: + ret = sprintf(buffer, "on"); + break; + case EDAC_REPORTING_DISABLED: + ret = sprintf(buffer, "off"); + break; + case EDAC_REPORTING_FORCE: + ret = sprintf(buffer, "force"); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct kernel_param_ops edac_report_ops = { + .set = edac_report_set, + .get = edac_report_get, +}; + +module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644); + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -505,22 +569,6 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev) EXPORT_SYMBOL_GPL(find_mci_by_dev); /* - * handler for EDAC to check if NMI type handler has asserted interrupt - */ -static int edac_mc_assert_error_check_and_clear(void) -{ - int old_state; - - if (edac_op_state == EDAC_OPSTATE_POLL) - return 1; - - old_state = edac_err_assert; - edac_err_assert = 0; - - return old_state; -} - -/* * edac_mc_workq_function * performs the operation scheduled by a workq request */ @@ -536,7 +584,7 @@ static void edac_mc_workq_function(struct work_struct *work_req) return; } - if (edac_mc_assert_error_check_and_clear()) + if (edac_op_state == EDAC_OPSTATE_POLL) mci->edac_check(mci); mutex_unlock(&mem_ctls_mutex); @@ -601,7 +649,6 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) } list_add_tail_rcu(&mci->link, insert_before); - atomic_inc(&edac_handlers); return 0; fail0: @@ -619,7 +666,6 @@ fail1: static int del_mc_from_global_list(struct mem_ctl_info *mci) { - int handlers = atomic_dec_return(&edac_handlers); list_del_rcu(&mci->link); /* these are for safe removal of devices from global list while @@ -628,7 +674,7 @@ static int del_mc_from_global_list(struct mem_ctl_info *mci) synchronize_rcu(); INIT_LIST_HEAD(&mci->link); - return handlers; + return list_empty(&mc_devices); } struct mem_ctl_info *edac_mc_find(int idx) @@ 
-763,7 +809,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) /* mark MCI offline: */ mci->op_state = OP_OFFLINE; - if (!del_mc_from_global_list(mci)) + if (del_mc_from_global_list(mci)) edac_mc_owner = NULL; mutex_unlock(&mem_ctls_mutex); @@ -1195,10 +1241,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, /* Report the error via the trace interface */ grain_bits = fls_long(e->grain) + 1; - trace_mc_event(type, e->msg, e->label, e->error_count, - mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, - (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page, - grain_bits, e->syndrome, e->other_detail); + + if (IS_ENABLED(CONFIG_RAS)) + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, + e->low_layer, + (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page, + grain_bits, e->syndrome, e->other_detail); edac_raw_mc_handle_error(type, mci, e); } diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c deleted file mode 100644 index 952e411f01f2..000000000000 --- a/drivers/edac/edac_stub.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * common EDAC components that must be in kernel - * - * Author: Dave Jiang <djiang@mvista.com> - * - * 2007 (c) MontaVista Software, Inc. - * 2010 (c) Advanced Micro Devices Inc. - * Borislav Petkov <bp@alien8.de> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. 
- * - */ -#include <linux/module.h> -#include <linux/edac.h> -#include <linux/atomic.h> -#include <linux/device.h> - -int edac_op_state = EDAC_OPSTATE_INVAL; -EXPORT_SYMBOL_GPL(edac_op_state); - -atomic_t edac_handlers = ATOMIC_INIT(0); -EXPORT_SYMBOL_GPL(edac_handlers); - -int edac_err_assert = 0; -EXPORT_SYMBOL_GPL(edac_err_assert); - -int edac_report_status = EDAC_REPORTING_ENABLED; -EXPORT_SYMBOL_GPL(edac_report_status); - -static int __init edac_report_setup(char *str) -{ - if (!str) - return -EINVAL; - - if (!strncmp(str, "on", 2)) - set_edac_report_status(EDAC_REPORTING_ENABLED); - else if (!strncmp(str, "off", 3)) - set_edac_report_status(EDAC_REPORTING_DISABLED); - else if (!strncmp(str, "force", 5)) - set_edac_report_status(EDAC_REPORTING_FORCE); - - return 0; -} -__setup("edac_report=", edac_report_setup); - -/* - * called to determine if there is an EDAC driver interested in - * knowing an event (such as NMI) occurred - */ -int edac_handler_set(void) -{ - if (edac_op_state == EDAC_OPSTATE_POLL) - return 0; - - return atomic_read(&edac_handlers); -} -EXPORT_SYMBOL_GPL(edac_handler_set); - -/* - * handler for NMI type of interrupts to assert error - */ -void edac_atomic_assert_error(void) -{ - edac_err_assert++; -} -EXPORT_SYMBOL_GPL(edac_atomic_assert_error); diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index 928e0dba41fc..1cad5a9af8d0 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1349,7 +1349,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo struct dram_addr daddr; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; mci = pnd2_mci; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index a65ea44e3b0b..ea21cb651b3c 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3075,7 +3075,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, 
unsigned long val, struct sbridge_pvt *pvt; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; mci = get_mci_for_node_id(mce->socketid); @@ -3441,7 +3441,7 @@ static int __init sbridge_init(void) if (rc >= 0) { mce_register_decode_chain(&sbridge_mce_dec); - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0; } diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index 1159dba4671f..64bef6c9cfb4 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -971,7 +971,7 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, struct mem_ctl_info *mci; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; /* ignore unless this is memory related with an address */ diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c new file mode 100644 index 000000000000..86d585cb6d32 --- /dev/null +++ b/drivers/edac/thunderx_edac.c @@ -0,0 +1,2174 @@ +/* + * Cavium ThunderX memory controller kernel module + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved. 
+ * + */ + +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/edac.h> +#include <linux/interrupt.h> +#include <linux/string.h> +#include <linux/stop_machine.h> +#include <linux/delay.h> +#include <linux/sizes.h> +#include <linux/atomic.h> +#include <linux/bitfield.h> +#include <linux/circ_buf.h> + +#include <asm/page.h> + +#include "edac_module.h" + +#define phys_to_pfn(phys) (PFN_DOWN(phys)) + +#define THUNDERX_NODE GENMASK(45, 44) + +enum { + ERR_CORRECTED = 1, + ERR_UNCORRECTED = 2, + ERR_UNKNOWN = 3, +}; + +#define MAX_SYNDROME_REGS 4 + +struct error_syndrome { + u64 reg[MAX_SYNDROME_REGS]; +}; + +struct error_descr { + int type; + u64 mask; + char *descr; +}; + +static void decode_register(char *str, size_t size, + const struct error_descr *descr, + const uint64_t reg) +{ + int ret = 0; + + while (descr->type && descr->mask && descr->descr) { + if (reg & descr->mask) { + ret = snprintf(str, size, "\n\t%s, %s", + descr->type == ERR_CORRECTED ? + "Corrected" : "Uncorrected", + descr->descr); + str += ret; + size -= ret; + } + descr++; + } +} + +static unsigned long get_bits(unsigned long data, int pos, int width) +{ + return (data >> pos) & ((1 << width) - 1); +} + +#define L2C_CTL 0x87E080800000 +#define L2C_CTL_DISIDXALIAS BIT(0) + +#define PCI_DEVICE_ID_THUNDER_LMC 0xa022 + +#define LMC_FADR 0x20 +#define LMC_FADR_FDIMM(x) ((x >> 37) & 0x1) +#define LMC_FADR_FBUNK(x) ((x >> 36) & 0x1) +#define LMC_FADR_FBANK(x) ((x >> 32) & 0xf) +#define LMC_FADR_FROW(x) ((x >> 14) & 0xffff) +#define LMC_FADR_FCOL(x) ((x >> 0) & 0x1fff) + +#define LMC_NXM_FADR 0x28 +#define LMC_ECC_SYND 0x38 + +#define LMC_ECC_PARITY_TEST 0x108 + +#define LMC_INT_W1S 0x150 + +#define LMC_INT_ENA_W1C 0x158 +#define LMC_INT_ENA_W1S 0x160 + +#define LMC_CONFIG 0x188 + +#define LMC_CONFIG_BG2 BIT(62) +#define LMC_CONFIG_RANK_ENA BIT(42) +#define LMC_CONFIG_PBANK_LSB(x) (((x) >> 5) & 0xF) +#define LMC_CONFIG_ROW_LSB(x) (((x) >> 2) & 0x7) + +#define LMC_CONTROL 0x190 +#define 
LMC_CONTROL_XOR_BANK BIT(16) + +#define LMC_INT 0x1F0 + +#define LMC_INT_DDR_ERR BIT(11) +#define LMC_INT_DED_ERR (0xFUL << 5) +#define LMC_INT_SEC_ERR (0xFUL << 1) +#define LMC_INT_NXM_WR_MASK BIT(0) + +#define LMC_DDR_PLL_CTL 0x258 +#define LMC_DDR_PLL_CTL_DDR4 BIT(29) + +#define LMC_FADR_SCRAMBLED 0x330 + +#define LMC_INT_UE (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \ + LMC_INT_NXM_WR_MASK) + +#define LMC_INT_CE (LMC_INT_SEC_ERR) + +static const struct error_descr lmc_errors[] = { + { + .type = ERR_CORRECTED, + .mask = LMC_INT_SEC_ERR, + .descr = "Single-bit ECC error", + }, + { + .type = ERR_UNCORRECTED, + .mask = LMC_INT_DDR_ERR, + .descr = "DDR chip error", + }, + { + .type = ERR_UNCORRECTED, + .mask = LMC_INT_DED_ERR, + .descr = "Double-bit ECC error", + }, + { + .type = ERR_UNCORRECTED, + .mask = LMC_INT_NXM_WR_MASK, + .descr = "Non-existent memory write", + }, + {0, 0, NULL}, +}; + +#define LMC_INT_EN_DDR_ERROR_ALERT_ENA BIT(5) +#define LMC_INT_EN_DLCRAM_DED_ERR BIT(4) +#define LMC_INT_EN_DLCRAM_SEC_ERR BIT(3) +#define LMC_INT_INTR_DED_ENA BIT(2) +#define LMC_INT_INTR_SEC_ENA BIT(1) +#define LMC_INT_INTR_NXM_WR_ENA BIT(0) + +#define LMC_INT_ENA_ALL GENMASK(5, 0) + +#define LMC_DDR_PLL_CTL 0x258 +#define LMC_DDR_PLL_CTL_DDR4 BIT(29) + +#define LMC_CONTROL 0x190 +#define LMC_CONTROL_RDIMM BIT(0) + +#define LMC_SCRAM_FADR 0x330 + +#define LMC_CHAR_MASK0 0x228 +#define LMC_CHAR_MASK2 0x238 + +#define RING_ENTRIES 8 + +struct debugfs_entry { + const char *name; + umode_t mode; + const struct file_operations fops; +}; + +struct lmc_err_ctx { + u64 reg_int; + u64 reg_fadr; + u64 reg_nxm_fadr; + u64 reg_scram_fadr; + u64 reg_ecc_synd; +}; + +struct thunderx_lmc { + void __iomem *regs; + struct pci_dev *pdev; + struct msix_entry msix_ent; + + atomic_t ecc_int; + + u64 mask0; + u64 mask2; + u64 parity_test; + u64 node; + + int xbits; + int bank_width; + int pbank_lsb; + int dimm_lsb; + int rank_lsb; + int bank_lsb; + int row_lsb; + int col_hi_lsb; + + int xor_bank; + int 
l2c_alias; + + struct page *mem; + + struct lmc_err_ctx err_ctx[RING_ENTRIES]; + unsigned long ring_head; + unsigned long ring_tail; +}; + +#define ring_pos(pos, size) ((pos) & (size - 1)) + +#define DEBUGFS_STRUCT(_name, _mode, _write, _read) \ +static struct debugfs_entry debugfs_##_name = { \ + .name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ + .fops = { \ + .open = simple_open, \ + .write = _write, \ + .read = _read, \ + .llseek = generic_file_llseek, \ + }, \ +} + +#define DEBUGFS_FIELD_ATTR(_type, _field) \ +static ssize_t thunderx_##_type##_##_field##_read(struct file *file, \ + char __user *data, \ + size_t count, loff_t *ppos) \ +{ \ + struct thunderx_##_type *pdata = file->private_data; \ + char buf[20]; \ + \ + snprintf(buf, count, "0x%016llx", pdata->_field); \ + return simple_read_from_buffer(data, count, ppos, \ + buf, sizeof(buf)); \ +} \ + \ +static ssize_t thunderx_##_type##_##_field##_write(struct file *file, \ + const char __user *data, \ + size_t count, loff_t *ppos) \ +{ \ + struct thunderx_##_type *pdata = file->private_data; \ + int res; \ + \ + res = kstrtoull_from_user(data, count, 0, &pdata->_field); \ + \ + return res ? 
res : count; \ +} \ + \ +DEBUGFS_STRUCT(_field, 0600, \ + thunderx_##_type##_##_field##_write, \ + thunderx_##_type##_##_field##_read) \ + +#define DEBUGFS_REG_ATTR(_type, _name, _reg) \ +static ssize_t thunderx_##_type##_##_name##_read(struct file *file, \ + char __user *data, \ + size_t count, loff_t *ppos) \ +{ \ + struct thunderx_##_type *pdata = file->private_data; \ + char buf[20]; \ + \ + sprintf(buf, "0x%016llx", readq(pdata->regs + _reg)); \ + return simple_read_from_buffer(data, count, ppos, \ + buf, sizeof(buf)); \ +} \ + \ +static ssize_t thunderx_##_type##_##_name##_write(struct file *file, \ + const char __user *data, \ + size_t count, loff_t *ppos) \ +{ \ + struct thunderx_##_type *pdata = file->private_data; \ + u64 val; \ + int res; \ + \ + res = kstrtoull_from_user(data, count, 0, &val); \ + \ + if (!res) { \ + writeq(val, pdata->regs + _reg); \ + res = count; \ + } \ + \ + return res; \ +} \ + \ +DEBUGFS_STRUCT(_name, 0600, \ + thunderx_##_type##_##_name##_write, \ + thunderx_##_type##_##_name##_read) + +#define LMC_DEBUGFS_ENT(_field) DEBUGFS_FIELD_ATTR(lmc, _field) + +/* + * To get an ECC error injected, the following steps are needed: + * - Setup the ECC injection by writing the appropriate parameters: + * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask0 + * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask2 + * echo 0x802 > /sys/kernel/debug/<device number>/ecc_parity_test + * - Do the actual injection: + * echo 1 > /sys/kernel/debug/<device number>/inject_ecc + */ +static ssize_t thunderx_lmc_inject_int_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct thunderx_lmc *lmc = file->private_data; + u64 val; + int res; + + res = kstrtoull_from_user(data, count, 0, &val); + + if (!res) { + /* Trigger the interrupt */ + writeq(val, lmc->regs + LMC_INT_W1S); + res = count; + } + + return res; +} + +static ssize_t thunderx_lmc_int_read(struct file *file, + char __user 
*data, + size_t count, loff_t *ppos) +{ + struct thunderx_lmc *lmc = file->private_data; + char buf[20]; + u64 lmc_int = readq(lmc->regs + LMC_INT); + + snprintf(buf, sizeof(buf), "0x%016llx", lmc_int); + return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf)); +} + +#define TEST_PATTERN 0xa5 + +static int inject_ecc_fn(void *arg) +{ + struct thunderx_lmc *lmc = arg; + uintptr_t addr, phys; + unsigned int cline_size = cache_line_size(); + const unsigned int lines = PAGE_SIZE / cline_size; + unsigned int i, cl_idx; + + addr = (uintptr_t)page_address(lmc->mem); + phys = (uintptr_t)page_to_phys(lmc->mem); + + cl_idx = (phys & 0x7f) >> 4; + lmc->parity_test &= ~(7ULL << 8); + lmc->parity_test |= (cl_idx << 8); + + writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0); + writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2); + writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST); + + readq(lmc->regs + LMC_CHAR_MASK0); + readq(lmc->regs + LMC_CHAR_MASK2); + readq(lmc->regs + LMC_ECC_PARITY_TEST); + + for (i = 0; i < lines; i++) { + memset((void *)addr, TEST_PATTERN, cline_size); + barrier(); + + /* + * Flush L1 cachelines to the PoC (L2). + * This will cause cacheline eviction to the L2. + */ + asm volatile("dc civac, %0\n" + "dsb sy\n" + : : "r"(addr + i * cline_size)); + } + + for (i = 0; i < lines; i++) { + /* + * Flush L2 cachelines to the DRAM. + * This will cause cacheline eviction to the DRAM + * and ECC corruption according to the masks set. + */ + __asm__ volatile("sys #0,c11,C1,#2, %0\n" + : : "r"(phys + i * cline_size)); + } + + for (i = 0; i < lines; i++) { + /* + * Invalidate L2 cachelines. + * The subsequent load will cause cacheline fetch + * from the DRAM and an error interrupt + */ + __asm__ volatile("sys #0,c11,C1,#1, %0" + : : "r"(phys + i * cline_size)); + } + + for (i = 0; i < lines; i++) { + /* + * Invalidate L1 cachelines. 
+ * The subsequent load will cause cacheline fetch + * from the L2 and/or DRAM + */ + asm volatile("dc ivac, %0\n" + "dsb sy\n" + : : "r"(addr + i * cline_size)); + } + + return 0; +} + +static ssize_t thunderx_lmc_inject_ecc_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct thunderx_lmc *lmc = file->private_data; + + unsigned int cline_size = cache_line_size(); + + u8 tmp[cline_size]; + void __iomem *addr; + unsigned int offs, timeout = 100000; + + atomic_set(&lmc->ecc_int, 0); + + lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0); + + if (!lmc->mem) + return -ENOMEM; + + addr = page_address(lmc->mem); + + while (!atomic_read(&lmc->ecc_int) && timeout--) { + stop_machine(inject_ecc_fn, lmc, NULL); + + for (offs = 0; offs < PAGE_SIZE; offs += sizeof(tmp)) { + /* + * Do a load from the previously rigged location + * This should generate an error interrupt. + */ + memcpy(tmp, addr + offs, cline_size); + asm volatile("dsb ld\n"); + } + } + + __free_pages(lmc->mem, 0); + + return count; +} + +LMC_DEBUGFS_ENT(mask0); +LMC_DEBUGFS_ENT(mask2); +LMC_DEBUGFS_ENT(parity_test); + +DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL); +DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL); +DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read); + +struct debugfs_entry *lmc_dfs_ents[] = { + &debugfs_mask0, + &debugfs_mask2, + &debugfs_parity_test, + &debugfs_inject_ecc, + &debugfs_inject_int, + &debugfs_int_w1c, +}; + +static int thunderx_create_debugfs_nodes(struct dentry *parent, + struct debugfs_entry *attrs[], + void *data, + size_t num) +{ + int i; + struct dentry *ent; + + if (!IS_ENABLED(CONFIG_EDAC_DEBUG)) + return 0; + + if (!parent) + return -ENOENT; + + for (i = 0; i < num; i++) { + ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode, + parent, data, &attrs[i]->fops); + + if (!ent) + break; + } + + return i; +} + +static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct 
thunderx_lmc *lmc) +{ + phys_addr_t addr = 0; + int bank, xbits; + + addr |= lmc->node << 40; + addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb; + addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb; + addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb; + addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb; + + bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb; + + if (lmc->xor_bank) + bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width); + + addr |= bank << lmc->bank_lsb; + + xbits = PCI_FUNC(lmc->pdev->devfn); + + if (lmc->l2c_alias) + xbits ^= get_bits(addr, 20, lmc->xbits) ^ + get_bits(addr, 12, lmc->xbits); + + addr |= xbits << 7; + + return addr; +} + +static unsigned int thunderx_get_num_lmcs(unsigned int node) +{ + unsigned int number = 0; + struct pci_dev *pdev = NULL; + + do { + pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_THUNDER_LMC, + pdev); + if (pdev) { +#ifdef CONFIG_NUMA + if (pdev->dev.numa_node == node) + number++; +#else + number++; +#endif + } + } while (pdev); + + return number; +} + +#define LMC_MESSAGE_SIZE 120 +#define LMC_OTHER_SIZE (50 * ARRAY_SIZE(lmc_errors)) + +static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id) +{ + struct mem_ctl_info *mci = dev_id; + struct thunderx_lmc *lmc = mci->pvt_info; + + unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx)); + struct lmc_err_ctx *ctx = &lmc->err_ctx[head]; + + writeq(0, lmc->regs + LMC_CHAR_MASK0); + writeq(0, lmc->regs + LMC_CHAR_MASK2); + writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST); + + ctx->reg_int = readq(lmc->regs + LMC_INT); + ctx->reg_fadr = readq(lmc->regs + LMC_FADR); + ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR); + ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR); + ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND); + + lmc->ring_head++; + + atomic_set(&lmc->ecc_int, 1); + + /* Clear the interrupt */ + writeq(ctx->reg_int, lmc->regs + LMC_INT); + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_lmc_threaded_isr(int 
irq, void *dev_id) +{ + struct mem_ctl_info *mci = dev_id; + struct thunderx_lmc *lmc = mci->pvt_info; + phys_addr_t phys_addr; + + unsigned long tail; + struct lmc_err_ctx *ctx; + + irqreturn_t ret = IRQ_NONE; + + char *msg; + char *other; + + msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(LMC_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + while (CIRC_CNT(lmc->ring_head, lmc->ring_tail, + ARRAY_SIZE(lmc->err_ctx))) { + tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx)); + + ctx = &lmc->err_ctx[tail]; + + dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n", + ctx->reg_int); + dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n", + ctx->reg_fadr); + dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n", + ctx->reg_nxm_fadr); + dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n", + ctx->reg_scram_fadr); + dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n", + ctx->reg_ecc_synd); + + snprintf(msg, LMC_MESSAGE_SIZE, + "DIMM %lld rank %lld bank %lld row %lld col %lld", + LMC_FADR_FDIMM(ctx->reg_scram_fadr), + LMC_FADR_FBUNK(ctx->reg_scram_fadr), + LMC_FADR_FBANK(ctx->reg_scram_fadr), + LMC_FADR_FROW(ctx->reg_scram_fadr), + LMC_FADR_FCOL(ctx->reg_scram_fadr)); + + decode_register(other, LMC_OTHER_SIZE, lmc_errors, + ctx->reg_int); + + phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc); + + if (ctx->reg_int & LMC_INT_UE) + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + phys_to_pfn(phys_addr), + offset_in_page(phys_addr), + 0, -1, -1, -1, msg, other); + else if (ctx->reg_int & LMC_INT_CE) + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + phys_to_pfn(phys_addr), + offset_in_page(phys_addr), + 0, -1, -1, -1, msg, other); + + lmc->ring_tail++; + } + + ret = IRQ_HANDLED; + +err_free: + kfree(msg); + kfree(other); + + return ret; +} + +#ifdef CONFIG_PM +static int thunderx_lmc_suspend(struct pci_dev *pdev, pm_message_t state) +{ + pci_save_state(pdev); + pci_disable_device(pdev); + + pci_set_power_state(pdev, 
pci_choose_state(pdev, state)); + + return 0; +} + +static int thunderx_lmc_resume(struct pci_dev *pdev) +{ + pci_set_power_state(pdev, PCI_D0); + pci_enable_wake(pdev, PCI_D0, 0); + pci_restore_state(pdev); + + return 0; +} +#endif + +static const struct pci_device_id thunderx_lmc_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) }, + { 0, }, +}; + +static inline int pci_dev_to_mc_idx(struct pci_dev *pdev) +{ + int node = dev_to_node(&pdev->dev); + int ret = PCI_FUNC(pdev->devfn); + + ret += max(node, 0) << 3; + + return ret; +} + +static int thunderx_lmc_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_lmc *lmc; + struct edac_mc_layer layer; + struct mem_ctl_info *mci; + u64 lmc_control, lmc_ddr_pll_ctl, lmc_config; + int ret; + u64 lmc_int; + void *l2c_ioaddr; + + layer.type = EDAC_MC_LAYER_SLOT; + layer.size = 2; + layer.is_virt_csrow = false; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer, + sizeof(struct thunderx_lmc)); + if (!mci) + return -ENOMEM; + + mci->pdev = &pdev->dev; + lmc = mci->pvt_info; + + pci_set_drvdata(pdev, mci); + + lmc->regs = pcim_iomap_table(pdev)[0]; + + lmc_control = readq(lmc->regs + LMC_CONTROL); + lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL); + lmc_config = readq(lmc->regs + LMC_CONFIG); + + if (lmc_control & LMC_CONTROL_RDIMM) { + mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4, + lmc_ddr_pll_ctl) ? + MEM_RDDR4 : MEM_RDDR3; + } else { + mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4, + lmc_ddr_pll_ctl) ? 
+ MEM_DDR4 : MEM_DDR3; + } + + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + + mci->mod_name = "thunderx-lmc"; + mci->mod_ver = "1"; + mci->ctl_name = "thunderx-lmc"; + mci->dev_name = dev_name(&pdev->dev); + mci->scrub_mode = SCRUB_NONE; + + lmc->pdev = pdev; + lmc->msix_ent.entry = 0; + + lmc->ring_head = 0; + lmc->ring_tail = 0; + + ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector, + thunderx_lmc_err_isr, + thunderx_lmc_threaded_isr, 0, + "[EDAC] ThunderX LMC", mci); + if (ret) { + dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret); + goto err_free; + } + + lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0)); + + lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1; + lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) && + FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3; + + lmc->pbank_lsb = (lmc_config >> 5) & 0xf; + lmc->dimm_lsb = 28 + lmc->pbank_lsb + lmc->xbits; + lmc->rank_lsb = lmc->dimm_lsb; + lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 
1 : 0; + lmc->bank_lsb = 7 + lmc->xbits; + lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits; + + lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width; + + lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK; + + l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node), + PAGE_SIZE); + + if (!l2c_ioaddr) { + dev_err(&pdev->dev, "Cannot map L2C_CTL\n"); + goto err_free; + } + + lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS); + + iounmap(l2c_ioaddr); + + ret = edac_mc_add_mc(mci); + if (ret) { + dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret); + goto err_free; + } + + lmc_int = readq(lmc->regs + LMC_INT); + writeq(lmc_int, lmc->regs + LMC_INT); + + writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S); + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + ret = thunderx_create_debugfs_nodes(mci->debugfs, + lmc_dfs_ents, + lmc, + ARRAY_SIZE(lmc_dfs_ents)); + + if (ret != ARRAY_SIZE(lmc_dfs_ents)) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? 
" created" : ""); + } + } + + return 0; + +err_free: + pci_set_drvdata(pdev, NULL); + edac_mc_free(mci); + + return ret; +} + +static void thunderx_lmc_remove(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci = pci_get_drvdata(pdev); + struct thunderx_lmc *lmc = mci->pvt_info; + + writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); +} + +MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl); + +static struct pci_driver thunderx_lmc_driver = { + .name = "thunderx_lmc_edac", + .probe = thunderx_lmc_probe, + .remove = thunderx_lmc_remove, +#ifdef CONFIG_PM + .suspend = thunderx_lmc_suspend, + .resume = thunderx_lmc_resume, +#endif + .id_table = thunderx_lmc_pci_tbl, +}; + +/*---------------------- OCX driver ---------------------------------*/ + +#define PCI_DEVICE_ID_THUNDER_OCX 0xa013 + +#define OCX_LINK_INTS 3 +#define OCX_INTS (OCX_LINK_INTS + 1) +#define OCX_RX_LANES 24 +#define OCX_RX_LANE_STATS 15 + +#define OCX_COM_INT 0x100 +#define OCX_COM_INT_W1S 0x108 +#define OCX_COM_INT_ENA_W1S 0x110 +#define OCX_COM_INT_ENA_W1C 0x118 + +#define OCX_COM_IO_BADID BIT(54) +#define OCX_COM_MEM_BADID BIT(53) +#define OCX_COM_COPR_BADID BIT(52) +#define OCX_COM_WIN_REQ_BADID BIT(51) +#define OCX_COM_WIN_REQ_TOUT BIT(50) +#define OCX_COM_RX_LANE GENMASK(23, 0) + +#define OCX_COM_INT_CE (OCX_COM_IO_BADID | \ + OCX_COM_MEM_BADID | \ + OCX_COM_COPR_BADID | \ + OCX_COM_WIN_REQ_BADID | \ + OCX_COM_WIN_REQ_TOUT) + +static const struct error_descr ocx_com_errors[] = { + { + .type = ERR_CORRECTED, + .mask = OCX_COM_IO_BADID, + .descr = "Invalid IO transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_MEM_BADID, + .descr = "Invalid memory transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_COPR_BADID, + .descr = "Invalid coprocessor transaction node ID", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_WIN_REQ_BADID, + .descr = "Invalid SLI transaction node ID", + }, + { + .type = 
ERR_CORRECTED, + .mask = OCX_COM_WIN_REQ_TOUT, + .descr = "Window/core request timeout", + }, + {0, 0, NULL}, +}; + +#define OCX_COM_LINKX_INT(x) (0x120 + (x) * 8) +#define OCX_COM_LINKX_INT_W1S(x) (0x140 + (x) * 8) +#define OCX_COM_LINKX_INT_ENA_W1S(x) (0x160 + (x) * 8) +#define OCX_COM_LINKX_INT_ENA_W1C(x) (0x180 + (x) * 8) + +#define OCX_COM_LINK_BAD_WORD BIT(13) +#define OCX_COM_LINK_ALIGN_FAIL BIT(12) +#define OCX_COM_LINK_ALIGN_DONE BIT(11) +#define OCX_COM_LINK_UP BIT(10) +#define OCX_COM_LINK_STOP BIT(9) +#define OCX_COM_LINK_BLK_ERR BIT(8) +#define OCX_COM_LINK_REINIT BIT(7) +#define OCX_COM_LINK_LNK_DATA BIT(6) +#define OCX_COM_LINK_RXFIFO_DBE BIT(5) +#define OCX_COM_LINK_RXFIFO_SBE BIT(4) +#define OCX_COM_LINK_TXFIFO_DBE BIT(3) +#define OCX_COM_LINK_TXFIFO_SBE BIT(2) +#define OCX_COM_LINK_REPLAY_DBE BIT(1) +#define OCX_COM_LINK_REPLAY_SBE BIT(0) + +static const struct error_descr ocx_com_link_errors[] = { + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_REPLAY_SBE, + .descr = "Replay buffer single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_TXFIFO_SBE, + .descr = "TX FIFO single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_RXFIFO_SBE, + .descr = "RX FIFO single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_BLK_ERR, + .descr = "Block code error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_ALIGN_FAIL, + .descr = "Link alignment failure", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_COM_LINK_BAD_WORD, + .descr = "Bad code word", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_REPLAY_DBE, + .descr = "Replay buffer double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_TXFIFO_DBE, + .descr = "TX FIFO double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_RXFIFO_DBE, + .descr = "RX FIFO double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = OCX_COM_LINK_STOP, + .descr = "Link stopped", + }, + {0, 0, 
NULL}, +}; + +#define OCX_COM_LINK_INT_UE (OCX_COM_LINK_REPLAY_DBE | \ + OCX_COM_LINK_TXFIFO_DBE | \ + OCX_COM_LINK_RXFIFO_DBE | \ + OCX_COM_LINK_STOP) + +#define OCX_COM_LINK_INT_CE (OCX_COM_LINK_REPLAY_SBE | \ + OCX_COM_LINK_TXFIFO_SBE | \ + OCX_COM_LINK_RXFIFO_SBE | \ + OCX_COM_LINK_BLK_ERR | \ + OCX_COM_LINK_ALIGN_FAIL | \ + OCX_COM_LINK_BAD_WORD) + +#define OCX_LNE_INT(x) (0x8018 + (x) * 0x100) +#define OCX_LNE_INT_EN(x) (0x8020 + (x) * 0x100) +#define OCX_LNE_BAD_CNT(x) (0x8028 + (x) * 0x100) +#define OCX_LNE_CFG(x) (0x8000 + (x) * 0x100) +#define OCX_LNE_STAT(x, y) (0x8040 + (x) * 0x100 + (y) * 8) + +#define OCX_LNE_CFG_RX_BDRY_LOCK_DIS BIT(8) +#define OCX_LNE_CFG_RX_STAT_WRAP_DIS BIT(2) +#define OCX_LNE_CFG_RX_STAT_RDCLR BIT(1) +#define OCX_LNE_CFG_RX_STAT_ENA BIT(0) + + +#define OCX_LANE_BAD_64B67B BIT(8) +#define OCX_LANE_DSKEW_FIFO_OVFL BIT(5) +#define OCX_LANE_SCRM_SYNC_LOSS BIT(4) +#define OCX_LANE_UKWN_CNTL_WORD BIT(3) +#define OCX_LANE_CRC32_ERR BIT(2) +#define OCX_LANE_BDRY_SYNC_LOSS BIT(1) +#define OCX_LANE_SERDES_LOCK_LOSS BIT(0) + +#define OCX_COM_LANE_INT_UE (0) +#define OCX_COM_LANE_INT_CE (OCX_LANE_SERDES_LOCK_LOSS | \ + OCX_LANE_BDRY_SYNC_LOSS | \ + OCX_LANE_CRC32_ERR | \ + OCX_LANE_UKWN_CNTL_WORD | \ + OCX_LANE_SCRM_SYNC_LOSS | \ + OCX_LANE_DSKEW_FIFO_OVFL | \ + OCX_LANE_BAD_64B67B) + +static const struct error_descr ocx_lane_errors[] = { + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_SERDES_LOCK_LOSS, + .descr = "RX SerDes lock lost", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_BDRY_SYNC_LOSS, + .descr = "RX word boundary lost", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_CRC32_ERR, + .descr = "CRC32 error", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_UKWN_CNTL_WORD, + .descr = "Unknown control word", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_SCRM_SYNC_LOSS, + .descr = "Scrambler synchronization lost", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_DSKEW_FIFO_OVFL, + .descr = "RX deskew FIFO 
overflow", + }, + { + .type = ERR_CORRECTED, + .mask = OCX_LANE_BAD_64B67B, + .descr = "Bad 64B/67B codeword", + }, + {0, 0, NULL}, +}; + +#define OCX_LNE_INT_ENA_ALL (GENMASK(9, 8) | GENMASK(6, 0)) +#define OCX_COM_INT_ENA_ALL (GENMASK(54, 50) | GENMASK(23, 0)) +#define OCX_COM_LINKX_INT_ENA_ALL (GENMASK(13, 12) | \ + GENMASK(9, 7) | GENMASK(5, 0)) + +#define OCX_TLKX_ECC_CTL(x) (0x10018 + (x) * 0x2000) +#define OCX_RLKX_ECC_CTL(x) (0x18018 + (x) * 0x2000) + +struct ocx_com_err_ctx { + u64 reg_com_int; + u64 reg_lane_int[OCX_RX_LANES]; + u64 reg_lane_stat11[OCX_RX_LANES]; +}; + +struct ocx_link_err_ctx { + u64 reg_com_link_int; + int link; +}; + +struct thunderx_ocx { + void __iomem *regs; + int com_link; + struct pci_dev *pdev; + struct edac_device_ctl_info *edac_dev; + + struct dentry *debugfs; + struct msix_entry msix_ent[OCX_INTS]; + + struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES]; + struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES]; + + unsigned long com_ring_head; + unsigned long com_ring_tail; + + unsigned long link_ring_head; + unsigned long link_ring_tail; +}; + +#define OCX_MESSAGE_SIZE SZ_1K +#define OCX_OTHER_SIZE (50 * ARRAY_SIZE(ocx_com_link_errors)) + +/* This handler is threaded */ +static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + + int lane; + unsigned long head = ring_pos(ocx->com_ring_head, + ARRAY_SIZE(ocx->com_err_ctx)); + struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head]; + + ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT); + + for (lane = 0; lane < OCX_RX_LANES; lane++) { + ctx->reg_lane_int[lane] = + readq(ocx->regs + OCX_LNE_INT(lane)); + ctx->reg_lane_stat11[lane] = + readq(ocx->regs + OCX_LNE_STAT(lane, 11)); + + writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane)); + } + + writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT); + + ocx->com_ring_head++; + + return 
IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + + irqreturn_t ret = IRQ_NONE; + + unsigned long tail; + struct ocx_com_err_ctx *ctx; + int lane; + char *msg; + char *other; + + msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail, + ARRAY_SIZE(ocx->com_err_ctx))) { + tail = ring_pos(ocx->com_ring_tail, + ARRAY_SIZE(ocx->com_err_ctx)); + ctx = &ocx->com_err_ctx[tail]; + + snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx", + ocx->edac_dev->ctl_name, ctx->reg_com_int); + + decode_register(other, OCX_OTHER_SIZE, + ocx_com_errors, ctx->reg_com_int); + + strncat(msg, other, OCX_MESSAGE_SIZE); + + for (lane = 0; lane < OCX_RX_LANES; lane++) + if (ctx->reg_com_int & BIT(lane)) { + snprintf(other, OCX_OTHER_SIZE, + "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx", + lane, ctx->reg_lane_int[lane], + lane, ctx->reg_lane_stat11[lane]); + + strncat(msg, other, OCX_MESSAGE_SIZE); + + decode_register(other, OCX_OTHER_SIZE, + ocx_lane_errors, + ctx->reg_lane_int[lane]); + strncat(msg, other, OCX_MESSAGE_SIZE); + } + + if (ctx->reg_com_int & OCX_COM_INT_CE) + edac_device_handle_ce(ocx->edac_dev, 0, 0, msg); + + ocx->com_ring_tail++; + } + + ret = IRQ_HANDLED; + +err_free: + kfree(other); + kfree(msg); + + return ret; +} + +static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + unsigned long head = ring_pos(ocx->link_ring_head, + ARRAY_SIZE(ocx->link_err_ctx)); + struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head]; + + ctx->link = msix->entry; + ctx->reg_com_link_int = readq(ocx->regs + 
OCX_COM_LINKX_INT(ctx->link)); + + writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link)); + + ocx->link_ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, + msix_ent[msix->entry]); + irqreturn_t ret = IRQ_NONE; + unsigned long tail; + struct ocx_link_err_ctx *ctx; + + char *msg; + char *other; + + msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail, + ARRAY_SIZE(ocx->link_err_ctx))) { + tail = ring_pos(ocx->link_ring_head, + ARRAY_SIZE(ocx->link_err_ctx)); + + ctx = &ocx->link_err_ctx[tail]; + + snprintf(msg, OCX_MESSAGE_SIZE, + "%s: OCX_COM_LINK_INT[%d]: %016llx", + ocx->edac_dev->ctl_name, + ctx->link, ctx->reg_com_link_int); + + decode_register(other, OCX_OTHER_SIZE, + ocx_com_link_errors, ctx->reg_com_link_int); + + strncat(msg, other, OCX_MESSAGE_SIZE); + + if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE) + edac_device_handle_ue(ocx->edac_dev, 0, 0, msg); + else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE) + edac_device_handle_ce(ocx->edac_dev, 0, 0, msg); + + ocx->link_ring_tail++; + } + + ret = IRQ_HANDLED; +err_free: + kfree(other); + kfree(msg); + + return ret; +} + +#define OCX_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(ocx, _name, _reg) + +OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0)); +OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1)); +OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2)); + +OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0)); +OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1)); +OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2)); + +OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0)); +OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1)); +OCX_DEBUGFS_ATTR(com_link2_int, 
OCX_COM_LINKX_INT_W1S(2)); + +OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0)); +OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1)); +OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2)); +OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3)); +OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4)); +OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5)); +OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6)); +OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7)); + +OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8)); +OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9)); +OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10)); +OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11)); +OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12)); +OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13)); +OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14)); +OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15)); + +OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16)); +OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17)); +OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18)); +OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19)); +OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20)); +OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21)); +OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22)); +OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23)); + +OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S); + +struct debugfs_entry *ocx_dfs_ents[] = { + &debugfs_tlk0_ecc_ctl, + &debugfs_tlk1_ecc_ctl, + &debugfs_tlk2_ecc_ctl, + + &debugfs_rlk0_ecc_ctl, + &debugfs_rlk1_ecc_ctl, + &debugfs_rlk2_ecc_ctl, + + &debugfs_com_link0_int, + &debugfs_com_link1_int, + &debugfs_com_link2_int, + + &debugfs_lne00_badcnt, + &debugfs_lne01_badcnt, + &debugfs_lne02_badcnt, + &debugfs_lne03_badcnt, + &debugfs_lne04_badcnt, + &debugfs_lne05_badcnt, + &debugfs_lne06_badcnt, + &debugfs_lne07_badcnt, + &debugfs_lne08_badcnt, + &debugfs_lne09_badcnt, + &debugfs_lne10_badcnt, + &debugfs_lne11_badcnt, + &debugfs_lne12_badcnt, + &debugfs_lne13_badcnt, + 
&debugfs_lne14_badcnt, + &debugfs_lne15_badcnt, + &debugfs_lne16_badcnt, + &debugfs_lne17_badcnt, + &debugfs_lne18_badcnt, + &debugfs_lne19_badcnt, + &debugfs_lne20_badcnt, + &debugfs_lne21_badcnt, + &debugfs_lne22_badcnt, + &debugfs_lne23_badcnt, + + &debugfs_com_int, +}; + +static const struct pci_device_id thunderx_ocx_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) }, + { 0, }, +}; + +static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx) +{ + int lane, stat, cfg; + + for (lane = 0; lane < OCX_RX_LANES; lane++) { + cfg = readq(ocx->regs + OCX_LNE_CFG(lane)); + cfg |= OCX_LNE_CFG_RX_STAT_RDCLR; + cfg &= ~OCX_LNE_CFG_RX_STAT_ENA; + writeq(cfg, ocx->regs + OCX_LNE_CFG(lane)); + + for (stat = 0; stat < OCX_RX_LANE_STATS; stat++) + readq(ocx->regs + OCX_LNE_STAT(lane, stat)); + } +} + +static int thunderx_ocx_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_ocx *ocx; + struct edac_device_ctl_info *edac_dev; + char name[32]; + int idx; + int i; + int ret; + u64 reg; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + idx = edac_device_alloc_index(); + snprintf(name, sizeof(name), "OCX%d", idx); + edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx), + name, 1, "CCPI", 1, + 0, NULL, 0, idx); + if (!edac_dev) { + dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret); + return -ENOMEM; + } + ocx = edac_dev->pvt_info; + ocx->edac_dev = edac_dev; + ocx->com_ring_head = 0; + ocx->com_ring_tail = 0; + ocx->link_ring_head = 0; + ocx->link_ring_tail = 0; + + ocx->regs = pcim_iomap_table(pdev)[0]; + if (!ocx->regs) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + ret = -ENODEV; + goto err_free; + } + + ocx->pdev = pdev; + + for (i = 0; i < 
OCX_INTS; i++) { + ocx->msix_ent[i].entry = i; + ocx->msix_ent[i].vector = 0; + } + + ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + for (i = 0; i < OCX_INTS; i++) { + ret = devm_request_threaded_irq(&pdev->dev, + ocx->msix_ent[i].vector, + (i == 3) ? + thunderx_ocx_com_isr : + thunderx_ocx_lnk_isr, + (i == 3) ? + thunderx_ocx_com_threaded_isr : + thunderx_ocx_lnk_threaded_isr, + 0, "[EDAC] ThunderX OCX", + &ocx->msix_ent[i]); + if (ret) + goto err_free; + } + + edac_dev->dev = &pdev->dev; + edac_dev->dev_name = dev_name(&pdev->dev); + edac_dev->mod_name = "thunderx-ocx"; + edac_dev->ctl_name = "thunderx-ocx"; + + ret = edac_device_add_device(edac_dev); + if (ret) { + dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret); + goto err_free; + } + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name); + + ret = thunderx_create_debugfs_nodes(ocx->debugfs, + ocx_dfs_ents, + ocx, + ARRAY_SIZE(ocx_dfs_ents)); + if (ret != ARRAY_SIZE(ocx_dfs_ents)) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? 
" created" : ""); + } + } + + pci_set_drvdata(pdev, edac_dev); + + thunderx_ocx_clearstats(ocx); + + for (i = 0; i < OCX_RX_LANES; i++) { + writeq(OCX_LNE_INT_ENA_ALL, + ocx->regs + OCX_LNE_INT_EN(i)); + + reg = readq(ocx->regs + OCX_LNE_INT(i)); + writeq(reg, ocx->regs + OCX_LNE_INT(i)); + + } + + for (i = 0; i < OCX_LINK_INTS; i++) { + reg = readq(ocx->regs + OCX_COM_LINKX_INT(i)); + writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i)); + + writeq(OCX_COM_LINKX_INT_ENA_ALL, + ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i)); + } + + reg = readq(ocx->regs + OCX_COM_INT); + writeq(reg, ocx->regs + OCX_COM_INT); + + writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S); + + return 0; +err_free: + edac_device_free_ctl_info(edac_dev); + + return ret; +} + +/* + * PCI remove callback for the OCX device: masks the COM and per-link + * interrupt enables, removes the debugfs directory and unregisters and + * frees the EDAC device stored in the PCI driver data. + */ +static void thunderx_ocx_remove(struct pci_dev *pdev) +{ + struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev); + struct thunderx_ocx *ocx = edac_dev->pvt_info; + int i; + + writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C); + + /* + * Probe enables exactly OCX_LINK_INTS per-link registers; disable the + * same set rather than OCX_INTS, which also counts the COM vector. + */ + for (i = 0; i < OCX_LINK_INTS; i++) { + writeq(OCX_COM_LINKX_INT_ENA_ALL, + ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i)); + } + + edac_debugfs_remove_recursive(ocx->debugfs); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(edac_dev); +} + +MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl); + +static struct pci_driver thunderx_ocx_driver = { + .name = "thunderx_ocx_edac", + .probe = thunderx_ocx_probe, + .remove = thunderx_ocx_remove, + .id_table = thunderx_ocx_pci_tbl, +}; + +/*---------------------- L2C driver ---------------------------------*/ + +#define PCI_DEVICE_ID_THUNDER_L2C_TAD 0xa02e +#define PCI_DEVICE_ID_THUNDER_L2C_CBC 0xa02f +#define PCI_DEVICE_ID_THUNDER_L2C_MCI 0xa030 + +#define L2C_TAD_INT_W1C 0x40000 +#define L2C_TAD_INT_W1S 0x40008 + +#define L2C_TAD_INT_ENA_W1C 0x40020 +#define L2C_TAD_INT_ENA_W1S 0x40028 + + +#define L2C_TAD_INT_L2DDBE BIT(1) +#define L2C_TAD_INT_SBFSBE BIT(2) +#define L2C_TAD_INT_SBFDBE BIT(3) +#define L2C_TAD_INT_FBFSBE BIT(4) 
+#define L2C_TAD_INT_FBFDBE BIT(5) +#define L2C_TAD_INT_TAGDBE BIT(9) +#define L2C_TAD_INT_RDDISLMC BIT(15) +#define L2C_TAD_INT_WRDISLMC BIT(16) +#define L2C_TAD_INT_LFBTO BIT(17) +#define L2C_TAD_INT_GSYNCTO BIT(18) +#define L2C_TAD_INT_RTGSBE BIT(32) +#define L2C_TAD_INT_RTGDBE BIT(33) +#define L2C_TAD_INT_RDDISOCI BIT(34) +#define L2C_TAD_INT_WRDISOCI BIT(35) + +#define L2C_TAD_INT_ECC (L2C_TAD_INT_L2DDBE | \ + L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \ + L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE) + +#define L2C_TAD_INT_CE (L2C_TAD_INT_SBFSBE | \ + L2C_TAD_INT_FBFSBE) + +#define L2C_TAD_INT_UE (L2C_TAD_INT_L2DDBE | \ + L2C_TAD_INT_SBFDBE | \ + L2C_TAD_INT_FBFDBE | \ + L2C_TAD_INT_TAGDBE | \ + L2C_TAD_INT_RTGDBE | \ + L2C_TAD_INT_WRDISOCI | \ + L2C_TAD_INT_RDDISOCI | \ + L2C_TAD_INT_WRDISLMC | \ + L2C_TAD_INT_RDDISLMC | \ + L2C_TAD_INT_LFBTO | \ + L2C_TAD_INT_GSYNCTO) + +static const struct error_descr l2_tad_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_TAD_INT_SBFSBE, + .descr = "SBF single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = L2C_TAD_INT_FBFSBE, + .descr = "FBF single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_L2DDBE, + .descr = "L2D double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_SBFDBE, + .descr = "SBF double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_FBFDBE, + .descr = "FBF double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_TAGDBE, + .descr = "TAG double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_RTGDBE, + .descr = "RTG double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_WRDISOCI, + .descr = "Write to a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_RDDISOCI, + .descr = "Read from a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_WRDISLMC, + .descr = "Write to a disabled LMC", + }, + { + .type = ERR_UNCORRECTED, + 
.mask = L2C_TAD_INT_RDDISLMC, + .descr = "Read from a disabled LMC", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_LFBTO, + .descr = "LFB entry timeout", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_TAD_INT_GSYNCTO, + .descr = "Global sync CCPI timeout", + }, + {0, 0, NULL}, +}; + +#define L2C_TAD_INT_TAG (L2C_TAD_INT_TAGDBE) + +#define L2C_TAD_INT_RTG (L2C_TAD_INT_RTGDBE) + +#define L2C_TAD_INT_DISLMC (L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC) + +#define L2C_TAD_INT_DISOCI (L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI) + +#define L2C_TAD_INT_ENA_ALL (L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \ + L2C_TAD_INT_RTG | \ + L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \ + L2C_TAD_INT_LFBTO) + +#define L2C_TAD_TIMETWO 0x50000 +#define L2C_TAD_TIMEOUT 0x50100 +#define L2C_TAD_ERR 0x60000 +#define L2C_TAD_TQD_ERR 0x60100 +#define L2C_TAD_TTG_ERR 0x60200 + + +#define L2C_CBC_INT_W1C 0x60000 + +#define L2C_CBC_INT_RSDSBE BIT(0) +#define L2C_CBC_INT_RSDDBE BIT(1) + +#define L2C_CBC_INT_RSD (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE) + +#define L2C_CBC_INT_MIBSBE BIT(4) +#define L2C_CBC_INT_MIBDBE BIT(5) + +#define L2C_CBC_INT_MIB (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE) + +#define L2C_CBC_INT_IORDDISOCI BIT(6) +#define L2C_CBC_INT_IOWRDISOCI BIT(7) + +#define L2C_CBC_INT_IODISOCI (L2C_CBC_INT_IORDDISOCI | \ + L2C_CBC_INT_IOWRDISOCI) + +#define L2C_CBC_INT_CE (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE) +#define L2C_CBC_INT_UE (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE) + + +static const struct error_descr l2_cbc_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_CBC_INT_RSDSBE, + .descr = "RSD single-bit error", + }, + { + .type = ERR_CORRECTED, + .mask = L2C_CBC_INT_MIBSBE, + .descr = "MIB single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_RSDDBE, + .descr = "RSD double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_MIBDBE, + .descr = "MIB double-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = 
L2C_CBC_INT_IORDDISOCI, + .descr = "Read from a disabled CCPI", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_CBC_INT_IOWRDISOCI, + .descr = "Write to a disabled CCPI", + }, + {0, 0, NULL}, +}; + +#define L2C_CBC_INT_W1S 0x60008 +#define L2C_CBC_INT_ENA_W1C 0x60020 + +#define L2C_CBC_INT_ENA_ALL (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \ + L2C_CBC_INT_IODISOCI) + +#define L2C_CBC_INT_ENA_W1S 0x60028 + +#define L2C_CBC_IODISOCIERR 0x80008 +#define L2C_CBC_IOCERR 0x80010 +#define L2C_CBC_RSDERR 0x80018 +#define L2C_CBC_MIBERR 0x80020 + + +#define L2C_MCI_INT_W1C 0x0 + +#define L2C_MCI_INT_VBFSBE BIT(0) +#define L2C_MCI_INT_VBFDBE BIT(1) + +static const struct error_descr l2_mci_errors[] = { + { + .type = ERR_CORRECTED, + .mask = L2C_MCI_INT_VBFSBE, + .descr = "VBF single-bit error", + }, + { + .type = ERR_UNCORRECTED, + .mask = L2C_MCI_INT_VBFDBE, + .descr = "VBF double-bit error", + }, + {0, 0, NULL}, +}; + +#define L2C_MCI_INT_W1S 0x8 +#define L2C_MCI_INT_ENA_W1C 0x20 + +#define L2C_MCI_INT_ENA_ALL (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE) + +#define L2C_MCI_INT_ENA_W1S 0x28 + +#define L2C_MCI_ERR 0x10000 + +#define L2C_MESSAGE_SIZE SZ_1K +#define L2C_OTHER_SIZE (50 * ARRAY_SIZE(l2_tad_errors)) + +struct l2c_err_ctx { + char *reg_ext_name; + u64 reg_int; + u64 reg_ext; +}; + +struct thunderx_l2c { + void __iomem *regs; + struct pci_dev *pdev; + struct edac_device_ctl_info *edac_dev; + + struct dentry *debugfs; + + int index; + + struct msix_entry msix_ent; + + struct l2c_err_ctx err_ctx[RING_ENTRIES]; + unsigned long ring_head; + unsigned long ring_tail; +}; + +static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx)); + struct l2c_err_ctx *ctx = &tad->err_ctx[head]; + + ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C); + + if (ctx->reg_int & L2C_TAD_INT_ECC) { + 
ctx->reg_ext_name = "TQD_ERR"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR); + } else if (ctx->reg_int & L2C_TAD_INT_TAG) { + ctx->reg_ext_name = "TTG_ERR"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR); + } else if (ctx->reg_int & L2C_TAD_INT_LFBTO) { + ctx->reg_ext_name = "TIMEOUT"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT); + } else if (ctx->reg_int & L2C_TAD_INT_DISOCI) { + ctx->reg_ext_name = "ERR"; + ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR); + } + + writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C); + + tad->ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx)); + struct l2c_err_ctx *ctx = &cbc->err_ctx[head]; + + ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C); + + if (ctx->reg_int & L2C_CBC_INT_RSD) { + ctx->reg_ext_name = "RSDERR"; + ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR); + } else if (ctx->reg_int & L2C_CBC_INT_MIB) { + ctx->reg_ext_name = "MIBERR"; + ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR); + } else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) { + ctx->reg_ext_name = "IODISOCIERR"; + ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR); + } + + writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C); + + cbc->ring_head++; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx)); + struct l2c_err_ctx *ctx = &mci->err_ctx[head]; + + ctx->reg_int = readq(mci->regs + L2C_MCI_INT_W1C); + ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR); + + writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C); + + ctx->reg_ext_name = "ERR"; + + mci->ring_head++; 
+ + return IRQ_WAKE_THREAD; +} + +/* + * Threaded half shared by the L2C TAD, CBC and MCI interrupts: drains the + * err_ctx ring filled by the per-device hard IRQ handler and reports each + * entry to EDAC as uncorrected or corrected according to the masks selected + * from the PCI device ID. + * Returns IRQ_HANDLED once the ring is drained, IRQ_NONE on allocation + * failure or an unexpected PCI device ID. + */ +static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) +{ + struct msix_entry *msix = irq_id; + struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c, + msix_ent); + + unsigned long tail; + struct l2c_err_ctx *ctx; + irqreturn_t ret = IRQ_NONE; + + u64 mask_ue, mask_ce; + const struct error_descr *l2_errors; + char *reg_int_name; + + char *msg; + char *other; + + /* Size the buffers with the same L2C_* limits used when filling them */ + msg = kmalloc(L2C_MESSAGE_SIZE, GFP_KERNEL); + other = kmalloc(L2C_OTHER_SIZE, GFP_KERNEL); + + if (!msg || !other) + goto err_free; + + switch (l2c->pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + reg_int_name = "L2C_TAD_INT"; + mask_ue = L2C_TAD_INT_UE; + mask_ce = L2C_TAD_INT_CE; + l2_errors = l2_tad_errors; + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + reg_int_name = "L2C_CBC_INT"; + mask_ue = L2C_CBC_INT_UE; + mask_ce = L2C_CBC_INT_CE; + l2_errors = l2_cbc_errors; + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + reg_int_name = "L2C_MCI_INT"; + mask_ue = L2C_MCI_INT_VBFDBE; + mask_ce = L2C_MCI_INT_VBFSBE; + l2_errors = l2_mci_errors; + break; + default: + dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n", + l2c->pdev->device); + /* ret is still IRQ_NONE; leave through the common cleanup */ + goto err_free; + } + + while (CIRC_CNT(l2c->ring_head, l2c->ring_tail, + ARRAY_SIZE(l2c->err_ctx))) { + /* Re-read the slot on every pass so each queued entry is reported */ + tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx)); + ctx = &l2c->err_ctx[tail]; + + snprintf(msg, L2C_MESSAGE_SIZE, + "%s: %s: %016llx, %s: %016llx", + l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int, + ctx->reg_ext_name, ctx->reg_ext); + + decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int); + + /* strlcat() is bounded by the total size of msg */ + strlcat(msg, other, L2C_MESSAGE_SIZE); + + if (ctx->reg_int & mask_ue) + edac_device_handle_ue(l2c->edac_dev, 0, 0, msg); + else if (ctx->reg_int & mask_ce) + edac_device_handle_ce(l2c->edac_dev, 0, 0, msg); + + l2c->ring_tail++; + } + + ret = IRQ_HANDLED; + +err_free: + kfree(other); + kfree(msg); + + return ret; +} + +#define L2C_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(l2c, _name, _reg) + 
+L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S); + +struct debugfs_entry *l2c_tad_dfs_ents[] = { + &debugfs_tad_int, +}; + +L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S); + +struct debugfs_entry *l2c_cbc_dfs_ents[] = { + &debugfs_cbc_int, +}; + +L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S); + +struct debugfs_entry *l2c_mci_dfs_ents[] = { + &debugfs_mci_int, +}; + +static const struct pci_device_id thunderx_l2c_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), }, + { 0, }, +}; + +static int thunderx_l2c_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct thunderx_l2c *l2c; + struct edac_device_ctl_info *edac_dev; + struct debugfs_entry **l2c_devattr; + size_t dfs_entries; + irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL; + char name[32]; + const char *fmt; + u64 reg_en_offs, reg_en_mask; + int idx; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c"); + if (ret) { + dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); + return ret; + } + + switch (pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + thunderx_l2c_isr = thunderx_l2c_tad_isr; + l2c_devattr = l2c_tad_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents); + fmt = "L2C-TAD%d"; + reg_en_offs = L2C_TAD_INT_ENA_W1S; + reg_en_mask = L2C_TAD_INT_ENA_ALL; + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + thunderx_l2c_isr = thunderx_l2c_cbc_isr; + l2c_devattr = l2c_cbc_dfs_ents; + dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents); + fmt = "L2C-CBC%d"; + reg_en_offs = L2C_CBC_INT_ENA_W1S; + reg_en_mask = L2C_CBC_INT_ENA_ALL; + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + thunderx_l2c_isr = thunderx_l2c_mci_isr; + l2c_devattr = l2c_mci_dfs_ents; + dfs_entries = 
ARRAY_SIZE(l2c_mci_dfs_ents); + fmt = "L2C-MCI%d"; + reg_en_offs = L2C_MCI_INT_ENA_W1S; + reg_en_mask = L2C_MCI_INT_ENA_ALL; + break; + default: + //Should never ever get here + dev_err(&pdev->dev, "Unsupported PCI device: %04x\n", + pdev->device); + return -EINVAL; + } + + idx = edac_device_alloc_index(); + snprintf(name, sizeof(name), fmt, idx); + + edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c), + name, 1, "L2C", 1, 0, + NULL, 0, idx); + if (!edac_dev) { + dev_err(&pdev->dev, "Cannot allocate EDAC device\n"); + return -ENOMEM; + } + + l2c = edac_dev->pvt_info; + l2c->edac_dev = edac_dev; + + l2c->regs = pcim_iomap_table(pdev)[0]; + if (!l2c->regs) { + dev_err(&pdev->dev, "Cannot map PCI resources\n"); + ret = -ENODEV; + goto err_free; + } + + l2c->pdev = pdev; + + l2c->ring_head = 0; + l2c->ring_tail = 0; + + l2c->msix_ent.entry = 0; + l2c->msix_ent.vector = 0; + + ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1); + if (ret) { + dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); + goto err_free; + } + + ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector, + thunderx_l2c_isr, + thunderx_l2c_threaded_isr, + 0, "[EDAC] ThunderX L2C", + &l2c->msix_ent); + if (ret) + goto err_free; + + edac_dev->dev = &pdev->dev; + edac_dev->dev_name = dev_name(&pdev->dev); + edac_dev->mod_name = "thunderx-l2c"; + edac_dev->ctl_name = "thunderx-l2c"; + + ret = edac_device_add_device(edac_dev); + if (ret) { + dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret); + goto err_free; + } + + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name); + + thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr, + l2c, dfs_entries); + + if (ret != dfs_entries) { + dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", + ret, ret >= 0 ? 
" created" : ""); + } + } + + pci_set_drvdata(pdev, edac_dev); + + writeq(reg_en_mask, l2c->regs + reg_en_offs); + + return 0; + +err_free: + edac_device_free_ctl_info(edac_dev); + + return ret; +} + +static void thunderx_l2c_remove(struct pci_dev *pdev) +{ + struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev); + struct thunderx_l2c *l2c = edac_dev->pvt_info; + + switch (pdev->device) { + case PCI_DEVICE_ID_THUNDER_L2C_TAD: + writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C); + break; + case PCI_DEVICE_ID_THUNDER_L2C_CBC: + writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C); + break; + case PCI_DEVICE_ID_THUNDER_L2C_MCI: + writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C); + break; + } + + edac_debugfs_remove_recursive(l2c->debugfs); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(edac_dev); +} + +MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl); + +static struct pci_driver thunderx_l2c_driver = { + .name = "thunderx_l2c_edac", + .probe = thunderx_l2c_probe, + .remove = thunderx_l2c_remove, + .id_table = thunderx_l2c_pci_tbl, +}; + +static int __init thunderx_edac_init(void) +{ + int rc = 0; + + rc = pci_register_driver(&thunderx_lmc_driver); + if (rc) + return rc; + + rc = pci_register_driver(&thunderx_ocx_driver); + if (rc) + goto err_lmc; + + rc = pci_register_driver(&thunderx_l2c_driver); + if (rc) + goto err_ocx; + + return rc; +err_ocx: + pci_unregister_driver(&thunderx_ocx_driver); +err_lmc: + pci_unregister_driver(&thunderx_lmc_driver); + + return rc; +} + +static void __exit thunderx_edac_exit(void) +{ + pci_unregister_driver(&thunderx_l2c_driver); + pci_unregister_driver(&thunderx_ocx_driver); + pci_unregister_driver(&thunderx_lmc_driver); + +} + +module_init(thunderx_edac_init); +module_exit(thunderx_edac_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Cavium, Inc."); +MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX"); diff --git a/drivers/extcon/devres.c b/drivers/extcon/devres.c index 
b40eb1805927..186fd735eb28 100644 --- a/drivers/extcon/devres.c +++ b/drivers/extcon/devres.c @@ -50,6 +50,13 @@ static void devm_extcon_dev_notifier_unreg(struct device *dev, void *res) extcon_unregister_notifier(this->edev, this->id, this->nb); } +static void devm_extcon_dev_notifier_all_unreg(struct device *dev, void *res) +{ + struct extcon_dev_notifier_devres *this = res; + + extcon_unregister_notifier_all(this->edev, this->nb); +} + /** * devm_extcon_dev_allocate - Allocate managed extcon device * @dev: device owning the extcon device being created @@ -214,3 +221,57 @@ void devm_extcon_unregister_notifier(struct device *dev, devm_extcon_dev_match, edev)); } EXPORT_SYMBOL(devm_extcon_unregister_notifier); + +/** + * devm_extcon_register_notifier_all() + * - Resource-managed extcon_register_notifier_all() + * @dev: device to allocate extcon device + * @edev: the extcon device that has the external connector. + * @nb: a notifier block to be registered. + * + * This function manages automatically the notifier of extcon device using + * device resource management and simplify the control of unregistering + * the notifier of extcon device. To get more information, refer that function. + * + * Returns 0 if success or negative error number if failure. 
+ */ +int devm_extcon_register_notifier_all(struct device *dev, struct extcon_dev *edev, + struct notifier_block *nb) +{ + struct extcon_dev_notifier_devres *ptr; + int ret; + + ptr = devres_alloc(devm_extcon_dev_notifier_all_unreg, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + ret = extcon_register_notifier_all(edev, nb); + if (ret) { + devres_free(ptr); + return ret; + } + + ptr->edev = edev; + ptr->nb = nb; + devres_add(dev, ptr); + + return 0; +} +EXPORT_SYMBOL(devm_extcon_register_notifier_all); + +/** + * devm_extcon_unregister_notifier_all() + * - Resource-managed extcon_unregister_notifier_all() + * @dev: device to allocate extcon device + * @edev: the extcon device that has the external connector. + * @nb: a notifier block to be unregistered. + */ +void devm_extcon_unregister_notifier_all(struct device *dev, + struct extcon_dev *edev, + struct notifier_block *nb) +{ + WARN_ON(devres_release(dev, devm_extcon_dev_notifier_all_unreg, + devm_extcon_dev_match, edev)); +} +EXPORT_SYMBOL(devm_extcon_unregister_notifier_all); diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index 09ac5e70c2f3..e7750545469f 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -448,8 +448,19 @@ int extcon_sync(struct extcon_dev *edev, unsigned int id) spin_lock_irqsave(&edev->lock, flags); state = !!(edev->state & BIT(index)); + + /* + * Call functions in a raw notifier chain for the specific one + * external connector. + */ raw_notifier_call_chain(&edev->nh[index], state, edev); + /* + * Call functions in a raw notifier chain for the all supported + * external connectors. 
+ */ + raw_notifier_call_chain(&edev->nh_all, state, edev); + /* This could be in interrupt handler */ prop_buf = (char *)get_zeroed_page(GFP_ATOMIC); if (!prop_buf) { @@ -954,6 +965,59 @@ int extcon_unregister_notifier(struct extcon_dev *edev, unsigned int id, } EXPORT_SYMBOL_GPL(extcon_unregister_notifier); +/** + * extcon_register_notifier_all() - Register a notifier block for all connectors + * @edev: the extcon device that has the external connector. + * @nb: a notifier block to be registered. + * + * This function registers a notifier block in order to receive the state + * change of all supported external connectors from extcon device. + * The second parameter given to the callback of nb (val) is + * the current state and third parameter is the edev pointer. + * + * Returns 0 if success or error number if fail + */ +int extcon_register_notifier_all(struct extcon_dev *edev, + struct notifier_block *nb) +{ + unsigned long flags; + int ret; + + if (!edev || !nb) + return -EINVAL; + + spin_lock_irqsave(&edev->lock, flags); + ret = raw_notifier_chain_register(&edev->nh_all, nb); + spin_unlock_irqrestore(&edev->lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(extcon_register_notifier_all); + +/** + * extcon_unregister_notifier_all() - Unregister a notifier block from extcon. + * @edev: the extcon device that has the external connector. + * @nb: a notifier block to be unregistered. 
+ * + * Returns 0 if success or error number if fail + */ +int extcon_unregister_notifier_all(struct extcon_dev *edev, + struct notifier_block *nb) +{ + unsigned long flags; + int ret; + + if (!edev || !nb) + return -EINVAL; + + spin_lock_irqsave(&edev->lock, flags); + ret = raw_notifier_chain_unregister(&edev->nh_all, nb); + spin_unlock_irqrestore(&edev->lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(extcon_unregister_notifier_all); + static struct attribute *extcon_attrs[] = { &dev_attr_state.attr, &dev_attr_name.attr, @@ -1212,6 +1276,8 @@ int extcon_dev_register(struct extcon_dev *edev) for (index = 0; index < edev->max_supported; index++) RAW_INIT_NOTIFIER_HEAD(&edev->nh[index]); + RAW_INIT_NOTIFIER_HEAD(&edev->nh_all); + dev_set_drvdata(&edev->dev, edev); edev->state = 0; diff --git a/drivers/extcon/extcon.h b/drivers/extcon/extcon.h index 993ddccafe11..dddddcfa0587 100644 --- a/drivers/extcon/extcon.h +++ b/drivers/extcon/extcon.h @@ -21,6 +21,8 @@ * @dev: Device of this extcon. * @state: Attach/detach state of this extcon. Do not provide at * register-time. + * @nh_all: Notifier for the state change events for all supported + * external connectors from this extcon. * @nh: Notifier for the state change events from this extcon * @entry: To support list of extcon devices so that users can * search for extcon devices based on the extcon name. @@ -43,6 +45,7 @@ struct extcon_dev { /* Internal data. Please do not set. 
*/ struct device dev; + struct raw_notifier_head nh_all; struct raw_notifier_head *nh; struct list_head entry; int max_supported; diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index ad67342313ed..0329d319d89a 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -9,6 +9,7 @@ # KASAN_SANITIZE_runtime-wrappers.o := n +obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o obj-$(CONFIG_EFI) += efi.o vars.o reboot.o memattr.o obj-$(CONFIG_EFI) += capsule.o memmap.o obj-$(CONFIG_EFI_VARS) += efivars.o diff --git a/drivers/firmware/efi/efi-bgrt.c b/drivers/firmware/efi/efi-bgrt.c new file mode 100644 index 000000000000..04ca8764f0c0 --- /dev/null +++ b/drivers/firmware/efi/efi-bgrt.c @@ -0,0 +1,84 @@ +/* + * Copyright 2012 Intel Corporation + * Author: Josh Triplett <josh@joshtriplett.org> + * + * Based on the bgrt driver: + * Copyright 2012 Red Hat, Inc <mjg@redhat.com> + * Author: Matthew Garrett + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/acpi.h> +#include <linux/efi.h> +#include <linux/efi-bgrt.h> + +struct acpi_table_bgrt bgrt_tab; +size_t __initdata bgrt_image_size; + +struct bmp_header { + u16 id; + u32 size; +} __packed; + +void __init efi_bgrt_init(struct acpi_table_header *table) +{ + void *image; + struct bmp_header bmp_header; + struct acpi_table_bgrt *bgrt = &bgrt_tab; + + if (acpi_disabled) + return; + + if (table->length < sizeof(bgrt_tab)) { + pr_notice("Ignoring BGRT: invalid length %u (expected %zu)\n", + table->length, sizeof(bgrt_tab)); + return; + } + *bgrt = *(struct acpi_table_bgrt *)table; + if (bgrt->version != 1) { + pr_notice("Ignoring BGRT: invalid version %u (expected 1)\n", + bgrt->version); + goto out; + } + if (bgrt->status & 0xfe) { + pr_notice("Ignoring BGRT: reserved status bits are non-zero %u\n", + bgrt->status); + goto out; + } + if (bgrt->image_type != 0) { + pr_notice("Ignoring BGRT: invalid image type %u (expected 0)\n", + bgrt->image_type); + goto out; + } + if (!bgrt->image_address) { + pr_notice("Ignoring BGRT: null image address\n"); + goto out; + } + + image = early_memremap(bgrt->image_address, sizeof(bmp_header)); + if (!image) { + pr_notice("Ignoring BGRT: failed to map image header memory\n"); + goto out; + } + + memcpy(&bmp_header, image, sizeof(bmp_header)); + early_memunmap(image, sizeof(bmp_header)); + if (bmp_header.id != 0x4d42) { + pr_notice("Ignoring BGRT: Incorrect BMP magic number 0x%x (expected 0x4d42)\n", + bmp_header.id); + goto out; + } + bgrt_image_size = bmp_header.size; + efi_mem_reserve(bgrt->image_address, bgrt_image_size); + + return; +out: + memset(bgrt, 0, sizeof(bgrt_tab)); +} diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index f402ba2eed46..6b5acefce6b3 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -274,9 +274,9 @@ static int 
efi_pstore_write(enum pstore_type_id type, for (i = 0; i < DUMP_NAME_LEN; i++) efi_name[i] = name[i]; - efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES, - !pstore_cannot_block_path(reason), - size, psi->buf); + ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES, + !pstore_cannot_block_path(reason), + size, psi->buf); if (reason == KMSG_DUMP_OOPS) efivar_run_worker(); diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index d4056c6be1ec..8181ac179d14 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -18,7 +18,27 @@ #include "efistub.h" -bool __nokaslr; +/* + * This is the base address at which to start allocating virtual memory ranges + * for UEFI Runtime Services. This is in the low TTBR0 range so that we can use + * any allocation we choose, and eliminate the risk of a conflict after kexec. + * The value chosen is the largest non-zero power of 2 suitable for this purpose + * both on 32-bit and 64-bit ARM CPUs, to maximize the likelihood that it can + * be mapped efficiently. + * Since 32-bit ARM could potentially execute with a 1G/3G user/kernel split, + * map everything below 1 GB. 
(512 MB is a reasonable upper bound for the + * entire footprint of the UEFI runtime services memory regions) + */ +#define EFI_RT_VIRTUAL_BASE SZ_512M +#define EFI_RT_VIRTUAL_SIZE SZ_512M + +#ifdef CONFIG_ARM64 +# define EFI_RT_VIRTUAL_LIMIT TASK_SIZE_64 +#else +# define EFI_RT_VIRTUAL_LIMIT TASK_SIZE +#endif + +static u64 virtmap_base = EFI_RT_VIRTUAL_BASE; efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image, void **__fh) @@ -118,8 +138,6 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) goto fail; - pr_efi(sys_table, "Booting Linux Kernel...\n"); - status = check_platform_features(sys_table); if (status != EFI_SUCCESS) goto fail; @@ -153,17 +171,15 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, goto fail; } - /* check whether 'nokaslr' was passed on the command line */ - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { - static const u8 default_cmdline[] = CONFIG_CMDLINE; - const u8 *str, *cmdline = cmdline_ptr; + if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || + IS_ENABLED(CONFIG_CMDLINE_FORCE) || + cmdline_size == 0) + efi_parse_options(CONFIG_CMDLINE); - if (IS_ENABLED(CONFIG_CMDLINE_FORCE)) - cmdline = default_cmdline; - str = strstr(cmdline, "nokaslr"); - if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) - __nokaslr = true; - } + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && cmdline_size > 0) + efi_parse_options(cmdline_ptr); + + pr_efi(sys_table, "Booting Linux Kernel...\n"); si = setup_graphics(sys_table); @@ -176,10 +192,6 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, goto fail_free_cmdline; } - status = efi_parse_options(cmdline_ptr); - if (status != EFI_SUCCESS) - pr_efi_err(sys_table, "Failed to parse EFI cmdline options\n"); - secure_boot = efi_get_secureboot(sys_table); /* @@ -213,8 +225,9 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, if (!fdt_addr) pr_efi(sys_table, 
"Generating empty DTB\n"); - status = handle_cmdline_files(sys_table, image, cmdline_ptr, - "initrd=", dram_base + SZ_512M, + status = handle_cmdline_files(sys_table, image, cmdline_ptr, "initrd=", + efi_get_max_initrd_addr(dram_base, + *image_addr), (unsigned long *)&initrd_addr, (unsigned long *)&initrd_size); if (status != EFI_SUCCESS) @@ -222,9 +235,29 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, efi_random_get_seed(sys_table); + if (!nokaslr()) { + /* + * Randomize the base of the UEFI runtime services region. + * Preserve the 2 MB alignment of the region by taking a + * shift of 21 bit positions into account when scaling + * the headroom value using a 32-bit random value. + */ + static const u64 headroom = EFI_RT_VIRTUAL_LIMIT - + EFI_RT_VIRTUAL_BASE - + EFI_RT_VIRTUAL_SIZE; + u32 rnd; + + status = efi_get_random_bytes(sys_table, sizeof(rnd), + (u8 *)&rnd); + if (status == EFI_SUCCESS) { + virtmap_base = EFI_RT_VIRTUAL_BASE + + (((headroom >> 21) * rnd) >> (32 - 21)); + } + } + new_fdt_addr = fdt_addr; status = allocate_new_fdt_and_exit_boot(sys_table, handle, - &new_fdt_addr, dram_base + MAX_FDT_OFFSET, + &new_fdt_addr, efi_get_max_fdt_addr(dram_base), initrd_addr, initrd_size, cmdline_ptr, fdt_addr, fdt_size); @@ -251,18 +284,6 @@ fail: return EFI_ERROR; } -/* - * This is the base address at which to start allocating virtual memory ranges - * for UEFI Runtime Services. This is in the low TTBR0 range so that we can use - * any allocation we choose, and eliminate the risk of a conflict after kexec. - * The value chosen is the largest non-zero power of 2 suitable for this purpose - * both on 32-bit and 64-bit ARM CPUs, to maximize the likelihood that it can - * be mapped efficiently. - * Since 32-bit ARM could potentially execute with a 1G/3G user/kernel split, - * map everything below 1 GB. 
- */ -#define EFI_RT_VIRTUAL_BASE SZ_512M - static int cmp_mem_desc(const void *l, const void *r) { const efi_memory_desc_t *left = l, *right = r; @@ -312,7 +333,7 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, unsigned long desc_size, efi_memory_desc_t *runtime_map, int *count) { - u64 efi_virt_base = EFI_RT_VIRTUAL_BASE; + u64 efi_virt_base = virtmap_base; efi_memory_desc_t *in, *prev = NULL, *out = runtime_map; int l; diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c index e1f0b28e1dcb..becbda445913 100644 --- a/drivers/firmware/efi/libstub/arm32-stub.c +++ b/drivers/firmware/efi/libstub/arm32-stub.c @@ -9,6 +9,8 @@ #include <linux/efi.h> #include <asm/efi.h> +#include "efistub.h" + efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) { int block; @@ -63,6 +65,132 @@ void free_screen_info(efi_system_table_t *sys_table_arg, struct screen_info *si) efi_call_early(free_pool, si); } +static efi_status_t reserve_kernel_base(efi_system_table_t *sys_table_arg, + unsigned long dram_base, + unsigned long *reserve_addr, + unsigned long *reserve_size) +{ + efi_physical_addr_t alloc_addr; + efi_memory_desc_t *memory_map; + unsigned long nr_pages, map_size, desc_size, buff_size; + efi_status_t status; + unsigned long l; + + struct efi_boot_memmap map = { + .map = &memory_map, + .map_size = &map_size, + .desc_size = &desc_size, + .desc_ver = NULL, + .key_ptr = NULL, + .buff_size = &buff_size, + }; + + /* + * Reserve memory for the uncompressed kernel image. This is + * all that prevents any future allocations from conflicting + * with the kernel. Since we can't tell from the compressed + * image how much DRAM the kernel actually uses (due to BSS + * size uncertainty) we allocate the maximum possible size. + * Do this very early, as prints can cause memory allocations + * that may conflict with this. 
+ */ + alloc_addr = dram_base + MAX_UNCOMP_KERNEL_SIZE; + nr_pages = MAX_UNCOMP_KERNEL_SIZE / EFI_PAGE_SIZE; + status = efi_call_early(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS, + EFI_BOOT_SERVICES_DATA, nr_pages, &alloc_addr); + if (status == EFI_SUCCESS) { + if (alloc_addr == dram_base) { + *reserve_addr = alloc_addr; + *reserve_size = MAX_UNCOMP_KERNEL_SIZE; + return EFI_SUCCESS; + } + /* + * If we end up here, the allocation succeeded but starts below + * dram_base. This can only occur if the real base of DRAM is + * not a multiple of 128 MB, in which case dram_base will have + * been rounded up. Since this implies that a part of the region + * was already occupied, we need to fall through to the code + * below to ensure that the existing allocations don't conflict. + * For this reason, we use EFI_BOOT_SERVICES_DATA above and not + * EFI_LOADER_DATA, which we wouldn't able to distinguish from + * allocations that we want to disallow. + */ + } + + /* + * If the allocation above failed, we may still be able to proceed: + * if the only allocations in the region are of types that will be + * released to the OS after ExitBootServices(), the decompressor can + * safely overwrite them. 
+ */ + status = efi_get_memory_map(sys_table_arg, &map); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table_arg, + "reserve_kernel_base(): Unable to retrieve memory map.\n"); + return status; + } + + for (l = 0; l < map_size; l += desc_size) { + efi_memory_desc_t *desc; + u64 start, end; + + desc = (void *)memory_map + l; + start = desc->phys_addr; + end = start + desc->num_pages * EFI_PAGE_SIZE; + + /* Skip if entry does not intersect with region */ + if (start >= dram_base + MAX_UNCOMP_KERNEL_SIZE || + end <= dram_base) + continue; + + switch (desc->type) { + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + /* Ignore types that are released to the OS anyway */ + continue; + + case EFI_CONVENTIONAL_MEMORY: + /* + * Reserve the intersection between this entry and the + * region. + */ + start = max(start, (u64)dram_base); + end = min(end, (u64)dram_base + MAX_UNCOMP_KERNEL_SIZE); + + status = efi_call_early(allocate_pages, + EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, + (end - start) / EFI_PAGE_SIZE, + &start); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table_arg, + "reserve_kernel_base(): alloc failed.\n"); + goto out; + } + break; + + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + /* + * These regions may be released and reallocated for + * another purpose (including EFI_RUNTIME_SERVICE_DATA) + * at any time during the execution of the OS loader, + * so we cannot consider them as safe. 
+ */ + default: + /* + * Treat any other allocation in the region as unsafe */ + status = EFI_OUT_OF_RESOURCES; + goto out; + } + } + + status = EFI_SUCCESS; +out: + efi_call_early(free_pool, memory_map); + return status; +} + efi_status_t handle_kernel_image(efi_system_table_t *sys_table, unsigned long *image_addr, unsigned long *image_size, @@ -71,10 +199,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, unsigned long dram_base, efi_loaded_image_t *image) { - unsigned long nr_pages; efi_status_t status; - /* Use alloc_addr to tranlsate between types */ - efi_physical_addr_t alloc_addr; /* * Verify that the DRAM base address is compatible with the ARM @@ -85,27 +210,12 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, */ dram_base = round_up(dram_base, SZ_128M); - /* - * Reserve memory for the uncompressed kernel image. This is - * all that prevents any future allocations from conflicting - * with the kernel. Since we can't tell from the compressed - * image how much DRAM the kernel actually uses (due to BSS - * size uncertainty) we allocate the maximum possible size. - * Do this very early, as prints can cause memory allocations - * that may conflict with this. 
- */ - alloc_addr = dram_base; - *reserve_size = MAX_UNCOMP_KERNEL_SIZE; - nr_pages = round_up(*reserve_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; - status = sys_table->boottime->allocate_pages(EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, - nr_pages, &alloc_addr); + status = reserve_kernel_base(sys_table, dram_base, reserve_addr, + reserve_size); if (status != EFI_SUCCESS) { - *reserve_size = 0; pr_efi_err(sys_table, "Unable to allocate memory for uncompressed kernel.\n"); return status; } - *reserve_addr = alloc_addr; /* * Relocate the zImage, so that it appears in the lowest 128 MB diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index eae693eb3e91..b4c2589d7c91 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -16,8 +16,6 @@ #include "efistub.h" -extern bool __nokaslr; - efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) { u64 tg; @@ -52,7 +50,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, u64 phys_seed = 0; if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { - if (!__nokaslr) { + if (!nokaslr()) { status = efi_get_random_bytes(sys_table_arg, sizeof(phys_seed), (u8 *)&phys_seed); diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 919822b7773d..b0184360efc6 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -32,6 +32,18 @@ static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE; +static int __section(.data) __nokaslr; +static int __section(.data) __quiet; + +int __pure nokaslr(void) +{ + return __nokaslr; +} +int __pure is_quiet(void) +{ + return __quiet; +} + #define EFI_MMAP_NR_SLACK_SLOTS 8 struct file_info { @@ -409,17 +421,17 @@ static efi_status_t efi_file_close(void *handle) * environments, first in the early boot environment of the EFI boot * stub, and subsequently during the kernel boot. 
*/ -efi_status_t efi_parse_options(char *cmdline) +efi_status_t efi_parse_options(char const *cmdline) { char *str; - /* - * Currently, the only efi= option we look for is 'nochunk', which - * is intended to work around known issues on certain x86 UEFI - * versions. So ignore for now on other architectures. - */ - if (!IS_ENABLED(CONFIG_X86)) - return EFI_SUCCESS; + str = strstr(cmdline, "nokaslr"); + if (str == cmdline || (str && str > cmdline && *(str - 1) == ' ')) + __nokaslr = 1; + + str = strstr(cmdline, "quiet"); + if (str == cmdline || (str && str > cmdline && *(str - 1) == ' ')) + __quiet = 1; /* * If no EFI parameters were specified on the cmdline we've got @@ -436,14 +448,14 @@ efi_status_t efi_parse_options(char *cmdline) * Remember, because efi= is also used by the kernel we need to * skip over arguments we don't understand. */ - while (*str) { + while (*str && *str != ' ') { if (!strncmp(str, "nochunk", 7)) { str += strlen("nochunk"); __chunk_size = -1UL; } /* Group words together, delimited by "," */ - while (*str && *str != ',') + while (*str && *str != ' ' && *str != ',') str++; if (*str == ',') diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 71c4d0e3c4ed..83f268c05007 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -24,6 +24,15 @@ #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE #endif +extern int __pure nokaslr(void); +extern int __pure is_quiet(void); + +#define pr_efi(sys_table, msg) do { \ + if (!is_quiet()) efi_printk(sys_table, "EFI stub: "msg); \ +} while (0) + +#define pr_efi_err(sys_table, msg) efi_printk(sys_table, "EFI stub: ERROR: "msg) + void efi_char16_printk(efi_system_table_t *, efi_char16_t *); efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image, diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 260c4b4b492e..41f457be64e8 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ 
b/drivers/firmware/efi/libstub/fdt.c @@ -206,6 +206,10 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, return update_fdt_memmap(p->new_fdt_addr, map); } +#ifndef MAX_FDT_SIZE +#define MAX_FDT_SIZE SZ_2M +#endif + /* * Allocate memory for a new FDT, then add EFI, commandline, and * initrd related fields to the FDT. This routine increases the @@ -233,7 +237,6 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, u32 desc_ver; unsigned long mmap_key; efi_memory_desc_t *memory_map, *runtime_map; - unsigned long new_fdt_size; efi_status_t status; int runtime_entry_count = 0; struct efi_boot_memmap map; @@ -262,41 +265,29 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, "Exiting boot services and installing virtual address map...\n"); map.map = &memory_map; + status = efi_high_alloc(sys_table, MAX_FDT_SIZE, EFI_FDT_ALIGN, + new_fdt_addr, max_addr); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, + "Unable to allocate memory for new device tree.\n"); + goto fail; + } + /* - * Estimate size of new FDT, and allocate memory for it. We - * will allocate a bigger buffer if this ends up being too - * small, so a rough guess is OK here. + * Now that we have done our final memory allocation (and free) + * we can get the memory map key needed for exit_boot_services(). */ - new_fdt_size = fdt_size + EFI_PAGE_SIZE; - while (1) { - status = efi_high_alloc(sys_table, new_fdt_size, EFI_FDT_ALIGN, - new_fdt_addr, max_addr); - if (status != EFI_SUCCESS) { - pr_efi_err(sys_table, "Unable to allocate memory for new device tree.\n"); - goto fail; - } - - status = update_fdt(sys_table, - (void *)fdt_addr, fdt_size, - (void *)*new_fdt_addr, new_fdt_size, - cmdline_ptr, initrd_addr, initrd_size); + status = efi_get_memory_map(sys_table, &map); + if (status != EFI_SUCCESS) + goto fail_free_new_fdt; - /* Succeeding the first time is the expected case. 
*/ - if (status == EFI_SUCCESS) - break; + status = update_fdt(sys_table, (void *)fdt_addr, fdt_size, + (void *)*new_fdt_addr, MAX_FDT_SIZE, cmdline_ptr, + initrd_addr, initrd_size); - if (status == EFI_BUFFER_TOO_SMALL) { - /* - * We need to allocate more space for the new - * device tree, so free existing buffer that is - * too small. - */ - efi_free(sys_table, new_fdt_size, *new_fdt_addr); - new_fdt_size += EFI_PAGE_SIZE; - } else { - pr_efi_err(sys_table, "Unable to construct new device tree.\n"); - goto fail_free_new_fdt; - } + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Unable to construct new device tree.\n"); + goto fail_free_new_fdt; } priv.runtime_map = runtime_map; @@ -340,7 +331,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, pr_efi_err(sys_table, "Exit boot services failed.\n"); fail_free_new_fdt: - efi_free(sys_table, new_fdt_size, *new_fdt_addr); + efi_free(sys_table, MAX_FDT_SIZE, *new_fdt_addr); fail: sys_table->boottime->free_pool(runtime_map); diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c index 932742e4cf23..24c461dea7af 100644 --- a/drivers/firmware/efi/libstub/gop.c +++ b/drivers/firmware/efi/libstub/gop.c @@ -149,7 +149,8 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, status = __gop_query32(sys_table_arg, gop32, &info, &size, &current_fb_base); - if (status == EFI_SUCCESS && (!first_gop || conout_found)) { + if (status == EFI_SUCCESS && (!first_gop || conout_found) && + info->pixel_format != PIXEL_BLT_ONLY) { /* * Systems that use the UEFI Console Splitter may * provide multiple GOP devices, not all of which are @@ -266,7 +267,8 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, status = __gop_query64(sys_table_arg, gop64, &info, &size, &current_fb_base); - if (status == EFI_SUCCESS && (!first_gop || conout_found)) { + if (status == EFI_SUCCESS && (!first_gop || conout_found) && + info->pixel_format != PIXEL_BLT_ONLY) { /* * 
Systems that use the UEFI Console Splitter may * provide multiple GOP devices, not all of which are diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c index 5da36e56b36a..8c34d50a4d80 100644 --- a/drivers/firmware/efi/libstub/secureboot.c +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -12,6 +12,8 @@ #include <linux/efi.h> #include <asm/efi.h> +#include "efistub.h" + /* BIOS variables */ static const efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; static const efi_char16_t const efi_SecureBoot_name[] = { diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index da48819ff2e6..b78d9239e48f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1317,7 +1317,7 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, if (!fence) { event_free(gpu, event); ret = -ENOMEM; - goto out_pm_put; + goto out_unlock; } gpu->event[event].fence = fence; @@ -1357,6 +1357,7 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, hangcheck_timer_reset(gpu); ret = 0; +out_unlock: mutex_unlock(&gpu->lock); out_pm_put: diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index b7d7721e72fa..40af17ec6312 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -285,9 +285,6 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, { int ret; - if (vgpu->failsafe) - return 0; - if (WARN_ON(bytes > 4)) return -EINVAL; diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index f1f426a97aa9..d186c157f65f 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -775,7 +775,8 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) _EL_OFFSET_STATUS_PTR); ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg); - ctx_status_ptr.read_ptr = ctx_status_ptr.write_ptr = 0x7; + 
ctx_status_ptr.read_ptr = 0; + ctx_status_ptr.write_ptr = 0x7; vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; } diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index 933a7c211a1c..dce8d15f706f 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -75,11 +75,11 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) struct gvt_firmware_header *h; void *firmware; void *p; - unsigned long size; + unsigned long size, crc32_start; int i; int ret; - size = sizeof(*h) + info->mmio_size + info->cfg_space_size - 1; + size = sizeof(*h) + info->mmio_size + info->cfg_space_size; firmware = vzalloc(size); if (!firmware) return -ENOMEM; @@ -112,6 +112,9 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) memcpy(gvt->firmware.mmio, p, info->mmio_size); + crc32_start = offsetof(struct gvt_firmware_header, crc32) + 4; + h->crc32 = crc32_le(0, firmware + crc32_start, size - crc32_start); + firmware_attr.size = size; firmware_attr.private = firmware; @@ -234,7 +237,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt) firmware->mmio = mem; - sprintf(path, "%s/vid_0x%04x_did_0x%04x_rid_0x%04x.golden_hw_state", + sprintf(path, "%s/vid_0x%04x_did_0x%04x_rid_0x%02x.golden_hw_state", GVT_FIRMWARE_PATH, pdev->vendor, pdev->device, pdev->revision); diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 3b9d59e457ba..ef3baa0c4754 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -52,6 +52,8 @@ static const struct intel_gvt_ops intel_gvt_ops = { .vgpu_create = intel_gvt_create_vgpu, .vgpu_destroy = intel_gvt_destroy_vgpu, .vgpu_reset = intel_gvt_reset_vgpu, + .vgpu_activate = intel_gvt_activate_vgpu, + .vgpu_deactivate = intel_gvt_deactivate_vgpu, }; /** diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 6dfc48b63b71..becae2fa3b29 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ 
b/drivers/gpu/drm/i915/gvt/gvt.h @@ -382,7 +382,8 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu); void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, unsigned int engine_mask); void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu); - +void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu); +void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu); /* validating GM functions */ #define vgpu_gmadr_is_aperture(vgpu, gmadr) \ @@ -449,6 +450,8 @@ struct intel_gvt_ops { struct intel_vgpu_type *); void (*vgpu_destroy)(struct intel_vgpu *); void (*vgpu_reset)(struct intel_vgpu *); + void (*vgpu_activate)(struct intel_vgpu *); + void (*vgpu_deactivate)(struct intel_vgpu *); }; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index d641214578a7..e466259034e2 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -544,6 +544,8 @@ static int intel_vgpu_open(struct mdev_device *mdev) if (ret) goto undo_group; + intel_gvt_ops->vgpu_activate(vgpu); + atomic_set(&vgpu->vdev.released, 0); return ret; @@ -569,6 +571,8 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1)) return; + intel_gvt_ops->vgpu_deactivate(vgpu); + ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY, &vgpu->vdev.iommu_notifier); WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret); @@ -1340,13 +1344,6 @@ static int kvmgt_guest_init(struct mdev_device *mdev) static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) { - struct intel_vgpu *vgpu = info->vgpu; - - if (!info) { - gvt_vgpu_err("kvmgt_guest_info invalid\n"); - return false; - } - kvm_page_track_unregister_notifier(info->kvm, &info->track_node); kvm_put_kvm(info->kvm); kvmgt_protect_table_destroy(info); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 41cfa5ccae84..649ef280cc9a 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ 
b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -72,7 +72,7 @@ static struct { char *name; } vgpu_types[] = { /* Fixed vGPU type table */ - { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, GVT_EDID_1024_768, "8" }, + { MB_TO_BYTES(64), MB_TO_BYTES(384), 4, GVT_EDID_1024_768, "8" }, { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" }, { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" }, { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" }, @@ -179,20 +179,34 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) } /** - * intel_gvt_destroy_vgpu - destroy a virtual GPU + * intel_gvt_active_vgpu - activate a virtual GPU * @vgpu: virtual GPU * - * This function is called when user wants to destroy a virtual GPU. + * This function is called when user wants to activate a virtual GPU. * */ -void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) +void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu) +{ + mutex_lock(&vgpu->gvt->lock); + vgpu->active = true; + mutex_unlock(&vgpu->gvt->lock); +} + +/** + * intel_gvt_deactive_vgpu - deactivate a virtual GPU + * @vgpu: virtual GPU + * + * This function is called when user wants to deactivate a virtual GPU. + * All virtual GPU runtime information will be destroyed. + * + */ +void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu) { struct intel_gvt *gvt = vgpu->gvt; mutex_lock(&gvt->lock); vgpu->active = false; - idr_remove(&gvt->vgpu_idr, vgpu->id); if (atomic_read(&vgpu->running_workload_num)) { mutex_unlock(&gvt->lock); @@ -201,6 +215,26 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) } intel_vgpu_stop_schedule(vgpu); + + mutex_unlock(&gvt->lock); +} + +/** + * intel_gvt_destroy_vgpu - destroy a virtual GPU + * @vgpu: virtual GPU + * + * This function is called when user wants to destroy a virtual GPU. 
+ * + */ +void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) +{ + struct intel_gvt *gvt = vgpu->gvt; + + mutex_lock(&gvt->lock); + + WARN(vgpu->active, "vGPU is still active!\n"); + + idr_remove(&gvt->vgpu_idr, vgpu->id); intel_vgpu_clean_sched_policy(vgpu); intel_vgpu_clean_gvt_context(vgpu); intel_vgpu_clean_execlist(vgpu); @@ -277,7 +311,6 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_shadow_ctx; - vgpu->active = true; mutex_unlock(&gvt->lock); return vgpu; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1c75402a59c1..5c089b3c2a7e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1434,8 +1434,6 @@ static int i915_drm_suspend(struct drm_device *dev) goto out; } - intel_guc_suspend(dev_priv); - intel_display_suspend(dev); intel_dp_mst_suspend(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1e53c31b6826..46fcd8b7080a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -806,6 +806,7 @@ struct intel_csr { func(has_resource_streamer); \ func(has_runtime_pm); \ func(has_snoop); \ + func(unfenced_needs_alignment); \ func(cursor_needs_physical); \ func(hws_needs_physical); \ func(overlay_needs_physical); \ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 67b1fc5a0331..fe531f904062 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4348,6 +4348,8 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) i915_gem_context_lost(dev_priv); mutex_unlock(&dev->struct_mutex); + intel_guc_suspend(dev_priv); + cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); cancel_delayed_work_sync(&dev_priv->gt.retire_work); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 30e0675fd7da..15a15d00a6bf 100644 --- 
a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -888,6 +888,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, struct list_head ordered_vmas; struct list_head pinned_vmas; bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4; + bool needs_unfenced_map = INTEL_INFO(engine->i915)->unfenced_needs_alignment; int retry; vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; @@ -908,7 +909,8 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, if (!has_fenced_gpu_access) entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; need_fence = - entry->flags & EXEC_OBJECT_NEEDS_FENCE && + (entry->flags & EXEC_OBJECT_NEEDS_FENCE || + needs_unfenced_map) && i915_gem_object_is_tiled(obj); need_mappable = need_fence || need_reloc_mappable(vma); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2801a4d56324..96e45a4d5441 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2704,7 +2704,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct i915_ggtt *ggtt = &dev_priv->ggtt; if (unlikely(ggtt->do_idle_maps)) { - if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) { + if (i915_gem_wait_for_idle(dev_priv, 0)) { DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); /* Wait a bit, in hopes it avoids the hang */ udelay(10); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index e7c3c0318ff6..da70bfe97ec5 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -37,6 +37,17 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence) static const char *i915_fence_get_timeline_name(struct dma_fence *fence) { + /* The timeline struct (as part of the ppgtt underneath a context) + * may be freed when the request is no longer in use by the GPU. 
+ * We could extend the life of a context to beyond that of all + * fences, possibly keeping the hw resource around indefinitely, + * or we just give them a false name. Since + * dma_fence_ops.get_timeline_name is a debug feature, the occasional + * lie seems justifiable. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return "signaled"; + return to_request(fence)->timeline->common->name; } diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index d5d2b4c6ed38..70b3832a79dd 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -53,6 +53,17 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) BUG(); } +static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock) +{ + if (!unlock) + return; + + mutex_unlock(&dev->struct_mutex); + + /* expedite the RCU grace period to free some request slabs */ + synchronize_rcu_expedited(); +} + static bool any_vma_pinned(struct drm_i915_gem_object *obj) { struct i915_vma *vma; @@ -232,11 +243,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, intel_runtime_pm_put(dev_priv); i915_gem_retire_requests(dev_priv); - if (unlock) - mutex_unlock(&dev_priv->drm.struct_mutex); - /* expedite the RCU grace period to free some request slabs */ - synchronize_rcu_expedited(); + i915_gem_shrinker_unlock(&dev_priv->drm, unlock); return count; } @@ -293,8 +301,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) count += obj->base.size >> PAGE_SHIFT; } - if (unlock) - mutex_unlock(&dev->struct_mutex); + i915_gem_shrinker_unlock(dev, unlock); return count; } @@ -321,8 +328,8 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) sc->nr_to_scan - freed, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); - if (unlock) - mutex_unlock(&dev->struct_mutex); + + i915_gem_shrinker_unlock(dev, unlock); return freed; } @@ -364,8 +371,7 @@ 
i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv, struct shrinker_lock_uninterruptible *slu) { dev_priv->mm.interruptible = slu->was_interruptible; - if (slu->unlock) - mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock); } static int diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ecb487b5356f..9bbbd4e83e3c 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -60,6 +60,7 @@ .has_overlay = 1, .overlay_needs_physical = 1, \ .has_gmch_display = 1, \ .hws_needs_physical = 1, \ + .unfenced_needs_alignment = 1, \ .ring_mask = RENDER_RING, \ GEN_DEFAULT_PIPEOFFSETS, \ CURSOR_OFFSETS @@ -101,6 +102,7 @@ static const struct intel_device_info intel_i915g_info = { .platform = INTEL_I915G, .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, .hws_needs_physical = 1, + .unfenced_needs_alignment = 1, }; static const struct intel_device_info intel_i915gm_info = { @@ -112,6 +114,7 @@ static const struct intel_device_info intel_i915gm_info = { .supports_tv = 1, .has_fbc = 1, .hws_needs_physical = 1, + .unfenced_needs_alignment = 1, }; static const struct intel_device_info intel_i945g_info = { @@ -120,6 +123,7 @@ static const struct intel_device_info intel_i945g_info = { .has_hotplug = 1, .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, .hws_needs_physical = 1, + .unfenced_needs_alignment = 1, }; static const struct intel_device_info intel_i945gm_info = { @@ -130,6 +134,7 @@ static const struct intel_device_info intel_i945gm_info = { .supports_tv = 1, .has_fbc = 1, .hws_needs_physical = 1, + .unfenced_needs_alignment = 1, }; static const struct intel_device_info intel_g33_info = { diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a1b7eec58be2..70964ca9251e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1705,7 +1705,7 
@@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, */ if (WARN_ON(stream->sample_flags != props->sample_flags)) { ret = -ENODEV; - goto err_alloc; + goto err_flags; } list_add(&stream->link, &dev_priv->perf.streams); @@ -1728,6 +1728,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, err_open: list_del(&stream->link); +err_flags: if (stream->ops->destroy) stream->ops->destroy(stream); err_alloc: @@ -1793,6 +1794,11 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, if (ret) return ret; + if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) { + DRM_DEBUG("Unknown i915 perf property ID\n"); + return -EINVAL; + } + switch ((enum drm_i915_perf_property_id)id) { case DRM_I915_PERF_PROP_CTX_HANDLE: props->single_context = 1; @@ -1862,9 +1868,8 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->oa_periodic = true; props->oa_period_exponent = value; break; - default: + case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); - DRM_DEBUG("Unknown i915 perf property ID\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 471af3b480ad..47517a02f0a4 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -670,15 +670,14 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) static struct intel_engine_cs * pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) { - struct intel_engine_cs *engine; + struct intel_engine_cs *engine = + container_of(pt, struct drm_i915_gem_request, priotree)->engine; + + GEM_BUG_ON(!locked); - engine = container_of(pt, - struct drm_i915_gem_request, - priotree)->engine; if (engine != locked) { - if (locked) - spin_unlock_irq(&locked->timeline->lock); - spin_lock_irq(&engine->timeline->lock); + spin_unlock(&locked->timeline->lock); + spin_lock(&engine->timeline->lock); } return engine; @@ -686,7 +685,7 @@ pt_lock_engine(struct i915_priotree *pt, struct 
intel_engine_cs *locked) static void execlists_schedule(struct drm_i915_gem_request *request, int prio) { - struct intel_engine_cs *engine = NULL; + struct intel_engine_cs *engine; struct i915_dependency *dep, *p; struct i915_dependency stack; LIST_HEAD(dfs); @@ -720,26 +719,23 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) list_for_each_entry_safe(dep, p, &dfs, dfs_link) { struct i915_priotree *pt = dep->signaler; - list_for_each_entry(p, &pt->signalers_list, signal_link) + /* Within an engine, there can be no cycle, but we may + * refer to the same dependency chain multiple times + * (redundant dependencies are not eliminated) and across + * engines. + */ + list_for_each_entry(p, &pt->signalers_list, signal_link) { + GEM_BUG_ON(p->signaler->priority < pt->priority); if (prio > READ_ONCE(p->signaler->priority)) list_move_tail(&p->dfs_link, &dfs); + } list_safe_reset_next(dep, p, dfs_link); - if (!RB_EMPTY_NODE(&pt->node)) - continue; - - engine = pt_lock_engine(pt, engine); - - /* If it is not already in the rbtree, we can update the - * priority inplace and skip over it (and its dependencies) - * if it is referenced *again* as we descend the dfs. 
- */ - if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) { - pt->priority = prio; - list_del_init(&dep->dfs_link); - } } + engine = request->engine; + spin_lock_irq(&engine->timeline->lock); + /* Fifo and depth-first replacement ensure our deps execute before us */ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { struct i915_priotree *pt = dep->signaler; @@ -751,16 +747,15 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) if (prio <= pt->priority) continue; - GEM_BUG_ON(RB_EMPTY_NODE(&pt->node)); - pt->priority = prio; - rb_erase(&pt->node, &engine->execlist_queue); - if (insert_request(pt, &engine->execlist_queue)) - engine->execlist_first = &pt->node; + if (!RB_EMPTY_NODE(&pt->node)) { + rb_erase(&pt->node, &engine->execlist_queue); + if (insert_request(pt, &engine->execlist_queue)) + engine->execlist_first = &pt->node; + } } - if (engine) - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline->lock); /* XXX Do we need to preempt to make room for us and our deps? 
*/ } @@ -1440,7 +1435,9 @@ static void reset_common_ring(struct intel_engine_cs *engine, GEM_BUG_ON(request->ctx != port[0].request->ctx); /* Reset WaIdleLiteRestore:bdw,skl as well */ - request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32); + request->tail = + intel_ring_wrap(request->ring, + request->wa_tail - WA_TAIL_DWORDS*sizeof(u32)); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 13dccb18cd43..8cb2078c5bfc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -521,11 +521,17 @@ static inline void intel_ring_advance(struct intel_ring *ring) */ } +static inline u32 +intel_ring_wrap(const struct intel_ring *ring, u32 pos) +{ + return pos & (ring->size - 1); +} + static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. 
*/ u32 offset = addr - ring->vaddr; - return offset & (ring->size - 1); + return intel_ring_wrap(ring, offset); } int __intel_ring_space(int head, int tail, int size); diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 0b4440ffbeae..a9182d5e6011 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -995,7 +995,6 @@ nv50_wndw_atomic_destroy_state(struct drm_plane *plane, { struct nv50_wndw_atom *asyw = nv50_wndw_atom(state); __drm_atomic_helper_plane_destroy_state(&asyw->state); - dma_fence_put(asyw->state.fence); kfree(asyw); } @@ -1007,7 +1006,6 @@ nv50_wndw_atomic_duplicate_state(struct drm_plane *plane) if (!(asyw = kmalloc(sizeof(*asyw), GFP_KERNEL))) return NULL; __drm_atomic_helper_plane_duplicate_state(plane, &asyw->state); - asyw->state.fence = NULL; asyw->interval = 1; asyw->sema = armw->sema; asyw->ntfy = armw->ntfy; @@ -2036,6 +2034,7 @@ nv50_head_atomic_check_mode(struct nv50_head *head, struct nv50_head_atom *asyh) u32 vbackp = (mode->vtotal - mode->vsync_end) * vscan / ilace; u32 hfrontp = mode->hsync_start - mode->hdisplay; u32 vfrontp = (mode->vsync_start - mode->vdisplay) * vscan / ilace; + u32 blankus; struct nv50_head_mode *m = &asyh->mode; m->h.active = mode->htotal; @@ -2049,9 +2048,10 @@ nv50_head_atomic_check_mode(struct nv50_head *head, struct nv50_head_atom *asyh) m->v.blanks = m->v.active - vfrontp - 1; /*XXX: Safe underestimate, even "0" works */ - m->v.blankus = (m->v.active - mode->vdisplay - 2) * m->h.active; - m->v.blankus *= 1000; - m->v.blankus /= mode->clock; + blankus = (m->v.active - mode->vdisplay - 2) * m->h.active; + blankus *= 1000; + blankus /= mode->clock; + m->v.blankus = blankus; if (mode->flags & DRM_MODE_FLAG_INTERLACE) { m->v.blank2e = m->v.active + m->v.synce + vbackp; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 273562dd6bbd..3b86a7399567 100644 --- 
a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -714,7 +714,7 @@ nv4a_chipset = { .i2c = nv04_i2c_new, .imem = nv40_instmem_new, .mc = nv44_mc_new, - .mmu = nv44_mmu_new, + .mmu = nv04_mmu_new, .pci = nv40_pci_new, .therm = nv40_therm_new, .timer = nv41_timer_new, @@ -2271,6 +2271,35 @@ nv136_chipset = { .fifo = gp100_fifo_new, }; +static const struct nvkm_device_chip +nv137_chipset = { + .name = "GP107", + .bar = gf100_bar_new, + .bios = nvkm_bios_new, + .bus = gf100_bus_new, + .devinit = gm200_devinit_new, + .fb = gp102_fb_new, + .fuse = gm107_fuse_new, + .gpio = gk104_gpio_new, + .i2c = gm200_i2c_new, + .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, + .ltc = gp100_ltc_new, + .mc = gp100_mc_new, + .mmu = gf100_mmu_new, + .pci = gp100_pci_new, + .pmu = gp102_pmu_new, + .timer = gk20a_timer_new, + .top = gk104_top_new, + .ce[0] = gp102_ce_new, + .ce[1] = gp102_ce_new, + .ce[2] = gp102_ce_new, + .ce[3] = gp102_ce_new, + .disp = gp102_disp_new, + .dma = gf119_dma_new, + .fifo = gp100_fifo_new, +}; + static int nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size, struct nvkm_notify *notify) @@ -2708,6 +2737,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x132: device->chip = &nv132_chipset; break; case 0x134: device->chip = &nv134_chipset; break; case 0x136: device->chip = &nv136_chipset; break; + case 0x137: device->chip = &nv137_chipset; break; default: nvdev_error(device, "unknown chipset (%08x)\n", boot0); goto done; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c index 003ac915eaad..8a8895246d26 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c @@ -198,7 +198,7 @@ nv31_mpeg_intr(struct nvkm_engine *engine) } if (type == 0x00000010) { - if (!nv31_mpeg_mthd(mpeg, mthd, data)) + if (nv31_mpeg_mthd(mpeg, mthd, data)) show &= ~0x01000000; 
} } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c index e536f37e24b0..c3cf02ed468e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c @@ -172,7 +172,7 @@ nv44_mpeg_intr(struct nvkm_engine *engine) } if (type == 0x00000010) { - if (!nv44_mpeg_mthd(subdev->device, mthd, data)) + if (nv44_mpeg_mthd(subdev->device, mthd, data)) show &= ~0x01000000; } } diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c index 917dcb978c2c..0c87b1ac6b68 100644 --- a/drivers/gpu/drm/udl/udl_transfer.c +++ b/drivers/gpu/drm/udl/udl_transfer.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/fb.h> #include <linux/prefetch.h> +#include <asm/unaligned.h> #include <drm/drmP.h> #include "udl_drv.h" @@ -163,7 +164,7 @@ static void udl_compress_hline16( const u8 *const start = pixel; const uint16_t repeating_pixel_val16 = pixel_val16; - *(uint16_t *)cmd = cpu_to_be16(pixel_val16); + put_unaligned_be16(pixel_val16, cmd); cmd += 2; pixel += bpp; diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 63ec1993eaaa..d162f0dc76e3 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -819,8 +819,7 @@ static int hid_scan_report(struct hid_device *hid) hid->group = HID_GROUP_WACOM; break; case USB_VENDOR_ID_SYNAPTICS: - if (hid->group == HID_GROUP_GENERIC || - hid->group == HID_GROUP_MULTITOUCH_WIN_8) + if (hid->group == HID_GROUP_GENERIC) if ((parser->scan_flags & HID_SCAN_FLAG_VENDOR_SPECIFIC) && (parser->scan_flags & HID_SCAN_FLAG_GD_POINTER)) /* @@ -2096,6 +2095,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_45) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) }, + { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, 
USB_DEVICE_ID_UGEE_TABLET_EX07S) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 4e2648c86c8c..b26c030926c1 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1028,6 +1028,9 @@ #define USB_DEVICE_ID_UGEE_TABLET_45 0x0045 #define USB_DEVICE_ID_YIYNOVA_TABLET 0x004d +#define USB_VENDOR_ID_UGEE 0x28bd +#define USB_DEVICE_ID_UGEE_TABLET_EX07S 0x0071 + #define USB_VENDOR_ID_UNITEC 0x227d #define USB_DEVICE_ID_UNITEC_USB_TOUCH_0709 0x0709 #define USB_DEVICE_ID_UNITEC_USB_TOUCH_0A19 0x0a19 diff --git a/drivers/hid/hid-uclogic.c b/drivers/hid/hid-uclogic.c index 1509d7287ff3..e3e6e5c893cc 100644 --- a/drivers/hid/hid-uclogic.c +++ b/drivers/hid/hid-uclogic.c @@ -977,6 +977,7 @@ static int uclogic_probe(struct hid_device *hdev, } break; case USB_DEVICE_ID_UGTIZER_TABLET_GP0610: + case USB_DEVICE_ID_UGEE_TABLET_EX07S: /* If this is the pen interface */ if (intf->cur_altsetting->desc.bInterfaceNumber == 1) { rc = uclogic_tablet_enable(hdev); @@ -1069,6 +1070,7 @@ static const struct hid_device_id uclogic_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_45) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) }, + { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S) }, { } }; MODULE_DEVICE_TABLE(hid, uclogic_devices); diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 94250c293be2..c68ac65db7ff 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2006,7 +2006,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field return; case HID_DG_TOOLSERIALNUMBER: wacom_wac->serial[0] = (wacom_wac->serial[0] & 
~0xFFFFFFFFULL); - wacom_wac->serial[0] |= value; + wacom_wac->serial[0] |= (__u32)value; return; case WACOM_HID_WD_SENSE: wacom_wac->hid_data.sense_state = value; @@ -2176,6 +2176,16 @@ static void wacom_wac_finger_usage_mapping(struct hid_device *hdev, wacom_wac->hid_data.cc_index = field->index; wacom_wac->hid_data.cc_value_index = usage->usage_index; break; + case HID_DG_CONTACTID: + if ((field->logical_maximum - field->logical_minimum) < touch_max) { + /* + * The HID descriptor for G11 sensors leaves logical + * maximum set to '1' despite it being a multitouch + * device. Override to a sensible number. + */ + field->logical_maximum = 255; + } + break; } } diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c index 7ef819680acd..26b05106f0d3 100644 --- a/drivers/hsi/clients/ssi_protocol.c +++ b/drivers/hsi/clients/ssi_protocol.c @@ -980,7 +980,7 @@ static int ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; /* Pad to 32-bits - FIXME: Revisit*/ if ((skb->len & 3) && skb_pad(skb, 4 - (skb->len & 3))) - goto drop; + goto inc_dropped; /* * Modem sends Phonet messages over SSI with its own endianess... @@ -1032,8 +1032,9 @@ static int ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev) drop2: hsi_free_msg(msg); drop: - dev->stats.tx_dropped++; dev_kfree_skb(skb); +inc_dropped: + dev->stats.tx_dropped++; return 0; } diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 0649d53f3d16..22d5eafd6815 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -341,6 +341,15 @@ config SENSORS_ASB100 This driver can also be built as a module. If so, the module will be called asb100. +config SENSORS_ASPEED + tristate "ASPEED AST2400/AST2500 PWM and Fan tach driver" + help + This driver provides support for ASPEED AST2400/AST2500 PWM + and Fan Tacho controllers. + + This driver can also be built as a module. If so, the module + will be called aspeed_pwm_tacho. 
+ config SENSORS_ATXP1 tristate "Attansic ATXP1 VID controller" depends on I2C @@ -1643,16 +1652,6 @@ config SENSORS_TMP421 This driver can also be built as a module. If so, the module will be called tmp421. -config SENSORS_TWL4030_MADC - tristate "Texas Instruments TWL4030 MADC Hwmon" - depends on TWL4030_MADC - help - If you say yes here you get hwmon support for triton - TWL4030-MADC. - - This driver can also be built as a module. If so it will be called - twl4030-madc-hwmon. - config SENSORS_VEXPRESS tristate "Versatile Express" depends on VEXPRESS_CONFIG diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 5509edf6186a..d4641a9f16c1 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_SENSORS_ADT7475) += adt7475.o obj-$(CONFIG_SENSORS_APPLESMC) += applesmc.o obj-$(CONFIG_SENSORS_ARM_SCPI) += scpi-hwmon.o obj-$(CONFIG_SENSORS_ASC7621) += asc7621.o +obj-$(CONFIG_SENSORS_ASPEED) += aspeed-pwm-tacho.o obj-$(CONFIG_SENSORS_ATXP1) += atxp1.o obj-$(CONFIG_SENSORS_CORETEMP) += coretemp.o obj-$(CONFIG_SENSORS_DA9052_ADC)+= da9052-hwmon.o @@ -157,7 +158,6 @@ obj-$(CONFIG_SENSORS_TMP103) += tmp103.o obj-$(CONFIG_SENSORS_TMP108) += tmp108.o obj-$(CONFIG_SENSORS_TMP401) += tmp401.o obj-$(CONFIG_SENSORS_TMP421) += tmp421.o -obj-$(CONFIG_SENSORS_TWL4030_MADC)+= twl4030-madc-hwmon.o obj-$(CONFIG_SENSORS_VEXPRESS) += vexpress-hwmon.o obj-$(CONFIG_SENSORS_VIA_CPUTEMP)+= via-cputemp.o obj-$(CONFIG_SENSORS_VIA686A) += via686a.o diff --git a/drivers/hwmon/ad7414.c b/drivers/hwmon/ad7414.c index 763490acc0df..cec227f13874 100644 --- a/drivers/hwmon/ad7414.c +++ b/drivers/hwmon/ad7414.c @@ -217,9 +217,16 @@ static const struct i2c_device_id ad7414_id[] = { }; MODULE_DEVICE_TABLE(i2c, ad7414_id); +static const struct of_device_id ad7414_of_match[] = { + { .compatible = "ad,ad7414" }, + { }, +}; +MODULE_DEVICE_TABLE(of, ad7414_of_match); + static struct i2c_driver ad7414_driver = { .driver = { .name = "ad7414", + .of_match_table 
= of_match_ptr(ad7414_of_match), }, .probe = ad7414_probe, .id_table = ad7414_id, diff --git a/drivers/hwmon/adc128d818.c b/drivers/hwmon/adc128d818.c index bbe3a5c5b3f5..a557b46dbe8e 100644 --- a/drivers/hwmon/adc128d818.c +++ b/drivers/hwmon/adc128d818.c @@ -546,10 +546,17 @@ static const struct i2c_device_id adc128_id[] = { }; MODULE_DEVICE_TABLE(i2c, adc128_id); +static const struct of_device_id adc128_of_match[] = { + { .compatible = "ti,adc128d818" }, + { }, +}; +MODULE_DEVICE_TABLE(of, adc128_of_match); + static struct i2c_driver adc128_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "adc128d818", + .of_match_table = of_match_ptr(adc128_of_match), }, .probe = adc128_probe, .remove = adc128_remove, diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c index 2b3105c8aed3..5140c27d16dd 100644 --- a/drivers/hwmon/ads1015.c +++ b/drivers/hwmon/ads1015.c @@ -31,6 +31,7 @@ #include <linux/hwmon-sysfs.h> #include <linux/err.h> #include <linux/mutex.h> +#include <linux/of_device.h> #include <linux/of.h> #include <linux/i2c/ads1015.h> @@ -268,7 +269,12 @@ static int ads1015_probe(struct i2c_client *client, GFP_KERNEL); if (!data) return -ENOMEM; - data->id = id->driver_data; + + if (client->dev.of_node) + data->id = (enum ads1015_chips) + of_device_get_match_data(&client->dev); + else + data->id = id->driver_data; i2c_set_clientdata(client, data); mutex_init(&data->update_lock); @@ -303,9 +309,23 @@ static const struct i2c_device_id ads1015_id[] = { }; MODULE_DEVICE_TABLE(i2c, ads1015_id); +static const struct of_device_id ads1015_of_match[] = { + { + .compatible = "ti,ads1015", + .data = (void *)ads1015 + }, + { + .compatible = "ti,ads1115", + .data = (void *)ads1115 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, ads1015_of_match); + static struct i2c_driver ads1015_driver = { .driver = { .name = "ads1015", + .of_match_table = of_match_ptr(ads1015_of_match), }, .probe = ads1015_probe, .remove = ads1015_remove, diff --git a/drivers/hwmon/ads7828.c 
b/drivers/hwmon/ads7828.c index ee396ff167d9..898607bf682b 100644 --- a/drivers/hwmon/ads7828.c +++ b/drivers/hwmon/ads7828.c @@ -31,9 +31,11 @@ #include <linux/i2c.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/platform_data/ads7828.h> #include <linux/regmap.h> #include <linux/slab.h> +#include <linux/regulator/consumer.h> /* The ADS7828 registers */ #define ADS7828_CMD_SD_SE 0x80 /* Single ended inputs */ @@ -118,9 +120,12 @@ static int ads7828_probe(struct i2c_client *client, struct ads7828_data *data; struct device *hwmon_dev; unsigned int vref_mv = ADS7828_INT_VREF_MV; + unsigned int vref_uv; bool diff_input = false; bool ext_vref = false; unsigned int regval; + enum ads7828_chips chip; + struct regulator *reg; data = devm_kzalloc(dev, sizeof(struct ads7828_data), GFP_KERNEL); if (!data) @@ -131,14 +136,32 @@ static int ads7828_probe(struct i2c_client *client, ext_vref = pdata->ext_vref; if (ext_vref && pdata->vref_mv) vref_mv = pdata->vref_mv; + } else if (dev->of_node) { + diff_input = of_property_read_bool(dev->of_node, + "ti,differential-input"); + reg = devm_regulator_get_optional(dev, "vref"); + if (!IS_ERR(reg)) { + vref_uv = regulator_get_voltage(reg); + vref_mv = DIV_ROUND_CLOSEST(vref_uv, 1000); + if (vref_mv < ADS7828_EXT_VREF_MV_MIN || + vref_mv > ADS7828_EXT_VREF_MV_MAX) + return -EINVAL; + ext_vref = true; + } } + if (client->dev.of_node) + chip = (enum ads7828_chips) + of_device_get_match_data(&client->dev); + else + chip = id->driver_data; + /* Bound Vref with min/max values */ vref_mv = clamp_val(vref_mv, ADS7828_EXT_VREF_MV_MIN, ADS7828_EXT_VREF_MV_MAX); /* ADS7828 uses 12-bit samples, while ADS7830 is 8-bit */ - if (id->driver_data == ads7828) { + if (chip == ads7828) { data->lsb_resol = DIV_ROUND_CLOSEST(vref_mv * 1000, 4096); data->regmap = devm_regmap_init_i2c(client, &ads2828_regmap_config); @@ -177,9 +200,23 @@ static const struct i2c_device_id ads7828_device_ids[] = { }; 
MODULE_DEVICE_TABLE(i2c, ads7828_device_ids); +static const struct of_device_id ads7828_of_match[] = { + { + .compatible = "ti,ads7828", + .data = (void *)ads7828 + }, + { + .compatible = "ti,ads7830", + .data = (void *)ads7830 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, ads7828_of_match); + static struct i2c_driver ads7828_driver = { .driver = { .name = "ads7828", + .of_match_table = of_match_ptr(ads7828_of_match), }, .id_table = ads7828_device_ids, diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index c646670b9ea9..c803e3c5fcd4 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -13,6 +13,7 @@ */ #include <linux/module.h> +#include <linux/of_device.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/i2c.h> @@ -58,6 +59,8 @@ #define REG_VENDID 0x3E #define REG_DEVID2 0x3F +#define REG_CONFIG1 0x40 + #define REG_STATUS1 0x41 #define REG_STATUS2 0x42 @@ -161,6 +164,27 @@ static const struct i2c_device_id adt7475_id[] = { }; MODULE_DEVICE_TABLE(i2c, adt7475_id); +static const struct of_device_id adt7475_of_match[] = { + { + .compatible = "adi,adt7473", + .data = (void *)adt7473 + }, + { + .compatible = "adi,adt7475", + .data = (void *)adt7475 + }, + { + .compatible = "adi,adt7476", + .data = (void *)adt7476 + }, + { + .compatible = "adi,adt7490", + .data = (void *)adt7490 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, adt7475_of_match); + struct adt7475_data { struct device *hwmon_dev; struct mutex lock; @@ -1250,6 +1274,7 @@ static void adt7475_remove_files(struct i2c_client *client, static int adt7475_probe(struct i2c_client *client, const struct i2c_device_id *id) { + enum chips chip; static const char * const names[] = { [adt7473] = "ADT7473", [adt7475] = "ADT7475", @@ -1268,8 +1293,13 @@ static int adt7475_probe(struct i2c_client *client, mutex_init(&data->lock); i2c_set_clientdata(client, data); + if (client->dev.of_node) + chip = (enum chips)of_device_get_match_data(&client->dev); + else + chip = id->driver_data; + /* 
Initialize device-specific values */ - switch (id->driver_data) { + switch (chip) { case adt7476: data->has_voltage = 0x0e; /* in1 to in3 */ revision = adt7475_read(REG_DEVID2) & 0x07; @@ -1343,6 +1373,17 @@ static int adt7475_probe(struct i2c_client *client, for (i = 0; i < ADT7475_PWM_COUNT; i++) adt7475_read_pwm(client, i); + /* Start monitoring */ + switch (chip) { + case adt7475: + case adt7476: + i2c_smbus_write_byte_data(client, REG_CONFIG1, + adt7475_read(REG_CONFIG1) | 0x01); + break; + default: + break; + } + ret = sysfs_create_group(&client->dev.kobj, &adt7475_attr_group); if (ret) return ret; @@ -1428,6 +1469,7 @@ static struct i2c_driver adt7475_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "adt7475", + .of_match_table = of_match_ptr(adt7475_of_match), }, .probe = adt7475_probe, .remove = adt7475_remove, diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c new file mode 100644 index 000000000000..48403a2115be --- /dev/null +++ b/drivers/hwmon/aspeed-pwm-tacho.c @@ -0,0 +1,835 @@ +/* + * Copyright (c) 2016 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or later as + * published by the Free Software Foundation. 
+ */ + +#include <linux/clk.h> +#include <linux/gpio/consumer.h> +#include <linux/delay.h> +#include <linux/hwmon.h> +#include <linux/hwmon-sysfs.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of_platform.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/sysfs.h> +#include <linux/regmap.h> + +/* ASPEED PWM & FAN Tach Register Definition */ +#define ASPEED_PTCR_CTRL 0x00 +#define ASPEED_PTCR_CLK_CTRL 0x04 +#define ASPEED_PTCR_DUTY0_CTRL 0x08 +#define ASPEED_PTCR_DUTY1_CTRL 0x0c +#define ASPEED_PTCR_TYPEM_CTRL 0x10 +#define ASPEED_PTCR_TYPEM_CTRL1 0x14 +#define ASPEED_PTCR_TYPEN_CTRL 0x18 +#define ASPEED_PTCR_TYPEN_CTRL1 0x1c +#define ASPEED_PTCR_TACH_SOURCE 0x20 +#define ASPEED_PTCR_TRIGGER 0x28 +#define ASPEED_PTCR_RESULT 0x2c +#define ASPEED_PTCR_INTR_CTRL 0x30 +#define ASPEED_PTCR_INTR_STS 0x34 +#define ASPEED_PTCR_TYPEM_LIMIT 0x38 +#define ASPEED_PTCR_TYPEN_LIMIT 0x3C +#define ASPEED_PTCR_CTRL_EXT 0x40 +#define ASPEED_PTCR_CLK_CTRL_EXT 0x44 +#define ASPEED_PTCR_DUTY2_CTRL 0x48 +#define ASPEED_PTCR_DUTY3_CTRL 0x4c +#define ASPEED_PTCR_TYPEO_CTRL 0x50 +#define ASPEED_PTCR_TYPEO_CTRL1 0x54 +#define ASPEED_PTCR_TACH_SOURCE_EXT 0x60 +#define ASPEED_PTCR_TYPEO_LIMIT 0x78 + +/* ASPEED_PTCR_CTRL : 0x00 - General Control Register */ +#define ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART1 15 +#define ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART2 6 +#define ASPEED_PTCR_CTRL_SET_PWMD_TYPE_MASK (BIT(7) | BIT(15)) + +#define ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART1 14 +#define ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART2 5 +#define ASPEED_PTCR_CTRL_SET_PWMC_TYPE_MASK (BIT(6) | BIT(14)) + +#define ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART1 13 +#define ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART2 4 +#define ASPEED_PTCR_CTRL_SET_PWMB_TYPE_MASK (BIT(5) | BIT(13)) + +#define ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART1 12 +#define ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART2 3 +#define ASPEED_PTCR_CTRL_SET_PWMA_TYPE_MASK (BIT(4) | BIT(12)) + +#define 
ASPEED_PTCR_CTRL_FAN_NUM_EN(x) BIT(16 + (x)) + +#define ASPEED_PTCR_CTRL_PWMD_EN BIT(11) +#define ASPEED_PTCR_CTRL_PWMC_EN BIT(10) +#define ASPEED_PTCR_CTRL_PWMB_EN BIT(9) +#define ASPEED_PTCR_CTRL_PWMA_EN BIT(8) + +#define ASPEED_PTCR_CTRL_CLK_SRC BIT(1) +#define ASPEED_PTCR_CTRL_CLK_EN BIT(0) + +/* ASPEED_PTCR_CLK_CTRL : 0x04 - Clock Control Register */ +/* TYPE N */ +#define ASPEED_PTCR_CLK_CTRL_TYPEN_MASK GENMASK(31, 16) +#define ASPEED_PTCR_CLK_CTRL_TYPEN_UNIT 24 +#define ASPEED_PTCR_CLK_CTRL_TYPEN_H 20 +#define ASPEED_PTCR_CLK_CTRL_TYPEN_L 16 +/* TYPE M */ +#define ASPEED_PTCR_CLK_CTRL_TYPEM_MASK GENMASK(15, 0) +#define ASPEED_PTCR_CLK_CTRL_TYPEM_UNIT 8 +#define ASPEED_PTCR_CLK_CTRL_TYPEM_H 4 +#define ASPEED_PTCR_CLK_CTRL_TYPEM_L 0 + +/* + * ASPEED_PTCR_DUTY_CTRL/1/2/3 : 0x08/0x0C/0x48/0x4C - PWM-FAN duty control + * 0/1/2/3 register + */ +#define DUTY_CTRL_PWM2_FALL_POINT 24 +#define DUTY_CTRL_PWM2_RISE_POINT 16 +#define DUTY_CTRL_PWM2_RISE_FALL_MASK GENMASK(31, 16) +#define DUTY_CTRL_PWM1_FALL_POINT 8 +#define DUTY_CTRL_PWM1_RISE_POINT 0 +#define DUTY_CTRL_PWM1_RISE_FALL_MASK GENMASK(15, 0) + +/* ASPEED_PTCR_TYPEM_CTRL : 0x10/0x18/0x50 - Type M/N/O Ctrl 0 Register */ +#define TYPE_CTRL_FAN_MASK (GENMASK(5, 1) | GENMASK(31, 16)) +#define TYPE_CTRL_FAN1_MASK GENMASK(31, 0) +#define TYPE_CTRL_FAN_PERIOD 16 +#define TYPE_CTRL_FAN_MODE 4 +#define TYPE_CTRL_FAN_DIVISION 1 +#define TYPE_CTRL_FAN_TYPE_EN 1 + +/* ASPEED_PTCR_TACH_SOURCE : 0x20/0x60 - Tach Source Register */ +/* bit [0,1] at 0x20, bit [2] at 0x60 */ +#define TACH_PWM_SOURCE_BIT01(x) ((x) * 2) +#define TACH_PWM_SOURCE_BIT2(x) ((x) * 2) +#define TACH_PWM_SOURCE_MASK_BIT01(x) (0x3 << ((x) * 2)) +#define TACH_PWM_SOURCE_MASK_BIT2(x) BIT((x) * 2) + +/* ASPEED_PTCR_RESULT : 0x2c - Result Register */ +#define RESULT_STATUS_MASK BIT(31) +#define RESULT_VALUE_MASK 0xfffff + +/* ASPEED_PTCR_CTRL_EXT : 0x40 - General Control Extension #1 Register */ +#define ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART1 15 +#define 
ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART2 6 +#define ASPEED_PTCR_CTRL_SET_PWMH_TYPE_MASK (BIT(7) | BIT(15)) + +#define ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART1 14 +#define ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART2 5 +#define ASPEED_PTCR_CTRL_SET_PWMG_TYPE_MASK (BIT(6) | BIT(14)) + +#define ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART1 13 +#define ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART2 4 +#define ASPEED_PTCR_CTRL_SET_PWMF_TYPE_MASK (BIT(5) | BIT(13)) + +#define ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART1 12 +#define ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART2 3 +#define ASPEED_PTCR_CTRL_SET_PWME_TYPE_MASK (BIT(4) | BIT(12)) + +#define ASPEED_PTCR_CTRL_PWMH_EN BIT(11) +#define ASPEED_PTCR_CTRL_PWMG_EN BIT(10) +#define ASPEED_PTCR_CTRL_PWMF_EN BIT(9) +#define ASPEED_PTCR_CTRL_PWME_EN BIT(8) + +/* ASPEED_PTCR_CLK_EXT_CTRL : 0x44 - Clock Control Extension #1 Register */ +/* TYPE O */ +#define ASPEED_PTCR_CLK_CTRL_TYPEO_MASK GENMASK(15, 0) +#define ASPEED_PTCR_CLK_CTRL_TYPEO_UNIT 8 +#define ASPEED_PTCR_CLK_CTRL_TYPEO_H 4 +#define ASPEED_PTCR_CLK_CTRL_TYPEO_L 0 + +#define PWM_MAX 255 + +#define M_PWM_DIV_H 0x00 +#define M_PWM_DIV_L 0x05 +#define M_PWM_PERIOD 0x5F +#define M_TACH_CLK_DIV 0x00 +#define M_TACH_MODE 0x00 +#define M_TACH_UNIT 0x1000 +#define INIT_FAN_CTRL 0xFF + +struct aspeed_pwm_tacho_data { + struct regmap *regmap; + unsigned long clk_freq; + bool pwm_present[8]; + bool fan_tach_present[16]; + u8 type_pwm_clock_unit[3]; + u8 type_pwm_clock_division_h[3]; + u8 type_pwm_clock_division_l[3]; + u8 type_fan_tach_clock_division[3]; + u16 type_fan_tach_unit[3]; + u8 pwm_port_type[8]; + u8 pwm_port_fan_ctrl[8]; + u8 fan_tach_ch_source[16]; + const struct attribute_group *groups[3]; +}; + +enum type { TYPEM, TYPEN, TYPEO }; + +struct type_params { + u32 l_value; + u32 h_value; + u32 unit_value; + u32 clk_ctrl_mask; + u32 clk_ctrl_reg; + u32 ctrl_reg; + u32 ctrl_reg1; +}; + +static const struct type_params type_params[] = { + [TYPEM] = { + .l_value = ASPEED_PTCR_CLK_CTRL_TYPEM_L, + .h_value = 
ASPEED_PTCR_CLK_CTRL_TYPEM_H, + .unit_value = ASPEED_PTCR_CLK_CTRL_TYPEM_UNIT, + .clk_ctrl_mask = ASPEED_PTCR_CLK_CTRL_TYPEM_MASK, + .clk_ctrl_reg = ASPEED_PTCR_CLK_CTRL, + .ctrl_reg = ASPEED_PTCR_TYPEM_CTRL, + .ctrl_reg1 = ASPEED_PTCR_TYPEM_CTRL1, + }, + [TYPEN] = { + .l_value = ASPEED_PTCR_CLK_CTRL_TYPEN_L, + .h_value = ASPEED_PTCR_CLK_CTRL_TYPEN_H, + .unit_value = ASPEED_PTCR_CLK_CTRL_TYPEN_UNIT, + .clk_ctrl_mask = ASPEED_PTCR_CLK_CTRL_TYPEN_MASK, + .clk_ctrl_reg = ASPEED_PTCR_CLK_CTRL, + .ctrl_reg = ASPEED_PTCR_TYPEN_CTRL, + .ctrl_reg1 = ASPEED_PTCR_TYPEN_CTRL1, + }, + [TYPEO] = { + .l_value = ASPEED_PTCR_CLK_CTRL_TYPEO_L, + .h_value = ASPEED_PTCR_CLK_CTRL_TYPEO_H, + .unit_value = ASPEED_PTCR_CLK_CTRL_TYPEO_UNIT, + .clk_ctrl_mask = ASPEED_PTCR_CLK_CTRL_TYPEO_MASK, + .clk_ctrl_reg = ASPEED_PTCR_CLK_CTRL_EXT, + .ctrl_reg = ASPEED_PTCR_TYPEO_CTRL, + .ctrl_reg1 = ASPEED_PTCR_TYPEO_CTRL1, + } +}; + +enum pwm_port { PWMA, PWMB, PWMC, PWMD, PWME, PWMF, PWMG, PWMH }; + +struct pwm_port_params { + u32 pwm_en; + u32 ctrl_reg; + u32 type_part1; + u32 type_part2; + u32 type_mask; + u32 duty_ctrl_rise_point; + u32 duty_ctrl_fall_point; + u32 duty_ctrl_reg; + u32 duty_ctrl_rise_fall_mask; +}; + +static const struct pwm_port_params pwm_port_params[] = { + [PWMA] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMA_EN, + .ctrl_reg = ASPEED_PTCR_CTRL, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMA_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMA_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY0_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK, + }, + [PWMB] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMB_EN, + .ctrl_reg = ASPEED_PTCR_CTRL, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMB_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMB_TYPE_MASK, + .duty_ctrl_rise_point = 
DUTY_CTRL_PWM2_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY0_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK, + }, + [PWMC] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMC_EN, + .ctrl_reg = ASPEED_PTCR_CTRL, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMC_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMC_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY1_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK, + }, + [PWMD] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMD_EN, + .ctrl_reg = ASPEED_PTCR_CTRL, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMD_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMD_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY1_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK, + }, + [PWME] = { + .pwm_en = ASPEED_PTCR_CTRL_PWME_EN, + .ctrl_reg = ASPEED_PTCR_CTRL_EXT, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWME_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWME_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY2_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK, + }, + [PWMF] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMF_EN, + .ctrl_reg = ASPEED_PTCR_CTRL_EXT, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMF_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMF_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY2_CTRL, + .duty_ctrl_rise_fall_mask = 
DUTY_CTRL_PWM2_RISE_FALL_MASK, + }, + [PWMG] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMG_EN, + .ctrl_reg = ASPEED_PTCR_CTRL_EXT, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMG_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMG_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM1_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM1_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY3_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM1_RISE_FALL_MASK, + }, + [PWMH] = { + .pwm_en = ASPEED_PTCR_CTRL_PWMH_EN, + .ctrl_reg = ASPEED_PTCR_CTRL_EXT, + .type_part1 = ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART1, + .type_part2 = ASPEED_PTCR_CTRL_SET_PWMH_TYPE_PART2, + .type_mask = ASPEED_PTCR_CTRL_SET_PWMH_TYPE_MASK, + .duty_ctrl_rise_point = DUTY_CTRL_PWM2_RISE_POINT, + .duty_ctrl_fall_point = DUTY_CTRL_PWM2_FALL_POINT, + .duty_ctrl_reg = ASPEED_PTCR_DUTY3_CTRL, + .duty_ctrl_rise_fall_mask = DUTY_CTRL_PWM2_RISE_FALL_MASK, + } +}; + +static int regmap_aspeed_pwm_tacho_reg_write(void *context, unsigned int reg, + unsigned int val) +{ + void __iomem *regs = (void __iomem *)context; + + writel(val, regs + reg); + return 0; +} + +static int regmap_aspeed_pwm_tacho_reg_read(void *context, unsigned int reg, + unsigned int *val) +{ + void __iomem *regs = (void __iomem *)context; + + *val = readl(regs + reg); + return 0; +} + +static const struct regmap_config aspeed_pwm_tacho_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .max_register = ASPEED_PTCR_TYPEO_LIMIT, + .reg_write = regmap_aspeed_pwm_tacho_reg_write, + .reg_read = regmap_aspeed_pwm_tacho_reg_read, + .fast_io = true, +}; + +static void aspeed_set_clock_enable(struct regmap *regmap, bool val) +{ + regmap_update_bits(regmap, ASPEED_PTCR_CTRL, + ASPEED_PTCR_CTRL_CLK_EN, + val ? ASPEED_PTCR_CTRL_CLK_EN : 0); +} + +static void aspeed_set_clock_source(struct regmap *regmap, int val) +{ + regmap_update_bits(regmap, ASPEED_PTCR_CTRL, + ASPEED_PTCR_CTRL_CLK_SRC, + val ? 
ASPEED_PTCR_CTRL_CLK_SRC : 0); +} + +static void aspeed_set_pwm_clock_values(struct regmap *regmap, u8 type, + u8 div_high, u8 div_low, u8 unit) +{ + u32 reg_value = ((div_high << type_params[type].h_value) | + (div_low << type_params[type].l_value) | + (unit << type_params[type].unit_value)); + + regmap_update_bits(regmap, type_params[type].clk_ctrl_reg, + type_params[type].clk_ctrl_mask, reg_value); +} + +static void aspeed_set_pwm_port_enable(struct regmap *regmap, u8 pwm_port, + bool enable) +{ + regmap_update_bits(regmap, pwm_port_params[pwm_port].ctrl_reg, + pwm_port_params[pwm_port].pwm_en, + enable ? pwm_port_params[pwm_port].pwm_en : 0); +} + +static void aspeed_set_pwm_port_type(struct regmap *regmap, + u8 pwm_port, u8 type) +{ + u32 reg_value = (type & 0x1) << pwm_port_params[pwm_port].type_part1; + + reg_value |= (type & 0x2) << pwm_port_params[pwm_port].type_part2; + + regmap_update_bits(regmap, pwm_port_params[pwm_port].ctrl_reg, + pwm_port_params[pwm_port].type_mask, reg_value); +} + +static void aspeed_set_pwm_port_duty_rising_falling(struct regmap *regmap, + u8 pwm_port, u8 rising, + u8 falling) +{ + u32 reg_value = (rising << + pwm_port_params[pwm_port].duty_ctrl_rise_point); + reg_value |= (falling << + pwm_port_params[pwm_port].duty_ctrl_fall_point); + + regmap_update_bits(regmap, pwm_port_params[pwm_port].duty_ctrl_reg, + pwm_port_params[pwm_port].duty_ctrl_rise_fall_mask, + reg_value); +} + +static void aspeed_set_tacho_type_enable(struct regmap *regmap, u8 type, + bool enable) +{ + regmap_update_bits(regmap, type_params[type].ctrl_reg, + TYPE_CTRL_FAN_TYPE_EN, + enable ? 
TYPE_CTRL_FAN_TYPE_EN : 0); +} + +static void aspeed_set_tacho_type_values(struct regmap *regmap, u8 type, + u8 mode, u16 unit, u8 division) +{ + u32 reg_value = ((mode << TYPE_CTRL_FAN_MODE) | + (unit << TYPE_CTRL_FAN_PERIOD) | + (division << TYPE_CTRL_FAN_DIVISION)); + + regmap_update_bits(regmap, type_params[type].ctrl_reg, + TYPE_CTRL_FAN_MASK, reg_value); + regmap_update_bits(regmap, type_params[type].ctrl_reg1, + TYPE_CTRL_FAN1_MASK, unit << 16); +} + +static void aspeed_set_fan_tach_ch_enable(struct regmap *regmap, u8 fan_tach_ch, + bool enable) +{ + regmap_update_bits(regmap, ASPEED_PTCR_CTRL, + ASPEED_PTCR_CTRL_FAN_NUM_EN(fan_tach_ch), + enable ? + ASPEED_PTCR_CTRL_FAN_NUM_EN(fan_tach_ch) : 0); +} + +static void aspeed_set_fan_tach_ch_source(struct regmap *regmap, u8 fan_tach_ch, + u8 fan_tach_ch_source) +{ + u32 reg_value1 = ((fan_tach_ch_source & 0x3) << + TACH_PWM_SOURCE_BIT01(fan_tach_ch)); + u32 reg_value2 = (((fan_tach_ch_source & 0x4) >> 2) << + TACH_PWM_SOURCE_BIT2(fan_tach_ch)); + + regmap_update_bits(regmap, ASPEED_PTCR_TACH_SOURCE, + TACH_PWM_SOURCE_MASK_BIT01(fan_tach_ch), + reg_value1); + + regmap_update_bits(regmap, ASPEED_PTCR_TACH_SOURCE_EXT, + TACH_PWM_SOURCE_MASK_BIT2(fan_tach_ch), + reg_value2); +} + +static void aspeed_set_pwm_port_fan_ctrl(struct aspeed_pwm_tacho_data *priv, + u8 index, u8 fan_ctrl) +{ + u16 period, dc_time_on; + + period = priv->type_pwm_clock_unit[priv->pwm_port_type[index]]; + period += 1; + dc_time_on = (fan_ctrl * period) / PWM_MAX; + + if (dc_time_on == 0) { + aspeed_set_pwm_port_enable(priv->regmap, index, false); + } else { + if (dc_time_on == period) + dc_time_on = 0; + + aspeed_set_pwm_port_duty_rising_falling(priv->regmap, index, 0, + dc_time_on); + aspeed_set_pwm_port_enable(priv->regmap, index, true); + } +} + +static u32 aspeed_get_fan_tach_ch_measure_period(struct aspeed_pwm_tacho_data + *priv, u8 type) +{ + u32 clk; + u16 tacho_unit; + u8 clk_unit, div_h, div_l, tacho_div; + + clk = priv->clk_freq; + 
clk_unit = priv->type_pwm_clock_unit[type];
+	div_h = priv->type_pwm_clock_division_h[type];
+	div_h = 0x1 << div_h;
+	div_l = priv->type_pwm_clock_division_l[type];
+	if (div_l == 0)
+		div_l = 1;
+	else
+		div_l = div_l * 2;
+
+	tacho_unit = priv->type_fan_tach_unit[type];
+	tacho_div = priv->type_fan_tach_clock_division[type];
+
+	tacho_div = 0x4 << (tacho_div * 2);
+
+	/*
+	 * A zero factor (a type whose units were never programmed, or a shift
+	 * result truncated by the u8 locals) would make the divisor zero;
+	 * return 0 instead of dividing by it.
+	 */
+	if (!clk_unit || !div_h || !tacho_div || !tacho_unit)
+		return 0;
+
+	return clk / (clk_unit * div_h * div_l * tacho_div * tacho_unit);
+}
+
+/*
+ * Write the channel bit for @fan_tach_ch to the trigger register, sleep for
+ * the delay derived from the channel's type settings, then read the result
+ * register and convert its raw value to RPM.
+ *
+ * Returns 0 when the raw value reads back as zero.
+ */
+static u32 aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv,
+				      u8 fan_tach_ch)
+{
+	u32 raw_data, tach_div, clk_source, period, sec, val;
+	u8 fan_tach_ch_source, type;
+
+	regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0);
+	regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0x1 << fan_tach_ch);
+
+	fan_tach_ch_source = priv->fan_tach_ch_source[fan_tach_ch];
+	type = priv->pwm_port_type[fan_tach_ch_source];
+
+	/*
+	 * The helper's result is clk_freq divided by the combined dividers,
+	 * so it is 0 when the clock rate is below their product.  Never
+	 * divide by that 0: fall back to 1000 ms, the longest delay the
+	 * division below can otherwise produce.
+	 */
+	period = aspeed_get_fan_tach_ch_measure_period(priv, type);
+	sec = period ? 1000 / period : 1000;
+	msleep(sec);
+
+	regmap_read(priv->regmap, ASPEED_PTCR_RESULT, &val);
+	raw_data = val & RESULT_VALUE_MASK;
+	tach_div = priv->type_fan_tach_clock_division[type];
+	tach_div = 0x4 << (tach_div * 2);
+	clk_source = priv->clk_freq;
+
+	if (raw_data == 0)
+		return 0;
+
+	return (clk_source * 60) / (2 * raw_data * tach_div);
+}
+
+static ssize_t set_pwm(struct device *dev, struct device_attribute *attr,
+		       const char *buf, size_t count)
+{
+	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+	int index = sensor_attr->index;
+	int ret;
+	struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev);
+	long fan_ctrl;
+
+	ret = kstrtol(buf, 10, &fan_ctrl);
+	if (ret != 0)
+		return ret;
+
+	if (fan_ctrl < 0 || fan_ctrl > PWM_MAX)
+		return -EINVAL;
+
+	if (priv->pwm_port_fan_ctrl[index] == fan_ctrl)
+		return count;
+
+	priv->pwm_port_fan_ctrl[index] = fan_ctrl;
+	aspeed_set_pwm_port_fan_ctrl(priv, index, fan_ctrl);
+
+	return count;
+}
+
+static ssize_t show_pwm(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	struct 
sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int index = sensor_attr->index; + struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", priv->pwm_port_fan_ctrl[index]); +} + +static ssize_t show_rpm(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int index = sensor_attr->index; + u32 rpm; + struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev); + + rpm = aspeed_get_fan_tach_ch_rpm(priv, index); + + return sprintf(buf, "%u\n", rpm); +} + +static umode_t pwm_is_visible(struct kobject *kobj, + struct attribute *a, int index) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev); + + if (!priv->pwm_present[index]) + return 0; + return a->mode; +} + +static umode_t fan_dev_is_visible(struct kobject *kobj, + struct attribute *a, int index) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev); + + if (!priv->fan_tach_present[index]) + return 0; + return a->mode; +} + +static SENSOR_DEVICE_ATTR(pwm0, 0644, + show_pwm, set_pwm, 0); +static SENSOR_DEVICE_ATTR(pwm1, 0644, + show_pwm, set_pwm, 1); +static SENSOR_DEVICE_ATTR(pwm2, 0644, + show_pwm, set_pwm, 2); +static SENSOR_DEVICE_ATTR(pwm3, 0644, + show_pwm, set_pwm, 3); +static SENSOR_DEVICE_ATTR(pwm4, 0644, + show_pwm, set_pwm, 4); +static SENSOR_DEVICE_ATTR(pwm5, 0644, + show_pwm, set_pwm, 5); +static SENSOR_DEVICE_ATTR(pwm6, 0644, + show_pwm, set_pwm, 6); +static SENSOR_DEVICE_ATTR(pwm7, 0644, + show_pwm, set_pwm, 7); +static struct attribute *pwm_dev_attrs[] = { + &sensor_dev_attr_pwm0.dev_attr.attr, + &sensor_dev_attr_pwm1.dev_attr.attr, + &sensor_dev_attr_pwm2.dev_attr.attr, + &sensor_dev_attr_pwm3.dev_attr.attr, + &sensor_dev_attr_pwm4.dev_attr.attr, + &sensor_dev_attr_pwm5.dev_attr.attr, + 
&sensor_dev_attr_pwm6.dev_attr.attr, + &sensor_dev_attr_pwm7.dev_attr.attr, + NULL, +}; + +static const struct attribute_group pwm_dev_group = { + .attrs = pwm_dev_attrs, + .is_visible = pwm_is_visible, +}; + +static SENSOR_DEVICE_ATTR(fan0_input, 0444, + show_rpm, NULL, 0); +static SENSOR_DEVICE_ATTR(fan1_input, 0444, + show_rpm, NULL, 1); +static SENSOR_DEVICE_ATTR(fan2_input, 0444, + show_rpm, NULL, 2); +static SENSOR_DEVICE_ATTR(fan3_input, 0444, + show_rpm, NULL, 3); +static SENSOR_DEVICE_ATTR(fan4_input, 0444, + show_rpm, NULL, 4); +static SENSOR_DEVICE_ATTR(fan5_input, 0444, + show_rpm, NULL, 5); +static SENSOR_DEVICE_ATTR(fan6_input, 0444, + show_rpm, NULL, 6); +static SENSOR_DEVICE_ATTR(fan7_input, 0444, + show_rpm, NULL, 7); +static SENSOR_DEVICE_ATTR(fan8_input, 0444, + show_rpm, NULL, 8); +static SENSOR_DEVICE_ATTR(fan9_input, 0444, + show_rpm, NULL, 9); +static SENSOR_DEVICE_ATTR(fan10_input, 0444, + show_rpm, NULL, 10); +static SENSOR_DEVICE_ATTR(fan11_input, 0444, + show_rpm, NULL, 11); +static SENSOR_DEVICE_ATTR(fan12_input, 0444, + show_rpm, NULL, 12); +static SENSOR_DEVICE_ATTR(fan13_input, 0444, + show_rpm, NULL, 13); +static SENSOR_DEVICE_ATTR(fan14_input, 0444, + show_rpm, NULL, 14); +static SENSOR_DEVICE_ATTR(fan15_input, 0444, + show_rpm, NULL, 15); +static struct attribute *fan_dev_attrs[] = { + &sensor_dev_attr_fan0_input.dev_attr.attr, + &sensor_dev_attr_fan1_input.dev_attr.attr, + &sensor_dev_attr_fan2_input.dev_attr.attr, + &sensor_dev_attr_fan3_input.dev_attr.attr, + &sensor_dev_attr_fan4_input.dev_attr.attr, + &sensor_dev_attr_fan5_input.dev_attr.attr, + &sensor_dev_attr_fan6_input.dev_attr.attr, + &sensor_dev_attr_fan7_input.dev_attr.attr, + &sensor_dev_attr_fan8_input.dev_attr.attr, + &sensor_dev_attr_fan9_input.dev_attr.attr, + &sensor_dev_attr_fan10_input.dev_attr.attr, + &sensor_dev_attr_fan11_input.dev_attr.attr, + &sensor_dev_attr_fan12_input.dev_attr.attr, + &sensor_dev_attr_fan13_input.dev_attr.attr, + 
&sensor_dev_attr_fan14_input.dev_attr.attr, + &sensor_dev_attr_fan15_input.dev_attr.attr, + NULL +}; + +static const struct attribute_group fan_dev_group = { + .attrs = fan_dev_attrs, + .is_visible = fan_dev_is_visible, +}; + +/* + * The clock type is type M : + * The PWM frequency = 24MHz / (type M clock division L bit * + * type M clock division H bit * (type M PWM period bit + 1)) + */ +static void aspeed_create_type(struct aspeed_pwm_tacho_data *priv) +{ + priv->type_pwm_clock_division_h[TYPEM] = M_PWM_DIV_H; + priv->type_pwm_clock_division_l[TYPEM] = M_PWM_DIV_L; + priv->type_pwm_clock_unit[TYPEM] = M_PWM_PERIOD; + aspeed_set_pwm_clock_values(priv->regmap, TYPEM, M_PWM_DIV_H, + M_PWM_DIV_L, M_PWM_PERIOD); + aspeed_set_tacho_type_enable(priv->regmap, TYPEM, true); + priv->type_fan_tach_clock_division[TYPEM] = M_TACH_CLK_DIV; + priv->type_fan_tach_unit[TYPEM] = M_TACH_UNIT; + aspeed_set_tacho_type_values(priv->regmap, TYPEM, M_TACH_MODE, + M_TACH_UNIT, M_TACH_CLK_DIV); +} + +static void aspeed_create_pwm_port(struct aspeed_pwm_tacho_data *priv, + u8 pwm_port) +{ + aspeed_set_pwm_port_enable(priv->regmap, pwm_port, true); + priv->pwm_present[pwm_port] = true; + + priv->pwm_port_type[pwm_port] = TYPEM; + aspeed_set_pwm_port_type(priv->regmap, pwm_port, TYPEM); + + priv->pwm_port_fan_ctrl[pwm_port] = INIT_FAN_CTRL; + aspeed_set_pwm_port_fan_ctrl(priv, pwm_port, INIT_FAN_CTRL); +} + +static void aspeed_create_fan_tach_channel(struct aspeed_pwm_tacho_data *priv, + u8 *fan_tach_ch, + int count, + u8 pwm_source) +{ + u8 val, index; + + for (val = 0; val < count; val++) { + index = fan_tach_ch[val]; + aspeed_set_fan_tach_ch_enable(priv->regmap, index, true); + priv->fan_tach_present[index] = true; + priv->fan_tach_ch_source[index] = pwm_source; + aspeed_set_fan_tach_ch_source(priv->regmap, index, pwm_source); + } +} + +static int aspeed_create_fan(struct device *dev, + struct device_node *child, + struct aspeed_pwm_tacho_data *priv) +{ + u8 *fan_tach_ch; + u32 pwm_port; 
+	int ret, count, i;
+
+	ret = of_property_read_u32(child, "reg", &pwm_port);
+	if (ret)
+		return ret;
+	/* "reg" indexes pwm_port_params[] and the per-port arrays in priv */
+	if (pwm_port >= ARRAY_SIZE(pwm_port_params))
+		return -EINVAL;
+	aspeed_create_pwm_port(priv, (u8)pwm_port);
+
+	count = of_property_count_u8_elems(child, "aspeed,fan-tach-ch");
+	if (count < 1)
+		return -EINVAL;
+	fan_tach_ch = devm_kzalloc(dev, sizeof(*fan_tach_ch) * count,
+				   GFP_KERNEL);
+	if (!fan_tach_ch)
+		return -ENOMEM;
+	ret = of_property_read_u8_array(child, "aspeed,fan-tach-ch",
+					fan_tach_ch, count);
+	if (ret)
+		return ret;
+	/* every channel number indexes the per-channel arrays in priv */
+	for (i = 0; i < count; i++)
+		if (fan_tach_ch[i] >= ARRAY_SIZE(priv->fan_tach_present))
+			return -EINVAL;
+	aspeed_create_fan_tach_channel(priv, fan_tach_ch, count, pwm_port);
+
+	return 0;
+}
+
+/*
+ * Probe: map the register block and wrap it in a regmap, clear both tach
+ * source registers, read the clock rate and set the clock-enable bit,
+ * program the type M defaults, create one fan (PWM port plus its tach
+ * channels) per device-tree child node, then register the hwmon device.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int aspeed_pwm_tacho_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np, *child;
+	struct aspeed_pwm_tacho_data *priv;
+	void __iomem *regs;
+	struct resource *res;
+	struct device *hwmon;
+	struct clk *clk;
+	int ret;
+
+	np = dev->of_node;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENOENT;
+	regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+	priv->regmap = devm_regmap_init(dev, NULL, (__force void *)regs,
+			&aspeed_pwm_tacho_regmap_config);
+	if (IS_ERR(priv->regmap))
+		return PTR_ERR(priv->regmap);
+	regmap_write(priv->regmap, ASPEED_PTCR_TACH_SOURCE, 0);
+	regmap_write(priv->regmap, ASPEED_PTCR_TACH_SOURCE_EXT, 0);
+
+	clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(clk))
+		return -ENODEV;
+	priv->clk_freq = clk_get_rate(clk);
+	aspeed_set_clock_enable(priv->regmap, true);
+	aspeed_set_clock_source(priv->regmap, 0);
+
+	aspeed_create_type(priv);
+
+	/*
+	 * for_each_child_of_node() takes and drops the child reference on
+	 * every iteration by itself, so a put is only needed when leaving
+	 * the loop early.  np is dev->of_node used without of_node_get(),
+	 * so it must not be put here either.
+	 */
+	for_each_child_of_node(np, child) {
+		ret = aspeed_create_fan(dev, child, priv);
+		if (ret) {
+			of_node_put(child);
+			return ret;
+		}
+	}
+
+	priv->groups[0] = &pwm_dev_group;
+	priv->groups[1] = &fan_dev_group;
+	priv->groups[2] = NULL;
+	hwmon = devm_hwmon_device_register_with_groups(dev,
+						       "aspeed_pwm_tacho",
+						       priv, priv->groups);
+	
return PTR_ERR_OR_ZERO(hwmon); +} + +static const struct of_device_id of_pwm_tacho_match_table[] = { + { .compatible = "aspeed,ast2400-pwm-tacho", }, + { .compatible = "aspeed,ast2500-pwm-tacho", }, + {}, +}; +MODULE_DEVICE_TABLE(of, of_pwm_tacho_match_table); + +static struct platform_driver aspeed_pwm_tacho_driver = { + .probe = aspeed_pwm_tacho_probe, + .driver = { + .name = "aspeed_pwm_tacho", + .of_match_table = of_pwm_tacho_match_table, + }, +}; + +module_platform_driver(aspeed_pwm_tacho_driver); + +MODULE_AUTHOR("Jaghathiswari Rankappagounder Natarajan <jaghu@google.com>"); +MODULE_DESCRIPTION("ASPEED PWM and Fan Tacho device driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 34704b0451b4..3189246302a6 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -995,6 +995,13 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = { }, .driver_data = (void *)&i8k_config_data[DELL_XPS], }, + { + .ident = "Dell XPS 15 9560", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS 15 9560"), + }, + }, { } }; diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 28375d59cc36..dd6e17c1076b 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -186,7 +186,7 @@ static ssize_t hwmon_attr_show_string(struct device *dev, char *buf) { struct hwmon_device_attribute *hattr = to_hwmon_attr(devattr); - char *s; + const char *s; int ret; ret = hattr->ops->read_string(dev, hattr->type, hattr->attr, diff --git a/drivers/hwmon/ina209.c b/drivers/hwmon/ina209.c index 5378fdefc1f7..aa0768ce8aea 100644 --- a/drivers/hwmon/ina209.c +++ b/drivers/hwmon/ina209.c @@ -117,7 +117,7 @@ static long ina209_from_reg(const u8 reg, const u16 val) case INA209_SHUNT_VOLTAGE_POS_WARN: case INA209_SHUNT_VOLTAGE_NEG_WARN: /* LSB=10 uV. Convert to mV. 
*/ - return DIV_ROUND_CLOSEST(val, 100); + return DIV_ROUND_CLOSEST((s16)val, 100); case INA209_BUS_VOLTAGE: case INA209_BUS_VOLTAGE_MAX_PEAK: @@ -146,7 +146,7 @@ static long ina209_from_reg(const u8 reg, const u16 val) case INA209_CURRENT: /* LSB=1 mA (selected). Is in mA */ - return val; + return (s16)val; } /* programmer goofed */ @@ -608,11 +608,18 @@ static const struct i2c_device_id ina209_id[] = { }; MODULE_DEVICE_TABLE(i2c, ina209_id); +static const struct of_device_id ina209_of_match[] = { + { .compatible = "ti,ina209" }, + { }, +}; +MODULE_DEVICE_TABLE(of, ina209_of_match); + /* This is the driver that will be inserted */ static struct i2c_driver ina209_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "ina209", + .of_match_table = of_match_ptr(ina209_of_match), }, .probe = ina209_probe, .remove = ina209_remove, diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index b24f1d3045f0..62e38fa8cda2 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -34,6 +34,7 @@ #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <linux/jiffies.h> +#include <linux/of_device.h> #include <linux/of.h> #include <linux/delay.h> #include <linux/util_macros.h> @@ -424,13 +425,19 @@ static int ina2xx_probe(struct i2c_client *client, struct device *hwmon_dev; u32 val; int ret, group = 0; + enum ina2xx_ids chip; + + if (client->dev.of_node) + chip = (enum ina2xx_ids)of_device_get_match_data(&client->dev); + else + chip = id->driver_data; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; /* set the device type */ - data->config = &ina2xx_config[id->driver_data]; + data->config = &ina2xx_config[chip]; if (of_property_read_u32(dev->of_node, "shunt-resistor", &val) < 0) { struct ina2xx_platform_data *pdata = dev_get_platdata(dev); @@ -487,9 +494,35 @@ static const struct i2c_device_id ina2xx_id[] = { }; MODULE_DEVICE_TABLE(i2c, ina2xx_id); +static const struct of_device_id ina2xx_of_match[] = { + { + .compatible 
= "ti,ina219",
+		.data = (void *)ina219
+	},
+	{
+		.compatible = "ti,ina220",
+		.data = (void *)ina219
+	},
+	{
+		.compatible = "ti,ina226",
+		.data = (void *)ina226
+	},
+	{
+		.compatible = "ti,ina230",
+		.data = (void *)ina226
+	},
+	{
+		.compatible = "ti,ina231",
+		.data = (void *)ina226
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ina2xx_of_match);
+
 static struct i2c_driver ina2xx_driver = {
 	.driver = {
 		.name = "ina2xx",
+		.of_match_table = of_match_ptr(ina2xx_of_match),
 	},
 	.probe = ina2xx_probe,
 	.id_table = ina2xx_id,
diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c
index 2e1948699114..4c1770920d29 100644
--- a/drivers/hwmon/lm63.c
+++ b/drivers/hwmon/lm63.c
@@ -46,6 +46,7 @@
 #include <linux/hwmon.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
+#include <linux/of_device.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
 
@@ -1115,6 +1116,10 @@ static int lm63_probe(struct i2c_client *client,
 	mutex_init(&data->update_lock);
 
 	/* Set the device type */
-	data->kind = id->driver_data;
+	/* DT-probed devices take the chip kind from the OF match data */
+	if (client->dev.of_node)
+		data->kind = (enum chips)of_device_get_match_data(&client->dev);
+	else
+		data->kind = id->driver_data;
 	if (data->kind == lm64)
 		data->temp2_offset = 16000;
@@ -1149,10 +1154,28 @@ static const struct i2c_device_id lm63_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, lm63_id);
 
+static const struct of_device_id lm63_of_match[] = {
+	{
+		.compatible = "national,lm63",
+		.data = (void *)lm63
+	},
+	{
+		.compatible = "national,lm64",
+		.data = (void *)lm64
+	},
+	{
+		.compatible = "national,lm96163",
+		.data = (void *)lm96163
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, lm63_of_match);
+
 static struct i2c_driver lm63_driver = {
 	.class = I2C_CLASS_HWMON,
 	.driver = {
 		.name = "lm63",
+		.of_match_table = of_match_ptr(lm63_of_match),
 	},
 	.probe = lm63_probe,
 	.id_table = lm63_id,
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index eff3b24d8473..005ffb5ffa92 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -26,6 +26,7 @@
 #include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h> #include <linux/err.h> +#include <linux/of_device.h> #include <linux/of.h> #include <linux/regmap.h> #include "lm75.h" @@ -273,7 +274,12 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) int status, err; u8 set_mask, clr_mask; int new; - enum lm75_type kind = id->driver_data; + enum lm75_type kind; + + if (client->dev.of_node) + kind = (enum lm75_type)of_device_get_match_data(&client->dev); + else + kind = id->driver_data; if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA)) @@ -424,6 +430,95 @@ static const struct i2c_device_id lm75_ids[] = { }; MODULE_DEVICE_TABLE(i2c, lm75_ids); +static const struct of_device_id lm75_of_match[] = { + { + .compatible = "adi,adt75", + .data = (void *)adt75 + }, + { + .compatible = "dallas,ds1775", + .data = (void *)ds1775 + }, + { + .compatible = "dallas,ds75", + .data = (void *)ds75 + }, + { + .compatible = "dallas,ds7505", + .data = (void *)ds7505 + }, + { + .compatible = "gmt,g751", + .data = (void *)g751 + }, + { + .compatible = "national,lm75", + .data = (void *)lm75 + }, + { + .compatible = "national,lm75a", + .data = (void *)lm75a + }, + { + .compatible = "national,lm75b", + .data = (void *)lm75b + }, + { + .compatible = "maxim,max6625", + .data = (void *)max6625 + }, + { + .compatible = "maxim,max6626", + .data = (void *)max6626 + }, + { + .compatible = "maxim,mcp980x", + .data = (void *)mcp980x + }, + { + .compatible = "st,stds75", + .data = (void *)stds75 + }, + { + .compatible = "microchip,tcn75", + .data = (void *)tcn75 + }, + { + .compatible = "ti,tmp100", + .data = (void *)tmp100 + }, + { + .compatible = "ti,tmp101", + .data = (void *)tmp101 + }, + { + .compatible = "ti,tmp105", + .data = (void *)tmp105 + }, + { + .compatible = "ti,tmp112", + .data = (void *)tmp112 + }, + { + .compatible = "ti,tmp175", + .data = (void *)tmp175 + }, + { + .compatible = "ti,tmp275", + .data = (void *)tmp275 + }, + { + .compatible = 
"ti,tmp75", + .data = (void *)tmp75 + }, + { + .compatible = "ti,tmp75c", + .data = (void *)tmp75c + }, + { }, +}; +MODULE_DEVICE_TABLE(of, lm75_of_match); + #define LM75A_ID 0xA1 /* Return 0 if detection is successful, -ENODEV otherwise */ @@ -560,6 +655,7 @@ static struct i2c_driver lm75_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "lm75", + .of_match_table = of_match_ptr(lm75_of_match), .pm = LM75_DEV_PM_OPS, }, .probe = lm75_probe, diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c index 691469ffa24e..0a325878e8f5 100644 --- a/drivers/hwmon/lm85.c +++ b/drivers/hwmon/lm85.c @@ -25,6 +25,7 @@ */ #include <linux/module.h> +#include <linux/of_device.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/jiffies.h> @@ -1552,7 +1553,10 @@ static int lm85_probe(struct i2c_client *client, const struct i2c_device_id *id) return -ENOMEM; data->client = client; - data->type = id->driver_data; + if (client->dev.of_node) + data->type = (enum chips)of_device_get_match_data(&client->dev); + else + data->type = id->driver_data; mutex_init(&data->update_lock); /* Fill in the chip specific driver values */ @@ -1623,10 +1627,60 @@ static const struct i2c_device_id lm85_id[] = { }; MODULE_DEVICE_TABLE(i2c, lm85_id); +static const struct of_device_id lm85_of_match[] = { + { + .compatible = "adi,adm1027", + .data = (void *)adm1027 + }, + { + .compatible = "adi,adt7463", + .data = (void *)adt7463 + }, + { + .compatible = "adi,adt7468", + .data = (void *)adt7468 + }, + { + .compatible = "national,lm85", + .data = (void *)lm85 + }, + { + .compatible = "national,lm85b", + .data = (void *)lm85 + }, + { + .compatible = "national,lm85c", + .data = (void *)lm85 + }, + { + .compatible = "smsc,emc6d100", + .data = (void *)emc6d100 + }, + { + .compatible = "smsc,emc6d101", + .data = (void *)emc6d100 + }, + { + .compatible = "smsc,emc6d102", + .data = (void *)emc6d102 + }, + { + .compatible = "smsc,emc6d103", + .data = (void *)emc6d103 + }, + { + .compatible = 
"smsc,emc6d103s", + .data = (void *)emc6d103s + }, + { }, +}; +MODULE_DEVICE_TABLE(of, lm85_of_match); + static struct i2c_driver lm85_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "lm85", + .of_match_table = of_match_ptr(lm85_of_match), }, .probe = lm85_probe, .id_table = lm85_id, diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c index e06faf9d3f0f..b48d30760388 100644 --- a/drivers/hwmon/lm87.c +++ b/drivers/hwmon/lm87.c @@ -66,6 +66,7 @@ #include <linux/hwmon-vid.h> #include <linux/err.h> #include <linux/mutex.h> +#include <linux/regulator/consumer.h> /* * Addresses to scan @@ -74,8 +75,6 @@ static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; -enum chips { lm87, adm1024 }; - /* * The LM87 registers */ @@ -855,8 +854,26 @@ static int lm87_init_client(struct i2c_client *client) { struct lm87_data *data = i2c_get_clientdata(client); int rc; - - if (dev_get_platdata(&client->dev)) { + struct device_node *of_node = client->dev.of_node; + u8 val = 0; + struct regulator *vcc = NULL; + + if (of_node) { + if (of_property_read_bool(of_node, "has-temp3")) + val |= CHAN_TEMP3; + if (of_property_read_bool(of_node, "has-in6")) + val |= CHAN_NO_FAN(0); + if (of_property_read_bool(of_node, "has-in7")) + val |= CHAN_NO_FAN(1); + vcc = devm_regulator_get_optional(&client->dev, "vcc"); + if (!IS_ERR(vcc)) { + if (regulator_get_voltage(vcc) == 5000000) + val |= CHAN_VCC_5V; + } + data->channel = val; + lm87_write_value(client, + LM87_REG_CHANNEL_MODE, data->channel); + } else if (dev_get_platdata(&client->dev)) { data->channel = *(u8 *)dev_get_platdata(&client->dev); lm87_write_value(client, LM87_REG_CHANNEL_MODE, data->channel); @@ -962,16 +979,24 @@ static int lm87_probe(struct i2c_client *client, const struct i2c_device_id *id) */ static const struct i2c_device_id lm87_id[] = { - { "lm87", lm87 }, - { "adm1024", adm1024 }, + { "lm87", 0 }, + { "adm1024", 0 }, { } }; MODULE_DEVICE_TABLE(i2c, lm87_id); +static const struct 
of_device_id lm87_of_match[] = { + { .compatible = "ti,lm87" }, + { .compatible = "adi,adm1024" }, + { }, +}; +MODULE_DEVICE_TABLE(of, lm87_of_match); + static struct i2c_driver lm87_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "lm87", + .of_match_table = lm87_of_match, }, .probe = lm87_probe, .id_table = lm87_id, diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index aff5297bc2bc..c2f411c290bf 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -92,6 +92,7 @@ #include <linux/hwmon.h> #include <linux/err.h> #include <linux/mutex.h> +#include <linux/of_device.h> #include <linux/sysfs.h> #include <linux/interrupt.h> #include <linux/regulator/consumer.h> @@ -235,6 +236,99 @@ static const struct i2c_device_id lm90_id[] = { }; MODULE_DEVICE_TABLE(i2c, lm90_id); +static const struct of_device_id lm90_of_match[] = { + { + .compatible = "adi,adm1032", + .data = (void *)adm1032 + }, + { + .compatible = "adi,adt7461", + .data = (void *)adt7461 + }, + { + .compatible = "adi,adt7461a", + .data = (void *)adt7461 + }, + { + .compatible = "gmt,g781", + .data = (void *)g781 + }, + { + .compatible = "national,lm90", + .data = (void *)lm90 + }, + { + .compatible = "national,lm86", + .data = (void *)lm86 + }, + { + .compatible = "national,lm89", + .data = (void *)lm86 + }, + { + .compatible = "national,lm99", + .data = (void *)lm99 + }, + { + .compatible = "dallas,max6646", + .data = (void *)max6646 + }, + { + .compatible = "dallas,max6647", + .data = (void *)max6646 + }, + { + .compatible = "dallas,max6649", + .data = (void *)max6646 + }, + { + .compatible = "dallas,max6657", + .data = (void *)max6657 + }, + { + .compatible = "dallas,max6658", + .data = (void *)max6657 + }, + { + .compatible = "dallas,max6659", + .data = (void *)max6659 + }, + { + .compatible = "dallas,max6680", + .data = (void *)max6680 + }, + { + .compatible = "dallas,max6681", + .data = (void *)max6680 + }, + { + .compatible = "dallas,max6695", + .data = (void *)max6696 + }, + { + 
.compatible = "dallas,max6696", + .data = (void *)max6696 + }, + { + .compatible = "onnn,nct1008", + .data = (void *)adt7461 + }, + { + .compatible = "winbond,w83l771", + .data = (void *)w83l771 + }, + { + .compatible = "nxp,sa56004", + .data = (void *)sa56004 + }, + { + .compatible = "ti,tmp451", + .data = (void *)tmp451 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, lm90_of_match); + /* * chip type specific parameters */ @@ -1677,7 +1771,10 @@ static int lm90_probe(struct i2c_client *client, mutex_init(&data->update_lock); /* Set the device type */ - data->kind = id->driver_data; + if (client->dev.of_node) + data->kind = (enum chips)of_device_get_match_data(&client->dev); + else + data->kind = id->driver_data; if (data->kind == adm1032) { if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) client->flags &= ~I2C_CLIENT_PEC; @@ -1816,6 +1913,7 @@ static struct i2c_driver lm90_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "lm90", + .of_match_table = of_match_ptr(lm90_of_match), }, .probe = lm90_probe, .alert = lm90_alert, diff --git a/drivers/hwmon/lm95245.c b/drivers/hwmon/lm95245.c index a3bfd88752ca..27cb06d65594 100644 --- a/drivers/hwmon/lm95245.c +++ b/drivers/hwmon/lm95245.c @@ -622,10 +622,18 @@ static const struct i2c_device_id lm95245_id[] = { }; MODULE_DEVICE_TABLE(i2c, lm95245_id); +static const struct of_device_id lm95245_of_match[] = { + { .compatible = "national,lm95235" }, + { .compatible = "national,lm95245" }, + { }, +}; +MODULE_DEVICE_TABLE(of, lm95245_of_match); + static struct i2c_driver lm95245_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "lm95245", + .of_match_table = of_match_ptr(lm95245_of_match), }, .probe = lm95245_probe, .id_table = lm95245_id, diff --git a/drivers/hwmon/max6697.c b/drivers/hwmon/max6697.c index f03a71722849..221fd1492057 100644 --- a/drivers/hwmon/max6697.c +++ b/drivers/hwmon/max6697.c @@ -24,6 +24,7 @@ #include <linux/hwmon-sysfs.h> #include <linux/err.h> #include <linux/mutex.h> +#include 
<linux/of_device.h> #include <linux/of.h> #include <linux/platform_data/max6697.h> @@ -632,7 +633,10 @@ static int max6697_probe(struct i2c_client *client, if (!data) return -ENOMEM; - data->type = id->driver_data; + if (client->dev.of_node) + data->type = (enum chips)of_device_get_match_data(&client->dev); + else + data->type = id->driver_data; data->chip = &max6697_chip_data[data->type]; data->client = client; mutex_init(&data->update_lock); @@ -662,10 +666,56 @@ static const struct i2c_device_id max6697_id[] = { }; MODULE_DEVICE_TABLE(i2c, max6697_id); +static const struct of_device_id max6697_of_match[] = { + { + .compatible = "maxim,max6581", + .data = (void *)max6581 + }, + { + .compatible = "maxim,max6602", + .data = (void *)max6602 + }, + { + .compatible = "maxim,max6622", + .data = (void *)max6622 + }, + { + .compatible = "maxim,max6636", + .data = (void *)max6636 + }, + { + .compatible = "maxim,max6689", + .data = (void *)max6689 + }, + { + .compatible = "maxim,max6693", + .data = (void *)max6693 + }, + { + .compatible = "maxim,max6694", + .data = (void *)max6694 + }, + { + .compatible = "maxim,max6697", + .data = (void *)max6697 + }, + { + .compatible = "maxim,max6698", + .data = (void *)max6698 + }, + { + .compatible = "maxim,max6699", + .data = (void *)max6699 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, max6697_of_match); + static struct i2c_driver max6697_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "max6697", + .of_match_table = of_match_ptr(max6697_of_match), }, .probe = max6697_probe, .id_table = max6697_id, diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c index 4ab5293c7bf0..00d6995af4c2 100644 --- a/drivers/hwmon/pmbus/adm1275.c +++ b/drivers/hwmon/pmbus/adm1275.c @@ -101,8 +101,8 @@ static const struct coefficients adm1075_coefficients[] = { [0] = { 27169, 0, -1 }, /* voltage */ [1] = { 806, 20475, -1 }, /* current, irange25 */ [2] = { 404, 20475, -1 }, /* current, irange50 */ - [3] = { 0, -1, 8549 }, /* power, 
irange25 */ - [4] = { 0, -1, 4279 }, /* power, irange50 */ + [3] = { 8549, 0, -1 }, /* power, irange25 */ + [4] = { 4279, 0, -1 }, /* power, irange50 */ }; static const struct coefficients adm1275_coefficients[] = { diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c index 3e3aa950277f..3518f0c08934 100644 --- a/drivers/hwmon/pmbus/ucd9000.c +++ b/drivers/hwmon/pmbus/ucd9000.c @@ -21,6 +21,7 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/init.h> #include <linux/err.h> #include <linux/slab.h> @@ -119,6 +120,35 @@ static const struct i2c_device_id ucd9000_id[] = { }; MODULE_DEVICE_TABLE(i2c, ucd9000_id); +static const struct of_device_id ucd9000_of_match[] = { + { + .compatible = "ti,ucd9000", + .data = (void *)ucd9000 + }, + { + .compatible = "ti,ucd90120", + .data = (void *)ucd90120 + }, + { + .compatible = "ti,ucd90124", + .data = (void *)ucd90124 + }, + { + .compatible = "ti,ucd90160", + .data = (void *)ucd90160 + }, + { + .compatible = "ti,ucd9090", + .data = (void *)ucd9090 + }, + { + .compatible = "ti,ucd90910", + .data = (void *)ucd90910 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, ucd9000_of_match); + static int ucd9000_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -126,6 +156,7 @@ static int ucd9000_probe(struct i2c_client *client, struct ucd9000_data *data; struct pmbus_driver_info *info; const struct i2c_device_id *mid; + enum chips chip; int i, ret; if (!i2c_check_functionality(client->adapter, @@ -151,7 +182,12 @@ static int ucd9000_probe(struct i2c_client *client, return -ENODEV; } - if (id->driver_data != ucd9000 && id->driver_data != mid->driver_data) + if (client->dev.of_node) + chip = (enum chips)of_device_get_match_data(&client->dev); + else + chip = id->driver_data; + + if (chip != ucd9000 && chip != mid->driver_data) dev_notice(&client->dev, "Device mismatch: Configured %s, detected %s\n", id->name, mid->name); @@ -234,6 +270,7 @@ static int 
ucd9000_probe(struct i2c_client *client, static struct i2c_driver ucd9000_driver = { .driver = { .name = "ucd9000", + .of_match_table = of_match_ptr(ucd9000_of_match), }, .probe = ucd9000_probe, .remove = pmbus_do_remove, diff --git a/drivers/hwmon/pmbus/ucd9200.c b/drivers/hwmon/pmbus/ucd9200.c index 033d6aca47d3..a8712c5ded4e 100644 --- a/drivers/hwmon/pmbus/ucd9200.c +++ b/drivers/hwmon/pmbus/ucd9200.c @@ -20,6 +20,7 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/init.h> #include <linux/err.h> #include <linux/slab.h> @@ -46,12 +47,50 @@ static const struct i2c_device_id ucd9200_id[] = { }; MODULE_DEVICE_TABLE(i2c, ucd9200_id); +static const struct of_device_id ucd9200_of_match[] = { + { + .compatible = "ti,cd9200", + .data = (void *)ucd9200 + }, + { + .compatible = "ti,cd9220", + .data = (void *)ucd9220 + }, + { + .compatible = "ti,cd9222", + .data = (void *)ucd9222 + }, + { + .compatible = "ti,cd9224", + .data = (void *)ucd9224 + }, + { + .compatible = "ti,cd9240", + .data = (void *)ucd9240 + }, + { + .compatible = "ti,cd9244", + .data = (void *)ucd9244 + }, + { + .compatible = "ti,cd9246", + .data = (void *)ucd9246 + }, + { + .compatible = "ti,cd9248", + .data = (void *)ucd9248 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, ucd9200_of_match); + static int ucd9200_probe(struct i2c_client *client, const struct i2c_device_id *id) { u8 block_buffer[I2C_SMBUS_BLOCK_MAX + 1]; struct pmbus_driver_info *info; const struct i2c_device_id *mid; + enum chips chip; int i, j, ret; if (!i2c_check_functionality(client->adapter, @@ -76,7 +115,13 @@ static int ucd9200_probe(struct i2c_client *client, dev_err(&client->dev, "Unsupported device\n"); return -ENODEV; } - if (id->driver_data != ucd9200 && id->driver_data != mid->driver_data) + + if (client->dev.of_node) + chip = (enum chips)of_device_get_match_data(&client->dev); + else + chip = id->driver_data; + + if (chip != ucd9200 && chip != mid->driver_data) 
dev_notice(&client->dev, "Device mismatch: Configured %s, detected %s\n", id->name, mid->name); @@ -167,6 +212,7 @@ static int ucd9200_probe(struct i2c_client *client, static struct i2c_driver ucd9200_driver = { .driver = { .name = "ucd9200", + .of_match_table = of_match_ptr(ucd9200_of_match), }, .probe = ucd9200_probe, .remove = pmbus_do_remove, diff --git a/drivers/hwmon/stts751.c b/drivers/hwmon/stts751.c index 55450680fb58..d56251d6eec2 100644 --- a/drivers/hwmon/stts751.c +++ b/drivers/hwmon/stts751.c @@ -85,6 +85,12 @@ static const struct i2c_device_id stts751_id[] = { { } }; +static const struct of_device_id stts751_of_match[] = { + { .compatible = "stts751" }, + { }, +}; +MODULE_DEVICE_TABLE(of, stts751_of_match); + struct stts751_priv { struct device *dev; struct i2c_client *client; @@ -819,6 +825,7 @@ static struct i2c_driver stts751_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = DEVNAME, + .of_match_table = of_match_ptr(stts751_of_match), }, .probe = stts751_probe, .id_table = stts751_id, diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c index 36bba2a816a4..5eafbaada795 100644 --- a/drivers/hwmon/tmp102.c +++ b/drivers/hwmon/tmp102.c @@ -323,8 +323,15 @@ static const struct i2c_device_id tmp102_id[] = { }; MODULE_DEVICE_TABLE(i2c, tmp102_id); +static const struct of_device_id tmp102_of_match[] = { + { .compatible = "ti,tmp102" }, + { }, +}; +MODULE_DEVICE_TABLE(of, tmp102_of_match); + static struct i2c_driver tmp102_driver = { .driver.name = DRIVER_NAME, + .driver.of_match_table = of_match_ptr(tmp102_of_match), .driver.pm = &tmp102_dev_pm_ops, .probe = tmp102_probe, .id_table = tmp102_id, diff --git a/drivers/hwmon/tmp103.c b/drivers/hwmon/tmp103.c index ad571ec795a3..7f85b14544df 100644 --- a/drivers/hwmon/tmp103.c +++ b/drivers/hwmon/tmp103.c @@ -150,8 +150,7 @@ static int tmp103_probe(struct i2c_client *client, return PTR_ERR_OR_ZERO(hwmon_dev); } -#ifdef CONFIG_PM -static int tmp103_suspend(struct device *dev) +static int 
__maybe_unused tmp103_suspend(struct device *dev) { struct regmap *regmap = dev_get_drvdata(dev); @@ -159,7 +158,7 @@ static int tmp103_suspend(struct device *dev) TMP103_CONF_SD_MASK, 0); } -static int tmp103_resume(struct device *dev) +static int __maybe_unused tmp103_resume(struct device *dev) { struct regmap *regmap = dev_get_drvdata(dev); @@ -167,15 +166,7 @@ static int tmp103_resume(struct device *dev) TMP103_CONF_SD_MASK, TMP103_CONF_SD); } -static const struct dev_pm_ops tmp103_dev_pm_ops = { - .suspend = tmp103_suspend, - .resume = tmp103_resume, -}; - -#define TMP103_DEV_PM_OPS (&tmp103_dev_pm_ops) -#else -#define TMP103_DEV_PM_OPS NULL -#endif /* CONFIG_PM */ +static SIMPLE_DEV_PM_OPS(tmp103_dev_pm_ops, tmp103_suspend, tmp103_resume); static const struct i2c_device_id tmp103_id[] = { { "tmp103", 0 }, @@ -183,10 +174,17 @@ static const struct i2c_device_id tmp103_id[] = { }; MODULE_DEVICE_TABLE(i2c, tmp103_id); +static const struct of_device_id tmp103_of_match[] = { + { .compatible = "ti,tmp103" }, + { }, +}; +MODULE_DEVICE_TABLE(of, tmp103_of_match); + static struct i2c_driver tmp103_driver = { .driver = { .name = "tmp103", - .pm = TMP103_DEV_PM_OPS, + .of_match_table = of_match_ptr(tmp103_of_match), + .pm = &tmp103_dev_pm_ops, }, .probe = tmp103_probe, .id_table = tmp103_id, diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c index bfb98b96c781..e36399213324 100644 --- a/drivers/hwmon/tmp421.c +++ b/drivers/hwmon/tmp421.c @@ -29,6 +29,7 @@ #include <linux/hwmon-sysfs.h> #include <linux/err.h> #include <linux/mutex.h> +#include <linux/of_device.h> #include <linux/sysfs.h> /* Addresses to scan */ @@ -69,6 +70,31 @@ static const struct i2c_device_id tmp421_id[] = { }; MODULE_DEVICE_TABLE(i2c, tmp421_id); +static const struct of_device_id tmp421_of_match[] = { + { + .compatible = "ti,tmp421", + .data = (void *)2 + }, + { + .compatible = "ti,tmp422", + .data = (void *)3 + }, + { + .compatible = "ti,tmp423", + .data = (void *)4 + }, + { + .compatible 
= "ti,tmp441", + .data = (void *)2 + }, + { + .compatible = "ti,tmp422", + .data = (void *)3 + }, + { }, +}; +MODULE_DEVICE_TABLE(of, tmp421_of_match); + struct tmp421_data { struct i2c_client *client; struct mutex update_lock; @@ -78,7 +104,7 @@ struct tmp421_data { struct hwmon_chip_info chip; char valid; unsigned long last_updated; - int channels; + unsigned long channels; u8 config; s16 temp[4]; }; @@ -272,7 +298,11 @@ static int tmp421_probe(struct i2c_client *client, return -ENOMEM; mutex_init(&data->update_lock); - data->channels = id->driver_data; + if (client->dev.of_node) + data->channels = (unsigned long) + of_device_get_match_data(&client->dev); + else + data->channels = id->driver_data; data->client = client; err = tmp421_init_client(client); @@ -301,6 +331,7 @@ static struct i2c_driver tmp421_driver = { .class = I2C_CLASS_HWMON, .driver = { .name = "tmp421", + .of_match_table = of_match_ptr(tmp421_of_match), }, .probe = tmp421_probe, .id_table = tmp421_id, diff --git a/drivers/hwmon/twl4030-madc-hwmon.c b/drivers/hwmon/twl4030-madc-hwmon.c deleted file mode 100644 index b5caf7fdb487..000000000000 --- a/drivers/hwmon/twl4030-madc-hwmon.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * - * TWL4030 MADC Hwmon driver-This driver monitors the real time - * conversion of analog signals like battery temperature, - * battery type, battery level etc. User can ask for the conversion on a - * particular channel using the sysfs nodes. - * - * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ - * J Keerthy <j-keerthy@ti.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/i2c/twl.h> -#include <linux/device.h> -#include <linux/platform_device.h> -#include <linux/i2c/twl4030-madc.h> -#include <linux/hwmon.h> -#include <linux/hwmon-sysfs.h> -#include <linux/stddef.h> -#include <linux/sysfs.h> -#include <linux/err.h> -#include <linux/types.h> - -/* - * sysfs hook function - */ -static ssize_t madc_read(struct device *dev, - struct device_attribute *devattr, char *buf) -{ - struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - struct twl4030_madc_request req = { - .channels = 1 << attr->index, - .method = TWL4030_MADC_SW2, - .type = TWL4030_MADC_WAIT, - }; - long val; - - val = twl4030_madc_conversion(&req); - if (val < 0) - return val; - - return sprintf(buf, "%d\n", req.rbuf[attr->index]); -} - -/* sysfs nodes to read individual channels from user side */ -static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, madc_read, NULL, 0); -static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, madc_read, NULL, 1); -static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, madc_read, NULL, 2); -static SENSOR_DEVICE_ATTR(in3_input, S_IRUGO, madc_read, NULL, 3); -static SENSOR_DEVICE_ATTR(in4_input, S_IRUGO, madc_read, NULL, 4); -static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, madc_read, NULL, 5); -static SENSOR_DEVICE_ATTR(in6_input, S_IRUGO, madc_read, NULL, 6); -static SENSOR_DEVICE_ATTR(in7_input, S_IRUGO, madc_read, NULL, 7); -static SENSOR_DEVICE_ATTR(in8_input, S_IRUGO, madc_read, NULL, 8); -static SENSOR_DEVICE_ATTR(in9_input, S_IRUGO, madc_read, NULL, 9); -static SENSOR_DEVICE_ATTR(curr10_input, S_IRUGO, madc_read, NULL, 10); -static SENSOR_DEVICE_ATTR(in11_input, S_IRUGO, 
madc_read, NULL, 11); -static SENSOR_DEVICE_ATTR(in12_input, S_IRUGO, madc_read, NULL, 12); -static SENSOR_DEVICE_ATTR(in15_input, S_IRUGO, madc_read, NULL, 15); - -static struct attribute *twl4030_madc_attrs[] = { - &sensor_dev_attr_in0_input.dev_attr.attr, - &sensor_dev_attr_temp1_input.dev_attr.attr, - &sensor_dev_attr_in2_input.dev_attr.attr, - &sensor_dev_attr_in3_input.dev_attr.attr, - &sensor_dev_attr_in4_input.dev_attr.attr, - &sensor_dev_attr_in5_input.dev_attr.attr, - &sensor_dev_attr_in6_input.dev_attr.attr, - &sensor_dev_attr_in7_input.dev_attr.attr, - &sensor_dev_attr_in8_input.dev_attr.attr, - &sensor_dev_attr_in9_input.dev_attr.attr, - &sensor_dev_attr_curr10_input.dev_attr.attr, - &sensor_dev_attr_in11_input.dev_attr.attr, - &sensor_dev_attr_in12_input.dev_attr.attr, - &sensor_dev_attr_in15_input.dev_attr.attr, - NULL -}; -ATTRIBUTE_GROUPS(twl4030_madc); - -static int twl4030_madc_hwmon_probe(struct platform_device *pdev) -{ - struct device *hwmon; - - hwmon = devm_hwmon_device_register_with_groups(&pdev->dev, - "twl4030_madc", NULL, - twl4030_madc_groups); - return PTR_ERR_OR_ZERO(hwmon); -} - -static struct platform_driver twl4030_madc_hwmon_driver = { - .probe = twl4030_madc_hwmon_probe, - .driver = { - .name = "twl4030_madc_hwmon", - }, -}; - -module_platform_driver(twl4030_madc_hwmon_driver); - -MODULE_DESCRIPTION("TWL4030 ADC Hwmon driver"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("J Keerthy"); -MODULE_ALIAS("platform:twl4030_madc_hwmon"); diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index ab346ed142de..ad68b6d9ff17 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -135,11 +135,16 @@ superio_select(int ioreg, int ld) outb(ld, ioreg + 1); } -static inline void +static inline int superio_enter(int ioreg) { + if (!request_muxed_region(ioreg, 2, DRVNAME)) + return -EBUSY; + outb(0x87, ioreg); outb(0x87, ioreg); + + return 0; } static inline void @@ -148,6 +153,7 @@ superio_exit(int ioreg) outb(0xaa, 
ioreg); outb(0x02, ioreg); outb(0x02, ioreg + 1); + release_region(ioreg, 2); } /* @@ -1970,8 +1976,6 @@ w83627ehf_check_fan_inputs(const struct w83627ehf_sio_data *sio_data, return; } - superio_enter(sio_data->sioreg); - /* fan4 and fan5 share some pins with the GPIO and serial flash */ if (sio_data->kind == nct6775) { /* On NCT6775, fan4 shares pins with the fdc interface */ @@ -2013,8 +2017,6 @@ w83627ehf_check_fan_inputs(const struct w83627ehf_sio_data *sio_data, fan4min = fan4pin; } - superio_exit(sio_data->sioreg); - data->has_fan = data->has_fan_min = 0x03; /* fan1 and fan2 */ data->has_fan |= (fan3pin << 2); data->has_fan_min |= (fan3pin << 2); @@ -2352,7 +2354,11 @@ static int w83627ehf_probe(struct platform_device *pdev) w83627ehf_init_device(data, sio_data->kind); data->vrm = vid_which_vrm(); - superio_enter(sio_data->sioreg); + + err = superio_enter(sio_data->sioreg); + if (err) + goto exit_release; + /* Read VID value */ if (sio_data->kind == w83667hg || sio_data->kind == w83667hg_b || sio_data->kind == nct6775 || sio_data->kind == nct6776) { @@ -2364,8 +2370,10 @@ static int w83627ehf_probe(struct platform_device *pdev) superio_select(sio_data->sioreg, W83667HG_LD_VID); data->vid = superio_inb(sio_data->sioreg, 0xe3); err = device_create_file(dev, &dev_attr_cpu0_vid); - if (err) + if (err) { + superio_exit(sio_data->sioreg); goto exit_release; + } } else if (sio_data->kind != w83627uhg) { superio_select(sio_data->sioreg, W83627EHF_LD_HWM); if (superio_inb(sio_data->sioreg, SIO_REG_VID_CTRL) & 0x80) { @@ -2401,8 +2409,10 @@ static int w83627ehf_probe(struct platform_device *pdev) data->vid &= 0x3f; err = device_create_file(dev, &dev_attr_cpu0_vid); - if (err) + if (err) { + superio_exit(sio_data->sioreg); goto exit_release; + } } else { dev_info(dev, "VID pins in output mode, CPU VID not available\n"); @@ -2424,10 +2434,10 @@ static int w83627ehf_probe(struct platform_device *pdev) pr_info("Enabled fan debounce for chip %s\n", data->name); } - 
superio_exit(sio_data->sioreg); - w83627ehf_check_fan_inputs(sio_data, data); + superio_exit(sio_data->sioreg); + /* Read fan clock dividers immediately */ w83627ehf_update_fan_div_common(dev, data); @@ -2712,8 +2722,11 @@ static int __init w83627ehf_find(int sioaddr, unsigned short *addr, u16 val; const char *sio_name; + int err; - superio_enter(sioaddr); + err = superio_enter(sioaddr); + if (err) + return err; if (force_id) val = force_id; diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index feb30061123b..5901937284e7 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -107,7 +107,8 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, memcpy(scsi_req(rq)->cmd, pc->c, 12); if (drive->media == ide_tape) scsi_req(rq)->cmd[13] = REQ_IDETAPE_PC1; - error = blk_execute_rq(drive->queue, disk, rq, 0); + blk_execute_rq(drive->queue, disk, rq, 0); + error = scsi_req(rq)->result ? -EIO : 0; put_req: blk_put_request(rq); return error; @@ -454,7 +455,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) debug_log("%s: I/O error\n", drive->name); if (drive->media != ide_tape) - pc->rq->errors++; + scsi_req(pc->rq)->result++; if (scsi_req(rq)->cmd[0] == REQUEST_SENSE) { printk(KERN_ERR PFX "%s: I/O error in request " @@ -488,13 +489,13 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) drive->failed_pc = NULL; if (ata_misc_request(rq)) { - rq->errors = 0; + scsi_req(rq)->result = 0; error = 0; } else { if (blk_rq_is_passthrough(rq) && uptodate <= 0) { - if (rq->errors == 0) - rq->errors = -EIO; + if (scsi_req(rq)->result == 0) + scsi_req(rq)->result = -EIO; } error = uptodate ? 
0 : -EIO; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 74f1b7dc03f7..07e5ff3a64c3 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -247,10 +247,10 @@ static int ide_cd_breathe(ide_drive_t *drive, struct request *rq) struct cdrom_info *info = drive->driver_data; - if (!rq->errors) + if (!scsi_req(rq)->result) info->write_timeout = jiffies + ATAPI_WAIT_WRITE_BUSY; - rq->errors = 1; + scsi_req(rq)->result = 1; if (time_after(jiffies, info->write_timeout)) return 0; @@ -294,8 +294,8 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) } /* if we have an error, pass CHECK_CONDITION as the SCSI status byte */ - if (blk_rq_is_scsi(rq) && !rq->errors) - rq->errors = SAM_STAT_CHECK_CONDITION; + if (blk_rq_is_scsi(rq) && !scsi_req(rq)->result) + scsi_req(rq)->result = SAM_STAT_CHECK_CONDITION; if (blk_noretry_request(rq)) do_end_request = 1; @@ -325,7 +325,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) * Arrange to retry the request but be sure to give up if we've * retried too many times. */ - if (++rq->errors > ERROR_MAX) + if (++scsi_req(rq)->result > ERROR_MAX) do_end_request = 1; break; case ILLEGAL_REQUEST: @@ -372,7 +372,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) /* go to the default handler for other errors */ ide_error(drive, "cdrom_decode_status", stat); return 1; - } else if (++rq->errors > ERROR_MAX) + } else if (++scsi_req(rq)->result > ERROR_MAX) /* we've racked up too many retries, abort */ do_end_request = 1; } @@ -452,7 +452,8 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, } } - error = blk_execute_rq(drive->queue, info->disk, rq, 0); + blk_execute_rq(drive->queue, info->disk, rq, 0); + error = scsi_req(rq)->result ? 
-EIO : 0; if (buffer) *bufflen = scsi_req(rq)->resid_len; @@ -683,8 +684,8 @@ out_end: if (cmd->nleft == 0) uptodate = 1; } else { - if (uptodate <= 0 && rq->errors == 0) - rq->errors = -EIO; + if (uptodate <= 0 && scsi_req(rq)->result == 0) + scsi_req(rq)->result = -EIO; } if (uptodate == 0 && rq->bio) @@ -1379,7 +1380,7 @@ static int ide_cdrom_prep_pc(struct request *rq) * appropriate action */ if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) { - rq->errors = ILLEGAL_REQUEST; + scsi_req(rq)->result = ILLEGAL_REQUEST; return BLKPREP_KILL; } diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c index 9fcefbc8425e..55cd736c39c6 100644 --- a/drivers/ide/ide-cd_ioctl.c +++ b/drivers/ide/ide-cd_ioctl.c @@ -307,7 +307,8 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi) scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_MISC; rq->rq_flags = RQF_QUIET; - ret = blk_execute_rq(drive->queue, cd->disk, rq, 0); + blk_execute_rq(drive->queue, cd->disk, rq, 0); + ret = scsi_req(rq)->result ? -EIO : 0; blk_put_request(rq); /* * A reset will unlock the door. 
If it was previously locked, diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index a45dda5386e4..9b69c32ee560 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -173,8 +173,8 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, *(int *)&scsi_req(rq)->cmd[1] = arg; rq->special = setting->set; - if (blk_execute_rq(q, NULL, rq, 0)) - ret = rq->errors; + blk_execute_rq(q, NULL, rq, 0); + ret = scsi_req(rq)->result; blk_put_request(rq); return ret; @@ -186,7 +186,7 @@ ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq) err = setfunc(drive, *(int *)&scsi_req(rq)->cmd[1]); if (err) - rq->errors = err; - ide_complete_rq(drive, err, blk_rq_bytes(rq)); + scsi_req(rq)->result = err; + ide_complete_rq(drive, 0, blk_rq_bytes(rq)); return ide_stopped; } diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 186159715b71..7c06237f3479 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -470,7 +470,6 @@ ide_devset_get(multcount, mult_count); static int set_multcount(ide_drive_t *drive, int arg) { struct request *rq; - int error; if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff)) return -EINVAL; @@ -484,7 +483,7 @@ static int set_multcount(ide_drive_t *drive, int arg) drive->mult_req = arg; drive->special_flags |= IDE_SFLAG_SET_MULTMODE; - error = blk_execute_rq(drive->queue, NULL, rq, 0); + blk_execute_rq(drive->queue, NULL, rq, 0); blk_put_request(rq); return (drive->mult_count == arg) ? 
0 : -EIO; diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 17a65ac56491..51c81223e56d 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -490,7 +490,7 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) * make sure request is sane */ if (hwif->rq) - hwif->rq->errors = 0; + scsi_req(hwif->rq)->result = 0; return ret; } diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index cf3af6840368..4b7ffd7d158d 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -12,7 +12,7 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, if ((stat & ATA_BUSY) || ((stat & ATA_DF) && (drive->dev_flags & IDE_DFLAG_NOWERR) == 0)) { /* other bits are useless when BUSY */ - rq->errors |= ERROR_RESET; + scsi_req(rq)->result |= ERROR_RESET; } else if (stat & ATA_ERR) { /* err has different meaning on cdrom and tape */ if (err == ATA_ABORTED) { @@ -25,10 +25,10 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, drive->crc_count++; } else if (err & (ATA_BBK | ATA_UNC)) { /* retries won't help these */ - rq->errors = ERROR_MAX; + scsi_req(rq)->result = ERROR_MAX; } else if (err & ATA_TRK0NF) { /* help it find track zero */ - rq->errors |= ERROR_RECAL; + scsi_req(rq)->result |= ERROR_RECAL; } } @@ -39,23 +39,23 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, ide_pad_transfer(drive, READ, nsect * SECTOR_SIZE); } - if (rq->errors >= ERROR_MAX || blk_noretry_request(rq)) { + if (scsi_req(rq)->result >= ERROR_MAX || blk_noretry_request(rq)) { ide_kill_rq(drive, rq); return ide_stopped; } if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ)) - rq->errors |= ERROR_RESET; + scsi_req(rq)->result |= ERROR_RESET; - if ((rq->errors & ERROR_RESET) == ERROR_RESET) { - ++rq->errors; + if ((scsi_req(rq)->result & ERROR_RESET) == ERROR_RESET) { + ++scsi_req(rq)->result; return ide_do_reset(drive); } - if ((rq->errors & ERROR_RECAL) == ERROR_RECAL) + if 
((scsi_req(rq)->result & ERROR_RECAL) == ERROR_RECAL) drive->special_flags |= IDE_SFLAG_RECALIBRATE; - ++rq->errors; + ++scsi_req(rq)->result; return ide_stopped; } @@ -68,7 +68,7 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq, if ((stat & ATA_BUSY) || ((stat & ATA_DF) && (drive->dev_flags & IDE_DFLAG_NOWERR) == 0)) { /* other bits are useless when BUSY */ - rq->errors |= ERROR_RESET; + scsi_req(rq)->result |= ERROR_RESET; } else { /* add decoding error stuff */ } @@ -77,14 +77,14 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq, /* force an abort */ hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE); - if (rq->errors >= ERROR_MAX) { + if (scsi_req(rq)->result >= ERROR_MAX) { ide_kill_rq(drive, rq); } else { - if ((rq->errors & ERROR_RESET) == ERROR_RESET) { - ++rq->errors; + if ((scsi_req(rq)->result & ERROR_RESET) == ERROR_RESET) { + ++scsi_req(rq)->result; return ide_do_reset(drive); } - ++rq->errors; + ++scsi_req(rq)->result; } return ide_stopped; @@ -130,11 +130,11 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat) if (cmd) ide_complete_cmd(drive, cmd, stat, err); } else if (ata_pm_request(rq)) { - rq->errors = 1; + scsi_req(rq)->result = 1; ide_complete_pm_rq(drive, rq); return ide_stopped; } - rq->errors = err; + scsi_req(rq)->result = err; ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq)); return ide_stopped; } @@ -149,8 +149,8 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) if (rq && ata_misc_request(rq) && scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) { - if (err <= 0 && rq->errors == 0) - rq->errors = -EIO; + if (err <= 0 && scsi_req(rq)->result == 0) + scsi_req(rq)->result = -EIO; ide_complete_rq(drive, err ? 
err : 0, blk_rq_bytes(rq)); } } diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index a69e8013f1df..8ac6048cd2df 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -98,7 +98,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc) } if (ata_misc_request(rq)) - rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL; + scsi_req(rq)->result = uptodate ? 0 : IDE_DRV_ERROR_GENERAL; return uptodate; } @@ -239,7 +239,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, ? rq->rq_disk->disk_name : "dev?")); - if (rq->errors >= ERROR_MAX) { + if (scsi_req(rq)->result >= ERROR_MAX) { if (drive->failed_pc) { ide_floppy_report_error(floppy, drive->failed_pc); drive->failed_pc = NULL; @@ -247,7 +247,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, printk(KERN_ERR PFX "%s: I/O error\n", drive->name); if (ata_misc_request(rq)) { - rq->errors = 0; + scsi_req(rq)->result = 0; ide_complete_rq(drive, 0, blk_rq_bytes(rq)); return ide_stopped; } else @@ -301,8 +301,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, return ide_floppy_issue_pc(drive, &cmd, pc); out_end: drive->failed_pc = NULL; - if (blk_rq_is_passthrough(rq) && rq->errors == 0) - rq->errors = -EIO; + if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0) + scsi_req(rq)->result = -EIO; ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); return ide_stopped; } diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 043b1fb963cb..45b3f41a43d4 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -141,12 +141,12 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq) drive->failed_pc = NULL; if ((media == ide_floppy || media == ide_tape) && drv_req) { - rq->errors = 0; + scsi_req(rq)->result = 0; } else { if (media == ide_tape) - rq->errors = IDE_DRV_ERROR_GENERAL; - else if (blk_rq_is_passthrough(rq) && rq->errors == 0) - rq->errors = -EIO; + scsi_req(rq)->result = IDE_DRV_ERROR_GENERAL; + else if 
(blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0) + scsi_req(rq)->result = -EIO; } ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); @@ -271,7 +271,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, #ifdef DEBUG printk("%s: DRIVE_CMD (null)\n", drive->name); #endif - rq->errors = 0; + scsi_req(rq)->result = 0; ide_complete_rq(drive, 0, blk_rq_bytes(rq)); return ide_stopped; diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index 248a3e0ceb46..8c0d17297a7a 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -128,7 +128,8 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_TASKFILE; - err = blk_execute_rq(drive->queue, NULL, rq, 0); + blk_execute_rq(drive->queue, NULL, rq, 0); + err = scsi_req(rq)->result ? -EIO : 0; blk_put_request(rq); return err; @@ -227,8 +228,8 @@ static int generic_drive_reset(ide_drive_t *drive) ide_req(rq)->type = ATA_PRIV_MISC; scsi_req(rq)->cmd_len = 1; scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET; - if (blk_execute_rq(drive->queue, NULL, rq, 1)) - ret = rq->errors; + blk_execute_rq(drive->queue, NULL, rq, 1); + ret = scsi_req(rq)->result; blk_put_request(rq); return ret; } diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 101aed9a61ca..94e3107f59b9 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -37,7 +37,8 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) scsi_req(rq)->cmd_len = 1; ide_req(rq)->type = ATA_PRIV_MISC; rq->special = &timeout; - rc = blk_execute_rq(q, NULL, rq, 1); + blk_execute_rq(q, NULL, rq, 1); + rc = scsi_req(rq)->result ? 
-EIO : 0; blk_put_request(rq); if (rc) goto out; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index ec951be4b0c8..0977fc1f40ce 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -27,7 +27,8 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) mesg.event = PM_EVENT_FREEZE; rqpm.pm_state = mesg.event; - ret = blk_execute_rq(drive->queue, NULL, rq, 0); + blk_execute_rq(drive->queue, NULL, rq, 0); + ret = scsi_req(rq)->result ? -EIO : 0; blk_put_request(rq); if (ret == 0 && ide_port_acpi(hwif)) { @@ -55,8 +56,8 @@ static int ide_pm_execute_rq(struct request *rq) spin_lock_irq(q->queue_lock); if (unlikely(blk_queue_dying(q))) { rq->rq_flags |= RQF_QUIET; - rq->errors = -ENXIO; - __blk_end_request_all(rq, rq->errors); + scsi_req(rq)->result = -ENXIO; + __blk_end_request_all(rq, 0); spin_unlock_irq(q->queue_lock); return -ENXIO; } @@ -66,7 +67,7 @@ static int ide_pm_execute_rq(struct request *rq) wait_for_completion_io(&wait); - return rq->errors ? -EIO : 0; + return scsi_req(rq)->result ? 
-EIO : 0; } int generic_ide_resume(struct device *dev) diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index d8a552b47718..a0651f948b76 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -366,7 +366,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc) err = pc->error; } } - rq->errors = err; + scsi_req(rq)->result = err; return uptodate; } @@ -879,7 +879,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) tape->valid = 0; ret = size; - if (rq->errors == IDE_DRV_ERROR_GENERAL) + if (scsi_req(rq)->result == IDE_DRV_ERROR_GENERAL) ret = -EIO; out_put: blk_put_request(rq); diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 4c0007cb74e3..d71199d23c9e 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -287,7 +287,7 @@ static void ide_pio_datablock(ide_drive_t *drive, struct ide_cmd *cmd, u8 saved_io_32bit = drive->io_32bit; if (cmd->tf_flags & IDE_TFLAG_FS) - cmd->rq->errors = 0; + scsi_req(cmd->rq)->result = 0; if (cmd->tf_flags & IDE_TFLAG_IO_16BIT) drive->io_32bit = 0; @@ -329,7 +329,7 @@ void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat) u8 set_xfer = !!(cmd->tf_flags & IDE_TFLAG_SET_XFER); ide_complete_cmd(drive, cmd, stat, err); - rq->errors = err; + scsi_req(rq)->result = err; if (err == 0 && set_xfer) { ide_set_xfer_rate(drive, nsect); @@ -452,8 +452,8 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, rq->special = cmd; cmd->rq = rq; - error = blk_execute_rq(drive->queue, NULL, rq, 0); - + blk_execute_rq(drive->queue, NULL, rq, 0); + error = scsi_req(rq)->result ? 
-EIO : 0; put_req: blk_put_request(rq); return error; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 91cbe86b25c8..fcbed35e95a8 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -817,6 +817,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count) rx_wr->sg_list = &rx_desc->rx_sg; rx_wr->num_sge = 1; rx_wr->next = rx_wr + 1; + rx_desc->in_use = false; } rx_wr--; rx_wr->next = NULL; /* mark end of work requests list */ @@ -835,6 +836,15 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc) struct ib_recv_wr *rx_wr_failed, rx_wr; int ret; + if (!rx_desc->in_use) { + /* + * if the descriptor is not in-use we already reposted it + * for recv, so just silently return + */ + return 0; + } + + rx_desc->in_use = false; rx_wr.wr_cqe = &rx_desc->rx_cqe; rx_wr.sg_list = &rx_desc->rx_sg; rx_wr.num_sge = 1; @@ -1397,6 +1407,8 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc) return; } + rx_desc->in_use = true; + ib_dma_sync_single_for_cpu(ib_dev, rx_desc->dma_addr, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); @@ -1659,10 +1671,23 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) ret = isert_check_pi_status(cmd, isert_cmd->rw.sig->sig_mr); isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn); - if (ret) - transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0); - else - isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd); + if (ret) { + /* + * transport_generic_request_failure() expects to have + * plus two references to handle queue-full, so re-add + * one here as target-core will have already dropped + * it after the first isert_put_datain() callback. + */ + kref_get(&cmd->cmd_kref); + transport_generic_request_failure(cmd, cmd->pi_err); + } else { + /* + * XXX: isert_put_response() failure is not retried. 
+ */ + ret = isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd); + if (ret) + pr_warn_ratelimited("isert_put_response() ret: %d\n", ret); + } } static void @@ -1699,13 +1724,15 @@ isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT; spin_unlock_bh(&cmd->istate_lock); - if (ret) { - target_put_sess_cmd(se_cmd); - transport_send_check_condition_and_sense(se_cmd, - se_cmd->pi_err, 0); - } else { + /* + * transport_generic_request_failure() will drop the extra + * se_cmd->cmd_kref reference after T10-PI error, and handle + * any non-zero ->queue_status() callback error retries. + */ + if (ret) + transport_generic_request_failure(se_cmd, se_cmd->pi_err); + else target_execute_cmd(se_cmd); - } } static void @@ -2171,26 +2198,28 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) chain_wr = &isert_cmd->tx_desc.send_wr; } - isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr); - isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", isert_cmd); - return 1; + rc = isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr); + isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ rc: %d\n", + isert_cmd, rc); + return rc; } static int isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) { struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + int ret; isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n", isert_cmd, cmd->se_cmd.data_length, cmd->write_data_done); isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done; - isert_rdma_rw_ctx_post(isert_cmd, conn->context, - &isert_cmd->tx_desc.tx_cqe, NULL); + ret = isert_rdma_rw_ctx_post(isert_cmd, conn->context, + &isert_cmd->tx_desc.tx_cqe, NULL); - isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n", - isert_cmd); - return 0; + isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE rc: %d\n", + isert_cmd, ret); + return ret; } static int diff --git 
a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index c02ada57d7f5..87d994de8c91 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -60,7 +60,7 @@ #define ISER_RX_PAD_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 4096 - \ (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge) + \ - sizeof(struct ib_cqe))) + sizeof(struct ib_cqe) + sizeof(bool))) #define ISCSI_ISER_SG_TABLESIZE 256 @@ -85,6 +85,7 @@ struct iser_rx_desc { u64 dma_addr; struct ib_sge rx_sg; struct ib_cqe rx_cqe; + bool in_use; char pad[ISER_RX_PAD_SIZE]; } __packed; diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 155fcb3b6230..153b1ee13e03 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -202,6 +202,7 @@ static const struct xpad_device { { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 }, { 0x1532, 0x0037, "Razer Sabertooth", 0, XTYPE_XBOX360 }, + { 0x1532, 0x0a03, "Razer Wildcat", 0, XTYPE_XBOXONE }, { 0x15e4, 0x3f00, "Power A Mini Pro Elite", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f0a, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f10, "Batarang Xbox 360 controller", 0, XTYPE_XBOX360 }, @@ -326,6 +327,7 @@ static struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x1430), /* RedOctane X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x146b), /* BigBen Interactive Controllers */ XPAD_XBOX360_VENDOR(0x1532), /* Razer Sabertooth */ + XPAD_XBOXONE_VENDOR(0x1532), /* Razer Wildcat */ XPAD_XBOX360_VENDOR(0x15e4), /* Numark X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x162e), /* Joytech X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index efc8ec342351..e73d968023f7 100644 --- a/drivers/input/mouse/elantech.c +++ 
b/drivers/input/mouse/elantech.c @@ -1118,6 +1118,7 @@ static int elantech_get_resolution_v4(struct psmouse *psmouse, * Asus UX32VD 0x361f02 00, 15, 0e clickpad * Avatar AVIU-145A2 0x361f00 ? clickpad * Fujitsu LIFEBOOK E544 0x470f00 d0, 12, 09 2 hw buttons + * Fujitsu LIFEBOOK E547 0x470f00 50, 12, 09 2 hw buttons * Fujitsu LIFEBOOK E554 0x570f01 40, 14, 0c 2 hw buttons * Fujitsu T725 0x470f01 05, 12, 09 2 hw buttons * Fujitsu H730 0x570f00 c0, 14, 0c 3 hw buttons (**) @@ -1524,6 +1525,13 @@ static const struct dmi_system_id elantech_dmi_force_crc_enabled[] = { }, }, { + /* Fujitsu LIFEBOOK E547 does not work with crc_enabled == 0 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E547"), + }, + }, + { /* Fujitsu LIFEBOOK E554 does not work with crc_enabled == 0 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 312bd6ca9198..09720d950686 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -620,6 +620,13 @@ static const struct dmi_system_id __initconst i8042_dmi_reset_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "20046"), }, }, + { + /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), + DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"), + }, + }, { } }; diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 8162121bb1bc..595d0c95563b 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -115,6 +115,12 @@ config DW_APB_ICTL select GENERIC_IRQ_CHIP select IRQ_DOMAIN +config FARADAY_FTINTC010 + bool + select IRQ_DOMAIN + select MULTI_IRQ_HANDLER + select SPARSE_IRQ + config HISILICON_IRQ_MBIGEN bool select ARM_GIC_V3 diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 152bc40b6762..b64c59b838a0 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -6,7 +6,7 @@ 
obj-$(CONFIG_ATH79) += irq-ath79-misc.o obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2836.o obj-$(CONFIG_ARCH_EXYNOS) += exynos-combiner.o -obj-$(CONFIG_ARCH_GEMINI) += irq-gemini.o +obj-$(CONFIG_FARADAY_FTINTC010) += irq-ftintc010.o obj-$(CONFIG_ARCH_HIP04) += irq-hip04.o obj-$(CONFIG_ARCH_LPC32XX) += irq-lpc32xx.o obj-$(CONFIG_ARCH_MMP) += irq-mmp.o @@ -16,7 +16,6 @@ obj-$(CONFIG_ARCH_S3C24XX) += irq-s3c24xx.o obj-$(CONFIG_DW_APB_ICTL) += irq-dw-apb-ictl.o obj-$(CONFIG_METAG) += irq-metag-ext.o obj-$(CONFIG_METAG_PERFCOUNTER_IRQS) += irq-metag.o -obj-$(CONFIG_ARCH_MOXART) += irq-moxart.o obj-$(CONFIG_CLPS711X_IRQCHIP) += irq-clps711x.o obj-$(CONFIG_OR1K_PIC) += irq-or1k-pic.o obj-$(CONFIG_ORION_IRQCHIP) += irq-orion.o @@ -62,7 +61,7 @@ obj-$(CONFIG_BCM7120_L2_IRQ) += irq-bcm7120-l2.o obj-$(CONFIG_BRCMSTB_L2_IRQ) += irq-brcmstb-l2.o obj-$(CONFIG_KEYSTONE_IRQ) += irq-keystone.o obj-$(CONFIG_MIPS_GIC) += irq-mips-gic.o -obj-$(CONFIG_ARCH_MEDIATEK) += irq-mtk-sysirq.o +obj-$(CONFIG_ARCH_MEDIATEK) += irq-mtk-sysirq.o irq-mtk-cirq.o obj-$(CONFIG_ARCH_DIGICOLOR) += irq-digicolor.o obj-$(CONFIG_RENESAS_H8300H_INTC) += irq-renesas-h8300h.o obj-$(CONFIG_RENESAS_H8S_INTC) += irq-renesas-h8s.o diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c index 2a624d87a035..c04ee9a23d09 100644 --- a/drivers/irqchip/irq-atmel-aic5.c +++ b/drivers/irqchip/irq-atmel-aic5.c @@ -150,6 +150,8 @@ static int aic5_set_type(struct irq_data *d, unsigned type) } #ifdef CONFIG_PM +static u32 *smr_cache; + static void aic5_suspend(struct irq_data *d) { struct irq_domain *domain = d->domain; @@ -159,6 +161,12 @@ static void aic5_suspend(struct irq_data *d) int i; u32 mask; + if (smr_cache) + for (i = 0; i < domain->revmap_size; i++) { + irq_reg_writel(bgc, i, AT91_AIC5_SSR); + smr_cache[i] = irq_reg_readl(bgc, AT91_AIC5_SMR); + } + irq_gc_lock(bgc); for (i = 0; i < dgc->irqs_per_chip; i++) { mask = 1 << i; @@ -184,9 +192,21 @@ static 
void aic5_resume(struct irq_data *d) u32 mask; irq_gc_lock(bgc); + + if (smr_cache) { + irq_reg_writel(bgc, 0xffffffff, AT91_AIC5_SPU); + for (i = 0; i < domain->revmap_size; i++) { + irq_reg_writel(bgc, i, AT91_AIC5_SSR); + irq_reg_writel(bgc, i, AT91_AIC5_SVR); + irq_reg_writel(bgc, smr_cache[i], AT91_AIC5_SMR); + } + } + for (i = 0; i < dgc->irqs_per_chip; i++) { mask = 1 << i; - if ((mask & gc->mask_cache) == (mask & gc->wake_active)) + + if (!smr_cache && + ((mask & gc->mask_cache) == (mask & gc->wake_active))) continue; irq_reg_writel(bgc, i + gc->irq_base, AT91_AIC5_SSR); @@ -342,6 +362,13 @@ static int __init aic5_of_init(struct device_node *node, static int __init sama5d2_aic5_of_init(struct device_node *node, struct device_node *parent) { +#ifdef CONFIG_PM + smr_cache = kcalloc(DIV_ROUND_UP(NR_SAMA5D2_IRQS, 32) * 32, + sizeof(*smr_cache), GFP_KERNEL); + if (!smr_cache) + return -ENOMEM; +#endif + return aic5_of_init(node, parent, NR_SAMA5D2_IRQS); } IRQCHIP_DECLARE(sama5d2_aic5, "atmel,sama5d2-aic", sama5d2_aic5_of_init); diff --git a/drivers/irqchip/irq-ftintc010.c b/drivers/irqchip/irq-ftintc010.c new file mode 100644 index 000000000000..cd2dc8bbbe9c --- /dev/null +++ b/drivers/irqchip/irq-ftintc010.c @@ -0,0 +1,194 @@ +/* + * irqchip for the Faraday Technology FTINTC010 Copyright (C) 2017 Linus + * Walleij <linus.walleij@linaro.org> + * + * Based on arch/arm/mach-gemini/irq.c + * Copyright (C) 2001-2006 Storlink, Corp. 
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@gmail.com> + */ +#include <linux/bitops.h> +#include <linux/irq.h> +#include <linux/io.h> +#include <linux/irqchip.h> +#include <linux/irqchip/versatile-fpga.h> +#include <linux/irqdomain.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/cpu.h> + +#include <asm/exception.h> +#include <asm/mach/irq.h> + +#define FT010_NUM_IRQS 32 + +#define FT010_IRQ_SOURCE(base_addr) (base_addr + 0x00) +#define FT010_IRQ_MASK(base_addr) (base_addr + 0x04) +#define FT010_IRQ_CLEAR(base_addr) (base_addr + 0x08) +/* Selects level- or edge-triggered */ +#define FT010_IRQ_MODE(base_addr) (base_addr + 0x0C) +/* Selects active low/high or falling/rising edge */ +#define FT010_IRQ_POLARITY(base_addr) (base_addr + 0x10) +#define FT010_IRQ_STATUS(base_addr) (base_addr + 0x14) +#define FT010_FIQ_SOURCE(base_addr) (base_addr + 0x20) +#define FT010_FIQ_MASK(base_addr) (base_addr + 0x24) +#define FT010_FIQ_CLEAR(base_addr) (base_addr + 0x28) +#define FT010_FIQ_MODE(base_addr) (base_addr + 0x2C) +#define FT010_FIQ_POLARITY(base_addr) (base_addr + 0x30) +#define FT010_FIQ_STATUS(base_addr) (base_addr + 0x34) + +/** + * struct ft010_irq_data - irq data container for the Faraday IRQ controller + * @base: memory offset in virtual memory + * @chip: chip container for this instance + * @domain: IRQ domain for this instance + */ +struct ft010_irq_data { + void __iomem *base; + struct irq_chip chip; + struct irq_domain *domain; +}; + +static void ft010_irq_mask(struct irq_data *d) +{ + struct ft010_irq_data *f = irq_data_get_irq_chip_data(d); + unsigned int mask; + + mask = readl(FT010_IRQ_MASK(f->base)); + mask &= ~BIT(irqd_to_hwirq(d)); + writel(mask, FT010_IRQ_MASK(f->base)); +} + +static void ft010_irq_unmask(struct irq_data *d) +{ + struct ft010_irq_data *f = irq_data_get_irq_chip_data(d); + unsigned int mask; + + mask = readl(FT010_IRQ_MASK(f->base)); + mask 
|= BIT(irqd_to_hwirq(d)); + writel(mask, FT010_IRQ_MASK(f->base)); +} + +static void ft010_irq_ack(struct irq_data *d) +{ + struct ft010_irq_data *f = irq_data_get_irq_chip_data(d); + + writel(BIT(irqd_to_hwirq(d)), FT010_IRQ_CLEAR(f->base)); +} + +static int ft010_irq_set_type(struct irq_data *d, unsigned int trigger) +{ + struct ft010_irq_data *f = irq_data_get_irq_chip_data(d); + int offset = irqd_to_hwirq(d); + u32 mode, polarity; + + mode = readl(FT010_IRQ_MODE(f->base)); + polarity = readl(FT010_IRQ_POLARITY(f->base)); + + if (trigger & (IRQ_TYPE_LEVEL_LOW)) { + irq_set_handler_locked(d, handle_level_irq); + mode &= ~BIT(offset); + polarity |= BIT(offset); + } else if (trigger & (IRQ_TYPE_LEVEL_HIGH)) { + irq_set_handler_locked(d, handle_level_irq); + mode &= ~BIT(offset); + polarity &= ~BIT(offset); + } else if (trigger & IRQ_TYPE_EDGE_FALLING) { + irq_set_handler_locked(d, handle_edge_irq); + mode |= BIT(offset); + polarity |= BIT(offset); + } else if (trigger & IRQ_TYPE_EDGE_RISING) { + irq_set_handler_locked(d, handle_edge_irq); + mode |= BIT(offset); + polarity &= ~BIT(offset); + } else { + irq_set_handler_locked(d, handle_bad_irq); + pr_warn("Faraday IRQ: no supported trigger selected for line %d\n", + offset); + } + + writel(mode, FT010_IRQ_MODE(f->base)); + writel(polarity, FT010_IRQ_POLARITY(f->base)); + + return 0; +} + +static struct irq_chip ft010_irq_chip = { + .name = "FTINTC010", + .irq_ack = ft010_irq_ack, + .irq_mask = ft010_irq_mask, + .irq_unmask = ft010_irq_unmask, + .irq_set_type = ft010_irq_set_type, +}; + +/* Local static for the IRQ entry call */ +static struct ft010_irq_data firq; + +asmlinkage void __exception_irq_entry ft010_irqchip_handle_irq(struct pt_regs *regs) +{ + struct ft010_irq_data *f = &firq; + int irq; + u32 status; + + while ((status = readl(FT010_IRQ_STATUS(f->base)))) { + irq = ffs(status) - 1; + handle_domain_irq(f->domain, irq, regs); + } +} + +static int ft010_irqdomain_map(struct irq_domain *d, unsigned int irq, + 
irq_hw_number_t hwirq) +{ + struct ft010_irq_data *f = d->host_data; + + irq_set_chip_data(irq, f); + /* All IRQs should set up their type, flags as bad by default */ + irq_set_chip_and_handler(irq, &ft010_irq_chip, handle_bad_irq); + irq_set_probe(irq); + + return 0; +} + +static void ft010_irqdomain_unmap(struct irq_domain *d, unsigned int irq) +{ + irq_set_chip_and_handler(irq, NULL, NULL); + irq_set_chip_data(irq, NULL); +} + +static const struct irq_domain_ops ft010_irqdomain_ops = { + .map = ft010_irqdomain_map, + .unmap = ft010_irqdomain_unmap, + .xlate = irq_domain_xlate_onetwocell, +}; + +int __init ft010_of_init_irq(struct device_node *node, + struct device_node *parent) +{ + struct ft010_irq_data *f = &firq; + + /* + * Disable the idle handler by default since it is buggy + * For more info see arch/arm/mach-gemini/idle.c + */ + cpu_idle_poll_ctrl(true); + + f->base = of_iomap(node, 0); + WARN(!f->base, "unable to map gemini irq registers\n"); + + /* Disable all interrupts */ + writel(0, FT010_IRQ_MASK(f->base)); + writel(0, FT010_FIQ_MASK(f->base)); + + f->domain = irq_domain_add_simple(node, FT010_NUM_IRQS, 0, + &ft010_irqdomain_ops, f); + set_handle_irq(ft010_irqchip_handle_irq); + + return 0; +} +IRQCHIP_DECLARE(faraday, "faraday,ftintc010", + ft010_of_init_irq); +IRQCHIP_DECLARE(gemini, "cortina,gemini-interrupt-controller", + ft010_of_init_irq); +IRQCHIP_DECLARE(moxa, "moxa,moxart-ic", + ft010_of_init_irq); diff --git a/drivers/irqchip/irq-gemini.c b/drivers/irqchip/irq-gemini.c deleted file mode 100644 index 495224c743ee..000000000000 --- a/drivers/irqchip/irq-gemini.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * irqchip for the Cortina Systems Gemini Copyright (C) 2017 Linus - * Walleij <linus.walleij@linaro.org> - * - * Based on arch/arm/mach-gemini/irq.c - * Copyright (C) 2001-2006 Storlink, Corp. 
- * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> - */ -#include <linux/bitops.h> -#include <linux/irq.h> -#include <linux/io.h> -#include <linux/irqchip.h> -#include <linux/irqchip/versatile-fpga.h> -#include <linux/irqdomain.h> -#include <linux/module.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> -#include <linux/cpu.h> - -#include <asm/exception.h> -#include <asm/mach/irq.h> - -#define GEMINI_NUM_IRQS 32 - -#define GEMINI_IRQ_SOURCE(base_addr) (base_addr + 0x00) -#define GEMINI_IRQ_MASK(base_addr) (base_addr + 0x04) -#define GEMINI_IRQ_CLEAR(base_addr) (base_addr + 0x08) -#define GEMINI_IRQ_MODE(base_addr) (base_addr + 0x0C) -#define GEMINI_IRQ_POLARITY(base_addr) (base_addr + 0x10) -#define GEMINI_IRQ_STATUS(base_addr) (base_addr + 0x14) -#define GEMINI_FIQ_SOURCE(base_addr) (base_addr + 0x20) -#define GEMINI_FIQ_MASK(base_addr) (base_addr + 0x24) -#define GEMINI_FIQ_CLEAR(base_addr) (base_addr + 0x28) -#define GEMINI_FIQ_MODE(base_addr) (base_addr + 0x2C) -#define GEMINI_FIQ_POLARITY(base_addr) (base_addr + 0x30) -#define GEMINI_FIQ_STATUS(base_addr) (base_addr + 0x34) - -/** - * struct gemini_irq_data - irq data container for the Gemini IRQ controller - * @base: memory offset in virtual memory - * @chip: chip container for this instance - * @domain: IRQ domain for this instance - */ -struct gemini_irq_data { - void __iomem *base; - struct irq_chip chip; - struct irq_domain *domain; -}; - -static void gemini_irq_mask(struct irq_data *d) -{ - struct gemini_irq_data *g = irq_data_get_irq_chip_data(d); - unsigned int mask; - - mask = readl(GEMINI_IRQ_MASK(g->base)); - mask &= ~BIT(irqd_to_hwirq(d)); - writel(mask, GEMINI_IRQ_MASK(g->base)); -} - -static void gemini_irq_unmask(struct irq_data *d) -{ - struct gemini_irq_data *g = irq_data_get_irq_chip_data(d); - unsigned int mask; - - mask = readl(GEMINI_IRQ_MASK(g->base)); - mask |= BIT(irqd_to_hwirq(d)); - writel(mask, GEMINI_IRQ_MASK(g->base)); -} 
- -static void gemini_irq_ack(struct irq_data *d) -{ - struct gemini_irq_data *g = irq_data_get_irq_chip_data(d); - - writel(BIT(irqd_to_hwirq(d)), GEMINI_IRQ_CLEAR(g->base)); -} - -static int gemini_irq_set_type(struct irq_data *d, unsigned int trigger) -{ - struct gemini_irq_data *g = irq_data_get_irq_chip_data(d); - int offset = irqd_to_hwirq(d); - u32 mode, polarity; - - mode = readl(GEMINI_IRQ_MODE(g->base)); - polarity = readl(GEMINI_IRQ_POLARITY(g->base)); - - if (trigger & (IRQ_TYPE_LEVEL_HIGH)) { - irq_set_handler_locked(d, handle_level_irq); - /* Disable edge detection */ - mode &= ~BIT(offset); - polarity &= ~BIT(offset); - } else if (trigger & IRQ_TYPE_EDGE_RISING) { - irq_set_handler_locked(d, handle_edge_irq); - mode |= BIT(offset); - polarity |= BIT(offset); - } else if (trigger & IRQ_TYPE_EDGE_FALLING) { - irq_set_handler_locked(d, handle_edge_irq); - mode |= BIT(offset); - polarity &= ~BIT(offset); - } else { - irq_set_handler_locked(d, handle_bad_irq); - pr_warn("GEMINI IRQ: no supported trigger selected for line %d\n", - offset); - } - - writel(mode, GEMINI_IRQ_MODE(g->base)); - writel(polarity, GEMINI_IRQ_POLARITY(g->base)); - - return 0; -} - -static struct irq_chip gemini_irq_chip = { - .name = "GEMINI", - .irq_ack = gemini_irq_ack, - .irq_mask = gemini_irq_mask, - .irq_unmask = gemini_irq_unmask, - .irq_set_type = gemini_irq_set_type, -}; - -/* Local static for the IRQ entry call */ -static struct gemini_irq_data girq; - -asmlinkage void __exception_irq_entry gemini_irqchip_handle_irq(struct pt_regs *regs) -{ - struct gemini_irq_data *g = &girq; - int irq; - u32 status; - - while ((status = readl(GEMINI_IRQ_STATUS(g->base)))) { - irq = ffs(status) - 1; - handle_domain_irq(g->domain, irq, regs); - } -} - -static int gemini_irqdomain_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hwirq) -{ - struct gemini_irq_data *g = d->host_data; - - irq_set_chip_data(irq, g); - /* All IRQs should set up their type, flags as bad by default */ 
- irq_set_chip_and_handler(irq, &gemini_irq_chip, handle_bad_irq); - irq_set_probe(irq); - - return 0; -} - -static void gemini_irqdomain_unmap(struct irq_domain *d, unsigned int irq) -{ - irq_set_chip_and_handler(irq, NULL, NULL); - irq_set_chip_data(irq, NULL); -} - -static const struct irq_domain_ops gemini_irqdomain_ops = { - .map = gemini_irqdomain_map, - .unmap = gemini_irqdomain_unmap, - .xlate = irq_domain_xlate_onetwocell, -}; - -int __init gemini_of_init_irq(struct device_node *node, - struct device_node *parent) -{ - struct gemini_irq_data *g = &girq; - - /* - * Disable the idle handler by default since it is buggy - * For more info see arch/arm/mach-gemini/idle.c - */ - cpu_idle_poll_ctrl(true); - - g->base = of_iomap(node, 0); - WARN(!g->base, "unable to map gemini irq registers\n"); - - /* Disable all interrupts */ - writel(0, GEMINI_IRQ_MASK(g->base)); - writel(0, GEMINI_FIQ_MASK(g->base)); - - g->domain = irq_domain_add_simple(node, GEMINI_NUM_IRQS, 0, - &gemini_irqdomain_ops, g); - set_handle_irq(gemini_irqchip_handle_irq); - - return 0; -} -IRQCHIP_DECLARE(gemini, "cortina,gemini-interrupt-controller", - gemini_of_init_irq); diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c b/drivers/irqchip/irq-gic-v3-its-platform-msi.c index 470b4aa7d62c..9e9dda33eb17 100644 --- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c @@ -15,6 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +#include <linux/acpi_iort.h> #include <linux/device.h> #include <linux/msi.h> #include <linux/of.h> @@ -24,15 +25,11 @@ static struct irq_chip its_pmsi_irq_chip = { .name = "ITS-pMSI", }; -static int its_pmsi_prepare(struct irq_domain *domain, struct device *dev, - int nvec, msi_alloc_info_t *info) +static int of_pmsi_get_dev_id(struct irq_domain *domain, struct device *dev, + u32 *dev_id) { - struct msi_domain_info *msi_info; - u32 dev_id; int ret, index = 0; - msi_info = msi_get_domain_info(domain->parent); - /* Suck the DeviceID out of the msi-parent property */ do { struct of_phandle_args args; @@ -43,11 +40,32 @@ static int its_pmsi_prepare(struct irq_domain *domain, struct device *dev, if (args.np == irq_domain_get_of_node(domain)) { if (WARN_ON(args.args_count != 1)) return -EINVAL; - dev_id = args.args[0]; + *dev_id = args.args[0]; break; } } while (!ret); + return ret; +} + +int __weak iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id) +{ + return -1; +} + +static int its_pmsi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *info) +{ + struct msi_domain_info *msi_info; + u32 dev_id; + int ret; + + msi_info = msi_get_domain_info(domain->parent); + + if (dev->of_node) + ret = of_pmsi_get_dev_id(domain, dev, &dev_id); + else + ret = iort_pmsi_get_dev_id(dev, &dev_id); if (ret) return ret; @@ -73,34 +91,79 @@ static struct of_device_id its_device_id[] = { {}, }; -static int __init its_pmsi_init(void) +static int __init its_pmsi_init_one(struct fwnode_handle *fwnode, + const char *name) { - struct device_node *np; struct irq_domain *parent; + parent = irq_find_matching_fwnode(fwnode, DOMAIN_BUS_NEXUS); + if (!parent || !msi_get_domain_info(parent)) { + pr_err("%s: unable to locate ITS domain\n", name); + return -ENXIO; + } + + if (!platform_msi_create_irq_domain(fwnode, &its_pmsi_domain_info, + parent)) { + pr_err("%s: unable to create platform domain\n", name); + return -ENXIO; + } + + pr_info("Platform MSI: %s domain 
created\n", name); + return 0; +} + +#ifdef CONFIG_ACPI +static int __init +its_pmsi_parse_madt(struct acpi_subtable_header *header, + const unsigned long end) +{ + struct acpi_madt_generic_translator *its_entry; + struct fwnode_handle *domain_handle; + const char *node_name; + int err = -ENXIO; + + its_entry = (struct acpi_madt_generic_translator *)header; + node_name = kasprintf(GFP_KERNEL, "ITS@0x%lx", + (long)its_entry->base_address); + domain_handle = iort_find_domain_token(its_entry->translation_id); + if (!domain_handle) { + pr_err("%s: Unable to locate ITS domain handle\n", node_name); + goto out; + } + + err = its_pmsi_init_one(domain_handle, node_name); + +out: + kfree(node_name); + return err; +} + +static void __init its_pmsi_acpi_init(void) +{ + acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR, + its_pmsi_parse_madt, 0); +} +#else +static inline void its_pmsi_acpi_init(void) { } +#endif + +static void __init its_pmsi_of_init(void) +{ + struct device_node *np; + for (np = of_find_matching_node(NULL, its_device_id); np; np = of_find_matching_node(np, its_device_id)) { if (!of_property_read_bool(np, "msi-controller")) continue; - parent = irq_find_matching_host(np, DOMAIN_BUS_NEXUS); - if (!parent || !msi_get_domain_info(parent)) { - pr_err("%s: unable to locate ITS domain\n", - np->full_name); - continue; - } - - if (!platform_msi_create_irq_domain(of_node_to_fwnode(np), - &its_pmsi_domain_info, - parent)) { - pr_err("%s: unable to create platform domain\n", - np->full_name); - continue; - } - - pr_info("Platform MSI: %s domain created\n", np->full_name); + its_pmsi_init_one(of_node_to_fwnode(np), np->full_name); } +} +static int __init its_pmsi_init(void) +{ + its_pmsi_of_init(); + its_pmsi_acpi_init(); return 0; } early_initcall(its_pmsi_init); diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index f77f840d2b5f..45ea193325d2 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ 
-16,13 +16,13 @@ */ #include <linux/acpi.h> +#include <linux/acpi_iort.h> #include <linux/bitmap.h> #include <linux/cpu.h> #include <linux/delay.h> #include <linux/dma-iommu.h> #include <linux/interrupt.h> #include <linux/irqdomain.h> -#include <linux/acpi_iort.h> #include <linux/log2.h> #include <linux/mm.h> #include <linux/msi.h> diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c index 15af9a9753e5..9463f3557e82 100644 --- a/drivers/irqchip/irq-imx-gpcv2.c +++ b/drivers/irqchip/irq-imx-gpcv2.c @@ -230,6 +230,8 @@ static int __init imx_gpcv2_irqchip_init(struct device_node *node, return -ENOMEM; } + raw_spin_lock_init(&cd->rlock); + cd->gpc_base = of_iomap(node, 0); if (!cd->gpc_base) { pr_err("fsl-gpcv2: unable to map gpc registers\n"); @@ -266,6 +268,11 @@ static int __init imx_gpcv2_irqchip_init(struct device_node *node, imx_gpcv2_instance = cd; register_syscore_ops(&imx_gpcv2_syscore_ops); + /* + * Clear the OF_POPULATED flag set in of_irq_init so that + * later the GPC power domain driver will not be skipped. + */ + of_node_clear_flag(node, OF_POPULATED); return 0; } diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 03b79b061d24..d2306c821ebb 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -16,6 +16,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +#include <linux/acpi.h> #include <linux/interrupt.h> #include <linux/irqchip.h> #include <linux/module.h> @@ -180,7 +181,7 @@ static int mbigen_domain_translate(struct irq_domain *d, unsigned long *hwirq, unsigned int *type) { - if (is_of_node(fwspec->fwnode)) { + if (is_of_node(fwspec->fwnode) || is_acpi_device_node(fwspec->fwnode)) { if (fwspec->param_count != 2) return -EINVAL; @@ -236,27 +237,15 @@ static struct irq_domain_ops mbigen_domain_ops = { .free = irq_domain_free_irqs_common, }; -static int mbigen_device_probe(struct platform_device *pdev) +static int mbigen_of_create_domain(struct platform_device *pdev, + struct mbigen_device *mgn_chip) { - struct mbigen_device *mgn_chip; + struct device *parent; struct platform_device *child; struct irq_domain *domain; struct device_node *np; - struct device *parent; - struct resource *res; u32 num_pins; - mgn_chip = devm_kzalloc(&pdev->dev, sizeof(*mgn_chip), GFP_KERNEL); - if (!mgn_chip) - return -ENOMEM; - - mgn_chip->pdev = pdev; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mgn_chip->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(mgn_chip->base)) - return PTR_ERR(mgn_chip->base); - for_each_child_of_node(pdev->dev.of_node, np) { if (!of_property_read_bool(np, "interrupt-controller")) continue; @@ -280,6 +269,91 @@ static int mbigen_device_probe(struct platform_device *pdev) return -ENOMEM; } + return 0; +} + +#ifdef CONFIG_ACPI +static int mbigen_acpi_create_domain(struct platform_device *pdev, + struct mbigen_device *mgn_chip) +{ + struct irq_domain *domain; + u32 num_pins = 0; + int ret; + + /* + * "num-pins" is the total number of interrupt pins implemented in + * this mbigen instance, and mbigen is an interrupt controller + * connected to ITS converting wired interrupts into MSI, so we + * use "num-pins" to alloc MSI vectors which are needed by client + * devices connected to it. 
+ * + * Here is the DSDT device node used for mbigen in firmware: + * Device(MBI0) { + * Name(_HID, "HISI0152") + * Name(_UID, Zero) + * Name(_CRS, ResourceTemplate() { + * Memory32Fixed(ReadWrite, 0xa0080000, 0x10000) + * }) + * + * Name(_DSD, Package () { + * ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + * Package () { + * Package () {"num-pins", 378} + * } + * }) + * } + */ + ret = device_property_read_u32(&pdev->dev, "num-pins", &num_pins); + if (ret || num_pins == 0) + return -EINVAL; + + domain = platform_msi_create_device_domain(&pdev->dev, num_pins, + mbigen_write_msg, + &mbigen_domain_ops, + mgn_chip); + if (!domain) + return -ENOMEM; + + return 0; +} +#else +static inline int mbigen_acpi_create_domain(struct platform_device *pdev, + struct mbigen_device *mgn_chip) +{ + return -ENODEV; +} +#endif + +static int mbigen_device_probe(struct platform_device *pdev) +{ + struct mbigen_device *mgn_chip; + struct resource *res; + int err; + + mgn_chip = devm_kzalloc(&pdev->dev, sizeof(*mgn_chip), GFP_KERNEL); + if (!mgn_chip) + return -ENOMEM; + + mgn_chip->pdev = pdev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + mgn_chip->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(mgn_chip->base)) + return PTR_ERR(mgn_chip->base); + + if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node) + err = mbigen_of_create_domain(pdev, mgn_chip); + else if (ACPI_COMPANION(&pdev->dev)) + err = mbigen_acpi_create_domain(pdev, mgn_chip); + else + err = -EINVAL; + + if (err) { + dev_err(&pdev->dev, "Failed to create mbi-gen@%p irqdomain", + mgn_chip->base); + return err; + } + platform_set_drvdata(pdev, mgn_chip); return 0; } @@ -290,11 +364,17 @@ static const struct of_device_id mbigen_of_match[] = { }; MODULE_DEVICE_TABLE(of, mbigen_of_match); +static const struct acpi_device_id mbigen_acpi_match[] = { + { "HISI0152", 0 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, mbigen_acpi_match); + static struct platform_driver mbigen_platform_driver = { .driver = { .name = 
"Hisilicon MBIGEN-V2", - .owner = THIS_MODULE, .of_match_table = mbigen_of_match, + .acpi_match_table = ACPI_PTR(mbigen_acpi_match), }, .probe = mbigen_device_probe, }; diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index cd20df12d63d..eb7fbe159963 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -29,25 +29,12 @@ struct gic_pcpu_mask { DECLARE_BITMAP(pcpu_mask, GIC_MAX_INTRS); }; -struct gic_irq_spec { - enum { - GIC_DEVICE, - GIC_IPI - } type; - - union { - struct cpumask *ipimask; - unsigned int hwirq; - }; -}; - static unsigned long __gic_base_addr; static void __iomem *gic_base; static struct gic_pcpu_mask pcpu_masks[NR_CPUS]; static DEFINE_SPINLOCK(gic_lock); static struct irq_domain *gic_irq_domain; -static struct irq_domain *gic_dev_domain; static struct irq_domain *gic_ipi_domain; static int gic_shared_intrs; static int gic_vpes; @@ -55,6 +42,7 @@ static unsigned int gic_cpu_pin; static unsigned int timer_cpu_pin; static struct irq_chip gic_level_irq_controller, gic_edge_irq_controller; DECLARE_BITMAP(ipi_resrv, GIC_MAX_INTRS); +DECLARE_BITMAP(ipi_available, GIC_MAX_INTRS); static void __gic_irq_dispatch(void); @@ -693,132 +681,7 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq, return 0; } -static int gic_setup_dev_chip(struct irq_domain *d, unsigned int virq, - unsigned int hwirq) -{ - struct irq_chip *chip; - int err; - - if (hwirq >= GIC_SHARED_HWIRQ_BASE) { - err = irq_domain_set_hwirq_and_chip(d, virq, hwirq, - &gic_level_irq_controller, - NULL); - } else { - switch (GIC_HWIRQ_TO_LOCAL(hwirq)) { - case GIC_LOCAL_INT_TIMER: - case GIC_LOCAL_INT_PERFCTR: - case GIC_LOCAL_INT_FDC: - /* - * HACK: These are all really percpu interrupts, but - * the rest of the MIPS kernel code does not use the - * percpu IRQ API for them. 
- */ - chip = &gic_all_vpes_local_irq_controller; - irq_set_handler(virq, handle_percpu_irq); - break; - - default: - chip = &gic_local_irq_controller; - irq_set_handler(virq, handle_percpu_devid_irq); - irq_set_percpu_devid(virq); - break; - } - - err = irq_domain_set_hwirq_and_chip(d, virq, hwirq, - chip, NULL); - } - - return err; -} - -static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq, - unsigned int nr_irqs, void *arg) -{ - struct gic_irq_spec *spec = arg; - irq_hw_number_t hwirq, base_hwirq; - int cpu, ret, i; - - if (spec->type == GIC_DEVICE) { - /* verify that shared irqs don't conflict with an IPI irq */ - if ((spec->hwirq >= GIC_SHARED_HWIRQ_BASE) && - test_bit(GIC_HWIRQ_TO_SHARED(spec->hwirq), ipi_resrv)) - return -EBUSY; - - return gic_setup_dev_chip(d, virq, spec->hwirq); - } else { - base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs); - if (base_hwirq == gic_shared_intrs) { - return -ENOMEM; - } - - /* check that we have enough space */ - for (i = base_hwirq; i < nr_irqs; i++) { - if (!test_bit(i, ipi_resrv)) - return -EBUSY; - } - bitmap_clear(ipi_resrv, base_hwirq, nr_irqs); - - /* map the hwirq for each cpu consecutively */ - i = 0; - for_each_cpu(cpu, spec->ipimask) { - hwirq = GIC_SHARED_TO_HWIRQ(base_hwirq + i); - - ret = irq_domain_set_hwirq_and_chip(d, virq + i, hwirq, - &gic_level_irq_controller, - NULL); - if (ret) - goto error; - - irq_set_handler(virq + i, handle_level_irq); - - ret = gic_shared_irq_domain_map(d, virq + i, hwirq, cpu); - if (ret) - goto error; - - i++; - } - - /* - * tell the parent about the base hwirq we allocated so it can - * set its own domain data - */ - spec->hwirq = base_hwirq; - } - - return 0; -error: - bitmap_set(ipi_resrv, base_hwirq, nr_irqs); - return ret; -} - -void gic_irq_domain_free(struct irq_domain *d, unsigned int virq, - unsigned int nr_irqs) -{ - irq_hw_number_t base_hwirq; - struct irq_data *data; - - data = irq_get_irq_data(virq); - if (!data) - return; - - base_hwirq = 
GIC_HWIRQ_TO_SHARED(irqd_to_hwirq(data)); - bitmap_set(ipi_resrv, base_hwirq, nr_irqs); -} - -int gic_irq_domain_match(struct irq_domain *d, struct device_node *node, - enum irq_domain_bus_token bus_token) -{ - /* this domain should'nt be accessed directly */ - return 0; -} - -static const struct irq_domain_ops gic_irq_domain_ops = { - .alloc = gic_irq_domain_alloc, - .free = gic_irq_domain_free, - .match = gic_irq_domain_match, -}; - -static int gic_dev_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, +static int gic_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type) @@ -837,58 +700,82 @@ static int gic_dev_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, return 0; } -static int gic_dev_domain_alloc(struct irq_domain *d, unsigned int virq, - unsigned int nr_irqs, void *arg) +static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq, + irq_hw_number_t hwirq) { - struct irq_fwspec *fwspec = arg; - struct gic_irq_spec spec = { - .type = GIC_DEVICE, - }; - int i, ret; + int err; - if (fwspec->param[0] == GIC_SHARED) - spec.hwirq = GIC_SHARED_TO_HWIRQ(fwspec->param[1]); - else - spec.hwirq = GIC_LOCAL_TO_HWIRQ(fwspec->param[1]); + if (hwirq >= GIC_SHARED_HWIRQ_BASE) { + /* verify that shared irqs don't conflict with an IPI irq */ + if (test_bit(GIC_HWIRQ_TO_SHARED(hwirq), ipi_resrv)) + return -EBUSY; - ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec); - if (ret) - return ret; + err = irq_domain_set_hwirq_and_chip(d, virq, hwirq, + &gic_level_irq_controller, + NULL); + if (err) + return err; - for (i = 0; i < nr_irqs; i++) { - ret = gic_setup_dev_chip(d, virq + i, spec.hwirq + i); - if (ret) - goto error; + return gic_shared_irq_domain_map(d, virq, hwirq, 0); } - return 0; + switch (GIC_HWIRQ_TO_LOCAL(hwirq)) { + case GIC_LOCAL_INT_TIMER: + case GIC_LOCAL_INT_PERFCTR: + case GIC_LOCAL_INT_FDC: + /* + * HACK: These 
are all really percpu interrupts, but + * the rest of the MIPS kernel code does not use the + * percpu IRQ API for them. + */ + err = irq_domain_set_hwirq_and_chip(d, virq, hwirq, + &gic_all_vpes_local_irq_controller, + NULL); + if (err) + return err; -error: - irq_domain_free_irqs_parent(d, virq, nr_irqs); - return ret; + irq_set_handler(virq, handle_percpu_irq); + break; + + default: + err = irq_domain_set_hwirq_and_chip(d, virq, hwirq, + &gic_local_irq_controller, + NULL); + if (err) + return err; + + irq_set_handler(virq, handle_percpu_devid_irq); + irq_set_percpu_devid(virq); + break; + } + + return gic_local_irq_domain_map(d, virq, hwirq); } -void gic_dev_domain_free(struct irq_domain *d, unsigned int virq, - unsigned int nr_irqs) +static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq, + unsigned int nr_irqs, void *arg) { - /* no real allocation is done for dev irqs, so no need to free anything */ - return; + struct irq_fwspec *fwspec = arg; + irq_hw_number_t hwirq; + + if (fwspec->param[0] == GIC_SHARED) + hwirq = GIC_SHARED_TO_HWIRQ(fwspec->param[1]); + else + hwirq = GIC_LOCAL_TO_HWIRQ(fwspec->param[1]); + + return gic_irq_domain_map(d, virq, hwirq); } -static void gic_dev_domain_activate(struct irq_domain *domain, - struct irq_data *d) +void gic_irq_domain_free(struct irq_domain *d, unsigned int virq, + unsigned int nr_irqs) { - if (GIC_HWIRQ_TO_LOCAL(d->hwirq) < GIC_NUM_LOCAL_INTRS) - gic_local_irq_domain_map(domain, d->irq, d->hwirq); - else - gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0); } -static struct irq_domain_ops gic_dev_domain_ops = { - .xlate = gic_dev_domain_xlate, - .alloc = gic_dev_domain_alloc, - .free = gic_dev_domain_free, - .activate = gic_dev_domain_activate, +static const struct irq_domain_ops gic_irq_domain_ops = { + .xlate = gic_irq_domain_xlate, + .alloc = gic_irq_domain_alloc, + .free = gic_irq_domain_free, + .map = gic_irq_domain_map, }; static int gic_ipi_domain_xlate(struct irq_domain *d, struct 
device_node *ctrlr, @@ -910,20 +797,32 @@ static int gic_ipi_domain_alloc(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs, void *arg) { struct cpumask *ipimask = arg; - struct gic_irq_spec spec = { - .type = GIC_IPI, - .ipimask = ipimask - }; - int ret, i; - - ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec); - if (ret) - return ret; - - /* the parent should have set spec.hwirq to the base_hwirq it allocated */ - for (i = 0; i < nr_irqs; i++) { - ret = irq_domain_set_hwirq_and_chip(d, virq + i, - GIC_SHARED_TO_HWIRQ(spec.hwirq + i), + irq_hw_number_t hwirq, base_hwirq; + int cpu, ret, i; + + base_hwirq = find_first_bit(ipi_available, gic_shared_intrs); + if (base_hwirq == gic_shared_intrs) + return -ENOMEM; + + /* check that we have enough space */ + for (i = base_hwirq; i < nr_irqs; i++) { + if (!test_bit(i, ipi_available)) + return -EBUSY; + } + bitmap_clear(ipi_available, base_hwirq, nr_irqs); + + /* map the hwirq for each cpu consecutively */ + i = 0; + for_each_cpu(cpu, ipimask) { + hwirq = GIC_SHARED_TO_HWIRQ(base_hwirq + i); + + ret = irq_domain_set_hwirq_and_chip(d, virq + i, hwirq, + &gic_edge_irq_controller, + NULL); + if (ret) + goto error; + + ret = irq_domain_set_hwirq_and_chip(d->parent, virq + i, hwirq, &gic_edge_irq_controller, NULL); if (ret) @@ -932,18 +831,32 @@ static int gic_ipi_domain_alloc(struct irq_domain *d, unsigned int virq, ret = irq_set_irq_type(virq + i, IRQ_TYPE_EDGE_RISING); if (ret) goto error; + + ret = gic_shared_irq_domain_map(d, virq + i, hwirq, cpu); + if (ret) + goto error; + + i++; } return 0; error: - irq_domain_free_irqs_parent(d, virq, nr_irqs); + bitmap_set(ipi_available, base_hwirq, nr_irqs); return ret; } void gic_ipi_domain_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs) { - irq_domain_free_irqs_parent(d, virq, nr_irqs); + irq_hw_number_t base_hwirq; + struct irq_data *data; + + data = irq_get_irq_data(virq); + if (!data) + return; + + base_hwirq = 
GIC_HWIRQ_TO_SHARED(irqd_to_hwirq(data)); + bitmap_set(ipi_available, base_hwirq, nr_irqs); } int gic_ipi_domain_match(struct irq_domain *d, struct device_node *node, @@ -968,38 +881,6 @@ static struct irq_domain_ops gic_ipi_domain_ops = { .match = gic_ipi_domain_match, }; -static void __init gic_map_single_int(struct device_node *node, - unsigned int irq) -{ - unsigned int linux_irq; - struct irq_fwspec local_int_fwspec = { - .fwnode = &node->fwnode, - .param_count = 3, - .param = { - [0] = GIC_LOCAL, - [1] = irq, - [2] = IRQ_TYPE_NONE, - }, - }; - - if (!gic_local_irq_is_routable(irq)) - return; - - linux_irq = irq_create_fwspec_mapping(&local_int_fwspec); - WARN_ON(!linux_irq); -} - -static void __init gic_map_interrupts(struct device_node *node) -{ - gic_map_single_int(node, GIC_LOCAL_INT_WD); - gic_map_single_int(node, GIC_LOCAL_INT_COMPARE); - gic_map_single_int(node, GIC_LOCAL_INT_TIMER); - gic_map_single_int(node, GIC_LOCAL_INT_PERFCTR); - gic_map_single_int(node, GIC_LOCAL_INT_SWINT0); - gic_map_single_int(node, GIC_LOCAL_INT_SWINT1); - gic_map_single_int(node, GIC_LOCAL_INT_FDC); -} - static void __init __gic_init(unsigned long gic_base_addr, unsigned long gic_addrspace_size, unsigned int cpu_vec, unsigned int irqbase, @@ -1071,13 +952,6 @@ static void __init __gic_init(unsigned long gic_base_addr, panic("Failed to add GIC IRQ domain"); gic_irq_domain->name = "mips-gic-irq"; - gic_dev_domain = irq_domain_add_hierarchy(gic_irq_domain, 0, - GIC_NUM_LOCAL_INTRS + gic_shared_intrs, - node, &gic_dev_domain_ops, NULL); - if (!gic_dev_domain) - panic("Failed to add GIC DEV domain"); - gic_dev_domain->name = "mips-gic-dev"; - gic_ipi_domain = irq_domain_add_hierarchy(gic_irq_domain, IRQ_DOMAIN_FLAG_IPI_PER_CPU, GIC_NUM_LOCAL_INTRS + gic_shared_intrs, @@ -1098,8 +972,8 @@ static void __init __gic_init(unsigned long gic_base_addr, 2 * gic_vpes); } + bitmap_copy(ipi_available, ipi_resrv, GIC_MAX_INTRS); gic_basic_init(); - gic_map_interrupts(node); } void __init 
gic_init(unsigned long gic_base_addr, diff --git a/drivers/irqchip/irq-moxart.c b/drivers/irqchip/irq-moxart.c deleted file mode 100644 index a24b06a1718b..000000000000 --- a/drivers/irqchip/irq-moxart.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * MOXA ART SoCs IRQ chip driver. - * - * Copyright (C) 2013 Jonas Jensen - * - * Jonas Jensen <jonas.jensen@gmail.com> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/irqchip.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> -#include <linux/irqdomain.h> - -#include <asm/exception.h> - -#define IRQ_SOURCE_REG 0 -#define IRQ_MASK_REG 0x04 -#define IRQ_CLEAR_REG 0x08 -#define IRQ_MODE_REG 0x0c -#define IRQ_LEVEL_REG 0x10 -#define IRQ_STATUS_REG 0x14 - -#define FIQ_SOURCE_REG 0x20 -#define FIQ_MASK_REG 0x24 -#define FIQ_CLEAR_REG 0x28 -#define FIQ_MODE_REG 0x2c -#define FIQ_LEVEL_REG 0x30 -#define FIQ_STATUS_REG 0x34 - - -struct moxart_irq_data { - void __iomem *base; - struct irq_domain *domain; - unsigned int interrupt_mask; -}; - -static struct moxart_irq_data intc; - -static void __exception_irq_entry handle_irq(struct pt_regs *regs) -{ - u32 irqstat; - int hwirq; - - irqstat = readl(intc.base + IRQ_STATUS_REG); - - while (irqstat) { - hwirq = ffs(irqstat) - 1; - handle_IRQ(irq_linear_revmap(intc.domain, hwirq), regs); - irqstat &= ~(1 << hwirq); - } -} - -static int __init moxart_of_intc_init(struct device_node *node, - struct device_node *parent) -{ - unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; - int ret; - struct irq_chip_generic *gc; - - intc.base = of_iomap(node, 0); - if (!intc.base) { - pr_err("%s: unable to map IC registers\n", - node->full_name); - return -EINVAL; - } - - intc.domain = irq_domain_add_linear(node, 32, &irq_generic_chip_ops, - intc.base); 
- if (!intc.domain) { - pr_err("%s: unable to create IRQ domain\n", node->full_name); - return -EINVAL; - } - - ret = irq_alloc_domain_generic_chips(intc.domain, 32, 1, - "MOXARTINTC", handle_edge_irq, - clr, 0, IRQ_GC_INIT_MASK_CACHE); - if (ret) { - pr_err("%s: could not allocate generic chip\n", - node->full_name); - irq_domain_remove(intc.domain); - return -EINVAL; - } - - ret = of_property_read_u32(node, "interrupt-mask", - &intc.interrupt_mask); - if (ret) - pr_err("%s: could not read interrupt-mask DT property\n", - node->full_name); - - gc = irq_get_domain_generic_chip(intc.domain, 0); - - gc->reg_base = intc.base; - gc->chip_types[0].regs.mask = IRQ_MASK_REG; - gc->chip_types[0].regs.ack = IRQ_CLEAR_REG; - gc->chip_types[0].chip.irq_ack = irq_gc_ack_set_bit; - gc->chip_types[0].chip.irq_mask = irq_gc_mask_clr_bit; - gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; - - writel(0, intc.base + IRQ_MASK_REG); - writel(0xffffffff, intc.base + IRQ_CLEAR_REG); - - writel(intc.interrupt_mask, intc.base + IRQ_MODE_REG); - writel(intc.interrupt_mask, intc.base + IRQ_LEVEL_REG); - - set_handle_irq(handle_irq); - - return 0; -} -IRQCHIP_DECLARE(moxa_moxart_ic, "moxa,moxart-ic", moxart_of_intc_init); diff --git a/drivers/irqchip/irq-mtk-cirq.c b/drivers/irqchip/irq-mtk-cirq.c new file mode 100644 index 000000000000..18c65c16de28 --- /dev/null +++ b/drivers/irqchip/irq-mtk-cirq.c @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: Youlin.Pei <youlin.pei@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/irqchip.h> +#include <linux/irqdomain.h> +#include <linux/of.h> +#include <linux/of_irq.h> +#include <linux/of_address.h> +#include <linux/slab.h> +#include <linux/syscore_ops.h> + +#define CIRQ_ACK 0x40 +#define CIRQ_MASK_SET 0xc0 +#define CIRQ_MASK_CLR 0x100 +#define CIRQ_SENS_SET 0x180 +#define CIRQ_SENS_CLR 0x1c0 +#define CIRQ_POL_SET 0x240 +#define CIRQ_POL_CLR 0x280 +#define CIRQ_CONTROL 0x300 + +#define CIRQ_EN 0x1 +#define CIRQ_EDGE 0x2 +#define CIRQ_FLUSH 0x4 + +struct mtk_cirq_chip_data { + void __iomem *base; + unsigned int ext_irq_start; + unsigned int ext_irq_end; + struct irq_domain *domain; +}; + +static struct mtk_cirq_chip_data *cirq_data; + +static void mtk_cirq_write_mask(struct irq_data *data, unsigned int offset) +{ + struct mtk_cirq_chip_data *chip_data = data->chip_data; + unsigned int cirq_num = data->hwirq; + u32 mask = 1 << (cirq_num % 32); + + writel_relaxed(mask, chip_data->base + offset + (cirq_num / 32) * 4); +} + +static void mtk_cirq_mask(struct irq_data *data) +{ + mtk_cirq_write_mask(data, CIRQ_MASK_SET); + irq_chip_mask_parent(data); +} + +static void mtk_cirq_unmask(struct irq_data *data) +{ + mtk_cirq_write_mask(data, CIRQ_MASK_CLR); + irq_chip_unmask_parent(data); +} + +static int mtk_cirq_set_type(struct irq_data *data, unsigned int type) +{ + int ret; + + switch (type & IRQ_TYPE_SENSE_MASK) { + case IRQ_TYPE_EDGE_FALLING: + mtk_cirq_write_mask(data, CIRQ_POL_CLR); + mtk_cirq_write_mask(data, CIRQ_SENS_CLR); + break; + case IRQ_TYPE_EDGE_RISING: + mtk_cirq_write_mask(data, CIRQ_POL_SET); + mtk_cirq_write_mask(data, CIRQ_SENS_CLR); + break; + case IRQ_TYPE_LEVEL_LOW: + mtk_cirq_write_mask(data, CIRQ_POL_CLR); + mtk_cirq_write_mask(data, CIRQ_SENS_SET); + break; + case IRQ_TYPE_LEVEL_HIGH: + mtk_cirq_write_mask(data, CIRQ_POL_SET); + mtk_cirq_write_mask(data, CIRQ_SENS_SET); + break; + default: + break; + } + + data = 
data->parent_data; + ret = data->chip->irq_set_type(data, type); + return ret; +} + +static struct irq_chip mtk_cirq_chip = { + .name = "MT_CIRQ", + .irq_mask = mtk_cirq_mask, + .irq_unmask = mtk_cirq_unmask, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_type = mtk_cirq_set_type, + .irq_retrigger = irq_chip_retrigger_hierarchy, +#ifdef CONFIG_SMP + .irq_set_affinity = irq_chip_set_affinity_parent, +#endif +}; + +static int mtk_cirq_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + unsigned long *hwirq, + unsigned int *type) +{ + if (is_of_node(fwspec->fwnode)) { + if (fwspec->param_count != 3) + return -EINVAL; + + /* No PPI should point to this domain */ + if (fwspec->param[0] != 0) + return -EINVAL; + + /* cirq support irq number check */ + if (fwspec->param[1] < cirq_data->ext_irq_start || + fwspec->param[1] > cirq_data->ext_irq_end) + return -EINVAL; + + *hwirq = fwspec->param[1] - cirq_data->ext_irq_start; + *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; + return 0; + } + + return -EINVAL; +} + +static int mtk_cirq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + int ret; + irq_hw_number_t hwirq; + unsigned int type; + struct irq_fwspec *fwspec = arg; + struct irq_fwspec parent_fwspec = *fwspec; + + ret = mtk_cirq_domain_translate(domain, fwspec, &hwirq, &type); + if (ret) + return ret; + + if (WARN_ON(nr_irqs != 1)) + return -EINVAL; + + irq_domain_set_hwirq_and_chip(domain, virq, hwirq, + &mtk_cirq_chip, + domain->host_data); + + parent_fwspec.fwnode = domain->parent->fwnode; + return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, + &parent_fwspec); +} + +static const struct irq_domain_ops cirq_domain_ops = { + .translate = mtk_cirq_domain_translate, + .alloc = mtk_cirq_domain_alloc, + .free = irq_domain_free_irqs_common, +}; + +#ifdef CONFIG_PM_SLEEP +static int mtk_cirq_suspend(void) +{ + u32 value, mask; + unsigned int irq, hwirq_num; + bool pending, masked; + int i, pendret, 
maskret; + + /* + * When external interrupts happened, CIRQ will record the status + * even CIRQ is not enabled. When execute flush command, CIRQ will + * resend the signals according to the status. So if don't clear the + * status, CIRQ will resend the wrong signals. + * + * arch_suspend_disable_irqs() will be called before CIRQ suspend + * callback. If clear all the status simply, the external interrupts + * which happened between arch_suspend_disable_irqs and CIRQ suspend + * callback will be lost. Using following steps to avoid this issue; + * + * - Iterate over all the CIRQ supported interrupts; + * - For each interrupt, inspect its pending and masked status at GIC + * level; + * - If pending and unmasked, it happened between + * arch_suspend_disable_irqs and CIRQ suspend callback, don't ACK + * it. Otherwise, ACK it. + */ + hwirq_num = cirq_data->ext_irq_end - cirq_data->ext_irq_start + 1; + for (i = 0; i < hwirq_num; i++) { + irq = irq_find_mapping(cirq_data->domain, i); + if (irq) { + pendret = irq_get_irqchip_state(irq, + IRQCHIP_STATE_PENDING, + &pending); + + maskret = irq_get_irqchip_state(irq, + IRQCHIP_STATE_MASKED, + &masked); + + if (pendret == 0 && maskret == 0 && + (pending && !masked)) + continue; + } + + mask = 1 << (i % 32); + writel_relaxed(mask, cirq_data->base + CIRQ_ACK + (i / 32) * 4); + } + + /* set edge_only mode, record edge-triggerd interrupts */ + /* enable cirq */ + value = readl_relaxed(cirq_data->base + CIRQ_CONTROL); + value |= (CIRQ_EDGE | CIRQ_EN); + writel_relaxed(value, cirq_data->base + CIRQ_CONTROL); + + return 0; +} + +static void mtk_cirq_resume(void) +{ + u32 value; + + /* flush recored interrupts, will send signals to parent controller */ + value = readl_relaxed(cirq_data->base + CIRQ_CONTROL); + writel_relaxed(value | CIRQ_FLUSH, cirq_data->base + CIRQ_CONTROL); + + /* disable cirq */ + value = readl_relaxed(cirq_data->base + CIRQ_CONTROL); + value &= ~(CIRQ_EDGE | CIRQ_EN); + writel_relaxed(value, cirq_data->base + 
CIRQ_CONTROL); +} + +static struct syscore_ops mtk_cirq_syscore_ops = { + .suspend = mtk_cirq_suspend, + .resume = mtk_cirq_resume, +}; + +static void mtk_cirq_syscore_init(void) +{ + register_syscore_ops(&mtk_cirq_syscore_ops); +} +#else +static inline void mtk_cirq_syscore_init(void) {} +#endif + +static int __init mtk_cirq_of_init(struct device_node *node, + struct device_node *parent) +{ + struct irq_domain *domain, *domain_parent; + unsigned int irq_num; + int ret; + + domain_parent = irq_find_host(parent); + if (!domain_parent) { + pr_err("mtk_cirq: interrupt-parent not found\n"); + return -EINVAL; + } + + cirq_data = kzalloc(sizeof(*cirq_data), GFP_KERNEL); + if (!cirq_data) + return -ENOMEM; + + cirq_data->base = of_iomap(node, 0); + if (!cirq_data->base) { + pr_err("mtk_cirq: unable to map cirq register\n"); + ret = -ENXIO; + goto out_free; + } + + ret = of_property_read_u32_index(node, "mediatek,ext-irq-range", 0, + &cirq_data->ext_irq_start); + if (ret) + goto out_unmap; + + ret = of_property_read_u32_index(node, "mediatek,ext-irq-range", 1, + &cirq_data->ext_irq_end); + if (ret) + goto out_unmap; + + irq_num = cirq_data->ext_irq_end - cirq_data->ext_irq_start + 1; + domain = irq_domain_add_hierarchy(domain_parent, 0, + irq_num, node, + &cirq_domain_ops, cirq_data); + if (!domain) { + ret = -ENOMEM; + goto out_unmap; + } + cirq_data->domain = domain; + + mtk_cirq_syscore_init(); + + return 0; + +out_unmap: + iounmap(cirq_data->base); +out_free: + kfree(cirq_data); + return ret; +} + +IRQCHIP_DECLARE(mtk_cirq, "mediatek,mtk-cirq", mtk_cirq_of_init); diff --git a/drivers/irqchip/irq-mtk-sysirq.c b/drivers/irqchip/irq-mtk-sysirq.c index 63ac73b1d9c8..eeac512ec5a8 100644 --- a/drivers/irqchip/irq-mtk-sysirq.c +++ b/drivers/irqchip/irq-mtk-sysirq.c @@ -24,22 +24,29 @@ struct mtk_sysirq_chip_data { spinlock_t lock; - void __iomem *intpol_base; + u32 nr_intpol_bases; + void __iomem **intpol_bases; + u32 *intpol_words; + u8 *intpol_idx; + u16 *which_word; }; 
static int mtk_sysirq_set_type(struct irq_data *data, unsigned int type) { irq_hw_number_t hwirq = data->hwirq; struct mtk_sysirq_chip_data *chip_data = data->chip_data; + u8 intpol_idx = chip_data->intpol_idx[hwirq]; + void __iomem *base; u32 offset, reg_index, value; unsigned long flags; int ret; + base = chip_data->intpol_bases[intpol_idx]; + reg_index = chip_data->which_word[hwirq]; offset = hwirq & 0x1f; - reg_index = hwirq >> 5; spin_lock_irqsave(&chip_data->lock, flags); - value = readl_relaxed(chip_data->intpol_base + reg_index * 4); + value = readl_relaxed(base + reg_index * 4); if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_EDGE_FALLING) { if (type == IRQ_TYPE_LEVEL_LOW) type = IRQ_TYPE_LEVEL_HIGH; @@ -49,7 +56,8 @@ static int mtk_sysirq_set_type(struct irq_data *data, unsigned int type) } else { value &= ~(1 << offset); } - writel(value, chip_data->intpol_base + reg_index * 4); + + writel_relaxed(value, base + reg_index * 4); data = data->parent_data; ret = data->chip->irq_set_type(data, type); @@ -124,8 +132,7 @@ static int __init mtk_sysirq_of_init(struct device_node *node, { struct irq_domain *domain, *domain_parent; struct mtk_sysirq_chip_data *chip_data; - int ret, size, intpol_num; - struct resource res; + int ret, size, intpol_num = 0, nr_intpol_bases = 0, i = 0; domain_parent = irq_find_host(parent); if (!domain_parent) { @@ -133,36 +140,103 @@ static int __init mtk_sysirq_of_init(struct device_node *node, return -EINVAL; } - ret = of_address_to_resource(node, 0, &res); - if (ret) - return ret; - chip_data = kzalloc(sizeof(*chip_data), GFP_KERNEL); if (!chip_data) return -ENOMEM; - size = resource_size(&res); - intpol_num = size * 8; - chip_data->intpol_base = ioremap(res.start, size); - if (!chip_data->intpol_base) { - pr_err("mtk_sysirq: unable to map sysirq register\n"); - ret = -ENXIO; - goto out_free; + while (of_get_address(node, i++, NULL, NULL)) + nr_intpol_bases++; + + if (nr_intpol_bases == 0) { + pr_err("mtk_sysirq: base address 
not specified\n"); + ret = -EINVAL; + goto out_free_chip; + } + + chip_data->intpol_words = kcalloc(nr_intpol_bases, + sizeof(*chip_data->intpol_words), + GFP_KERNEL); + if (!chip_data->intpol_words) { + ret = -ENOMEM; + goto out_free_chip; + } + + chip_data->intpol_bases = kcalloc(nr_intpol_bases, + sizeof(*chip_data->intpol_bases), + GFP_KERNEL); + if (!chip_data->intpol_bases) { + ret = -ENOMEM; + goto out_free_intpol_words; + } + + for (i = 0; i < nr_intpol_bases; i++) { + struct resource res; + + ret = of_address_to_resource(node, i, &res); + size = resource_size(&res); + intpol_num += size * 8; + chip_data->intpol_words[i] = size / 4; + chip_data->intpol_bases[i] = of_iomap(node, i); + if (ret || !chip_data->intpol_bases[i]) { + pr_err("%s: couldn't map region %d\n", + node->full_name, i); + ret = -ENODEV; + goto out_free_intpol; + } + } + + chip_data->intpol_idx = kcalloc(intpol_num, + sizeof(*chip_data->intpol_idx), + GFP_KERNEL); + if (!chip_data->intpol_idx) { + ret = -ENOMEM; + goto out_free_intpol; + } + + chip_data->which_word = kcalloc(intpol_num, + sizeof(*chip_data->which_word), + GFP_KERNEL); + if (!chip_data->which_word) { + ret = -ENOMEM; + goto out_free_intpol_idx; + } + + /* + * assign an index of the intpol_bases for each irq + * to set it fast later + */ + for (i = 0; i < intpol_num ; i++) { + u32 word = i / 32, j; + + for (j = 0; word >= chip_data->intpol_words[j] ; j++) + word -= chip_data->intpol_words[j]; + + chip_data->intpol_idx[i] = j; + chip_data->which_word[i] = word; } domain = irq_domain_add_hierarchy(domain_parent, 0, intpol_num, node, &sysirq_domain_ops, chip_data); if (!domain) { ret = -ENOMEM; - goto out_unmap; + goto out_free_which_word; } spin_lock_init(&chip_data->lock); return 0; -out_unmap: - iounmap(chip_data->intpol_base); -out_free: +out_free_which_word: + kfree(chip_data->which_word); +out_free_intpol_idx: + kfree(chip_data->intpol_idx); +out_free_intpol: + for (i = 0; i < nr_intpol_bases; i++) + if 
(chip_data->intpol_bases[i]) + iounmap(chip_data->intpol_bases[i]); + kfree(chip_data->intpol_bases); +out_free_intpol_words: + kfree(chip_data->intpol_words); +out_free_chip: kfree(chip_data); return ret; } diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 275f467956ee..6c2999872090 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -76,6 +76,15 @@ config LEDS_BCM6358 This option enables support for LEDs connected to the BCM6358 LED HW controller accessed via MMIO registers. +config LEDS_CPCAP + tristate "LED Support for Motorola CPCAP" + depends on LEDS_CLASS + depends on MFD_CPCAP + depends on OF + help + This option enables support for LEDs offered by Motorola's + CPCAP PMIC. + config LEDS_LM3530 tristate "LCD Backlight driver for LM3530" depends on LEDS_CLASS @@ -126,6 +135,14 @@ config LEDS_MIKROTIK_RB532 This option enables support for the so called "User LED" of Mikrotik's Routerboard 532. +config LEDS_MT6323 + tristate "LED Support for Mediatek MT6323 PMIC" + depends on LEDS_CLASS + depends on MFD_MT6397 + help + This option enables support for on-chip LED drivers found on + Mediatek MT6323 PMIC. + config LEDS_S3C24XX tristate "LED Support for Samsung S3C24XX GPIO LEDs" depends on LEDS_CLASS @@ -241,7 +258,6 @@ config LEDS_LP3952 tristate "LED Support for TI LP3952 2 channel LED driver" depends on LEDS_CLASS depends on I2C - depends on ACPI depends on GPIOLIB select REGMAP_I2C help @@ -463,15 +479,6 @@ config LEDS_ADP5520 To compile this driver as a module, choose M here: the module will be called leds-adp5520. -config LEDS_DELL_NETBOOKS - tristate "External LED on Dell Business Netbooks" - depends on LEDS_CLASS - depends on X86 && ACPI_WMI - depends on DELL_SMBIOS - help - This adds support for the Latitude 2100 and similar - notebooks that have an external LED. 
- config LEDS_MC13783 tristate "LED Support for MC13XXX PMIC" depends on LEDS_CLASS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 6b8273736478..45f133962ed8 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_LEDS_AAT1290) += leds-aat1290.o obj-$(CONFIG_LEDS_BCM6328) += leds-bcm6328.o obj-$(CONFIG_LEDS_BCM6358) += leds-bcm6358.o obj-$(CONFIG_LEDS_BD2802) += leds-bd2802.o +obj-$(CONFIG_LEDS_CPCAP) += leds-cpcap.o obj-$(CONFIG_LEDS_LOCOMO) += leds-locomo.o obj-$(CONFIG_LEDS_LM3530) += leds-lm3530.o obj-$(CONFIG_LEDS_LM3533) += leds-lm3533.o @@ -52,7 +53,6 @@ obj-$(CONFIG_LEDS_REGULATOR) += leds-regulator.o obj-$(CONFIG_LEDS_INTEL_SS4200) += leds-ss4200.o obj-$(CONFIG_LEDS_LT3593) += leds-lt3593.o obj-$(CONFIG_LEDS_ADP5520) += leds-adp5520.o -obj-$(CONFIG_LEDS_DELL_NETBOOKS) += dell-led.o obj-$(CONFIG_LEDS_MC13783) += leds-mc13783.o obj-$(CONFIG_LEDS_NS2) += leds-ns2.o obj-$(CONFIG_LEDS_NETXBIG) += leds-netxbig.o @@ -72,6 +72,7 @@ obj-$(CONFIG_LEDS_IS31FL32XX) += leds-is31fl32xx.o obj-$(CONFIG_LEDS_PM8058) += leds-pm8058.o obj-$(CONFIG_LEDS_MLXCPLD) += leds-mlxcpld.o obj-$(CONFIG_LEDS_NIC78BX) += leds-nic78bx.o +obj-$(CONFIG_LEDS_MT6323) += leds-mt6323.o # LED SPI Drivers obj-$(CONFIG_LEDS_DAC124S085) += leds-dac124s085.o diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index f2b0a80a62b4..b0e2d55acbd6 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -244,11 +244,14 @@ static int led_classdev_next_name(const char *init_name, char *name, } /** - * led_classdev_register - register a new object of led_classdev class. - * @parent: The device to register. + * of_led_classdev_register - register a new object of led_classdev class. + * + * @parent: parent of LED device * @led_cdev: the led_classdev structure for this device. 
+ * @np: DT node describing this LED */ -int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) +int of_led_classdev_register(struct device *parent, struct device_node *np, + struct led_classdev *led_cdev) { char name[LED_MAX_NAME_SIZE]; int ret; @@ -261,6 +264,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) led_cdev, led_cdev->groups, "%s", name); if (IS_ERR(led_cdev->dev)) return PTR_ERR(led_cdev->dev); + led_cdev->dev->of_node = np; if (ret) dev_warn(parent, "Led %s renamed to %s due to name collision", @@ -303,7 +307,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) return 0; } -EXPORT_SYMBOL_GPL(led_classdev_register); +EXPORT_SYMBOL_GPL(of_led_classdev_register); /** * led_classdev_unregister - unregisters a object of led_properties class. @@ -348,12 +352,14 @@ static void devm_led_classdev_release(struct device *dev, void *res) } /** - * devm_led_classdev_register - resource managed led_classdev_register() - * @parent: The device to register. + * devm_of_led_classdev_register - resource managed led_classdev_register() + * + * @parent: parent of LED device * @led_cdev: the led_classdev structure for this device. 
*/ -int devm_led_classdev_register(struct device *parent, - struct led_classdev *led_cdev) +int devm_of_led_classdev_register(struct device *parent, + struct device_node *np, + struct led_classdev *led_cdev) { struct led_classdev **dr; int rc; @@ -362,7 +368,7 @@ int devm_led_classdev_register(struct device *parent, if (!dr) return -ENOMEM; - rc = led_classdev_register(parent, led_cdev); + rc = of_led_classdev_register(parent, np, led_cdev); if (rc) { devres_free(dr); return rc; @@ -373,7 +379,7 @@ int devm_led_classdev_register(struct device *parent, return 0; } -EXPORT_SYMBOL_GPL(devm_led_classdev_register); +EXPORT_SYMBOL_GPL(devm_of_led_classdev_register); static int devm_led_classdev_match(struct device *dev, void *res, void *data) { diff --git a/drivers/leds/leds-cpcap.c b/drivers/leds/leds-cpcap.c new file mode 100644 index 000000000000..f0f28c442807 --- /dev/null +++ b/drivers/leds/leds-cpcap.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2017 Sebastian Reichel <sre@kernel.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * later as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/leds.h> +#include <linux/mfd/motorola-cpcap.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/regulator/consumer.h> + +#define CPCAP_LED_NO_CURRENT 0x0001 + +struct cpcap_led_info { + u16 reg; + u16 mask; + u16 limit; + u16 init_mask; + u16 init_val; +}; + +static const struct cpcap_led_info cpcap_led_red = { + .reg = CPCAP_REG_REDC, + .mask = 0x03FF, + .limit = 31, +}; + +static const struct cpcap_led_info cpcap_led_green = { + .reg = CPCAP_REG_GREENC, + .mask = 0x03FF, + .limit = 31, +}; + +static const struct cpcap_led_info cpcap_led_blue = { + .reg = CPCAP_REG_BLUEC, + .mask = 0x03FF, + .limit = 31, +}; + +/* aux display light */ +static const struct cpcap_led_info cpcap_led_adl = { + .reg = CPCAP_REG_ADLC, + .mask = 0x000F, + .limit = 1, + .init_mask = 0x7FFF, + .init_val = 0x5FF0, +}; + +/* camera privacy led */ +static const struct cpcap_led_info cpcap_led_cp = { + .reg = CPCAP_REG_CLEDC, + .mask = 0x0007, + .limit = 1, + .init_mask = 0x03FF, + .init_val = 0x0008, +}; + +struct cpcap_led { + struct led_classdev led; + const struct cpcap_led_info *info; + struct device *dev; + struct regmap *regmap; + struct mutex update_lock; + struct regulator *vdd; + bool powered; + + u32 current_limit; +}; + +static u16 cpcap_led_val(u8 current_limit, u8 duty_cycle) +{ + current_limit &= 0x1f; /* 5 bit */ + duty_cycle &= 0x0f; /* 4 bit */ + + return current_limit << 4 | duty_cycle; +} + +static int cpcap_led_set_power(struct cpcap_led *led, bool status) +{ + int err; + + if (status == led->powered) + return 0; + + if (status) + err = regulator_enable(led->vdd); + else + err = regulator_disable(led->vdd); + + if (err) { + dev_err(led->dev, "regulator failure: %d", err); + return err; + } + + led->powered = status; + + return 0; +} + +static int cpcap_led_set(struct led_classdev *ledc, enum led_brightness value) +{ + struct cpcap_led 
*led = container_of(ledc, struct cpcap_led, led); + int brightness; + int err; + + mutex_lock(&led->update_lock); + + if (value > LED_OFF) { + err = cpcap_led_set_power(led, true); + if (err) + goto exit; + } + + if (value == LED_OFF) { + /* Avoid HW issue by turning off current before duty cycle */ + err = regmap_update_bits(led->regmap, + led->info->reg, led->info->mask, CPCAP_LED_NO_CURRENT); + if (err) { + dev_err(led->dev, "regmap failed: %d", err); + goto exit; + } + + brightness = cpcap_led_val(value, LED_OFF); + } else { + brightness = cpcap_led_val(value, LED_ON); + } + + err = regmap_update_bits(led->regmap, led->info->reg, led->info->mask, + brightness); + if (err) { + dev_err(led->dev, "regmap failed: %d", err); + goto exit; + } + + if (value == LED_OFF) { + err = cpcap_led_set_power(led, false); + if (err) + goto exit; + } + +exit: + mutex_unlock(&led->update_lock); + return err; +} + +static const struct of_device_id cpcap_led_of_match[] = { + { .compatible = "motorola,cpcap-led-red", .data = &cpcap_led_red }, + { .compatible = "motorola,cpcap-led-green", .data = &cpcap_led_green }, + { .compatible = "motorola,cpcap-led-blue", .data = &cpcap_led_blue }, + { .compatible = "motorola,cpcap-led-adl", .data = &cpcap_led_adl }, + { .compatible = "motorola,cpcap-led-cp", .data = &cpcap_led_cp }, + {}, +}; +MODULE_DEVICE_TABLE(of, cpcap_led_of_match); + +static int cpcap_led_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + struct cpcap_led *led; + int err; + + match = of_match_device(of_match_ptr(cpcap_led_of_match), &pdev->dev); + if (!match || !match->data) + return -EINVAL; + + led = devm_kzalloc(&pdev->dev, sizeof(*led), GFP_KERNEL); + if (!led) + return -ENOMEM; + platform_set_drvdata(pdev, led); + led->info = match->data; + led->dev = &pdev->dev; + + if (led->info->reg == 0x0000) { + dev_err(led->dev, "Unsupported LED"); + return -ENODEV; + } + + led->regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!led->regmap) + 
return -ENODEV; + + led->vdd = devm_regulator_get(&pdev->dev, "vdd"); + if (IS_ERR(led->vdd)) { + err = PTR_ERR(led->vdd); + dev_err(led->dev, "Couldn't get regulator: %d", err); + return err; + } + + err = device_property_read_string(&pdev->dev, "label", &led->led.name); + if (err) { + dev_err(led->dev, "Couldn't read LED label: %d", err); + return err; + } + + if (led->info->init_mask) { + err = regmap_update_bits(led->regmap, led->info->reg, + led->info->init_mask, led->info->init_val); + if (err) { + dev_err(led->dev, "regmap failed: %d", err); + return err; + } + } + + mutex_init(&led->update_lock); + + led->led.max_brightness = led->info->limit; + led->led.brightness_set_blocking = cpcap_led_set; + err = devm_led_classdev_register(&pdev->dev, &led->led); + if (err) { + dev_err(led->dev, "Couldn't register LED: %d", err); + return err; + } + + return 0; +} + +static struct platform_driver cpcap_led_driver = { + .probe = cpcap_led_probe, + .driver = { + .name = "cpcap-led", + .of_match_table = cpcap_led_of_match, + }, +}; +module_platform_driver(cpcap_led_driver); + +MODULE_DESCRIPTION("CPCAP LED driver"); +MODULE_AUTHOR("Sebastian Reichel <sre@kernel.org>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c index 066fc7590729..e753ba93ba1e 100644 --- a/drivers/leds/leds-gpio.c +++ b/drivers/leds/leds-gpio.c @@ -77,7 +77,7 @@ static int gpio_blink_set(struct led_classdev *led_cdev, static int create_gpio_led(const struct gpio_led *template, struct gpio_led_data *led_dat, struct device *parent, - gpio_blink_set_t blink_set) + struct device_node *np, gpio_blink_set_t blink_set) { int ret, state; @@ -139,7 +139,7 @@ static int create_gpio_led(const struct gpio_led *template, if (ret < 0) return ret; - return devm_led_classdev_register(parent, &led_dat->cdev); + return devm_of_led_classdev_register(parent, np, &led_dat->cdev); } struct gpio_leds_priv { @@ -208,7 +208,7 @@ static struct gpio_leds_priv *gpio_leds_create(struct 
platform_device *pdev) if (fwnode_property_present(child, "panic-indicator")) led.panic_indicator = 1; - ret = create_gpio_led(&led, led_dat, dev, NULL); + ret = create_gpio_led(&led, led_dat, dev, np, NULL); if (ret < 0) { fwnode_handle_put(child); return ERR_PTR(ret); @@ -242,9 +242,9 @@ static int gpio_led_probe(struct platform_device *pdev) priv->num_leds = pdata->num_leds; for (i = 0; i < priv->num_leds; i++) { - ret = create_gpio_led(&pdata->leds[i], - &priv->leds[i], - &pdev->dev, pdata->gpio_blink_set); + ret = create_gpio_led(&pdata->leds[i], &priv->leds[i], + &pdev->dev, NULL, + pdata->gpio_blink_set); if (ret < 0) return ret; } diff --git a/drivers/leds/leds-lp3952.c b/drivers/leds/leds-lp3952.c index 4847e89883a7..847f7f282126 100644 --- a/drivers/leds/leds-lp3952.c +++ b/drivers/leds/leds-lp3952.c @@ -10,7 +10,6 @@ * */ -#include <linux/acpi.h> #include <linux/delay.h> #include <linux/gpio.h> #include <linux/i2c.h> @@ -103,10 +102,11 @@ static int lp3952_get_label(struct device *dev, const char *label, char *dest) const char *str; ret = device_property_read_string(dev, label, &str); - if (!ret) - strncpy(dest, str, LP3952_LABEL_MAX_LEN); + if (ret) + return ret; - return ret; + strncpy(dest, str, LP3952_LABEL_MAX_LEN); + return 0; } static int lp3952_register_led_classdev(struct lp3952_led_array *priv) @@ -276,19 +276,9 @@ static const struct i2c_device_id lp3952_id[] = { }; MODULE_DEVICE_TABLE(i2c, lp3952_id); -#ifdef CONFIG_ACPI -static const struct acpi_device_id lp3952_acpi_match[] = { - {"TXNW3952", 0}, - {} -}; - -MODULE_DEVICE_TABLE(acpi, lp3952_acpi_match); -#endif - static struct i2c_driver lp3952_i2c_driver = { .driver = { .name = LP3952_NAME, - .acpi_match_table = ACPI_PTR(lp3952_acpi_match), }, .probe = lp3952_probe, .remove = lp3952_remove, diff --git a/drivers/leds/leds-mt6323.c b/drivers/leds/leds-mt6323.c new file mode 100644 index 000000000000..8893c74e9a1f --- /dev/null +++ b/drivers/leds/leds-mt6323.c @@ -0,0 +1,502 @@ +/* + * LED 
driver for Mediatek MT6323 PMIC + * + * Copyright (C) 2017 Sean Wang <sean.wang@mediatek.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <linux/kernel.h> +#include <linux/leds.h> +#include <linux/mfd/mt6323/registers.h> +#include <linux/mfd/mt6397/core.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> + +/* + * Register field for MT6323_TOP_CKPDN0 to enable + * 32K clock common for LED device. + */ +#define MT6323_RG_DRV_32K_CK_PDN BIT(11) +#define MT6323_RG_DRV_32K_CK_PDN_MASK BIT(11) + +/* + * Register field for MT6323_TOP_CKPDN2 to enable + * individual clock for LED device. + */ +#define MT6323_RG_ISINK_CK_PDN(i) BIT(i) +#define MT6323_RG_ISINK_CK_PDN_MASK(i) BIT(i) + +/* + * Register field for MT6323_TOP_CKCON1 to select + * clock source. + */ +#define MT6323_RG_ISINK_CK_SEL_MASK(i) (BIT(10) << (i)) + +/* + * Register for MT6323_ISINK_CON0 to setup the + * duty cycle of the blink. + */ +#define MT6323_ISINK_CON0(i) (MT6323_ISINK0_CON0 + 0x8 * (i)) +#define MT6323_ISINK_DIM_DUTY_MASK (0x1f << 8) +#define MT6323_ISINK_DIM_DUTY(i) (((i) << 8) & \ + MT6323_ISINK_DIM_DUTY_MASK) + +/* Register to setup the period of the blink. */ +#define MT6323_ISINK_CON1(i) (MT6323_ISINK0_CON1 + 0x8 * (i)) +#define MT6323_ISINK_DIM_FSEL_MASK (0xffff) +#define MT6323_ISINK_DIM_FSEL(i) ((i) & MT6323_ISINK_DIM_FSEL_MASK) + +/* Register to control the brightness. 
*/ +#define MT6323_ISINK_CON2(i) (MT6323_ISINK0_CON2 + 0x8 * (i)) +#define MT6323_ISINK_CH_STEP_SHIFT 12 +#define MT6323_ISINK_CH_STEP_MASK (0x7 << 12) +#define MT6323_ISINK_CH_STEP(i) (((i) << 12) & \ + MT6323_ISINK_CH_STEP_MASK) +#define MT6323_ISINK_SFSTR0_TC_MASK (0x3 << 1) +#define MT6323_ISINK_SFSTR0_TC(i) (((i) << 1) & \ + MT6323_ISINK_SFSTR0_TC_MASK) +#define MT6323_ISINK_SFSTR0_EN_MASK BIT(0) +#define MT6323_ISINK_SFSTR0_EN BIT(0) + +/* Register to LED channel enablement. */ +#define MT6323_ISINK_CH_EN_MASK(i) BIT(i) +#define MT6323_ISINK_CH_EN(i) BIT(i) + +#define MT6323_MAX_PERIOD 10000 +#define MT6323_MAX_LEDS 4 +#define MT6323_MAX_BRIGHTNESS 6 +#define MT6323_UNIT_DUTY 3125 +#define MT6323_CAL_HW_DUTY(o, p) DIV_ROUND_CLOSEST((o) * 100000ul,\ + (p) * MT6323_UNIT_DUTY) + +struct mt6323_leds; + +/** + * struct mt6323_led - state container for the LED device + * @id: the identifier in MT6323 LED device + * @parent: the pointer to MT6323 LED controller + * @cdev: LED class device for this LED device + * @current_brightness: current state of the LED device + */ +struct mt6323_led { + int id; + struct mt6323_leds *parent; + struct led_classdev cdev; + enum led_brightness current_brightness; +}; + +/** + * struct mt6323_leds - state container for holding LED controller + * of the driver + * @dev: the device pointer + * @hw: the underlying hardware providing shared + * bus for the register operations + * @lock: the lock among process context + * @led: the array that contains the state of individual + * LED device + */ +struct mt6323_leds { + struct device *dev; + struct mt6397_chip *hw; + /* protect among process context */ + struct mutex lock; + struct mt6323_led *led[MT6323_MAX_LEDS]; +}; + +static int mt6323_led_hw_brightness(struct led_classdev *cdev, + enum led_brightness brightness) +{ + struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev); + struct mt6323_leds *leds = led->parent; + struct regmap *regmap = leds->hw->regmap; + u32 
con2_mask = 0, con2_val = 0; + int ret; + + /* + * Setup current output for the corresponding + * brightness level. + */ + con2_mask |= MT6323_ISINK_CH_STEP_MASK | + MT6323_ISINK_SFSTR0_TC_MASK | + MT6323_ISINK_SFSTR0_EN_MASK; + con2_val |= MT6323_ISINK_CH_STEP(brightness - 1) | + MT6323_ISINK_SFSTR0_TC(2) | + MT6323_ISINK_SFSTR0_EN; + + ret = regmap_update_bits(regmap, MT6323_ISINK_CON2(led->id), + con2_mask, con2_val); + return ret; +} + +static int mt6323_led_hw_off(struct led_classdev *cdev) +{ + struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev); + struct mt6323_leds *leds = led->parent; + struct regmap *regmap = leds->hw->regmap; + unsigned int status; + int ret; + + status = MT6323_ISINK_CH_EN(led->id); + ret = regmap_update_bits(regmap, MT6323_ISINK_EN_CTRL, + MT6323_ISINK_CH_EN_MASK(led->id), ~status); + if (ret < 0) + return ret; + + usleep_range(100, 300); + ret = regmap_update_bits(regmap, MT6323_TOP_CKPDN2, + MT6323_RG_ISINK_CK_PDN_MASK(led->id), + MT6323_RG_ISINK_CK_PDN(led->id)); + if (ret < 0) + return ret; + + return 0; +} + +static enum led_brightness +mt6323_get_led_hw_brightness(struct led_classdev *cdev) +{ + struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev); + struct mt6323_leds *leds = led->parent; + struct regmap *regmap = leds->hw->regmap; + unsigned int status; + int ret; + + ret = regmap_read(regmap, MT6323_TOP_CKPDN2, &status); + if (ret < 0) + return ret; + + if (status & MT6323_RG_ISINK_CK_PDN_MASK(led->id)) + return 0; + + ret = regmap_read(regmap, MT6323_ISINK_EN_CTRL, &status); + if (ret < 0) + return ret; + + if (!(status & MT6323_ISINK_CH_EN(led->id))) + return 0; + + ret = regmap_read(regmap, MT6323_ISINK_CON2(led->id), &status); + if (ret < 0) + return ret; + + return ((status & MT6323_ISINK_CH_STEP_MASK) + >> MT6323_ISINK_CH_STEP_SHIFT) + 1; +} + +static int mt6323_led_hw_on(struct led_classdev *cdev, + enum led_brightness brightness) +{ + struct mt6323_led *led = container_of(cdev, struct 
mt6323_led, cdev); + struct mt6323_leds *leds = led->parent; + struct regmap *regmap = leds->hw->regmap; + unsigned int status; + int ret; + + /* + * Setup required clock source, enable the corresponding + * clock and channel and let work with continuous blink as + * the default. + */ + ret = regmap_update_bits(regmap, MT6323_TOP_CKCON1, + MT6323_RG_ISINK_CK_SEL_MASK(led->id), 0); + if (ret < 0) + return ret; + + status = MT6323_RG_ISINK_CK_PDN(led->id); + ret = regmap_update_bits(regmap, MT6323_TOP_CKPDN2, + MT6323_RG_ISINK_CK_PDN_MASK(led->id), + ~status); + if (ret < 0) + return ret; + + usleep_range(100, 300); + + ret = regmap_update_bits(regmap, MT6323_ISINK_EN_CTRL, + MT6323_ISINK_CH_EN_MASK(led->id), + MT6323_ISINK_CH_EN(led->id)); + if (ret < 0) + return ret; + + ret = mt6323_led_hw_brightness(cdev, brightness); + if (ret < 0) + return ret; + + ret = regmap_update_bits(regmap, MT6323_ISINK_CON0(led->id), + MT6323_ISINK_DIM_DUTY_MASK, + MT6323_ISINK_DIM_DUTY(31)); + if (ret < 0) + return ret; + + ret = regmap_update_bits(regmap, MT6323_ISINK_CON1(led->id), + MT6323_ISINK_DIM_FSEL_MASK, + MT6323_ISINK_DIM_FSEL(1000)); + if (ret < 0) + return ret; + + return 0; +} + +static int mt6323_led_set_blink(struct led_classdev *cdev, + unsigned long *delay_on, + unsigned long *delay_off) +{ + struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev); + struct mt6323_leds *leds = led->parent; + struct regmap *regmap = leds->hw->regmap; + unsigned long period; + u8 duty_hw; + int ret; + + /* + * Units are in ms, if over the hardware able + * to support, fallback into software blink + */ + period = *delay_on + *delay_off; + + if (period > MT6323_MAX_PERIOD) + return -EINVAL; + + /* + * LED subsystem requires a default user + * friendly blink pattern for the LED so using + * 1Hz duty cycle 50% here if without specific + * value delay_on and delay off being assigned. 
+ */ + if (!*delay_on && !*delay_off) { + *delay_on = 500; + *delay_off = 500; + } + + /* + * Calculate duty_hw based on the percentage of period during + * which the led is ON. + */ + duty_hw = MT6323_CAL_HW_DUTY(*delay_on, period); + + /* hardware doesn't support zero duty cycle. */ + if (!duty_hw) + return -EINVAL; + + mutex_lock(&leds->lock); + /* + * Set max_brightness as the software blink behavior + * when no blink brightness. + */ + if (!led->current_brightness) { + ret = mt6323_led_hw_on(cdev, cdev->max_brightness); + if (ret < 0) + goto out; + led->current_brightness = cdev->max_brightness; + } + + ret = regmap_update_bits(regmap, MT6323_ISINK_CON0(led->id), + MT6323_ISINK_DIM_DUTY_MASK, + MT6323_ISINK_DIM_DUTY(duty_hw - 1)); + if (ret < 0) + goto out; + + ret = regmap_update_bits(regmap, MT6323_ISINK_CON1(led->id), + MT6323_ISINK_DIM_FSEL_MASK, + MT6323_ISINK_DIM_FSEL(period - 1)); +out: + mutex_unlock(&leds->lock); + + return ret; +} + +static int mt6323_led_set_brightness(struct led_classdev *cdev, + enum led_brightness brightness) +{ + struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev); + struct mt6323_leds *leds = led->parent; + int ret; + + mutex_lock(&leds->lock); + + if (!led->current_brightness && brightness) { + ret = mt6323_led_hw_on(cdev, brightness); + if (ret < 0) + goto out; + } else if (brightness) { + ret = mt6323_led_hw_brightness(cdev, brightness); + if (ret < 0) + goto out; + } else { + ret = mt6323_led_hw_off(cdev); + if (ret < 0) + goto out; + } + + led->current_brightness = brightness; +out: + mutex_unlock(&leds->lock); + + return ret; +} + +static int mt6323_led_set_dt_default(struct led_classdev *cdev, + struct device_node *np) +{ + struct mt6323_led *led = container_of(cdev, struct mt6323_led, cdev); + const char *state; + int ret = 0; + + led->cdev.name = of_get_property(np, "label", NULL) ? 
: np->name; + led->cdev.default_trigger = of_get_property(np, + "linux,default-trigger", + NULL); + + state = of_get_property(np, "default-state", NULL); + if (state) { + if (!strcmp(state, "keep")) { + ret = mt6323_get_led_hw_brightness(cdev); + if (ret < 0) + return ret; + led->current_brightness = ret; + ret = 0; + } else if (!strcmp(state, "on")) { + ret = + mt6323_led_set_brightness(cdev, cdev->max_brightness); + } else { + ret = mt6323_led_set_brightness(cdev, LED_OFF); + } + } + + return ret; +} + +static int mt6323_led_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = pdev->dev.of_node; + struct device_node *child; + struct mt6397_chip *hw = dev_get_drvdata(pdev->dev.parent); + struct mt6323_leds *leds; + struct mt6323_led *led; + int ret; + unsigned int status; + u32 reg; + + leds = devm_kzalloc(dev, sizeof(*leds), GFP_KERNEL); + if (!leds) + return -ENOMEM; + + platform_set_drvdata(pdev, leds); + leds->dev = dev; + + /* + * leds->hw points to the underlying bus for the register + * controlled. 
+ */ + leds->hw = hw; + mutex_init(&leds->lock); + + status = MT6323_RG_DRV_32K_CK_PDN; + ret = regmap_update_bits(leds->hw->regmap, MT6323_TOP_CKPDN0, + MT6323_RG_DRV_32K_CK_PDN_MASK, ~status); + if (ret < 0) { + dev_err(leds->dev, + "Failed to update MT6323_TOP_CKPDN0 Register\n"); + return ret; + } + + for_each_available_child_of_node(np, child) { + ret = of_property_read_u32(child, "reg", &reg); + if (ret) { + dev_err(dev, "Failed to read led 'reg' property\n"); + goto put_child_node; + } + + if (reg >= MT6323_MAX_LEDS || leds->led[reg]) { + dev_err(dev, "Invalid led reg %u\n", reg); + ret = -EINVAL; + goto put_child_node; + } + + led = devm_kzalloc(dev, sizeof(*led), GFP_KERNEL); + if (!led) { + ret = -ENOMEM; + goto put_child_node; + } + + leds->led[reg] = led; + leds->led[reg]->id = reg; + leds->led[reg]->cdev.max_brightness = MT6323_MAX_BRIGHTNESS; + leds->led[reg]->cdev.brightness_set_blocking = + mt6323_led_set_brightness; + leds->led[reg]->cdev.blink_set = mt6323_led_set_blink; + leds->led[reg]->cdev.brightness_get = + mt6323_get_led_hw_brightness; + leds->led[reg]->parent = leds; + + ret = mt6323_led_set_dt_default(&leds->led[reg]->cdev, child); + if (ret < 0) { + dev_err(leds->dev, + "Failed to LED set default from devicetree\n"); + goto put_child_node; + } + + ret = devm_led_classdev_register(dev, &leds->led[reg]->cdev); + if (ret) { + dev_err(&pdev->dev, "Failed to register LED: %d\n", + ret); + goto put_child_node; + } + leds->led[reg]->cdev.dev->of_node = child; + } + + return 0; + +put_child_node: + of_node_put(child); + return ret; +} + +static int mt6323_led_remove(struct platform_device *pdev) +{ + struct mt6323_leds *leds = platform_get_drvdata(pdev); + int i; + + /* Turn the LEDs off on driver removal.
*/ + for (i = 0 ; leds->led[i] ; i++) + mt6323_led_hw_off(&leds->led[i]->cdev); + + regmap_update_bits(leds->hw->regmap, MT6323_TOP_CKPDN0, + MT6323_RG_DRV_32K_CK_PDN_MASK, + MT6323_RG_DRV_32K_CK_PDN); + + mutex_destroy(&leds->lock); + + return 0; +} + +static const struct of_device_id mt6323_led_dt_match[] = { + { .compatible = "mediatek,mt6323-led" }, + {}, +}; +MODULE_DEVICE_TABLE(of, mt6323_led_dt_match); + +static struct platform_driver mt6323_led_driver = { + .probe = mt6323_led_probe, + .remove = mt6323_led_remove, + .driver = { + .name = "mt6323-led", + .of_match_table = mt6323_led_dt_match, + }, +}; + +module_platform_driver(mt6323_led_driver); + +MODULE_DESCRIPTION("LED driver for Mediatek MT6323 PMIC"); +MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c index 06e63106ae1e..7fea18b0c15d 100644 --- a/drivers/leds/leds-pca9532.c +++ b/drivers/leds/leds-pca9532.c @@ -254,6 +254,21 @@ static void pca9532_input_work(struct work_struct *work) mutex_unlock(&data->update_lock); } +static enum pca9532_state pca9532_getled(struct pca9532_led *led) +{ + struct i2c_client *client = led->client; + struct pca9532_data *data = i2c_get_clientdata(client); + u8 maxleds = data->chip_info->num_leds; + char reg; + enum pca9532_state ret; + + mutex_lock(&data->update_lock); + reg = i2c_smbus_read_byte_data(client, LED_REG(maxleds, led->id)); + ret = reg >> LED_NUM(led->id)/2; + mutex_unlock(&data->update_lock); + return ret; +} + #ifdef CONFIG_LEDS_PCA9532_GPIO static int pca9532_gpio_request_pin(struct gpio_chip *gc, unsigned offset) { @@ -366,7 +381,10 @@ static int pca9532_configure(struct i2c_client *client, gpios++; break; case PCA9532_TYPE_LED: - led->state = pled->state; + if (pled->state == PCA9532_KEEP) + led->state = pca9532_getled(led); + else + led->state = pled->state; led->name = pled->name; led->ldev.name = led->name; led->ldev.default_trigger = 
pled->default_trigger; @@ -456,6 +474,7 @@ pca9532_of_populate_pdata(struct device *dev, struct device_node *np) const struct of_device_id *match; int devid, maxleds; int i = 0; + const char *state; match = of_match_device(of_pca9532_leds_match, dev); if (!match) @@ -475,6 +494,12 @@ pca9532_of_populate_pdata(struct device *dev, struct device_node *np) of_property_read_u32(child, "type", &pdata->leds[i].type); of_property_read_string(child, "linux,default-trigger", &pdata->leds[i].default_trigger); + if (!of_property_read_string(child, "default-state", &state)) { + if (!strcmp(state, "on")) + pdata->leds[i].state = PCA9532_ON; + else if (!strcmp(state, "keep")) + pdata->leds[i].state = PCA9532_KEEP; + } if (++i >= maxleds) { of_node_put(child); break; diff --git a/drivers/leds/trigger/ledtrig-cpu.c b/drivers/leds/trigger/ledtrig-cpu.c index a41896468cb3..66a626091936 100644 --- a/drivers/leds/trigger/ledtrig-cpu.c +++ b/drivers/leds/trigger/ledtrig-cpu.c @@ -31,12 +31,16 @@ #define MAX_NAME_LEN 8 struct led_trigger_cpu { + bool is_active; char name[MAX_NAME_LEN]; struct led_trigger *_trig; }; static DEFINE_PER_CPU(struct led_trigger_cpu, cpu_trig); +static struct led_trigger *trig_cpu_all; +static atomic_t num_active_cpus = ATOMIC_INIT(0); + /** * ledtrig_cpu - emit a CPU event as a trigger * @evt: CPU event to be emitted @@ -47,26 +51,46 @@ static DEFINE_PER_CPU(struct led_trigger_cpu, cpu_trig); void ledtrig_cpu(enum cpu_led_event ledevt) { struct led_trigger_cpu *trig = this_cpu_ptr(&cpu_trig); + bool is_active = trig->is_active; /* Locate the correct CPU LED */ switch (ledevt) { case CPU_LED_IDLE_END: case CPU_LED_START: /* Will turn the LED on, max brightness */ - led_trigger_event(trig->_trig, LED_FULL); + is_active = true; break; case CPU_LED_IDLE_START: case CPU_LED_STOP: case CPU_LED_HALTED: /* Will turn the LED off */ - led_trigger_event(trig->_trig, LED_OFF); + is_active = false; break; default: /* Will leave the LED as it is */ break; } + + if 
(is_active != trig->is_active) { + unsigned int active_cpus; + unsigned int total_cpus; + + /* Update trigger state */ + trig->is_active = is_active; + atomic_add(is_active ? 1 : -1, &num_active_cpus); + active_cpus = atomic_read(&num_active_cpus); + total_cpus = num_present_cpus(); + + led_trigger_event(trig->_trig, + is_active ? LED_FULL : LED_OFF); + + + led_trigger_event(trig_cpu_all, + DIV_ROUND_UP(LED_FULL * active_cpus, total_cpus)); + + } } EXPORT_SYMBOL(ledtrig_cpu); @@ -113,6 +137,11 @@ static int __init ledtrig_cpu_init(void) BUILD_BUG_ON(CONFIG_NR_CPUS > 9999); /* + * Registering a trigger for all CPUs. + */ + led_trigger_register_simple("cpu", &trig_cpu_all); + + /* * Registering CPU led trigger for each CPU core here * ignores CPU hotplug, but after this CPU hotplug works * fine with this trigger. diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 052714106b7b..ead61a93cb4e 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -33,4 +33,13 @@ config NVM_RRPC host. The target is implemented using a linear mapping table and cost-based garbage collection. It is optimized for 4K IO sizes. +config NVM_PBLK + tristate "Physical Block Device Open-Channel SSD target" + ---help--- + Allows an open-channel SSD to be exposed as a block device to the + host. The target assumes the device exposes raw flash and must be + explicitly managed by the host. + + Please note the disk format is considered EXPERIMENTAL for now. 
+ endif # NVM diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile index b2a39e2d2895..82d1a117fb27 100644 --- a/drivers/lightnvm/Makefile +++ b/drivers/lightnvm/Makefile @@ -4,3 +4,8 @@ obj-$(CONFIG_NVM) := core.o obj-$(CONFIG_NVM_RRPC) += rrpc.o +obj-$(CONFIG_NVM_PBLK) += pblk.o +pblk-y := pblk-init.o pblk-core.o pblk-rb.o \ + pblk-write.o pblk-cache.o pblk-read.o \ + pblk-gc.o pblk-recovery.o pblk-map.o \ + pblk-rl.o pblk-sysfs.o diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 5262ba66a7a7..54a06c3a2b8c 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -89,7 +89,7 @@ static void nvm_release_luns_err(struct nvm_dev *dev, int lun_begin, WARN_ON(!test_and_clear_bit(i, dev->lun_map)); } -static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev) +static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear) { struct nvm_dev *dev = tgt_dev->parent; struct nvm_dev_map *dev_map = tgt_dev->map; @@ -100,11 +100,14 @@ static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev) int *lun_offs = ch_map->lun_offs; int ch = i + ch_map->ch_off; - for (j = 0; j < ch_map->nr_luns; j++) { - int lun = j + lun_offs[j]; - int lunid = (ch * dev->geo.luns_per_chnl) + lun; + if (clear) { + for (j = 0; j < ch_map->nr_luns; j++) { + int lun = j + lun_offs[j]; + int lunid = (ch * dev->geo.luns_per_chnl) + lun; - WARN_ON(!test_and_clear_bit(lunid, dev->lun_map)); + WARN_ON(!test_and_clear_bit(lunid, + dev->lun_map)); + } } kfree(ch_map->lun_offs); @@ -232,6 +235,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) struct nvm_target *t; struct nvm_tgt_dev *tgt_dev; void *targetdata; + int ret; tt = nvm_find_target_type(create->tgttype, 1); if (!tt) { @@ -252,34 +256,43 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) return -ENOMEM; t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); - if (!t) + if (!t) { + ret = -ENOMEM; goto err_reserve; + } tgt_dev = 
nvm_create_tgt_dev(dev, s->lun_begin, s->lun_end); if (!tgt_dev) { pr_err("nvm: could not create target device\n"); + ret = -ENOMEM; goto err_t; } - tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); - if (!tqueue) + tdisk = alloc_disk(0); + if (!tdisk) { + ret = -ENOMEM; goto err_dev; - blk_queue_make_request(tqueue, tt->make_rq); + } - tdisk = alloc_disk(0); - if (!tdisk) - goto err_queue; + tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); + if (!tqueue) { + ret = -ENOMEM; + goto err_disk; + } + blk_queue_make_request(tqueue, tt->make_rq); - sprintf(tdisk->disk_name, "%s", create->tgtname); + strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name)); tdisk->flags = GENHD_FL_EXT_DEVT; tdisk->major = 0; tdisk->first_minor = 0; tdisk->fops = &nvm_fops; tdisk->queue = tqueue; - targetdata = tt->init(tgt_dev, tdisk); - if (IS_ERR(targetdata)) + targetdata = tt->init(tgt_dev, tdisk, create->flags); + if (IS_ERR(targetdata)) { + ret = PTR_ERR(targetdata); goto err_init; + } tdisk->private_data = targetdata; tqueue->queuedata = targetdata; @@ -289,8 +302,10 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) set_capacity(tdisk, tt->capacity(targetdata)); add_disk(tdisk); - if (tt->sysfs_init && tt->sysfs_init(tdisk)) + if (tt->sysfs_init && tt->sysfs_init(tdisk)) { + ret = -ENOMEM; goto err_sysfs; + } t->type = tt; t->disk = tdisk; @@ -305,16 +320,17 @@ err_sysfs: if (tt->exit) tt->exit(targetdata); err_init: - put_disk(tdisk); -err_queue: blk_cleanup_queue(tqueue); + tdisk->queue = NULL; +err_disk: + put_disk(tdisk); err_dev: - nvm_remove_tgt_dev(tgt_dev); + nvm_remove_tgt_dev(tgt_dev, 0); err_t: kfree(t); err_reserve: nvm_release_luns_err(dev, s->lun_begin, s->lun_end); - return -ENOMEM; + return ret; } static void __nvm_remove_target(struct nvm_target *t) @@ -332,7 +348,7 @@ static void __nvm_remove_target(struct nvm_target *t) if (tt->exit) tt->exit(tdisk->private_data); - nvm_remove_tgt_dev(t->dev); + 
nvm_remove_tgt_dev(t->dev, 1); put_disk(tdisk); list_del(&t->list); @@ -411,6 +427,18 @@ err_rmap: return -ENOMEM; } +static void nvm_unregister_map(struct nvm_dev *dev) +{ + struct nvm_dev_map *rmap = dev->rmap; + int i; + + for (i = 0; i < dev->geo.nr_chnls; i++) + kfree(rmap->chnls[i].lun_offs); + + kfree(rmap->chnls); + kfree(rmap); +} + static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) { struct nvm_dev_map *dev_map = tgt_dev->map; @@ -486,7 +514,6 @@ void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries, int *lun_roffs; struct ppa_addr gaddr; u64 pba = le64_to_cpu(entries[i]); - int off; u64 diff; if (!pba) @@ -496,8 +523,6 @@ void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries, ch_rmap = &dev_rmap->chnls[gaddr.g.ch]; lun_roffs = ch_rmap->lun_offs; - off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun; - diff = ((ch_rmap->ch_off * geo->luns_per_chnl) + (lun_roffs[gaddr.g.lun])) * geo->sec_per_lun; @@ -590,11 +615,11 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, memset(&rqd, 0, sizeof(struct nvm_rq)); - nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1); + nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1); nvm_rq_tgt_to_dev(tgt_dev, &rqd); ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); - nvm_free_rqd_ppalist(dev, &rqd); + nvm_free_rqd_ppalist(tgt_dev, &rqd); if (ret) { pr_err("nvm: failed bb mark\n"); return -EINVAL; @@ -626,34 +651,45 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) } EXPORT_SYMBOL(nvm_submit_io); -int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int flags) +static void nvm_end_io_sync(struct nvm_rq *rqd) { - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_rq rqd; - int ret; + struct completion *waiting = rqd->private; - if (!dev->ops->erase_block) - return 0; + complete(waiting); +} - nvm_map_to_dev(tgt_dev, ppas); +int nvm_erase_sync(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, + int nr_ppas) +{ + 
struct nvm_geo *geo = &tgt_dev->geo; + struct nvm_rq rqd; + int ret; + DECLARE_COMPLETION_ONSTACK(wait); memset(&rqd, 0, sizeof(struct nvm_rq)); - ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, 1, 1); + rqd.opcode = NVM_OP_ERASE; + rqd.end_io = nvm_end_io_sync; + rqd.private = &wait; + rqd.flags = geo->plane_mode >> 1; + + ret = nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1); if (ret) return ret; - nvm_rq_tgt_to_dev(tgt_dev, &rqd); - - rqd.flags = flags; - - ret = dev->ops->erase_block(dev, &rqd); + ret = nvm_submit_io(tgt_dev, &rqd); + if (ret) { + pr_err("rrpr: erase I/O submission failed: %d\n", ret); + goto free_ppa_list; + } + wait_for_completion_io(&wait); - nvm_free_rqd_ppalist(dev, &rqd); +free_ppa_list: + nvm_free_rqd_ppalist(tgt_dev, &rqd); return ret; } -EXPORT_SYMBOL(nvm_erase_blk); +EXPORT_SYMBOL(nvm_erase_sync); int nvm_get_l2p_tbl(struct nvm_tgt_dev *tgt_dev, u64 slba, u32 nlb, nvm_l2p_update_fn *update_l2p, void *priv) @@ -732,10 +768,11 @@ void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t begin) } EXPORT_SYMBOL(nvm_put_area); -int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, +int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, const struct ppa_addr *ppas, int nr_ppas, int vblk) { - struct nvm_geo *geo = &dev->geo; + struct nvm_dev *dev = tgt_dev->parent; + struct nvm_geo *geo = &tgt_dev->geo; int i, plane_cnt, pl_idx; struct ppa_addr ppa; @@ -773,12 +810,12 @@ int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, } EXPORT_SYMBOL(nvm_set_rqd_ppalist); -void nvm_free_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd) +void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { if (!rqd->ppa_list) return; - nvm_dev_dma_free(dev, rqd->ppa_list, rqd->dma_ppa_list); + nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); } EXPORT_SYMBOL(nvm_free_rqd_ppalist); @@ -972,7 +1009,7 @@ err_fmtype: return ret; } -void nvm_free(struct nvm_dev *dev) +static void 
nvm_free(struct nvm_dev *dev) { if (!dev) return; @@ -980,7 +1017,7 @@ void nvm_free(struct nvm_dev *dev) if (dev->dma_pool) dev->ops->destroy_dma_pool(dev->dma_pool); - kfree(dev->rmap); + nvm_unregister_map(dev); kfree(dev->lptbl); kfree(dev->lun_map); kfree(dev); @@ -1174,13 +1211,13 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg) list_for_each_entry(dev, &nvm_devices, devices) { struct nvm_ioctl_device_info *info = &devices->info[i]; - sprintf(info->devname, "%s", dev->name); + strlcpy(info->devname, dev->name, sizeof(info->devname)); /* kept for compatibility */ info->bmversion[0] = 1; info->bmversion[1] = 0; info->bmversion[2] = 0; - sprintf(info->bmname, "%s", "gennvm"); + strlcpy(info->bmname, "gennvm", sizeof(info->bmname)); i++; if (i > 31) { @@ -1217,8 +1254,16 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg) create.tgtname[DISK_NAME_LEN - 1] = '\0'; if (create.flags != 0) { - pr_err("nvm: no flags supported\n"); - return -EINVAL; + __u32 flags = create.flags; + + /* Check for valid flags */ + if (flags & NVM_TARGET_FACTORY) + flags &= ~NVM_TARGET_FACTORY; + + if (flags) { + pr_err("nvm: flag not supported\n"); + return -EINVAL; + } } return __nvm_configure_create(&create); diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c new file mode 100644 index 000000000000..59bcea88db84 --- /dev/null +++ b/drivers/lightnvm/pblk-cache.c @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * pblk-cache.c - pblk's write cache + */ + +#include "pblk.h" + +int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags) +{ + struct pblk_w_ctx w_ctx; + sector_t lba = pblk_get_lba(bio); + unsigned int bpos, pos; + int nr_entries = pblk_get_secs(bio); + int i, ret; + + /* Update the write buffer head (mem) with the entries that we can + * write. The write in itself cannot fail, so there is no need to + * rollback from here on. + */ +retry: + ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos); + if (ret == NVM_IO_REQUEUE) { + io_schedule(); + goto retry; + } + + if (unlikely(!bio_has_data(bio))) + goto out; + + w_ctx.flags = flags; + pblk_ppa_set_empty(&w_ctx.ppa); + + for (i = 0; i < nr_entries; i++) { + void *data = bio_data(bio); + + w_ctx.lba = lba + i; + + pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i); + pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos); + + bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(nr_entries, &pblk->inflight_writes); + atomic_long_add(nr_entries, &pblk->req_writes); +#endif + +out: + pblk_write_should_kick(pblk); + return ret; +} + +/* + * On GC the incoming lbas are not necessarily sequential. Also, some of the + * lbas might not be valid entries, which are marked as empty by the GC thread + */ +int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list, + unsigned int nr_entries, unsigned int nr_rec_entries, + struct pblk_line *gc_line, unsigned long flags) +{ + struct pblk_w_ctx w_ctx; + unsigned int bpos, pos; + int i, valid_entries; + + /* Update the write buffer head (mem) with the entries that we can + * write. The write in itself cannot fail, so there is no need to + * rollback from here on. 
+ */ +retry: + if (!pblk_rb_may_write_gc(&pblk->rwb, nr_rec_entries, &bpos)) { + io_schedule(); + goto retry; + } + + w_ctx.flags = flags; + pblk_ppa_set_empty(&w_ctx.ppa); + + for (i = 0, valid_entries = 0; i < nr_entries; i++) { + if (lba_list[i] == ADDR_EMPTY) + continue; + + w_ctx.lba = lba_list[i]; + + pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries); + pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_line, pos); + + data += PBLK_EXPOSED_PAGE_SIZE; + valid_entries++; + } + + WARN_ONCE(nr_rec_entries != valid_entries, + "pblk: inconsistent GC write\n"); + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(valid_entries, &pblk->inflight_writes); + atomic_long_add(valid_entries, &pblk->recov_gc_writes); +#endif + + pblk_write_should_kick(pblk); + return NVM_IO_OK; +} diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c new file mode 100644 index 000000000000..5e44768ccffa --- /dev/null +++ b/drivers/lightnvm/pblk-core.c @@ -0,0 +1,1667 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * pblk-core.c - pblk's core functionality + * + */ + +#include "pblk.h" +#include <linux/time.h> + +static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, + struct ppa_addr *ppa) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int pos = pblk_dev_ppa_to_pos(geo, *ppa); + + pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos); + atomic_long_inc(&pblk->erase_failed); + + atomic_dec(&line->blk_in_line); + if (test_and_set_bit(pos, line->blk_bitmap)) + pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n", + line->id, pos); + + pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb); +} + +static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct pblk_line *line; + + line = &pblk->lines[pblk_dev_ppa_to_line(rqd->ppa_addr)]; + atomic_dec(&line->left_seblks); + + if (rqd->error) { + struct ppa_addr *ppa; + + ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC); + if (!ppa) + return; + + *ppa = rqd->ppa_addr; + pblk_mark_bb(pblk, line, ppa); + } +} + +/* Erase completion assumes that only one block is erased at a time */ +static void pblk_end_io_erase(struct nvm_rq *rqd) +{ + struct pblk *pblk = rqd->private; + + up(&pblk->erase_sem); + __pblk_end_io_erase(pblk, rqd); + mempool_free(rqd, pblk->r_rq_pool); +} + +static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, + u64 paddr) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct list_head *move_list = NULL; + + /* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P + * table is modified with reclaimed sectors, a check is done to ensure + * that newer updates are not overwritten.
+ */ + spin_lock(&line->lock); + if (line->state == PBLK_LINESTATE_GC || + line->state == PBLK_LINESTATE_FREE) { + spin_unlock(&line->lock); + return; + } + + if (test_and_set_bit(paddr, line->invalid_bitmap)) { + WARN_ONCE(1, "pblk: double invalidate\n"); + spin_unlock(&line->lock); + return; + } + line->vsc--; + + if (line->state == PBLK_LINESTATE_CLOSED) + move_list = pblk_line_gc_list(pblk, line); + spin_unlock(&line->lock); + + if (move_list) { + spin_lock(&l_mg->gc_lock); + spin_lock(&line->lock); + /* Prevent moving a line that has just been chosen for GC */ + if (line->state == PBLK_LINESTATE_GC || + line->state == PBLK_LINESTATE_FREE) { + spin_unlock(&line->lock); + spin_unlock(&l_mg->gc_lock); + return; + } + spin_unlock(&line->lock); + + list_move_tail(&line->list, move_list); + spin_unlock(&l_mg->gc_lock); + } +} + +void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa) +{ + struct pblk_line *line; + u64 paddr; + int line_id; + +#ifdef CONFIG_NVM_DEBUG + /* Callers must ensure that the ppa points to a device address */ + BUG_ON(pblk_addr_in_cache(ppa)); + BUG_ON(pblk_ppa_empty(ppa)); +#endif + + line_id = pblk_tgt_ppa_to_line(ppa); + line = &pblk->lines[line_id]; + paddr = pblk_dev_ppa_to_line_addr(pblk, ppa); + + __pblk_map_invalidate(pblk, line, paddr); +} + +void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line, + u64 paddr) +{ + __pblk_map_invalidate(pblk, line, paddr); + + pblk_rb_sync_init(&pblk->rwb, NULL); + line->left_ssecs--; + if (!line->left_ssecs) + pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws); + pblk_rb_sync_end(&pblk->rwb, NULL); +} + +static void pblk_invalidate_range(struct pblk *pblk, sector_t slba, + unsigned int nr_secs) +{ + sector_t lba; + + spin_lock(&pblk->trans_lock); + for (lba = slba; lba < slba + nr_secs; lba++) { + struct ppa_addr ppa; + + ppa = pblk_trans_map_get(pblk, lba); + + if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa)) + pblk_map_invalidate(pblk, ppa); + + 
pblk_ppa_set_empty(&ppa); + pblk_trans_map_set(pblk, lba, ppa); + } + spin_unlock(&pblk->trans_lock); +} + +struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw) +{ + mempool_t *pool; + struct nvm_rq *rqd; + int rq_size; + + if (rw == WRITE) { + pool = pblk->w_rq_pool; + rq_size = pblk_w_rq_size; + } else { + pool = pblk->r_rq_pool; + rq_size = pblk_r_rq_size; + } + + rqd = mempool_alloc(pool, GFP_KERNEL); + memset(rqd, 0, rq_size); + + return rqd; +} + +void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw) +{ + mempool_t *pool; + + if (rw == WRITE) + pool = pblk->w_rq_pool; + else + pool = pblk->r_rq_pool; + + mempool_free(rqd, pool); +} + +void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, + int nr_pages) +{ + struct bio_vec bv; + int i; + + WARN_ON(off + nr_pages != bio->bi_vcnt); + + bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE); + for (i = off; i < nr_pages + off; i++) { + bv = bio->bi_io_vec[i]; + mempool_free(bv.bv_page, pblk->page_pool); + } +} + +int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, + int nr_pages) +{ + struct request_queue *q = pblk->dev->q; + struct page *page; + int i, ret; + + for (i = 0; i < nr_pages; i++) { + page = mempool_alloc(pblk->page_pool, flags); + if (!page) + goto err; + + ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0); + if (ret != PBLK_EXPOSED_PAGE_SIZE) { + pr_err("pblk: could not add page to bio\n"); + mempool_free(page, pblk->page_pool); + goto err; + } + } + + return 0; +err: + pblk_bio_free_pages(pblk, bio, 0, i - 1); + return -1; +} + +static void pblk_write_kick(struct pblk *pblk) +{ + wake_up_process(pblk->writer_ts); + mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000)); +} + +void pblk_write_timer_fn(unsigned long data) +{ + struct pblk *pblk = (struct pblk *)data; + + /* kick the write thread every tick to flush outstanding data */ + pblk_write_kick(pblk); +} + +void pblk_write_should_kick(struct pblk *pblk) +{ + unsigned int secs_avail 
= pblk_rb_read_count(&pblk->rwb); + + if (secs_avail >= pblk->min_write_pgs) + pblk_write_kick(pblk); +} + +void pblk_end_bio_sync(struct bio *bio) +{ + struct completion *waiting = bio->bi_private; + + complete(waiting); +} + +void pblk_end_io_sync(struct nvm_rq *rqd) +{ + struct completion *waiting = rqd->private; + + complete(waiting); +} + +void pblk_flush_writer(struct pblk *pblk) +{ + struct bio *bio; + int ret; + DECLARE_COMPLETION_ONSTACK(wait); + + bio = bio_alloc(GFP_KERNEL, 1); + if (!bio) + return; + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH); + bio->bi_private = &wait; + bio->bi_end_io = pblk_end_bio_sync; + + ret = pblk_write_to_cache(pblk, bio, 0); + if (ret == NVM_IO_OK) { + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: flush cache timed out\n"); + } + } else if (ret != NVM_IO_DONE) { + pr_err("pblk: tear down bio failed\n"); + } + + if (bio->bi_error) + pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error); + + bio_put(bio); +} + +struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct list_head *move_list = NULL; + + if (!line->vsc) { + if (line->gc_group != PBLK_LINEGC_FULL) { + line->gc_group = PBLK_LINEGC_FULL; + move_list = &l_mg->gc_full_list; + } + } else if (line->vsc < lm->mid_thrs) { + if (line->gc_group != PBLK_LINEGC_HIGH) { + line->gc_group = PBLK_LINEGC_HIGH; + move_list = &l_mg->gc_high_list; + } + } else if (line->vsc < lm->high_thrs) { + if (line->gc_group != PBLK_LINEGC_MID) { + line->gc_group = PBLK_LINEGC_MID; + move_list = &l_mg->gc_mid_list; + } + } else if (line->vsc < line->sec_in_line) { + if (line->gc_group != PBLK_LINEGC_LOW) { + line->gc_group = PBLK_LINEGC_LOW; + move_list = &l_mg->gc_low_list; + } + } else if (line->vsc == line->sec_in_line) { + if (line->gc_group != 
PBLK_LINEGC_EMPTY) { + line->gc_group = PBLK_LINEGC_EMPTY; + move_list = &l_mg->gc_empty_list; + } + } else { + line->state = PBLK_LINESTATE_CORRUPT; + line->gc_group = PBLK_LINEGC_NONE; + move_list = &l_mg->corrupt_list; + pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n", + line->id, line->vsc, + line->sec_in_line, + lm->high_thrs, lm->mid_thrs); + } + + return move_list; +} + +void pblk_discard(struct pblk *pblk, struct bio *bio) +{ + sector_t slba = pblk_get_lba(bio); + sector_t nr_secs = pblk_get_secs(bio); + + pblk_invalidate_range(pblk, slba, nr_secs); +} + +struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba) +{ + struct ppa_addr ppa; + + spin_lock(&pblk->trans_lock); + ppa = pblk_trans_map_get(pblk, lba); + spin_unlock(&pblk->trans_lock); + + return ppa; +} + +void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd) +{ + atomic_long_inc(&pblk->write_failed); +#ifdef CONFIG_NVM_DEBUG + pblk_print_failed_rqd(pblk, rqd, rqd->error); +#endif +} + +void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd) +{ + /* Empty page read is not necessarily an error (e.g., L2P recovery) */ + if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) { + atomic_long_inc(&pblk->read_empty); + return; + } + + switch (rqd->error) { + case NVM_RSP_WARN_HIGHECC: + atomic_long_inc(&pblk->read_high_ecc); + break; + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_ERR_FAILCRC: + atomic_long_inc(&pblk->read_failed); + break; + default: + pr_err("pblk: unknown read error:%d\n", rqd->error); + } +#ifdef CONFIG_NVM_DEBUG + pblk_print_failed_rqd(pblk, rqd, rqd->error); +#endif +} + +int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + +#ifdef CONFIG_NVM_DEBUG + struct ppa_addr *ppa_list; + + ppa_list = (rqd->nr_ppas > 1) ? 
rqd->ppa_list : &rqd->ppa_addr; + if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) { + WARN_ON(1); + return -EINVAL; + } + + if (rqd->opcode == NVM_OP_PWRITE) { + struct pblk_line *line; + struct ppa_addr ppa; + int i; + + for (i = 0; i < rqd->nr_ppas; i++) { + ppa = ppa_list[i]; + line = &pblk->lines[pblk_dev_ppa_to_line(ppa)]; + + spin_lock(&line->lock); + if (line->state != PBLK_LINESTATE_OPEN) { + pr_err("pblk: bad ppa: line:%d,state:%d\n", + line->id, line->state); + WARN_ON(1); + spin_unlock(&line->lock); + return -EINVAL; + } + spin_unlock(&line->lock); + } + } +#endif + return nvm_submit_io(dev, rqd); +} + +struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, + unsigned int nr_secs, unsigned int len, + gfp_t gfp_mask) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + void *kaddr = data; + struct page *page; + struct bio *bio; + int i, ret; + + if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META) + return bio_map_kern(dev->q, kaddr, len, gfp_mask); + + bio = bio_kmalloc(gfp_mask, nr_secs); + if (!bio) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < nr_secs; i++) { + page = vmalloc_to_page(kaddr); + if (!page) { + pr_err("pblk: could not map vmalloc bio\n"); + bio_put(bio); + bio = ERR_PTR(-ENOMEM); + goto out; + } + + ret = bio_add_pc_page(dev->q, bio, page, PAGE_SIZE, 0); + if (ret != PAGE_SIZE) { + pr_err("pblk: could not add page to bio\n"); + bio_put(bio); + bio = ERR_PTR(-ENOMEM); + goto out; + } + + kaddr += PAGE_SIZE; + } +out: + return bio; +} + +int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, + unsigned long secs_to_flush) +{ + int max = pblk->max_write_pgs; + int min = pblk->min_write_pgs; + int secs_to_sync = 0; + + if (secs_avail >= max) + secs_to_sync = max; + else if (secs_avail >= min) + secs_to_sync = min * (secs_avail / min); + else if (secs_to_flush) + secs_to_sync = min; + + return secs_to_sync; +} + +static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, 
+ int nr_secs) +{ + u64 addr; + int i; + + /* logic error: ppa out-of-bounds. Prevent generating bad address */ + if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) { + WARN(1, "pblk: page allocation out of bounds\n"); + nr_secs = pblk->lm.sec_per_line - line->cur_sec; + } + + line->cur_sec = addr = find_next_zero_bit(line->map_bitmap, + pblk->lm.sec_per_line, line->cur_sec); + for (i = 0; i < nr_secs; i++, line->cur_sec++) + WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap)); + + return addr; +} + +u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs) +{ + u64 addr; + + /* Lock needed in case a write fails and a recovery needs to remap + * failed write buffer entries + */ + spin_lock(&line->lock); + addr = __pblk_alloc_page(pblk, line, nr_secs); + line->left_msecs -= nr_secs; + WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n"); + spin_unlock(&line->lock); + + return addr; +} + +/* + * Submit emeta to one LUN in the raid line at a time to avoid a deadlock when + * taking the per LUN semaphore.
+ */
+/*
+ * pblk_line_submit_emeta_io - synchronously read or write a line's emeta
+ *
+ * The buffer at line->emeta (lm->emeta_sec sectors) is moved in requests of
+ * pblk_calc_secs() sectors each. For WRITE the target addresses come from
+ * __pblk_alloc_page(); for READ they start at @paddr and skip positions that
+ * are set in line->blk_bitmap.
+ *
+ * Returns 0 on success, -EINVAL for an unknown @dir, -ENOMEM if the ppa list
+ * cannot be allocated, -EINTR if the read walks past the line boundary, or the
+ * error reported by bio mapping / pblk_submit_io(). A timeout or a non-zero
+ * rqd.error is only logged and does not change the return value.
+ */
+static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
+				     u64 paddr, int dir)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct bio *bio;
+	struct nvm_rq rqd;
+	struct ppa_addr *ppa_list;
+	dma_addr_t dma_ppa_list;
+	void *emeta = line->emeta;
+	int min = pblk->min_write_pgs;
+	int left_ppas = lm->emeta_sec;
+	int id = line->id;
+	int rq_ppas, rq_len;
+	int cmd_op, bio_op;
+	int flags;
+	int i, j;
+	int ret;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	if (dir == WRITE) {
+		bio_op = REQ_OP_WRITE;
+		cmd_op = NVM_OP_PWRITE;
+		flags = pblk_set_progr_mode(pblk, WRITE);
+	} else if (dir == READ) {
+		bio_op = REQ_OP_READ;
+		cmd_op = NVM_OP_PREAD;
+		flags = pblk_set_read_mode(pblk);
+	} else
+		return -EINVAL;
+
+	/* One ppa list is allocated up front and reused by every request */
+	ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list);
+	if (!ppa_list)
+		return -ENOMEM;
+
+next_rq:
+	memset(&rqd, 0, sizeof(struct nvm_rq));
+
+	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+	rq_len = rq_ppas * geo->sec_size;
+
+	bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
+	if (IS_ERR(bio)) {
+		ret = PTR_ERR(bio);
+		goto free_rqd_dma;
+	}
+
+	bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(bio, bio_op, 0);
+
+	rqd.bio = bio;
+	rqd.opcode = cmd_op;
+	rqd.flags = flags;
+	rqd.nr_ppas = rq_ppas;
+	rqd.ppa_list = ppa_list;
+	rqd.dma_ppa_list = dma_ppa_list;
+	rqd.end_io = pblk_end_io_sync;
+	rqd.private = &wait;
+
+	if (dir == WRITE) {
+		/* Allocate @min sectors at a time from the line */
+		for (i = 0; i < rqd.nr_ppas; ) {
+			spin_lock(&line->lock);
+			paddr = __pblk_alloc_page(pblk, line, min);
+			spin_unlock(&line->lock);
+			for (j = 0; j < min; j++, i++, paddr++)
+				rqd.ppa_list[i] =
+					addr_to_gen_ppa(pblk, paddr, id);
+		}
+	} else {
+		for (i = 0; i < rqd.nr_ppas; ) {
+			struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
+			int pos = pblk_dev_ppa_to_pos(geo, ppa);
+
+			/* Skip positions flagged in the line's blk_bitmap */
+			while (test_bit(pos, line->blk_bitmap)) {
+				paddr += min;
+				if (pblk_boundary_paddr_checks(pblk, paddr)) {
+					pr_err("pblk: corrupt emeta line:%d\n",
+								line->id);
+					bio_put(bio);
+					ret = -EINTR;
+					goto free_rqd_dma;
+				}
+
+				ppa = addr_to_gen_ppa(pblk, paddr, id);
+				pos = pblk_dev_ppa_to_pos(geo, ppa);
+			}
+
+			if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
+				pr_err("pblk: corrupt emeta line:%d\n",
+								line->id);
+				bio_put(bio);
+				ret = -EINTR;
+				goto free_rqd_dma;
+			}
+
+			for (j = 0; j < min; j++, i++, paddr++)
+				rqd.ppa_list[i] =
+					addr_to_gen_ppa(pblk, paddr, line->id);
+		}
+	}
+
+	ret = pblk_submit_io(pblk, &rqd);
+	if (ret) {
+		pr_err("pblk: emeta I/O submission failed: %d\n", ret);
+		bio_put(bio);
+		goto free_rqd_dma;
+	}
+
+	if (!wait_for_completion_io_timeout(&wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: emeta I/O timed out\n");
+	}
+	/* The same on-stack completion is reused by the next request */
+	reinit_completion(&wait);
+
+	bio_put(bio);
+
+	if (rqd.error) {
+		if (dir == WRITE)
+			pblk_log_write_err(pblk, &rqd);
+		else
+			pblk_log_read_err(pblk, &rqd);
+	}
+
+	emeta += rq_len;
+	left_ppas -= rq_ppas;
+	if (left_ppas)
+		goto next_rq;
+free_rqd_dma:
+	nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list);
+	return ret;
+}
+
+/*
+ * Return the first sector address of the first position that is clear in
+ * line->blk_bitmap, i.e. where smeta starts. If every position is set the
+ * function returns -1 converted to u64.
+ */
+u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	int bit;
+
+	/* This usually only happens on bad lines */
+	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
+	if (bit >= lm->blk_per_line)
+		return -1;
+
+	return bit * geo->sec_per_pl;
+}
+
+/*
+ * Synchronously read or write the line's start metadata (line->smeta,
+ * lm->smeta_sec sectors) beginning at @paddr. On WRITE the matching entries
+ * of the emeta lba list are set to ADDR_EMPTY.
+ *
+ * Returns 0 on success, -EINVAL for an unknown @dir, -ENOMEM if the ppa list
+ * cannot be allocated, or the error from bio_map_kern() / pblk_submit_io().
+ * A timeout or a non-zero rqd.error is only logged.
+ */
+static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
+				     u64 paddr, int dir)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct bio *bio;
+	struct nvm_rq rqd;
+	__le64 *lba_list = NULL;
+	int i, ret;
+	int cmd_op, bio_op;
+	int flags;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	if (dir == WRITE) {
+		bio_op = REQ_OP_WRITE;
+		cmd_op = NVM_OP_PWRITE;
+		flags = pblk_set_progr_mode(pblk, WRITE);
+		lba_list = pblk_line_emeta_to_lbas(line->emeta);
+	} else if (dir == READ) {
+		bio_op = REQ_OP_READ;
+		cmd_op = NVM_OP_PREAD;
+		flags = pblk_set_read_mode(pblk);
+	} else
+		return -EINVAL;
+
+	memset(&rqd, 0, sizeof(struct nvm_rq));
+
+	rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+							&rqd.dma_ppa_list);
+	if (!rqd.ppa_list)
+		return -ENOMEM;
+
+	bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
+	if (IS_ERR(bio)) {
+		ret = PTR_ERR(bio);
+		goto free_ppa_list;
+	}
+
+	bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(bio, bio_op, 0);
+
+	rqd.bio = bio;
+	rqd.opcode = cmd_op;
+	rqd.flags = flags;
+	rqd.nr_ppas = lm->smeta_sec;
+	rqd.end_io = pblk_end_io_sync;
+	rqd.private = &wait;
+
+	for (i = 0; i < lm->smeta_sec; i++, paddr++) {
+		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
+		if (dir == WRITE)
+			lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
+	}
+
+	/*
+	 * This I/O is sent by the write thread when a line is replaced. Since
+	 * the write thread is the only one sending write and erase commands,
+	 * there is no need to take the LUN semaphore.
+	 */
+	ret = pblk_submit_io(pblk, &rqd);
+	if (ret) {
+		pr_err("pblk: smeta I/O submission failed: %d\n", ret);
+		bio_put(bio);
+		goto free_ppa_list;
+	}
+
+	if (!wait_for_completion_io_timeout(&wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: smeta I/O timed out\n");
+	}
+
+	if (rqd.error) {
+		if (dir == WRITE)
+			pblk_log_write_err(pblk, &rqd);
+		else
+			pblk_log_read_err(pblk, &rqd);
+	}
+
+free_ppa_list:
+	nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+
+	return ret;
+}
+
+/* Read smeta from the address computed by pblk_line_smeta_start() */
+int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
+{
+	u64 bpaddr = pblk_line_smeta_start(pblk, line);
+
+	return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
+}
+
+/* Read emeta starting at the line's recorded emeta start sector */
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
+{
+	return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
+}
+
+/* Fill @rqd as a single-address erase request for @ppa with no bio */
+static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
+			    struct ppa_addr ppa)
+{
+	rqd->opcode = NVM_OP_ERASE;
+	rqd->ppa_addr = ppa;
+	rqd->nr_ppas = 1;
+	rqd->flags = pblk_set_progr_mode(pblk, ERASE);
+	rqd->bio = NULL;
+}
+
+/*
+ * Erase the block at @ppa and wait for completion (or the command timeout).
+ * __pblk_end_io_erase() is always run on the request, also when submission
+ * failed. Returns 0 if the request was submitted, otherwise the error from
+ * pblk_submit_io() so that the caller can actually detect the failure.
+ */
+static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
+{
+	struct nvm_rq rqd;
+	int ret;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	memset(&rqd, 0, sizeof(struct nvm_rq));
+
+	pblk_setup_e_rq(pblk, &rqd, ppa);
+
+	rqd.end_io = pblk_end_io_sync;
+	rqd.private = &wait;
+
+	/* The write thread schedules erases so that it minimizes disturbances
+	 * with writes. Thus, there is no need to take the LUN semaphore.
+	 */
+	ret = pblk_submit_io(pblk, &rqd);
+	if (ret) {
+		struct nvm_tgt_dev *dev = pblk->dev;
+		struct nvm_geo *geo = &dev->geo;
+
+		pr_err("pblk: could not sync erase line:%d,blk:%d\n",
+					pblk_dev_ppa_to_line(ppa),
+					pblk_dev_ppa_to_pos(geo, ppa));
+
+		rqd.error = ret;
+		goto out;
+	}
+
+	if (!wait_for_completion_io_timeout(&wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: sync erase timed out\n");
+	}
+
+out:
+	rqd.private = pblk;
+	__pblk_end_io_erase(pblk, &rqd);
+
+	/* Previously 0 was returned unconditionally, hiding submit errors */
+	return ret;
+}
+
+/*
+ * Synchronously erase every block of @line whose bit is still clear in
+ * line->erase_bitmap, one block at a time. Each block is marked in the bitmap
+ * and accounted in left_eblks before its erase is issued.
+ *
+ * Returns 0 when all erases were submitted, or the error code of the first
+ * erase that could not be submitted.
+ */
+int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct ppa_addr ppa;
+	int bit = -1;
+	int ret;
+
+	/* Erase only good blocks, one at a time */
+	do {
+		spin_lock(&line->lock);
+		bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
+								bit + 1);
+		if (bit >= lm->blk_per_line) {
+			spin_unlock(&line->lock);
+			break;
+		}
+
+		ppa = pblk->luns[bit].bppa; /* set ch and lun */
+		ppa.g.blk = line->id;
+
+		atomic_dec(&line->left_eblks);
+		WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
+		spin_unlock(&line->lock);
+
+		ret = pblk_blk_erase_sync(pblk, ppa);
+		if (ret) {
+			pr_err("pblk: failed to erase line %d\n", line->id);
+			/* Report the real cause instead of a fixed -ENOMEM */
+			return ret;
+		}
+	} while (1);
+
+	return 0;
+}
+
+/* For now lines are always assumed full lines. Thus, smeta former and current
+ * lun bitmaps are omitted.
+ */
+/*
+ * Fill in the smeta and emeta headers of @line. @cur, when not NULL, is the
+ * line being replaced: its lun bitmap is copied and the prev/next ids of both
+ * lines are linked.
+ *
+ * Returns 1 on success. Returns 0 after moving the line to the bad list when
+ * fewer than lm->min_blk_line positions are clear in line->blk_bitmap.
+ */
+static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
+				  struct pblk_line *cur)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct line_smeta *smeta = line->smeta;
+	struct line_emeta *emeta = line->emeta;
+	int nr_blk_line;
+
+	/* After erasing the line, new bad blocks might appear and we risk
+	 * having an invalid line
+	 */
+	nr_blk_line = lm->blk_per_line -
+			bitmap_weight(line->blk_bitmap, lm->blk_per_line);
+	if (nr_blk_line < lm->min_blk_line) {
+		spin_lock(&l_mg->free_lock);
+		spin_lock(&line->lock);
+		line->state = PBLK_LINESTATE_BAD;
+		spin_unlock(&line->lock);
+
+		list_add_tail(&line->list, &l_mg->bad_list);
+		spin_unlock(&l_mg->free_lock);
+
+		pr_debug("pblk: line %d is bad\n", line->id);
+
+		return 0;
+	}
+
+	/* Run-time metadata */
+	line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
+
+	/* Mark LUNs allocated in this line (all for now) */
+	bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
+
+	smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
+	memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
+	smeta->header.id = cpu_to_le32(line->id);
+	smeta->header.type = cpu_to_le16(line->type);
+	smeta->header.version = cpu_to_le16(1);
+
+	/* Start metadata */
+	smeta->seq_nr = cpu_to_le64(line->seq_nr);
+	smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
+
+	/* Fill metadata among lines */
+	if (cur) {
+		memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
+		smeta->prev_id = cpu_to_le32(cur->id);
+		cur->emeta->next_id = cpu_to_le32(line->id);
+	} else {
+		smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
+	}
+
+	/* All smeta must be set at this point */
+	smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
+	smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
+
+	/* End metadata */
+	memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
+	emeta->seq_nr = cpu_to_le64(line->seq_nr);
+	emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
+	emeta->nr_valid_lbas = cpu_to_le64(0);
+	emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
+	emeta->crc = cpu_to_le32(0);
+	emeta->prev_id = smeta->prev_id;
+
+	return 1;
+}
+
+/* For now lines are always assumed full lines. Thus, smeta former and current
+ * lun bitmaps are omitted.
+ *
+ * Builds the line's map and invalid bitmaps from its bad block bitmap, reserves
+ * the smeta and emeta sectors and, when @init is set, writes smeta to the
+ * device. Returns 1 on success; returns 0 after marking the line bad when the
+ * sector accounting does not match the invalid bitmap.
+ */
+static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
+			     int init)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	int nr_bb = 0;
+	u64 off;
+	int bit = -1;
+
+	line->sec_in_line = lm->sec_per_line;
+
+	/* Capture bad block information on line mapping bitmaps */
+	while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
+					bit + 1)) < lm->blk_per_line) {
+		off = bit * geo->sec_per_pl;
+		bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
+							lm->sec_per_line);
+		bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
+							lm->sec_per_line);
+		line->sec_in_line -= geo->sec_per_blk;
+		if (bit >= lm->emeta_bb)
+			nr_bb++;
+	}
+
+	/* Mark smeta metadata sectors as bad sectors */
+	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
+	off = bit * geo->sec_per_pl;
+retry_smeta:
+	bitmap_set(line->map_bitmap, off, lm->smeta_sec);
+	line->sec_in_line -= lm->smeta_sec;
+	line->smeta_ssec = off;
+	line->cur_sec = off + lm->smeta_sec;
+
+	if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
+		pr_debug("pblk: line smeta I/O failed. Retry\n");
+		off += geo->sec_per_pl;
+		goto retry_smeta;
+	}
+
+	bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
+
+	/* Mark emeta metadata sectors as bad sectors. We need to consider bad
+	 * blocks to make sure that there are enough sectors to store emeta
+	 */
+	bit = lm->sec_per_line;
+	off = lm->sec_per_line - lm->emeta_sec;
+	bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
+	while (nr_bb) {
+		off -= geo->sec_per_pl;
+		if (!test_bit(off, line->invalid_bitmap)) {
+			bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl);
+			nr_bb--;
+		}
+	}
+
+	line->sec_in_line -= lm->emeta_sec;
+	line->emeta_ssec = off;
+	line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
+
+	if (lm->sec_per_line - line->sec_in_line !=
+		bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
+		spin_lock(&line->lock);
+		line->state = PBLK_LINESTATE_BAD;
+		spin_unlock(&line->lock);
+
+		list_add_tail(&line->list, &l_mg->bad_list);
+		pr_err("pblk: unexpected line %d is bad\n", line->id);
+
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Allocate the map and invalid bitmaps of @line and move it from FREE to OPEN.
+ *
+ * Returns 0 on success, -ENOMEM if a bitmap cannot be allocated, or -EINTR if
+ * the line is not in the FREE state. On every error path the bitmaps allocated
+ * here are returned to the mempool and the line's pointers are cleared.
+ */
+static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	int blk_in_line = atomic_read(&line->blk_in_line);
+
+	line->map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
+	if (!line->map_bitmap)
+		return -ENOMEM;
+	memset(line->map_bitmap, 0, lm->sec_bitmap_len);
+
+	/* invalid_bitmap is special since it is used when line is closed. No
+	 * need to zero it; it will be initialized using bb info from
+	 * map_bitmap
+	 */
+	line->invalid_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
+	if (!line->invalid_bitmap) {
+		mempool_free(line->map_bitmap, pblk->line_meta_pool);
+		/* Do not leave a dangling pointer for pblk_line_free() */
+		line->map_bitmap = NULL;
+		return -ENOMEM;
+	}
+
+	spin_lock(&line->lock);
+	if (line->state != PBLK_LINESTATE_FREE) {
+		spin_unlock(&line->lock);
+		/* Both bitmaps were leaked on this path before */
+		mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+		mempool_free(line->map_bitmap, pblk->line_meta_pool);
+		line->invalid_bitmap = NULL;
+		line->map_bitmap = NULL;
+		WARN(1, "pblk: corrupted line state\n");
+		return -EINTR;
+	}
+	line->state = PBLK_LINESTATE_OPEN;
+
+	atomic_set(&line->left_eblks, blk_in_line);
+	atomic_set(&line->left_seblks, blk_in_line);
+	spin_unlock(&line->lock);
+
+	/* Bad blocks do not need to be erased */
+	bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
+
+	kref_init(&line->ref);
+
+	return 0;
+}
+
+/*
+ * Make @line the current data line for recovery: unlink it from its list,
+ * prepare it and initialize its bitmaps without writing smeta. Returns 0 on
+ * success, the pblk_line_prepare() error, or -EINTR if bitmap setup fails.
+ */
+int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	int ret;
+
+	spin_lock(&l_mg->free_lock);
+	l_mg->data_line = line;
+	list_del(&line->list);
+
+	ret = pblk_line_prepare(pblk, line);
+	if (ret) {
+		list_add(&line->list, &l_mg->free_list);
+		spin_unlock(&l_mg->free_lock);
+		return ret;
+	}
+	spin_unlock(&l_mg->free_lock);
+
+	pblk_rl_free_lines_dec(&pblk->rl, line);
+
+	if (!pblk_line_init_bb(pblk, line, 0)) {
+		list_add(&line->list, &l_mg->free_list);
+		return -EINTR;
+	}
+
+	return 0;
+}
+
+/* Release the map bitmap of a recovered line and drop its metadata pointers */
+void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
+{
+	mempool_free(line->map_bitmap, pblk->line_meta_pool);
+	line->map_bitmap = NULL;
+	line->smeta = NULL;
+	line->emeta = NULL;
+}
+
+/*
+ * Take the first usable line from the free list and prepare it. Lines without
+ * a single good block are moved to the bad list and the next one is tried.
+ * The caller must hold l_mg->free_lock.
+ *
+ * Returns the prepared line, or NULL if no free line is left or preparation
+ * fails.
+ */
+struct pblk_line *pblk_line_get(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line *line = NULL;
+	int bit;
+
+	lockdep_assert_held(&l_mg->free_lock);
+
+retry_get:
+	if (list_empty(&l_mg->free_list)) {
+		pr_err("pblk: no free lines\n");
+		/* After a retry 'line' still points to the line that was just
+		 * moved to the bad list; it must not be handed to the caller
+		 */
+		return NULL;
+	}
+
+	line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
+	list_del(&line->list);
+	l_mg->nr_free_lines--;
+
+	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
+	if (unlikely(bit >= lm->blk_per_line)) {
+		spin_lock(&line->lock);
+		line->state = PBLK_LINESTATE_BAD;
+		spin_unlock(&line->lock);
+
+		list_add_tail(&line->list, &l_mg->bad_list);
+
+		pr_debug("pblk: line %d is bad\n", line->id);
+		goto retry_get;
+	}
+
+	if (pblk_line_prepare(pblk, line)) {
+		pr_err("pblk: failed to prepare line %d\n", line->id);
+		list_add(&line->list, &l_mg->free_list);
+		/* The line is free again, so undo the decrement above */
+		l_mg->nr_free_lines++;
+		return NULL;
+	}
+
+	return line;
+}
+
+/*
+ * Replace @line, which could not be set up, with a fresh line from the free
+ * list. The new line inherits the metadata buffers of @line and is erased
+ * before being returned. Returns NULL if no line is available or the erase
+ * fails.
+ */
+static struct pblk_line *pblk_line_retry(struct pblk *pblk,
+					 struct pblk_line *line)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line *retry_line;
+
+	spin_lock(&l_mg->free_lock);
+	retry_line = pblk_line_get(pblk);
+	if (!retry_line) {
+		l_mg->data_line = NULL;
+		spin_unlock(&l_mg->free_lock);
+		return NULL;
+	}
+
+	retry_line->smeta = line->smeta;
+	retry_line->emeta = line->emeta;
+	retry_line->meta_line = line->meta_line;
+
+	pblk_line_free(pblk, line);
+	l_mg->data_line = retry_line;
+	spin_unlock(&l_mg->free_lock);
+
+	if (pblk_line_erase(pblk, retry_line)) {
+		spin_lock(&l_mg->free_lock);
+		l_mg->data_line = NULL;
+		spin_unlock(&l_mg->free_lock);
+		return NULL;
+	}
+
+	pblk_rl_free_lines_dec(&pblk->rl, retry_line);
+
+	return retry_line;
+}
+
+/*
+ * Allocate and set up the first data line and, if possible, reserve the line
+ * that will follow it (l_mg->data_next). The data line is erased, its metadata
+ * is initialized and smeta is written; a line that fails setup is replaced
+ * through pblk_line_retry(). Returns the line or NULL on failure.
+ */
+struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line *line;
+	int meta_line;
+	int is_next = 0;
+
+	spin_lock(&l_mg->free_lock);
+	line = pblk_line_get(pblk);
+	if (!line) {
+		spin_unlock(&l_mg->free_lock);
+		return NULL;
+	}
+
+	line->seq_nr = l_mg->d_seq_nr++;
+	line->type = PBLK_LINETYPE_DATA;
+	l_mg->data_line = line;
+
+	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+	set_bit(meta_line, &l_mg->meta_bitmap);
+	line->smeta = l_mg->sline_meta[meta_line].meta;
+	line->emeta = l_mg->eline_meta[meta_line].meta;
+	line->meta_line = meta_line;
+
+	/* Allocate next line for preparation */
+	l_mg->data_next = pblk_line_get(pblk);
+	if (l_mg->data_next) {
+		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+		l_mg->data_next->type = PBLK_LINETYPE_DATA;
+		is_next = 1;
+	}
+	spin_unlock(&l_mg->free_lock);
+
+	pblk_rl_free_lines_dec(&pblk->rl, line);
+	if (is_next)
+		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
+
+	if (pblk_line_erase(pblk, line))
+		return NULL;
+
+retry_setup:
+	if (!pblk_line_set_metadata(pblk, line, NULL)) {
+		line = pblk_line_retry(pblk, line);
+		if (!line)
+			return NULL;
+
+		goto retry_setup;
+	}
+
+	if (!pblk_line_init_bb(pblk, line, 1)) {
+		line = pblk_line_retry(pblk, line);
+		if (!line)
+			return NULL;
+
+		goto retry_setup;
+	}
+
+	return line;
+}
+
+/*
+ * Promote l_mg->data_next to the current data line. The function waits until
+ * the new line has no blocks left to erase (erasing them itself if needed),
+ * reserves a new next line and a free metadata slot, and then sets the new
+ * line up. Returns the new data line, or NULL if there is no next line or the
+ * erase/setup cannot be completed.
+ */
+struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line *cur, *new;
+	unsigned int left_seblks;
+	int meta_line;
+	int is_next = 0;
+
+	cur = l_mg->data_line;
+	new = l_mg->data_next;
+	if (!new)
+		return NULL;
+	l_mg->data_line = new;
+
+retry_line:
+	left_seblks = atomic_read(&new->left_seblks);
+	if (left_seblks) {
+		/* If line is not fully erased, erase it */
+		if (atomic_read(&new->left_eblks)) {
+			if (pblk_line_erase(pblk, new))
+				return NULL;
+		} else {
+			io_schedule();
+		}
+		goto retry_line;
+	}
+
+	spin_lock(&l_mg->free_lock);
+	/* Allocate next line for preparation */
+	l_mg->data_next = pblk_line_get(pblk);
+	if (l_mg->data_next) {
+		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+		l_mg->data_next->type = PBLK_LINETYPE_DATA;
+		is_next = 1;
+	}
+
+retry_meta:
+	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+	if (meta_line == PBLK_DATA_LINES) {
+		/* No metadata slot is free: drop the lock and try again */
+		spin_unlock(&l_mg->free_lock);
+		io_schedule();
+		spin_lock(&l_mg->free_lock);
+		goto retry_meta;
+	}
+
+	set_bit(meta_line, &l_mg->meta_bitmap);
+	new->smeta = l_mg->sline_meta[meta_line].meta;
+	new->emeta = l_mg->eline_meta[meta_line].meta;
+	new->meta_line = meta_line;
+
+	memset(new->smeta, 0, lm->smeta_len);
+	memset(new->emeta, 0, lm->emeta_len);
+	spin_unlock(&l_mg->free_lock);
+
+	if (is_next)
+		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
+
+retry_setup:
+	if (!pblk_line_set_metadata(pblk, new, cur)) {
+		new = pblk_line_retry(pblk, new);
+		if (!new)
+			return NULL;
+
+		goto retry_setup;
+	}
+
+	if (!pblk_line_init_bb(pblk, new, 1)) {
+		new = pblk_line_retry(pblk, new);
+		if (!new)
+			return NULL;
+
+		goto retry_setup;
+	}
+
+	return new;
+}
+
+/* Return the line's bitmaps to the mempool and clear its metadata pointers */
+void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
+{
+	if (line->map_bitmap)
+		mempool_free(line->map_bitmap, pblk->line_meta_pool);
+	if (line->invalid_bitmap)
+		mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+
+	line->map_bitmap = NULL;
+	line->invalid_bitmap = NULL;
+	line->smeta = NULL;
+	line->emeta = NULL;
+}
+
+/*
+ * kref release callback of a line: the line goes from GC back to FREE, its
+ * resources are released and it is appended to the free list.
+ */
+void pblk_line_put(struct kref *ref)
+{
+	struct pblk_line *line = container_of(ref, struct pblk_line, ref);
+	struct pblk *pblk = line->pblk;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+
+	spin_lock(&line->lock);
+	WARN_ON(line->state != PBLK_LINESTATE_GC);
+	line->state = PBLK_LINESTATE_FREE;
+	line->gc_group = PBLK_LINEGC_NONE;
+	pblk_line_free(pblk, line);
+	spin_unlock(&line->lock);
+
+	spin_lock(&l_mg->free_lock);
+	list_add_tail(&line->list, &l_mg->free_list);
+	l_mg->nr_free_lines++;
+	spin_unlock(&l_mg->free_lock);
+
+	pblk_rl_free_lines_inc(&pblk->rl, line);
+}
+
+/*
+ * Submit an erase for @ppa without waiting; completion is handled by
+ * pblk_end_io_erase(). Returns the result of pblk_submit_io().
+ */
+int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
+{
+	struct nvm_rq *rqd;
+	int err;
+
+	rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL);
+	memset(rqd, 0, pblk_r_rq_size);
+
+	pblk_setup_e_rq(pblk, rqd, ppa);
+
+	rqd->end_io = pblk_end_io_erase;
+	rqd->private = pblk;
+
+	/* The write thread schedules erases so that it minimizes disturbances
+	 * with writes. Thus, there is no need to take the LUN semaphore.
+	 */
+	err = pblk_submit_io(pblk, rqd);
+	if (err) {
+		struct nvm_tgt_dev *dev = pblk->dev;
+		struct nvm_geo *geo = &dev->geo;
+
+		pr_err("pblk: could not async erase line:%d,blk:%d\n",
+					pblk_dev_ppa_to_line(ppa),
+					pblk_dev_ppa_to_pos(geo, ppa));
+	}
+
+	return err;
+}
+
+/* Current data line */
+struct pblk_line *pblk_line_get_data(struct pblk *pblk)
+{
+	return pblk->l_mg.data_line;
+}
+
+/* Line reserved to become the next data line */
+struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
+{
+	return pblk->l_mg.data_next;
+}
+
+/* Non-zero when the line's left_msecs counter has reached zero */
+int pblk_line_is_full(struct pblk_line *line)
+{
+	return (line->left_msecs == 0);
+}
+
+/*
+ * Close an open line: finalize the emeta CRC, write emeta to the device,
+ * release the line's metadata slot and move the line to the GC list chosen by
+ * pblk_line_gc_list(). A failed emeta write is only logged.
+ */
+void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct list_head *move_list;
+
+	line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));
+
+	if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
+		pr_err("pblk: line %d close I/O failed\n", line->id);
+
+	WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
+				"pblk: corrupt closed line %d\n", line->id);
+
+	spin_lock(&l_mg->free_lock);
+	WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
+	spin_unlock(&l_mg->free_lock);
+
+	spin_lock(&l_mg->gc_lock);
+	spin_lock(&line->lock);
+	WARN_ON(line->state != PBLK_LINESTATE_OPEN);
+	line->state = PBLK_LINESTATE_CLOSED;
+	move_list = pblk_line_gc_list(pblk, line);
+
+	list_add_tail(&line->list, move_list);
+
+	mempool_free(line->map_bitmap, pblk->line_meta_pool);
+	line->map_bitmap = NULL;
+	line->smeta = NULL;
+	line->emeta = NULL;
+
+	spin_unlock(&line->lock);
+	spin_unlock(&l_mg->gc_lock);
+}
+
+/* Work item wrapper around pblk_line_close(); frees the work descriptor */
+void pblk_line_close_ws(struct work_struct *work)
+{
+	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
+									ws);
+	struct pblk *pblk = line_ws->pblk;
+	struct pblk_line *line = line_ws->line;
+
+	pblk_line_close(pblk, line);
+	mempool_free(line_ws, pblk->line_ws_pool);
+}
+
+/*
+ * Work item that marks the block at the ppa passed in line_ws->priv as grown
+ * bad in the target's bad block table. The ppa buffer and the work descriptor
+ * are freed here.
+ */
+void pblk_line_mark_bb(struct work_struct *work)
+{
+	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
+									ws);
+	struct pblk *pblk = line_ws->pblk;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct ppa_addr *ppa = line_ws->priv;
+	int ret;
+
+	ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
+	if (ret) {
+		struct pblk_line *line;
+		int pos;
+
+		line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)];
+		pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa);
+
+		pr_err("pblk: failed to mark bb, line:%d, pos:%d\n",
+				line->id, pos);
+	}
+
+	kfree(ppa);
+	mempool_free(line_ws, pblk->line_ws_pool);
+}
+
+/*
+ * Queue @work on pblk->kw_wq with a descriptor carrying @line and @priv.
+ * Nothing is queued if the descriptor cannot be allocated.
+ * NOTE(review): in that case @priv is not released here - confirm callers
+ * that pass an allocated @priv can tolerate this.
+ */
+void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
+		      void (*work)(struct work_struct *))
+{
+	struct pblk_line_ws *line_ws;
+
+	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_ATOMIC);
+	if (!line_ws)
+		return;
+
+	line_ws->pblk = pblk;
+	line_ws->line = line;
+	line_ws->priv = priv;
+
+	INIT_WORK(&line_ws->ws, work);
+	queue_work(pblk->kw_wq, &line_ws->ws);
+}
+
+/*
+ * Take the write semaphore of the LUN addressed by ppa_list[0], unless
+ * @lun_bitmap shows that this request already holds it. The wait is bounded
+ * to 5000 ms; a failure to acquire is only logged.
+ */
+void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+		  unsigned long *lun_bitmap)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_lun *rlun;
+	int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
+	int ret;
+
+	/*
+	 * Only send one inflight I/O per LUN. Since we map at a page
+	 * granularity, all ppas in the I/O will map to the same LUN
+	 */
+#ifdef CONFIG_NVM_DEBUG
+	int i;
+
+	for (i = 1; i < nr_ppas; i++)
+		WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
+				ppa_list[0].g.ch != ppa_list[i].g.ch);
+#endif
+	/* If the LUN has been locked for this same request, do not attempt to
+	 * lock it again
+	 */
+	if (test_and_set_bit(lun_id, lun_bitmap))
+		return;
+
+	rlun = &pblk->luns[lun_id];
+	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+	if (ret) {
+		switch (ret) {
+		case -ETIME:
+			pr_err("pblk: lun semaphore timed out\n");
+			break;
+		case -EINTR:
+			/* Was reported as a timeout as well */
+			pr_err("pblk: lun semaphore interrupted\n");
+			break;
+		}
+	}
+}
+
+/*
+ * Release the write semaphore of every LUN set in @lun_bitmap and free the
+ * bitmap itself.
+ */
+void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+		unsigned long *lun_bitmap)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_lun *rlun;
+	int nr_luns = geo->nr_luns;
+	int bit = -1;
+
+	while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) {
+		rlun = &pblk->luns[bit];
+		up(&rlun->wr_sem);
+	}
+
+	kfree(lun_bitmap);
+}
+
+/*
+ * Point @lba at @ppa in the L2P table. If the previous mapping was a device
+ * address (neither cached nor empty) it is invalidated first.
+ */
+void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
+{
+	struct ppa_addr l2p_ppa;
+
+	/* logic error: lba out-of-bounds. Ignore update */
+	if (!(lba < pblk->rl.nr_secs)) {
+		WARN(1, "pblk: corrupted L2P map request\n");
+		return;
+	}
+
+	spin_lock(&pblk->trans_lock);
+	l2p_ppa = pblk_trans_map_get(pblk, lba);
+
+	if (!pblk_addr_in_cache(l2p_ppa) && !pblk_ppa_empty(l2p_ppa))
+		pblk_map_invalidate(pblk, l2p_ppa);
+
+	pblk_trans_map_set(pblk, lba, ppa);
+	spin_unlock(&pblk->trans_lock);
+}
+
+/* pblk_update_map() for a @ppa that refers to the write cache */
+void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
+{
+#ifdef CONFIG_NVM_DEBUG
+	/* Callers must ensure that the ppa points to a cache address */
+	BUG_ON(!pblk_addr_in_cache(ppa));
+	BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
+#endif
+
+	pblk_update_map(pblk, lba, ppa);
+}
+
+/*
+ * L2P update on behalf of GC. The mapping is only changed when @lba still
+ * points to a device address on @gc_line. Returns 1 if the entry was updated
+ * and 0 otherwise.
+ */
+int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
+		       struct pblk_line *gc_line)
+{
+	struct ppa_addr l2p_ppa;
+	int ret = 1;
+
+#ifdef CONFIG_NVM_DEBUG
+	/* Callers must ensure that the ppa points to a cache address */
+	BUG_ON(!pblk_addr_in_cache(ppa));
+	BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
+#endif
+
+	/* logic error: lba out-of-bounds. Ignore update */
+	if (!(lba < pblk->rl.nr_secs)) {
+		WARN(1, "pblk: corrupted L2P map request\n");
+		return 0;
+	}
+
+	spin_lock(&pblk->trans_lock);
+	l2p_ppa = pblk_trans_map_get(pblk, lba);
+
+	/* Prevent updated entries to be overwritten by GC */
+	if (pblk_addr_in_cache(l2p_ppa) || pblk_ppa_empty(l2p_ppa) ||
+				pblk_tgt_ppa_to_line(l2p_ppa) != gc_line->id) {
+		ret = 0;
+		goto out;
+	}
+
+	pblk_trans_map_set(pblk, lba, ppa);
+out:
+	spin_unlock(&pblk->trans_lock);
+	return ret;
+}
+
+/*
+ * L2P update after data reached the device. @entry_line is the address the
+ * entry is expected to still hold; if the table holds something else, @ppa is
+ * invalidated instead of being mapped. Padded entries (ADDR_EMPTY) are
+ * invalidated and discarded.
+ */
+void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
+			 struct ppa_addr entry_line)
+{
+	struct ppa_addr l2p_line;
+
+#ifdef CONFIG_NVM_DEBUG
+	/* Callers must ensure that the ppa points to a device address */
+	BUG_ON(pblk_addr_in_cache(ppa));
+#endif
+	/* Invalidate and discard padded entries */
+	if (lba == ADDR_EMPTY) {
+#ifdef CONFIG_NVM_DEBUG
+		atomic_long_inc(&pblk->padded_wb);
+#endif
+		pblk_map_invalidate(pblk, ppa);
+		return;
+	}
+
+	/* logic error: lba out-of-bounds. Ignore update */
+	if (!(lba < pblk->rl.nr_secs)) {
+		WARN(1, "pblk: corrupted L2P map request\n");
+		return;
+	}
+
+	spin_lock(&pblk->trans_lock);
+	l2p_line = pblk_trans_map_get(pblk, lba);
+
+	/* Do not update L2P if the cacheline has been updated. In this case,
+	 * the mapped ppa must be invalidated
+	 */
+	if (l2p_line.ppa != entry_line.ppa) {
+		if (!pblk_ppa_empty(ppa))
+			pblk_map_invalidate(pblk, ppa);
+		goto out;
+	}
+
+#ifdef CONFIG_NVM_DEBUG
+	WARN_ON(!pblk_addr_in_cache(l2p_line) && !pblk_ppa_empty(l2p_line));
+#endif
+
+	pblk_trans_map_set(pblk, lba, ppa);
+out:
+	spin_unlock(&pblk->trans_lock);
+}
+
+/* Look up @nr_secs consecutive lbas starting at @blba into @ppas */
+void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
+			 sector_t blba, int nr_secs)
+{
+	int i;
+
+	spin_lock(&pblk->trans_lock);
+	for (i = 0; i < nr_secs; i++)
+		ppas[i] = pblk_trans_map_get(pblk, blba + i);
+	spin_unlock(&pblk->trans_lock);
+}
+
+/*
+ * Look up the lbas listed in @lba_list into @ppas. ADDR_EMPTY entries map to
+ * an ADDR_EMPTY ppa; out-of-bounds lbas are skipped and leave ppas[i]
+ * untouched.
+ */
+void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
+			  u64 *lba_list, int nr_secs)
+{
+	sector_t lba;
+	int i;
+
+	spin_lock(&pblk->trans_lock);
+	for (i = 0; i < nr_secs; i++) {
+		lba = lba_list[i];
+		if (lba == ADDR_EMPTY) {
+			ppas[i].ppa = ADDR_EMPTY;
+		} else {
+			/* logic error: lba out-of-bounds. Ignore update */
+			if (!(lba < pblk->rl.nr_secs)) {
+				WARN(1, "pblk: corrupted L2P map request\n");
+				continue;
+			}
+			ppas[i] = pblk_trans_map_get(pblk, lba);
+		}
+	}
+	spin_unlock(&pblk->trans_lock);
+}
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
new file mode 100644
index 000000000000..eaf479c6b63c
--- /dev/null
+++ b/drivers/lightnvm/pblk-gc.c
@@ -0,0 +1,555 @@
+/*
+ * Copyright (C) 2016 CNEX Labs
+ * Initial release: Javier Gonzalez <javier@cnexlabs.com>
+ *                  Matias Bjorling <matias@cnexlabs.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * pblk-gc.c - pblk's garbage collector
+ */
+
+#include "pblk.h"
+#include <linux/delay.h>
+
+/* Free a GC request together with the data and lba buffers it owns */
+static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
+{
+	kfree(gc_rq->data);
+	kfree(gc_rq->lba_list);
+	kfree(gc_rq);
+}
+
+/*
+ * Drain the GC write list: every queued request is written to the cache, its
+ * line reference is dropped and the request is freed. Returns 1 if the list
+ * was empty and 0 if requests were processed.
+ */
+static int pblk_gc_write(struct pblk *pblk)
+{
+	struct pblk_gc *gc = &pblk->gc;
+	struct pblk_gc_rq *gc_rq, *tgc_rq;
+	LIST_HEAD(w_list);
+
+	spin_lock(&gc->w_lock);
+	if (list_empty(&gc->w_list)) {
+		spin_unlock(&gc->w_lock);
+		return 1;
+	}
+
+	/* Move the requests to a private list so the lock can be dropped */
+	list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
+		list_move_tail(&gc_rq->list, &w_list);
+		gc->w_entries--;
+	}
+	spin_unlock(&gc->w_lock);
+
+	list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
+		pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list,
+				gc_rq->nr_secs, gc_rq->secs_to_gc,
+				gc_rq->line, PBLK_IOTYPE_GC);
+
+		kref_put(&gc_rq->line->ref, pblk_line_put);
+
+		list_del(&gc_rq->list);
+		pblk_gc_free_gc_rq(gc_rq);
+	}
+
+	return 0;
+}
+
+/* Wake up the GC writer thread */
+static void pblk_gc_writer_kick(struct pblk_gc *gc)
+{
+	wake_up_process(gc->gc_writer_ts);
+}
+
+/*
+ * Responsible for managing all memory related to a gc request. Also in case of
+ * failure
+ *
+ * Reads the sectors listed in @lba_list from @line and queues them for the GC
+ * writer. @lba_list is consumed: it is either handed to the queued request or
+ * freed here. Queuing waits while more than 256 requests are pending.
+ * Returns NVM_IO_OK or NVM_IO_ERR.
+ */
+static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
+				   u64 *lba_list, unsigned int nr_secs)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_gc *gc = &pblk->gc;
+	struct pblk_gc_rq *gc_rq;
+	void *data;
+	unsigned int secs_to_gc;
+	int ret = NVM_IO_OK;
+
+	/* kmalloc_array() rejects a size that would overflow */
+	data = kmalloc_array(nr_secs, geo->sec_size, GFP_KERNEL);
+	if (!data) {
+		ret = NVM_IO_ERR;
+		goto free_lba_list;
+	}
+
+	/* Read from GC victim block */
+	if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
+							&secs_to_gc, line)) {
+		ret = NVM_IO_ERR;
+		goto free_data;
+	}
+
+	/* Nothing left to move: release the buffers and report success */
+	if (!secs_to_gc)
+		goto free_data;
+
+	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
+	if (!gc_rq) {
+		ret = NVM_IO_ERR;
+		goto free_data;
+	}
+
+	gc_rq->line = line;
+	gc_rq->data = data;
+	gc_rq->lba_list = lba_list;
+	gc_rq->nr_secs = nr_secs;
+	gc_rq->secs_to_gc = secs_to_gc;
+
+	/* Reference dropped by pblk_gc_write() once the request is written */
+	kref_get(&line->ref);
+
+retry:
+	spin_lock(&gc->w_lock);
+	if (gc->w_entries > 256) {
+		spin_unlock(&gc->w_lock);
+		usleep_range(256, 1024);
+		goto retry;
+	}
+	gc->w_entries++;
+	list_add_tail(&gc_rq->list, &gc->w_list);
+	spin_unlock(&gc->w_lock);
+
+	pblk_gc_writer_kick(&pblk->gc);
+
+	return NVM_IO_OK;
+
+free_data:
+	kfree(data);
+free_lba_list:
+	kfree(lba_list);
+
+	return ret;
+}
+
+/*
+ * Return a line that could not be garbage collected from the GC state to the
+ * CLOSED state and re-queue it on the GC list chosen by pblk_line_gc_list().
+ */
+static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct list_head *move_list;
+
+	spin_lock(&line->lock);
+	WARN_ON(line->state != PBLK_LINESTATE_GC);
+	line->state = PBLK_LINESTATE_CLOSED;
+	move_list = pblk_line_gc_list(pblk, line);
+	spin_unlock(&line->lock);
+
+	if (move_list) {
+		spin_lock(&l_mg->gc_lock);
+		list_add_tail(&line->list, move_list);
+		spin_unlock(&l_mg->gc_lock);
+	}
+}
+
+/*
+ * GC reader work item. Walks the sectors that are clear in the line's
+ * invalid bitmap, in batches of up to pblk->max_write_pgs, and hands each
+ * batch to pblk_gc_move_valid_secs(). On exit the emeta buffer and the work
+ * descriptor are freed and the inflight GC counter is decremented; the line
+ * reference is dropped unless the line was put back for a later retry.
+ */
+static void pblk_gc_line_ws(struct work_struct *work)
+{
+	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
+									ws);
+	struct pblk *pblk = line_ws->pblk;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line *line = line_ws->line;
+	struct pblk_line_meta *lm = &pblk->lm;
+	__le64 *lba_list = line_ws->priv;
+	u64 *gc_list;
+	int sec_left;
+	int nr_ppas, bit;
+	int put_line = 1;
+
+	pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
+
+	spin_lock(&line->lock);
+	sec_left = line->vsc;
+	if (!sec_left) {
+		/* Lines are erased before being used (l_mg->data_/log_next) */
+		spin_unlock(&line->lock);
+		goto out;
+	}
+	spin_unlock(&line->lock);
+
+	if (sec_left < 0) {
+		pr_err("pblk: corrupted GC line (%d)\n", line->id);
+		put_line = 0;
+		pblk_put_line_back(pblk, line);
+		goto out;
+	}
+
+	bit = -1;
+next_rq:
+	/* Ownership of gc_list passes to pblk_gc_move_valid_secs() */
+	gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
+	if (!gc_list) {
+		put_line = 0;
+		pblk_put_line_back(pblk, line);
+		goto out;
+	}
+
+	nr_ppas = 0;
+	do {
+		bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
+								bit + 1);
+		if (bit > line->emeta_ssec)
+			break;
+
+		gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
+	} while (nr_ppas < pblk->max_write_pgs);
+
+	if (unlikely(!nr_ppas)) {
+		kfree(gc_list);
+		goto out;
+	}
+
+	if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
+		pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
+						line->id, line->vsc,
+						nr_ppas, nr_ppas);
+		put_line = 0;
+		pblk_put_line_back(pblk, line);
+		goto out;
+	}
+
+	sec_left -= nr_ppas;
+	if (sec_left > 0)
+		goto next_rq;
+
+out:
+	pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
+	mempool_free(line_ws, pblk->line_ws_pool);
+	atomic_dec(&pblk->gc.inflight_gc);
+	if (put_line)
+		kref_put(&line->ref, pblk_line_put);
+}
+
+/*
+ * Start garbage collection of @line: read its emeta, extract the lba list and
+ * queue pblk_gc_line_ws() on the GC reader workqueue.
+ *
+ * Returns 0 if the work was queued. Returns 1 on failure, after putting the
+ * line back on its GC list and dropping the inflight count that was taken
+ * when the line was selected.
+ */
+static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_ws *line_ws;
+	__le64 *lba_list;
+	int ret;
+
+	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+	line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
+								GFP_KERNEL);
+	if (!line->emeta) {
+		pr_err("pblk: cannot use GC emeta\n");
+		goto fail_free_ws;
+	}
+
+	ret = pblk_line_read_emeta(pblk, line);
+	if (ret) {
+		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
+		goto fail_free_emeta;
+	}
+
+	/* If this read fails, it means that emeta is corrupted. For now, leave
+	 * the line untouched. TODO: Implement a recovery routine that scans and
+	 * moves all sectors on the line.
+	 */
+	lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
+	if (!lba_list) {
+		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
+		goto fail_free_emeta;
+	}
+
+	line_ws->pblk = pblk;
+	line_ws->line = line;
+	line_ws->priv = lba_list;
+
+	INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
+	queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
+
+	return 0;
+
+fail_free_emeta:
+	pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
+fail_free_ws:
+	mempool_free(line_ws, pblk->line_ws_pool);
+	pblk_put_line_back(pblk, line);
+	/* No worker will run for this line, so the inflight count taken by
+	 * pblk_gc_run() has to be dropped here or it is never released
+	 */
+	atomic_dec(&pblk->gc.inflight_gc);
+
+	return 1;
+}
+
+/*
+ * Start GC on every line of the private list @gc_list.
+ *
+ * Each line is unlinked from @gc_list before pblk_gc_line() runs: on failure
+ * that function re-queues the line on a GC list, and on success the queued
+ * worker may release the line concurrently, so line->list must not be touched
+ * afterwards.
+ */
+static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
+{
+	struct pblk_line *line, *tline;
+
+	list_for_each_entry_safe(line, tline, gc_list, list) {
+		list_del(&line->list);
+		if (pblk_gc_line(pblk, line))
+			pr_err("pblk: failed to GC line %d\n", line->id);
+	}
+}
+
+/*
+ * Lines with no valid sectors will be returned to the free list immediately. If
+ * GC is activated - either because the free block count is under the determined
+ * threshold, or because it is being forced from user space - only lines with a
+ * high count of invalid sectors will be recycled.
+ */ +static void pblk_gc_run(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_gc *gc = &pblk->gc; + struct pblk_line *line, *tline; + unsigned int nr_blocks_free, nr_blocks_need; + struct list_head *group_list; + int run_gc, gc_group = 0; + int prev_gc = 0; + int inflight_gc = atomic_read(&gc->inflight_gc); + LIST_HEAD(gc_list); + + spin_lock(&l_mg->gc_lock); + list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) { + spin_lock(&line->lock); + WARN_ON(line->state != PBLK_LINESTATE_CLOSED); + line->state = PBLK_LINESTATE_GC; + spin_unlock(&line->lock); + + list_del(&line->list); + kref_put(&line->ref, pblk_line_put); + } + spin_unlock(&l_mg->gc_lock); + + nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl); + nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl); + run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced); + +next_gc_group: + group_list = l_mg->gc_lists[gc_group++]; + spin_lock(&l_mg->gc_lock); + while (run_gc && !list_empty(group_list)) { + /* No need to queue up more GC lines than we can handle */ + if (!run_gc || inflight_gc > gc->gc_jobs_active) { + spin_unlock(&l_mg->gc_lock); + pblk_gc_lines(pblk, &gc_list); + return; + } + + line = list_first_entry(group_list, struct pblk_line, list); + nr_blocks_free += atomic_read(&line->blk_in_line); + + spin_lock(&line->lock); + WARN_ON(line->state != PBLK_LINESTATE_CLOSED); + line->state = PBLK_LINESTATE_GC; + list_move_tail(&line->list, &gc_list); + atomic_inc(&gc->inflight_gc); + inflight_gc++; + spin_unlock(&line->lock); + + prev_gc = 1; + run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced); + } + spin_unlock(&l_mg->gc_lock); + + pblk_gc_lines(pblk, &gc_list); + + if (!prev_gc && pblk->rl.rb_state > gc_group && + gc_group < PBLK_NR_GC_LISTS) + goto next_gc_group; +} + + +static void pblk_gc_kick(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + + wake_up_process(gc->gc_ts); + pblk_gc_writer_kick(gc); + mod_timer(&gc->gc_timer, jiffies + 
msecs_to_jiffies(GC_TIME_MSECS)); +} + +static void pblk_gc_timer(unsigned long data) +{ + struct pblk *pblk = (struct pblk *)data; + + pblk_gc_kick(pblk); +} + +static int pblk_gc_ts(void *data) +{ + struct pblk *pblk = data; + + while (!kthread_should_stop()) { + pblk_gc_run(pblk); + set_current_state(TASK_INTERRUPTIBLE); + io_schedule(); + } + + return 0; +} + +static int pblk_gc_writer_ts(void *data) +{ + struct pblk *pblk = data; + + while (!kthread_should_stop()) { + if (!pblk_gc_write(pblk)) + continue; + set_current_state(TASK_INTERRUPTIBLE); + io_schedule(); + } + + return 0; +} + +static void pblk_gc_start(struct pblk *pblk) +{ + pblk->gc.gc_active = 1; + + pr_debug("pblk: gc start\n"); +} + +int pblk_gc_status(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + int ret; + + spin_lock(&gc->lock); + ret = gc->gc_active; + spin_unlock(&gc->lock); + + return ret; +} + +static void __pblk_gc_should_start(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + + lockdep_assert_held(&gc->lock); + + if (gc->gc_enabled && !gc->gc_active) + pblk_gc_start(pblk); +} + +void pblk_gc_should_start(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + + spin_lock(&gc->lock); + __pblk_gc_should_start(pblk); + spin_unlock(&gc->lock); +} + +/* + * If flush_wq == 1 then no lock should be held by the caller since + * flush_workqueue can sleep + */ +static void pblk_gc_stop(struct pblk *pblk, int flush_wq) +{ + spin_lock(&pblk->gc.lock); + pblk->gc.gc_active = 0; + spin_unlock(&pblk->gc.lock); + + pr_debug("pblk: gc stop\n"); +} + +void pblk_gc_should_stop(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + + if (gc->gc_active && !gc->gc_forced) + pblk_gc_stop(pblk, 0); +} + +void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, + int *gc_active) +{ + struct pblk_gc *gc = &pblk->gc; + + spin_lock(&gc->lock); + *gc_enabled = gc->gc_enabled; + *gc_active = gc->gc_active; + spin_unlock(&gc->lock); +} + +void pblk_gc_sysfs_force(struct pblk *pblk, 
int force) +{ + struct pblk_gc *gc = &pblk->gc; + int rsv = 0; + + spin_lock(&gc->lock); + if (force) { + gc->gc_enabled = 1; + rsv = 64; + } + pblk_rl_set_gc_rsc(&pblk->rl, rsv); + gc->gc_forced = force; + __pblk_gc_should_start(pblk); + spin_unlock(&gc->lock); +} + +int pblk_gc_init(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + int ret; + + gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts"); + if (IS_ERR(gc->gc_ts)) { + pr_err("pblk: could not allocate GC main kthread\n"); + return PTR_ERR(gc->gc_ts); + } + + gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk, + "pblk-gc-writer-ts"); + if (IS_ERR(gc->gc_writer_ts)) { + pr_err("pblk: could not allocate GC writer kthread\n"); + ret = PTR_ERR(gc->gc_writer_ts); + goto fail_free_main_kthread; + } + + setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk); + mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS)); + + gc->gc_active = 0; + gc->gc_forced = 0; + gc->gc_enabled = 1; + gc->gc_jobs_active = 8; + gc->w_entries = 0; + atomic_set(&gc->inflight_gc, 0); + + gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq", + WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active); + if (!gc->gc_reader_wq) { + pr_err("pblk: could not allocate GC reader workqueue\n"); + ret = -ENOMEM; + goto fail_free_writer_kthread; + } + + spin_lock_init(&gc->lock); + spin_lock_init(&gc->w_lock); + INIT_LIST_HEAD(&gc->w_list); + + return 0; + +fail_free_writer_kthread: + kthread_stop(gc->gc_writer_ts); +fail_free_main_kthread: + kthread_stop(gc->gc_ts); + + return ret; +} + +void pblk_gc_exit(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + + flush_workqueue(gc->gc_reader_wq); + + del_timer(&gc->gc_timer); + pblk_gc_stop(pblk, 1); + + if (gc->gc_ts) + kthread_stop(gc->gc_ts); + + if (pblk->gc.gc_reader_wq) + destroy_workqueue(pblk->gc.gc_reader_wq); + + if (gc->gc_writer_ts) + kthread_stop(gc->gc_writer_ts); +} diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c new file 
mode 100644 index 000000000000..ae8cd6d5af8b --- /dev/null +++ b/drivers/lightnvm/pblk-init.c @@ -0,0 +1,962 @@ +/* + * Copyright (C) 2015 IT University of Copenhagen (rrpc.c) + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Implementation of a physical block-device target for Open-channel SSDs. + * + * pblk-init.c - pblk's initialization. + */ + +#include "pblk.h" + +static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache, + *pblk_w_rq_cache, *pblk_line_meta_cache; +static DECLARE_RWSEM(pblk_lock); + +static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, + struct bio *bio) +{ + int ret; + + /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap + * constraint. Writes can be of arbitrary size. + */ + if (bio_data_dir(bio) == READ) { + blk_queue_split(q, &bio, q->bio_split); + ret = pblk_submit_read(pblk, bio); + if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED)) + bio_put(bio); + + return ret; + } + + /* Prevent deadlock in the case of a modest LUN configuration and large + * user I/Os. Unless stalled, the rate limiter leaves at least 256KB + * available for user I/O. 
+ */ + if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl))) + blk_queue_split(q, &bio, q->bio_split); + + return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); +} + +static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) +{ + struct pblk *pblk = q->queuedata; + + if (bio_op(bio) == REQ_OP_DISCARD) { + pblk_discard(pblk, bio); + if (!(bio->bi_opf & REQ_PREFLUSH)) { + bio_endio(bio); + return BLK_QC_T_NONE; + } + } + + switch (pblk_rw_io(q, pblk, bio)) { + case NVM_IO_ERR: + bio_io_error(bio); + break; + case NVM_IO_DONE: + bio_endio(bio); + break; + } + + return BLK_QC_T_NONE; +} + +static void pblk_l2p_free(struct pblk *pblk) +{ + vfree(pblk->trans_map); +} + +static int pblk_l2p_init(struct pblk *pblk) +{ + sector_t i; + struct ppa_addr ppa; + int entry_size = 8; + + if (pblk->ppaf_bitsize < 32) + entry_size = 4; + + pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs); + if (!pblk->trans_map) + return -ENOMEM; + + pblk_ppa_set_empty(&ppa); + + for (i = 0; i < pblk->rl.nr_secs; i++) + pblk_trans_map_set(pblk, i, ppa); + + return 0; +} + +static void pblk_rwb_free(struct pblk *pblk) +{ + if (pblk_rb_tear_down_check(&pblk->rwb)) + pr_err("pblk: write buffer error on tear down\n"); + + pblk_rb_data_free(&pblk->rwb); + vfree(pblk_rb_entries_ref(&pblk->rwb)); +} + +static int pblk_rwb_init(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_rb_entry *entries; + unsigned long nr_entries; + unsigned int power_size, power_seg_sz; + + nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer); + + entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry)); + if (!entries) + return -ENOMEM; + + power_size = get_count_order(nr_entries); + power_seg_sz = get_count_order(geo->sec_size); + + return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz); +} + +/* Minimum pages needed within a lun */ +#define PAGE_POOL_SIZE 16 +#define ADDR_POOL_SIZE 64 + +static int 
pblk_set_ppaf(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct nvm_addr_format ppaf = geo->ppaf; + int power_len; + + /* Re-calculate channel and lun format to adapt to configuration */ + power_len = get_count_order(geo->nr_chnls); + if (1 << power_len != geo->nr_chnls) { + pr_err("pblk: supports only power-of-two channel config.\n"); + return -EINVAL; + } + ppaf.ch_len = power_len; + + power_len = get_count_order(geo->luns_per_chnl); + if (1 << power_len != geo->luns_per_chnl) { + pr_err("pblk: supports only power-of-two LUN config.\n"); + return -EINVAL; + } + ppaf.lun_len = power_len; + + pblk->ppaf.sec_offset = 0; + pblk->ppaf.pln_offset = ppaf.sect_len; + pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len; + pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len; + pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len; + pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len; + pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1; + pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) << + pblk->ppaf.pln_offset; + pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) << + pblk->ppaf.ch_offset; + pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) << + pblk->ppaf.lun_offset; + pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) << + pblk->ppaf.pg_offset; + pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) << + pblk->ppaf.blk_offset; + + pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len; + + return 0; +} + +static int pblk_init_global_caches(struct pblk *pblk) +{ + char cache_name[PBLK_CACHE_NAME_LEN]; + + down_write(&pblk_lock); + pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws", + sizeof(struct pblk_line_ws), 0, 0, NULL); + if (!pblk_blk_ws_cache) { + up_write(&pblk_lock); + return -ENOMEM; + } + + pblk_rec_cache = kmem_cache_create("pblk_rec", + sizeof(struct pblk_rec_ctx), 0, 0, NULL); + if (!pblk_rec_cache) { + kmem_cache_destroy(pblk_blk_ws_cache); + up_write(&pblk_lock); + return 
-ENOMEM; + } + + pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size, + 0, 0, NULL); + if (!pblk_r_rq_cache) { + kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_rec_cache); + up_write(&pblk_lock); + return -ENOMEM; + } + + pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, + 0, 0, NULL); + if (!pblk_w_rq_cache) { + kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_rec_cache); + kmem_cache_destroy(pblk_r_rq_cache); + up_write(&pblk_lock); + return -ENOMEM; + } + + snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s", + pblk->disk->disk_name); + pblk_line_meta_cache = kmem_cache_create(cache_name, + pblk->lm.sec_bitmap_len, 0, 0, NULL); + if (!pblk_line_meta_cache) { + kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_rec_cache); + kmem_cache_destroy(pblk_r_rq_cache); + kmem_cache_destroy(pblk_w_rq_cache); + up_write(&pblk_lock); + return -ENOMEM; + } + up_write(&pblk_lock); + + return 0; +} + +static int pblk_core_init(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int max_write_ppas; + int mod; + + pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE); + max_write_ppas = pblk->min_write_pgs * geo->nr_luns; + pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ? 
+ max_write_ppas : nvm_max_phys_sects(dev); + pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg * + geo->nr_planes * geo->nr_luns; + + if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) { + pr_err("pblk: cannot support device max_phys_sect\n"); + return -EINVAL; + } + + div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod); + if (mod) { + pr_err("pblk: bad configuration of sectors/pages\n"); + return -EINVAL; + } + + if (pblk_init_global_caches(pblk)) + return -ENOMEM; + + pblk->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0); + if (!pblk->page_pool) + return -ENOMEM; + + pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns, + pblk_blk_ws_cache); + if (!pblk->line_ws_pool) + goto free_page_pool; + + pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache); + if (!pblk->rec_pool) + goto free_blk_ws_pool; + + pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache); + if (!pblk->r_rq_pool) + goto free_rec_pool; + + pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache); + if (!pblk->w_rq_pool) + goto free_r_rq_pool; + + pblk->line_meta_pool = + mempool_create_slab_pool(16, pblk_line_meta_cache); + if (!pblk->line_meta_pool) + goto free_w_rq_pool; + + pblk->kw_wq = alloc_workqueue("pblk-aux-wq", + WQ_MEM_RECLAIM | WQ_UNBOUND, 1); + if (!pblk->kw_wq) + goto free_line_meta_pool; + + if (pblk_set_ppaf(pblk)) + goto free_kw_wq; + + if (pblk_rwb_init(pblk)) + goto free_kw_wq; + + INIT_LIST_HEAD(&pblk->compl_list); + return 0; + +free_kw_wq: + destroy_workqueue(pblk->kw_wq); +free_line_meta_pool: + mempool_destroy(pblk->line_meta_pool); +free_w_rq_pool: + mempool_destroy(pblk->w_rq_pool); +free_r_rq_pool: + mempool_destroy(pblk->r_rq_pool); +free_rec_pool: + mempool_destroy(pblk->rec_pool); +free_blk_ws_pool: + mempool_destroy(pblk->line_ws_pool); +free_page_pool: + mempool_destroy(pblk->page_pool); + return -ENOMEM; +} + +static void pblk_core_free(struct pblk *pblk) +{ + if (pblk->kw_wq) + destroy_workqueue(pblk->kw_wq); 
+ + mempool_destroy(pblk->page_pool); + mempool_destroy(pblk->line_ws_pool); + mempool_destroy(pblk->rec_pool); + mempool_destroy(pblk->r_rq_pool); + mempool_destroy(pblk->w_rq_pool); + mempool_destroy(pblk->line_meta_pool); + + kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_rec_cache); + kmem_cache_destroy(pblk_r_rq_cache); + kmem_cache_destroy(pblk_w_rq_cache); + kmem_cache_destroy(pblk_line_meta_cache); +} + +static void pblk_luns_free(struct pblk *pblk) +{ + kfree(pblk->luns); +} + +static void pblk_lines_free(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line *line; + int i; + + spin_lock(&l_mg->free_lock); + for (i = 0; i < l_mg->nr_lines; i++) { + line = &pblk->lines[i]; + + pblk_line_free(pblk, line); + kfree(line->blk_bitmap); + kfree(line->erase_bitmap); + } + spin_unlock(&l_mg->free_lock); +} + +static void pblk_line_meta_free(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + int i; + + kfree(l_mg->bb_template); + kfree(l_mg->bb_aux); + + for (i = 0; i < PBLK_DATA_LINES; i++) { + pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type); + pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type); + } + + kfree(pblk->lines); +} + +static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun) +{ + struct nvm_geo *geo = &dev->geo; + struct ppa_addr ppa; + u8 *blks; + int nr_blks, ret; + + nr_blks = geo->blks_per_lun * geo->plane_mode; + blks = kmalloc(nr_blks, GFP_KERNEL); + if (!blks) + return -ENOMEM; + + ppa.ppa = 0; + ppa.g.ch = rlun->bppa.g.ch; + ppa.g.lun = rlun->bppa.g.lun; + + ret = nvm_get_tgt_bb_tbl(dev, ppa, blks); + if (ret) + goto out; + + nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); + if (nr_blks < 0) { + ret = nr_blks; + goto out; + } + + rlun->bb_list = blks; + + return 0; +out: + kfree(blks); + return ret; +} + +static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_lun 
*rlun; + int bb_cnt = 0; + int i; + + line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); + if (!line->blk_bitmap) + return -ENOMEM; + + line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); + if (!line->erase_bitmap) { + kfree(line->blk_bitmap); + return -ENOMEM; + } + + for (i = 0; i < lm->blk_per_line; i++) { + rlun = &pblk->luns[i]; + if (rlun->bb_list[line->id] == NVM_BLK_T_FREE) + continue; + + set_bit(i, line->blk_bitmap); + bb_cnt++; + } + + return bb_cnt; +} + +static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_lun *rlun; + int i, ret; + + /* TODO: Implement unbalanced LUN support */ + if (geo->luns_per_chnl < 0) { + pr_err("pblk: unbalanced LUN config.\n"); + return -EINVAL; + } + + pblk->luns = kcalloc(geo->nr_luns, sizeof(struct pblk_lun), GFP_KERNEL); + if (!pblk->luns) + return -ENOMEM; + + for (i = 0; i < geo->nr_luns; i++) { + /* Stripe across channels */ + int ch = i % geo->nr_chnls; + int lun_raw = i / geo->nr_chnls; + int lunid = lun_raw + ch * geo->luns_per_chnl; + + rlun = &pblk->luns[i]; + rlun->bppa = luns[lunid]; + + sema_init(&rlun->wr_sem, 1); + + ret = pblk_bb_discovery(dev, rlun); + if (ret) { + while (--i >= 0) + kfree(pblk->luns[i].bb_list); + return ret; + } + } + + return 0; +} + +static int pblk_lines_configure(struct pblk *pblk, int flags) +{ + struct pblk_line *line = NULL; + int ret = 0; + + if (!(flags & NVM_TARGET_FACTORY)) { + line = pblk_recov_l2p(pblk); + if (IS_ERR(line)) { + pr_err("pblk: could not recover l2p table\n"); + ret = -EFAULT; + } + } + + if (!line) { + /* Configure next line for user data */ + line = pblk_line_get_first_data(pblk); + if (!line) { + pr_err("pblk: line list corrupted\n"); + ret = -EFAULT; + } + } + + return ret; +} + +/* See comment over struct line_emeta definition */ +static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm) +{ + return 
(sizeof(struct line_emeta) + + ((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) + + (pblk->l_mg.nr_lines * sizeof(u32)) + + lm->blk_bitmap_len); +} + +static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + sector_t provisioned; + + pblk->over_pct = 20; + + provisioned = nr_free_blks; + provisioned *= (100 - pblk->over_pct); + sector_div(provisioned, 100); + + /* Internally pblk manages all free blocks, but all calculations based + * on user capacity consider only provisioned blocks + */ + pblk->rl.total_blocks = nr_free_blks; + pblk->rl.nr_secs = nr_free_blks * geo->sec_per_blk; + pblk->capacity = provisioned * geo->sec_per_blk; + atomic_set(&pblk->rl.free_blocks, nr_free_blks); +} + +static int pblk_lines_init(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line *line; + unsigned int smeta_len, emeta_len; + long nr_bad_blks, nr_meta_blks, nr_free_blks; + int bb_distance; + int i; + int ret; + + lm->sec_per_line = geo->sec_per_blk * geo->nr_luns; + lm->blk_per_line = geo->nr_luns; + lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); + lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long); + lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); + lm->high_thrs = lm->sec_per_line / 2; + lm->mid_thrs = lm->sec_per_line / 4; + + /* Calculate necessary pages for smeta. 
See comment over struct + * line_smeta definition + */ + lm->smeta_len = sizeof(struct line_smeta) + + PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len; + + i = 1; +add_smeta_page: + lm->smeta_sec = i * geo->sec_per_pl; + lm->smeta_len = lm->smeta_sec * geo->sec_size; + + smeta_len = sizeof(struct line_smeta) + + PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len; + if (smeta_len > lm->smeta_len) { + i++; + goto add_smeta_page; + } + + /* Calculate necessary pages for emeta. See comment over struct + * line_emeta definition + */ + i = 1; +add_emeta_page: + lm->emeta_sec = i * geo->sec_per_pl; + lm->emeta_len = lm->emeta_sec * geo->sec_size; + + emeta_len = calc_emeta_len(pblk, lm); + if (emeta_len > lm->emeta_len) { + i++; + goto add_emeta_page; + } + lm->emeta_bb = geo->nr_luns - i; + + nr_meta_blks = (lm->smeta_sec + lm->emeta_sec + + (geo->sec_per_blk / 2)) / geo->sec_per_blk; + lm->min_blk_line = nr_meta_blks + 1; + + l_mg->nr_lines = geo->blks_per_lun; + l_mg->log_line = l_mg->data_line = NULL; + l_mg->l_seq_nr = l_mg->d_seq_nr = 0; + l_mg->nr_free_lines = 0; + bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES); + + /* smeta is always small enough to fit on a kmalloc memory allocation, + * emeta depends on the number of LUNs allocated to the pblk instance + */ + l_mg->smeta_alloc_type = PBLK_KMALLOC_META; + for (i = 0; i < PBLK_DATA_LINES; i++) { + l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL); + if (!l_mg->sline_meta[i].meta) + while (--i >= 0) { + kfree(l_mg->sline_meta[i].meta); + ret = -ENOMEM; + goto fail; + } + } + + if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) { + l_mg->emeta_alloc_type = PBLK_VMALLOC_META; + + for (i = 0; i < PBLK_DATA_LINES; i++) { + l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len); + if (!l_mg->eline_meta[i].meta) + while (--i >= 0) { + vfree(l_mg->eline_meta[i].meta); + ret = -ENOMEM; + goto fail; + } + } + } else { + l_mg->emeta_alloc_type = PBLK_KMALLOC_META; + + for (i = 0; i < PBLK_DATA_LINES; i++) { + 
l_mg->eline_meta[i].meta = + kmalloc(lm->emeta_len, GFP_KERNEL); + if (!l_mg->eline_meta[i].meta) + while (--i >= 0) { + kfree(l_mg->eline_meta[i].meta); + ret = -ENOMEM; + goto fail; + } + } + } + + l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); + if (!l_mg->bb_template) { + ret = -ENOMEM; + goto fail_free_meta; + } + + l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); + if (!l_mg->bb_aux) { + ret = -ENOMEM; + goto fail_free_bb_template; + } + + bb_distance = (geo->nr_luns) * geo->sec_per_pl; + for (i = 0; i < lm->sec_per_line; i += bb_distance) + bitmap_set(l_mg->bb_template, i, geo->sec_per_pl); + + INIT_LIST_HEAD(&l_mg->free_list); + INIT_LIST_HEAD(&l_mg->corrupt_list); + INIT_LIST_HEAD(&l_mg->bad_list); + INIT_LIST_HEAD(&l_mg->gc_full_list); + INIT_LIST_HEAD(&l_mg->gc_high_list); + INIT_LIST_HEAD(&l_mg->gc_mid_list); + INIT_LIST_HEAD(&l_mg->gc_low_list); + INIT_LIST_HEAD(&l_mg->gc_empty_list); + + l_mg->gc_lists[0] = &l_mg->gc_high_list; + l_mg->gc_lists[1] = &l_mg->gc_mid_list; + l_mg->gc_lists[2] = &l_mg->gc_low_list; + + spin_lock_init(&l_mg->free_lock); + spin_lock_init(&l_mg->gc_lock); + + pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), + GFP_KERNEL); + if (!pblk->lines) { + ret = -ENOMEM; + goto fail_free_bb_aux; + } + + nr_free_blks = 0; + for (i = 0; i < l_mg->nr_lines; i++) { + int blk_in_line; + + line = &pblk->lines[i]; + + line->pblk = pblk; + line->id = i; + line->type = PBLK_LINETYPE_FREE; + line->state = PBLK_LINESTATE_FREE; + line->gc_group = PBLK_LINEGC_NONE; + spin_lock_init(&line->lock); + + nr_bad_blks = pblk_bb_line(pblk, line); + if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) { + ret = -EINVAL; + goto fail_free_lines; + } + + blk_in_line = lm->blk_per_line - nr_bad_blks; + if (blk_in_line < lm->min_blk_line) { + line->state = PBLK_LINESTATE_BAD; + list_add_tail(&line->list, &l_mg->bad_list); + continue; + } + + nr_free_blks += blk_in_line; + atomic_set(&line->blk_in_line, blk_in_line); + + 
l_mg->nr_free_lines++; + list_add_tail(&line->list, &l_mg->free_list); + } + + pblk_set_provision(pblk, nr_free_blks); + + sema_init(&pblk->erase_sem, 1); + + /* Cleanup per-LUN bad block lists - managed within lines on run-time */ + for (i = 0; i < geo->nr_luns; i++) + kfree(pblk->luns[i].bb_list); + + return 0; +fail_free_lines: + kfree(pblk->lines); +fail_free_bb_aux: + kfree(l_mg->bb_aux); +fail_free_bb_template: + kfree(l_mg->bb_template); +fail_free_meta: + for (i = 0; i < PBLK_DATA_LINES; i++) { + pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type); + pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type); + } +fail: + for (i = 0; i < geo->nr_luns; i++) + kfree(pblk->luns[i].bb_list); + + return ret; +} + +static int pblk_writer_init(struct pblk *pblk) +{ + setup_timer(&pblk->wtimer, pblk_write_timer_fn, (unsigned long)pblk); + mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100)); + + pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t"); + if (IS_ERR(pblk->writer_ts)) { + pr_err("pblk: could not allocate writer kthread\n"); + return PTR_ERR(pblk->writer_ts); + } + + return 0; +} + +static void pblk_writer_stop(struct pblk *pblk) +{ + if (pblk->writer_ts) + kthread_stop(pblk->writer_ts); + del_timer(&pblk->wtimer); +} + +static void pblk_free(struct pblk *pblk) +{ + pblk_luns_free(pblk); + pblk_lines_free(pblk); + pblk_line_meta_free(pblk); + pblk_core_free(pblk); + pblk_l2p_free(pblk); + + kfree(pblk); +} + +static void pblk_tear_down(struct pblk *pblk) +{ + pblk_flush_writer(pblk); + pblk_writer_stop(pblk); + pblk_rb_sync_l2p(&pblk->rwb); + pblk_recov_pad(pblk); + pblk_rwb_free(pblk); + pblk_rl_free(&pblk->rl); + + pr_debug("pblk: consistent tear down\n"); +} + +static void pblk_exit(void *private) +{ + struct pblk *pblk = private; + + down_write(&pblk_lock); + pblk_gc_exit(pblk); + pblk_tear_down(pblk); + pblk_free(pblk); + up_write(&pblk_lock); +} + +static sector_t pblk_capacity(void *private) +{ + struct pblk *pblk 
= private; + + return pblk->capacity * NR_PHY_IN_LOG; +} + +static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, + int flags) +{ + struct nvm_geo *geo = &dev->geo; + struct request_queue *bqueue = dev->q; + struct request_queue *tqueue = tdisk->queue; + struct pblk *pblk; + int ret; + + if (dev->identity.dom & NVM_RSP_L2P) { + pr_err("pblk: device-side L2P table not supported. (%x)\n", + dev->identity.dom); + return ERR_PTR(-EINVAL); + } + + pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL); + if (!pblk) + return ERR_PTR(-ENOMEM); + + pblk->dev = dev; + pblk->disk = tdisk; + + spin_lock_init(&pblk->trans_lock); + spin_lock_init(&pblk->lock); + + if (flags & NVM_TARGET_FACTORY) + pblk_setup_uuid(pblk); + +#ifdef CONFIG_NVM_DEBUG + atomic_long_set(&pblk->inflight_writes, 0); + atomic_long_set(&pblk->padded_writes, 0); + atomic_long_set(&pblk->padded_wb, 0); + atomic_long_set(&pblk->nr_flush, 0); + atomic_long_set(&pblk->req_writes, 0); + atomic_long_set(&pblk->sub_writes, 0); + atomic_long_set(&pblk->sync_writes, 0); + atomic_long_set(&pblk->compl_writes, 0); + atomic_long_set(&pblk->inflight_reads, 0); + atomic_long_set(&pblk->sync_reads, 0); + atomic_long_set(&pblk->recov_writes, 0); + atomic_long_set(&pblk->recov_writes, 0); + atomic_long_set(&pblk->recov_gc_writes, 0); +#endif + + atomic_long_set(&pblk->read_failed, 0); + atomic_long_set(&pblk->read_empty, 0); + atomic_long_set(&pblk->read_high_ecc, 0); + atomic_long_set(&pblk->read_failed_gc, 0); + atomic_long_set(&pblk->write_failed, 0); + atomic_long_set(&pblk->erase_failed, 0); + + ret = pblk_luns_init(pblk, dev->luns); + if (ret) { + pr_err("pblk: could not initialize luns\n"); + goto fail; + } + + ret = pblk_lines_init(pblk); + if (ret) { + pr_err("pblk: could not initialize lines\n"); + goto fail_free_luns; + } + + ret = pblk_core_init(pblk); + if (ret) { + pr_err("pblk: could not initialize core\n"); + goto fail_free_line_meta; + } + + ret = pblk_l2p_init(pblk); + if (ret) { + 
pr_err("pblk: could not initialize maps\n"); + goto fail_free_core; + } + + ret = pblk_lines_configure(pblk, flags); + if (ret) { + pr_err("pblk: could not configure lines\n"); + goto fail_free_l2p; + } + + ret = pblk_writer_init(pblk); + if (ret) { + pr_err("pblk: could not initialize write thread\n"); + goto fail_free_lines; + } + + ret = pblk_gc_init(pblk); + if (ret) { + pr_err("pblk: could not initialize gc\n"); + goto fail_stop_writer; + } + + /* inherit the size from the underlying device */ + blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue)); + blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue)); + + blk_queue_write_cache(tqueue, true, false); + + tqueue->limits.discard_granularity = geo->pgs_per_blk * geo->pfpg_size; + tqueue->limits.discard_alignment = 0; + blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue); + + pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n", + geo->nr_luns, pblk->l_mg.nr_lines, + (unsigned long long)pblk->rl.nr_secs, + pblk->rwb.nr_entries); + + wake_up_process(pblk->writer_ts); + return pblk; + +fail_stop_writer: + pblk_writer_stop(pblk); +fail_free_lines: + pblk_lines_free(pblk); +fail_free_l2p: + pblk_l2p_free(pblk); +fail_free_core: + pblk_core_free(pblk); +fail_free_line_meta: + pblk_line_meta_free(pblk); +fail_free_luns: + pblk_luns_free(pblk); +fail: + kfree(pblk); + return ERR_PTR(ret); +} + +/* physical block device target */ +static struct nvm_tgt_type tt_pblk = { + .name = "pblk", + .version = {1, 0, 0}, + + .make_rq = pblk_make_rq, + .capacity = pblk_capacity, + + .init = pblk_init, + .exit = pblk_exit, + + .sysfs_init = pblk_sysfs_init, + .sysfs_exit = pblk_sysfs_exit, +}; + +static int __init pblk_module_init(void) +{ + return nvm_register_tgt_type(&tt_pblk); +} + +static void pblk_module_exit(void) +{ + nvm_unregister_tgt_type(&tt_pblk); +} + +module_init(pblk_module_init); +module_exit(pblk_module_exit); 
+MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>"); +MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>"); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs"); diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c new file mode 100644 index 000000000000..17c16955284d --- /dev/null +++ b/drivers/lightnvm/pblk-map.c @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * pblk-map.c - pblk's lba-ppa mapping strategy + * + */ + +#include "pblk.h" + +static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, + struct ppa_addr *ppa_list, + unsigned long *lun_bitmap, + struct pblk_sec_meta *meta_list, + unsigned int valid_secs) +{ + struct pblk_line *line = pblk_line_get_data(pblk); + struct line_emeta *emeta = line->emeta; + struct pblk_w_ctx *w_ctx; + __le64 *lba_list = pblk_line_emeta_to_lbas(emeta); + u64 paddr; + int nr_secs = pblk->min_write_pgs; + int i; + + paddr = pblk_alloc_page(pblk, line, nr_secs); + + for (i = 0; i < nr_secs; i++, paddr++) { + /* ppa to be sent to the device */ + ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); + + /* Write context for target bio completion on write buffer. Note + * that the write buffer is protected by the sync backpointer, + * and a single writer thread have access to each specific entry + * at a time. 
Thus, it is safe to modify the context for the + * entry we are setting up for submission without taking any + * lock or memory barrier. + */ + if (i < valid_secs) { + kref_get(&line->ref); + w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i); + w_ctx->ppa = ppa_list[i]; + meta_list[i].lba = cpu_to_le64(w_ctx->lba); + lba_list[paddr] = cpu_to_le64(w_ctx->lba); + le64_add_cpu(&line->emeta->nr_valid_lbas, 1); + } else { + meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); + lba_list[paddr] = cpu_to_le64(ADDR_EMPTY); + pblk_map_pad_invalidate(pblk, line, paddr); + } + } + + if (pblk_line_is_full(line)) { + line = pblk_line_replace_data(pblk); + if (!line) + return; + } + + pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap); +} + +void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, + unsigned long *lun_bitmap, unsigned int valid_secs, + unsigned int off) +{ + struct pblk_sec_meta *meta_list = rqd->meta_list; + unsigned int map_secs; + int min = pblk->min_write_pgs; + int i; + + for (i = off; i < rqd->nr_ppas; i += min) { + map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; + pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + lun_bitmap, &meta_list[i], map_secs); + } +} + +/* only if erase_ppa is set, acquire erase semaphore */ +void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, + unsigned int sentry, unsigned long *lun_bitmap, + unsigned int valid_secs, struct ppa_addr *erase_ppa) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line *e_line = pblk_line_get_data_next(pblk); + struct pblk_sec_meta *meta_list = rqd->meta_list; + unsigned int map_secs; + int min = pblk->min_write_pgs; + int i, erase_lun; + + for (i = 0; i < rqd->nr_ppas; i += min) { + map_secs = (i + min > valid_secs) ? 
(valid_secs % min) : min; + pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + lun_bitmap, &meta_list[i], map_secs); + + erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls + + rqd->ppa_list[i].g.ch; + + if (!test_bit(erase_lun, e_line->erase_bitmap)) { + if (down_trylock(&pblk->erase_sem)) + continue; + + set_bit(erase_lun, e_line->erase_bitmap); + atomic_dec(&e_line->left_eblks); + *erase_ppa = rqd->ppa_list[i]; + erase_ppa->g.blk = e_line->id; + + /* Avoid evaluating e_line->left_eblks */ + return pblk_map_rq(pblk, rqd, sentry, lun_bitmap, + valid_secs, i + min); + } + } + + /* Erase blocks that are bad in this line but might not be in next */ + if (unlikely(ppa_empty(*erase_ppa))) { + struct pblk_line_meta *lm = &pblk->lm; + + i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line); + if (i == lm->blk_per_line) + return; + + set_bit(i, e_line->erase_bitmap); + atomic_dec(&e_line->left_eblks); + *erase_ppa = pblk->luns[i].bppa; /* set ch and lun */ + erase_ppa->g.blk = e_line->id; + } +} diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c new file mode 100644 index 000000000000..045384ddc1f9 --- /dev/null +++ b/drivers/lightnvm/pblk-rb.c @@ -0,0 +1,852 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * + * Based upon the circular ringbuffer. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * pblk-rb.c - pblk's write buffer + */ + +#include <linux/circ_buf.h> + +#include "pblk.h" + +static DECLARE_RWSEM(pblk_rb_lock); + +void pblk_rb_data_free(struct pblk_rb *rb) +{ + struct pblk_rb_pages *p, *t; + + down_write(&pblk_rb_lock); + list_for_each_entry_safe(p, t, &rb->pages, list) { + free_pages((unsigned long)page_address(p->pages), p->order); + list_del(&p->list); + kfree(p); + } + up_write(&pblk_rb_lock); +} + +/* + * Initialize ring buffer. The data and metadata buffers must be previously + * allocated and their size must be a power of two + * (Documentation/circular-buffers.txt) + */ +int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, + unsigned int power_size, unsigned int power_seg_sz) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + unsigned int init_entry = 0; + unsigned int alloc_order = power_size; + unsigned int max_order = MAX_ORDER - 1; + unsigned int order, iter; + + down_write(&pblk_rb_lock); + rb->entries = rb_entry_base; + rb->seg_size = (1 << power_seg_sz); + rb->nr_entries = (1 << power_size); + rb->mem = rb->subm = rb->sync = rb->l2p_update = 0; + rb->sync_point = EMPTY_ENTRY; + + spin_lock_init(&rb->w_lock); + spin_lock_init(&rb->s_lock); + + INIT_LIST_HEAD(&rb->pages); + + if (alloc_order >= max_order) { + order = max_order; + iter = (1 << (alloc_order - max_order)); + } else { + order = alloc_order; + iter = 1; + } + + do { + struct pblk_rb_entry *entry; + struct pblk_rb_pages *page_set; + void *kaddr; + unsigned long set_size; + int i; + + page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL); + if (!page_set) { + up_write(&pblk_rb_lock); + return -ENOMEM; + } + + page_set->order = order; + page_set->pages = alloc_pages(GFP_KERNEL, order); + if (!page_set->pages) { + kfree(page_set); + pblk_rb_data_free(rb); + up_write(&pblk_rb_lock); + return -ENOMEM; + } + kaddr = page_address(page_set->pages); + + entry = &rb->entries[init_entry]; + entry->data = kaddr; + entry->cacheline = 
pblk_cacheline_to_addr(init_entry++); + entry->w_ctx.flags = PBLK_WRITABLE_ENTRY; + + set_size = (1 << order); + for (i = 1; i < set_size; i++) { + entry = &rb->entries[init_entry]; + entry->cacheline = pblk_cacheline_to_addr(init_entry++); + entry->data = kaddr + (i * rb->seg_size); + entry->w_ctx.flags = PBLK_WRITABLE_ENTRY; + bio_list_init(&entry->w_ctx.bios); + } + + list_add_tail(&page_set->list, &rb->pages); + iter--; + } while (iter > 0); + up_write(&pblk_rb_lock); + +#ifdef CONFIG_NVM_DEBUG + atomic_set(&rb->inflight_sync_point, 0); +#endif + + /* + * Initialize rate-limiter, which controls access to the write buffer + * but user and GC I/O + */ + pblk_rl_init(&pblk->rl, rb->nr_entries); + + return 0; +} + +/* + * pblk_rb_calculate_size -- calculate the size of the write buffer + */ +unsigned int pblk_rb_calculate_size(unsigned int nr_entries) +{ + /* Alloc a write buffer that can at least fit 128 entries */ + return (1 << max(get_count_order(nr_entries), 7)); +} + +void *pblk_rb_entries_ref(struct pblk_rb *rb) +{ + return rb->entries; +} + +static void clean_wctx(struct pblk_w_ctx *w_ctx) +{ + int flags; + +try: + flags = READ_ONCE(w_ctx->flags); + if (!(flags & PBLK_SUBMITTED_ENTRY)) + goto try; + + /* Release flags on context. Protect from writes and reads */ + smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY); + pblk_ppa_set_empty(&w_ctx->ppa); +} + +#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size) +#define pblk_rb_ring_space(rb, head, tail, size) \ + (CIRC_SPACE(head, tail, size)) + +/* + * Buffer space is calculated with respect to the back pointer signaling + * synchronized entries to the media. 
+ */ +static unsigned int pblk_rb_space(struct pblk_rb *rb) +{ + unsigned int mem = READ_ONCE(rb->mem); + unsigned int sync = READ_ONCE(rb->sync); + + return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries); +} + +/* + * Buffer count is calculated with respect to the submission entry signaling the + * entries that are available to send to the media + */ +unsigned int pblk_rb_read_count(struct pblk_rb *rb) +{ + unsigned int mem = READ_ONCE(rb->mem); + unsigned int subm = READ_ONCE(rb->subm); + + return pblk_rb_ring_count(mem, subm, rb->nr_entries); +} + +unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries) +{ + unsigned int subm; + + subm = READ_ONCE(rb->subm); + /* Commit read means updating submission pointer */ + smp_store_release(&rb->subm, + (subm + nr_entries) & (rb->nr_entries - 1)); + + return subm; +} + +static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd, + unsigned int to_update) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + struct pblk_line *line; + struct pblk_rb_entry *entry; + struct pblk_w_ctx *w_ctx; + unsigned int i; + + for (i = 0; i < to_update; i++) { + entry = &rb->entries[*l2p_upd]; + w_ctx = &entry->w_ctx; + + pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa, + entry->cacheline); + + line = &pblk->lines[pblk_tgt_ppa_to_line(w_ctx->ppa)]; + kref_put(&line->ref, pblk_line_put); + clean_wctx(w_ctx); + *l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1); + } + + return 0; +} + +/* + * When we move the l2p_update pointer, we update the l2p table - lookups will + * point to the physical address instead of to the cacheline in the write buffer + * from this moment on. 
+ */ +static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries, + unsigned int mem, unsigned int sync) +{ + unsigned int space, count; + int ret = 0; + + lockdep_assert_held(&rb->w_lock); + + /* Update l2p only as buffer entries are being overwritten */ + space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries); + if (space > nr_entries) + goto out; + + count = nr_entries - space; + /* l2p_update used exclusively under rb->w_lock */ + ret = __pblk_rb_update_l2p(rb, &rb->l2p_update, count); + +out: + return ret; +} + +/* + * Update the l2p entry for all sectors stored on the write buffer. This means + * that all future lookups to the l2p table will point to a device address, not + * to the cacheline in the write buffer. + */ +void pblk_rb_sync_l2p(struct pblk_rb *rb) +{ + unsigned int sync; + unsigned int to_update; + + spin_lock(&rb->w_lock); + + /* Protect from reads and writes */ + sync = smp_load_acquire(&rb->sync); + + to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries); + __pblk_rb_update_l2p(rb, &rb->l2p_update, to_update); + + spin_unlock(&rb->w_lock); +} + +/* + * Write @nr_entries to ring buffer from @data buffer if there is enough space. + * Typically, 4KB data chunks coming from a bio will be copied to the ring + * buffer, thus the write will fail if not all incoming data can be copied. 
+ * + */ +static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data, + struct pblk_w_ctx w_ctx, + struct pblk_rb_entry *entry) +{ + memcpy(entry->data, data, rb->seg_size); + + entry->w_ctx.lba = w_ctx.lba; + entry->w_ctx.ppa = w_ctx.ppa; +} + +void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data, + struct pblk_w_ctx w_ctx, unsigned int ring_pos) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + struct pblk_rb_entry *entry; + int flags; + + entry = &rb->entries[ring_pos]; + flags = READ_ONCE(entry->w_ctx.flags); +#ifdef CONFIG_NVM_DEBUG + /* Caller must guarantee that the entry is free */ + BUG_ON(!(flags & PBLK_WRITABLE_ENTRY)); +#endif + + __pblk_rb_write_entry(rb, data, w_ctx, entry); + + pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline); + flags = w_ctx.flags | PBLK_WRITTEN_DATA; + + /* Release flags on write context. Protect from writes */ + smp_store_release(&entry->w_ctx.flags, flags); +} + +void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, + struct pblk_w_ctx w_ctx, struct pblk_line *gc_line, + unsigned int ring_pos) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + struct pblk_rb_entry *entry; + int flags; + + entry = &rb->entries[ring_pos]; + flags = READ_ONCE(entry->w_ctx.flags); +#ifdef CONFIG_NVM_DEBUG + /* Caller must guarantee that the entry is free */ + BUG_ON(!(flags & PBLK_WRITABLE_ENTRY)); +#endif + + __pblk_rb_write_entry(rb, data, w_ctx, entry); + + if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, gc_line)) + entry->w_ctx.lba = ADDR_EMPTY; + + flags = w_ctx.flags | PBLK_WRITTEN_DATA; + + /* Release flags on write context. 
Protect from writes */ + smp_store_release(&entry->w_ctx.flags, flags); +} + +static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio, + unsigned int pos) +{ + struct pblk_rb_entry *entry; + unsigned int subm, sync_point; + int flags; + + subm = READ_ONCE(rb->subm); + +#ifdef CONFIG_NVM_DEBUG + atomic_inc(&rb->inflight_sync_point); +#endif + + if (pos == subm) + return 0; + + sync_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1); + entry = &rb->entries[sync_point]; + + flags = READ_ONCE(entry->w_ctx.flags); + flags |= PBLK_FLUSH_ENTRY; + + /* Release flags on context. Protect from writes */ + smp_store_release(&entry->w_ctx.flags, flags); + + /* Protect syncs */ + smp_store_release(&rb->sync_point, sync_point); + + spin_lock_irq(&rb->s_lock); + bio_list_add(&entry->w_ctx.bios, bio); + spin_unlock_irq(&rb->s_lock); + + return 1; +} + +static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, + unsigned int *pos) +{ + unsigned int mem; + unsigned int sync; + + sync = READ_ONCE(rb->sync); + mem = READ_ONCE(rb->mem); + + if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < nr_entries) + return 0; + + if (pblk_rb_update_l2p(rb, nr_entries, mem, sync)) + return 0; + + *pos = mem; + + return 1; +} + +static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, + unsigned int *pos) +{ + if (!__pblk_rb_may_write(rb, nr_entries, pos)) + return 0; + + /* Protect from read count */ + smp_store_release(&rb->mem, (*pos + nr_entries) & (rb->nr_entries - 1)); + return 1; +} + +static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, + unsigned int *pos, struct bio *bio, + int *io_ret) +{ + unsigned int mem; + + if (!__pblk_rb_may_write(rb, nr_entries, pos)) + return 0; + + mem = (*pos + nr_entries) & (rb->nr_entries - 1); + *io_ret = NVM_IO_DONE; + + if (bio->bi_opf & REQ_PREFLUSH) { + struct pblk *pblk = container_of(rb, struct pblk, rwb); + +#ifdef CONFIG_NVM_DEBUG + atomic_long_inc(&pblk->nr_flush); 
+#endif + if (pblk_rb_sync_point_set(&pblk->rwb, bio, mem)) + *io_ret = NVM_IO_OK; + } + + /* Protect from read count */ + smp_store_release(&rb->mem, mem); + return 1; +} + +/* + * Atomically check that (i) there is space on the write buffer for the + * incoming I/O, and (ii) the current I/O type has enough budget in the write + * buffer (rate-limiter). + */ +int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio, + unsigned int nr_entries, unsigned int *pos) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + int flush_done; + + spin_lock(&rb->w_lock); + if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) { + spin_unlock(&rb->w_lock); + return NVM_IO_REQUEUE; + } + + if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) { + spin_unlock(&rb->w_lock); + return NVM_IO_REQUEUE; + } + + pblk_rl_user_in(&pblk->rl, nr_entries); + spin_unlock(&rb->w_lock); + + return flush_done; +} + +/* + * Look at pblk_rb_may_write_user comment + */ +int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, + unsigned int *pos) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + + spin_lock(&rb->w_lock); + if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) { + spin_unlock(&rb->w_lock); + return 0; + } + + if (!pblk_rb_may_write(rb, nr_entries, pos)) { + spin_unlock(&rb->w_lock); + return 0; + } + + pblk_rl_gc_in(&pblk->rl, nr_entries); + spin_unlock(&rb->w_lock); + + return 1; +} + +/* + * The caller of this function must ensure that the backpointer will not + * overwrite the entries passed on the list. 
+ */ +unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio, + struct list_head *list, + unsigned int max) +{ + struct pblk_rb_entry *entry, *tentry; + struct page *page; + unsigned int read = 0; + int ret; + + list_for_each_entry_safe(entry, tentry, list, index) { + if (read > max) { + pr_err("pblk: too many entries on list\n"); + goto out; + } + + page = virt_to_page(entry->data); + if (!page) { + pr_err("pblk: could not allocate write bio page\n"); + goto out; + } + + ret = bio_add_page(bio, page, rb->seg_size, 0); + if (ret != rb->seg_size) { + pr_err("pblk: could not add page to write bio\n"); + goto out; + } + + list_del(&entry->index); + read++; + } + +out: + return read; +} + +/* + * Read available entries on rb and add them to the given bio. To avoid a memory + * copy, a page reference to the write buffer is used to be added to the bio. + * + * This function is used by the write thread to form the write bio that will + * persist data on the write buffer to the media. + */ +unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio, + struct pblk_c_ctx *c_ctx, + unsigned int pos, + unsigned int nr_entries, + unsigned int count) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + struct pblk_rb_entry *entry; + struct page *page; + unsigned int pad = 0, read = 0, to_read = nr_entries; + unsigned int user_io = 0, gc_io = 0; + unsigned int i; + int flags; + int ret; + + if (count < nr_entries) { + pad = nr_entries - count; + to_read = count; + } + + c_ctx->sentry = pos; + c_ctx->nr_valid = to_read; + c_ctx->nr_padded = pad; + + for (i = 0; i < to_read; i++) { + entry = &rb->entries[pos]; + + /* A write has been allowed into the buffer, but data is still + * being copied to it. It is ok to busy wait. 
+ */ +try: + flags = READ_ONCE(entry->w_ctx.flags); + if (!(flags & PBLK_WRITTEN_DATA)) + goto try; + + if (flags & PBLK_IOTYPE_USER) + user_io++; + else if (flags & PBLK_IOTYPE_GC) + gc_io++; + else + WARN(1, "pblk: unknown IO type\n"); + + page = virt_to_page(entry->data); + if (!page) { + pr_err("pblk: could not allocate write bio page\n"); + flags &= ~PBLK_WRITTEN_DATA; + flags |= PBLK_SUBMITTED_ENTRY; + /* Release flags on context. Protect from writes */ + smp_store_release(&entry->w_ctx.flags, flags); + goto out; + } + + ret = bio_add_page(bio, page, rb->seg_size, 0); + if (ret != rb->seg_size) { + pr_err("pblk: could not add page to write bio\n"); + flags &= ~PBLK_WRITTEN_DATA; + flags |= PBLK_SUBMITTED_ENTRY; + /* Release flags on context. Protect from writes */ + smp_store_release(&entry->w_ctx.flags, flags); + goto out; + } + + if (flags & PBLK_FLUSH_ENTRY) { + unsigned int sync_point; + + sync_point = READ_ONCE(rb->sync_point); + if (sync_point == pos) { + /* Protect syncs */ + smp_store_release(&rb->sync_point, EMPTY_ENTRY); + } + + flags &= ~PBLK_FLUSH_ENTRY; +#ifdef CONFIG_NVM_DEBUG + atomic_dec(&rb->inflight_sync_point); +#endif + } + + flags &= ~PBLK_WRITTEN_DATA; + flags |= PBLK_SUBMITTED_ENTRY; + + /* Release flags on context. Protect from writes */ + smp_store_release(&entry->w_ctx.flags, flags); + + pos = (pos + 1) & (rb->nr_entries - 1); + } + + read = to_read; + pblk_rl_out(&pblk->rl, user_io, gc_io); +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(pad, &((struct pblk *) + (container_of(rb, struct pblk, rwb)))->padded_writes); +#endif +out: + return read; +} + +/* + * Copy to bio only if the lba matches the one on the given cache entry. + * Otherwise, it means that the entry has been overwritten, and the bio should + * be directed to disk. 
+ */ +int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, + u64 pos, int bio_iter) +{ + struct pblk_rb_entry *entry; + struct pblk_w_ctx *w_ctx; + void *data; + int flags; + int ret = 1; + + spin_lock(&rb->w_lock); + +#ifdef CONFIG_NVM_DEBUG + /* Caller must ensure that the access will not cause an overflow */ + BUG_ON(pos >= rb->nr_entries); +#endif + entry = &rb->entries[pos]; + w_ctx = &entry->w_ctx; + flags = READ_ONCE(w_ctx->flags); + + /* Check if the entry has been overwritten or is scheduled to be */ + if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) { + ret = 0; + goto out; + } + + /* Only advance the bio if it hasn't been advanced already. If advanced, + * this bio is at least a partial bio (i.e., it has partially been + * filled with data from the cache). If part of the data resides on the + * media, we will read later on + */ + if (unlikely(!bio->bi_iter.bi_idx)) + bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE); + + data = bio_data(bio); + memcpy(data, entry->data, rb->seg_size); + +out: + spin_unlock(&rb->w_lock); + return ret; +} + +struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos) +{ + unsigned int entry = pos & (rb->nr_entries - 1); + + return &rb->entries[entry].w_ctx; +} + +unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags) + __acquires(&rb->s_lock) +{ + if (flags) + spin_lock_irqsave(&rb->s_lock, *flags); + else + spin_lock_irq(&rb->s_lock); + + return rb->sync; +} + +void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags) + __releases(&rb->s_lock) +{ + lockdep_assert_held(&rb->s_lock); + + if (flags) + spin_unlock_irqrestore(&rb->s_lock, *flags); + else + spin_unlock_irq(&rb->s_lock); +} + +unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries) +{ + unsigned int sync; + unsigned int i; + + lockdep_assert_held(&rb->s_lock); + + sync = READ_ONCE(rb->sync); + + for (i = 0; i < nr_entries; i++) + sync = (sync + 1) & (rb->nr_entries - 1); + + 
/* Protect from counts */ + smp_store_release(&rb->sync, sync); + + return sync; +} + +unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb) +{ + unsigned int subm, sync_point; + unsigned int count; + + /* Protect syncs */ + sync_point = smp_load_acquire(&rb->sync_point); + if (sync_point == EMPTY_ENTRY) + return 0; + + subm = READ_ONCE(rb->subm); + + /* The sync point itself counts as a sector to sync */ + count = pblk_rb_ring_count(sync_point, subm, rb->nr_entries) + 1; + + return count; +} + +/* + * Scan from the current position of the sync pointer to find the entry that + * corresponds to the given ppa. This is necessary since write requests can be + * completed out of order. The assumption is that the ppa is close to the sync + * pointer thus the search will not take long. + * + * The caller of this function must guarantee that the sync pointer will no + * reach the entry while it is using the metadata associated with it. With this + * assumption in mind, there is no need to take the sync lock. 
+ */ +struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb, + struct ppa_addr *ppa) +{ + unsigned int sync, subm, count; + unsigned int i; + + sync = READ_ONCE(rb->sync); + subm = READ_ONCE(rb->subm); + count = pblk_rb_ring_count(subm, sync, rb->nr_entries); + + for (i = 0; i < count; i++) + sync = (sync + 1) & (rb->nr_entries - 1); + + return NULL; +} + +int pblk_rb_tear_down_check(struct pblk_rb *rb) +{ + struct pblk_rb_entry *entry; + int i; + int ret = 0; + + spin_lock(&rb->w_lock); + spin_lock_irq(&rb->s_lock); + + if ((rb->mem == rb->subm) && (rb->subm == rb->sync) && + (rb->sync == rb->l2p_update) && + (rb->sync_point == EMPTY_ENTRY)) { + goto out; + } + + if (!rb->entries) { + ret = 1; + goto out; + } + + for (i = 0; i < rb->nr_entries; i++) { + entry = &rb->entries[i]; + + if (!entry->data) { + ret = 1; + goto out; + } + } + +out: + spin_unlock(&rb->w_lock); + spin_unlock_irq(&rb->s_lock); + + return ret; +} + +unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos) +{ + return (pos & (rb->nr_entries - 1)); +} + +int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos) +{ + return (pos >= rb->nr_entries); +} + +ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf) +{ + struct pblk *pblk = container_of(rb, struct pblk, rwb); + struct pblk_c_ctx *c; + ssize_t offset; + int queued_entries = 0; + + spin_lock_irq(&rb->s_lock); + list_for_each_entry(c, &pblk->compl_list, list) + queued_entries++; + spin_unlock_irq(&rb->s_lock); + + if (rb->sync_point != EMPTY_ENTRY) + offset = scnprintf(buf, PAGE_SIZE, + "%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n", + rb->nr_entries, + rb->mem, + rb->subm, + rb->sync, + rb->l2p_update, +#ifdef CONFIG_NVM_DEBUG + atomic_read(&rb->inflight_sync_point), +#else + 0, +#endif + rb->sync_point, + pblk_rb_read_count(rb), + pblk_rb_space(rb), + pblk_rb_sync_point_count(rb), + queued_entries); + else + offset = scnprintf(buf, PAGE_SIZE, + "%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n", + rb->nr_entries, + rb->mem, + 
rb->subm, + rb->sync, + rb->l2p_update, +#ifdef CONFIG_NVM_DEBUG + atomic_read(&rb->inflight_sync_point), +#else + 0, +#endif + pblk_rb_read_count(rb), + pblk_rb_space(rb), + pblk_rb_sync_point_count(rb), + queued_entries); + + return offset; +} diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c new file mode 100644 index 000000000000..4a12f14d78c6 --- /dev/null +++ b/drivers/lightnvm/pblk-read.c @@ -0,0 +1,529 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * pblk-read.c - pblk's read path + */ + +#include "pblk.h" + +/* + * There is no guarantee that the value read from cache has not been updated and + * resides at another location in the cache. We guarantee though that if the + * value is read from the cache, it belongs to the mapped lba. In order to + * guarantee and order between writes and reads are ordered, a flush must be + * issued. 
+ */ +static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio, + sector_t lba, struct ppa_addr ppa, + int bio_iter) +{ +#ifdef CONFIG_NVM_DEBUG + /* Callers must ensure that the ppa points to a cache address */ + BUG_ON(pblk_ppa_empty(ppa)); + BUG_ON(!pblk_addr_in_cache(ppa)); +#endif + + return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, + pblk_addr_to_cacheline(ppa), bio_iter); +} + +static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, + unsigned long *read_bitmap) +{ + struct bio *bio = rqd->bio; + struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; + sector_t blba = pblk_get_lba(bio); + int nr_secs = rqd->nr_ppas; + int advanced_bio = 0; + int i, j = 0; + + /* logic error: lba out-of-bounds. Ignore read request */ + if (blba + nr_secs >= pblk->rl.nr_secs) { + WARN(1, "pblk: read lbas out of bounds\n"); + return; + } + + pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs); + + for (i = 0; i < nr_secs; i++) { + struct ppa_addr p = ppas[i]; + sector_t lba = blba + i; + +retry: + if (pblk_ppa_empty(p)) { + WARN_ON(test_and_set_bit(i, read_bitmap)); + continue; + } + + /* Try to read from write buffer. The address is later checked + * on the write buffer to prevent retrieving overwritten data. 
+ */ + if (pblk_addr_in_cache(p)) { + if (!pblk_read_from_cache(pblk, bio, lba, p, i)) { + pblk_lookup_l2p_seq(pblk, &p, lba, 1); + goto retry; + } + WARN_ON(test_and_set_bit(i, read_bitmap)); + advanced_bio = 1; + } else { + /* Read from media non-cached sectors */ + rqd->ppa_list[j++] = p; + } + + if (advanced_bio) + bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(nr_secs, &pblk->inflight_reads); +#endif +} + +static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd) +{ + int err; + + rqd->flags = pblk_set_read_mode(pblk); + + err = pblk_submit_io(pblk, rqd); + if (err) + return NVM_IO_ERR; + + return NVM_IO_OK; +} + +static void pblk_end_io_read(struct nvm_rq *rqd) +{ + struct pblk *pblk = rqd->private; + struct nvm_tgt_dev *dev = pblk->dev; + struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd); + struct bio *bio = rqd->bio; + + if (rqd->error) + pblk_log_read_err(pblk, rqd); +#ifdef CONFIG_NVM_DEBUG + else + WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n"); +#endif + + if (rqd->nr_ppas > 1) + nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list); + + bio_put(bio); + if (r_ctx->orig_bio) { +#ifdef CONFIG_NVM_DEBUG + WARN_ONCE(r_ctx->orig_bio->bi_error, + "pblk: corrupted read bio\n"); +#endif + bio_endio(r_ctx->orig_bio); + bio_put(r_ctx->orig_bio); + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(rqd->nr_ppas, &pblk->sync_reads); + atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads); +#endif + + pblk_free_rqd(pblk, rqd, READ); +} + +static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, + unsigned int bio_init_idx, + unsigned long *read_bitmap) +{ + struct bio *new_bio, *bio = rqd->bio; + struct bio_vec src_bv, dst_bv; + void *ppa_ptr = NULL; + void *src_p, *dst_p; + dma_addr_t dma_ppa_list = 0; + int nr_secs = rqd->nr_ppas; + int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); + int i, ret, hole; + DECLARE_COMPLETION_ONSTACK(wait); + + new_bio = 
bio_alloc(GFP_KERNEL, nr_holes); + if (!new_bio) { + pr_err("pblk: could not alloc read bio\n"); + return NVM_IO_ERR; + } + + if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes)) + goto err; + + if (nr_holes != new_bio->bi_vcnt) { + pr_err("pblk: malformed bio\n"); + goto err; + } + + new_bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(new_bio, REQ_OP_READ, 0); + new_bio->bi_private = &wait; + new_bio->bi_end_io = pblk_end_bio_sync; + + rqd->bio = new_bio; + rqd->nr_ppas = nr_holes; + rqd->end_io = NULL; + + if (unlikely(nr_secs > 1 && nr_holes == 1)) { + ppa_ptr = rqd->ppa_list; + dma_ppa_list = rqd->dma_ppa_list; + rqd->ppa_addr = rqd->ppa_list[0]; + } + + ret = pblk_submit_read_io(pblk, rqd); + if (ret) { + bio_put(rqd->bio); + pr_err("pblk: read IO submission failed\n"); + goto err; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: partial read I/O timed out\n"); + } + + if (rqd->error) { + atomic_long_inc(&pblk->read_failed); +#ifdef CONFIG_NVM_DEBUG + pblk_print_failed_rqd(pblk, rqd, rqd->error); +#endif + } + + if (unlikely(nr_secs > 1 && nr_holes == 1)) { + rqd->ppa_list = ppa_ptr; + rqd->dma_ppa_list = dma_ppa_list; + } + + /* Fill the holes in the original bio */ + i = 0; + hole = find_first_zero_bit(read_bitmap, nr_secs); + do { + src_bv = new_bio->bi_io_vec[i++]; + dst_bv = bio->bi_io_vec[bio_init_idx + hole]; + + src_p = kmap_atomic(src_bv.bv_page); + dst_p = kmap_atomic(dst_bv.bv_page); + + memcpy(dst_p + dst_bv.bv_offset, + src_p + src_bv.bv_offset, + PBLK_EXPOSED_PAGE_SIZE); + + kunmap_atomic(src_p); + kunmap_atomic(dst_p); + + mempool_free(src_bv.bv_page, pblk->page_pool); + + hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1); + } while (hole < nr_secs); + + bio_put(new_bio); + + /* Complete the original bio and associated request */ + rqd->bio = bio; + rqd->nr_ppas = nr_secs; + rqd->private = pblk; + + bio_endio(bio); + pblk_end_io_read(rqd); + 
return NVM_IO_OK; + +err: + /* Free allocated pages in new bio */ + pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt); + rqd->private = pblk; + pblk_end_io_read(rqd); + return NVM_IO_ERR; +} + +static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, + unsigned long *read_bitmap) +{ + struct bio *bio = rqd->bio; + struct ppa_addr ppa; + sector_t lba = pblk_get_lba(bio); + + /* logic error: lba out-of-bounds. Ignore read request */ + if (lba >= pblk->rl.nr_secs) { + WARN(1, "pblk: read lba out of bounds\n"); + return; + } + + pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); + +#ifdef CONFIG_NVM_DEBUG + atomic_long_inc(&pblk->inflight_reads); +#endif + +retry: + if (pblk_ppa_empty(ppa)) { + WARN_ON(test_and_set_bit(0, read_bitmap)); + return; + } + + /* Try to read from write buffer. The address is later checked on the + * write buffer to prevent retrieving overwritten data. + */ + if (pblk_addr_in_cache(ppa)) { + if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0)) { + pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); + goto retry; + } + WARN_ON(test_and_set_bit(0, read_bitmap)); + } else { + rqd->ppa_addr = ppa; + } +} + +int pblk_submit_read(struct pblk *pblk, struct bio *bio) +{ + struct nvm_tgt_dev *dev = pblk->dev; + unsigned int nr_secs = pblk_get_secs(bio); + struct nvm_rq *rqd; + unsigned long read_bitmap; /* Max 64 ppas per request */ + unsigned int bio_init_idx; + int ret = NVM_IO_ERR; + + if (nr_secs > PBLK_MAX_REQ_ADDRS) + return NVM_IO_ERR; + + bitmap_zero(&read_bitmap, nr_secs); + + rqd = pblk_alloc_rqd(pblk, READ); + if (IS_ERR(rqd)) { + pr_err_ratelimited("pblk: not able to alloc rqd"); + return NVM_IO_ERR; + } + + rqd->opcode = NVM_OP_PREAD; + rqd->bio = bio; + rqd->nr_ppas = nr_secs; + rqd->private = pblk; + rqd->end_io = pblk_end_io_read; + + /* Save the index for this bio's start. This is needed in case + * we need to fill a partial read. 
+ */ + bio_init_idx = pblk_get_bi_idx(bio); + + if (nr_secs > 1) { + rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, + &rqd->dma_ppa_list); + if (!rqd->ppa_list) { + pr_err("pblk: not able to allocate ppa list\n"); + goto fail_rqd_free; + } + + pblk_read_ppalist_rq(pblk, rqd, &read_bitmap); + } else { + pblk_read_rq(pblk, rqd, &read_bitmap); + } + + bio_get(bio); + if (bitmap_full(&read_bitmap, nr_secs)) { + bio_endio(bio); + pblk_end_io_read(rqd); + return NVM_IO_OK; + } + + /* All sectors are to be read from the device */ + if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) { + struct bio *int_bio = NULL; + struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd); + + /* Clone read bio to deal with read errors internally */ + int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set); + if (!int_bio) { + pr_err("pblk: could not clone read bio\n"); + return NVM_IO_ERR; + } + + rqd->bio = int_bio; + r_ctx->orig_bio = bio; + + ret = pblk_submit_read_io(pblk, rqd); + if (ret) { + pr_err("pblk: read IO submission failed\n"); + if (int_bio) + bio_put(int_bio); + return ret; + } + + return NVM_IO_OK; + } + + /* The read bio request could be partially filled by the write buffer, + * but there are some holes that need to be read from the drive. 
+ */ + ret = pblk_fill_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap); + if (ret) { + pr_err("pblk: failed to perform partial read\n"); + return ret; + } + + return NVM_IO_OK; + +fail_rqd_free: + pblk_free_rqd(pblk, rqd, READ); + return ret; +} + +static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_line *line, u64 *lba_list, + unsigned int nr_secs) +{ + struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; + int valid_secs = 0; + int i; + + pblk_lookup_l2p_rand(pblk, ppas, lba_list, nr_secs); + + for (i = 0; i < nr_secs; i++) { + if (pblk_addr_in_cache(ppas[i]) || ppas[i].g.blk != line->id || + pblk_ppa_empty(ppas[i])) { + lba_list[i] = ADDR_EMPTY; + continue; + } + + rqd->ppa_list[valid_secs++] = ppas[i]; + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(valid_secs, &pblk->inflight_reads); +#endif + return valid_secs; +} + +static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_line *line, sector_t lba) +{ + struct ppa_addr ppa; + int valid_secs = 0; + + if (lba == ADDR_EMPTY) + goto out; + + /* logic error: lba out-of-bounds */ + if (lba >= pblk->rl.nr_secs) { + WARN(1, "pblk: read lba out of bounds\n"); + goto out; + } + + spin_lock(&pblk->trans_lock); + ppa = pblk_trans_map_get(pblk, lba); + spin_unlock(&pblk->trans_lock); + + /* Ignore updated values until the moment */ + if (pblk_addr_in_cache(ppa) || ppa.g.blk != line->id || + pblk_ppa_empty(ppa)) + goto out; + + rqd->ppa_addr = ppa; + valid_secs = 1; + +#ifdef CONFIG_NVM_DEBUG + atomic_long_inc(&pblk->inflight_reads); +#endif + +out: + return valid_secs; +} + +int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, + unsigned int nr_secs, unsigned int *secs_to_gc, + struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct request_queue *q = dev->q; + struct bio *bio; + struct nvm_rq rqd; + int ret, data_len; + DECLARE_COMPLETION_ONSTACK(wait); + + memset(&rqd, 0, sizeof(struct nvm_rq)); + + if 
(nr_secs > 1) { + rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, + &rqd.dma_ppa_list); + if (!rqd.ppa_list) + return NVM_IO_ERR; + + *secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list, + nr_secs); + if (*secs_to_gc == 1) { + struct ppa_addr ppa; + + ppa = rqd.ppa_list[0]; + nvm_dev_dma_free(dev->parent, rqd.ppa_list, + rqd.dma_ppa_list); + rqd.ppa_addr = ppa; + } + } else { + *secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]); + } + + if (!(*secs_to_gc)) + goto out; + + data_len = (*secs_to_gc) * geo->sec_size; + bio = bio_map_kern(q, data, data_len, GFP_KERNEL); + if (IS_ERR(bio)) { + pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio)); + goto err_free_dma; + } + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + rqd.opcode = NVM_OP_PREAD; + rqd.end_io = pblk_end_io_sync; + rqd.private = &wait; + rqd.nr_ppas = *secs_to_gc; + rqd.bio = bio; + + ret = pblk_submit_read_io(pblk, &rqd); + if (ret) { + bio_endio(bio); + pr_err("pblk: GC read request failed\n"); + goto err_free_dma; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: GC read I/O timed out\n"); + } + + if (rqd.error) { + atomic_long_inc(&pblk->read_failed_gc); +#ifdef CONFIG_NVM_DEBUG + pblk_print_failed_rqd(pblk, &rqd, rqd.error); +#endif + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(*secs_to_gc, &pblk->sync_reads); + atomic_long_add(*secs_to_gc, &pblk->recov_gc_reads); + atomic_long_sub(*secs_to_gc, &pblk->inflight_reads); +#endif + +out: + if (rqd.nr_ppas > 1) + nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list); + return NVM_IO_OK; + +err_free_dma: + if (rqd.nr_ppas > 1) + nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list); + return NVM_IO_ERR; +} diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c new file mode 100644 index 000000000000..f8f85087cd3c --- /dev/null +++ b/drivers/lightnvm/pblk-recovery.c @@ 
-0,0 +1,998 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial: Javier Gonzalez <javier@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * pblk-recovery.c - pblk's recovery path + */ + +#include "pblk.h" + +void pblk_submit_rec(struct work_struct *work) +{ + struct pblk_rec_ctx *recovery = + container_of(work, struct pblk_rec_ctx, ws_rec); + struct pblk *pblk = recovery->pblk; + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_rq *rqd = recovery->rqd; + struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); + int max_secs = nvm_max_phys_sects(dev); + struct bio *bio; + unsigned int nr_rec_secs; + unsigned int pgs_read; + int ret; + + nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status, + max_secs); + + bio = bio_alloc(GFP_KERNEL, nr_rec_secs); + if (!bio) { + pr_err("pblk: not able to create recovery bio\n"); + return; + } + + bio->bi_iter.bi_sector = 0; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + rqd->bio = bio; + rqd->nr_ppas = nr_rec_secs; + + pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed, + nr_rec_secs); + if (pgs_read != nr_rec_secs) { + pr_err("pblk: could not read recovery entries\n"); + goto err; + } + + if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) { + pr_err("pblk: could not setup recovery request\n"); + goto err; + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(nr_rec_secs, &pblk->recov_writes); +#endif + + ret = pblk_submit_io(pblk, rqd); + if (ret) { + pr_err("pblk: I/O submission failed: %d\n", ret); + goto err; + } + + mempool_free(recovery, pblk->rec_pool); + return; + +err: + bio_put(bio); + 
pblk_free_rqd(pblk, rqd, WRITE); +} + +int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, + struct pblk_rec_ctx *recovery, u64 *comp_bits, + unsigned int comp) +{ + struct nvm_tgt_dev *dev = pblk->dev; + int max_secs = nvm_max_phys_sects(dev); + struct nvm_rq *rec_rqd; + struct pblk_c_ctx *rec_ctx; + int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded; + + rec_rqd = pblk_alloc_rqd(pblk, WRITE); + if (IS_ERR(rec_rqd)) { + pr_err("pblk: could not create recovery req.\n"); + return -ENOMEM; + } + + rec_ctx = nvm_rq_to_pdu(rec_rqd); + + /* Copy completion bitmap, but exclude the first X completed entries */ + bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status, + (unsigned long int *)comp_bits, + comp, max_secs); + + /* Save the context for the entries that need to be re-written and + * update current context with the completed entries. + */ + rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp); + if (comp >= c_ctx->nr_valid) { + rec_ctx->nr_valid = 0; + rec_ctx->nr_padded = nr_entries - comp; + + c_ctx->nr_padded = comp - c_ctx->nr_valid; + } else { + rec_ctx->nr_valid = c_ctx->nr_valid - comp; + rec_ctx->nr_padded = c_ctx->nr_padded; + + c_ctx->nr_valid = comp; + c_ctx->nr_padded = 0; + } + + recovery->rqd = rec_rqd; + recovery->pblk = pblk; + + return 0; +} + +__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta) +{ + u32 crc; + + crc = pblk_calc_emeta_crc(pblk, emeta); + if (le32_to_cpu(emeta->crc) != crc) + return NULL; + + if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC) + return NULL; + + return pblk_line_emeta_to_lbas(emeta); +} + +static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + struct line_emeta *emeta = line->emeta; + __le64 *lba_list; + int data_start; + int nr_data_lbas, nr_valid_lbas, nr_lbas = 0; + int i; + + lba_list = 
pblk_recov_get_lba_list(pblk, emeta); + if (!lba_list) + return 1; + + data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; + nr_data_lbas = lm->sec_per_line - lm->emeta_sec; + nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas); + + for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) { + struct ppa_addr ppa; + int pos; + + ppa = addr_to_pblk_ppa(pblk, i, line->id); + pos = pblk_ppa_to_pos(geo, ppa); + + /* Do not update bad blocks */ + if (test_bit(pos, line->blk_bitmap)) + continue; + + if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) { + spin_lock(&line->lock); + if (test_and_set_bit(i, line->invalid_bitmap)) + WARN_ONCE(1, "pblk: rec. double invalidate:\n"); + else + line->vsc--; + spin_unlock(&line->lock); + + continue; + } + + pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa); + nr_lbas++; + } + + if (nr_valid_lbas != nr_lbas) + pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n", + line->id, line->emeta->nr_valid_lbas, nr_lbas); + + line->left_msecs = 0; + + return 0; +} + +static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); + + return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec - + nr_bb * geo->sec_per_blk; +} + +struct pblk_recov_alloc { + struct ppa_addr *ppa_list; + struct pblk_sec_meta *meta_list; + struct nvm_rq *rqd; + void *data; + dma_addr_t dma_ppa_list; + dma_addr_t dma_meta_list; +}; + +static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line, + struct pblk_recov_alloc p, u64 r_ptr) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct ppa_addr *ppa_list; + struct pblk_sec_meta *meta_list; + struct nvm_rq *rqd; + struct bio *bio; + void *data; + dma_addr_t dma_ppa_list, dma_meta_list; + u64 r_ptr_int; + int left_ppas; + int rq_ppas, rq_len; + int i, j; + int 
ret = 0; + DECLARE_COMPLETION_ONSTACK(wait); + + ppa_list = p.ppa_list; + meta_list = p.meta_list; + rqd = p.rqd; + data = p.data; + dma_ppa_list = p.dma_ppa_list; + dma_meta_list = p.dma_meta_list; + + left_ppas = line->cur_sec - r_ptr; + if (!left_ppas) + return 0; + + r_ptr_int = r_ptr; + +next_read_rq: + memset(rqd, 0, pblk_r_rq_size); + + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + if (!rq_ppas) + rq_ppas = pblk->min_write_pgs; + rq_len = rq_ppas * geo->sec_size; + + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); + if (IS_ERR(bio)) + return PTR_ERR(bio); + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + rqd->bio = bio; + rqd->opcode = NVM_OP_PREAD; + rqd->flags = pblk_set_read_mode(pblk); + rqd->meta_list = meta_list; + rqd->nr_ppas = rq_ppas; + rqd->ppa_list = ppa_list; + rqd->dma_ppa_list = dma_ppa_list; + rqd->dma_meta_list = dma_meta_list; + rqd->end_io = pblk_end_io_sync; + rqd->private = &wait; + + for (i = 0; i < rqd->nr_ppas; ) { + struct ppa_addr ppa; + int pos; + + ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); + pos = pblk_dev_ppa_to_pos(geo, ppa); + + while (test_bit(pos, line->blk_bitmap)) { + r_ptr_int += pblk->min_write_pgs; + ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); + pos = pblk_dev_ppa_to_pos(geo, ppa); + } + + for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++) + rqd->ppa_list[i] = + addr_to_gen_ppa(pblk, r_ptr_int, line->id); + } + + /* If read fails, more padding is needed */ + ret = pblk_submit_io(pblk, rqd); + if (ret) { + pr_err("pblk: I/O submission failed: %d\n", ret); + return ret; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: L2P recovery read timed out\n"); + return -EINTR; + } + + reinit_completion(&wait); + + /* At this point, the read should not fail. If it does, it is a problem + * we cannot recover from here. Need FTL log. 
+ */ + if (rqd->error) { + pr_err("pblk: L2P recovery failed (%d)\n", rqd->error); + return -EINTR; + } + + for (i = 0; i < rqd->nr_ppas; i++) { + u64 lba = le64_to_cpu(meta_list[i].lba); + + if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) + continue; + + pblk_update_map(pblk, lba, rqd->ppa_list[i]); + } + + left_ppas -= rq_ppas; + if (left_ppas > 0) + goto next_read_rq; + + return 0; +} + +static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, + struct pblk_recov_alloc p, int left_ppas) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct ppa_addr *ppa_list; + struct pblk_sec_meta *meta_list; + struct nvm_rq *rqd; + struct bio *bio; + void *data; + dma_addr_t dma_ppa_list, dma_meta_list; + __le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta); + u64 w_ptr = line->cur_sec; + int left_line_ppas = line->left_msecs; + int rq_ppas, rq_len; + int i, j; + int ret = 0; + DECLARE_COMPLETION_ONSTACK(wait); + + ppa_list = p.ppa_list; + meta_list = p.meta_list; + rqd = p.rqd; + data = p.data; + dma_ppa_list = p.dma_ppa_list; + dma_meta_list = p.dma_meta_list; + +next_pad_rq: + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + if (!rq_ppas) + rq_ppas = pblk->min_write_pgs; + rq_len = rq_ppas * geo->sec_size; + + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); + if (IS_ERR(bio)) + return PTR_ERR(bio); + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + + memset(rqd, 0, pblk_r_rq_size); + + rqd->bio = bio; + rqd->opcode = NVM_OP_PWRITE; + rqd->flags = pblk_set_progr_mode(pblk, WRITE); + rqd->meta_list = meta_list; + rqd->nr_ppas = rq_ppas; + rqd->ppa_list = ppa_list; + rqd->dma_ppa_list = dma_ppa_list; + rqd->dma_meta_list = dma_meta_list; + rqd->end_io = pblk_end_io_sync; + rqd->private = &wait; + + for (i = 0; i < rqd->nr_ppas; ) { + struct ppa_addr ppa; + int pos; + + w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); + ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id); + 
pos = pblk_ppa_to_pos(geo, ppa); + + while (test_bit(pos, line->blk_bitmap)) { + w_ptr += pblk->min_write_pgs; + ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id); + pos = pblk_ppa_to_pos(geo, ppa); + } + + for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) { + struct ppa_addr dev_ppa; + + dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); + + pblk_map_invalidate(pblk, dev_ppa); + meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); + lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY); + rqd->ppa_list[i] = dev_ppa; + } + } + + ret = pblk_submit_io(pblk, rqd); + if (ret) { + pr_err("pblk: I/O submission failed: %d\n", ret); + return ret; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: L2P recovery write timed out\n"); + } + reinit_completion(&wait); + + left_line_ppas -= rq_ppas; + left_ppas -= rq_ppas; + if (left_ppas > 0 && left_line_ppas) + goto next_pad_rq; + + return 0; +} + +/* When this function is called, it means that not all upper pages have been + * written in a page that contains valid data. 
In order to recover this data, we + * first find the write pointer on the device, then we pad all necessary + * sectors, and finally attempt to read the valid data + */ +static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line, + struct pblk_recov_alloc p) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct ppa_addr *ppa_list; + struct pblk_sec_meta *meta_list; + struct nvm_rq *rqd; + struct bio *bio; + void *data; + dma_addr_t dma_ppa_list, dma_meta_list; + u64 w_ptr = 0, r_ptr; + int rq_ppas, rq_len; + int i, j; + int ret = 0; + int rec_round; + int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec; + DECLARE_COMPLETION_ONSTACK(wait); + + ppa_list = p.ppa_list; + meta_list = p.meta_list; + rqd = p.rqd; + data = p.data; + dma_ppa_list = p.dma_ppa_list; + dma_meta_list = p.dma_meta_list; + + /* we could recover up until the line write pointer */ + r_ptr = line->cur_sec; + rec_round = 0; + +next_rq: + memset(rqd, 0, pblk_r_rq_size); + + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + if (!rq_ppas) + rq_ppas = pblk->min_write_pgs; + rq_len = rq_ppas * geo->sec_size; + + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); + if (IS_ERR(bio)) + return PTR_ERR(bio); + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + rqd->bio = bio; + rqd->opcode = NVM_OP_PREAD; + rqd->flags = pblk_set_read_mode(pblk); + rqd->meta_list = meta_list; + rqd->nr_ppas = rq_ppas; + rqd->ppa_list = ppa_list; + rqd->dma_ppa_list = dma_ppa_list; + rqd->dma_meta_list = dma_meta_list; + rqd->end_io = pblk_end_io_sync; + rqd->private = &wait; + + for (i = 0; i < rqd->nr_ppas; ) { + struct ppa_addr ppa; + int pos; + + w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); + ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); + pos = pblk_dev_ppa_to_pos(geo, ppa); + + while (test_bit(pos, line->blk_bitmap)) { + w_ptr += pblk->min_write_pgs; + ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); + pos 
= pblk_dev_ppa_to_pos(geo, ppa); + } + + for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) + rqd->ppa_list[i] = + addr_to_gen_ppa(pblk, w_ptr, line->id); + } + + ret = pblk_submit_io(pblk, rqd); + if (ret) { + pr_err("pblk: I/O submission failed: %d\n", ret); + return ret; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: L2P recovery read timed out\n"); + } + reinit_completion(&wait); + + /* This should not happen since the read failed during normal recovery, + * but the media works funny sometimes... + */ + if (!rec_round++ && !rqd->error) { + rec_round = 0; + for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) { + u64 lba = le64_to_cpu(meta_list[i].lba); + + if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) + continue; + + pblk_update_map(pblk, lba, rqd->ppa_list[i]); + } + } + + /* Reached the end of the written line */ + if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) { + int pad_secs, nr_error_bits, bit; + int ret; + + bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas); + nr_error_bits = rqd->nr_ppas - bit; + + /* Roll back failed sectors */ + line->cur_sec -= nr_error_bits; + line->left_msecs += nr_error_bits; + bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); + + pad_secs = pblk_pad_distance(pblk); + if (pad_secs > line->left_msecs) + pad_secs = line->left_msecs; + + ret = pblk_recov_pad_oob(pblk, line, p, pad_secs); + if (ret) + pr_err("pblk: OOB padding failed (err:%d)\n", ret); + + ret = pblk_recov_read_oob(pblk, line, p, r_ptr); + if (ret) + pr_err("pblk: OOB read failed (err:%d)\n", ret); + + line->left_ssecs = line->left_msecs; + left_ppas = 0; + } + + left_ppas -= rq_ppas; + if (left_ppas > 0) + goto next_rq; + + return ret; +} + +static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, + struct pblk_recov_alloc p, int *done) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct ppa_addr *ppa_list; + struct pblk_sec_meta 
*meta_list; + struct nvm_rq *rqd; + struct bio *bio; + void *data; + dma_addr_t dma_ppa_list, dma_meta_list; + u64 paddr; + int rq_ppas, rq_len; + int i, j; + int ret = 0; + int left_ppas = pblk_calc_sec_in_line(pblk, line); + DECLARE_COMPLETION_ONSTACK(wait); + + ppa_list = p.ppa_list; + meta_list = p.meta_list; + rqd = p.rqd; + data = p.data; + dma_ppa_list = p.dma_ppa_list; + dma_meta_list = p.dma_meta_list; + + *done = 1; + +next_rq: + memset(rqd, 0, pblk_r_rq_size); + + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + if (!rq_ppas) + rq_ppas = pblk->min_write_pgs; + rq_len = rq_ppas * geo->sec_size; + + bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); + if (IS_ERR(bio)) + return PTR_ERR(bio); + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + rqd->bio = bio; + rqd->opcode = NVM_OP_PREAD; + rqd->flags = pblk_set_read_mode(pblk); + rqd->meta_list = meta_list; + rqd->nr_ppas = rq_ppas; + rqd->ppa_list = ppa_list; + rqd->dma_ppa_list = dma_ppa_list; + rqd->dma_meta_list = dma_meta_list; + rqd->end_io = pblk_end_io_sync; + rqd->private = &wait; + + for (i = 0; i < rqd->nr_ppas; ) { + struct ppa_addr ppa; + int pos; + + paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); + ppa = addr_to_gen_ppa(pblk, paddr, line->id); + pos = pblk_dev_ppa_to_pos(geo, ppa); + + while (test_bit(pos, line->blk_bitmap)) { + paddr += pblk->min_write_pgs; + ppa = addr_to_gen_ppa(pblk, paddr, line->id); + pos = pblk_dev_ppa_to_pos(geo, ppa); + } + + for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++) + rqd->ppa_list[i] = + addr_to_gen_ppa(pblk, paddr, line->id); + } + + ret = pblk_submit_io(pblk, rqd); + if (ret) { + pr_err("pblk: I/O submission failed: %d\n", ret); + bio_put(bio); + return ret; + } + + if (!wait_for_completion_io_timeout(&wait, + msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { + pr_err("pblk: L2P recovery read timed out\n"); + } + reinit_completion(&wait); + + /* Reached the end of the written line */ + if 
(rqd->error) { + int nr_error_bits, bit; + + bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas); + nr_error_bits = rqd->nr_ppas - bit; + + /* Roll back failed sectors */ + line->cur_sec -= nr_error_bits; + line->left_msecs += nr_error_bits; + line->left_ssecs = line->left_msecs; + bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); + + left_ppas = 0; + rqd->nr_ppas = bit; + + if (rqd->error != NVM_RSP_ERR_EMPTYPAGE) + *done = 0; + } + + for (i = 0; i < rqd->nr_ppas; i++) { + u64 lba = le64_to_cpu(meta_list[i].lba); + + if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) + continue; + + pblk_update_map(pblk, lba, rqd->ppa_list[i]); + } + + left_ppas -= rq_ppas; + if (left_ppas > 0) + goto next_rq; + + return ret; +} + +/* Scan line for lbas on out of bound area */ +static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct nvm_rq *rqd; + struct ppa_addr *ppa_list; + struct pblk_sec_meta *meta_list; + struct pblk_recov_alloc p; + void *data; + dma_addr_t dma_ppa_list, dma_meta_list; + int done, ret = 0; + + rqd = pblk_alloc_rqd(pblk, READ); + if (IS_ERR(rqd)) + return PTR_ERR(rqd); + + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); + if (!meta_list) { + ret = -ENOMEM; + goto free_rqd; + } + + ppa_list = (void *)(meta_list) + pblk_dma_meta_size; + dma_ppa_list = dma_meta_list + pblk_dma_meta_size; + + data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto free_meta_list; + } + + p.ppa_list = ppa_list; + p.meta_list = meta_list; + p.rqd = rqd; + p.data = data; + p.dma_ppa_list = dma_ppa_list; + p.dma_meta_list = dma_meta_list; + + ret = pblk_recov_scan_oob(pblk, line, p, &done); + if (ret) { + pr_err("pblk: could not recover L2P from OOB\n"); + goto out; + } + + if (!done) { + ret = pblk_recov_scan_all_oob(pblk, line, p); + if (ret) { + pr_err("pblk: could not recover L2P from 
OOB\n"); + goto out; + } + } + + if (pblk_line_is_full(line)) + pblk_line_recov_close(pblk, line); + +out: + kfree(data); +free_meta_list: + nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); +free_rqd: + pblk_free_rqd(pblk, rqd, READ); + + return ret; +} + +/* Insert lines ordered by sequence number (seq_num) on list */ +static void pblk_recov_line_add_ordered(struct list_head *head, + struct pblk_line *line) +{ + struct pblk_line *t = NULL; + + list_for_each_entry(t, head, list) + if (t->seq_nr > line->seq_nr) + break; + + __list_add(&line->list, t->list.prev, &t->list); +} + +struct pblk_line *pblk_recov_l2p(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line *line, *tline, *data_line = NULL; + struct line_smeta *smeta; + struct line_emeta *emeta; + int found_lines = 0, recovered_lines = 0, open_lines = 0; + int is_next = 0; + int meta_line; + int i, valid_uuid = 0; + LIST_HEAD(recov_list); + + /* TODO: Implement FTL snapshot */ + + /* Scan recovery - takes place when FTL snapshot fails */ + spin_lock(&l_mg->free_lock); + meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); + set_bit(meta_line, &l_mg->meta_bitmap); + smeta = l_mg->sline_meta[meta_line].meta; + emeta = l_mg->eline_meta[meta_line].meta; + spin_unlock(&l_mg->free_lock); + + /* Order data lines using their sequence number */ + for (i = 0; i < l_mg->nr_lines; i++) { + u32 crc; + + line = &pblk->lines[i]; + + memset(smeta, 0, lm->smeta_len); + line->smeta = smeta; + line->lun_bitmap = ((void *)(smeta)) + + sizeof(struct line_smeta); + + /* Lines that cannot be read are assumed as not written here */ + if (pblk_line_read_smeta(pblk, line)) + continue; + + crc = pblk_calc_smeta_crc(pblk, smeta); + if (le32_to_cpu(smeta->crc) != crc) + continue; + + if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC) + continue; + + if 
(le16_to_cpu(smeta->header.version) != 1) { + pr_err("pblk: found incompatible line version %u\n", + smeta->header.version); + return ERR_PTR(-EINVAL); + } + + /* The first valid instance uuid is used for initialization */ + if (!valid_uuid) { + memcpy(pblk->instance_uuid, smeta->header.uuid, 16); + valid_uuid = 1; + } + + if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) { + pr_debug("pblk: ignore line %u due to uuid mismatch\n", + i); + continue; + } + + /* Update line metadata */ + spin_lock(&line->lock); + line->id = le32_to_cpu(line->smeta->header.id); + line->type = le16_to_cpu(line->smeta->header.type); + line->seq_nr = le64_to_cpu(line->smeta->seq_nr); + spin_unlock(&line->lock); + + /* Update general metadata */ + spin_lock(&l_mg->free_lock); + if (line->seq_nr >= l_mg->d_seq_nr) + l_mg->d_seq_nr = line->seq_nr + 1; + l_mg->nr_free_lines--; + spin_unlock(&l_mg->free_lock); + + if (pblk_line_recov_alloc(pblk, line)) + goto out; + + pblk_recov_line_add_ordered(&recov_list, line); + found_lines++; + pr_debug("pblk: recovering data line %d, seq:%llu\n", + line->id, smeta->seq_nr); + } + + if (!found_lines) { + pblk_setup_uuid(pblk); + + spin_lock(&l_mg->free_lock); + WARN_ON_ONCE(!test_and_clear_bit(meta_line, + &l_mg->meta_bitmap)); + spin_unlock(&l_mg->free_lock); + + goto out; + } + + /* Verify closed blocks and recover this portion of L2P table*/ + list_for_each_entry_safe(line, tline, &recov_list, list) { + int off, nr_bb; + + recovered_lines++; + /* Calculate where emeta starts based on the line bb */ + off = lm->sec_per_line - lm->emeta_sec; + nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); + off -= nr_bb * geo->sec_per_pl; + + memset(emeta, 0, lm->emeta_len); + line->emeta = emeta; + line->emeta_ssec = off; + + if (pblk_line_read_emeta(pblk, line)) { + pblk_recov_l2p_from_oob(pblk, line); + goto next; + } + + if (pblk_recov_l2p_from_emeta(pblk, line)) + pblk_recov_l2p_from_oob(pblk, line); + +next: + if (pblk_line_is_full(line)) { + 
struct list_head *move_list; + + spin_lock(&line->lock); + line->state = PBLK_LINESTATE_CLOSED; + move_list = pblk_line_gc_list(pblk, line); + spin_unlock(&line->lock); + + spin_lock(&l_mg->gc_lock); + list_move_tail(&line->list, move_list); + spin_unlock(&l_mg->gc_lock); + + mempool_free(line->map_bitmap, pblk->line_meta_pool); + line->map_bitmap = NULL; + line->smeta = NULL; + line->emeta = NULL; + } else { + if (open_lines > 1) + pr_err("pblk: failed to recover L2P\n"); + + open_lines++; + line->meta_line = meta_line; + data_line = line; + } + } + + spin_lock(&l_mg->free_lock); + if (!open_lines) { + WARN_ON_ONCE(!test_and_clear_bit(meta_line, + &l_mg->meta_bitmap)); + pblk_line_replace_data(pblk); + } else { + /* Allocate next line for preparation */ + l_mg->data_next = pblk_line_get(pblk); + if (l_mg->data_next) { + l_mg->data_next->seq_nr = l_mg->d_seq_nr++; + l_mg->data_next->type = PBLK_LINETYPE_DATA; + is_next = 1; + } + } + spin_unlock(&l_mg->free_lock); + + if (is_next) { + pblk_line_erase(pblk, l_mg->data_next); + pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); + } + +out: + if (found_lines != recovered_lines) + pr_err("pblk: failed to recover all found lines %d/%d\n", + found_lines, recovered_lines); + + return data_line; +} + +/* + * Pad until smeta can be read on current data line + */ +void pblk_recov_pad(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line *line; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct nvm_rq *rqd; + struct pblk_recov_alloc p; + struct ppa_addr *ppa_list; + struct pblk_sec_meta *meta_list; + void *data; + dma_addr_t dma_ppa_list, dma_meta_list; + + spin_lock(&l_mg->free_lock); + line = l_mg->data_line; + spin_unlock(&l_mg->free_lock); + + rqd = pblk_alloc_rqd(pblk, READ); + if (IS_ERR(rqd)) + return; + + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); + if (!meta_list) + goto free_rqd; + + ppa_list = (void *)(meta_list) + 
pblk_dma_meta_size; + dma_ppa_list = dma_meta_list + pblk_dma_meta_size; + + data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL); + if (!data) + goto free_meta_list; + + p.ppa_list = ppa_list; + p.meta_list = meta_list; + p.rqd = rqd; + p.data = data; + p.dma_ppa_list = dma_ppa_list; + p.dma_meta_list = dma_meta_list; + + if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) { + pr_err("pblk: Tear down padding failed\n"); + goto free_data; + } + + pblk_line_close(pblk, line); + +free_data: + kfree(data); +free_meta_list: + nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); +free_rqd: + pblk_free_rqd(pblk, rqd, READ); +} diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c new file mode 100644 index 000000000000..ab7cbb144f3f --- /dev/null +++ b/drivers/lightnvm/pblk-rl.c @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * pblk-rl.c - pblk's rate limiter for user I/O + * + */ + +#include "pblk.h" + +static void pblk_rl_kick_u_timer(struct pblk_rl *rl) +{ + mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000)); +} + +int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries) +{ + int rb_user_cnt = atomic_read(&rl->rb_user_cnt); + + return (!(rb_user_cnt + nr_entries > rl->rb_user_max)); +} + +int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries) +{ + int rb_gc_cnt = atomic_read(&rl->rb_gc_cnt); + int rb_user_active; + + /* If there is no user I/O let GC take over space on the write buffer */ + rb_user_active = READ_ONCE(rl->rb_user_active); + return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active)); +} + +void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries) +{ + atomic_add(nr_entries, &rl->rb_user_cnt); + + /* Release user I/O state. Protect from GC */ + smp_store_release(&rl->rb_user_active, 1); + pblk_rl_kick_u_timer(rl); +} + +void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries) +{ + atomic_add(nr_entries, &rl->rb_gc_cnt); +} + +void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc) +{ + atomic_sub(nr_user, &rl->rb_user_cnt); + atomic_sub(nr_gc, &rl->rb_gc_cnt); +} + +unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl) +{ + return atomic_read(&rl->free_blocks); +} + +/* + * We check for (i) the number of free blocks in the current LUN and (ii) the + * total number of free blocks in the pblk instance. This is to even out the + * number of free blocks on each LUN when GC kicks in. + * + * Only the total number of free blocks is used to configure the rate limiter. 
+ */ +static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max) +{ + unsigned long free_blocks = pblk_rl_nr_free_blks(rl); + + if (free_blocks >= rl->high) { + rl->rb_user_max = max - rl->rb_gc_rsv; + rl->rb_gc_max = rl->rb_gc_rsv; + rl->rb_state = PBLK_RL_HIGH; + } else if (free_blocks < rl->high) { + int shift = rl->high_pw - rl->rb_windows_pw; + int user_windows = free_blocks >> shift; + int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW; + int gc_max; + + rl->rb_user_max = user_max; + gc_max = max - rl->rb_user_max; + rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv); + + if (free_blocks > rl->low) + rl->rb_state = PBLK_RL_MID; + else + rl->rb_state = PBLK_RL_LOW; + } + + return rl->rb_state; +} + +void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv) +{ + rl->rb_gc_rsv = rl->rb_gc_max = rsv; +} + +void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line) +{ + struct pblk *pblk = container_of(rl, struct pblk, rl); + int blk_in_line = atomic_read(&line->blk_in_line); + int ret; + + atomic_add(blk_in_line, &rl->free_blocks); + /* Rates will not change that often - no need to lock update */ + ret = pblk_rl_update_rates(rl, rl->rb_budget); + + if (ret == (PBLK_RL_MID | PBLK_RL_LOW)) + pblk_gc_should_start(pblk); + else + pblk_gc_should_stop(pblk); +} + +void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line) +{ + struct pblk *pblk = container_of(rl, struct pblk, rl); + int blk_in_line = atomic_read(&line->blk_in_line); + int ret; + + atomic_sub(blk_in_line, &rl->free_blocks); + + /* Rates will not change that often - no need to lock update */ + ret = pblk_rl_update_rates(rl, rl->rb_budget); + if (ret == (PBLK_RL_MID | PBLK_RL_LOW)) + pblk_gc_should_start(pblk); + else + pblk_gc_should_stop(pblk); +} + +int pblk_rl_gc_thrs(struct pblk_rl *rl) +{ + return rl->high; +} + +int pblk_rl_sysfs_rate_show(struct pblk_rl *rl) +{ + return rl->rb_user_max; +} + +static void pblk_rl_u_timer(unsigned long data) +{ + struct pblk_rl *rl = 
(struct pblk_rl *)data; + + /* Release user I/O state. Protect from GC */ + smp_store_release(&rl->rb_user_active, 0); +} + +void pblk_rl_free(struct pblk_rl *rl) +{ + del_timer(&rl->u_timer); +} + +void pblk_rl_init(struct pblk_rl *rl, int budget) +{ + unsigned int rb_windows; + + rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS; + rl->low = rl->total_blocks / PBLK_USER_LOW_THRS; + rl->high_pw = get_count_order(rl->high); + + /* This will always be a power-of-2 */ + rb_windows = budget / PBLK_MAX_REQ_ADDRS; + rl->rb_windows_pw = get_count_order(rb_windows) + 1; + + /* To start with, all buffer is available to user I/O writers */ + rl->rb_budget = budget; + rl->rb_user_max = budget; + atomic_set(&rl->rb_user_cnt, 0); + rl->rb_gc_max = 0; + rl->rb_state = PBLK_RL_HIGH; + atomic_set(&rl->rb_gc_cnt, 0); + + setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl); + rl->rb_user_active = 0; +} diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c new file mode 100644 index 000000000000..f0af1d1ceeff --- /dev/null +++ b/drivers/lightnvm/pblk-sysfs.c @@ -0,0 +1,507 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Implementation of a physical block-device target for Open-channel SSDs. 
+ * + * pblk-sysfs.c - pblk's sysfs + * + */ + +#include "pblk.h" + +static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_lun *rlun; + ssize_t sz = 0; + int i; + + for (i = 0; i < geo->nr_luns; i++) { + int active = 1; + + rlun = &pblk->luns[i]; + if (!down_trylock(&rlun->wr_sem)) { + active = 0; + up(&rlun->wr_sem); + } + sz += snprintf(page + sz, PAGE_SIZE - sz, + "pblk: pos:%d, ch:%d, lun:%d - %d\n", + i, + rlun->bppa.g.ch, + rlun->bppa.g.lun, + active); + } + + return sz; +} + +static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int free_blocks, total_blocks; + int rb_user_max, rb_user_cnt; + int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state; + + free_blocks = atomic_read(&pblk->rl.free_blocks); + rb_user_max = pblk->rl.rb_user_max; + rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt); + rb_gc_max = pblk->rl.rb_gc_max; + rb_gc_rsv = pblk->rl.rb_gc_rsv; + rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt); + rb_budget = pblk->rl.rb_budget; + rb_state = pblk->rl.rb_state; + + total_blocks = geo->blks_per_lun * geo->nr_luns; + + return snprintf(page, PAGE_SIZE, + "u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n", + rb_user_cnt, + rb_user_max, + rb_gc_cnt, + rb_gc_max, + rb_gc_rsv, + rb_state, + rb_budget, + pblk->rl.low, + pblk->rl.high, + free_blocks, + total_blocks, + READ_ONCE(pblk->rl.rb_user_active)); +} + +static ssize_t pblk_sysfs_gc_state_show(struct pblk *pblk, char *page) +{ + int gc_enabled, gc_active; + + pblk_gc_sysfs_state_show(pblk, &gc_enabled, &gc_active); + return snprintf(page, PAGE_SIZE, "gc_enabled=%d, gc_active=%d\n", + gc_enabled, gc_active); +} + +static ssize_t pblk_sysfs_stats(struct pblk *pblk, char *page) +{ + ssize_t sz; + + sz = snprintf(page, PAGE_SIZE, + "read_failed=%lu, read_high_ecc=%lu, read_empty=%lu, 
read_failed_gc=%lu, write_failed=%lu, erase_failed=%lu\n", + atomic_long_read(&pblk->read_failed), + atomic_long_read(&pblk->read_high_ecc), + atomic_long_read(&pblk->read_empty), + atomic_long_read(&pblk->read_failed_gc), + atomic_long_read(&pblk->write_failed), + atomic_long_read(&pblk->erase_failed)); + + return sz; +} + +static ssize_t pblk_sysfs_write_buffer(struct pblk *pblk, char *page) +{ + return pblk_rb_sysfs(&pblk->rwb, page); +} + +static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + ssize_t sz = 0; + + sz = snprintf(page, PAGE_SIZE - sz, + "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", + pblk->ppaf_bitsize, + pblk->ppaf.blk_offset, geo->ppaf.blk_len, + pblk->ppaf.pg_offset, geo->ppaf.pg_len, + pblk->ppaf.lun_offset, geo->ppaf.lun_len, + pblk->ppaf.ch_offset, geo->ppaf.ch_len, + pblk->ppaf.pln_offset, geo->ppaf.pln_len, + pblk->ppaf.sec_offset, geo->ppaf.sect_len); + + sz += snprintf(page + sz, PAGE_SIZE - sz, + "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", + geo->ppaf.blk_offset, geo->ppaf.blk_len, + geo->ppaf.pg_offset, geo->ppaf.pg_len, + geo->ppaf.lun_offset, geo->ppaf.lun_len, + geo->ppaf.ch_offset, geo->ppaf.ch_len, + geo->ppaf.pln_offset, geo->ppaf.pln_len, + geo->ppaf.sect_offset, geo->ppaf.sect_len); + + return sz; +} + +static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line *line; + ssize_t sz = 0; + int nr_free_lines; + int cur_data, cur_log; + int free_line_cnt = 0, closed_line_cnt = 0; + int d_line_cnt = 0, l_line_cnt = 0; + int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0; + int free = 0, bad = 0, cor = 0; + int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0; + int map_weight = 0, meta_weight = 
0; + + spin_lock(&l_mg->free_lock); + cur_data = (l_mg->data_line) ? l_mg->data_line->id : -1; + cur_log = (l_mg->log_line) ? l_mg->log_line->id : -1; + nr_free_lines = l_mg->nr_free_lines; + + list_for_each_entry(line, &l_mg->free_list, list) + free_line_cnt++; + spin_unlock(&l_mg->free_lock); + + spin_lock(&l_mg->gc_lock); + list_for_each_entry(line, &l_mg->gc_full_list, list) { + if (line->type == PBLK_LINETYPE_DATA) + d_line_cnt++; + else if (line->type == PBLK_LINETYPE_LOG) + l_line_cnt++; + closed_line_cnt++; + gc_full++; + } + + list_for_each_entry(line, &l_mg->gc_high_list, list) { + if (line->type == PBLK_LINETYPE_DATA) + d_line_cnt++; + else if (line->type == PBLK_LINETYPE_LOG) + l_line_cnt++; + closed_line_cnt++; + gc_high++; + } + + list_for_each_entry(line, &l_mg->gc_mid_list, list) { + if (line->type == PBLK_LINETYPE_DATA) + d_line_cnt++; + else if (line->type == PBLK_LINETYPE_LOG) + l_line_cnt++; + closed_line_cnt++; + gc_mid++; + } + + list_for_each_entry(line, &l_mg->gc_low_list, list) { + if (line->type == PBLK_LINETYPE_DATA) + d_line_cnt++; + else if (line->type == PBLK_LINETYPE_LOG) + l_line_cnt++; + closed_line_cnt++; + gc_low++; + } + + list_for_each_entry(line, &l_mg->gc_empty_list, list) { + if (line->type == PBLK_LINETYPE_DATA) + d_line_cnt++; + else if (line->type == PBLK_LINETYPE_LOG) + l_line_cnt++; + closed_line_cnt++; + gc_empty++; + } + + list_for_each_entry(line, &l_mg->free_list, list) + free++; + list_for_each_entry(line, &l_mg->bad_list, list) + bad++; + list_for_each_entry(line, &l_mg->corrupt_list, list) + cor++; + spin_unlock(&l_mg->gc_lock); + + spin_lock(&l_mg->free_lock); + if (l_mg->data_line) { + cur_sec = l_mg->data_line->cur_sec; + msecs = l_mg->data_line->left_msecs; + ssecs = l_mg->data_line->left_ssecs; + vsc = l_mg->data_line->vsc; + sec_in_line = l_mg->data_line->sec_in_line; + meta_weight = bitmap_weight(&l_mg->meta_bitmap, + PBLK_DATA_LINES); + map_weight = bitmap_weight(l_mg->data_line->map_bitmap, + 
lm->sec_per_line); + } + spin_unlock(&l_mg->free_lock); + + if (nr_free_lines != free_line_cnt) + pr_err("pblk: corrupted free line list\n"); + + sz = snprintf(page, PAGE_SIZE - sz, + "line: nluns:%d, nblks:%d, nsecs:%d\n", + geo->nr_luns, lm->blk_per_line, lm->sec_per_line); + + sz += snprintf(page + sz, PAGE_SIZE - sz, + "lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n", + cur_data, cur_log, + free, nr_free_lines, bad, cor, + closed_line_cnt, + d_line_cnt, l_line_cnt, + l_mg->nr_lines); + + sz += snprintf(page + sz, PAGE_SIZE - sz, + "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n", + gc_full, gc_high, gc_mid, gc_low, gc_empty, + atomic_read(&pblk->gc.inflight_gc)); + + sz += snprintf(page + sz, PAGE_SIZE - sz, + "data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n", + cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line, + map_weight, lm->sec_per_line, meta_weight); + + return sz; +} + +static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + ssize_t sz = 0; + + sz = snprintf(page, PAGE_SIZE - sz, + "smeta - len:%d, secs:%d\n", + lm->smeta_len, lm->smeta_sec); + sz += snprintf(page + sz, PAGE_SIZE - sz, + "emeta - len:%d, sec:%d, bb_start:%d\n", + lm->emeta_len, lm->emeta_sec, + lm->emeta_bb); + sz += snprintf(page + sz, PAGE_SIZE - sz, + "bitmap lengths: sec:%d, blk:%d, lun:%d\n", + lm->sec_bitmap_len, + lm->blk_bitmap_len, + lm->lun_bitmap_len); + sz += snprintf(page + sz, PAGE_SIZE - sz, + "blk_line:%d, sec_line:%d, sec_blk:%d\n", + lm->blk_per_line, + lm->sec_per_line, + geo->sec_per_blk); + + return sz; +} + +#ifdef CONFIG_NVM_DEBUG +static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page) +{ + return snprintf(page, PAGE_SIZE, + "%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n", + atomic_long_read(&pblk->inflight_writes), + atomic_long_read(&pblk->inflight_reads), + 
atomic_long_read(&pblk->req_writes), + atomic_long_read(&pblk->nr_flush), + atomic_long_read(&pblk->padded_writes), + atomic_long_read(&pblk->padded_wb), + atomic_long_read(&pblk->sub_writes), + atomic_long_read(&pblk->sync_writes), + atomic_long_read(&pblk->compl_writes), + atomic_long_read(&pblk->recov_writes), + atomic_long_read(&pblk->recov_gc_writes), + atomic_long_read(&pblk->recov_gc_reads), + atomic_long_read(&pblk->sync_reads)); +} +#endif + +static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page, + size_t len) +{ + struct pblk_gc *gc = &pblk->gc; + size_t c_len; + int value; + + c_len = strcspn(page, "\n"); + if (c_len >= len) + return -EINVAL; + + if (kstrtouint(page, 0, &value)) + return -EINVAL; + + spin_lock(&gc->lock); + pblk_rl_set_gc_rsc(&pblk->rl, value); + spin_unlock(&gc->lock); + + return len; +} + +static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page, + size_t len) +{ + size_t c_len; + int force; + + c_len = strcspn(page, "\n"); + if (c_len >= len) + return -EINVAL; + + if (kstrtouint(page, 0, &force)) + return -EINVAL; + + if (force < 0 || force > 1) + return -EINVAL; + + pblk_gc_sysfs_force(pblk, force); + + return len; +} + +static struct attribute sys_write_luns = { + .name = "write_luns", + .mode = 0444, +}; + +static struct attribute sys_rate_limiter_attr = { + .name = "rate_limiter", + .mode = 0444, +}; + +static struct attribute sys_gc_state = { + .name = "gc_state", + .mode = 0444, +}; + +static struct attribute sys_errors_attr = { + .name = "errors", + .mode = 0444, +}; + +static struct attribute sys_rb_attr = { + .name = "write_buffer", + .mode = 0444, +}; + +static struct attribute sys_stats_ppaf_attr = { + .name = "ppa_format", + .mode = 0444, +}; + +static struct attribute sys_lines_attr = { + .name = "lines", + .mode = 0444, +}; + +static struct attribute sys_lines_info_attr = { + .name = "lines_info", + .mode = 0444, +}; + +static struct attribute sys_gc_force = { + .name = "gc_force", + 
.mode = 0200, +}; + +static struct attribute sys_gc_rl_max = { + .name = "gc_rl_max", + .mode = 0200, +}; + +#ifdef CONFIG_NVM_DEBUG +static struct attribute sys_stats_debug_attr = { + .name = "stats", + .mode = 0444, +}; +#endif + +static struct attribute *pblk_attrs[] = { + &sys_write_luns, + &sys_rate_limiter_attr, + &sys_errors_attr, + &sys_gc_state, + &sys_gc_force, + &sys_gc_rl_max, + &sys_rb_attr, + &sys_stats_ppaf_attr, + &sys_lines_attr, + &sys_lines_info_attr, +#ifdef CONFIG_NVM_DEBUG + &sys_stats_debug_attr, +#endif + NULL, +}; + +static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct pblk *pblk = container_of(kobj, struct pblk, kobj); + + if (strcmp(attr->name, "rate_limiter") == 0) + return pblk_sysfs_rate_limiter(pblk, buf); + else if (strcmp(attr->name, "write_luns") == 0) + return pblk_sysfs_luns_show(pblk, buf); + else if (strcmp(attr->name, "gc_state") == 0) + return pblk_sysfs_gc_state_show(pblk, buf); + else if (strcmp(attr->name, "errors") == 0) + return pblk_sysfs_stats(pblk, buf); + else if (strcmp(attr->name, "write_buffer") == 0) + return pblk_sysfs_write_buffer(pblk, buf); + else if (strcmp(attr->name, "ppa_format") == 0) + return pblk_sysfs_ppaf(pblk, buf); + else if (strcmp(attr->name, "lines") == 0) + return pblk_sysfs_lines(pblk, buf); + else if (strcmp(attr->name, "lines_info") == 0) + return pblk_sysfs_lines_info(pblk, buf); +#ifdef CONFIG_NVM_DEBUG + else if (strcmp(attr->name, "stats") == 0) + return pblk_sysfs_stats_debug(pblk, buf); +#endif + return 0; +} + +static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct pblk *pblk = container_of(kobj, struct pblk, kobj); + + if (strcmp(attr->name, "gc_rl_max") == 0) + return pblk_sysfs_rate_store(pblk, buf, len); + else if (strcmp(attr->name, "gc_force") == 0) + return pblk_sysfs_gc_force(pblk, buf, len); + + return 0; +} + +static const struct sysfs_ops pblk_sysfs_ops = { + 
.show = pblk_sysfs_show, + .store = pblk_sysfs_store, +}; + +static struct kobj_type pblk_ktype = { + .sysfs_ops = &pblk_sysfs_ops, + .default_attrs = pblk_attrs, +}; + +int pblk_sysfs_init(struct gendisk *tdisk) +{ + struct pblk *pblk = tdisk->private_data; + struct device *parent_dev = disk_to_dev(pblk->disk); + int ret; + + ret = kobject_init_and_add(&pblk->kobj, &pblk_ktype, + kobject_get(&parent_dev->kobj), + "%s", "pblk"); + if (ret) { + pr_err("pblk: could not register %s/pblk\n", + tdisk->disk_name); + return ret; + } + + kobject_uevent(&pblk->kobj, KOBJ_ADD); + return 0; +} + +void pblk_sysfs_exit(struct gendisk *tdisk) +{ + struct pblk *pblk = tdisk->private_data; + + kobject_uevent(&pblk->kobj, KOBJ_REMOVE); + kobject_del(&pblk->kobj); + kobject_put(&pblk->kobj); +} diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c new file mode 100644 index 000000000000..aef6fd7c4a0c --- /dev/null +++ b/drivers/lightnvm/pblk-write.c @@ -0,0 +1,414 @@ +/* + * Copyright (C) 2016 CNEX Labs + * Initial release: Javier Gonzalez <javier@cnexlabs.com> + * Matias Bjorling <matias@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * pblk-write.c - pblk's write path from write buffer to media + */ + +#include "pblk.h" + +static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line) +{ +#ifdef CONFIG_NVM_DEBUG + atomic_long_inc(&pblk->sync_writes); +#endif + + /* Counter protected by rb sync lock */ + line->left_ssecs--; + if (!line->left_ssecs) + pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws); +} + +static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_c_ctx *c_ctx) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct bio *original_bio; + unsigned long ret; + int i; + + for (i = 0; i < c_ctx->nr_valid; i++) { + struct pblk_w_ctx *w_ctx; + struct ppa_addr p; + struct pblk_line *line; + + w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i); + + p = rqd->ppa_list[i]; + line = &pblk->lines[pblk_dev_ppa_to_line(p)]; + pblk_sync_line(pblk, line); + + while ((original_bio = bio_list_pop(&w_ctx->bios))) + bio_endio(original_bio); + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes); +#endif + + ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid); + + if (rqd->meta_list) + nvm_dev_dma_free(dev->parent, rqd->meta_list, + rqd->dma_meta_list); + + bio_put(rqd->bio); + pblk_free_rqd(pblk, rqd, WRITE); + + return ret; +} + +static unsigned long pblk_end_queued_w_bio(struct pblk *pblk, + struct nvm_rq *rqd, + struct pblk_c_ctx *c_ctx) +{ + list_del(&c_ctx->list); + return pblk_end_w_bio(pblk, rqd, c_ctx); +} + +static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_c_ctx *c_ctx) +{ + struct pblk_c_ctx *c, *r; + unsigned long flags; + unsigned long pos; + +#ifdef CONFIG_NVM_DEBUG + atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes); +#endif + + pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap); + + pos = pblk_rb_sync_init(&pblk->rwb, &flags); + if (pos == c_ctx->sentry) { + pos = pblk_end_w_bio(pblk, rqd, c_ctx); + +retry: + list_for_each_entry_safe(c, r, 
&pblk->compl_list, list) { + rqd = nvm_rq_from_c_ctx(c); + if (c->sentry == pos) { + pos = pblk_end_queued_w_bio(pblk, rqd, c); + goto retry; + } + } + } else { + WARN_ON(nvm_rq_from_c_ctx(c_ctx) != rqd); + list_add_tail(&c_ctx->list, &pblk->compl_list); + } + pblk_rb_sync_end(&pblk->rwb, &flags); +} + +/* When a write fails, we are not sure whether the block has grown bad or a page + * range is more susceptible to write errors. If a high number of pages fail, we + * assume that the block is bad and we mark it accordingly. In all cases, we + * remap and resubmit the failed entries as fast as possible; if a flush is + * waiting on a completion, the whole stack would stall otherwise. + */ +static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd) +{ + void *comp_bits = &rqd->ppa_status; + struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); + struct pblk_rec_ctx *recovery; + struct ppa_addr *ppa_list = rqd->ppa_list; + int nr_ppas = rqd->nr_ppas; + unsigned int c_entries; + int bit, ret; + + if (unlikely(nr_ppas == 1)) + ppa_list = &rqd->ppa_addr; + + recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC); + if (!recovery) { + pr_err("pblk: could not allocate recovery context\n"); + return; + } + INIT_LIST_HEAD(&recovery->failed); + + bit = -1; + while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) { + struct pblk_rb_entry *entry; + struct ppa_addr ppa; + + /* Logic error */ + if (bit > c_ctx->nr_valid) { + WARN_ONCE(1, "pblk: corrupted write request\n"); + mempool_free(recovery, pblk->rec_pool); + goto out; + } + + ppa = ppa_list[bit]; + entry = pblk_rb_sync_scan_entry(&pblk->rwb, &ppa); + if (!entry) { + pr_err("pblk: could not scan entry on write failure\n"); + mempool_free(recovery, pblk->rec_pool); + goto out; + } + + /* The list is filled first and emptied afterwards. 
No need for + * protecting it with a lock + */ + list_add_tail(&entry->index, &recovery->failed); + } + + c_entries = find_first_bit(comp_bits, nr_ppas); + ret = pblk_recov_setup_rq(pblk, c_ctx, recovery, comp_bits, c_entries); + if (ret) { + pr_err("pblk: could not recover from write failure\n"); + mempool_free(recovery, pblk->rec_pool); + goto out; + } + + INIT_WORK(&recovery->ws_rec, pblk_submit_rec); + queue_work(pblk->kw_wq, &recovery->ws_rec); + +out: + pblk_complete_write(pblk, rqd, c_ctx); +} + +static void pblk_end_io_write(struct nvm_rq *rqd) +{ + struct pblk *pblk = rqd->private; + struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); + + if (rqd->error) { + pblk_log_write_err(pblk, rqd); + return pblk_end_w_fail(pblk, rqd); + } +#ifdef CONFIG_NVM_DEBUG + else + WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n"); +#endif + + pblk_complete_write(pblk, rqd, c_ctx); +} + +static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, + unsigned int nr_secs) +{ + struct nvm_tgt_dev *dev = pblk->dev; + + /* Setup write request */ + rqd->opcode = NVM_OP_PWRITE; + rqd->nr_ppas = nr_secs; + rqd->flags = pblk_set_progr_mode(pblk, WRITE); + rqd->private = pblk; + rqd->end_io = pblk_end_io_write; + + rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, + &rqd->dma_meta_list); + if (!rqd->meta_list) + return -ENOMEM; + + if (unlikely(nr_secs == 1)) + return 0; + + rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; + rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; + + return 0; +} + +static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_c_ctx *c_ctx) +{ + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line *e_line = pblk_line_get_data_next(pblk); + struct ppa_addr erase_ppa; + unsigned int valid = c_ctx->nr_valid; + unsigned int padded = c_ctx->nr_padded; + unsigned int nr_secs = valid + padded; + unsigned long *lun_bitmap; + int ret = 0; + + lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL); + if 
(!lun_bitmap) { + ret = -ENOMEM; + goto out; + } + c_ctx->lun_bitmap = lun_bitmap; + + ret = pblk_alloc_w_rq(pblk, rqd, nr_secs); + if (ret) { + kfree(lun_bitmap); + goto out; + } + + ppa_set_empty(&erase_ppa); + if (likely(!e_line || !atomic_read(&e_line->left_eblks))) + pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0); + else + pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, + valid, &erase_ppa); + +out: + if (unlikely(e_line && !ppa_empty(erase_ppa))) { + if (pblk_blk_erase_async(pblk, erase_ppa)) { + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int bit; + + atomic_inc(&e_line->left_eblks); + bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch; + WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap)); + up(&pblk->erase_sem); + } + } + + return ret; +} + +int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_c_ctx *c_ctx) +{ + struct pblk_line_meta *lm = &pblk->lm; + unsigned long *lun_bitmap; + int ret; + + lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL); + if (!lun_bitmap) + return -ENOMEM; + + c_ctx->lun_bitmap = lun_bitmap; + + ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas); + if (ret) + return ret; + + pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0); + + rqd->ppa_status = (u64)0; + rqd->flags = pblk_set_progr_mode(pblk, WRITE); + + return ret; +} + +static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail, + unsigned int secs_to_flush) +{ + int secs_to_sync; + + secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush); + +#ifdef CONFIG_NVM_DEBUG + if ((!secs_to_sync && secs_to_flush) + || (secs_to_sync < 0) + || (secs_to_sync > secs_avail && !secs_to_flush)) { + pr_err("pblk: bad sector calculation (a:%d,s:%d,f:%d)\n", + secs_avail, secs_to_sync, secs_to_flush); + } +#endif + + return secs_to_sync; +} + +static int pblk_submit_write(struct pblk *pblk) +{ + struct bio *bio; + struct nvm_rq *rqd; + struct pblk_c_ctx *c_ctx; + unsigned 
int pgs_read; + unsigned int secs_avail, secs_to_sync, secs_to_com; + unsigned int secs_to_flush; + unsigned long pos; + int err; + + /* If there are no sectors in the cache, flushes (bios without data) + * will be cleared on the cache threads + */ + secs_avail = pblk_rb_read_count(&pblk->rwb); + if (!secs_avail) + return 1; + + secs_to_flush = pblk_rb_sync_point_count(&pblk->rwb); + if (!secs_to_flush && secs_avail < pblk->min_write_pgs) + return 1; + + rqd = pblk_alloc_rqd(pblk, WRITE); + if (IS_ERR(rqd)) { + pr_err("pblk: cannot allocate write req.\n"); + return 1; + } + c_ctx = nvm_rq_to_pdu(rqd); + + bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs); + if (!bio) { + pr_err("pblk: cannot allocate write bio\n"); + goto fail_free_rqd; + } + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + rqd->bio = bio; + + secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush); + if (secs_to_sync > pblk->max_write_pgs) { + pr_err("pblk: bad buffer sync calculation\n"); + goto fail_put_bio; + } + + secs_to_com = (secs_to_sync > secs_avail) ? 
secs_avail : secs_to_sync; + pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); + + pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos, + secs_to_sync, secs_avail); + if (!pgs_read) { + pr_err("pblk: corrupted write bio\n"); + goto fail_put_bio; + } + + if (c_ctx->nr_padded) + if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded)) + goto fail_put_bio; + + /* Assign lbas to ppas and populate request structure */ + err = pblk_setup_w_rq(pblk, rqd, c_ctx); + if (err) { + pr_err("pblk: could not setup write request\n"); + goto fail_free_bio; + } + + err = pblk_submit_io(pblk, rqd); + if (err) { + pr_err("pblk: I/O submission failed: %d\n", err); + goto fail_free_bio; + } + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(secs_to_sync, &pblk->sub_writes); +#endif + + return 0; + +fail_free_bio: + if (c_ctx->nr_padded) + pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded); +fail_put_bio: + bio_put(bio); +fail_free_rqd: + pblk_free_rqd(pblk, rqd, WRITE); + + return 1; +} + +int pblk_write_ts(void *data) +{ + struct pblk *pblk = data; + + while (!kthread_should_stop()) { + if (!pblk_submit_write(pblk)) + continue; + set_current_state(TASK_INTERRUPTIBLE); + io_schedule(); + } + + return 0; +} diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h new file mode 100644 index 000000000000..99f3186b5288 --- /dev/null +++ b/drivers/lightnvm/pblk.h @@ -0,0 +1,1121 @@ +/* + * Copyright (C) 2015 IT University of Copenhagen (rrpc.h) + * Copyright (C) 2016 CNEX Labs + * Initial release: Matias Bjorling <matias@cnexlabs.com> + * Write buffering: Javier Gonzalez <javier@cnexlabs.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Implementation of a Physical Block-device target for Open-channel SSDs. + * + */ + +#ifndef PBLK_H_ +#define PBLK_H_ + +#include <linux/blkdev.h> +#include <linux/blk-mq.h> +#include <linux/bio.h> +#include <linux/module.h> +#include <linux/kthread.h> +#include <linux/vmalloc.h> +#include <linux/crc32.h> +#include <linux/uuid.h> + +#include <linux/lightnvm.h> + +/* Run only GC if less than 1/X blocks are free */ +#define GC_LIMIT_INVERSE 5 +#define GC_TIME_MSECS 1000 + +#define PBLK_SECTOR (512) +#define PBLK_EXPOSED_PAGE_SIZE (4096) +#define PBLK_MAX_REQ_ADDRS (64) +#define PBLK_MAX_REQ_ADDRS_PW (6) + +#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16) + +#define PBLK_COMMAND_TIMEOUT_MS 30000 + +/* Max 512 LUNs per device */ +#define PBLK_MAX_LUNS_BITMAP (4) + +#define NR_PHY_IN_LOG (PBLK_EXPOSED_PAGE_SIZE / PBLK_SECTOR) + +#define pblk_for_each_lun(pblk, rlun, i) \ + for ((i) = 0, rlun = &(pblk)->luns[0]; \ + (i) < (pblk)->nr_luns; (i)++, rlun = &(pblk)->luns[(i)]) + +#define ERASE 2 /* READ = 0, WRITE = 1 */ + +enum { + /* IO Types */ + PBLK_IOTYPE_USER = 1 << 0, + PBLK_IOTYPE_GC = 1 << 1, + + /* Write buffer flags */ + PBLK_FLUSH_ENTRY = 1 << 2, + PBLK_WRITTEN_DATA = 1 << 3, + PBLK_SUBMITTED_ENTRY = 1 << 4, + PBLK_WRITABLE_ENTRY = 1 << 5, +}; + +enum { + PBLK_BLK_ST_OPEN = 0x1, + PBLK_BLK_ST_CLOSED = 0x2, +}; + +/* The number of GC lists and the rate-limiter states go together. This way the + * rate-limiter can dictate how much GC is needed based on resource utilization. 
+ */ +#define PBLK_NR_GC_LISTS 3 +#define PBLK_MAX_GC_JOBS 32 + +enum { + PBLK_RL_HIGH = 1, + PBLK_RL_MID = 2, + PBLK_RL_LOW = 3, +}; + +struct pblk_sec_meta { + u64 reserved; + __le64 lba; +}; + +#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS) + +/* write completion context */ +struct pblk_c_ctx { + struct list_head list; /* Head for out-of-order completion */ + + unsigned long *lun_bitmap; /* Luns used on current request */ + unsigned int sentry; + unsigned int nr_valid; + unsigned int nr_padded; +}; + +/* Read context */ +struct pblk_r_ctx { + struct bio *orig_bio; +}; + +/* Recovery context */ +struct pblk_rec_ctx { + struct pblk *pblk; + struct nvm_rq *rqd; + struct list_head failed; + struct work_struct ws_rec; +}; + +/* Write context */ +struct pblk_w_ctx { + struct bio_list bios; /* Original bios - used for completion + * in REQ_FUA, REQ_FLUSH case + */ + u64 lba; /* Logic addr. associated with entry */ + struct ppa_addr ppa; /* Physic addr. associated with entry */ + int flags; /* Write context flags */ +}; + +struct pblk_rb_entry { + struct ppa_addr cacheline; /* Cacheline for this entry */ + void *data; /* Pointer to data on this entry */ + struct pblk_w_ctx w_ctx; /* Context for this entry */ + struct list_head index; /* List head to enable indexes */ +}; + +#define EMPTY_ENTRY (~0U) + +struct pblk_rb_pages { + struct page *pages; + int order; + struct list_head list; +}; + +struct pblk_rb { + struct pblk_rb_entry *entries; /* Ring buffer entries */ + unsigned int mem; /* Write offset - points to next + * writable entry in memory + */ + unsigned int subm; /* Read offset - points to last entry + * that has been submitted to the media + * to be persisted + */ + unsigned int sync; /* Synced - backpointer that signals + * the last submitted entry that has + * been successfully persisted to media + */ + unsigned int sync_point; /* Sync point - last entry that must be + * flushed to the media. 
Used with + * REQ_FLUSH and REQ_FUA + */ + unsigned int l2p_update; /* l2p update point - next entry for + * which l2p mapping will be updated to + * contain a device ppa address (instead + * of a cacheline + */ + unsigned int nr_entries; /* Number of entries in write buffer - + * must be a power of two + */ + unsigned int seg_size; /* Size of the data segments being + * stored on each entry. Typically this + * will be 4KB + */ + + struct list_head pages; /* List of data pages */ + + spinlock_t w_lock; /* Write lock */ + spinlock_t s_lock; /* Sync lock */ + +#ifdef CONFIG_NVM_DEBUG + atomic_t inflight_sync_point; /* Not served REQ_FLUSH | REQ_FUA */ +#endif +}; + +#define PBLK_RECOVERY_SECTORS 16 + +struct pblk_lun { + struct ppa_addr bppa; + + u8 *bb_list; /* Bad block list for LUN. Only used on + * bring up. Bad blocks are managed + * within lines on run-time. + */ + + struct semaphore wr_sem; +}; + +struct pblk_gc_rq { + struct pblk_line *line; + void *data; + u64 *lba_list; + int nr_secs; + int secs_to_gc; + struct list_head list; +}; + +struct pblk_gc { + int gc_active; + int gc_enabled; + int gc_forced; + int gc_jobs_active; + atomic_t inflight_gc; + + struct task_struct *gc_ts; + struct task_struct *gc_writer_ts; + struct workqueue_struct *gc_reader_wq; + struct timer_list gc_timer; + + int w_entries; + struct list_head w_list; + + spinlock_t lock; + spinlock_t w_lock; +}; + +struct pblk_rl { + unsigned int high; /* Upper threshold for rate limiter (free run - + * user I/O rate limiter + */ + unsigned int low; /* Lower threshold for rate limiter (user I/O + * rate limiter - stall) + */ + unsigned int high_pw; /* High rounded up as a power of 2 */ + +#define PBLK_USER_HIGH_THRS 2 /* Begin write limit at 50 percent + * available blks + */ +#define PBLK_USER_LOW_THRS 20 /* Aggressive GC at 5% available blocks */ + + int rb_windows_pw; /* Number of rate windows in the write buffer + * given as a power-of-2. 
This guarantees that + * when user I/O is being rate limited, there + * will be reserved enough space for the GC to + * place its payload. A window is of + * pblk->max_write_pgs size, which in NVMe is + * 64, i.e., 256kb. + */ + int rb_budget; /* Total number of entries available for I/O */ + int rb_user_max; /* Max buffer entries available for user I/O */ + atomic_t rb_user_cnt; /* User I/O buffer counter */ + int rb_gc_max; /* Max buffer entries available for GC I/O */ + int rb_gc_rsv; /* Reserved buffer entries for GC I/O */ + int rb_state; /* Rate-limiter current state */ + atomic_t rb_gc_cnt; /* GC I/O buffer counter */ + + int rb_user_active; + struct timer_list u_timer; + + unsigned long long nr_secs; + unsigned long total_blocks; + atomic_t free_blocks; +}; + +#define PBLK_LINE_NR_LUN_BITMAP 2 +#define PBLK_LINE_NR_SEC_BITMAP 2 +#define PBLK_LINE_EMPTY (~0U) + +enum { + /* Line Types */ + PBLK_LINETYPE_FREE = 0, + PBLK_LINETYPE_LOG = 1, + PBLK_LINETYPE_DATA = 2, + + /* Line state */ + PBLK_LINESTATE_FREE = 10, + PBLK_LINESTATE_OPEN = 11, + PBLK_LINESTATE_CLOSED = 12, + PBLK_LINESTATE_GC = 13, + PBLK_LINESTATE_BAD = 14, + PBLK_LINESTATE_CORRUPT = 15, + + /* GC group */ + PBLK_LINEGC_NONE = 20, + PBLK_LINEGC_EMPTY = 21, + PBLK_LINEGC_LOW = 22, + PBLK_LINEGC_MID = 23, + PBLK_LINEGC_HIGH = 24, + PBLK_LINEGC_FULL = 25, +}; + +#define PBLK_MAGIC 0x70626c6b /*pblk*/ + +struct line_header { + __le32 crc; + __le32 identifier; /* pblk identifier */ + __u8 uuid[16]; /* instance uuid */ + __le16 type; /* line type */ + __le16 version; /* type version */ + __le32 id; /* line id for current line */ +}; + +struct line_smeta { + struct line_header header; + + __le32 crc; /* Full structure including struct crc */ + /* Previous line metadata */ + __le32 prev_id; /* Line id for previous line */ + + /* Current line metadata */ + __le64 seq_nr; /* Sequence number for current line */ + + /* Active writers */ + __le32 window_wr_lun; /* Number of parallel LUNs to write */ + + 
__le32 rsvd[2]; +}; + +/* + * Metadata Layout: + * 1. struct pblk_emeta + * 2. nr_lbas u64 forming lba list + * 3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line) + * 4. nr_luns bits (u64 format) forming line bad block bitmap + * + * 3. and 4. will be part of FTL log + */ +struct line_emeta { + struct line_header header; + + __le32 crc; /* Full structure including struct crc */ + + /* Previous line metadata */ + __le32 prev_id; /* Line id for prev line */ + + /* Current line metadata */ + __le64 seq_nr; /* Sequence number for current line */ + + /* Active writers */ + __le32 window_wr_lun; /* Number of parallel LUNs to write */ + + /* Bookkeeping for recovery */ + __le32 next_id; /* Line id for next line */ + __le64 nr_lbas; /* Number of lbas mapped in line */ + __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */ +}; + +struct pblk_line { + struct pblk *pblk; + unsigned int id; /* Line number corresponds to the + * block line + */ + unsigned int seq_nr; /* Unique line sequence number */ + + int state; /* PBLK_LINESTATE_X */ + int type; /* PBLK_LINETYPE_X */ + int gc_group; /* PBLK_LINEGC_X */ + struct list_head list; /* Free, GC lists */ + + unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */ + + struct line_smeta *smeta; /* Start metadata */ + struct line_emeta *emeta; /* End metadata */ + int meta_line; /* Metadata line id */ + u64 smeta_ssec; /* Sector where smeta starts */ + u64 emeta_ssec; /* Sector where emeta starts */ + + unsigned int sec_in_line; /* Number of usable secs in line */ + + atomic_t blk_in_line; /* Number of good blocks in line */ + unsigned long *blk_bitmap; /* Bitmap for valid/invalid blocks */ + unsigned long *erase_bitmap; /* Bitmap for erased blocks */ + + unsigned long *map_bitmap; /* Bitmap for mapped sectors in line */ + unsigned long *invalid_bitmap; /* Bitmap for invalid sectors in line */ + + atomic_t left_eblks; /* Blocks left for erasing */ + atomic_t left_seblks; /* Blocks left for sync 
erasing */ + + int left_msecs; /* Sectors left for mapping */ + int left_ssecs; /* Sectors left to sync */ + unsigned int cur_sec; /* Sector map pointer */ + unsigned int vsc; /* Valid sector count in line */ + + struct kref ref; /* Write buffer L2P references */ + + spinlock_t lock; /* Necessary for invalid_bitmap only */ +}; + +#define PBLK_DATA_LINES 4 + +enum{ + PBLK_KMALLOC_META = 1, + PBLK_VMALLOC_META = 2, +}; + +struct pblk_line_metadata { + void *meta; +}; + +struct pblk_line_mgmt { + int nr_lines; /* Total number of full lines */ + int nr_free_lines; /* Number of full lines in free list */ + + /* Free lists - use free_lock */ + struct list_head free_list; /* Full lines ready to use */ + struct list_head corrupt_list; /* Full lines corrupted */ + struct list_head bad_list; /* Full lines bad */ + + /* GC lists - use gc_lock */ + struct list_head *gc_lists[PBLK_NR_GC_LISTS]; + struct list_head gc_high_list; /* Full lines ready to GC, high isc */ + struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */ + struct list_head gc_low_list; /* Full lines ready to GC, low isc */ + + struct list_head gc_full_list; /* Full lines ready to GC, no valid */ + struct list_head gc_empty_list; /* Full lines close, all valid */ + + struct pblk_line *log_line; /* Current FTL log line */ + struct pblk_line *data_line; /* Current data line */ + struct pblk_line *log_next; /* Next FTL log line */ + struct pblk_line *data_next; /* Next data line */ + + /* Metadata allocation type: VMALLOC | KMALLOC */ + int smeta_alloc_type; + int emeta_alloc_type; + + /* Pre-allocated metadata for data lines */ + struct pblk_line_metadata sline_meta[PBLK_DATA_LINES]; + struct pblk_line_metadata eline_meta[PBLK_DATA_LINES]; + unsigned long meta_bitmap; + + /* Helpers for fast bitmap calculations */ + unsigned long *bb_template; + unsigned long *bb_aux; + + unsigned long d_seq_nr; /* Data line unique sequence number */ + unsigned long l_seq_nr; /* Log line unique sequence number */ + + 
spinlock_t free_lock; + spinlock_t gc_lock; +}; + +struct pblk_line_meta { + unsigned int smeta_len; /* Total length for smeta */ + unsigned int smeta_sec; /* Sectors needed for smeta*/ + unsigned int emeta_len; /* Total length for emeta */ + unsigned int emeta_sec; /* Sectors needed for emeta*/ + unsigned int emeta_bb; /* Boundary for bb that affects emeta */ + unsigned int sec_bitmap_len; /* Length for sector bitmap in line */ + unsigned int blk_bitmap_len; /* Length for block bitmap in line */ + unsigned int lun_bitmap_len; /* Length for lun bitmap in line */ + + unsigned int blk_per_line; /* Number of blocks in a full line */ + unsigned int sec_per_line; /* Number of sectors in a line */ + unsigned int min_blk_line; /* Min. number of good blocks in line */ + + unsigned int mid_thrs; /* Threshold for GC mid list */ + unsigned int high_thrs; /* Threshold for GC high list */ +}; + +struct pblk_addr_format { + u64 ch_mask; + u64 lun_mask; + u64 pln_mask; + u64 blk_mask; + u64 pg_mask; + u64 sec_mask; + u8 ch_offset; + u8 lun_offset; + u8 pln_offset; + u8 blk_offset; + u8 pg_offset; + u8 sec_offset; +}; + +struct pblk { + struct nvm_tgt_dev *dev; + struct gendisk *disk; + + struct kobject kobj; + + struct pblk_lun *luns; + + struct pblk_line *lines; /* Line array */ + struct pblk_line_mgmt l_mg; /* Line management */ + struct pblk_line_meta lm; /* Line metadata */ + + int ppaf_bitsize; + struct pblk_addr_format ppaf; + + struct pblk_rb rwb; + + int min_write_pgs; /* Minimum amount of pages required by controller */ + int max_write_pgs; /* Maximum amount of pages supported by controller */ + int pgs_in_buffer; /* Number of pages that need to be held in buffer to + * guarantee successful reads. + */ + + sector_t capacity; /* Device capacity when bad blocks are subtracted */ + int over_pct; /* Percentage of device used for over-provisioning */ + + /* pblk provisioning values. 
Used by rate limiter */ + struct pblk_rl rl; + + struct semaphore erase_sem; + + unsigned char instance_uuid[16]; +#ifdef CONFIG_NVM_DEBUG + /* All debug counters apply to 4kb sector I/Os */ + atomic_long_t inflight_writes; /* Inflight writes (user and gc) */ + atomic_long_t padded_writes; /* Sectors padded due to flush/fua */ + atomic_long_t padded_wb; /* Sectors padded in write buffer */ + atomic_long_t nr_flush; /* Number of flush/fua I/O */ + atomic_long_t req_writes; /* Sectors stored on write buffer */ + atomic_long_t sub_writes; /* Sectors submitted from buffer */ + atomic_long_t sync_writes; /* Sectors synced to media */ + atomic_long_t compl_writes; /* Sectors completed in write bio */ + atomic_long_t inflight_reads; /* Inflight sector read requests */ + atomic_long_t sync_reads; /* Completed sector read requests */ + atomic_long_t recov_writes; /* Sectors submitted from recovery */ + atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */ + atomic_long_t recov_gc_reads; /* Sectors submitted from read GC */ +#endif + + spinlock_t lock; + + atomic_long_t read_failed; + atomic_long_t read_empty; + atomic_long_t read_high_ecc; + atomic_long_t read_failed_gc; + atomic_long_t write_failed; + atomic_long_t erase_failed; + + struct task_struct *writer_ts; + + /* Simple translation map of logical addresses to physical addresses. + * The logical addresses is known by the host system, while the physical + * addresses are used when writing to the disk block device. 
+ */ + unsigned char *trans_map; + spinlock_t trans_lock; + + struct list_head compl_list; + + mempool_t *page_pool; + mempool_t *line_ws_pool; + mempool_t *rec_pool; + mempool_t *r_rq_pool; + mempool_t *w_rq_pool; + mempool_t *line_meta_pool; + + struct workqueue_struct *kw_wq; + struct timer_list wtimer; + + struct pblk_gc gc; +}; + +struct pblk_line_ws { + struct pblk *pblk; + struct pblk_line *line; + void *priv; + struct work_struct ws; +}; + +#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx)) +#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx)) + +/* + * pblk ring buffer operations + */ +int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, + unsigned int power_size, unsigned int power_seg_sz); +unsigned int pblk_rb_calculate_size(unsigned int nr_entries); +void *pblk_rb_entries_ref(struct pblk_rb *rb); +int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio, + unsigned int nr_entries, unsigned int *pos); +int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, + unsigned int *pos); +void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data, + struct pblk_w_ctx w_ctx, unsigned int pos); +void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, + struct pblk_w_ctx w_ctx, struct pblk_line *gc_line, + unsigned int pos); +struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos); + +void pblk_rb_sync_l2p(struct pblk_rb *rb); +unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio, + struct pblk_c_ctx *c_ctx, + unsigned int pos, + unsigned int nr_entries, + unsigned int count); +unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio, + struct list_head *list, + unsigned int max); +int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, + u64 pos, int bio_iter); +unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); + +unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long 
*flags); +unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries); +struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb, + struct ppa_addr *ppa); +void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags); +unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb); + +unsigned int pblk_rb_read_count(struct pblk_rb *rb); +unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos); + +int pblk_rb_tear_down_check(struct pblk_rb *rb); +int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos); +void pblk_rb_data_free(struct pblk_rb *rb); +ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf); + +/* + * pblk core + */ +struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw); +int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, + struct pblk_c_ctx *c_ctx); +void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw); +void pblk_flush_writer(struct pblk *pblk); +struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba); +void pblk_discard(struct pblk *pblk, struct bio *bio); +void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); +void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); +int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); +struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, + unsigned int nr_secs, unsigned int len, + gfp_t gfp_mask); +struct pblk_line *pblk_line_get(struct pblk *pblk); +struct pblk_line *pblk_line_get_first_data(struct pblk *pblk); +struct pblk_line *pblk_line_replace_data(struct pblk *pblk); +int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line); +void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line); +struct pblk_line *pblk_line_get_data(struct pblk *pblk); +struct pblk_line *pblk_line_get_data_next(struct pblk *pblk); +int pblk_line_erase(struct pblk *pblk, struct pblk_line *line); +int pblk_line_is_full(struct pblk_line *line); +void pblk_line_free(struct pblk *pblk, struct pblk_line *line); +void 
pblk_line_close_ws(struct work_struct *work); +void pblk_line_close(struct pblk *pblk, struct pblk_line *line); +void pblk_line_mark_bb(struct work_struct *work); +void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, + void (*work)(struct work_struct *)); +u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line); +int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line); +int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line); +int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa); +void pblk_line_put(struct kref *ref); +struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line); +u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); +int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, + unsigned long secs_to_flush); +void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, + unsigned long *lun_bitmap); +void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, + unsigned long *lun_bitmap); +void pblk_end_bio_sync(struct bio *bio); +void pblk_end_io_sync(struct nvm_rq *rqd); +int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, + int nr_pages); +void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line, + u64 paddr); +void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, + int nr_pages); +void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa); +void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa); +void pblk_update_map_cache(struct pblk *pblk, sector_t lba, + struct ppa_addr ppa); +void pblk_update_map_dev(struct pblk *pblk, sector_t lba, + struct ppa_addr ppa, struct ppa_addr entry_line); +int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, + struct pblk_line *gc_line); +void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, + u64 *lba_list, int nr_secs); +void 
pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, + sector_t blba, int nr_secs); + +/* + * pblk user I/O write path + */ +int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, + unsigned long flags); +int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list, + unsigned int nr_entries, unsigned int nr_rec_entries, + struct pblk_line *gc_line, unsigned long flags); + +/* + * pblk map + */ +void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, + unsigned int sentry, unsigned long *lun_bitmap, + unsigned int valid_secs, struct ppa_addr *erase_ppa); +void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, + unsigned long *lun_bitmap, unsigned int valid_secs, + unsigned int off); + +/* + * pblk write thread + */ +int pblk_write_ts(void *data); +void pblk_write_timer_fn(unsigned long data); +void pblk_write_should_kick(struct pblk *pblk); + +/* + * pblk read path + */ +int pblk_submit_read(struct pblk *pblk, struct bio *bio); +int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, + unsigned int nr_secs, unsigned int *secs_to_gc, + struct pblk_line *line); +/* + * pblk recovery + */ +void pblk_submit_rec(struct work_struct *work); +struct pblk_line *pblk_recov_l2p(struct pblk *pblk); +void pblk_recov_pad(struct pblk *pblk); +__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta); +int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, + struct pblk_rec_ctx *recovery, u64 *comp_bits, + unsigned int comp); + +/* + * pblk gc + */ +#define PBLK_GC_TRIES 3 + +int pblk_gc_init(struct pblk *pblk); +void pblk_gc_exit(struct pblk *pblk); +void pblk_gc_should_start(struct pblk *pblk); +void pblk_gc_should_stop(struct pblk *pblk); +int pblk_gc_status(struct pblk *pblk); +void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, + int *gc_active); +void pblk_gc_sysfs_force(struct pblk *pblk, int force); + +/* + * pblk rate limiter + */ +void pblk_rl_init(struct 
pblk_rl *rl, int budget); +void pblk_rl_free(struct pblk_rl *rl); +int pblk_rl_gc_thrs(struct pblk_rl *rl); +unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl); +int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries); +void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries); +int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries); +void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries); +void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc); +void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv); +int pblk_rl_sysfs_rate_show(struct pblk_rl *rl); +void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line); +void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line); + +/* + * pblk sysfs + */ +int pblk_sysfs_init(struct gendisk *tdisk); +void pblk_sysfs_exit(struct gendisk *tdisk); + +static inline void *pblk_malloc(size_t size, int type, gfp_t flags) +{ + if (type == PBLK_KMALLOC_META) + return kmalloc(size, flags); + return vmalloc(size); +} + +static inline void pblk_mfree(void *ptr, int type) +{ + if (type == PBLK_KMALLOC_META) + kfree(ptr); + else + vfree(ptr); +} + +static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx) +{ + return c_ctx - sizeof(struct nvm_rq); +} + +static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta) +{ + return (emeta) + 1; +} + +#define NVM_MEM_PAGE_WRITE (8) + +static inline int pblk_pad_distance(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + + return NVM_MEM_PAGE_WRITE * geo->nr_luns * geo->sec_per_pl; +} + +static inline int pblk_dev_ppa_to_line(struct ppa_addr p) +{ + return p.g.blk; +} + +static inline int pblk_tgt_ppa_to_line(struct ppa_addr p) +{ + return p.g.blk; +} + +static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) +{ + return p.g.lun * geo->nr_chnls + p.g.ch; +} + +/* A block within a line corresponds to the lun */ +static inline int pblk_dev_ppa_to_pos(struct nvm_geo *geo, struct 
ppa_addr p) +{ + return p.g.lun * geo->nr_chnls + p.g.ch; +} + +static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32) +{ + struct ppa_addr ppa64; + + ppa64.ppa = 0; + + if (ppa32 == -1) { + ppa64.ppa = ADDR_EMPTY; + } else if (ppa32 & (1U << 31)) { + ppa64.c.line = ppa32 & ((~0U) >> 1); + ppa64.c.is_cached = 1; + } else { + ppa64.g.blk = (ppa32 & pblk->ppaf.blk_mask) >> + pblk->ppaf.blk_offset; + ppa64.g.pg = (ppa32 & pblk->ppaf.pg_mask) >> + pblk->ppaf.pg_offset; + ppa64.g.lun = (ppa32 & pblk->ppaf.lun_mask) >> + pblk->ppaf.lun_offset; + ppa64.g.ch = (ppa32 & pblk->ppaf.ch_mask) >> + pblk->ppaf.ch_offset; + ppa64.g.pl = (ppa32 & pblk->ppaf.pln_mask) >> + pblk->ppaf.pln_offset; + ppa64.g.sec = (ppa32 & pblk->ppaf.sec_mask) >> + pblk->ppaf.sec_offset; + } + + return ppa64; +} + +static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk, + sector_t lba) +{ + struct ppa_addr ppa; + + if (pblk->ppaf_bitsize < 32) { + u32 *map = (u32 *)pblk->trans_map; + + ppa = pblk_ppa32_to_ppa64(pblk, map[lba]); + } else { + struct ppa_addr *map = (struct ppa_addr *)pblk->trans_map; + + ppa = map[lba]; + } + + return ppa; +} + +static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64) +{ + u32 ppa32 = 0; + + if (ppa64.ppa == ADDR_EMPTY) { + ppa32 = ~0U; + } else if (ppa64.c.is_cached) { + ppa32 |= ppa64.c.line; + ppa32 |= 1U << 31; + } else { + ppa32 |= ppa64.g.blk << pblk->ppaf.blk_offset; + ppa32 |= ppa64.g.pg << pblk->ppaf.pg_offset; + ppa32 |= ppa64.g.lun << pblk->ppaf.lun_offset; + ppa32 |= ppa64.g.ch << pblk->ppaf.ch_offset; + ppa32 |= ppa64.g.pl << pblk->ppaf.pln_offset; + ppa32 |= ppa64.g.sec << pblk->ppaf.sec_offset; + } + + return ppa32; +} + +static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba, + struct ppa_addr ppa) +{ + if (pblk->ppaf_bitsize < 32) { + u32 *map = (u32 *)pblk->trans_map; + + map[lba] = pblk_ppa64_to_ppa32(pblk, ppa); + } else { + u64 *map = (u64 *)pblk->trans_map; + + map[lba] = 
ppa.ppa; + } +} + +static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, + struct ppa_addr p) +{ + u64 paddr; + + paddr = 0; + paddr |= (u64)p.g.pg << pblk->ppaf.pg_offset; + paddr |= (u64)p.g.lun << pblk->ppaf.lun_offset; + paddr |= (u64)p.g.ch << pblk->ppaf.ch_offset; + paddr |= (u64)p.g.pl << pblk->ppaf.pln_offset; + paddr |= (u64)p.g.sec << pblk->ppaf.sec_offset; + + return paddr; +} + +static inline int pblk_ppa_empty(struct ppa_addr ppa_addr) +{ + return (ppa_addr.ppa == ADDR_EMPTY); +} + +static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr) +{ + ppa_addr->ppa = ADDR_EMPTY; +} + +static inline int pblk_addr_in_cache(struct ppa_addr ppa) +{ + return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached); +} + +static inline int pblk_addr_to_cacheline(struct ppa_addr ppa) +{ + return ppa.c.line; +} + +static inline struct ppa_addr pblk_cacheline_to_addr(int addr) +{ + struct ppa_addr p; + + p.c.line = addr; + p.c.is_cached = 1; + + return p; +} + +static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, + u64 line_id) +{ + struct ppa_addr ppa; + + ppa.ppa = 0; + ppa.g.blk = line_id; + ppa.g.pg = (paddr & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset; + ppa.g.lun = (paddr & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset; + ppa.g.ch = (paddr & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset; + ppa.g.pl = (paddr & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset; + ppa.g.sec = (paddr & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset; + + return ppa; +} + +static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr, + u64 line_id) +{ + struct ppa_addr ppa; + + ppa = addr_to_gen_ppa(pblk, paddr, line_id); + + return ppa; +} + +static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk, + struct line_smeta *smeta) +{ + u32 crc = ~(u32)0; + + crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc), + sizeof(struct line_header) - sizeof(crc)); + + return crc; +} + +static inline u32 pblk_calc_smeta_crc(struct pblk *pblk, + struct 
line_smeta *smeta) +{ + struct pblk_line_meta *lm = &pblk->lm; + u32 crc = ~(u32)0; + + crc = crc32_le(crc, (unsigned char *)smeta + + sizeof(struct line_header) + sizeof(crc), + lm->smeta_len - + sizeof(struct line_header) - sizeof(crc)); + + return crc; +} + +static inline u32 pblk_calc_emeta_crc(struct pblk *pblk, + struct line_emeta *emeta) +{ + struct pblk_line_meta *lm = &pblk->lm; + u32 crc = ~(u32)0; + + crc = crc32_le(crc, (unsigned char *)emeta + + sizeof(struct line_header) + sizeof(crc), + lm->emeta_len - + sizeof(struct line_header) - sizeof(crc)); + + return crc; +} + +static inline int pblk_set_progr_mode(struct pblk *pblk, int type) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int flags; + + flags = geo->plane_mode >> 1; + + if (type == WRITE) + flags |= NVM_IO_SCRAMBLE_ENABLE; + + return flags; +} + +static inline int pblk_set_read_mode(struct pblk *pblk) +{ + return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE; +} + +#ifdef CONFIG_NVM_DEBUG +static inline void print_ppa(struct ppa_addr *p, char *msg, int error) +{ + if (p->c.is_cached) { + pr_err("ppa: (%s: %x) cache line: %llu\n", + msg, error, (u64)p->c.line); + } else { + pr_err("ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n", + msg, error, + p->g.ch, p->g.lun, p->g.blk, + p->g.pg, p->g.pl, p->g.sec); + } +} + +static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq *rqd, + int error) +{ + int bit = -1; + + if (rqd->nr_ppas == 1) { + print_ppa(&rqd->ppa_addr, "rqd", error); + return; + } + + while ((bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas, + bit + 1)) < rqd->nr_ppas) { + print_ppa(&rqd->ppa_list[bit], "rqd", error); + } + + pr_err("error:%d, ppa_status:%llx\n", error, rqd->ppa_status); +} +#endif + +static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, + struct ppa_addr *ppas, int nr_ppas) +{ + struct nvm_geo *geo = &tgt_dev->geo; + struct ppa_addr *ppa; + int i; + + for (i = 0; i < 
nr_ppas; i++) { + ppa = &ppas[i]; + + if (!ppa->c.is_cached && + ppa->g.ch < geo->nr_chnls && + ppa->g.lun < geo->luns_per_chnl && + ppa->g.pl < geo->nr_planes && + ppa->g.blk < geo->blks_per_lun && + ppa->g.pg < geo->pgs_per_blk && + ppa->g.sec < geo->sec_per_pg) + continue; + +#ifdef CONFIG_NVM_DEBUG + print_ppa(ppa, "boundary", i); +#endif + return 1; + } + return 0; +} + +static inline int pblk_boundary_paddr_checks(struct pblk *pblk, u64 paddr) +{ + struct pblk_line_meta *lm = &pblk->lm; + + if (paddr > lm->sec_per_line) + return 1; + + return 0; +} + +static inline unsigned int pblk_get_bi_idx(struct bio *bio) +{ + return bio->bi_iter.bi_idx; +} + +static inline sector_t pblk_get_lba(struct bio *bio) +{ + return bio->bi_iter.bi_sector / NR_PHY_IN_LOG; +} + +static inline unsigned int pblk_get_secs(struct bio *bio) +{ + return bio->bi_iter.bi_size / PBLK_EXPOSED_PAGE_SIZE; +} + +static inline sector_t pblk_get_sector(sector_t lba) +{ + return lba * NR_PHY_IN_LOG; +} + +static inline void pblk_setup_uuid(struct pblk *pblk) +{ + uuid_le uuid; + + uuid_le_gen(&uuid); + memcpy(pblk->instance_uuid, uuid.b, 16); +} +#endif /* PBLK_H_ */ diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index e00b1d7b976f..cf0e28a0ff61 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -318,10 +318,6 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk) } page = mempool_alloc(rrpc->page_pool, GFP_NOIO); - if (!page) { - bio_put(bio); - return -ENOMEM; - } while ((slot = find_first_zero_bit(rblk->invalid_pages, nr_sec_per_blk)) < nr_sec_per_blk) { @@ -414,7 +410,6 @@ static void rrpc_block_gc(struct work_struct *work) struct rrpc *rrpc = gcb->rrpc; struct rrpc_block *rblk = gcb->rblk; struct rrpc_lun *rlun = rblk->rlun; - struct nvm_tgt_dev *dev = rrpc->dev; struct ppa_addr ppa; mempool_free(gcb, rrpc->gcb_pool); @@ -430,7 +425,7 @@ static void rrpc_block_gc(struct work_struct *work) ppa.g.lun = rlun->bppa.g.lun; ppa.g.blk = 
rblk->id; - if (nvm_erase_blk(dev, &ppa, 0)) + if (nvm_erase_sync(rrpc->dev, &ppa, 1)) goto put_back; rrpc_put_blk(rrpc, rblk); @@ -822,7 +817,7 @@ static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio, for (i = 0; i < npages; i++) { /* We assume that mapping occurs at 4KB granularity */ - BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects)); + BUG_ON(!(laddr + i < rrpc->nr_sects)); gp = &rrpc->trans_map[laddr + i]; if (gp->rblk) { @@ -851,7 +846,7 @@ static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd, if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) return NVM_IO_REQUEUE; - BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects)); + BUG_ON(!(laddr < rrpc->nr_sects)); gp = &rrpc->trans_map[laddr]; if (gp->rblk) { @@ -1007,11 +1002,6 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio) } rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL); - if (!rqd) { - pr_err_ratelimited("rrpc: not able to queue bio."); - bio_io_error(bio); - return BLK_QC_T_NONE; - } memset(rqd, 0, sizeof(struct nvm_rq)); err = rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_NONE); @@ -1275,8 +1265,10 @@ static int rrpc_bb_discovery(struct nvm_tgt_dev *dev, struct rrpc_lun *rlun) } nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); - if (nr_blks < 0) - return nr_blks; + if (nr_blks < 0) { + ret = nr_blks; + goto out; + } for (i = 0; i < nr_blks; i++) { if (blks[i] == NVM_BLK_T_FREE) @@ -1514,7 +1506,8 @@ err: static struct nvm_tgt_type tt_rrpc; -static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk) +static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, + int flags) { struct request_queue *bqueue = dev->q; struct request_queue *tqueue = tdisk->queue; diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index ceff415f201c..ee1a3d9147ef 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -144,12 +144,22 @@ config XGENE_SLIMPRO_MBOX want to use the APM X-Gene SLIMpro IPCM support. 
config BCM_PDC_MBOX - tristate "Broadcom PDC Mailbox" - depends on ARM64 || COMPILE_TEST + tristate "Broadcom FlexSparx DMA Mailbox" + depends on ARCH_BCM_IPROC || COMPILE_TEST depends on HAS_DMA + help + Mailbox implementation for the Broadcom FlexSparx DMA ring manager, + which provides access to various offload engines on Broadcom + SoCs, including FA2/FA+ on Northstar Plus and PDC on Northstar 2. + +config BCM_FLEXRM_MBOX + tristate "Broadcom FlexRM Mailbox" + depends on ARM64 + depends on HAS_DMA + select GENERIC_MSI_IRQ_DOMAIN default ARCH_BCM_IPROC help - Mailbox implementation for the Broadcom PDC ring manager, + Mailbox implementation of the Broadcom FlexRM ring manager, which provides access to various offload engines on Broadcom - SoCs. Say Y here if you want to use the Broadcom PDC. + SoCs. Say Y here if you want to use the Broadcom FlexRM. endif diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile index 7dde4f609ae8..e2bcb03cd35b 100644 --- a/drivers/mailbox/Makefile +++ b/drivers/mailbox/Makefile @@ -30,4 +30,6 @@ obj-$(CONFIG_HI6220_MBOX) += hi6220-mailbox.o obj-$(CONFIG_BCM_PDC_MBOX) += bcm-pdc-mailbox.o +obj-$(CONFIG_BCM_FLEXRM_MBOX) += bcm-flexrm-mailbox.o + obj-$(CONFIG_TEGRA_HSP_MBOX) += tegra-hsp.o diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c new file mode 100644 index 000000000000..da67882caa7b --- /dev/null +++ b/drivers/mailbox/bcm-flexrm-mailbox.c @@ -0,0 +1,1595 @@ +/* Broadcom FlexRM Mailbox Driver + * + * Copyright (C) 2017 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Each Broadcom FlexSparx4 offload engine is implemented as an + * extension to Broadcom FlexRM ring manager. The FlexRM ring + * manager provides a set of rings which can be used to submit + * work to a FlexSparx4 offload engine. 
+ * + * This driver creates a mailbox controller using a set of FlexRM + * rings where each mailbox channel represents a separate FlexRM ring. + */ + +#include <asm/barrier.h> +#include <asm/byteorder.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/err.h> +#include <linux/idr.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/mailbox_controller.h> +#include <linux/mailbox_client.h> +#include <linux/mailbox/brcm-message.h> +#include <linux/module.h> +#include <linux/msi.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> + +/* ====== FlexRM register defines ===== */ + +/* FlexRM configuration */ +#define RING_REGS_SIZE 0x10000 +#define RING_DESC_SIZE 8 +#define RING_DESC_INDEX(offset) \ + ((offset) / RING_DESC_SIZE) +#define RING_DESC_OFFSET(index) \ + ((index) * RING_DESC_SIZE) +#define RING_MAX_REQ_COUNT 1024 +#define RING_BD_ALIGN_ORDER 12 +#define RING_BD_ALIGN_CHECK(addr) \ + (!((addr) & ((0x1 << RING_BD_ALIGN_ORDER) - 1))) +#define RING_BD_TOGGLE_INVALID(offset) \ + (((offset) >> RING_BD_ALIGN_ORDER) & 0x1) +#define RING_BD_TOGGLE_VALID(offset) \ + (!RING_BD_TOGGLE_INVALID(offset)) +#define RING_BD_DESC_PER_REQ 32 +#define RING_BD_DESC_COUNT \ + (RING_MAX_REQ_COUNT * RING_BD_DESC_PER_REQ) +#define RING_BD_SIZE \ + (RING_BD_DESC_COUNT * RING_DESC_SIZE) +#define RING_CMPL_ALIGN_ORDER 13 +#define RING_CMPL_DESC_COUNT RING_MAX_REQ_COUNT +#define RING_CMPL_SIZE \ + (RING_CMPL_DESC_COUNT * RING_DESC_SIZE) +#define RING_VER_MAGIC 0x76303031 + +/* Per-Ring register offsets */ +#define RING_VER 0x000 +#define RING_BD_START_ADDR 0x004 +#define RING_BD_READ_PTR 0x008 +#define RING_BD_WRITE_PTR 0x00c +#define RING_BD_READ_PTR_DDR_LS 0x010 +#define RING_BD_READ_PTR_DDR_MS 0x014 +#define RING_CMPL_START_ADDR 0x018 +#define RING_CMPL_WRITE_PTR 0x01c +#define RING_NUM_REQ_RECV_LS 0x020 +#define 
RING_NUM_REQ_RECV_MS 0x024 +#define RING_NUM_REQ_TRANS_LS 0x028 +#define RING_NUM_REQ_TRANS_MS 0x02c +#define RING_NUM_REQ_OUTSTAND 0x030 +#define RING_CONTROL 0x034 +#define RING_FLUSH_DONE 0x038 +#define RING_MSI_ADDR_LS 0x03c +#define RING_MSI_ADDR_MS 0x040 +#define RING_MSI_CONTROL 0x048 +#define RING_BD_READ_PTR_DDR_CONTROL 0x04c +#define RING_MSI_DATA_VALUE 0x064 + +/* Register RING_BD_START_ADDR fields */ +#define BD_LAST_UPDATE_HW_SHIFT 28 +#define BD_LAST_UPDATE_HW_MASK 0x1 +#define BD_START_ADDR_VALUE(pa) \ + ((u32)((((dma_addr_t)(pa)) >> RING_BD_ALIGN_ORDER) & 0x0fffffff)) +#define BD_START_ADDR_DECODE(val) \ + ((dma_addr_t)((val) & 0x0fffffff) << RING_BD_ALIGN_ORDER) + +/* Register RING_CMPL_START_ADDR fields */ +#define CMPL_START_ADDR_VALUE(pa) \ + ((u32)((((u64)(pa)) >> RING_CMPL_ALIGN_ORDER) & 0x03ffffff)) + +/* Register RING_CONTROL fields */ +#define CONTROL_MASK_DISABLE_CONTROL 12 +#define CONTROL_FLUSH_SHIFT 5 +#define CONTROL_ACTIVE_SHIFT 4 +#define CONTROL_RATE_ADAPT_MASK 0xf +#define CONTROL_RATE_DYNAMIC 0x0 +#define CONTROL_RATE_FAST 0x8 +#define CONTROL_RATE_MEDIUM 0x9 +#define CONTROL_RATE_SLOW 0xa +#define CONTROL_RATE_IDLE 0xb + +/* Register RING_FLUSH_DONE fields */ +#define FLUSH_DONE_MASK 0x1 + +/* Register RING_MSI_CONTROL fields */ +#define MSI_TIMER_VAL_SHIFT 16 +#define MSI_TIMER_VAL_MASK 0xffff +#define MSI_ENABLE_SHIFT 15 +#define MSI_ENABLE_MASK 0x1 +#define MSI_COUNT_SHIFT 0 +#define MSI_COUNT_MASK 0x3ff + +/* Register RING_BD_READ_PTR_DDR_CONTROL fields */ +#define BD_READ_PTR_DDR_TIMER_VAL_SHIFT 16 +#define BD_READ_PTR_DDR_TIMER_VAL_MASK 0xffff +#define BD_READ_PTR_DDR_ENABLE_SHIFT 15 +#define BD_READ_PTR_DDR_ENABLE_MASK 0x1 + +/* ====== FlexRM ring descriptor defines ===== */ + +/* Completion descriptor format */ +#define CMPL_OPAQUE_SHIFT 0 +#define CMPL_OPAQUE_MASK 0xffff +#define CMPL_ENGINE_STATUS_SHIFT 16 +#define CMPL_ENGINE_STATUS_MASK 0xffff +#define CMPL_DME_STATUS_SHIFT 32 +#define CMPL_DME_STATUS_MASK 0xffff 
+#define CMPL_RM_STATUS_SHIFT 48 +#define CMPL_RM_STATUS_MASK 0xffff + +/* Completion DME status code */ +#define DME_STATUS_MEM_COR_ERR BIT(0) +#define DME_STATUS_MEM_UCOR_ERR BIT(1) +#define DME_STATUS_FIFO_UNDERFLOW BIT(2) +#define DME_STATUS_FIFO_OVERFLOW BIT(3) +#define DME_STATUS_RRESP_ERR BIT(4) +#define DME_STATUS_BRESP_ERR BIT(5) +#define DME_STATUS_ERROR_MASK (DME_STATUS_MEM_COR_ERR | \ + DME_STATUS_MEM_UCOR_ERR | \ + DME_STATUS_FIFO_UNDERFLOW | \ + DME_STATUS_FIFO_OVERFLOW | \ + DME_STATUS_RRESP_ERR | \ + DME_STATUS_BRESP_ERR) + +/* Completion RM status code */ +#define RM_STATUS_CODE_SHIFT 0 +#define RM_STATUS_CODE_MASK 0x3ff +#define RM_STATUS_CODE_GOOD 0x0 +#define RM_STATUS_CODE_AE_TIMEOUT 0x3ff + +/* General descriptor format */ +#define DESC_TYPE_SHIFT 60 +#define DESC_TYPE_MASK 0xf +#define DESC_PAYLOAD_SHIFT 0 +#define DESC_PAYLOAD_MASK 0x0fffffffffffffff + +/* Null descriptor format */ +#define NULL_TYPE 0 +#define NULL_TOGGLE_SHIFT 58 +#define NULL_TOGGLE_MASK 0x1 + +/* Header descriptor format */ +#define HEADER_TYPE 1 +#define HEADER_TOGGLE_SHIFT 58 +#define HEADER_TOGGLE_MASK 0x1 +#define HEADER_ENDPKT_SHIFT 57 +#define HEADER_ENDPKT_MASK 0x1 +#define HEADER_STARTPKT_SHIFT 56 +#define HEADER_STARTPKT_MASK 0x1 +#define HEADER_BDCOUNT_SHIFT 36 +#define HEADER_BDCOUNT_MASK 0x1f +#define HEADER_BDCOUNT_MAX HEADER_BDCOUNT_MASK +#define HEADER_FLAGS_SHIFT 16 +#define HEADER_FLAGS_MASK 0xffff +#define HEADER_OPAQUE_SHIFT 0 +#define HEADER_OPAQUE_MASK 0xffff + +/* Source (SRC) descriptor format */ +#define SRC_TYPE 2 +#define SRC_LENGTH_SHIFT 44 +#define SRC_LENGTH_MASK 0xffff +#define SRC_ADDR_SHIFT 0 +#define SRC_ADDR_MASK 0x00000fffffffffff + +/* Destination (DST) descriptor format */ +#define DST_TYPE 3 +#define DST_LENGTH_SHIFT 44 +#define DST_LENGTH_MASK 0xffff +#define DST_ADDR_SHIFT 0 +#define DST_ADDR_MASK 0x00000fffffffffff + +/* Immediate (IMM) descriptor format */ +#define IMM_TYPE 4 +#define IMM_DATA_SHIFT 0 +#define IMM_DATA_MASK 
0x0fffffffffffffff + +/* Next pointer (NPTR) descriptor format */ +#define NPTR_TYPE 5 +#define NPTR_TOGGLE_SHIFT 58 +#define NPTR_TOGGLE_MASK 0x1 +#define NPTR_ADDR_SHIFT 0 +#define NPTR_ADDR_MASK 0x00000fffffffffff + +/* Mega source (MSRC) descriptor format */ +#define MSRC_TYPE 6 +#define MSRC_LENGTH_SHIFT 44 +#define MSRC_LENGTH_MASK 0xffff +#define MSRC_ADDR_SHIFT 0 +#define MSRC_ADDR_MASK 0x00000fffffffffff + +/* Mega destination (MDST) descriptor format */ +#define MDST_TYPE 7 +#define MDST_LENGTH_SHIFT 44 +#define MDST_LENGTH_MASK 0xffff +#define MDST_ADDR_SHIFT 0 +#define MDST_ADDR_MASK 0x00000fffffffffff + +/* Source with tlast (SRCT) descriptor format */ +#define SRCT_TYPE 8 +#define SRCT_LENGTH_SHIFT 44 +#define SRCT_LENGTH_MASK 0xffff +#define SRCT_ADDR_SHIFT 0 +#define SRCT_ADDR_MASK 0x00000fffffffffff + +/* Destination with tlast (DSTT) descriptor format */ +#define DSTT_TYPE 9 +#define DSTT_LENGTH_SHIFT 44 +#define DSTT_LENGTH_MASK 0xffff +#define DSTT_ADDR_SHIFT 0 +#define DSTT_ADDR_MASK 0x00000fffffffffff + +/* Immediate with tlast (IMMT) descriptor format */ +#define IMMT_TYPE 10 +#define IMMT_DATA_SHIFT 0 +#define IMMT_DATA_MASK 0x0fffffffffffffff + +/* Descriptor helper macros */ +#define DESC_DEC(_d, _s, _m) (((_d) >> (_s)) & (_m)) +#define DESC_ENC(_d, _v, _s, _m) \ + do { \ + (_d) &= ~((u64)(_m) << (_s)); \ + (_d) |= (((u64)(_v) & (_m)) << (_s)); \ + } while (0) + +/* ====== FlexRM data structures ===== */ + +struct flexrm_ring { + /* Unprotected members */ + int num; + struct flexrm_mbox *mbox; + void __iomem *regs; + bool irq_requested; + unsigned int irq; + unsigned int msi_timer_val; + unsigned int msi_count_threshold; + struct ida requests_ida; + struct brcm_message *requests[RING_MAX_REQ_COUNT]; + void *bd_base; + dma_addr_t bd_dma_base; + u32 bd_write_offset; + void *cmpl_base; + dma_addr_t cmpl_dma_base; + /* Protected members */ + spinlock_t lock; + struct brcm_message *last_pending_msg; + u32 cmpl_read_offset; +}; + +struct 
flexrm_mbox { + struct device *dev; + void __iomem *regs; + u32 num_rings; + struct flexrm_ring *rings; + struct dma_pool *bd_pool; + struct dma_pool *cmpl_pool; + struct mbox_controller controller; +}; + +/* ====== FlexRM ring descriptor helper routines ===== */ + +static u64 flexrm_read_desc(void *desc_ptr) +{ + return le64_to_cpu(*((u64 *)desc_ptr)); +} + +static void flexrm_write_desc(void *desc_ptr, u64 desc) +{ + *((u64 *)desc_ptr) = cpu_to_le64(desc); +} + +static u32 flexrm_cmpl_desc_to_reqid(u64 cmpl_desc) +{ + return (u32)(cmpl_desc & CMPL_OPAQUE_MASK); +} + +static int flexrm_cmpl_desc_to_error(u64 cmpl_desc) +{ + u32 status; + + status = DESC_DEC(cmpl_desc, CMPL_DME_STATUS_SHIFT, + CMPL_DME_STATUS_MASK); + if (status & DME_STATUS_ERROR_MASK) + return -EIO; + + status = DESC_DEC(cmpl_desc, CMPL_RM_STATUS_SHIFT, + CMPL_RM_STATUS_MASK); + status &= RM_STATUS_CODE_MASK; + if (status == RM_STATUS_CODE_AE_TIMEOUT) + return -ETIMEDOUT; + + return 0; +} + +static bool flexrm_is_next_table_desc(void *desc_ptr) +{ + u64 desc = flexrm_read_desc(desc_ptr); + u32 type = DESC_DEC(desc, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + + return (type == NPTR_TYPE) ? 
true : false; +} + +static u64 flexrm_next_table_desc(u32 toggle, dma_addr_t next_addr) +{ + u64 desc = 0; + + DESC_ENC(desc, NPTR_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, toggle, NPTR_TOGGLE_SHIFT, NPTR_TOGGLE_MASK); + DESC_ENC(desc, next_addr, NPTR_ADDR_SHIFT, NPTR_ADDR_MASK); + + return desc; +} + +static u64 flexrm_null_desc(u32 toggle) +{ + u64 desc = 0; + + DESC_ENC(desc, NULL_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, toggle, NULL_TOGGLE_SHIFT, NULL_TOGGLE_MASK); + + return desc; +} + +static u32 flexrm_estimate_header_desc_count(u32 nhcnt) +{ + u32 hcnt = nhcnt / HEADER_BDCOUNT_MAX; + + if (!(nhcnt % HEADER_BDCOUNT_MAX)) + hcnt += 1; + + return hcnt; +} + +static void flexrm_flip_header_toogle(void *desc_ptr) +{ + u64 desc = flexrm_read_desc(desc_ptr); + + if (desc & ((u64)0x1 << HEADER_TOGGLE_SHIFT)) + desc &= ~((u64)0x1 << HEADER_TOGGLE_SHIFT); + else + desc |= ((u64)0x1 << HEADER_TOGGLE_SHIFT); + + flexrm_write_desc(desc_ptr, desc); +} + +static u64 flexrm_header_desc(u32 toggle, u32 startpkt, u32 endpkt, + u32 bdcount, u32 flags, u32 opaque) +{ + u64 desc = 0; + + DESC_ENC(desc, HEADER_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, toggle, HEADER_TOGGLE_SHIFT, HEADER_TOGGLE_MASK); + DESC_ENC(desc, startpkt, HEADER_STARTPKT_SHIFT, HEADER_STARTPKT_MASK); + DESC_ENC(desc, endpkt, HEADER_ENDPKT_SHIFT, HEADER_ENDPKT_MASK); + DESC_ENC(desc, bdcount, HEADER_BDCOUNT_SHIFT, HEADER_BDCOUNT_MASK); + DESC_ENC(desc, flags, HEADER_FLAGS_SHIFT, HEADER_FLAGS_MASK); + DESC_ENC(desc, opaque, HEADER_OPAQUE_SHIFT, HEADER_OPAQUE_MASK); + + return desc; +} + +static void flexrm_enqueue_desc(u32 nhpos, u32 nhcnt, u32 reqid, + u64 desc, void **desc_ptr, u32 *toggle, + void *start_desc, void *end_desc) +{ + u64 d; + u32 nhavail, _toggle, _startpkt, _endpkt, _bdcount; + + /* Sanity check */ + if (nhcnt <= nhpos) + return; + + /* + * Each request or packet start with a HEADER descriptor followed + * by one or more non-HEADER descriptors 
(SRC, SRCT, MSRC, DST, + * DSTT, MDST, IMM, and IMMT). The number of non-HEADER descriptors + * following a HEADER descriptor is represented by BDCOUNT field + * of HEADER descriptor. The max value of BDCOUNT field is 31 which + * means we can only have 31 non-HEADER descriptors following one + * HEADER descriptor. + * + * In general use, number of non-HEADER descriptors can easily go + * beyond 31. To tackle this situation, we have packet (or request) + * extenstion bits (STARTPKT and ENDPKT) in the HEADER descriptor. + * + * To use packet extension, the first HEADER descriptor of request + * (or packet) will have STARTPKT=1 and ENDPKT=0. The intermediate + * HEADER descriptors will have STARTPKT=0 and ENDPKT=0. The last + * HEADER descriptor will have STARTPKT=0 and ENDPKT=1. Also, the + * TOGGLE bit of the first HEADER will be set to invalid state to + * ensure that FlexRM does not start fetching descriptors till all + * descriptors are enqueued. The user of this function will flip + * the TOGGLE bit of first HEADER after all descriptors are + * enqueued. + */ + + if ((nhpos % HEADER_BDCOUNT_MAX == 0) && (nhcnt - nhpos)) { + /* Prepare the header descriptor */ + nhavail = (nhcnt - nhpos); + _toggle = (nhpos == 0) ? !(*toggle) : (*toggle); + _startpkt = (nhpos == 0) ? 0x1 : 0x0; + _endpkt = (nhavail <= HEADER_BDCOUNT_MAX) ? 0x1 : 0x0; + _bdcount = (nhavail <= HEADER_BDCOUNT_MAX) ? + nhavail : HEADER_BDCOUNT_MAX; + if (nhavail <= HEADER_BDCOUNT_MAX) + _bdcount = nhavail; + else + _bdcount = HEADER_BDCOUNT_MAX; + d = flexrm_header_desc(_toggle, _startpkt, _endpkt, + _bdcount, 0x0, reqid); + + /* Write header descriptor */ + flexrm_write_desc(*desc_ptr, d); + + /* Point to next descriptor */ + *desc_ptr += sizeof(desc); + if (*desc_ptr == end_desc) + *desc_ptr = start_desc; + + /* Skip next pointer descriptors */ + while (flexrm_is_next_table_desc(*desc_ptr)) { + *toggle = (*toggle) ? 
0 : 1; + *desc_ptr += sizeof(desc); + if (*desc_ptr == end_desc) + *desc_ptr = start_desc; + } + } + + /* Write desired descriptor */ + flexrm_write_desc(*desc_ptr, desc); + + /* Point to next descriptor */ + *desc_ptr += sizeof(desc); + if (*desc_ptr == end_desc) + *desc_ptr = start_desc; + + /* Skip next pointer descriptors */ + while (flexrm_is_next_table_desc(*desc_ptr)) { + *toggle = (*toggle) ? 0 : 1; + *desc_ptr += sizeof(desc); + if (*desc_ptr == end_desc) + *desc_ptr = start_desc; + } +} + +static u64 flexrm_src_desc(dma_addr_t addr, unsigned int length) +{ + u64 desc = 0; + + DESC_ENC(desc, SRC_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, length, SRC_LENGTH_SHIFT, SRC_LENGTH_MASK); + DESC_ENC(desc, addr, SRC_ADDR_SHIFT, SRC_ADDR_MASK); + + return desc; +} + +static u64 flexrm_msrc_desc(dma_addr_t addr, unsigned int length_div_16) +{ + u64 desc = 0; + + DESC_ENC(desc, MSRC_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, length_div_16, MSRC_LENGTH_SHIFT, MSRC_LENGTH_MASK); + DESC_ENC(desc, addr, MSRC_ADDR_SHIFT, MSRC_ADDR_MASK); + + return desc; +} + +static u64 flexrm_dst_desc(dma_addr_t addr, unsigned int length) +{ + u64 desc = 0; + + DESC_ENC(desc, DST_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, length, DST_LENGTH_SHIFT, DST_LENGTH_MASK); + DESC_ENC(desc, addr, DST_ADDR_SHIFT, DST_ADDR_MASK); + + return desc; +} + +static u64 flexrm_mdst_desc(dma_addr_t addr, unsigned int length_div_16) +{ + u64 desc = 0; + + DESC_ENC(desc, MDST_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, length_div_16, MDST_LENGTH_SHIFT, MDST_LENGTH_MASK); + DESC_ENC(desc, addr, MDST_ADDR_SHIFT, MDST_ADDR_MASK); + + return desc; +} + +static u64 flexrm_imm_desc(u64 data) +{ + u64 desc = 0; + + DESC_ENC(desc, IMM_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, data, IMM_DATA_SHIFT, IMM_DATA_MASK); + + return desc; +} + +static u64 flexrm_srct_desc(dma_addr_t addr, unsigned int length) +{ + u64 desc = 0; + + DESC_ENC(desc, 
SRCT_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, length, SRCT_LENGTH_SHIFT, SRCT_LENGTH_MASK); + DESC_ENC(desc, addr, SRCT_ADDR_SHIFT, SRCT_ADDR_MASK); + + return desc; +} + +static u64 flexrm_dstt_desc(dma_addr_t addr, unsigned int length) +{ + u64 desc = 0; + + DESC_ENC(desc, DSTT_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, length, DSTT_LENGTH_SHIFT, DSTT_LENGTH_MASK); + DESC_ENC(desc, addr, DSTT_ADDR_SHIFT, DSTT_ADDR_MASK); + + return desc; +} + +static u64 flexrm_immt_desc(u64 data) +{ + u64 desc = 0; + + DESC_ENC(desc, IMMT_TYPE, DESC_TYPE_SHIFT, DESC_TYPE_MASK); + DESC_ENC(desc, data, IMMT_DATA_SHIFT, IMMT_DATA_MASK); + + return desc; +} + +static bool flexrm_spu_sanity_check(struct brcm_message *msg) +{ + struct scatterlist *sg; + + if (!msg->spu.src || !msg->spu.dst) + return false; + for (sg = msg->spu.src; sg; sg = sg_next(sg)) { + if (sg->length & 0xf) { + if (sg->length > SRC_LENGTH_MASK) + return false; + } else { + if (sg->length > (MSRC_LENGTH_MASK * 16)) + return false; + } + } + for (sg = msg->spu.dst; sg; sg = sg_next(sg)) { + if (sg->length & 0xf) { + if (sg->length > DST_LENGTH_MASK) + return false; + } else { + if (sg->length > (MDST_LENGTH_MASK * 16)) + return false; + } + } + + return true; +} + +static u32 flexrm_spu_estimate_nonheader_desc_count(struct brcm_message *msg) +{ + u32 cnt = 0; + unsigned int dst_target = 0; + struct scatterlist *src_sg = msg->spu.src, *dst_sg = msg->spu.dst; + + while (src_sg || dst_sg) { + if (src_sg) { + cnt++; + dst_target = src_sg->length; + src_sg = sg_next(src_sg); + } else + dst_target = UINT_MAX; + + while (dst_target && dst_sg) { + cnt++; + if (dst_sg->length < dst_target) + dst_target -= dst_sg->length; + else + dst_target = 0; + dst_sg = sg_next(dst_sg); + } + } + + return cnt; +} + +static int flexrm_spu_dma_map(struct device *dev, struct brcm_message *msg) +{ + int rc; + + rc = dma_map_sg(dev, msg->spu.src, sg_nents(msg->spu.src), + DMA_TO_DEVICE); + if (rc < 0) + return 
rc; + + rc = dma_map_sg(dev, msg->spu.dst, sg_nents(msg->spu.dst), + DMA_FROM_DEVICE); + if (rc < 0) { + dma_unmap_sg(dev, msg->spu.src, sg_nents(msg->spu.src), + DMA_TO_DEVICE); + return rc; + } + + return 0; +} + +static void flexrm_spu_dma_unmap(struct device *dev, struct brcm_message *msg) +{ + dma_unmap_sg(dev, msg->spu.dst, sg_nents(msg->spu.dst), + DMA_FROM_DEVICE); + dma_unmap_sg(dev, msg->spu.src, sg_nents(msg->spu.src), + DMA_TO_DEVICE); +} + +static void *flexrm_spu_write_descs(struct brcm_message *msg, u32 nhcnt, + u32 reqid, void *desc_ptr, u32 toggle, + void *start_desc, void *end_desc) +{ + u64 d; + u32 nhpos = 0; + void *orig_desc_ptr = desc_ptr; + unsigned int dst_target = 0; + struct scatterlist *src_sg = msg->spu.src, *dst_sg = msg->spu.dst; + + while (src_sg || dst_sg) { + if (src_sg) { + if (sg_dma_len(src_sg) & 0xf) + d = flexrm_src_desc(sg_dma_address(src_sg), + sg_dma_len(src_sg)); + else + d = flexrm_msrc_desc(sg_dma_address(src_sg), + sg_dma_len(src_sg)/16); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + dst_target = sg_dma_len(src_sg); + src_sg = sg_next(src_sg); + } else + dst_target = UINT_MAX; + + while (dst_target && dst_sg) { + if (sg_dma_len(dst_sg) & 0xf) + d = flexrm_dst_desc(sg_dma_address(dst_sg), + sg_dma_len(dst_sg)); + else + d = flexrm_mdst_desc(sg_dma_address(dst_sg), + sg_dma_len(dst_sg)/16); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + if (sg_dma_len(dst_sg) < dst_target) + dst_target -= sg_dma_len(dst_sg); + else + dst_target = 0; + dst_sg = sg_next(dst_sg); + } + } + + /* Null descriptor with invalid toggle bit */ + flexrm_write_desc(desc_ptr, flexrm_null_desc(!toggle)); + + /* Ensure that descriptors have been written to memory */ + wmb(); + + /* Flip toggle bit in header */ + flexrm_flip_header_toogle(orig_desc_ptr); + + return desc_ptr; +} + +static bool flexrm_sba_sanity_check(struct brcm_message 
*msg) +{ + u32 i; + + if (!msg->sba.cmds || !msg->sba.cmds_count) + return false; + + for (i = 0; i < msg->sba.cmds_count; i++) { + if (((msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_B) || + (msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_C)) && + (msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_OUTPUT)) + return false; + if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_B) && + (msg->sba.cmds[i].data_len > SRCT_LENGTH_MASK)) + return false; + if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_C) && + (msg->sba.cmds[i].data_len > SRCT_LENGTH_MASK)) + return false; + if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_RESP) && + (msg->sba.cmds[i].resp_len > DSTT_LENGTH_MASK)) + return false; + if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_OUTPUT) && + (msg->sba.cmds[i].data_len > DSTT_LENGTH_MASK)) + return false; + } + + return true; +} + +static u32 flexrm_sba_estimate_nonheader_desc_count(struct brcm_message *msg) +{ + u32 i, cnt; + + cnt = 0; + for (i = 0; i < msg->sba.cmds_count; i++) { + cnt++; + + if ((msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_B) || + (msg->sba.cmds[i].flags & BRCM_SBA_CMD_TYPE_C)) + cnt++; + + if (msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_RESP) + cnt++; + + if (msg->sba.cmds[i].flags & BRCM_SBA_CMD_HAS_OUTPUT) + cnt++; + } + + return cnt; +} + +static void *flexrm_sba_write_descs(struct brcm_message *msg, u32 nhcnt, + u32 reqid, void *desc_ptr, u32 toggle, + void *start_desc, void *end_desc) +{ + u64 d; + u32 i, nhpos = 0; + struct brcm_sba_command *c; + void *orig_desc_ptr = desc_ptr; + + /* Convert SBA commands into descriptors */ + for (i = 0; i < msg->sba.cmds_count; i++) { + c = &msg->sba.cmds[i]; + + if ((c->flags & BRCM_SBA_CMD_HAS_RESP) && + (c->flags & BRCM_SBA_CMD_HAS_OUTPUT)) { + /* Destination response descriptor */ + d = flexrm_dst_desc(c->resp, c->resp_len); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + } else if (c->flags & BRCM_SBA_CMD_HAS_RESP) { + /* Destination response with tlast 
descriptor */ + d = flexrm_dstt_desc(c->resp, c->resp_len); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + } + + if (c->flags & BRCM_SBA_CMD_HAS_OUTPUT) { + /* Destination with tlast descriptor */ + d = flexrm_dstt_desc(c->data, c->data_len); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + } + + if (c->flags & BRCM_SBA_CMD_TYPE_B) { + /* Command as immediate descriptor */ + d = flexrm_imm_desc(c->cmd); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + } else { + /* Command as immediate descriptor with tlast */ + d = flexrm_immt_desc(c->cmd); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + } + + if ((c->flags & BRCM_SBA_CMD_TYPE_B) || + (c->flags & BRCM_SBA_CMD_TYPE_C)) { + /* Source with tlast descriptor */ + d = flexrm_srct_desc(c->data, c->data_len); + flexrm_enqueue_desc(nhpos, nhcnt, reqid, + d, &desc_ptr, &toggle, + start_desc, end_desc); + nhpos++; + } + } + + /* Null descriptor with invalid toggle bit */ + flexrm_write_desc(desc_ptr, flexrm_null_desc(!toggle)); + + /* Ensure that descriptors have been written to memory */ + wmb(); + + /* Flip toggle bit in header */ + flexrm_flip_header_toogle(orig_desc_ptr); + + return desc_ptr; +} + +static bool flexrm_sanity_check(struct brcm_message *msg) +{ + if (!msg) + return false; + + switch (msg->type) { + case BRCM_MESSAGE_SPU: + return flexrm_spu_sanity_check(msg); + case BRCM_MESSAGE_SBA: + return flexrm_sba_sanity_check(msg); + default: + return false; + }; +} + +static u32 flexrm_estimate_nonheader_desc_count(struct brcm_message *msg) +{ + if (!msg) + return 0; + + switch (msg->type) { + case BRCM_MESSAGE_SPU: + return flexrm_spu_estimate_nonheader_desc_count(msg); + case BRCM_MESSAGE_SBA: + return flexrm_sba_estimate_nonheader_desc_count(msg); + default: + return 0; + }; +} + 
+static int flexrm_dma_map(struct device *dev, struct brcm_message *msg) +{ + if (!dev || !msg) + return -EINVAL; + + switch (msg->type) { + case BRCM_MESSAGE_SPU: + return flexrm_spu_dma_map(dev, msg); + default: + break; + } + + return 0; +} + +static void flexrm_dma_unmap(struct device *dev, struct brcm_message *msg) +{ + if (!dev || !msg) + return; + + switch (msg->type) { + case BRCM_MESSAGE_SPU: + flexrm_spu_dma_unmap(dev, msg); + break; + default: + break; + } +} + +static void *flexrm_write_descs(struct brcm_message *msg, u32 nhcnt, + u32 reqid, void *desc_ptr, u32 toggle, + void *start_desc, void *end_desc) +{ + if (!msg || !desc_ptr || !start_desc || !end_desc) + return ERR_PTR(-ENOTSUPP); + + if ((desc_ptr < start_desc) || (end_desc <= desc_ptr)) + return ERR_PTR(-ERANGE); + + switch (msg->type) { + case BRCM_MESSAGE_SPU: + return flexrm_spu_write_descs(msg, nhcnt, reqid, + desc_ptr, toggle, + start_desc, end_desc); + case BRCM_MESSAGE_SBA: + return flexrm_sba_write_descs(msg, nhcnt, reqid, + desc_ptr, toggle, + start_desc, end_desc); + default: + return ERR_PTR(-ENOTSUPP); + }; +} + +/* ====== FlexRM driver helper routines ===== */ + +static int flexrm_new_request(struct flexrm_ring *ring, + struct brcm_message *batch_msg, + struct brcm_message *msg) +{ + void *next; + unsigned long flags; + u32 val, count, nhcnt; + u32 read_offset, write_offset; + bool exit_cleanup = false; + int ret = 0, reqid; + + /* Do sanity check on message */ + if (!flexrm_sanity_check(msg)) + return -EIO; + msg->error = 0; + + /* If no requests possible then save data pointer and goto done. 
*/ + reqid = ida_simple_get(&ring->requests_ida, 0, + RING_MAX_REQ_COUNT, GFP_KERNEL); + if (reqid < 0) { + spin_lock_irqsave(&ring->lock, flags); + if (batch_msg) + ring->last_pending_msg = batch_msg; + else + ring->last_pending_msg = msg; + spin_unlock_irqrestore(&ring->lock, flags); + return 0; + } + ring->requests[reqid] = msg; + + /* Do DMA mappings for the message */ + ret = flexrm_dma_map(ring->mbox->dev, msg); + if (ret < 0) { + ring->requests[reqid] = NULL; + ida_simple_remove(&ring->requests_ida, reqid); + return ret; + } + + /* If last_pending_msg is already set then goto done with error */ + spin_lock_irqsave(&ring->lock, flags); + if (ring->last_pending_msg) + ret = -ENOSPC; + spin_unlock_irqrestore(&ring->lock, flags); + if (ret < 0) { + dev_warn(ring->mbox->dev, "no space in ring %d\n", ring->num); + exit_cleanup = true; + goto exit; + } + + /* Determine current HW BD read offset */ + read_offset = readl_relaxed(ring->regs + RING_BD_READ_PTR); + val = readl_relaxed(ring->regs + RING_BD_START_ADDR); + read_offset *= RING_DESC_SIZE; + read_offset += (u32)(BD_START_ADDR_DECODE(val) - ring->bd_dma_base); + + /* + * Number required descriptors = number of non-header descriptors + + * number of header descriptors + + * 1x null descriptor + */ + nhcnt = flexrm_estimate_nonheader_desc_count(msg); + count = flexrm_estimate_header_desc_count(nhcnt) + nhcnt + 1; + + /* Check for available descriptor space. 
*/ + write_offset = ring->bd_write_offset; + while (count) { + if (!flexrm_is_next_table_desc(ring->bd_base + write_offset)) + count--; + write_offset += RING_DESC_SIZE; + if (write_offset == RING_BD_SIZE) + write_offset = 0x0; + if (write_offset == read_offset) + break; + } + if (count) { + spin_lock_irqsave(&ring->lock, flags); + if (batch_msg) + ring->last_pending_msg = batch_msg; + else + ring->last_pending_msg = msg; + spin_unlock_irqrestore(&ring->lock, flags); + ret = 0; + exit_cleanup = true; + goto exit; + } + + /* Write descriptors to ring */ + next = flexrm_write_descs(msg, nhcnt, reqid, + ring->bd_base + ring->bd_write_offset, + RING_BD_TOGGLE_VALID(ring->bd_write_offset), + ring->bd_base, ring->bd_base + RING_BD_SIZE); + if (IS_ERR(next)) { + ret = PTR_ERR(next); + exit_cleanup = true; + goto exit; + } + + /* Save ring BD write offset */ + ring->bd_write_offset = (unsigned long)(next - ring->bd_base); + +exit: + /* Update error status in message */ + msg->error = ret; + + /* Cleanup if we failed */ + if (exit_cleanup) { + flexrm_dma_unmap(ring->mbox->dev, msg); + ring->requests[reqid] = NULL; + ida_simple_remove(&ring->requests_ida, reqid); + } + + return ret; +} + +static int flexrm_process_completions(struct flexrm_ring *ring) +{ + u64 desc; + int err, count = 0; + unsigned long flags; + struct brcm_message *msg = NULL; + u32 reqid, cmpl_read_offset, cmpl_write_offset; + struct mbox_chan *chan = &ring->mbox->controller.chans[ring->num]; + + spin_lock_irqsave(&ring->lock, flags); + + /* Check last_pending_msg */ + if (ring->last_pending_msg) { + msg = ring->last_pending_msg; + ring->last_pending_msg = NULL; + } + + /* + * Get current completion read and write offset + * + * Note: We should read completion write pointer atleast once + * after we get a MSI interrupt because HW maintains internal + * MSI status which will allow next MSI interrupt only after + * completion write pointer is read. 
+ */ + cmpl_write_offset = readl_relaxed(ring->regs + RING_CMPL_WRITE_PTR); + cmpl_write_offset *= RING_DESC_SIZE; + cmpl_read_offset = ring->cmpl_read_offset; + ring->cmpl_read_offset = cmpl_write_offset; + + spin_unlock_irqrestore(&ring->lock, flags); + + /* If last_pending_msg was set then queue it back */ + if (msg) + mbox_send_message(chan, msg); + + /* For each completed request notify mailbox clients */ + reqid = 0; + while (cmpl_read_offset != cmpl_write_offset) { + /* Dequeue next completion descriptor */ + desc = *((u64 *)(ring->cmpl_base + cmpl_read_offset)); + + /* Next read offset */ + cmpl_read_offset += RING_DESC_SIZE; + if (cmpl_read_offset == RING_CMPL_SIZE) + cmpl_read_offset = 0; + + /* Decode error from completion descriptor */ + err = flexrm_cmpl_desc_to_error(desc); + if (err < 0) { + dev_warn(ring->mbox->dev, + "got completion desc=0x%lx with error %d", + (unsigned long)desc, err); + } + + /* Determine request id from completion descriptor */ + reqid = flexrm_cmpl_desc_to_reqid(desc); + + /* Determine message pointer based on reqid */ + msg = ring->requests[reqid]; + if (!msg) { + dev_warn(ring->mbox->dev, + "null msg pointer for completion desc=0x%lx", + (unsigned long)desc); + continue; + } + + /* Release reqid for recycling */ + ring->requests[reqid] = NULL; + ida_simple_remove(&ring->requests_ida, reqid); + + /* Unmap DMA mappings */ + flexrm_dma_unmap(ring->mbox->dev, msg); + + /* Give-back message to mailbox client */ + msg->error = err; + mbox_chan_received_data(chan, msg); + + /* Increment number of completions processed */ + count++; + } + + return count; +} + +/* ====== FlexRM interrupt handler ===== */ + +static irqreturn_t flexrm_irq_event(int irq, void *dev_id) +{ + /* We only have MSI for completions so just wakeup IRQ thread */ + /* Ring related errors will be informed via completion descriptors */ + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t flexrm_irq_thread(int irq, void *dev_id) +{ + 
flexrm_process_completions(dev_id); + + return IRQ_HANDLED; +} + +/* ====== FlexRM mailbox callbacks ===== */ + +static int flexrm_send_data(struct mbox_chan *chan, void *data) +{ + int i, rc; + struct flexrm_ring *ring = chan->con_priv; + struct brcm_message *msg = data; + + if (msg->type == BRCM_MESSAGE_BATCH) { + for (i = msg->batch.msgs_queued; + i < msg->batch.msgs_count; i++) { + rc = flexrm_new_request(ring, msg, + &msg->batch.msgs[i]); + if (rc) { + msg->error = rc; + return rc; + } + msg->batch.msgs_queued++; + } + return 0; + } + + return flexrm_new_request(ring, NULL, data); +} + +static bool flexrm_peek_data(struct mbox_chan *chan) +{ + int cnt = flexrm_process_completions(chan->con_priv); + + return (cnt > 0) ? true : false; +} + +static int flexrm_startup(struct mbox_chan *chan) +{ + u64 d; + u32 val, off; + int ret = 0; + dma_addr_t next_addr; + struct flexrm_ring *ring = chan->con_priv; + + /* Allocate BD memory */ + ring->bd_base = dma_pool_alloc(ring->mbox->bd_pool, + GFP_KERNEL, &ring->bd_dma_base); + if (!ring->bd_base) { + dev_err(ring->mbox->dev, "can't allocate BD memory\n"); + ret = -ENOMEM; + goto fail; + } + + /* Configure next table pointer entries in BD memory */ + for (off = 0; off < RING_BD_SIZE; off += RING_DESC_SIZE) { + next_addr = off + RING_DESC_SIZE; + if (next_addr == RING_BD_SIZE) + next_addr = 0; + next_addr += ring->bd_dma_base; + if (RING_BD_ALIGN_CHECK(next_addr)) + d = flexrm_next_table_desc(RING_BD_TOGGLE_VALID(off), + next_addr); + else + d = flexrm_null_desc(RING_BD_TOGGLE_INVALID(off)); + flexrm_write_desc(ring->bd_base + off, d); + } + + /* Allocate completion memory */ + ring->cmpl_base = dma_pool_alloc(ring->mbox->cmpl_pool, + GFP_KERNEL, &ring->cmpl_dma_base); + if (!ring->cmpl_base) { + dev_err(ring->mbox->dev, "can't allocate completion memory\n"); + ret = -ENOMEM; + goto fail_free_bd_memory; + } + memset(ring->cmpl_base, 0, RING_CMPL_SIZE); + + /* Request IRQ */ + if (ring->irq == UINT_MAX) { + 
dev_err(ring->mbox->dev, "ring IRQ not available\n"); + ret = -ENODEV; + goto fail_free_cmpl_memory; + } + ret = request_threaded_irq(ring->irq, + flexrm_irq_event, + flexrm_irq_thread, + 0, dev_name(ring->mbox->dev), ring); + if (ret) { + dev_err(ring->mbox->dev, "failed to request ring IRQ\n"); + goto fail_free_cmpl_memory; + } + ring->irq_requested = true; + + /* Disable/inactivate ring */ + writel_relaxed(0x0, ring->regs + RING_CONTROL); + + /* Program BD start address */ + val = BD_START_ADDR_VALUE(ring->bd_dma_base); + writel_relaxed(val, ring->regs + RING_BD_START_ADDR); + + /* BD write pointer will be same as HW write pointer */ + ring->bd_write_offset = + readl_relaxed(ring->regs + RING_BD_WRITE_PTR); + ring->bd_write_offset *= RING_DESC_SIZE; + + /* Program completion start address */ + val = CMPL_START_ADDR_VALUE(ring->cmpl_dma_base); + writel_relaxed(val, ring->regs + RING_CMPL_START_ADDR); + + /* Ensure last pending message is cleared */ + ring->last_pending_msg = NULL; + + /* Completion read pointer will be same as HW write pointer */ + ring->cmpl_read_offset = + readl_relaxed(ring->regs + RING_CMPL_WRITE_PTR); + ring->cmpl_read_offset *= RING_DESC_SIZE; + + /* Read ring Tx, Rx, and Outstanding counts to clear */ + readl_relaxed(ring->regs + RING_NUM_REQ_RECV_LS); + readl_relaxed(ring->regs + RING_NUM_REQ_RECV_MS); + readl_relaxed(ring->regs + RING_NUM_REQ_TRANS_LS); + readl_relaxed(ring->regs + RING_NUM_REQ_TRANS_MS); + readl_relaxed(ring->regs + RING_NUM_REQ_OUTSTAND); + + /* Configure RING_MSI_CONTROL */ + val = 0; + val |= (ring->msi_timer_val << MSI_TIMER_VAL_SHIFT); + val |= BIT(MSI_ENABLE_SHIFT); + val |= (ring->msi_count_threshold & MSI_COUNT_MASK) << MSI_COUNT_SHIFT; + writel_relaxed(val, ring->regs + RING_MSI_CONTROL); + + /* Enable/activate ring */ + val = BIT(CONTROL_ACTIVE_SHIFT); + writel_relaxed(val, ring->regs + RING_CONTROL); + + return 0; + +fail_free_cmpl_memory: + dma_pool_free(ring->mbox->cmpl_pool, + ring->cmpl_base, 
ring->cmpl_dma_base); + ring->cmpl_base = NULL; +fail_free_bd_memory: + dma_pool_free(ring->mbox->bd_pool, + ring->bd_base, ring->bd_dma_base); + ring->bd_base = NULL; +fail: + return ret; +} + +static void flexrm_shutdown(struct mbox_chan *chan) +{ + u32 reqid; + unsigned int timeout; + struct brcm_message *msg; + struct flexrm_ring *ring = chan->con_priv; + + /* Disable/inactivate ring */ + writel_relaxed(0x0, ring->regs + RING_CONTROL); + + /* Flush ring with timeout of 1s */ + timeout = 1000; + writel_relaxed(BIT(CONTROL_FLUSH_SHIFT), + ring->regs + RING_CONTROL); + do { + if (readl_relaxed(ring->regs + RING_FLUSH_DONE) & + FLUSH_DONE_MASK) + break; + mdelay(1); + } while (timeout--); + + /* Abort all in-flight requests */ + for (reqid = 0; reqid < RING_MAX_REQ_COUNT; reqid++) { + msg = ring->requests[reqid]; + if (!msg) + continue; + + /* Release reqid for recycling */ + ring->requests[reqid] = NULL; + ida_simple_remove(&ring->requests_ida, reqid); + + /* Unmap DMA mappings */ + flexrm_dma_unmap(ring->mbox->dev, msg); + + /* Give-back message to mailbox client */ + msg->error = -EIO; + mbox_chan_received_data(chan, msg); + } + + /* Release IRQ */ + if (ring->irq_requested) { + free_irq(ring->irq, ring); + ring->irq_requested = false; + } + + /* Free-up completion descriptor ring */ + if (ring->cmpl_base) { + dma_pool_free(ring->mbox->cmpl_pool, + ring->cmpl_base, ring->cmpl_dma_base); + ring->cmpl_base = NULL; + } + + /* Free-up BD descriptor ring */ + if (ring->bd_base) { + dma_pool_free(ring->mbox->bd_pool, + ring->bd_base, ring->bd_dma_base); + ring->bd_base = NULL; + } +} + +static bool flexrm_last_tx_done(struct mbox_chan *chan) +{ + bool ret; + unsigned long flags; + struct flexrm_ring *ring = chan->con_priv; + + spin_lock_irqsave(&ring->lock, flags); + ret = (ring->last_pending_msg) ? 
false : true; + spin_unlock_irqrestore(&ring->lock, flags); + + return ret; +} + +static const struct mbox_chan_ops flexrm_mbox_chan_ops = { + .send_data = flexrm_send_data, + .startup = flexrm_startup, + .shutdown = flexrm_shutdown, + .last_tx_done = flexrm_last_tx_done, + .peek_data = flexrm_peek_data, +}; + +static struct mbox_chan *flexrm_mbox_of_xlate(struct mbox_controller *cntlr, + const struct of_phandle_args *pa) +{ + struct mbox_chan *chan; + struct flexrm_ring *ring; + + if (pa->args_count < 3) + return ERR_PTR(-EINVAL); + + if (pa->args[0] >= cntlr->num_chans) + return ERR_PTR(-ENOENT); + + if (pa->args[1] > MSI_COUNT_MASK) + return ERR_PTR(-EINVAL); + + if (pa->args[2] > MSI_TIMER_VAL_MASK) + return ERR_PTR(-EINVAL); + + chan = &cntlr->chans[pa->args[0]]; + ring = chan->con_priv; + ring->msi_count_threshold = pa->args[1]; + ring->msi_timer_val = pa->args[2]; + + return chan; +} + +/* ====== FlexRM platform driver ===== */ + +static void flexrm_mbox_msi_write(struct msi_desc *desc, struct msi_msg *msg) +{ + struct device *dev = msi_desc_to_dev(desc); + struct flexrm_mbox *mbox = dev_get_drvdata(dev); + struct flexrm_ring *ring = &mbox->rings[desc->platform.msi_index]; + + /* Configure per-Ring MSI registers */ + writel_relaxed(msg->address_lo, ring->regs + RING_MSI_ADDR_LS); + writel_relaxed(msg->address_hi, ring->regs + RING_MSI_ADDR_MS); + writel_relaxed(msg->data, ring->regs + RING_MSI_DATA_VALUE); +} + +static int flexrm_mbox_probe(struct platform_device *pdev) +{ + int index, ret = 0; + void __iomem *regs; + void __iomem *regs_end; + struct msi_desc *desc; + struct resource *iomem; + struct flexrm_ring *ring; + struct flexrm_mbox *mbox; + struct device *dev = &pdev->dev; + + /* Allocate driver mailbox struct */ + mbox = devm_kzalloc(dev, sizeof(*mbox), GFP_KERNEL); + if (!mbox) { + ret = -ENOMEM; + goto fail; + } + mbox->dev = dev; + platform_set_drvdata(pdev, mbox); + + /* Get resource for registers */ + iomem = platform_get_resource(pdev, 
IORESOURCE_MEM, 0); + if (!iomem || (resource_size(iomem) < RING_REGS_SIZE)) { + ret = -ENODEV; + goto fail; + } + + /* Map registers of all rings */ + mbox->regs = devm_ioremap_resource(&pdev->dev, iomem); + if (IS_ERR(mbox->regs)) { + ret = PTR_ERR(mbox->regs); + dev_err(&pdev->dev, "Failed to remap mailbox regs: %d\n", ret); + goto fail; + } + regs_end = mbox->regs + resource_size(iomem); + + /* Scan and count available rings */ + mbox->num_rings = 0; + for (regs = mbox->regs; regs < regs_end; regs += RING_REGS_SIZE) { + if (readl_relaxed(regs + RING_VER) == RING_VER_MAGIC) + mbox->num_rings++; + } + if (!mbox->num_rings) { + ret = -ENODEV; + goto fail; + } + + /* Allocate driver ring structs */ + ring = devm_kcalloc(dev, mbox->num_rings, sizeof(*ring), GFP_KERNEL); + if (!ring) { + ret = -ENOMEM; + goto fail; + } + mbox->rings = ring; + + /* Initialize members of driver ring structs */ + regs = mbox->regs; + for (index = 0; index < mbox->num_rings; index++) { + ring = &mbox->rings[index]; + ring->num = index; + ring->mbox = mbox; + while ((regs < regs_end) && + (readl_relaxed(regs + RING_VER) != RING_VER_MAGIC)) + regs += RING_REGS_SIZE; + if (regs_end <= regs) { + ret = -ENODEV; + goto fail; + } + ring->regs = regs; + regs += RING_REGS_SIZE; + ring->irq = UINT_MAX; + ring->irq_requested = false; + ring->msi_timer_val = MSI_TIMER_VAL_MASK; + ring->msi_count_threshold = 0x1; + ida_init(&ring->requests_ida); + memset(ring->requests, 0, sizeof(ring->requests)); + ring->bd_base = NULL; + ring->bd_dma_base = 0; + ring->cmpl_base = NULL; + ring->cmpl_dma_base = 0; + spin_lock_init(&ring->lock); + ring->last_pending_msg = NULL; + ring->cmpl_read_offset = 0; + } + + /* FlexRM is capable of 40-bit physical addresses only */ + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); + if (ret) { + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (ret) + goto fail; + } + + /* Create DMA pool for ring BD memory */ + mbox->bd_pool = dma_pool_create("bd", dev, 
RING_BD_SIZE, + 1 << RING_BD_ALIGN_ORDER, 0); + if (!mbox->bd_pool) { + ret = -ENOMEM; + goto fail; + } + + /* Create DMA pool for ring completion memory */ + mbox->cmpl_pool = dma_pool_create("cmpl", dev, RING_CMPL_SIZE, + 1 << RING_CMPL_ALIGN_ORDER, 0); + if (!mbox->cmpl_pool) { + ret = -ENOMEM; + goto fail_destroy_bd_pool; + } + + /* Allocate platform MSIs for each ring */ + ret = platform_msi_domain_alloc_irqs(dev, mbox->num_rings, + flexrm_mbox_msi_write); + if (ret) + goto fail_destroy_cmpl_pool; + + /* Save alloced IRQ numbers for each ring */ + for_each_msi_entry(desc, dev) { + ring = &mbox->rings[desc->platform.msi_index]; + ring->irq = desc->irq; + } + + /* Initialize mailbox controller */ + mbox->controller.txdone_irq = false; + mbox->controller.txdone_poll = true; + mbox->controller.txpoll_period = 1; + mbox->controller.ops = &flexrm_mbox_chan_ops; + mbox->controller.dev = dev; + mbox->controller.num_chans = mbox->num_rings; + mbox->controller.of_xlate = flexrm_mbox_of_xlate; + mbox->controller.chans = devm_kcalloc(dev, mbox->num_rings, + sizeof(*mbox->controller.chans), GFP_KERNEL); + if (!mbox->controller.chans) { + ret = -ENOMEM; + goto fail_free_msis; + } + for (index = 0; index < mbox->num_rings; index++) + mbox->controller.chans[index].con_priv = &mbox->rings[index]; + + /* Register mailbox controller */ + ret = mbox_controller_register(&mbox->controller); + if (ret) + goto fail_free_msis; + + dev_info(dev, "registered flexrm mailbox with %d channels\n", + mbox->controller.num_chans); + + return 0; + +fail_free_msis: + platform_msi_domain_free_irqs(dev); +fail_destroy_cmpl_pool: + dma_pool_destroy(mbox->cmpl_pool); +fail_destroy_bd_pool: + dma_pool_destroy(mbox->bd_pool); +fail: + return ret; +} + +static int flexrm_mbox_remove(struct platform_device *pdev) +{ + int index; + struct device *dev = &pdev->dev; + struct flexrm_ring *ring; + struct flexrm_mbox *mbox = platform_get_drvdata(pdev); + + mbox_controller_unregister(&mbox->controller); + + 
platform_msi_domain_free_irqs(dev); + + dma_pool_destroy(mbox->cmpl_pool); + dma_pool_destroy(mbox->bd_pool); + + for (index = 0; index < mbox->num_rings; index++) { + ring = &mbox->rings[index]; + ida_destroy(&ring->requests_ida); + } + + return 0; +} + +static const struct of_device_id flexrm_mbox_of_match[] = { + { .compatible = "brcm,iproc-flexrm-mbox", }, + {}, +}; +MODULE_DEVICE_TABLE(of, flexrm_mbox_of_match); + +static struct platform_driver flexrm_mbox_driver = { + .driver = { + .name = "brcm-flexrm-mbox", + .of_match_table = flexrm_mbox_of_match, + }, + .probe = flexrm_mbox_probe, + .remove = flexrm_mbox_remove, +}; +module_platform_driver(flexrm_mbox_driver); + +MODULE_AUTHOR("Anup Patel <anup.patel@broadcom.com>"); +MODULE_DESCRIPTION("Broadcom FlexRM mailbox driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mailbox/bcm-pdc-mailbox.c b/drivers/mailbox/bcm-pdc-mailbox.c index 2aeb034d5fb9..4fe7be0bdd11 100644 --- a/drivers/mailbox/bcm-pdc-mailbox.c +++ b/drivers/mailbox/bcm-pdc-mailbox.c @@ -18,7 +18,8 @@ * Broadcom PDC Mailbox Driver * The PDC provides a ring based programming interface to one or more hardware * offload engines. For example, the PDC driver works with both SPU-M and SPU2 - * cryptographic offload hardware. In some chips the PDC is referred to as MDE. + * cryptographic offload hardware. In some chips the PDC is referred to as MDE, + * and in others the FA2/FA+ hardware is used with this PDC driver. * * The PDC driver registers with the Linux mailbox framework as a mailbox * controller, once for each PDC instance. 
Ring 0 for each PDC is registered as @@ -108,6 +109,7 @@ #define PDC_INTMASK_OFFSET 0x24 #define PDC_INTSTATUS_OFFSET 0x20 #define PDC_RCVLAZY0_OFFSET (0x30 + 4 * PDC_RINGSET) +#define FA_RCVLAZY0_OFFSET 0x100 /* * For SPU2, configure MDE_CKSUM_CONTROL to write 17 bytes of metadata @@ -162,6 +164,11 @@ /* Maximum size buffer the DMA engine can handle */ #define PDC_DMA_BUF_MAX 16384 +enum pdc_hw { + FA_HW, /* FA2/FA+ hardware (i.e. Northstar Plus) */ + PDC_HW /* PDC/MDE hardware (i.e. Northstar 2, Pegasus) */ +}; + struct pdc_dma_map { void *ctx; /* opaque context associated with frame */ }; @@ -211,13 +218,13 @@ struct pdc_regs { u32 gptimer; /* 0x028 */ u32 PAD; - u32 intrcvlazy_0; /* 0x030 */ - u32 intrcvlazy_1; /* 0x034 */ - u32 intrcvlazy_2; /* 0x038 */ - u32 intrcvlazy_3; /* 0x03c */ + u32 intrcvlazy_0; /* 0x030 (Only in PDC, not FA2) */ + u32 intrcvlazy_1; /* 0x034 (Only in PDC, not FA2) */ + u32 intrcvlazy_2; /* 0x038 (Only in PDC, not FA2) */ + u32 intrcvlazy_3; /* 0x03c (Only in PDC, not FA2) */ u32 PAD[48]; - u32 removed_intrecvlazy; /* 0x100 */ + u32 fa_intrecvlazy; /* 0x100 (Only in FA2, not PDC) */ u32 flowctlthresh; /* 0x104 */ u32 wrrthresh; /* 0x108 */ u32 gmac_idle_cnt_thresh; /* 0x10c */ @@ -243,7 +250,7 @@ struct pdc_regs { u32 serdes_status1; /* 0x1b0 */ u32 PAD[11]; /* 0x1b4-1dc */ u32 clk_ctl_st; /* 0x1e0 */ - u32 hw_war; /* 0x1e4 */ + u32 hw_war; /* 0x1e4 (Only in PDC, not FA2) */ u32 pwrctl; /* 0x1e8 */ u32 PAD[5]; @@ -410,6 +417,9 @@ struct pdc_state { u32 txnobuf; /* unable to create tx descriptor */ u32 rxnobuf; /* unable to create rx descriptor */ u32 rx_oflow; /* count of rx overflows */ + + /* hardware type - FA2 or PDC/MDE */ + enum pdc_hw hw_type; }; /* Global variables */ @@ -1396,7 +1406,13 @@ static int pdc_interrupts_init(struct pdc_state *pdcs) /* interrupt configuration */ iowrite32(PDC_INTMASK, pdcs->pdc_reg_vbase + PDC_INTMASK_OFFSET); - iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase + PDC_RCVLAZY0_OFFSET); + + if 
(pdcs->hw_type == FA_HW) + iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase + + FA_RCVLAZY0_OFFSET); + else + iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase + + PDC_RCVLAZY0_OFFSET); /* read irq from device tree */ pdcs->pdc_irq = irq_of_parse_and_map(dn, 0); @@ -1465,6 +1481,17 @@ static int pdc_mb_init(struct pdc_state *pdcs) return 0; } +/* Device tree API */ +static const int pdc_hw = PDC_HW; +static const int fa_hw = FA_HW; + +static const struct of_device_id pdc_mbox_of_match[] = { + {.compatible = "brcm,iproc-pdc-mbox", .data = &pdc_hw}, + {.compatible = "brcm,iproc-fa2-mbox", .data = &fa_hw}, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, pdc_mbox_of_match); + /** * pdc_dt_read() - Read application-specific data from device tree. * @pdev: Platform device @@ -1481,6 +1508,8 @@ static int pdc_dt_read(struct platform_device *pdev, struct pdc_state *pdcs) { struct device *dev = &pdev->dev; struct device_node *dn = pdev->dev.of_node; + const struct of_device_id *match; + const int *hw_type; int err; err = of_property_read_u32(dn, "brcm,rx-status-len", @@ -1492,6 +1521,14 @@ static int pdc_dt_read(struct platform_device *pdev, struct pdc_state *pdcs) pdcs->use_bcm_hdr = of_property_read_bool(dn, "brcm,use-bcm-hdr"); + pdcs->hw_type = PDC_HW; + + match = of_match_device(of_match_ptr(pdc_mbox_of_match), dev); + if (match != NULL) { + hw_type = match->data; + pdcs->hw_type = *hw_type; + } + return 0; } @@ -1525,7 +1562,7 @@ static int pdc_probe(struct platform_device *pdev) pdcs->pdc_idx = pdcg.num_spu; pdcg.num_spu++; - err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39)); if (err) { dev_warn(dev, "PDC device cannot perform DMA. 
Error %d.", err); goto cleanup; @@ -1611,12 +1648,6 @@ static int pdc_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id pdc_mbox_of_match[] = { - {.compatible = "brcm,iproc-pdc-mbox"}, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(of, pdc_mbox_of_match); - static struct platform_driver pdc_mbox_driver = { .probe = pdc_probe, .remove = pdc_remove, diff --git a/drivers/mailbox/hi6220-mailbox.c b/drivers/mailbox/hi6220-mailbox.c index 613722db5daf..519376d3534c 100644 --- a/drivers/mailbox/hi6220-mailbox.c +++ b/drivers/mailbox/hi6220-mailbox.c @@ -221,7 +221,7 @@ static void hi6220_mbox_shutdown(struct mbox_chan *chan) mbox->irq_map_chan[mchan->ack_irq] = NULL; } -static struct mbox_chan_ops hi6220_mbox_ops = { +static const struct mbox_chan_ops hi6220_mbox_ops = { .send_data = hi6220_mbox_send_data, .startup = hi6220_mbox_startup, .shutdown = hi6220_mbox_shutdown, diff --git a/drivers/mailbox/mailbox-xgene-slimpro.c b/drivers/mailbox/mailbox-xgene-slimpro.c index dd2afbca51c9..a7040163dd43 100644 --- a/drivers/mailbox/mailbox-xgene-slimpro.c +++ b/drivers/mailbox/mailbox-xgene-slimpro.c @@ -174,7 +174,7 @@ static void slimpro_mbox_shutdown(struct mbox_chan *chan) devm_free_irq(mb_chan->dev, mb_chan->irq, mb_chan); } -static struct mbox_chan_ops slimpro_mbox_ops = { +static const struct mbox_chan_ops slimpro_mbox_ops = { .send_data = slimpro_mbox_send_data, .startup = slimpro_mbox_startup, .shutdown = slimpro_mbox_shutdown, diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 4671f8a12872..9dfbf7ea10a2 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -103,11 +103,14 @@ static void tx_tick(struct mbox_chan *chan, int r) /* Submit next message */ msg_submit(chan); + if (!mssg) + return; + /* Notify the client */ - if (mssg && chan->cl->tx_done) + if (chan->cl->tx_done) chan->cl->tx_done(chan->cl, mssg, r); - if (chan->cl->tx_block) + if (r != -ETIME && chan->cl->tx_block) 
complete(&chan->tx_complete); } @@ -260,7 +263,7 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg) msg_submit(chan); - if (chan->cl->tx_block && chan->active_req) { + if (chan->cl->tx_block) { unsigned long wait; int ret; @@ -271,8 +274,8 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg) ret = wait_for_completion_timeout(&chan->tx_complete, wait); if (ret == 0) { - t = -EIO; - tx_tick(chan, -EIO); + t = -ETIME; + tx_tick(chan, t); } } @@ -453,6 +456,12 @@ int mbox_controller_register(struct mbox_controller *mbox) txdone = TXDONE_BY_ACK; if (txdone == TXDONE_BY_POLL) { + + if (!mbox->ops->last_tx_done) { + dev_err(mbox->dev, "last_tx_done method is absent\n"); + return -EINVAL; + } + hrtimer_init(&mbox->poll_hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); mbox->poll_hrt.function = txdone_hrtimer; diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 9c689b34e6e7..975922c8f231 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2773,7 +2773,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->num_discard_bios = 1; ti->discards_supported = true; - ti->discard_zeroes_data_unsupported = true; ti->split_discard_bios = false; cache->features = ca->features; diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 136fda3ff9e5..fea5bd52ada8 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -132,6 +132,7 @@ void dm_init_md_queue(struct mapped_device *md); void dm_init_normal_md_queue(struct mapped_device *md); int md_in_flight(struct mapped_device *md); void disable_write_same(struct mapped_device *md); +void disable_write_zeroes(struct mapped_device *md); static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj) { diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 389a3637ffcc..ef1d836bd81b 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2030,7 +2030,6 @@ static int crypt_ctr(struct dm_target *ti, 
unsigned int argc, char **argv) wake_up_process(cc->write_thread); ti->num_flush_bios = 1; - ti->discard_zeroes_data_unsupported = true; return 0; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 03940bf36f6c..3702e502466d 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -312,9 +312,12 @@ static void do_region(int op, int op_flags, unsigned region, */ if (op == REQ_OP_DISCARD) special_cmd_max_sectors = q->limits.max_discard_sectors; + else if (op == REQ_OP_WRITE_ZEROES) + special_cmd_max_sectors = q->limits.max_write_zeroes_sectors; else if (op == REQ_OP_WRITE_SAME) special_cmd_max_sectors = q->limits.max_write_same_sectors; - if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_SAME) && + if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES || + op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) { dec_count(io, region, -EOPNOTSUPP); return; @@ -328,11 +331,18 @@ static void do_region(int op, int op_flags, unsigned region, /* * Allocate a suitably sized-bio. */ - if ((op == REQ_OP_DISCARD) || (op == REQ_OP_WRITE_SAME)) + switch (op) { + case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: + num_bvecs = 0; + break; + case REQ_OP_WRITE_SAME: num_bvecs = 1; - else + break; + default: num_bvecs = min_t(int, BIO_MAX_PAGES, dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT))); + } bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios); bio->bi_iter.bi_sector = where->sector + (where->count - remaining); @@ -341,7 +351,7 @@ static void do_region(int op, int op_flags, unsigned region, bio_set_op_attrs(bio, op, op_flags); store_io_and_region_in_bio(bio, io, region); - if (op == REQ_OP_DISCARD) { + if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) { num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining); bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT; remaining -= num_sectors; diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 9e9d04cb7d51..f85846741d50 100644 --- a/drivers/md/dm-kcopyd.c +++ 
b/drivers/md/dm-kcopyd.c @@ -733,11 +733,11 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->pages = &zero_page_list; /* - * Use WRITE SAME to optimize zeroing if all dests support it. + * Use WRITE ZEROES to optimize zeroing if all dests support it. */ - job->rw = REQ_OP_WRITE_SAME; + job->rw = REQ_OP_WRITE_ZEROES; for (i = 0; i < job->num_dests; i++) - if (!bdev_write_same(job->dests[i].bdev)) { + if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) { job->rw = WRITE; break; } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 4788b0b989a9..e17fd44ceef5 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -59,6 +59,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->num_write_same_bios = 1; + ti->num_write_zeroes_bios = 1; ti->private = lc; return 0; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 7f223dbed49f..2950b145443d 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1103,6 +1103,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->num_write_same_bios = 1; + ti->num_write_zeroes_bios = 1; if (m->queue_mode == DM_TYPE_BIO_BASED) ti->per_io_data_size = multipath_per_bio_data_size(); else @@ -1491,7 +1492,7 @@ static int do_end_io(struct multipath *m, struct request *clone, */ int r = DM_ENDIO_REQUEUE; - if (!error && !clone->errors) + if (!error) return 0; /* I/O complete */ if (noretry_error(error)) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 1e217ba84d09..2dae3e5b851c 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2813,7 +2813,9 @@ static void configure_discard_support(struct raid_set *rs) /* Assume discards not supported until after checks below. */ ti->discards_supported = false; - /* RAID level 4,5,6 require discard_zeroes_data for data integrity! 
*/ + /* + * XXX: RAID level 4,5,6 require zeroing for safety. + */ raid456 = (rs->md.level == 4 || rs->md.level == 5 || rs->md.level == 6); for (i = 0; i < rs->raid_disks; i++) { @@ -2827,8 +2829,6 @@ static void configure_discard_support(struct raid_set *rs) return; if (raid456) { - if (!q->limits.discard_zeroes_data) - return; if (!devices_handle_discard_safely) { DMERR("raid456 discard support disabled due to discard_zeroes_data uncertainty."); DMERR("Set dm-raid.devices_handle_discard_safely=Y to override."); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 2ddc2d20e62d..a95cbb80fb34 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1124,7 +1124,6 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->per_io_data_size = sizeof(struct dm_raid1_bio_record); - ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_MEM_RECLAIM, 0); if (!ms->kmirrord_wq) { diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 0b081d170087..bff7e3bdb4ed 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -298,9 +298,14 @@ static void dm_done(struct request *clone, int error, bool mapped) r = rq_end_io(tio->ti, clone, error, &tio->info); } - if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) && - !clone->q->limits.max_write_same_sectors)) - disable_write_same(tio->md); + if (unlikely(r == -EREMOTEIO)) { + if (req_op(clone) == REQ_OP_WRITE_SAME && + !clone->q->limits.max_write_same_sectors) + disable_write_same(tio->md); + if (req_op(clone) == REQ_OP_WRITE_ZEROES && + !clone->q->limits.max_write_zeroes_sectors) + disable_write_zeroes(tio->md); + } if (r <= 0) /* The target wants to complete the I/O */ @@ -358,7 +363,7 @@ static void dm_complete_request(struct request *rq, int error) if (!rq->q->mq_ops) blk_complete_request(rq); else - blk_mq_complete_request(rq, error); + blk_mq_complete_request(rq); } /* @@ 
-762,7 +767,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_MQ_RQ_QUEUE_OK; } -static struct blk_mq_ops dm_mq_ops = { +static const struct blk_mq_ops dm_mq_ops = { .queue_rq = dm_mq_queue_rq, .complete = dm_softirq_done, .init_request = dm_mq_init_request, diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 28193a57bf47..5ef49c121d99 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -169,6 +169,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_bios = stripes; ti->num_discard_bios = stripes; ti->num_write_same_bios = stripes; + ti->num_write_zeroes_bios = stripes; sc->chunk_size = chunk_size; if (chunk_size & (chunk_size - 1)) @@ -293,6 +294,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } if (unlikely(bio_op(bio) == REQ_OP_DISCARD) || + unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES) || unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) { target_bio_nr = dm_bio_get_target_bio_nr(bio); BUG_ON(target_bio_nr >= sc->stripes); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 3ad16d9c9d5a..958275aca008 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1449,22 +1449,6 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush) return false; } -static bool dm_table_discard_zeroes_data(struct dm_table *t) -{ - struct dm_target *ti; - unsigned i = 0; - - /* Ensure that all targets supports discard_zeroes_data. 
*/ - while (i < dm_table_get_num_targets(t)) { - ti = dm_table_get_target(t, i++); - - if (ti->discard_zeroes_data_unsupported) - return false; - } - - return true; -} - static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1533,6 +1517,34 @@ static bool dm_table_supports_write_same(struct dm_table *t) return true; } +static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !q->limits.max_write_zeroes_sectors; +} + +static bool dm_table_supports_write_zeroes(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i = 0; + + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (!ti->num_write_zeroes_bios) + return false; + + if (!ti->type->iterate_devices || + ti->type->iterate_devices(ti, device_not_write_zeroes_capable, NULL)) + return false; + } + + return true; +} + + static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1592,9 +1604,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } blk_queue_write_cache(q, wc, fua); - if (!dm_table_discard_zeroes_data(t)) - q->limits.discard_zeroes_data = 0; - /* Ensure that all underlying devices are non-rotational. 
*/ if (dm_table_all_devices_attribute(t, device_is_nonrot)) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); @@ -1603,6 +1612,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (!dm_table_supports_write_same(t)) q->limits.max_write_same_sectors = 0; + if (!dm_table_supports_write_zeroes(t)) + q->limits.max_write_zeroes_sectors = 0; if (dm_table_all_devices_attribute(t, queue_supports_sg_merge)) queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 2b266a2b5035..a5f1916f621a 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3263,7 +3263,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * them down to the data device. The thin device's discard * processing will cause mappings to be removed from the btree. */ - ti->discard_zeroes_data_unsupported = true; if (pf.discard_enabled && pf.discard_passdown) { ti->num_discard_bios = 1; @@ -4119,7 +4118,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->per_io_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool supports discards, pass them on. 
*/ - ti->discard_zeroes_data_unsupported = true; if (tc->pool->pf.discard_enabled) { ti->discards_supported = true; ti->num_discard_bios = 1; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index dfb75979e455..8bf397729bbd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -810,7 +810,6 @@ static void dec_pending(struct dm_io *io, int error) queue_io(md, bio); } else { /* done with normal IO or empty flush */ - trace_block_bio_complete(md->queue, bio, io_error); bio->bi_error = io_error; bio_endio(bio); } @@ -825,6 +824,14 @@ void disable_write_same(struct mapped_device *md) limits->max_write_same_sectors = 0; } +void disable_write_zeroes(struct mapped_device *md) +{ + struct queue_limits *limits = dm_get_queue_limits(md); + + /* device doesn't really support WRITE ZEROES, disable it */ + limits->max_write_zeroes_sectors = 0; +} + static void clone_endio(struct bio *bio) { int error = bio->bi_error; @@ -851,9 +858,14 @@ static void clone_endio(struct bio *bio) } } - if (unlikely(r == -EREMOTEIO && (bio_op(bio) == REQ_OP_WRITE_SAME) && - !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)) - disable_write_same(md); + if (unlikely(r == -EREMOTEIO)) { + if (bio_op(bio) == REQ_OP_WRITE_SAME && + !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors) + disable_write_same(md); + if (bio_op(bio) == REQ_OP_WRITE_ZEROES && + !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors) + disable_write_zeroes(md); + } free_tio(tio); dec_pending(io, error); @@ -1202,6 +1214,11 @@ static unsigned get_num_write_same_bios(struct dm_target *ti) return ti->num_write_same_bios; } +static unsigned get_num_write_zeroes_bios(struct dm_target *ti) +{ + return ti->num_write_zeroes_bios; +} + typedef bool (*is_split_required_fn)(struct dm_target *ti); static bool is_split_required_for_discard(struct dm_target *ti) @@ -1256,6 +1273,11 @@ static int __send_write_same(struct clone_info *ci) return __send_changing_extent_only(ci, get_num_write_same_bios, NULL); } +static 
int __send_write_zeroes(struct clone_info *ci) +{ + return __send_changing_extent_only(ci, get_num_write_zeroes_bios, NULL); +} + /* * Select the correct strategy for processing a non-flush bio. */ @@ -1270,6 +1292,8 @@ static int __split_and_process_non_flush(struct clone_info *ci) return __send_discard(ci); else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) return __send_write_same(ci); + else if (unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES)) + return __send_write_zeroes(ci); ti = dm_table_find_target(ci->map, ci->sector); if (!dm_target_is_valid(ti)) diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 3e38e0207a3e..377a8a3672e3 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -293,6 +293,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) split, disk_devt(mddev->gendisk), bio_sector); mddev_check_writesame(mddev, split); + mddev_check_write_zeroes(mddev, split); generic_make_request(split); } } while (split != bio); diff --git a/drivers/md/md.h b/drivers/md/md.h index dde8ecb760c8..1e76d64ce180 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -709,4 +709,11 @@ static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio) !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors) mddev->queue->limits.max_write_same_sectors = 0; } + +static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio) +{ + if (bio_op(bio) == REQ_OP_WRITE_ZEROES && + !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors) + mddev->queue->limits.max_write_zeroes_sectors = 0; +} #endif /* _MD_MD_H */ diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 79a12b59250b..e95d521d93e9 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -139,6 +139,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio) mp_bh->bio.bi_end_io = multipath_end_request; mp_bh->bio.bi_private = mp_bh; mddev_check_writesame(mddev, &mp_bh->bio); + 
mddev_check_write_zeroes(mddev, &mp_bh->bio); generic_make_request(&mp_bh->bio); return; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 93347ca7c7a6..ce7a6a56cf73 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -383,6 +383,7 @@ static int raid0_run(struct mddev *mddev) blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); + blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); @@ -504,6 +505,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) split, disk_devt(mddev->gendisk), bio_sector); mddev_check_writesame(mddev, split); + mddev_check_write_zeroes(mddev, split); generic_make_request(split); } } while (split != bio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a34f58772022..b59cc100320a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3177,8 +3177,10 @@ static int raid1_run(struct mddev *mddev) if (IS_ERR(conf)) return PTR_ERR(conf); - if (mddev->queue) + if (mddev->queue) { blk_queue_max_write_same_sectors(mddev->queue, 0); + blk_queue_max_write_zeroes_sectors(mddev->queue, 0); + } rdev_for_each(rdev, mddev) { if (!mddev->gendisk) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e89a8d78a9ed..28ec3a93acee 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3749,6 +3749,7 @@ static int raid10_run(struct mddev *mddev) blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_same_sectors(mddev->queue, 0); + blk_queue_max_write_zeroes_sectors(mddev->queue, 0); blk_queue_io_min(mddev->queue, chunk_size); if (conf->geo.raid_disks % conf->geo.near_copies) blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ed5cd705b985..2efdb0d67460 100644 --- 
a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5031,8 +5031,6 @@ static void raid5_align_endio(struct bio *bi) rdev_dec_pending(rdev, conf->mddev); if (!error) { - trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev), - raid_bi, 0); bio_endio(raid_bi); if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_quiescent); @@ -7229,7 +7227,6 @@ static int raid5_run(struct mddev *mddev) if (mddev->queue) { int chunk_size; - bool discard_supported = true; /* read-ahead size must cover two whole stripes, which * is 2 * (datadisks) * chunksize where 'n' is the * number of raid devices @@ -7265,48 +7262,32 @@ static int raid5_run(struct mddev *mddev) blk_queue_max_discard_sectors(mddev->queue, 0xfffe * STRIPE_SECTORS); - /* - * unaligned part of discard request will be ignored, so can't - * guarantee discard_zeroes_data - */ - mddev->queue->limits.discard_zeroes_data = 0; - blk_queue_max_write_same_sectors(mddev->queue, 0); + blk_queue_max_write_zeroes_sectors(mddev->queue, 0); rdev_for_each(rdev, mddev) { disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->new_data_offset << 9); - /* - * discard_zeroes_data is required, otherwise data - * could be lost. Consider a scenario: discard a stripe - * (the stripe could be inconsistent if - * discard_zeroes_data is 0); write one disk of the - * stripe (the stripe could be inconsistent again - * depending on which disks are used to calculate - * parity); the disk is broken; The stripe data of this - * disk is lost. - */ - if (!blk_queue_discard(bdev_get_queue(rdev->bdev)) || - !bdev_get_queue(rdev->bdev)-> - limits.discard_zeroes_data) - discard_supported = false; - /* Unfortunately, discard_zeroes_data is not currently - * a guarantee - just a hint. So we only allow DISCARD - * if the sysadmin has confirmed that only safe devices - * are in use by setting a module parameter. 
- */ - if (!devices_handle_discard_safely) { - if (discard_supported) { - pr_info("md/raid456: discard support disabled due to uncertainty.\n"); - pr_info("Set raid456.devices_handle_discard_safely=Y to override.\n"); - } - discard_supported = false; - } } - if (discard_supported && + /* + * zeroing is required, otherwise data + * could be lost. Consider a scenario: discard a stripe + * (the stripe could be inconsistent if + * discard_zeroes_data is 0); write one disk of the + * stripe (the stripe could be inconsistent again + * depending on which disks are used to calculate + * parity); the disk is broken; The stripe data of this + * disk is lost. + * + * We only allow DISCARD if the sysadmin has confirmed that + * only safe devices are in use by setting a module parameter. + * A better idea might be to turn DISCARD into WRITE_ZEROES + * requests, as that is required to be safe. + */ + if (devices_handle_discard_safely && mddev->queue->limits.max_discard_sectors >= (stripe >> 9) && mddev->queue->limits.discard_granularity >= stripe) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 493eb10ce580..4c54ad34e17a 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -167,8 +167,6 @@ static void mmc_queue_setup_discard(struct request_queue *q, queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); blk_queue_max_discard_sectors(q, max_discard); - if (card->erased_byte == 0 && !mmc_can_discard(card)) - q->limits.discard_zeroes_data = 1; q->limits.discard_granularity = card->pref_erase << 9; /* granularity must not be greater than max. 
discard */ if (card->pref_erase > max_discard) diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index e992a7f8a16f..2b32b88949ba 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -267,7 +267,7 @@ static void sdio_release_func(struct device *dev) sdio_free_func_cis(func); kfree(func->info); - + kfree(func->tmpbuf); kfree(func); } @@ -282,6 +282,16 @@ struct sdio_func *sdio_alloc_func(struct mmc_card *card) if (!func) return ERR_PTR(-ENOMEM); + /* + * allocate buffer separately to make sure it's properly aligned for + * DMA usage (incl. 64 bit DMA) + */ + func->tmpbuf = kmalloc(4, GFP_KERNEL); + if (!func->tmpbuf) { + kfree(func); + return ERR_PTR(-ENOMEM); + } + func->card = card; device_initialize(&func->dev); diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index a9ac0b457313..8718432751c5 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -22,6 +22,7 @@ #include <linux/ioport.h> #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/stat.h> @@ -1621,10 +1622,16 @@ static void dw_mci_init_card(struct mmc_host *mmc, struct mmc_card *card) if (card->type == MMC_TYPE_SDIO || card->type == MMC_TYPE_SD_COMBO) { - set_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags); + if (!test_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags)) { + pm_runtime_get_noresume(mmc->parent); + set_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags); + } clk_en_a = clk_en_a_old & ~clken_low_pwr; } else { - clear_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags); + if (test_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags)) { + pm_runtime_put_noidle(mmc->parent); + clear_bit(DW_MMC_CARD_NO_LOW_PWR, &slot->flags); + } clk_en_a = clk_en_a_old | clken_low_pwr; } diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 7123ef96ed18..445fc47dc3e7 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ 
b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -830,6 +830,7 @@ static int esdhc_change_pinstate(struct sdhci_host *host, switch (uhs) { case MMC_TIMING_UHS_SDR50: + case MMC_TIMING_UHS_DDR50: pinctrl = imx_data->pins_100mhz; break; case MMC_TIMING_UHS_SDR104: diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 66a9dedd1062..1517da3ddd7d 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -46,7 +46,7 @@ #include "mtdcore.h" -static struct backing_dev_info *mtd_bdi; +struct backing_dev_info *mtd_bdi; #ifdef CONFIG_PM_SLEEP @@ -496,11 +496,9 @@ int add_mtd_device(struct mtd_info *mtd) * mtd_device_parse_register() multiple times on the same master MTD, * especially with CONFIG_MTD_PARTITIONED_MASTER=y. */ - if (WARN_ONCE(mtd->backing_dev_info, "MTD already registered\n")) + if (WARN_ONCE(mtd->dev.type, "MTD already registered\n")) return -EEXIST; - mtd->backing_dev_info = mtd_bdi; - BUG_ON(mtd->writesize == 0); mutex_lock(&mtd_table_mutex); @@ -1775,13 +1773,18 @@ static struct backing_dev_info * __init mtd_bdi_init(char *name) struct backing_dev_info *bdi; int ret; - bdi = kzalloc(sizeof(*bdi), GFP_KERNEL); + bdi = bdi_alloc(GFP_KERNEL); if (!bdi) return ERR_PTR(-ENOMEM); - ret = bdi_setup_and_register(bdi, name); + bdi->name = name; + /* + * We put '-0' suffix to the name to get the same name format as we + * used to get. Since this is called only once, we get a unique name. + */ + ret = bdi_register(bdi, "%.28s-0", name); if (ret) - kfree(bdi); + bdi_put(bdi); return ret ? 
ERR_PTR(ret) : bdi; } @@ -1813,8 +1816,7 @@ static int __init init_mtd(void) out_procfs: if (proc_mtd) remove_proc_entry("mtd", NULL); - bdi_destroy(mtd_bdi); - kfree(mtd_bdi); + bdi_put(mtd_bdi); err_bdi: class_unregister(&mtd_class); err_reg: @@ -1828,8 +1830,7 @@ static void __exit cleanup_mtd(void) if (proc_mtd) remove_proc_entry("mtd", NULL); class_unregister(&mtd_class); - bdi_destroy(mtd_bdi); - kfree(mtd_bdi); + bdi_put(mtd_bdi); idr_destroy(&mtd_idr); } diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index 20c02a3b7417..e43fea896d1e 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -18,6 +18,7 @@ #include <linux/ctype.h> #include <linux/slab.h> #include <linux/major.h> +#include <linux/backing-dev.h> /* * compare superblocks to see if they're equivalent @@ -38,6 +39,8 @@ static int get_sb_mtd_compare(struct super_block *sb, void *_mtd) return 0; } +extern struct backing_dev_info *mtd_bdi; + /* * mark the superblock by the MTD device it is using * - set the device number to be the correct MTD block device for pesuperstence @@ -49,7 +52,8 @@ static int get_sb_mtd_set(struct super_block *sb, void *_mtd) sb->s_mtd = mtd; sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, mtd->index); - sb->s_bdi = mtd->backing_dev_info; + sb->s_bdi = bdi_get(mtd_bdi); + return 0; } diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index c80869e60909..51f2be8889b5 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -347,7 +347,7 @@ static int ubiblock_init_request(void *data, struct request *req, return 0; } -static struct blk_mq_ops ubiblock_mq_ops = { +static const struct blk_mq_ops ubiblock_mq_ops = { .queue_rq = ubiblock_queue_rq, .init_request = ubiblock_init_request, }; diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c index 0134ba32a057..39712560b4c1 100644 --- a/drivers/mtd/ubi/upd.c +++ b/drivers/mtd/ubi/upd.c @@ -148,11 +148,11 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, return err; } - 
if (bytes == 0) { - err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL); - if (err) - return err; + err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL); + if (err) + return err; + if (bytes == 0) { err = clear_update_marker(ubi, vol, 0); if (err) return err; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8a4ba8b88e52..34481c9be1d1 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1104,11 +1104,11 @@ static void bond_compute_features(struct bonding *bond) gso_max_size = min(gso_max_size, slave->dev->gso_max_size); gso_max_segs = min(gso_max_segs, slave->dev->gso_max_segs); } + bond_dev->hard_header_len = max_hard_header_len; done: bond_dev->vlan_features = vlan_features; bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL; - bond_dev->hard_header_len = max_hard_header_len; bond_dev->gso_max_segs = gso_max_segs; netif_set_gso_max_size(bond_dev, gso_max_size); diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 138f5ae75c0b..4d1fe8d95042 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -557,7 +557,7 @@ static int ifi_canfd_poll(struct napi_struct *napi, int quota) int work_done = 0; u32 stcmd = readl(priv->base + IFI_CANFD_STCMD); - u32 rxstcmd = readl(priv->base + IFI_CANFD_STCMD); + u32 rxstcmd = readl(priv->base + IFI_CANFD_RXSTCMD); u32 errctr = readl(priv->base + IFI_CANFD_ERROR_CTR); /* Handle bus state changes */ diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index caed4e6960f8..11662f479e76 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -826,8 +826,7 @@ static int rcar_can_probe(struct platform_device *pdev) devm_can_led_init(ndev); - dev_info(&pdev->dev, "device registered (regs @ %p, IRQ%d)\n", - priv->regs, ndev->irq); + dev_info(&pdev->dev, "device registered (IRQ%d)\n", ndev->irq); return 0; fail_candev: diff --git 
a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig index 8483a40e7e9e..5f9e0e6301d0 100644 --- a/drivers/net/can/usb/Kconfig +++ b/drivers/net/can/usb/Kconfig @@ -72,6 +72,8 @@ config CAN_PEAK_USB PCAN-USB Pro dual CAN 2.0b channels USB adapter PCAN-USB FD single CAN-FD channel USB adapter PCAN-USB Pro FD dual CAN-FD channels USB adapter + PCAN-Chip USB CAN-FD to USB stamp module + PCAN-USB X6 6 CAN-FD channels USB adapter (see also http://www.peak-system.com). diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 300349fe8dc0..eecee7f8dfb7 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -739,13 +739,18 @@ static const struct net_device_ops gs_usb_netdev_ops = { static int gs_usb_set_identify(struct net_device *netdev, bool do_identify) { struct gs_can *dev = netdev_priv(netdev); - struct gs_identify_mode imode; + struct gs_identify_mode *imode; int rc; + imode = kmalloc(sizeof(*imode), GFP_KERNEL); + + if (!imode) + return -ENOMEM; + if (do_identify) - imode.mode = GS_CAN_IDENTIFY_ON; + imode->mode = GS_CAN_IDENTIFY_ON; else - imode.mode = GS_CAN_IDENTIFY_OFF; + imode->mode = GS_CAN_IDENTIFY_OFF; rc = usb_control_msg(interface_to_usbdev(dev->iface), usb_sndctrlpipe(interface_to_usbdev(dev->iface), @@ -755,10 +760,12 @@ static int gs_usb_set_identify(struct net_device *netdev, bool do_identify) USB_RECIP_INTERFACE, dev->channel, 0, - &imode, - sizeof(imode), + imode, + sizeof(*imode), 100); + kfree(imode); + return (rc > 0) ? 
0 : rc; } diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 0b0302af3bd2..57913dbbae0a 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -39,6 +39,7 @@ static struct usb_device_id peak_usb_table[] = { {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPRO_PRODUCT_ID)}, {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBFD_PRODUCT_ID)}, {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPROFD_PRODUCT_ID)}, + {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBCHIP_PRODUCT_ID)}, {USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBX6_PRODUCT_ID)}, {} /* Terminating entry */ }; @@ -51,6 +52,7 @@ static const struct peak_usb_adapter *const peak_usb_adapters_list[] = { &pcan_usb_pro, &pcan_usb_fd, &pcan_usb_pro_fd, + &pcan_usb_chip, &pcan_usb_x6, }; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h index 3cbfb069893d..c01316cac354 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h @@ -27,6 +27,7 @@ #define PCAN_USBPRO_PRODUCT_ID 0x000d #define PCAN_USBPROFD_PRODUCT_ID 0x0011 #define PCAN_USBFD_PRODUCT_ID 0x0012 +#define PCAN_USBCHIP_PRODUCT_ID 0x0013 #define PCAN_USBX6_PRODUCT_ID 0x0014 #define PCAN_USB_DRIVER_NAME "peak_usb" @@ -90,6 +91,7 @@ struct peak_usb_adapter { extern const struct peak_usb_adapter pcan_usb; extern const struct peak_usb_adapter pcan_usb_pro; extern const struct peak_usb_adapter pcan_usb_fd; +extern const struct peak_usb_adapter pcan_usb_chip; extern const struct peak_usb_adapter pcan_usb_pro_fd; extern const struct peak_usb_adapter pcan_usb_x6; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 304732550f0a..528d3bb4917f 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -1061,6 +1061,78 @@ const struct peak_usb_adapter pcan_usb_fd = { .do_get_berr_counter = 
pcan_usb_fd_get_berr_counter, }; +/* describes the PCAN-CHIP USB */ +static const struct can_bittiming_const pcan_usb_chip_const = { + .name = "pcan_chip_usb", + .tseg1_min = 1, + .tseg1_max = (1 << PUCAN_TSLOW_TSGEG1_BITS), + .tseg2_min = 1, + .tseg2_max = (1 << PUCAN_TSLOW_TSGEG2_BITS), + .sjw_max = (1 << PUCAN_TSLOW_SJW_BITS), + .brp_min = 1, + .brp_max = (1 << PUCAN_TSLOW_BRP_BITS), + .brp_inc = 1, +}; + +static const struct can_bittiming_const pcan_usb_chip_data_const = { + .name = "pcan_chip_usb", + .tseg1_min = 1, + .tseg1_max = (1 << PUCAN_TFAST_TSGEG1_BITS), + .tseg2_min = 1, + .tseg2_max = (1 << PUCAN_TFAST_TSGEG2_BITS), + .sjw_max = (1 << PUCAN_TFAST_SJW_BITS), + .brp_min = 1, + .brp_max = (1 << PUCAN_TFAST_BRP_BITS), + .brp_inc = 1, +}; + +const struct peak_usb_adapter pcan_usb_chip = { + .name = "PCAN-Chip USB", + .device_id = PCAN_USBCHIP_PRODUCT_ID, + .ctrl_count = PCAN_USBFD_CHANNEL_COUNT, + .ctrlmode_supported = CAN_CTRLMODE_FD | + CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY, + .clock = { + .freq = PCAN_UFD_CRYSTAL_HZ, + }, + .bittiming_const = &pcan_usb_chip_const, + .data_bittiming_const = &pcan_usb_chip_data_const, + + /* size of device private data */ + .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), + + /* timestamps usage */ + .ts_used_bits = 32, + .ts_period = 1000000, /* calibration period in ts. 
*/ + .us_per_ts_scale = 1, /* us = (ts * scale) >> shift */ + .us_per_ts_shift = 0, + + /* give here messages in/out endpoints */ + .ep_msg_in = PCAN_USBPRO_EP_MSGIN, + .ep_msg_out = {PCAN_USBPRO_EP_MSGOUT_0}, + + /* size of rx/tx usb buffers */ + .rx_buffer_size = PCAN_UFD_RX_BUFFER_SIZE, + .tx_buffer_size = PCAN_UFD_TX_BUFFER_SIZE, + + /* device callbacks */ + .intf_probe = pcan_usb_pro_probe, /* same as PCAN-USB Pro */ + .dev_init = pcan_usb_fd_init, + + .dev_exit = pcan_usb_fd_exit, + .dev_free = pcan_usb_fd_free, + .dev_set_bus = pcan_usb_fd_set_bus, + .dev_set_bittiming = pcan_usb_fd_set_bittiming_slow, + .dev_set_data_bittiming = pcan_usb_fd_set_bittiming_fast, + .dev_decode_buf = pcan_usb_fd_decode_buf, + .dev_start = pcan_usb_fd_start, + .dev_stop = pcan_usb_fd_stop, + .dev_restart_async = pcan_usb_fd_restart_async, + .dev_encode_msg = pcan_usb_fd_encode_msg, + + .do_get_berr_counter = pcan_usb_fd_get_berr_counter, +}; + /* describes the PCAN-USB Pro FD adapter */ static const struct can_bittiming_const pcan_usb_pro_fd_const = { .name = "pcan_usb_pro_fd", diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 8cf4801994e8..fa0eece21eef 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -326,6 +326,7 @@ static void b53_get_vlan_entry(struct b53_device *dev, u16 vid, static void b53_set_forwarding(struct b53_device *dev, int enable) { + struct dsa_switch *ds = dev->ds; u8 mgmt; b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt); @@ -336,6 +337,15 @@ static void b53_set_forwarding(struct b53_device *dev, int enable) mgmt &= ~SM_SW_FWD_EN; b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt); + + /* Include IMP port in dumb forwarding mode when no tagging protocol is + * set + */ + if (ds->ops->get_tag_protocol(ds) == DSA_TAG_PROTO_NONE) { + b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, &mgmt); + mgmt |= B53_MII_DUMB_FWDG_EN; + b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, mgmt); + } } 
static void b53_enable_vlan(struct b53_device *dev, bool enable) @@ -598,7 +608,8 @@ static void b53_switch_reset_gpio(struct b53_device *dev) static int b53_switch_reset(struct b53_device *dev) { - u8 mgmt; + unsigned int timeout = 1000; + u8 mgmt, reg; b53_switch_reset_gpio(dev); @@ -607,6 +618,28 @@ static int b53_switch_reset(struct b53_device *dev) b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, 0x00); } + /* This is specific to 58xx devices here, do not use is58xx() which + * covers the larger Starfigther 2 family, including 7445/7278 which + * still use this driver as a library and need to perform the reset + * earlier. + */ + if (dev->chip_id == BCM58XX_DEVICE_ID) { + b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, &reg); + reg |= SW_RST | EN_SW_RST | EN_CH_RST; + b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, reg); + + do { + b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, &reg); + if (!(reg & SW_RST)) + break; + + usleep_range(1000, 2000); + } while (timeout-- > 0); + + if (timeout == 0) + return -ETIMEDOUT; + } + b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &mgmt); if (!(mgmt & SM_SW_FWD_EN)) { @@ -1731,7 +1764,7 @@ static const struct b53_chip_data b53_switch_chips[] = { .vlans = 4096, .enabled_ports = 0x1ff, .arl_entries = 4, - .cpu_port = B53_CPU_PORT_25, + .cpu_port = B53_CPU_PORT, .vta_regs = B53_VTA_REGS, .duplex_reg = B53_DUPLEX_STAT_GE, .jumbo_pm_reg = B53_JUMBO_PORT_MASK, diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 9fd24c418fa4..e5c86d44667a 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -104,6 +104,10 @@ #define B53_UC_FWD_EN BIT(6) #define B53_MC_FWD_EN BIT(7) +/* Switch control (8 bit) */ +#define B53_SWITCH_CTRL 0x22 +#define B53_MII_DUMB_FWDG_EN BIT(6) + /* (16 bit) */ #define B53_UC_FLOOD_MASK 0x32 #define B53_MC_FLOOD_MASK 0x34 @@ -139,6 +143,7 @@ /* Software reset register (8 bit) */ #define B53_SOFTRESET 0x79 #define SW_RST BIT(7) +#define EN_CH_RST BIT(6) #define EN_SW_RST
BIT(4) /* Fast Aging Control register (8 bit) */ diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 64a1095e4d14..a0ca68ce3fbb 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -134,6 +134,7 @@ static void set_max_bgx_per_node(struct pci_dev *pdev) pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid); switch (sdevid) { case PCI_SUBSYS_DEVID_81XX_BGX: + case PCI_SUBSYS_DEVID_81XX_RGX: max_bgx_per_node = MAX_BGX_PER_CN81XX; break; case PCI_SUBSYS_DEVID_83XX_BGX: diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index c5080f2cead5..6b7fe6fdd13b 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -16,6 +16,7 @@ /* Subsystem device IDs */ #define PCI_SUBSYS_DEVID_88XX_BGX 0xA126 #define PCI_SUBSYS_DEVID_81XX_BGX 0xA226 +#define PCI_SUBSYS_DEVID_81XX_RGX 0xA254 #define PCI_SUBSYS_DEVID_83XX_BGX 0xA326 #define MAX_BGX_THUNDER 8 /* Max 2 nodes, 4 per node */ diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 9e757684816d..93949139e62c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -613,7 +613,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; struct mtk_tx_dma *itxd, *txd; - struct mtk_tx_buf *tx_buf; + struct mtk_tx_buf *itx_buf, *tx_buf; dma_addr_t mapped_addr; unsigned int nr_frags; int i, n_desc = 1; @@ -627,8 +627,8 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, fport = (mac->id + 1) << TX_DMA_FPORT_SHIFT; txd4 |= fport; - tx_buf = mtk_desc_to_tx_buf(ring, itxd); - memset(tx_buf, 0, sizeof(*tx_buf)); + itx_buf = mtk_desc_to_tx_buf(ring, itxd); + memset(itx_buf, 0, 
sizeof(*itx_buf)); if (gso) txd4 |= TX_DMA_TSO; @@ -647,9 +647,11 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, return -ENOMEM; WRITE_ONCE(itxd->txd1, mapped_addr); - tx_buf->flags |= MTK_TX_FLAGS_SINGLE0; - dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr); - dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb)); + itx_buf->flags |= MTK_TX_FLAGS_SINGLE0; + itx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 : + MTK_TX_FLAGS_FPORT1; + dma_unmap_addr_set(itx_buf, dma_addr0, mapped_addr); + dma_unmap_len_set(itx_buf, dma_len0, skb_headlen(skb)); /* TX SG offload */ txd = itxd; @@ -685,11 +687,13 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, last_frag * TX_DMA_LS0)); WRITE_ONCE(txd->txd4, fport); - tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC; tx_buf = mtk_desc_to_tx_buf(ring, txd); memset(tx_buf, 0, sizeof(*tx_buf)); - + tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC; tx_buf->flags |= MTK_TX_FLAGS_PAGE0; + tx_buf->flags |= (!mac->id) ? 
MTK_TX_FLAGS_FPORT0 : + MTK_TX_FLAGS_FPORT1; + dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr); dma_unmap_len_set(tx_buf, dma_len0, frag_map_size); frag_size -= frag_map_size; @@ -698,7 +702,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, } /* store skb to cleanup */ - tx_buf->skb = skb; + itx_buf->skb = skb; WRITE_ONCE(itxd->txd4, txd4); WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) | @@ -1012,17 +1016,16 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget) while ((cpu != dma) && budget) { u32 next_cpu = desc->txd2; - int mac; + int mac = 0; desc = mtk_qdma_phys_to_virt(ring, desc->txd2); if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0) break; - mac = (desc->txd4 >> TX_DMA_FPORT_SHIFT) & - TX_DMA_FPORT_MASK; - mac--; - tx_buf = mtk_desc_to_tx_buf(ring, desc); + if (tx_buf->flags & MTK_TX_FLAGS_FPORT1) + mac = 1; + skb = tx_buf->skb; if (!skb) { condition = 1; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 99b1c8e9f16f..08285a96ff70 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -406,12 +406,18 @@ struct mtk_hw_stats { struct u64_stats_sync syncp; }; -/* PDMA descriptor can point at 1-2 segments. This enum allows us to track how - * memory was allocated so that it can be freed properly - */ enum mtk_tx_flags { + /* PDMA descriptor can point at 1-2 segments. This enum allows us to + * track how memory was allocated so that it can be freed properly. + */ MTK_TX_FLAGS_SINGLE0 = 0x01, MTK_TX_FLAGS_PAGE0 = 0x02, + + /* MTK_TX_FLAGS_FPORTx allows tracking which port the transmitted + * SKB out instead of looking up through hardware TX descriptor. 
+ */ + MTK_TX_FLAGS_FPORT0 = 0x04, + MTK_TX_FLAGS_FPORT1 = 0x08, }; /* This enum allows us to identify how the clock is defined on the array of the diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index dc52053128bc..3d9490cd2db1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -90,7 +90,7 @@ #define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX) #define MLX5_UMR_ALIGN (2048) -#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) +#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (256) #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_DEFAULT_LRO_TIMEOUT 32 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index d55fff0ba388..26fc77e80f7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -564,6 +564,7 @@ int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, struct ethtool_rxnfc *i int idx = 0; int err = 0; + info->data = MAX_NUM_OF_ETHTOOL_RULES; while ((!err || err == -ENOENT) && idx < info->rule_cnt) { err = mlx5e_ethtool_get_flow(priv, info, location); if (!err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 66c133757a5e..15cc7b469d2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -174,7 +174,7 @@ unlock: static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) { - struct mlx5e_sw_stats *s = &priv->stats.sw; + struct mlx5e_sw_stats temp, *s = &temp; struct mlx5e_rq_stats *rq_stats; struct mlx5e_sq_stats *sq_stats; u64 tx_offload_none = 0; @@ -229,6 +229,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->link_down_events_phy = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, 
counter_set.phys_layer_cntrs.link_down_events); + memcpy(&priv->stats.sw, s, sizeof(*s)); } static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) @@ -243,7 +244,6 @@ static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, other_vport, 0); - memset(out, 0, outlen); mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fade7233dac5..5436866798f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -639,7 +639,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) && rep->vport != FDB_UPLINK_VPORT) { - if (min_inline > esw->offloads.inline_mode) { + if (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && + esw->offloads.inline_mode < min_inline) { netdev_warn(priv->netdev, "Flow is not offloaded due to min inline setting, required %d actual %d\n", min_inline, esw->offloads.inline_mode); @@ -785,16 +786,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, return 0; } -static int gen_vxlan_header_ipv4(struct net_device *out_dev, - char buf[], - unsigned char h_dest[ETH_ALEN], - int ttl, - __be32 daddr, - __be32 saddr, - __be16 udp_dst_port, - __be32 vx_vni) +static void gen_vxlan_header_ipv4(struct net_device *out_dev, + char buf[], int encap_size, + unsigned char h_dest[ETH_ALEN], + int ttl, + __be32 daddr, + __be32 saddr, + __be16 udp_dst_port, + __be32 vx_vni) { - int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN; struct ethhdr *eth = (struct ethhdr *)buf; struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr)); @@ -817,20 +817,17 @@ static int gen_vxlan_header_ipv4(struct net_device *out_dev, udp->dest = 
udp_dst_port; vxh->vx_flags = VXLAN_HF_VNI; vxh->vx_vni = vxlan_vni_field(vx_vni); - - return encap_size; } -static int gen_vxlan_header_ipv6(struct net_device *out_dev, - char buf[], - unsigned char h_dest[ETH_ALEN], - int ttl, - struct in6_addr *daddr, - struct in6_addr *saddr, - __be16 udp_dst_port, - __be32 vx_vni) +static void gen_vxlan_header_ipv6(struct net_device *out_dev, + char buf[], int encap_size, + unsigned char h_dest[ETH_ALEN], + int ttl, + struct in6_addr *daddr, + struct in6_addr *saddr, + __be16 udp_dst_port, + __be32 vx_vni) { - int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN; struct ethhdr *eth = (struct ethhdr *)buf; struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr)); @@ -852,8 +849,6 @@ static int gen_vxlan_header_ipv6(struct net_device *out_dev, udp->dest = udp_dst_port; vxh->vx_flags = VXLAN_HF_VNI; vxh->vx_vni = vxlan_vni_field(vx_vni); - - return encap_size; } static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, @@ -862,13 +857,20 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct net_device **out_dev) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; - int encap_size, ttl, err; struct neighbour *n = NULL; struct flowi4 fl4 = {}; char *encap_header; + int ttl, err; + + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + return -EOPNOTSUPP; + } - encap_header = kzalloc(max_encap_size, GFP_KERNEL); + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); if (!encap_header) return -ENOMEM; @@ -903,11 +905,11 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - encap_size 
= gen_vxlan_header_ipv4(*out_dev, encap_header, - e->h_dest, ttl, - fl4.daddr, - fl4.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); + gen_vxlan_header_ipv4(*out_dev, encap_header, + ipv4_encap_size, e->h_dest, ttl, + fl4.daddr, + fl4.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); break; default: err = -EOPNOTSUPP; @@ -915,7 +917,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - encap_size, encap_header, &e->encap_id); + ipv4_encap_size, encap_header, &e->encap_id); out: if (err && n) neigh_release(n); @@ -930,13 +932,20 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; - int encap_size, err, ttl = 0; struct neighbour *n = NULL; struct flowi6 fl6 = {}; char *encap_header; + int err, ttl = 0; + + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + return -EOPNOTSUPP; + } - encap_header = kzalloc(max_encap_size, GFP_KERNEL); + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); if (!encap_header) return -ENOMEM; @@ -972,11 +981,11 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header, - e->h_dest, ttl, - &fl6.daddr, - &fl6.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); + gen_vxlan_header_ipv6(*out_dev, encap_header, + ipv6_encap_size, e->h_dest, ttl, + &fl6.daddr, + &fl6.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); break; default: err = -EOPNOTSUPP; @@ -984,7 +993,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, } err = mlx5_encap_alloc(priv->mdev, 
e->tunnel_type, - encap_size, encap_header, &e->encap_id); + ipv6_encap_size, encap_header, &e->encap_id); out: if (err && n) neigh_release(n); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 307ec6c5fd3b..d111cebca9f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -911,8 +911,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; int num_vports = esw->enabled_vports; - int err; - int vport; + int err, vport; u8 mlx5_mode; if (!MLX5_CAP_GEN(dev, vport_group_manager)) @@ -921,9 +920,17 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; - if (MLX5_CAP_ETH(dev, wqe_inline_mode) != - MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) + return 0; + /* fall through */ + case MLX5_CAP_INLINE_MODE_L2: + esw_warn(dev, "Inline mode can't be set\n"); return -EOPNOTSUPP; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + break; + } if (esw->offloads.num_flows > 0) { esw_warn(dev, "Can't set inline mode when flows are configured\n"); @@ -966,18 +973,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; - if (MLX5_CAP_ETH(dev, wqe_inline_mode) != - MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) - return -EOPNOTSUPP; - return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) { + u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; struct mlx5_core_dev *dev = esw->dev; int vport; - u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; if 
(!MLX5_CAP_GEN(dev, vport_group_manager)) return -EOPNOTSUPP; @@ -985,10 +988,18 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; - if (MLX5_CAP_ETH(dev, wqe_inline_mode) != - MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) - return -EOPNOTSUPP; + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + mlx5_mode = MLX5_INLINE_MODE_NONE; + goto out; + case MLX5_CAP_INLINE_MODE_L2: + mlx5_mode = MLX5_INLINE_MODE_L2; + goto out; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + goto query_vports; + } +query_vports: for (vport = 1; vport <= nvfs; vport++) { mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); if (vport > 1 && prev_mlx5_mode != mlx5_mode) @@ -996,6 +1007,7 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) prev_mlx5_mode = mlx5_mode; } +out: *mode = mlx5_mode; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 60154a175bd3..0ad66324247f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1029,7 +1029,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (err) { dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n", FW_INIT_TIMEOUT_MILI); - goto out_err; + goto err_cmd_cleanup; } err = mlx5_core_enable_hca(dev, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 2e6b0f290ddc..222b25908d01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -87,6 +87,7 @@ static void up_rel_func(struct kref *kref) struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count); list_del(&up->list); + iounmap(up->map); if (mlx5_cmd_free_uar(up->mdev, up->index)) mlx5_core_warn(up->mdev, "failed to free uar 
index %d\n", up->index); kfree(up->reg_bitmap); diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 5bd36a4a8fcd..cfdadb658ade 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -64,11 +64,11 @@ ((u32)(prio_tc_tbl >> ((7 - prio) * 4)) & 0x7) static const struct qed_dcbx_app_metadata qed_dcbx_app_update[] = { - {DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_DEFAULT}, - {DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_DEFAULT}, - {DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_DEFAULT}, - {DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_DEFAULT}, - {DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH} + {DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_ISCSI}, + {DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_FCOE}, + {DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_ETH_ROCE}, + {DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_ETH_ROCE}, + {DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH}, }; static bool qed_dcbx_app_ethtype(u32 app_info_bitmap) @@ -583,6 +583,13 @@ qed_dcbx_get_ets_data(struct qed_hwfn *p_hwfn, p_params->ets_cbs, p_ets->pri_tc_tbl[0], p_params->max_ets_tc); + if (p_params->ets_enabled && !p_params->max_ets_tc) { + p_params->max_ets_tc = QED_MAX_PFC_PRIORITIES; + DP_VERBOSE(p_hwfn, QED_MSG_DCB, + "ETS params: max_ets_tc is forced to %d\n", + p_params->max_ets_tc); + } + /* 8 bit tsa and bw data corresponding to each of the 8 TC's are * encoded in a type u32 array of size 2. 
*/ @@ -1001,6 +1008,8 @@ qed_dcbx_set_pfc_data(struct qed_hwfn *p_hwfn, u8 pfc_map = 0; int i; + *pfc &= ~DCBX_PFC_ERROR_MASK; + if (p_params->pfc.willing) *pfc |= DCBX_PFC_WILLING_MASK; else @@ -1255,7 +1264,7 @@ static struct qed_dcbx_get *qed_dcbnl_get_dcbx(struct qed_hwfn *hwfn, { struct qed_dcbx_get *dcbx_info; - dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL); + dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_ATOMIC); if (!dcbx_info) return NULL; @@ -2073,6 +2082,8 @@ static int qed_dcbnl_ieee_setpfc(struct qed_dev *cdev, struct ieee_pfc *pfc) for (i = 0; i < QED_MAX_PFC_PRIORITIES; i++) dcbx_set.config.params.pfc.prio[i] = !!(pfc->pfc_en & BIT(i)); + dcbx_set.config.params.pfc.max_tc = pfc->pfc_cap; + ptt = qed_ptt_acquire(hwfn); if (!ptt) return -EINVAL; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 8cfc4a54f2dc..3cd7989c007d 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1516,11 +1516,12 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) spin_unlock_irqrestore(&priv->lock, flags); return NETDEV_TX_BUSY; } - entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC); - priv->tx_skb[q][entry / NUM_TX_DESC] = skb; if (skb_put_padto(skb, ETH_ZLEN)) - goto drop; + goto exit; + + entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC); + priv->tx_skb[q][entry / NUM_TX_DESC] = skb; buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) + entry / NUM_TX_DESC * DPTR_ALIGN; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 54248775f227..f68c4db656ed 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1127,12 +1127,70 @@ static struct mdiobb_ops bb_ops = { .get_mdio_data = sh_get_mdio, }; +/* free Tx skb function */ +static int sh_eth_tx_free(struct net_device *ndev, bool sent_only) +{ + struct sh_eth_private *mdp = 
netdev_priv(ndev); + struct sh_eth_txdesc *txdesc; + int free_num = 0; + int entry; + bool sent; + + for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) { + entry = mdp->dirty_tx % mdp->num_tx_ring; + txdesc = &mdp->tx_ring[entry]; + sent = !(txdesc->status & cpu_to_le32(TD_TACT)); + if (sent_only && !sent) + break; + /* TACT bit must be checked before all the following reads */ + dma_rmb(); + netif_info(mdp, tx_done, ndev, + "tx entry %d status 0x%08x\n", + entry, le32_to_cpu(txdesc->status)); + /* Free the original skb. */ + if (mdp->tx_skbuff[entry]) { + dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr), + le32_to_cpu(txdesc->len) >> 16, + DMA_TO_DEVICE); + dev_kfree_skb_irq(mdp->tx_skbuff[entry]); + mdp->tx_skbuff[entry] = NULL; + free_num++; + } + txdesc->status = cpu_to_le32(TD_TFP); + if (entry >= mdp->num_tx_ring - 1) + txdesc->status |= cpu_to_le32(TD_TDLE); + + if (sent) { + ndev->stats.tx_packets++; + ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16; + } + } + return free_num; +} + /* free skb and descriptor buffer */ static void sh_eth_ring_free(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); int ringsize, i; + if (mdp->rx_ring) { + for (i = 0; i < mdp->num_rx_ring; i++) { + if (mdp->rx_skbuff[i]) { + struct sh_eth_rxdesc *rxdesc = &mdp->rx_ring[i]; + + dma_unmap_single(&ndev->dev, + le32_to_cpu(rxdesc->addr), + ALIGN(mdp->rx_buf_sz, 32), + DMA_FROM_DEVICE); + } + } + ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring; + dma_free_coherent(NULL, ringsize, mdp->rx_ring, + mdp->rx_desc_dma); + mdp->rx_ring = NULL; + } + /* Free Rx skb ringbuffer */ if (mdp->rx_skbuff) { for (i = 0; i < mdp->num_rx_ring; i++) @@ -1141,27 +1199,18 @@ static void sh_eth_ring_free(struct net_device *ndev) kfree(mdp->rx_skbuff); mdp->rx_skbuff = NULL; - /* Free Tx skb ringbuffer */ - if (mdp->tx_skbuff) { - for (i = 0; i < mdp->num_tx_ring; i++) - dev_kfree_skb(mdp->tx_skbuff[i]); - } - kfree(mdp->tx_skbuff); - mdp->tx_skbuff = 
NULL; - - if (mdp->rx_ring) { - ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring; - dma_free_coherent(NULL, ringsize, mdp->rx_ring, - mdp->rx_desc_dma); - mdp->rx_ring = NULL; - } - if (mdp->tx_ring) { + sh_eth_tx_free(ndev, false); + ringsize = sizeof(struct sh_eth_txdesc) * mdp->num_tx_ring; dma_free_coherent(NULL, ringsize, mdp->tx_ring, mdp->tx_desc_dma); mdp->tx_ring = NULL; } + + /* Free Tx skb ringbuffer */ + kfree(mdp->tx_skbuff); + mdp->tx_skbuff = NULL; } /* format skb and descriptor buffer */ @@ -1409,43 +1458,6 @@ static void sh_eth_dev_exit(struct net_device *ndev) update_mac_address(ndev); } -/* free Tx skb function */ -static int sh_eth_txfree(struct net_device *ndev) -{ - struct sh_eth_private *mdp = netdev_priv(ndev); - struct sh_eth_txdesc *txdesc; - int free_num = 0; - int entry; - - for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) { - entry = mdp->dirty_tx % mdp->num_tx_ring; - txdesc = &mdp->tx_ring[entry]; - if (txdesc->status & cpu_to_le32(TD_TACT)) - break; - /* TACT bit must be checked before all the following reads */ - dma_rmb(); - netif_info(mdp, tx_done, ndev, - "tx entry %d status 0x%08x\n", - entry, le32_to_cpu(txdesc->status)); - /* Free the original skb. 
*/ - if (mdp->tx_skbuff[entry]) { - dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr), - le32_to_cpu(txdesc->len) >> 16, - DMA_TO_DEVICE); - dev_kfree_skb_irq(mdp->tx_skbuff[entry]); - mdp->tx_skbuff[entry] = NULL; - free_num++; - } - txdesc->status = cpu_to_le32(TD_TFP); - if (entry >= mdp->num_tx_ring - 1) - txdesc->status |= cpu_to_le32(TD_TDLE); - - ndev->stats.tx_packets++; - ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16; - } - return free_num; -} - /* Packet receive function */ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) { @@ -1690,7 +1702,7 @@ static void sh_eth_error(struct net_device *ndev, u32 intr_status) intr_status, mdp->cur_tx, mdp->dirty_tx, (u32)ndev->state, edtrr); /* dirty buffer free */ - sh_eth_txfree(ndev); + sh_eth_tx_free(ndev, true); /* SH7712 BUG */ if (edtrr ^ sh_eth_get_edtrr_trns(mdp)) { @@ -1751,7 +1763,7 @@ static irqreturn_t sh_eth_interrupt(int irq, void *netdev) /* Clear Tx interrupts */ sh_eth_write(ndev, intr_status & cd->tx_check, EESR); - sh_eth_txfree(ndev); + sh_eth_tx_free(ndev, true); netif_wake_queue(ndev); } @@ -2412,7 +2424,7 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev) spin_lock_irqsave(&mdp->lock, flags); if ((mdp->cur_tx - mdp->dirty_tx) >= (mdp->num_tx_ring - 4)) { - if (!sh_eth_txfree(ndev)) { + if (!sh_eth_tx_free(ndev, true)) { netif_warn(mdp, tx_queued, ndev, "TxFD exhausted.\n"); netif_stop_queue(ndev); spin_unlock_irqrestore(&mdp->lock, flags); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 50d28261b6b9..b9cb697b2818 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1371,6 +1371,13 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx) free_cpumask_var(thread_mask); } + if (count > EFX_MAX_RX_QUEUES) { + netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, + "Reducing number of rx queues from %u to %u.\n", + count, EFX_MAX_RX_QUEUES); + count = 
EFX_MAX_RX_QUEUES; + } + /* If RSS is requested for the PF *and* VFs then we can't write RSS * table entries that are inaccessible to VFs */ diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index ee14662415c5..a0c52e328102 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -74,7 +74,10 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); #define EFX_RXQ_MIN_ENT 128U #define EFX_TXQ_MIN_ENT(efx) (2 * efx_tx_max_skb_descs(efx)) -#define EFX_TXQ_MAX_ENT(efx) (EFX_WORKAROUND_35388(efx) ? \ +/* All EF10 architecture NICs steal one bit of the DMAQ size for various + * other purposes when counting TxQ entries, so we halve the queue size. + */ +#define EFX_TXQ_MAX_ENT(efx) (EFX_WORKAROUND_EF10(efx) ? \ EFX_MAX_DMAQ_SIZE / 2 : EFX_MAX_DMAQ_SIZE) static inline bool efx_rss_enabled(struct efx_nic *efx) diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index f5e5cd1659a1..29614da91cbf 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -1354,6 +1354,13 @@ static unsigned int ef4_wanted_parallelism(struct ef4_nic *efx) free_cpumask_var(thread_mask); } + if (count > EF4_MAX_RX_QUEUES) { + netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, + "Reducing number of rx queues from %u to %u.\n", + count, EF4_MAX_RX_QUEUES); + count = EF4_MAX_RX_QUEUES; + } + return count; } diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h index 103f827a1623..c67fa18b8121 100644 --- a/drivers/net/ethernet/sfc/workarounds.h +++ b/drivers/net/ethernet/sfc/workarounds.h @@ -16,6 +16,7 @@ */ #define EFX_WORKAROUND_SIENA(efx) (efx_nic_rev(efx) == EFX_REV_SIENA_A0) +#define EFX_WORKAROUND_EF10(efx) (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) #define EFX_WORKAROUND_10G(efx) 1 /* Bit-bashed I2C reads cause performance drop */ diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 
9e631952b86f..48a541eb0af2 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -76,7 +76,7 @@ config TI_CPSW config TI_CPTS bool "TI Common Platform Time Sync (CPTS) Support" depends on TI_CPSW || TI_KEYSTONE_NETCP - depends on PTP_1588_CLOCK + depends on POSIX_TIMERS ---help--- This driver supports the Common Platform Time Sync unit of the CPSW Ethernet Switch and Keystone 2 1g/10g Switch Subsystem. @@ -87,6 +87,8 @@ config TI_CPTS_MOD tristate depends on TI_CPTS default y if TI_CPSW=y || TI_KEYSTONE_NETCP=y + select NET_PTP_CLASSIFY + imply PTP_1588_CLOCK default m config TI_KEYSTONE_NETCP diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index a45f98fa4aa7..3dadee1080b9 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -1017,8 +1017,8 @@ tc35815_free_queues(struct net_device *dev) BUG_ON(lp->tx_skbs[i].skb != skb); #endif if (skb) { - dev_kfree_skb(skb); pci_unmap_single(lp->pci_dev, lp->tx_skbs[i].skb_dma, skb->len, PCI_DMA_TODEVICE); + dev_kfree_skb(skb); lp->tx_skbs[i].skb = NULL; lp->tx_skbs[i].skb_dma = 0; } diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index f9f3dba7a588..db23cb36ae5c 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -751,7 +751,6 @@ struct netvsc_device { u32 send_section_cnt; u32 send_section_size; unsigned long *send_section_map; - int map_words; /* Used for NetVSP initialization protocol */ struct completion channel_init_wait; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 8dd0b8770328..15ef713d96c0 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -236,6 +236,7 @@ static int netvsc_init_buf(struct hv_device *device) struct netvsc_device *net_device; struct nvsp_message *init_packet; struct net_device *ndev; + size_t map_words; int node; net_device = get_outbound_net_device(device); @@ 
-401,11 +402,9 @@ static int netvsc_init_buf(struct hv_device *device) net_device->send_section_size, net_device->send_section_cnt); /* Setup state for managing the send buffer. */ - net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt, - BITS_PER_LONG); + map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG); - net_device->send_section_map = kcalloc(net_device->map_words, - sizeof(ulong), GFP_KERNEL); + net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL); if (net_device->send_section_map == NULL) { ret = -ENOMEM; goto cleanup; @@ -683,7 +682,7 @@ static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) unsigned long *map_addr = net_device->send_section_map; unsigned int i; - for_each_clear_bit(i, map_addr, net_device->map_words) { + for_each_clear_bit(i, map_addr, net_device->send_section_cnt) { if (sync_test_and_set_bit(i, map_addr) == 0) return i; } diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index ff0a5ed3ca80..49ce4e9f4a0f 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -617,7 +617,8 @@ static void macsec_encrypt_done(struct crypto_async_request *base, int err) static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm, unsigned char **iv, - struct scatterlist **sg) + struct scatterlist **sg, + int num_frags) { size_t size, iv_offset, sg_offset; struct aead_request *req; @@ -629,7 +630,7 @@ static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm, size = ALIGN(size, __alignof__(struct scatterlist)); sg_offset = size; - size += sizeof(struct scatterlist) * (MAX_SKB_FRAGS + 1); + size += sizeof(struct scatterlist) * num_frags; tmp = kmalloc(size, GFP_ATOMIC); if (!tmp) @@ -649,6 +650,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, { int ret; struct scatterlist *sg; + struct sk_buff *trailer; unsigned char *iv; struct ethhdr *eth; struct macsec_eth_header *hh; @@ -723,7 +725,14 @@ static struct sk_buff *macsec_encrypt(struct 
sk_buff *skb, return ERR_PTR(-EINVAL); } - req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg); + ret = skb_cow_data(skb, 0, &trailer); + if (unlikely(ret < 0)) { + macsec_txsa_put(tx_sa); + kfree_skb(skb); + return ERR_PTR(ret); + } + + req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg, ret); if (!req) { macsec_txsa_put(tx_sa); kfree_skb(skb); @@ -732,7 +741,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, macsec_fill_iv(iv, secy->sci, pn); - sg_init_table(sg, MAX_SKB_FRAGS + 1); + sg_init_table(sg, ret); skb_to_sgvec(skb, sg, 0, skb->len); if (tx_sc->encrypt) { @@ -917,6 +926,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, { int ret; struct scatterlist *sg; + struct sk_buff *trailer; unsigned char *iv; struct aead_request *req; struct macsec_eth_header *hdr; @@ -927,7 +937,12 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, if (!skb) return ERR_PTR(-ENOMEM); - req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg); + ret = skb_cow_data(skb, 0, &trailer); + if (unlikely(ret < 0)) { + kfree_skb(skb); + return ERR_PTR(ret); + } + req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg, ret); if (!req) { kfree_skb(skb); return ERR_PTR(-ENOMEM); @@ -936,7 +951,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, hdr = (struct macsec_eth_header *)skb->data; macsec_fill_iv(iv, sci, ntohl(hdr->packet_number)); - sg_init_table(sg, MAX_SKB_FRAGS + 1); + sg_init_table(sg, ret); skb_to_sgvec(skb, sg, 0, skb->len); if (hdr->tci_an & MACSEC_TCI_E) { diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 9261722960a7..b34eaaae03fd 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1139,6 +1139,7 @@ static int macvlan_port_create(struct net_device *dev) static void macvlan_port_destroy(struct net_device *dev) { struct macvlan_port *port = macvlan_port_get_rtnl(dev); + struct sk_buff *skb; dev->priv_flags &= ~IFF_MACVLAN_PORT; netdev_rx_handler_unregister(dev); @@ -1147,7 +1148,15 @@ static void 
macvlan_port_destroy(struct net_device *dev) * but we need to cancel it and purge left skbs if any. */ cancel_work_sync(&port->bc_work); - __skb_queue_purge(&port->bc_queue); + + while ((skb = __skb_dequeue(&port->bc_queue))) { + const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src; + + if (src) + dev_put(src->dev); + + kfree_skb(skb); + } kfree(port); } diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index e2460a57e4b1..ed0d10f54f26 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1438,8 +1438,6 @@ static bool dp83640_rxtstamp(struct phy_device *phydev, skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT; skb_queue_tail(&dp83640->rx_queue, skb); schedule_delayed_work(&dp83640->ts_work, SKB_TIMESTAMP_TIMEOUT); - } else { - netif_rx_ni(skb); } return true; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 6742070ca676..da5b39268370 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -297,17 +297,6 @@ static int kszphy_config_init(struct phy_device *phydev) if (priv->led_mode >= 0) kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode); - if (phy_interrupt_is_valid(phydev)) { - int ctl = phy_read(phydev, MII_BMCR); - - if (ctl < 0) - return ctl; - - ret = phy_write(phydev, MII_BMCR, ctl & ~BMCR_ANENABLE); - if (ret < 0) - return ret; - } - return 0; } @@ -798,9 +787,6 @@ static struct phy_driver ksphy_driver[] = { .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, - .get_sset_count = kszphy_get_sset_count, - .get_strings = kszphy_get_strings, - .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, }, { @@ -940,9 +926,6 @@ static struct phy_driver ksphy_driver[] = { .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, - .get_sset_count = kszphy_get_sset_count, - .get_strings = kszphy_get_strings, - .get_stats = kszphy_get_stats, .suspend 
= genphy_suspend, .resume = genphy_resume, }, { @@ -952,6 +935,7 @@ static struct phy_driver ksphy_driver[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, .driver_data = &ksz9021_type, + .probe = kszphy_probe, .config_init = ksz9021_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, @@ -971,6 +955,7 @@ static struct phy_driver ksphy_driver[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, .driver_data = &ksz9021_type, + .probe = kszphy_probe, .config_init = ksz9031_config_init, .config_aneg = genphy_config_aneg, .read_status = ksz9031_read_status, @@ -989,9 +974,6 @@ static struct phy_driver ksphy_driver[] = { .config_init = kszphy_config_init, .config_aneg = ksz8873mll_config_aneg, .read_status = ksz8873mll_read_status, - .get_sset_count = kszphy_get_sset_count, - .get_strings = kszphy_get_strings, - .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, }, { @@ -1003,9 +985,6 @@ static struct phy_driver ksphy_driver[] = { .config_init = kszphy_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .get_sset_count = kszphy_get_sset_count, - .get_strings = kszphy_get_strings, - .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, }, { @@ -1017,9 +996,6 @@ static struct phy_driver ksphy_driver[] = { .config_init = kszphy_config_init, .config_aneg = ksz8873mll_config_aneg, .read_status = ksz8873mll_read_status, - .get_sset_count = kszphy_get_sset_count, - .get_strings = kszphy_get_strings, - .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, } }; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a2bfc82e95d7..97ff1278167b 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -591,16 +591,18 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) EXPORT_SYMBOL(phy_mii_ioctl); /** - * phy_start_aneg - 
start auto-negotiation for this PHY device + * phy_start_aneg_priv - start auto-negotiation for this PHY device * @phydev: the phy_device struct + * @sync: indicate whether we should wait for the workqueue cancelation * * Description: Sanitizes the settings (if we're not autonegotiating * them), and then calls the driver's config_aneg function. * If the PHYCONTROL Layer is operating, we change the state to * reflect the beginning of Auto-negotiation or forcing. */ -int phy_start_aneg(struct phy_device *phydev) +static int phy_start_aneg_priv(struct phy_device *phydev, bool sync) { + bool trigger = 0; int err; if (!phydev->drv) @@ -628,10 +630,40 @@ int phy_start_aneg(struct phy_device *phydev) } } + /* Re-schedule a PHY state machine to check PHY status because + * negotiation may already be done and aneg interrupt may not be + * generated. + */ + if (phy_interrupt_is_valid(phydev) && (phydev->state == PHY_AN)) { + err = phy_aneg_done(phydev); + if (err > 0) { + trigger = true; + err = 0; + } + } + out_unlock: mutex_unlock(&phydev->lock); + + if (trigger) + phy_trigger_machine(phydev, sync); + return err; } + +/** + * phy_start_aneg - start auto-negotiation for this PHY device + * @phydev: the phy_device struct + * + * Description: Sanitizes the settings (if we're not autonegotiating + * them), and then calls the driver's config_aneg function. + * If the PHYCONTROL Layer is operating, we change the state to + * reflect the beginning of Auto-negotiation or forcing. + */ +int phy_start_aneg(struct phy_device *phydev) +{ + return phy_start_aneg_priv(phydev, true); +} EXPORT_SYMBOL(phy_start_aneg); /** @@ -659,7 +691,7 @@ void phy_start_machine(struct phy_device *phydev) * state machine runs. 
*/ -static void phy_trigger_machine(struct phy_device *phydev, bool sync) +void phy_trigger_machine(struct phy_device *phydev, bool sync) { if (sync) cancel_delayed_work_sync(&phydev->state_queue); @@ -1154,7 +1186,7 @@ void phy_state_machine(struct work_struct *work) mutex_unlock(&phydev->lock); if (needs_aneg) - err = phy_start_aneg(phydev); + err = phy_start_aneg_priv(phydev, false); else if (do_suspend) phy_suspend(phydev); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 1b52520715ae..85c01247f2e3 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -990,7 +990,7 @@ static void team_port_disable(struct team *team, #define TEAM_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_RXCSUM | NETIF_F_ALL_TSO) -static void ___team_compute_features(struct team *team) +static void __team_compute_features(struct team *team) { struct team_port *port; u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL; @@ -1023,16 +1023,10 @@ static void ___team_compute_features(struct team *team) team->dev->priv_flags |= IFF_XMIT_DST_RELEASE; } -static void __team_compute_features(struct team *team) -{ - ___team_compute_features(team); - netdev_change_features(team->dev); -} - static void team_compute_features(struct team *team) { mutex_lock(&team->lock); - ___team_compute_features(team); + __team_compute_features(team); mutex_unlock(&team->lock); netdev_change_features(team->dev); } @@ -1641,6 +1635,7 @@ static void team_uninit(struct net_device *dev) team_notify_peers_fini(team); team_queue_override_fini(team); mutex_unlock(&team->lock); + netdev_change_features(dev); } static void team_destructor(struct net_device *dev) @@ -1928,6 +1923,10 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev) mutex_lock(&team->lock); err = team_port_add(team, port_dev); mutex_unlock(&team->lock); + + if (!err) + netdev_change_features(dev); + return err; } @@ -1939,6 +1938,10 @@ static int team_del_slave(struct net_device 
*dev, struct net_device *port_dev) mutex_lock(&team->lock); err = team_port_del(team, port_dev); mutex_unlock(&team->lock); + + if (!err) + netdev_change_features(dev); + return err; } @@ -2358,8 +2361,10 @@ start_again: hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_OPTIONS_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(skb); return -EMSGSIZE; + } if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; @@ -2631,8 +2636,10 @@ start_again: hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_PORT_LIST_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(skb); return -EMSGSIZE; + } if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 3dd490f53e48..f28bd74ac275 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -369,7 +369,7 @@ config USB_NET_NET1080 optionally with LEDs that indicate traffic config USB_NET_PLUSB - tristate "Prolific PL-2301/2302/25A1 based cables" + tristate "Prolific PL-2301/2302/25A1/27A1 based cables" # if the handshake/init/reset problems, from original 'plusb', # are ever resolved ... 
then remove "experimental" depends on USB_USBNET diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c index 8a40202c0a17..c4f1c363e24b 100644 --- a/drivers/net/usb/ch9200.c +++ b/drivers/net/usb/ch9200.c @@ -254,14 +254,9 @@ static struct sk_buff *ch9200_tx_fixup(struct usbnet *dev, struct sk_buff *skb, tx_overhead = 0x40; len = skb->len; - if (skb_headroom(skb) < tx_overhead) { - struct sk_buff *skb2; - - skb2 = skb_copy_expand(skb, tx_overhead, 0, flags); + if (skb_cow_head(skb, tx_overhead)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } __skb_push(skb, tx_overhead); diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c index e221bfcee76b..947bea81d924 100644 --- a/drivers/net/usb/cx82310_eth.c +++ b/drivers/net/usb/cx82310_eth.c @@ -293,12 +293,9 @@ static struct sk_buff *cx82310_tx_fixup(struct usbnet *dev, struct sk_buff *skb, { int len = skb->len; - if (skb_headroom(skb) < 2) { - struct sk_buff *skb2 = skb_copy_expand(skb, 2, 0, flags); + if (skb_cow_head(skb, 2)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } skb_push(skb, 2); diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 4f2e8141dbe2..00067a0c51ca 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2534,13 +2534,6 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, SET_NETDEV_DEV(net, &interface->dev); SET_NETDEV_DEVTYPE(net, &hso_type); - /* registering our net device */ - result = register_netdev(net); - if (result) { - dev_err(&interface->dev, "Failed to register device\n"); - goto exit; - } - /* start allocating */ for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL); @@ -2560,6 +2553,13 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, add_net_device(hso_dev); + /* registering our net device */ + result = register_netdev(net); + if (result) { 
+ dev_err(&interface->dev, "Failed to register device\n"); + goto exit; + } + hso_log_port(hso_dev); hso_create_rfkill(hso_dev, interface); @@ -3279,9 +3279,9 @@ static void __exit hso_exit(void) pr_info("unloaded\n"); tty_unregister_driver(tty_drv); - put_tty_driver(tty_drv); /* deregister the usb driver */ usb_deregister(&hso_driver); + put_tty_driver(tty_drv); } /* Module definitions */ diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 876f02f4945e..2a2c3edb6bad 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -803,18 +803,12 @@ static netdev_tx_t kaweth_start_xmit(struct sk_buff *skb, } /* We now decide whether we can put our special header into the sk_buff */ - if (skb_cloned(skb) || skb_headroom(skb) < 2) { - /* no such luck - we make our own */ - struct sk_buff *copied_skb; - copied_skb = skb_copy_expand(skb, 2, 0, GFP_ATOMIC); - dev_kfree_skb_irq(skb); - skb = copied_skb; - if (!copied_skb) { - kaweth->stats.tx_errors++; - netif_start_queue(net); - spin_unlock_irq(&kaweth->device_lock); - return NETDEV_TX_OK; - } + if (skb_cow_head(skb, 2)) { + kaweth->stats.tx_errors++; + netif_start_queue(net); + spin_unlock_irq(&kaweth->device_lock); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; } private_header = (__le16 *)__skb_push(skb, 2); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 9889a70ff4f6..636f48f19d1e 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2607,14 +2607,9 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev, { u32 tx_cmd_a, tx_cmd_b; - if (skb_headroom(skb) < TX_OVERHEAD) { - struct sk_buff *skb2; - - skb2 = skb_copy_expand(skb, TX_OVERHEAD, 0, flags); + if (skb_cow_head(skb, TX_OVERHEAD)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } if (lan78xx_linearize(skb) < 0) diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 22e1a9a99a7d..6fe59373cba9 100644 --- a/drivers/net/usb/plusb.c 
+++ b/drivers/net/usb/plusb.c @@ -102,7 +102,7 @@ static int pl_reset(struct usbnet *dev) } static const struct driver_info prolific_info = { - .description = "Prolific PL-2301/PL-2302/PL-25A1", + .description = "Prolific PL-2301/PL-2302/PL-25A1/PL-27A1", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT, /* some PL-2302 versions seem to fail usb_set_interface() */ .reset = pl_reset, @@ -139,6 +139,17 @@ static const struct usb_device_id products [] = { * Host-to-Host Cable */ .driver_info = (unsigned long) &prolific_info, + +}, + +/* super speed cables */ +{ + USB_DEVICE(0x067b, 0x27a1), /* PL-27A1, no eeprom + * also: goobay Active USB 3.0 + * Data Link, + * Unitek Y-3501 + */ + .driver_info = (unsigned long) &prolific_info, }, { }, // END @@ -158,5 +169,5 @@ static struct usb_driver plusb_driver = { module_usb_driver(plusb_driver); MODULE_AUTHOR("David Brownell"); -MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1 USB Host to Host Link Driver"); +MODULE_DESCRIPTION("Prolific PL-2301/2302/25A1/27A1 USB Host to Host Link Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 156f7f85e486..2474618404f5 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -908,7 +908,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ - {QMI_FIXED_INTF(0x1bc7, 0x1201, 2)}, /* Telit LE920 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */ {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */ {QMI_FIXED_INTF(0x0b3c, 0xc000, 4)}, /* Olivetti Olicard 100 */ {QMI_FIXED_INTF(0x0b3c, 0xc001, 4)}, /* Olivetti Olicard 120 */ diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 0b17b40d7a4f..190de9a90f73 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -2203,13 
+2203,9 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev, { u32 tx_cmd_a, tx_cmd_b; - if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) { - struct sk_buff *skb2 = - skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags); + if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN) | TX_CMD_A_FCS; diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 831aa33d078a..5f19fb0f025d 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -2001,13 +2001,13 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev, /* We do not advertise SG, so skbs should be already linearized */ BUG_ON(skb_shinfo(skb)->nr_frags); - if (skb_headroom(skb) < overhead) { - struct sk_buff *skb2 = skb_copy_expand(skb, - overhead, 0, flags); + /* Make writable and expand header space by overhead if required */ + if (skb_cow_head(skb, overhead)) { + /* Must deallocate here as returning NULL to indicate error + * means the skb won't be deallocated in the caller. 
+ */ dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } if (csum) { diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 4a1e9c489f1f..aadfe1d1c37e 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -456,14 +456,9 @@ static struct sk_buff *sr9700_tx_fixup(struct usbnet *dev, struct sk_buff *skb, len = skb->len; - if (skb_headroom(skb) < SR_TX_OVERHEAD) { - struct sk_buff *skb2; - - skb2 = skb_copy_expand(skb, SR_TX_OVERHEAD, 0, flags); + if (skb_cow_head(skb, SR_TX_OVERHEAD)) { dev_kfree_skb_any(skb); - skb = skb2; - if (!skb) - return NULL; + return NULL; } __skb_push(skb, SR_TX_OVERHEAD); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 3de65ea6531a..453244805c52 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1929,7 +1929,7 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, " value=0x%04x index=0x%04x size=%d\n", cmd, reqtype, value, index, size); - if (data) { + if (size) { buf = kmalloc(size, GFP_KERNEL); if (!buf) goto out; @@ -1938,8 +1938,13 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, err = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), cmd, reqtype, value, index, buf, size, USB_CTRL_GET_TIMEOUT); - if (err > 0 && err <= size) - memcpy(data, buf, err); + if (err > 0 && err <= size) { + if (data) + memcpy(data, buf, err); + else + netdev_dbg(dev->net, + "Huh? 
Data requested but thrown away.\n"); + } kfree(buf); out: return err; @@ -1960,7 +1965,13 @@ static int __usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, buf = kmemdup(data, size, GFP_KERNEL); if (!buf) goto out; - } + } else { + if (size) { + WARN_ON_ONCE(1); + err = -EINVAL; + goto out; + } + } err = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), cmd, reqtype, value, index, buf, size, diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ea9890d61967..f36584616e7d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2230,14 +2230,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) #define MIN_MTU ETH_MIN_MTU #define MAX_MTU ETH_MAX_MTU -static int virtnet_probe(struct virtio_device *vdev) +static int virtnet_validate(struct virtio_device *vdev) { - int i, err; - struct net_device *dev; - struct virtnet_info *vi; - u16 max_queue_pairs; - int mtu; - if (!vdev->config->get) { dev_err(&vdev->dev, "%s failure: config access disabled\n", __func__); @@ -2247,6 +2241,25 @@ static int virtnet_probe(struct virtio_device *vdev) if (!virtnet_validate_features(vdev)) return -EINVAL; + if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { + int mtu = virtio_cread16(vdev, + offsetof(struct virtio_net_config, + mtu)); + if (mtu < MIN_MTU) + __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); + } + + return 0; +} + +static int virtnet_probe(struct virtio_device *vdev) +{ + int i, err; + struct net_device *dev; + struct virtnet_info *vi; + u16 max_queue_pairs; + int mtu; + /* Find if host supports multiqueue virtio_net device */ err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ, struct virtio_net_config, @@ -2362,11 +2375,20 @@ static int virtnet_probe(struct virtio_device *vdev) offsetof(struct virtio_net_config, mtu)); if (mtu < dev->min_mtu) { - __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); - } else { - dev->mtu = mtu; - dev->max_mtu = mtu; + /* Should never trigger: MTU was previously validated + * in 
virtnet_validate. + */ + dev_err(&vdev->dev, "device MTU appears to have changed " + "it is now %d < %d", mtu, dev->min_mtu); + goto free_stats; } + + dev->mtu = mtu; + dev->max_mtu = mtu; + + /* TODO: size buffers correctly in this case. */ + if (dev->mtu > ETH_DATA_LEN) + vi->big_packets = true; } if (vi->any_header_sg) @@ -2544,6 +2566,7 @@ static struct virtio_driver virtio_net_driver = { .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, + .validate = virtnet_validate, .probe = virtnet_probe, .remove = virtnet_remove, .config_changed = virtnet_config_changed, diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index d6988db1930d..7d909c8183e9 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1128,7 +1128,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) goto nla_put_failure; /* rule only needs to appear once */ - nlh->nlmsg_flags &= NLM_F_EXCL; + nlh->nlmsg_flags |= NLM_F_EXCL; frh = nlmsg_data(nlh); memset(frh, 0, sizeof(*frh)); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 23d4a1728cdf..351bac8f6503 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -934,8 +934,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL); if (rc < 0) goto out_unlock; + nvdimm_bus_unlock(&nvdimm_bus->dev); + if (copy_to_user(p, buf, buf_len)) rc = -EFAULT; + + vfree(buf); + return rc; + out_unlock: nvdimm_bus_unlock(&nvdimm_bus->dev); out: diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index b3323c0697f6..ca6d572c48fc 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -243,7 +243,15 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, } if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) { - if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)) { + /* + * FIXME: nsio_rw_bytes() may be called from atomic + * context in the btt case and 
nvdimm_clear_poison() + * takes a sleeping lock. Until the locking can be + * reworked this capability requires that the namespace + * is not claimed by btt. + */ + if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512) + && (!ndns->claim || !is_nd_btt(ndns->claim))) { long cleared; cleared = nvdimm_clear_poison(&ndns->dev, offset, size); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 0eedc49e0d47..8b721321be5b 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -395,7 +395,7 @@ EXPORT_SYMBOL_GPL(nvdimm_create); int alias_dpa_busy(struct device *dev, void *data) { - resource_size_t map_end, blk_start, new, busy; + resource_size_t map_end, blk_start, new; struct blk_alloc_info *info = data; struct nd_mapping *nd_mapping; struct nd_region *nd_region; @@ -436,29 +436,19 @@ int alias_dpa_busy(struct device *dev, void *data) retry: /* * Find the free dpa from the end of the last pmem allocation to - * the end of the interleave-set mapping that is not already - * covered by a blk allocation. + * the end of the interleave-set mapping. 
*/ - busy = 0; for_each_dpa_resource(ndd, res) { + if (strncmp(res->name, "pmem", 4) != 0) + continue; if ((res->start >= blk_start && res->start < map_end) || (res->end >= blk_start && res->end <= map_end)) { - if (strncmp(res->name, "pmem", 4) == 0) { - new = max(blk_start, min(map_end + 1, - res->end + 1)); - if (new != blk_start) { - blk_start = new; - goto retry; - } - } else - busy += min(map_end, res->end) - - max(nd_mapping->start, res->start) + 1; - } else if (nd_mapping->start > res->start - && map_end < res->end) { - /* total eclipse of the PMEM region mapping */ - busy += nd_mapping->size; - break; + new = max(blk_start, min(map_end + 1, res->end + 1)); + if (new != blk_start) { + blk_start = new; + goto retry; + } } } @@ -470,52 +460,11 @@ int alias_dpa_busy(struct device *dev, void *data) return 1; } - info->available -= blk_start - nd_mapping->start + busy; + info->available -= blk_start - nd_mapping->start; return 0; } -static int blk_dpa_busy(struct device *dev, void *data) -{ - struct blk_alloc_info *info = data; - struct nd_mapping *nd_mapping; - struct nd_region *nd_region; - resource_size_t map_end; - int i; - - if (!is_nd_pmem(dev)) - return 0; - - nd_region = to_nd_region(dev); - for (i = 0; i < nd_region->ndr_mappings; i++) { - nd_mapping = &nd_region->mapping[i]; - if (nd_mapping->nvdimm == info->nd_mapping->nvdimm) - break; - } - - if (i >= nd_region->ndr_mappings) - return 0; - - map_end = nd_mapping->start + nd_mapping->size - 1; - if (info->res->start >= nd_mapping->start - && info->res->start < map_end) { - if (info->res->end <= map_end) { - info->busy = 0; - return 1; - } else { - info->busy -= info->res->end - map_end; - return 0; - } - } else if (info->res->end >= nd_mapping->start - && info->res->end <= map_end) { - info->busy -= nd_mapping->start - info->res->start; - return 0; - } else { - info->busy -= nd_mapping->size; - return 0; - } -} - /** * nd_blk_available_dpa - account the unused dpa of BLK region * @nd_mapping: 
container of dpa-resource-root + labels @@ -545,11 +494,7 @@ resource_size_t nd_blk_available_dpa(struct nd_region *nd_region) for_each_dpa_resource(ndd, res) { if (strncmp(res->name, "blk", 3) != 0) continue; - - info.res = res; - info.busy = resource_size(res); - device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy); - info.available -= info.busy; + info.available -= resource_size(res); } return info.available; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9583a5f58a1d..d5e0906262ea 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -49,10 +49,9 @@ unsigned char shutdown_timeout = 5; module_param(shutdown_timeout, byte, 0644); MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown"); -unsigned int nvme_max_retries = 5; -module_param_named(max_retries, nvme_max_retries, uint, 0644); +static u8 nvme_max_retries = 5; +module_param_named(max_retries, nvme_max_retries, byte, 0644); MODULE_PARM_DESC(max_retries, "max number of retries a command may have"); -EXPORT_SYMBOL_GPL(nvme_max_retries); static int nvme_char_major; module_param(nvme_char_major, int, 0); @@ -62,11 +61,66 @@ module_param(default_ps_max_latency_us, ulong, 0644); MODULE_PARM_DESC(default_ps_max_latency_us, "max power saving latency for new devices; use PM QOS to change per device"); +static bool force_apst; +module_param(force_apst, bool, 0644); +MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off"); + static LIST_HEAD(nvme_ctrl_list); static DEFINE_SPINLOCK(dev_list_lock); static struct class *nvme_class; +static int nvme_error_status(struct request *req) +{ + switch (nvme_req(req)->status & 0x7ff) { + case NVME_SC_SUCCESS: + return 0; + case NVME_SC_CAP_EXCEEDED: + return -ENOSPC; + default: + return -EIO; + + /* + * XXX: these errors are a nasty side-band protocol to + * drivers/md/dm-mpath.c:noretry_error() that aren't documented + * anywhere.. 
+ */ + case NVME_SC_CMD_SEQ_ERROR: + return -EILSEQ; + case NVME_SC_ONCS_NOT_SUPPORTED: + return -EOPNOTSUPP; + case NVME_SC_WRITE_FAULT: + case NVME_SC_READ_ERROR: + case NVME_SC_UNWRITTEN_BLOCK: + return -ENODATA; + } +} + +static inline bool nvme_req_needs_retry(struct request *req) +{ + if (blk_noretry_request(req)) + return false; + if (nvme_req(req)->status & NVME_SC_DNR) + return false; + if (jiffies - req->start_time >= req->timeout) + return false; + if (nvme_req(req)->retries >= nvme_max_retries) + return false; + return true; +} + +void nvme_complete_rq(struct request *req) +{ + if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) { + nvme_req(req)->retries++; + blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q)); + return; + } + + blk_mq_end_request(req, nvme_error_status(req)); +} +EXPORT_SYMBOL_GPL(nvme_complete_rq); + void nvme_cancel_request(struct request *req, void *data, bool reserved) { int status; @@ -80,7 +134,9 @@ void nvme_cancel_request(struct request *req, void *data, bool reserved) status = NVME_SC_ABORT_REQ; if (blk_queue_dying(req->q)) status |= NVME_SC_DNR; - blk_mq_complete_request(req, status); + nvme_req(req)->status = status; + blk_mq_complete_request(req); + } EXPORT_SYMBOL_GPL(nvme_cancel_request); @@ -205,12 +261,6 @@ fail: return NULL; } -void nvme_requeue_req(struct request *req) -{ - blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q)); -} -EXPORT_SYMBOL_GPL(nvme_requeue_req); - struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags, int qid) { @@ -327,6 +377,12 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, { int ret = BLK_MQ_RQ_QUEUE_OK; + if (!(req->rq_flags & RQF_DONTPREP)) { + nvme_req(req)->retries = 0; + nvme_req(req)->flags = 0; + req->rq_flags |= RQF_DONTPREP; + } + switch (req_op(req)) { case REQ_OP_DRV_IN: case REQ_OP_DRV_OUT: @@ -335,6 +391,8 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, case REQ_OP_FLUSH: 
nvme_setup_flush(ns, cmd); break; + case REQ_OP_WRITE_ZEROES: + /* currently only aliased to deallocate for a few ctrls: */ case REQ_OP_DISCARD: ret = nvme_setup_discard(ns, req, cmd); break; @@ -378,7 +436,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, blk_execute_rq(req->q, NULL, req, at_head); if (result) *result = nvme_req(req)->result; - ret = req->errors; + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else + ret = nvme_req(req)->status; out: blk_mq_free_request(req); return ret; @@ -463,7 +524,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, } submit: blk_execute_rq(req->q, disk, req, 0); - ret = req->errors; + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else + ret = nvme_req(req)->status; if (result) *result = le32_to_cpu(nvme_req(req)->result.u32); if (meta && !ret && !write) { @@ -900,16 +964,14 @@ static void nvme_config_discard(struct nvme_ns *ns) BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES) - ns->queue->limits.discard_zeroes_data = 1; - else - ns->queue->limits.discard_zeroes_data = 0; - ns->queue->limits.discard_alignment = logical_block_size; ns->queue->limits.discard_granularity = logical_block_size; blk_queue_max_discard_sectors(ns->queue, UINT_MAX); blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); + + if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) + blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX); } static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) @@ -1267,7 +1329,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) * heuristic: we are willing to spend at most 2% of the time * transitioning between power states. 
Therefore, when running * in any given state, we will enter the next lower-power - * non-operational state after waiting 100 * (enlat + exlat) + * non-operational state after waiting 50 * (enlat + exlat) * microseconds, as long as that state's total latency is under * the requested maximum latency. * @@ -1278,6 +1340,8 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) unsigned apste; struct nvme_feat_auto_pst *table; + u64 max_lat_us = 0; + int max_ps = -1; int ret; /* @@ -1299,6 +1363,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) if (ctrl->ps_max_latency_us == 0) { /* Turn off APST. */ apste = 0; + dev_dbg(ctrl->device, "APST disabled\n"); } else { __le64 target = cpu_to_le64(0); int state; @@ -1316,6 +1381,14 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) table->entries[state] = target; /* + * Don't allow transitions to the deepest state + * if it's quirked off. + */ + if (state == ctrl->npss && + (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) + continue; + + /* * Is this state a useful non-operational state for * higher-power states to autonomously transition to? */ @@ -1340,9 +1413,22 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) target = cpu_to_le64((state << 3) | (transition_ms << 8)); + + if (max_ps == -1) + max_ps = state; + + if (total_latency_us > max_lat_us) + max_lat_us = total_latency_us; } apste = 1; + + if (max_ps == -1) { + dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n"); + } else { + dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n", + max_ps, max_lat_us, (int)sizeof(*table), table); + } } ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste, @@ -1387,16 +1473,15 @@ struct nvme_core_quirk_entry { }; static const struct nvme_core_quirk_entry core_quirks[] = { - /* - * Seen on a Samsung "SM951 NVMe SAMSUNG 256GB": using APST causes - * the controller to go out to lunch. 
It dies when the watchdog - * timer reads CSTS and gets 0xffffffff. - */ { - .vid = 0x144d, - .fr = "BXW75D0Q", + /* + * This Toshiba device seems to die using any APST states. See: + * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1678184/comments/11 + */ + .vid = 0x1179, + .mn = "THNSF5256GPUK TOSHIBA", .quirks = NVME_QUIRK_NO_APST, - }, + } }; /* match is null-terminated but idstr is space-padded. */ @@ -1481,6 +1566,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } } + if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) { + dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n"); + ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS; + } + ctrl->oacs = le16_to_cpu(id->oacs); ctrl->vid = le16_to_cpu(id->vid); ctrl->oncs = le16_to_cpup(&id->oncs); @@ -1503,7 +1593,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->npss = id->npss; prev_apsta = ctrl->apsta; - ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta; + if (ctrl->quirks & NVME_QUIRK_NO_APST) { + if (force_apst && id->apsta) { + dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n"); + ctrl->apsta = 1; + } else { + ctrl->apsta = 0; + } + } else { + ctrl->apsta = id->apsta; + } memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd)); if (ctrl->ops->is_fabrics) { @@ -2386,7 +2485,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl) mutex_lock(&ctrl->namespaces_mutex); list_for_each_entry(ns, &ctrl->namespaces, list) - blk_mq_freeze_queue_start(ns->queue); + blk_freeze_queue_start(ns->queue); mutex_unlock(&ctrl->namespaces_mutex); } EXPORT_SYMBOL_GPL(nvme_start_freeze); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5b7386f69f4d..990e6fb32a63 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -471,6 +471,16 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) } EXPORT_SYMBOL_GPL(nvmf_connect_io_queue); +bool 
nvmf_should_reconnect(struct nvme_ctrl *ctrl) +{ + if (ctrl->opts->max_reconnects != -1 && + ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects) + return true; + + return false; +} +EXPORT_SYMBOL_GPL(nvmf_should_reconnect); + /** * nvmf_register_transport() - NVMe Fabrics Library registration function. * @ops: Transport ops instance to be registered to the @@ -533,6 +543,7 @@ static const match_table_t opt_tokens = { { NVMF_OPT_QUEUE_SIZE, "queue_size=%d" }, { NVMF_OPT_NR_IO_QUEUES, "nr_io_queues=%d" }, { NVMF_OPT_RECONNECT_DELAY, "reconnect_delay=%d" }, + { NVMF_OPT_CTRL_LOSS_TMO, "ctrl_loss_tmo=%d" }, { NVMF_OPT_KATO, "keep_alive_tmo=%d" }, { NVMF_OPT_HOSTNQN, "hostnqn=%s" }, { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, @@ -546,6 +557,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, char *options, *o, *p; int token, ret = 0; size_t nqnlen = 0; + int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO; /* Set defaults */ opts->queue_size = NVMF_DEF_QUEUE_SIZE; @@ -655,6 +667,16 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } opts->kato = token; break; + case NVMF_OPT_CTRL_LOSS_TMO: + if (match_int(args, &token)) { + ret = -EINVAL; + goto out; + } + + if (token < 0) + pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n"); + ctrl_loss_tmo = token; + break; case NVMF_OPT_HOSTNQN: if (opts->host) { pr_err("hostnqn already user-assigned: %s\n", @@ -710,6 +732,12 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } } + if (ctrl_loss_tmo < 0) + opts->max_reconnects = -1; + else + opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, + opts->reconnect_delay); + if (!opts->host) { kref_get(&nvmf_default_host->ref); opts->host = nvmf_default_host; diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 156018182ce4..f5a9c1fb186f 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -21,6 +21,8 @@ #define NVMF_MAX_QUEUE_SIZE 1024 #define NVMF_DEF_QUEUE_SIZE 128 #define NVMF_DEF_RECONNECT_DELAY 
10 +/* default to 600 seconds of reconnect attempts before giving up */ +#define NVMF_DEF_CTRL_LOSS_TMO 600 /* * Define a host as seen by the target. We allocate one at boot, but also @@ -53,6 +55,7 @@ enum { NVMF_OPT_HOSTNQN = 1 << 8, NVMF_OPT_RECONNECT_DELAY = 1 << 9, NVMF_OPT_HOST_TRADDR = 1 << 10, + NVMF_OPT_CTRL_LOSS_TMO = 1 << 11, }; /** @@ -77,6 +80,10 @@ enum { * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN. * @kato: Keep-alive timeout. * @host: Virtual NVMe host, contains the NQN and Host ID. + * @nr_reconnects: number of reconnect attempted since the last ctrl failure + * @max_reconnects: maximum number of allowed reconnect attempts before removing + * the controller, (-1) means reconnect forever, zero means remove + * immediately; */ struct nvmf_ctrl_options { unsigned mask; @@ -91,6 +98,8 @@ struct nvmf_ctrl_options { bool discovery_nqn; unsigned int kato; struct nvmf_host *host; + int nr_reconnects; + int max_reconnects; }; /* @@ -133,5 +142,6 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); +bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); #endif /* _NVME_FABRICS_H */ diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 9690beb15e69..4976db56e351 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -19,6 +19,7 @@ #include <linux/parser.h> #include <uapi/scsi/fc/fc_fs.h> #include <uapi/scsi/fc/fc_els.h> +#include <linux/delay.h> #include "nvme.h" #include "fabrics.h" @@ -44,6 +45,8 @@ enum nvme_fc_queue_flags { #define NVMEFC_QUEUE_DELAY 3 /* ms units */ +#define NVME_FC_MAX_CONNECT_ATTEMPTS 1 + struct nvme_fc_queue { struct nvme_fc_ctrl *ctrl; struct device *dev; @@ -61,16 +64,24 @@ struct nvme_fc_queue { unsigned long flags; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 
+enum nvme_fcop_flags { + FCOP_FLAGS_TERMIO = (1 << 0), + FCOP_FLAGS_RELEASED = (1 << 1), + FCOP_FLAGS_COMPLETE = (1 << 2), + FCOP_FLAGS_AEN = (1 << 3), +}; + struct nvmefc_ls_req_op { struct nvmefc_ls_req ls_req; - struct nvme_fc_ctrl *ctrl; + struct nvme_fc_rport *rport; struct nvme_fc_queue *queue; struct request *rq; + u32 flags; int ls_error; struct completion ls_done; - struct list_head lsreq_list; /* ctrl->ls_req_list */ + struct list_head lsreq_list; /* rport->ls_req_list */ bool req_queued; }; @@ -79,6 +90,7 @@ enum nvme_fcpop_state { FCPOP_STATE_IDLE = 1, FCPOP_STATE_ACTIVE = 2, FCPOP_STATE_ABORTED = 3, + FCPOP_STATE_COMPLETE = 4, }; struct nvme_fc_fcp_op { @@ -97,6 +109,7 @@ struct nvme_fc_fcp_op { struct request *rq; atomic_t state; + u32 flags; u32 rqno; u32 nents; @@ -120,23 +133,24 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; + struct list_head ls_req_list; + struct device *dev; /* physical device for dma */ + struct nvme_fc_lport *lport; spinlock_t lock; struct kref ref; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ -enum nvme_fcctrl_state { - FCCTRL_INIT = 0, - FCCTRL_ACTIVE = 1, +enum nvme_fcctrl_flags { + FCCTRL_TERMIO = (1 << 0), }; struct nvme_fc_ctrl { spinlock_t lock; struct nvme_fc_queue *queues; - u32 queue_count; - struct device *dev; struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; + u32 queue_count; u32 cnum; u64 association_id; @@ -144,14 +158,19 @@ struct nvme_fc_ctrl { u64 cap; struct list_head ctrl_list; /* rport->ctrl_list */ - struct list_head ls_req_list; struct blk_mq_tag_set admin_tag_set; struct blk_mq_tag_set tag_set; struct work_struct delete_work; + struct work_struct reset_work; + struct delayed_work connect_work; + int reconnect_delay; + int connect_attempts; + struct kref ref; - int state; + u32 flags; + u32 iocnt; struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS]; @@ -419,9 +438,12 @@ nvme_fc_register_remoteport(struct 
nvme_fc_local_port *localport, INIT_LIST_HEAD(&newrec->endp_list); INIT_LIST_HEAD(&newrec->ctrl_list); + INIT_LIST_HEAD(&newrec->ls_req_list); kref_init(&newrec->ref); spin_lock_init(&newrec->lock); newrec->remoteport.localport = &lport->localport; + newrec->dev = lport->dev; + newrec->lport = lport; newrec->remoteport.private = &newrec[1]; newrec->remoteport.port_role = pinfo->port_role; newrec->remoteport.node_name = pinfo->node_name; @@ -444,7 +466,6 @@ out_kfree_rport: out_reghost_failed: *portptr = NULL; return ret; - } EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); @@ -487,6 +508,30 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport) return kref_get_unless_zero(&rport->ref); } +static int +nvme_fc_abort_lsops(struct nvme_fc_rport *rport) +{ + struct nvmefc_ls_req_op *lsop; + unsigned long flags; + +restart: + spin_lock_irqsave(&rport->lock, flags); + + list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) { + if (!(lsop->flags & FCOP_FLAGS_TERMIO)) { + lsop->flags |= FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&rport->lock, flags); + rport->lport->ops->ls_abort(&rport->lport->localport, + &rport->remoteport, + &lsop->ls_req); + goto restart; + } + } + spin_unlock_irqrestore(&rport->lock, flags); + + return 0; +} + /** * nvme_fc_unregister_remoteport - transport entry point called by an * LLDD to deregister/remove a previously @@ -522,6 +567,8 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) spin_unlock_irqrestore(&rport->lock, flags); + nvme_fc_abort_lsops(rport); + nvme_fc_rport_put(rport); return 0; } @@ -624,16 +671,16 @@ static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); static void -__nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl, - struct nvmefc_ls_req_op *lsop) +__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) { + struct nvme_fc_rport *rport = lsop->rport; struct nvmefc_ls_req *lsreq = &lsop->ls_req; unsigned long flags; - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irqsave(&rport->lock, flags); if 
(!lsop->req_queued) { - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); return; } @@ -641,56 +688,71 @@ __nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl, lsop->req_queued = false; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); - fc_dma_unmap_single(ctrl->dev, lsreq->rqstdma, + fc_dma_unmap_single(rport->dev, lsreq->rqstdma, (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); - nvme_fc_ctrl_put(ctrl); + nvme_fc_rport_put(rport); } static int -__nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, +__nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop, void (*done)(struct nvmefc_ls_req *req, int status)) { struct nvmefc_ls_req *lsreq = &lsop->ls_req; unsigned long flags; - int ret; + int ret = 0; - if (!nvme_fc_ctrl_get(ctrl)) + if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) + return -ECONNREFUSED; + + if (!nvme_fc_rport_get(rport)) return -ESHUTDOWN; lsreq->done = done; - lsop->ctrl = ctrl; + lsop->rport = rport; lsop->req_queued = false; INIT_LIST_HEAD(&lsop->lsreq_list); init_completion(&lsop->ls_done); - lsreq->rqstdma = fc_dma_map_single(ctrl->dev, lsreq->rqstaddr, + lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr, lsreq->rqstlen + lsreq->rsplen, DMA_BIDIRECTIONAL); - if (fc_dma_mapping_error(ctrl->dev, lsreq->rqstdma)) { - nvme_fc_ctrl_put(ctrl); - dev_err(ctrl->dev, - "els request command failed EFAULT.\n"); - return -EFAULT; + if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) { + ret = -EFAULT; + goto out_putrport; } lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irqsave(&rport->lock, flags); - list_add_tail(&lsop->lsreq_list, &ctrl->ls_req_list); + list_add_tail(&lsop->lsreq_list, &rport->ls_req_list); lsop->req_queued = true; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); - ret = 
ctrl->lport->ops->ls_req(&ctrl->lport->localport, - &ctrl->rport->remoteport, lsreq); + ret = rport->lport->ops->ls_req(&rport->lport->localport, + &rport->remoteport, lsreq); if (ret) - lsop->ls_error = ret; + goto out_unlink; + + return 0; + +out_unlink: + lsop->ls_error = ret; + spin_lock_irqsave(&rport->lock, flags); + lsop->req_queued = false; + list_del(&lsop->lsreq_list); + spin_unlock_irqrestore(&rport->lock, flags); + fc_dma_unmap_single(rport->dev, lsreq->rqstdma, + (lsreq->rqstlen + lsreq->rsplen), + DMA_BIDIRECTIONAL); +out_putrport: + nvme_fc_rport_put(rport); return ret; } @@ -705,15 +767,15 @@ nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status) } static int -nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) +nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop) { struct nvmefc_ls_req *lsreq = &lsop->ls_req; struct fcnvme_ls_rjt *rjt = lsreq->rspaddr; int ret; - ret = __nvme_fc_send_ls_req(ctrl, lsop, nvme_fc_send_ls_req_done); + ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done); - if (!ret) + if (!ret) { /* * No timeout/not interruptible as we need the struct * to exist until the lldd calls us back. Thus mandate @@ -722,14 +784,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) */ wait_for_completion(&lsop->ls_done); - __nvme_fc_finish_ls_req(ctrl, lsop); + __nvme_fc_finish_ls_req(lsop); - if (ret) { - dev_err(ctrl->dev, - "ls request command failed (%d).\n", ret); - return ret; + ret = lsop->ls_error; } + if (ret) + return ret; + /* ACC or RJT payload ? 
*/ if (rjt->w0.ls_cmd == FCNVME_LS_RJT) return -ENXIO; @@ -737,19 +799,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) return 0; } -static void -nvme_fc_send_ls_req_async(struct nvme_fc_ctrl *ctrl, +static int +nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop, void (*done)(struct nvmefc_ls_req *req, int status)) { - int ret; - - ret = __nvme_fc_send_ls_req(ctrl, lsop, done); - /* don't wait for completion */ - if (ret) - done(&lsop->ls_req, ret); + return __nvme_fc_send_ls_req(rport, lsop, done); } /* Validation Error indexes into the string table below */ @@ -839,7 +896,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, lsreq->rsplen = sizeof(*assoc_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - ret = nvme_fc_send_ls_req(ctrl, lsop); + ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) goto out_free_buffer; @@ -848,11 +905,12 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, /* validate the ACC response */ if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) fcret = VERR_LSACC; - if (assoc_acc->hdr.desc_list_len != + else if (assoc_acc->hdr.desc_list_len != fcnvme_lsdesc_len( sizeof(struct fcnvme_ls_cr_assoc_acc))) fcret = VERR_CR_ASSOC_ACC_LEN; - if (assoc_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) + else if (assoc_acc->hdr.rqst.desc_tag != + cpu_to_be32(FCNVME_LSDESC_RQST)) fcret = VERR_LSDESC_RQST; else if (assoc_acc->hdr.rqst.desc_len != fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) @@ -946,7 +1004,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, lsreq->rsplen = sizeof(*conn_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - ret = nvme_fc_send_ls_req(ctrl, lsop); + ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) goto out_free_buffer; @@ -955,10 +1013,10 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, /* validate the ACC response */ if (conn_acc->hdr.w0.ls_cmd != 
FCNVME_LS_ACC) fcret = VERR_LSACC; - if (conn_acc->hdr.desc_list_len != + else if (conn_acc->hdr.desc_list_len != fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc))) fcret = VERR_CR_CONN_ACC_LEN; - if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) + else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) fcret = VERR_LSDESC_RQST; else if (conn_acc->hdr.rqst.desc_len != fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) @@ -997,14 +1055,8 @@ static void nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) { struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); - struct nvme_fc_ctrl *ctrl = lsop->ctrl; - __nvme_fc_finish_ls_req(ctrl, lsop); - - if (status) - dev_err(ctrl->dev, - "disconnect assoc ls request command failed (%d).\n", - status); + __nvme_fc_finish_ls_req(lsop); /* fc-nvme iniator doesn't care about success or failure of cmd */ @@ -1035,6 +1087,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) struct fcnvme_ls_disconnect_acc *discon_acc; struct nvmefc_ls_req_op *lsop; struct nvmefc_ls_req *lsreq; + int ret; lsop = kzalloc((sizeof(*lsop) + ctrl->lport->ops->lsrqst_priv_sz + @@ -1077,7 +1130,10 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) lsreq->rsplen = sizeof(*discon_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - nvme_fc_send_ls_req_async(ctrl, lsop, nvme_fc_disconnect_assoc_done); + ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, + nvme_fc_disconnect_assoc_done); + if (ret) + kfree(lsop); /* only meaningful part to terminating the association */ ctrl->association_id = 0; @@ -1086,6 +1142,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) /* *********************** NVME Ctrl Routines **************************** */ +static void __nvme_fc_final_op_cleanup(struct request *rq); static int nvme_fc_reinit_request(void *data, struct request *rq) @@ -1123,21 +1180,84 @@ nvme_fc_exit_request(void *data, struct request *rq, return 
__nvme_fc_exit_request(data, op); } +static int +__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) +{ + int state; + + state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); + if (state != FCPOP_STATE_ACTIVE) { + atomic_set(&op->state, state); + return -ECANCELED; + } + + ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, + &ctrl->rport->remoteport, + op->queue->lldd_handle, + &op->fcp_req); + + return 0; +} + static void -nvme_fc_exit_aen_ops(struct nvme_fc_ctrl *ctrl) +nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) { struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; - int i; + unsigned long flags; + int i, ret; for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { - if (atomic_read(&aen_op->state) == FCPOP_STATE_UNINIT) + if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE) continue; - __nvme_fc_exit_request(ctrl, aen_op); - nvme_fc_ctrl_put(ctrl); + + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) { + ctrl->iocnt++; + aen_op->flags |= FCOP_FLAGS_TERMIO; + } + spin_unlock_irqrestore(&ctrl->lock, flags); + + ret = __nvme_fc_abort_op(ctrl, aen_op); + if (ret) { + /* + * if __nvme_fc_abort_op failed the io wasn't + * active. Thus this call path is running in + * parallel to the io complete. Treat as non-error. 
+ */ + + /* back out the flags/counters */ + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; + aen_op->flags &= ~FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + return; + } } } -void +static inline int +__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, + struct nvme_fc_fcp_op *op) +{ + unsigned long flags; + bool complete_rq = false; + + spin_lock_irqsave(&ctrl->lock, flags); + if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; + } + if (op->flags & FCOP_FLAGS_RELEASED) + complete_rq = true; + else + op->flags |= FCOP_FLAGS_COMPLETE; + spin_unlock_irqrestore(&ctrl->lock, flags); + + return complete_rq; +} + +static void nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) { struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req); @@ -1146,7 +1266,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) struct nvme_fc_ctrl *ctrl = op->ctrl; struct nvme_fc_queue *queue = op->queue; struct nvme_completion *cqe = &op->rsp_iu.cqe; - u16 status; + struct nvme_command *sqe = &op->cmd_iu.sqe; + __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); + union nvme_result result; + bool complete_rq; /* * WARNING: @@ -1181,9 +1304,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) sizeof(op->rsp_iu), DMA_FROM_DEVICE); if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) - status = NVME_SC_ABORT_REQ | NVME_SC_DNR; - else - status = freq->status; + status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); + else if (freq->status) + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); /* * For the linux implementation, if we have an unsuccesful @@ -1211,10 +1334,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) */ if (freq->transferred_length != be32_to_cpu(op->cmd_iu.data_len)) { - status = -EIO; + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); goto done; } - op->nreq.result.u64 = 0; + result.u64 = 0; break; case sizeof(struct nvme_fc_ersp_iu): @@ -1226,28 +1349,40 
@@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) (freq->rcv_rsplen / 4) || be32_to_cpu(op->rsp_iu.xfrd_len) != freq->transferred_length || - op->rqno != le16_to_cpu(cqe->command_id))) { - status = -EIO; + op->rsp_iu.status_code || + sqe->common.command_id != cqe->command_id)) { + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); goto done; } - op->nreq.result = cqe->result; - status = le16_to_cpu(cqe->status) >> 1; + result = cqe->result; + status = cqe->status; break; default: - status = -EIO; + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); goto done; } done: - if (!queue->qnum && op->rqno >= AEN_CMDID_BASE) { - nvme_complete_async_event(&queue->ctrl->ctrl, status, - &op->nreq.result); + if (op->flags & FCOP_FLAGS_AEN) { + nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); + complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); + atomic_set(&op->state, FCPOP_STATE_IDLE); + op->flags = FCOP_FLAGS_AEN; /* clear other flags */ nvme_fc_ctrl_put(ctrl); return; } - blk_mq_complete_request(rq, status); + complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); + if (!complete_rq) { + if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { + status = cpu_to_le16(NVME_SC_ABORT_REQ); + if (blk_queue_dying(rq->q)) + status |= cpu_to_le16(NVME_SC_DNR); + } + nvme_end_request(rq, status, result); + } else + __nvme_fc_final_op_cleanup(rq); } static int @@ -1328,25 +1463,55 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) struct nvme_fc_fcp_op *aen_op; struct nvme_fc_cmd_iu *cmdiu; struct nvme_command *sqe; + void *private; int i, ret; aen_op = ctrl->aen_ops; for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, + GFP_KERNEL); + if (!private) + return -ENOMEM; + cmdiu = &aen_op->cmd_iu; sqe = &cmdiu->sqe; ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], aen_op, (struct request *)NULL, (AEN_CMDID_BASE + i)); - if (ret) + if (ret) { + kfree(private); return ret; + } + + aen_op->flags = 
FCOP_FLAGS_AEN; + aen_op->fcp_req.first_sgl = NULL; /* no sg list */ + aen_op->fcp_req.private = private; memset(sqe, 0, sizeof(*sqe)); sqe->common.opcode = nvme_admin_async_event; + /* Note: core layer may overwrite the sqe.command_id value */ sqe->common.command_id = AEN_CMDID_BASE + i; } return 0; } +static void +nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) +{ + struct nvme_fc_fcp_op *aen_op; + int i; + + aen_op = ctrl->aen_ops; + for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + if (!aen_op->fcp_req.private) + continue; + + __nvme_fc_exit_request(ctrl, aen_op); + + kfree(aen_op->fcp_req.private); + aen_op->fcp_req.private = NULL; + } +} static inline void __nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, @@ -1446,15 +1611,6 @@ __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl, } static void -nvme_fc_destroy_admin_queue(struct nvme_fc_ctrl *ctrl) -{ - __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); - blk_cleanup_queue(ctrl->ctrl.admin_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); - nvme_fc_free_queue(&ctrl->queues[0]); -} - -static void nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) { int i; @@ -1541,19 +1697,27 @@ nvme_fc_ctrl_free(struct kref *ref) container_of(ref, struct nvme_fc_ctrl, ref); unsigned long flags; - if (ctrl->state != FCCTRL_INIT) { - /* remove from rport list */ - spin_lock_irqsave(&ctrl->rport->lock, flags); - list_del(&ctrl->ctrl_list); - spin_unlock_irqrestore(&ctrl->rport->lock, flags); + if (ctrl->ctrl.tagset) { + blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_free_tag_set(&ctrl->tag_set); } + /* remove from rport list */ + spin_lock_irqsave(&ctrl->rport->lock, flags); + list_del(&ctrl->ctrl_list); + spin_unlock_irqrestore(&ctrl->rport->lock, flags); + + blk_cleanup_queue(ctrl->ctrl.admin_q); + blk_mq_free_tag_set(&ctrl->admin_tag_set); + + kfree(ctrl->queues); + put_device(ctrl->dev); nvme_fc_rport_put(ctrl->rport); - kfree(ctrl->queues); ida_simple_remove(&nvme_fc_ctrl_cnt, 
ctrl->cnum); - nvmf_free_options(ctrl->ctrl.opts); + if (ctrl->ctrl.opts) + nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); } @@ -1574,57 +1738,38 @@ nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) * controller. Called after last nvme_put_ctrl() call */ static void -nvme_fc_free_nvme_ctrl(struct nvme_ctrl *nctrl) +nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); WARN_ON(nctrl != &ctrl->ctrl); - /* - * Tear down the association, which will generate link - * traffic to terminate connections - */ - - if (ctrl->state != FCCTRL_INIT) { - /* send a Disconnect(association) LS to fc-nvme target */ - nvme_fc_xmt_disconnect_assoc(ctrl); - - if (ctrl->ctrl.tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_fc_delete_hw_io_queues(ctrl); - nvme_fc_free_io_queues(ctrl); - } - - nvme_fc_exit_aen_ops(ctrl); - - nvme_fc_destroy_admin_queue(ctrl); - } - nvme_fc_ctrl_put(ctrl); } - -static int -__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) +static void +nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) { - int state; + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: transport association error detected: %s\n", + ctrl->cnum, errmsg); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: resetting controller\n", ctrl->cnum); - state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); - if (state != FCPOP_STATE_ACTIVE) { - atomic_set(&op->state, state); - return -ECANCELED; /* fail */ + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: error_recovery: Couldn't change state " + "to RECONNECTING\n", ctrl->cnum); + return; } - ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, - &ctrl->rport->remoteport, - op->queue->lldd_handle, - &op->fcp_req); - - return 0; + if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: error_recovery: Failed to schedule " + "reset work\n", 
ctrl->cnum); } -enum blk_eh_timer_return +static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq, bool reserved) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); @@ -1640,11 +1785,13 @@ nvme_fc_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; /* - * TODO: force a controller reset - * when that happens, queues will be torn down and outstanding - * ios will be terminated, and the above abort, on a single io - * will no longer be needed. + * we can't individually ABTS an io without affecting the queue, + * thus killing the queue, and thus the association. + * So resolve by performing a controller reset, which will stop + * the host/io stack, terminate the association on the link, + * and recreate an association on the link. */ + nvme_fc_error_recovery(ctrl, "io timeout error"); return BLK_EH_HANDLED; } @@ -1738,6 +1885,13 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, u32 csn; int ret; + /* + * before attempting to send the io, check to see if we believe + * the target device is present + */ + if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) + return BLK_MQ_RQ_QUEUE_ERROR; + if (!nvme_fc_ctrl_get(ctrl)) return BLK_MQ_RQ_QUEUE_ERROR; @@ -1761,7 +1915,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, op->fcp_req.io_dir = io_dir; op->fcp_req.transferred_length = 0; op->fcp_req.rcv_rsplen = 0; - op->fcp_req.status = 0; + op->fcp_req.status = NVME_SC_SUCCESS; op->fcp_req.sqid = cpu_to_le16(queue->qnum); /* @@ -1782,14 +1936,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); sqe->rw.dptr.sgl.addr = 0; - /* odd that we set the command_id - should come from nvme-fabrics */ - WARN_ON_ONCE(sqe->common.command_id != cpu_to_le16(op->rqno)); - - if (op->rq) { /* skipped on aens */ + if (!(op->flags & FCOP_FLAGS_AEN)) { ret = nvme_fc_map_data(ctrl, op->rq, op); if (ret < 0) { - 
dev_err(queue->ctrl->ctrl.device, - "Failed to map data (%d)\n", ret); nvme_cleanup_cmd(op->rq); nvme_fc_ctrl_put(ctrl); return (ret == -ENOMEM || ret == -EAGAIN) ? @@ -1802,7 +1951,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, atomic_set(&op->state, FCPOP_STATE_ACTIVE); - if (op->rq) + if (!(op->flags & FCOP_FLAGS_AEN)) blk_mq_start_request(op->rq); ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, @@ -1810,9 +1959,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, queue->lldd_handle, &op->fcp_req); if (ret) { - dev_err(ctrl->dev, - "Send nvme command failed - lldd returned %d.\n", ret); - if (op->rq) { /* normal request */ nvme_fc_unmap_data(ctrl, op->rq, op); nvme_cleanup_cmd(op->rq); @@ -1882,12 +2028,8 @@ nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) struct nvme_fc_fcp_op *op; req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); - if (!req) { - dev_err(queue->ctrl->ctrl.device, - "tag 0x%x on QNum %#x not found\n", - tag, queue->qnum); + if (!req) return 0; - } op = blk_mq_rq_to_pdu(req); @@ -1904,11 +2046,21 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); struct nvme_fc_fcp_op *aen_op; + unsigned long flags; + bool terminating = false; int ret; if (aer_idx > NVME_FC_NR_AEN_COMMANDS) return; + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + terminating = true; + spin_unlock_irqrestore(&ctrl->lock, flags); + + if (terminating) + return; + aen_op = &ctrl->aen_ops[aer_idx]; ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, @@ -1919,36 +2071,101 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) } static void -nvme_fc_complete_rq(struct request *rq) +__nvme_fc_final_op_cleanup(struct request *rq) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; - int error = 0, state; - state = atomic_xchg(&op->state, FCPOP_STATE_IDLE); + 
atomic_set(&op->state, FCPOP_STATE_IDLE); + op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED | + FCOP_FLAGS_COMPLETE); nvme_cleanup_cmd(rq); - nvme_fc_unmap_data(ctrl, rq, op); + nvme_complete_rq(rq); + nvme_fc_ctrl_put(ctrl); - if (unlikely(rq->errors)) { - if (nvme_req_needs_retry(rq, rq->errors)) { - nvme_requeue_req(rq); - return; - } +} - if (blk_rq_is_passthrough(rq)) - error = rq->errors; - else - error = nvme_error_status(rq->errors); +static void +nvme_fc_complete_rq(struct request *rq) +{ + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fc_ctrl *ctrl = op->ctrl; + unsigned long flags; + bool completed = false; + + /* + * the core layer, on controller resets after calling + * nvme_shutdown_ctrl(), calls complete_rq without our + * calling blk_mq_complete_request(), thus there may still + * be live i/o outstanding with the LLDD. Means transport has + * to track complete calls vs fcpio_done calls to know what + * path to take on completes and dones. + */ + spin_lock_irqsave(&ctrl->lock, flags); + if (op->flags & FCOP_FLAGS_COMPLETE) + completed = true; + else + op->flags |= FCOP_FLAGS_RELEASED; + spin_unlock_irqrestore(&ctrl->lock, flags); + + if (completed) + __nvme_fc_final_op_cleanup(rq); +} + +/* + * This routine is used by the transport when it needs to find active + * io on a queue that is to be terminated. The transport uses + * blk_mq_tagset_busy_iter() to find the busy requests, which then invoke + * this routine to kill them on a 1 by 1 basis. + * + * As FC allocates FC exchange for each io, the transport must contact + * the LLDD to terminate the exchange, thus releasing the FC exchange. + * After terminating the exchange the LLDD will call the transport's + * normal io done path for the request, but it will have an aborted + * status. The done path will return the io request back to the block + * layer with an error status. 
+ */ +static void +nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +{ + struct nvme_ctrl *nctrl = data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); + unsigned long flags; + int status; + + if (!blk_mq_request_started(req)) + return; + + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) { + ctrl->iocnt++; + op->flags |= FCOP_FLAGS_TERMIO; } + spin_unlock_irqrestore(&ctrl->lock, flags); - nvme_fc_ctrl_put(ctrl); + status = __nvme_fc_abort_op(ctrl, op); + if (status) { + /* + * if __nvme_fc_abort_op failed the io wasn't + * active. Thus this call path is running in + * parallel to the io complete. Treat as non-error. + */ - blk_mq_end_request(rq, error); + /* back out the flags/counters */ + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; + op->flags &= ~FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + return; + } } -static struct blk_mq_ops nvme_fc_mq_ops = { + +static const struct blk_mq_ops nvme_fc_mq_ops = { .queue_rq = nvme_fc_queue_rq, .complete = nvme_fc_complete_rq, .init_request = nvme_fc_init_request, @@ -1959,145 +2176,275 @@ static struct blk_mq_ops nvme_fc_mq_ops = { .timeout = nvme_fc_timeout, }; -static struct blk_mq_ops nvme_fc_admin_mq_ops = { - .queue_rq = nvme_fc_queue_rq, - .complete = nvme_fc_complete_rq, - .init_request = nvme_fc_init_admin_request, - .exit_request = nvme_fc_exit_request, - .reinit_request = nvme_fc_reinit_request, - .init_hctx = nvme_fc_init_admin_hctx, - .timeout = nvme_fc_timeout, -}; - static int -nvme_fc_configure_admin_queue(struct nvme_fc_ctrl *ctrl) +nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) { - u32 segs; - int error; + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + int ret; - nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); + ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + if (ret) { + dev_info(ctrl->ctrl.device, + 
"set_queue_count failed: %d\n", ret); + return ret; + } - error = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], - NVME_FC_AQ_BLKMQ_DEPTH, - (NVME_FC_AQ_BLKMQ_DEPTH / 4)); - if (error) - return error; + ctrl->queue_count = opts->nr_io_queues + 1; + if (!opts->nr_io_queues) + return 0; - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; - ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ - ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + + dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", + opts->nr_io_queues); + + nvme_fc_init_io_queues(ctrl); + + memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); + ctrl->tag_set.ops = &nvme_fc_mq_ops; + ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; + ctrl->tag_set.reserved_tags = 1; /* fabric connect */ + ctrl->tag_set.numa_node = NUMA_NO_NODE; + ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + (SG_CHUNK_SIZE * sizeof(struct scatterlist)) + ctrl->lport->ops->fcprqst_priv_sz; - ctrl->admin_tag_set.driver_data = ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; + ctrl->tag_set.driver_data = ctrl; + ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; + ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); - if (error) - goto out_free_queue; + ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + if (ret) + return ret; - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_free_tagset; + ctrl->ctrl.tagset = &ctrl->tag_set; + + ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); + if (IS_ERR(ctrl->ctrl.connect_q)) { + ret = PTR_ERR(ctrl->ctrl.connect_q); + goto out_free_tag_set; + } 
+ + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_cleanup_blk_queue; + + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_delete_hw_queues; + + return 0; + +out_delete_hw_queues: + nvme_fc_delete_hw_io_queues(ctrl); +out_cleanup_blk_queue: + nvme_stop_keep_alive(&ctrl->ctrl); + blk_cleanup_queue(ctrl->ctrl.connect_q); +out_free_tag_set: + blk_mq_free_tag_set(&ctrl->tag_set); + nvme_fc_free_io_queues(ctrl); + + /* force put free routine to ignore io queues */ + ctrl->ctrl.tagset = NULL; + + return ret; +} + +static int +nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + int ret; + + ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + if (ret) { + dev_info(ctrl->ctrl.device, + "set_queue_count failed: %d\n", ret); + return ret; } - error = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, + /* check for io queues existing */ + if (ctrl->queue_count == 1) + return 0; + + dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n", + opts->nr_io_queues); + + nvme_fc_init_io_queues(ctrl); + + ret = blk_mq_reinit_tagset(&ctrl->tag_set); + if (ret) + goto out_free_io_queues; + + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_free_io_queues; + + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_delete_hw_queues; + + return 0; + +out_delete_hw_queues: + nvme_fc_delete_hw_io_queues(ctrl); +out_free_io_queues: + nvme_fc_free_io_queues(ctrl); + return ret; +} + +/* + * This routine restarts the controller on the host side, and + * on the link side, recreates the controller association. 
+ */ +static int +nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + u32 segs; + int ret; + bool changed; + + ctrl->connect_attempts++; + + /* + * Create the admin queue + */ + + nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); + + ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, NVME_FC_AQ_BLKMQ_DEPTH); - if (error) - goto out_cleanup_queue; + if (ret) + goto out_free_queue; - error = nvmf_connect_admin_queue(&ctrl->ctrl); - if (error) + ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], + NVME_FC_AQ_BLKMQ_DEPTH, + (NVME_FC_AQ_BLKMQ_DEPTH / 4)); + if (ret) goto out_delete_hw_queue; - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); - if (error) { + if (ctrl->ctrl.state != NVME_CTRL_NEW) + blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true); + + ret = nvmf_connect_admin_queue(&ctrl->ctrl); + if (ret) + goto out_disconnect_admin_queue; + + /* + * Check controller capabilities + * + * todo:- add code to check if ctrl attributes changed from + * prior connection values + */ + + ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); + if (ret) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); - goto out_delete_hw_queue; + goto out_disconnect_admin_queue; } ctrl->ctrl.sqsize = min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); - error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); - if (error) - goto out_delete_hw_queue; + ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + if (ret) + goto out_disconnect_admin_queue; segs = min_t(u32, NVME_FC_MAX_SEGMENTS, ctrl->lport->ops->max_sgl_segments); ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); - error = nvme_init_identify(&ctrl->ctrl); - if (error) - goto out_delete_hw_queue; + ret = nvme_init_identify(&ctrl->ctrl); + if (ret) + goto out_disconnect_admin_queue; + + /* sanity checks */ + + /* FC-NVME does not have other data in the capsule */ + if (ctrl->ctrl.icdoff) { + 
dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", + ctrl->ctrl.icdoff); + goto out_disconnect_admin_queue; + } nvme_start_keep_alive(&ctrl->ctrl); - return 0; + /* FC-NVME supports normal SGL Data Block Descriptors */ + if (opts->queue_size > ctrl->ctrl.maxcmd) { + /* warn if maxcmd is lower than queue_size */ + dev_warn(ctrl->ctrl.device, + "queue_size %zu > ctrl maxcmd %u, reducing " + "to queue_size\n", + opts->queue_size, ctrl->ctrl.maxcmd); + opts->queue_size = ctrl->ctrl.maxcmd; + } + + ret = nvme_fc_init_aen_ops(ctrl); + if (ret) + goto out_term_aen_ops; + + /* + * Create the io queues + */ + + if (ctrl->queue_count > 1) { + if (ctrl->ctrl.state == NVME_CTRL_NEW) + ret = nvme_fc_create_io_queues(ctrl); + else + ret = nvme_fc_reinit_io_queues(ctrl); + if (ret) + goto out_term_aen_ops; + } + + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + WARN_ON_ONCE(!changed); + + ctrl->connect_attempts = 0; + + kref_get(&ctrl->ctrl.kref); + + if (ctrl->queue_count > 1) { + nvme_start_queues(&ctrl->ctrl); + nvme_queue_scan(&ctrl->ctrl); + nvme_queue_async_events(&ctrl->ctrl); + } + + return 0; /* Success */ + +out_term_aen_ops: + nvme_fc_term_aen_ops(ctrl); + nvme_stop_keep_alive(&ctrl->ctrl); +out_disconnect_admin_queue: + /* send a Disconnect(association) LS to fc-nvme target */ + nvme_fc_xmt_disconnect_assoc(ctrl); out_delete_hw_queue: __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); -out_cleanup_queue: - blk_cleanup_queue(ctrl->ctrl.admin_q); -out_free_tagset: - blk_mq_free_tag_set(&ctrl->admin_tag_set); out_free_queue: nvme_fc_free_queue(&ctrl->queues[0]); - return error; + + return ret; } /* - * This routine is used by the transport when it needs to find active - * io on a queue that is to be terminated. The transport uses - * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke - * this routine to kill them on a 1 by 1 basis. 
- * - * As FC allocates FC exchange for each io, the transport must contact - * the LLDD to terminate the exchange, thus releasing the FC exchange. - * After terminating the exchange the LLDD will call the transport's - * normal io done path for the request, but it will have an aborted - * status. The done path will return the io request back to the block - * layer with an error status. + * This routine stops operation of the controller on the host side. + * On the host os stack side: Admin and IO queues are stopped, + * outstanding ios on them terminated via FC ABTS. + * On the link side: the association is terminated. */ static void -nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) { - struct nvme_ctrl *nctrl = data; - struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); -int status; - - if (!blk_mq_request_started(req)) - return; + unsigned long flags; - /* this performs an ABTS-LS on the FC exchange for the io */ - status = __nvme_fc_abort_op(ctrl, op); - /* - * if __nvme_fc_abort_op failed: io wasn't active to abort - * consider it done. Assume completion path already completing - * in parallel - */ - if (status) - /* io wasn't active to abort consider it done */ - /* assume completion path already completing in parallel */ - return; -} + nvme_stop_keep_alive(&ctrl->ctrl); + spin_lock_irqsave(&ctrl->lock, flags); + ctrl->flags |= FCCTRL_TERMIO; + ctrl->iocnt = 0; + spin_unlock_irqrestore(&ctrl->lock, flags); -/* - * This routine stops operation of the controller. Admin and IO queues - * are stopped, outstanding ios on them terminated, and the nvme ctrl - * is shutdown. - */ -static void -nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl) -{ /* * If io queues are present, stop them and terminate all outstanding * ios on them. 
As FC allocates FC exchange for each io, the @@ -2116,35 +2463,79 @@ nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl) nvme_fc_terminate_exchange, &ctrl->ctrl); } - if (ctrl->ctrl.state == NVME_CTRL_LIVE) - nvme_shutdown_ctrl(&ctrl->ctrl); + /* + * Other transports, which don't have link-level contexts bound + * to sqe's, would try to gracefully shutdown the controller by + * writing the registers for shutdown and polling (call + * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially + * just aborted and we will wait on those contexts, and given + * there was no indication of how live the controller is on the + * link, don't send more io to create more contexts for the + * shutdown. Let the controller fail via keepalive failure if + * it's still present. + */ /* - * now clean up the admin queue. Same thing as above. + * clean up the admin queue. Same thing as above. * use blk_mq_tagset_busy_itr() and the transport routine to * terminate the exchanges. */ blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); + + /* kill the aens as they are a separate path */ + nvme_fc_abort_aen_ops(ctrl); + + /* wait for all io that had to be aborted */ + spin_lock_irqsave(&ctrl->lock, flags); + while (ctrl->iocnt) { + spin_unlock_irqrestore(&ctrl->lock, flags); + msleep(1000); + spin_lock_irqsave(&ctrl->lock, flags); + } + ctrl->flags &= ~FCCTRL_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + + nvme_fc_term_aen_ops(ctrl); + + /* + * send a Disconnect(association) LS to fc-nvme target + * Note: could have been sent at top of process, but + * cleaner on link traffic if after the aborts complete.
+ * Note: if association doesn't exist, association_id will be 0 + */ + if (ctrl->association_id) + nvme_fc_xmt_disconnect_assoc(ctrl); + + if (ctrl->ctrl.tagset) { + nvme_fc_delete_hw_io_queues(ctrl); + nvme_fc_free_io_queues(ctrl); + } + + __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); + nvme_fc_free_queue(&ctrl->queues[0]); } -/* - * Called to teardown an association. - * May be called with association fully in place or partially in place. - */ static void -__nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl) +nvme_fc_delete_ctrl_work(struct work_struct *work) { - nvme_stop_keep_alive(&ctrl->ctrl); + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, delete_work); - /* stop and terminate ios on admin and io queues */ - nvme_fc_shutdown_ctrl(ctrl); + cancel_work_sync(&ctrl->reset_work); + cancel_delayed_work_sync(&ctrl->connect_work); + + /* + * kill the association on the link side. this will block + * waiting for io to terminate + */ + nvme_fc_delete_association(ctrl); /* * tear down the controller * This will result in the last reference on the nvme ctrl to - * expire, calling the transport nvme_fc_free_nvme_ctrl() callback. + * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback. * From there, the transport will tear down it's logical queues and * association. 
*/ @@ -2153,15 +2544,6 @@ __nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl) nvme_put_ctrl(&ctrl->ctrl); } -static void -nvme_fc_del_ctrl_work(struct work_struct *work) -{ - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, delete_work); - - __nvme_fc_remove_ctrl(ctrl); -} - static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) { @@ -2181,25 +2563,85 @@ static int nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - struct nvme_fc_rport *rport = ctrl->rport; - unsigned long flags; int ret; - spin_lock_irqsave(&rport->lock, flags); + if (!kref_get_unless_zero(&ctrl->ctrl.kref)) + return -EBUSY; + ret = __nvme_fc_del_ctrl(ctrl); - spin_unlock_irqrestore(&rport->lock, flags); - if (ret) - return ret; - flush_work(&ctrl->delete_work); + if (!ret) + flush_workqueue(nvme_fc_wq); - return 0; + nvme_put_ctrl(&ctrl->ctrl); + + return ret; +} + +static void +nvme_fc_reset_ctrl_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, reset_work); + int ret; + + /* will block while waiting for io to terminate */ + nvme_fc_delete_association(ctrl); + + ret = nvme_fc_create_association(ctrl); + if (ret) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", + ctrl->cnum, ret); + if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts (%d) " + "reached.
Removing controller\n", + ctrl->cnum, ctrl->connect_attempts); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, + NVME_CTRL_DELETING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to change state " + "to DELETING\n", ctrl->cnum); + return; + } + + WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); + return; + } + + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", + ctrl->cnum, ctrl->reconnect_delay); + queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, + ctrl->reconnect_delay * HZ); + } else + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); } +/* + * called by the nvme core layer, for sysfs interface that requests + * a reset of the nvme controller + */ static int nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) { - return -EIO; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) + return -EBUSY; + + if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) + return -EBUSY; + + flush_work(&ctrl->reset_work); + + return 0; } static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { @@ -2210,95 +2652,75 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, .reset_ctrl = nvme_fc_reset_nvme_ctrl, - .free_ctrl = nvme_fc_free_nvme_ctrl, + .free_ctrl = nvme_fc_nvme_ctrl_freed, .submit_async_event = nvme_fc_submit_async_event, .delete_ctrl = nvme_fc_del_nvme_ctrl, .get_subsysnqn = nvmf_get_subsysnqn, .get_address = nvmf_get_address, }; -static int -nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) +static void +nvme_fc_connect_ctrl_work(struct work_struct *work) { - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; int ret; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); - if (ret) { - dev_info(ctrl->ctrl.device, - "set_queue_count failed: %d\n", ret); - return 
ret; - } - - ctrl->queue_count = opts->nr_io_queues + 1; - if (!opts->nr_io_queues) - return 0; - - dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", - opts->nr_io_queues); - - nvme_fc_init_io_queues(ctrl); - - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_fc_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = 1; /* fabric connect */ - ctrl->tag_set.numa_node = NUMA_NO_NODE; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; - ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); - if (ret) - return ret; - - ctrl->ctrl.tagset = &ctrl->tag_set; - - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); - goto out_free_tag_set; - } - - ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); - if (ret) - goto out_cleanup_blk_queue; + struct nvme_fc_ctrl *ctrl = + container_of(to_delayed_work(work), + struct nvme_fc_ctrl, connect_work); - ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); - if (ret) - goto out_delete_hw_queues; + ret = nvme_fc_create_association(ctrl); + if (ret) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt failed (%d)\n", + ctrl->cnum, ret); + if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts (%d) " + "reached. 
Removing controller\n", + ctrl->cnum, ctrl->connect_attempts); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, + NVME_CTRL_DELETING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to change state " + "to DELETING\n", ctrl->cnum); + return; + } - return 0; + WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); + return; + } -out_delete_hw_queues: - nvme_fc_delete_hw_io_queues(ctrl); -out_cleanup_blk_queue: - nvme_stop_keep_alive(&ctrl->ctrl); - blk_cleanup_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_fc_free_io_queues(ctrl); + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", + ctrl->cnum, ctrl->reconnect_delay); + queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, + ctrl->reconnect_delay * HZ); + } else + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller reconnect complete\n", + ctrl->cnum); +} - /* force put free routine to ignore io queues */ - ctrl->ctrl.tagset = NULL; - return ret; -} +static const struct blk_mq_ops nvme_fc_admin_mq_ops = { + .queue_rq = nvme_fc_queue_rq, + .complete = nvme_fc_complete_rq, + .init_request = nvme_fc_init_admin_request, + .exit_request = nvme_fc_exit_request, + .reinit_request = nvme_fc_reinit_request, + .init_hctx = nvme_fc_init_admin_hctx, + .timeout = nvme_fc_timeout, +}; static struct nvme_ctrl * -__nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, +nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) { struct nvme_fc_ctrl *ctrl; unsigned long flags; int ret, idx; - bool changed; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) { @@ -2314,21 +2736,18 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->ctrl_list); - INIT_LIST_HEAD(&ctrl->ls_req_list); ctrl->lport = lport; ctrl->rport = rport; ctrl->dev = lport->dev; - ctrl->state = FCCTRL_INIT; ctrl->cnum = idx; - 
ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); - if (ret) - goto out_free_ida; - get_device(ctrl->dev); kref_init(&ctrl->ref); - INIT_WORK(&ctrl->delete_work, nvme_fc_del_ctrl_work); + INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); + INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work); + INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); + ctrl->reconnect_delay = opts->reconnect_delay; spin_lock_init(&ctrl->lock); /* io queue count */ @@ -2345,87 +2764,87 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue), GFP_KERNEL); if (!ctrl->queues) - goto out_uninit_ctrl; - - ret = nvme_fc_configure_admin_queue(ctrl); - if (ret) - goto out_uninit_ctrl; - - /* sanity checks */ - - /* FC-NVME does not have other data in the capsule */ - if (ctrl->ctrl.icdoff) { - dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", - ctrl->ctrl.icdoff); - goto out_remove_admin_queue; - } - - /* FC-NVME supports normal SGL Data Block Descriptors */ + goto out_free_ida; - if (opts->queue_size > ctrl->ctrl.maxcmd) { - /* warn if maxcmd is lower than queue_size */ - dev_warn(ctrl->ctrl.device, - "queue_size %zu > ctrl maxcmd %u, reducing " - "to queue_size\n", - opts->queue_size, ctrl->ctrl.maxcmd); - opts->queue_size = ctrl->ctrl.maxcmd; - } + memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); + ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; + ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; + ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ + ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; + ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + + (SG_CHUNK_SIZE * + sizeof(struct scatterlist)) + + ctrl->lport->ops->fcprqst_priv_sz; + ctrl->admin_tag_set.driver_data = ctrl; + ctrl->admin_tag_set.nr_hw_queues = 1; + ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; - ret = nvme_fc_init_aen_ops(ctrl); + ret = 
blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (ret) - goto out_exit_aen_ops; + goto out_free_queues; - if (ctrl->queue_count > 1) { - ret = nvme_fc_create_io_queues(ctrl); - if (ret) - goto out_exit_aen_ops; + ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); + if (IS_ERR(ctrl->ctrl.admin_q)) { + ret = PTR_ERR(ctrl->ctrl.admin_q); + goto out_free_admin_tag_set; } - spin_lock_irqsave(&ctrl->lock, flags); - ctrl->state = FCCTRL_ACTIVE; - spin_unlock_irqrestore(&ctrl->lock, flags); - - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - WARN_ON_ONCE(!changed); + /* + * Would have been nice to init io queues tag set as well. + * However, we require interaction from the controller + * for max io queue count before we can do so. + * Defer this to the connect path. + */ - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", - ctrl->cnum, ctrl->ctrl.opts->subsysnqn); + ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); + if (ret) + goto out_cleanup_admin_q; - kref_get(&ctrl->ctrl.kref); + /* at this point, teardown path changes to ref counting on nvme ctrl */ spin_lock_irqsave(&rport->lock, flags); list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - if (opts->nr_io_queues) { - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); + ret = nvme_fc_create_association(ctrl); + if (ret) { + ctrl->ctrl.opts = NULL; + /* initiate nvme ctrl ref counting teardown */ + nvme_uninit_ctrl(&ctrl->ctrl); + nvme_put_ctrl(&ctrl->ctrl); + + /* as we're past the point where we transition to the ref + * counting teardown path, if we return a bad pointer here, + * the calling routine, thinking it's prior to the + * transition, will do an rport put. Since the teardown + * path also does a rport put, we do an extra get here + * so proper order/teardown happens.
+ */ + nvme_fc_rport_get(rport); + + if (ret > 0) + ret = -EIO; + return ERR_PTR(ret); } - return &ctrl->ctrl; + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", + ctrl->cnum, ctrl->ctrl.opts->subsysnqn); -out_exit_aen_ops: - nvme_fc_exit_aen_ops(ctrl); -out_remove_admin_queue: - /* send a Disconnect(association) LS to fc-nvme target */ - nvme_fc_xmt_disconnect_assoc(ctrl); - nvme_stop_keep_alive(&ctrl->ctrl); - nvme_fc_destroy_admin_queue(ctrl); -out_uninit_ctrl: - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); - if (ret > 0) - ret = -EIO; - /* exit via here will follow ctlr ref point callbacks to free */ - return ERR_PTR(ret); + return &ctrl->ctrl; +out_cleanup_admin_q: + blk_cleanup_queue(ctrl->ctrl.admin_q); +out_free_admin_tag_set: + blk_mq_free_tag_set(&ctrl->admin_tag_set); +out_free_queues: + kfree(ctrl->queues); out_free_ida: + put_device(ctrl->dev); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); out_free_ctrl: kfree(ctrl); out_fail: - nvme_fc_rport_put(rport); /* exit via here doesn't follow ctlr ref points */ return ERR_PTR(ret); } @@ -2497,6 +2916,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; + struct nvme_ctrl *ctrl; struct nvmet_fc_traddr laddr = { 0L, 0L }; struct nvmet_fc_traddr raddr = { 0L, 0L }; unsigned long flags; @@ -2528,7 +2948,10 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) spin_unlock_irqrestore(&nvme_fc_lock, flags); - return __nvme_fc_create_ctrl(dev, opts, lport, rport); + ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport); + if (IS_ERR(ctrl)) + nvme_fc_rport_put(rport); + return ctrl; } } spin_unlock_irqrestore(&nvme_fc_lock, flags); @@ -2546,11 +2969,20 @@ static struct nvmf_transport_ops nvme_fc_transport = { static int __init nvme_fc_init_module(void) { + int ret; + nvme_fc_wq = create_workqueue("nvme_fc_wq"); if (!nvme_fc_wq) return -ENOMEM; - return 
nvmf_register_transport(&nvme_fc_transport); + ret = nvmf_register_transport(&nvme_fc_transport); + if (ret) + goto err; + + return 0; +err: + destroy_workqueue(nvme_fc_wq); + return ret; } static void __exit nvme_fc_exit_module(void) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 21cac8523bd8..e4e4e60b1224 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -241,9 +241,9 @@ static inline void _nvme_nvm_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); - BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128); + BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16); BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096); - BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 512); + BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64); } static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) @@ -324,7 +324,7 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id) nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, - sizeof(struct nvme_nvm_addr_format)); + sizeof(struct nvm_addr_format)); ret = init_grps(nvm_id, nvme_nvm_id); out: @@ -483,8 +483,8 @@ static void nvme_nvm_end_io(struct request *rq, int error) { struct nvm_rq *rqd = rq->end_io_data; - rqd->ppa_status = nvme_req(rq)->result.u64; - rqd->error = error; + rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64); + rqd->error = nvme_req(rq)->status; nvm_end_io(rqd); kfree(nvme_req(rq)->cmd); @@ -510,12 +510,12 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) } rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; - rq->ioprio = bio_prio(bio); - if (bio_has_data(bio)) - rq->nr_phys_segments = bio_phys_segments(q, bio); - - rq->__data_len = bio->bi_iter.bi_size; - rq->bio = rq->biotail = bio; + if 
(bio) { + blk_init_request_from_bio(rq, bio); + } else { + rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); + rq->__data_len = 0; + } nvme_nvm_rqtocmd(rq, rqd, ns, cmd); @@ -526,21 +526,6 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) return 0; } -static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd) -{ - struct request_queue *q = dev->q; - struct nvme_ns *ns = q->queuedata; - struct nvme_nvm_command c = {}; - - c.erase.opcode = NVM_OP_ERASE; - c.erase.nsid = cpu_to_le32(ns->ns_id); - c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa); - c.erase.length = cpu_to_le16(rqd->nr_ppas - 1); - c.erase.control = cpu_to_le16(rqd->flags); - - return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0); -} - static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name) { struct nvme_ns *ns = nvmdev->q->queuedata; @@ -576,7 +561,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .set_bb_tbl = nvme_nvm_set_bb_tbl, .submit_io = nvme_nvm_submit_io, - .erase_block = nvme_nvm_erase_block, .create_dma_pool = nvme_nvm_create_dma_pool, .destroy_dma_pool = nvme_nvm_destroy_dma_pool, @@ -611,7 +595,7 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q, __le64 *metadata = NULL; dma_addr_t metadata_dma; DECLARE_COMPLETION_ONSTACK(wait); - int ret; + int ret = 0; rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0, NVME_QID_ANY); @@ -681,9 +665,12 @@ submit: wait_for_completion_io(&wait); - ret = nvme_error_status(rq->errors); + if (nvme_req(rq)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else if (nvme_req(rq)->status & 0x7ff) + ret = -EIO; if (result) - *result = rq->errors & 0x7ff; + *result = nvme_req(rq)->status & 0x7ff; if (status) *status = le64_to_cpu(nvme_req(rq)->result.u64); @@ -766,7 +753,7 @@ static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin, c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3); /* cdw11-12 */ c.ph_rw.length = cpu_to_le16(vcmd.nppas); - c.ph_rw.control = 
cpu_to_le32(vcmd.control); + c.ph_rw.control = cpu_to_le16(vcmd.control); c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13); c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14); c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15); @@ -809,6 +796,8 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) struct request_queue *q = ns->queue; struct nvm_dev *dev; + _nvme_nvm_check_size(); + dev = nvm_alloc_dev(node); if (!dev) return -ENOMEM; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2aa20e3e5675..29c708ca9621 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -21,16 +21,6 @@ #include <linux/lightnvm.h> #include <linux/sed-opal.h> -enum { - /* - * Driver internal status code for commands that were cancelled due - * to timeouts or controller shutdown. The value is negative so - * that it a) doesn't overlap with the unsigned hardware error codes, - * and b) can easily be tested for. - */ - NVME_SC_CANCELLED = -EINTR, -}; - extern unsigned char nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) @@ -43,8 +33,6 @@ extern unsigned char shutdown_timeout; #define NVME_DEFAULT_KATO 5 #define NVME_KATO_GRACE 10 -extern unsigned int nvme_max_retries; - enum { NVME_NS_LBA = 0, NVME_NS_LIGHTNVM = 1, @@ -68,10 +56,10 @@ enum nvme_quirks { NVME_QUIRK_IDENTIFY_CNS = (1 << 1), /* - * The controller deterministically returns O's on reads to discarded - * logical blocks. + * The controller deterministically returns O's on reads to + * logical blocks that deallocate was called on. */ - NVME_QUIRK_DISCARD_ZEROES = (1 << 2), + NVME_QUIRK_DEALLOCATE_ZEROES = (1 << 2), /* * The controller needs a delay before starts checking the device @@ -83,6 +71,11 @@ enum nvme_quirks { * APST should not be used. */ NVME_QUIRK_NO_APST = (1 << 4), + + /* + * The deepest sleep state should not be used. 
+ */ + NVME_QUIRK_NO_DEEPEST_PS = (1 << 5), }; /* @@ -92,6 +85,13 @@ enum nvme_quirks { struct nvme_request { struct nvme_command *cmd; union nvme_result result; + u8 retries; + u8 flags; + u16 status; +}; + +enum { + NVME_REQ_CANCELLED = (1 << 0), }; static inline struct nvme_request *nvme_req(struct request *req) @@ -249,25 +249,17 @@ static inline void nvme_cleanup_cmd(struct request *req) } } -static inline int nvme_error_status(u16 status) +static inline void nvme_end_request(struct request *req, __le16 status, + union nvme_result result) { - switch (status & 0x7ff) { - case NVME_SC_SUCCESS: - return 0; - case NVME_SC_CAP_EXCEEDED: - return -ENOSPC; - default: - return -EIO; - } -} + struct nvme_request *rq = nvme_req(req); -static inline bool nvme_req_needs_retry(struct request *req, u16 status) -{ - return !(status & NVME_SC_DNR || blk_noretry_request(req)) && - (jiffies - req->start_time) < req->timeout && - req->retries < nvme_max_retries; + rq->status = le16_to_cpu(status) >> 1; + rq->result = result; + blk_mq_complete_request(req); } +void nvme_complete_rq(struct request *req); void nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); @@ -302,7 +294,6 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags, int qid); -void nvme_requeue_req(struct request *req); int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, struct nvme_command *cmd); int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 26a5fd05fe88..c8541c3dcd19 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -19,6 +19,7 @@ #include <linux/blk-mq-pci.h> #include <linux/cpu.h> #include <linux/delay.h> +#include <linux/dmi.h> #include <linux/errno.h> #include 
<linux/fs.h> #include <linux/genhd.h> @@ -103,8 +104,22 @@ struct nvme_dev { u32 cmbloc; struct nvme_ctrl ctrl; struct completion ioq_wait; + u32 *dbbuf_dbs; + dma_addr_t dbbuf_dbs_dma_addr; + u32 *dbbuf_eis; + dma_addr_t dbbuf_eis_dma_addr; }; +static inline unsigned int sq_idx(unsigned int qid, u32 stride) +{ + return qid * 2 * stride; +} + +static inline unsigned int cq_idx(unsigned int qid, u32 stride) +{ + return (qid * 2 + 1) * stride; +} + static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) { return container_of(ctrl, struct nvme_dev, ctrl); @@ -133,6 +148,10 @@ struct nvme_queue { u16 qid; u8 cq_phase; u8 cqe_seen; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; }; /* @@ -171,6 +190,112 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); + BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); +} + +static inline unsigned int nvme_dbbuf_size(u32 stride) +{ + return ((num_possible_cpus() + 1) * 8 * stride); +} + +static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) +{ + unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + + if (dev->dbbuf_dbs) + return 0; + + dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size, + &dev->dbbuf_dbs_dma_addr, + GFP_KERNEL); + if (!dev->dbbuf_dbs) + return -ENOMEM; + dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size, + &dev->dbbuf_eis_dma_addr, + GFP_KERNEL); + if (!dev->dbbuf_eis) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr); + dev->dbbuf_dbs = NULL; + return -ENOMEM; + } + + return 0; +} + +static void nvme_dbbuf_dma_free(struct nvme_dev *dev) +{ + unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + + if (dev->dbbuf_dbs) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr); + dev->dbbuf_dbs = NULL; + } + if (dev->dbbuf_eis) { + 
dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_eis, dev->dbbuf_eis_dma_addr); + dev->dbbuf_eis = NULL; + } +} + +static void nvme_dbbuf_init(struct nvme_dev *dev, + struct nvme_queue *nvmeq, int qid) +{ + if (!dev->dbbuf_dbs || !qid) + return; + + nvmeq->dbbuf_sq_db = &dev->dbbuf_dbs[sq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_cq_db = &dev->dbbuf_dbs[cq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_sq_ei = &dev->dbbuf_eis[sq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)]; +} + +static void nvme_dbbuf_set(struct nvme_dev *dev) +{ + struct nvme_command c; + + if (!dev->dbbuf_dbs) + return; + + memset(&c, 0, sizeof(c)); + c.dbbuf.opcode = nvme_admin_dbbuf; + c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr); + c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr); + + if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) { + dev_warn(dev->dev, "unable to set dbbuf\n"); + /* Free memory and continue on */ + nvme_dbbuf_dma_free(dev); + } +} + +static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old) +{ + return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old); +} + +/* Update dbbuf and return true if an MMIO is required */ +static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, + volatile u32 *dbbuf_ei) +{ + if (dbbuf_db) { + u16 old_value; + + /* + * Ensure that the queue is written before updating + * the doorbell in memory + */ + wmb(); + + old_value = *dbbuf_db; + *dbbuf_db = value; + + if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value)) + return false; + } + + return true; } /* @@ -297,7 +422,9 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq, if (++tail == nvmeq->q_depth) tail = 0; - writel(tail, nvmeq->q_db); + if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db, + nvmeq->dbbuf_sq_ei)) + writel(tail, nvmeq->q_db); nvmeq->sq_tail = tail; } @@ -326,10 +453,6 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev) iod->nents = 0; 
iod->length = size; - if (!(rq->rq_flags & RQF_DONTPREP)) { - rq->retries = 0; - rq->rq_flags |= RQF_DONTPREP; - } return BLK_MQ_RQ_QUEUE_OK; } @@ -628,34 +751,12 @@ out_free_cmd: return ret; } -static void nvme_complete_rq(struct request *req) +static void nvme_pci_complete_rq(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_dev *dev = iod->nvmeq->dev; - int error = 0; - - nvme_unmap_data(dev, req); - if (unlikely(req->errors)) { - if (nvme_req_needs_retry(req, req->errors)) { - req->retries++; - nvme_requeue_req(req); - return; - } - - if (blk_rq_is_passthrough(req)) - error = req->errors; - else - error = nvme_error_status(req->errors); - } - - if (unlikely(iod->aborted)) { - dev_warn(dev->ctrl.device, - "completing aborted command with status: %04x\n", - req->errors); - } - - blk_mq_end_request(req, error); + nvme_unmap_data(iod->nvmeq->dev, req); + nvme_complete_rq(req); } /* We read the CQE phase first to check if the rest of the entry is valid */ @@ -705,15 +806,16 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) } req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); - nvme_req(req)->result = cqe.result; - blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); + nvme_end_request(req, cqe.status, cqe.result); } if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) return; if (likely(nvmeq->cq_vector >= 0)) - writel(head, nvmeq->q_db + nvmeq->dev->db_stride); + if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db, + nvmeq->dbbuf_cq_ei)) + writel(head, nvmeq->q_db + nvmeq->dev->db_stride); nvmeq->cq_head = head; nvmeq->cq_phase = phase; @@ -745,10 +847,8 @@ static irqreturn_t nvme_irq_check(int irq, void *data) return IRQ_NONE; } -static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) { - struct nvme_queue *nvmeq = hctx->driver_data; - if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { 
spin_lock_irq(&nvmeq->q_lock); __nvme_process_cq(nvmeq, &tag); @@ -761,6 +861,13 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) return 0; } +static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +{ + struct nvme_queue *nvmeq = hctx->driver_data; + + return __nvme_poll(nvmeq, tag); +} + static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx) { struct nvme_dev *dev = to_nvme_dev(ctrl); @@ -812,7 +919,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, struct nvme_queue *nvmeq) { struct nvme_command c; - int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM; + int flags = NVME_QUEUE_PHYS_CONTIG; /* * Note: we (ab)use the fact the the prp fields survive if no data @@ -843,9 +950,9 @@ static void abort_endio(struct request *req, int error) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = iod->nvmeq; - u16 status = req->errors; - dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status); + dev_warn(nvmeq->dev->ctrl.device, + "Abort status: 0x%x", nvme_req(req)->status); atomic_inc(&nvmeq->dev->ctrl.abort_limit); blk_mq_free_request(req); } @@ -859,6 +966,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) struct nvme_command cmd; /* + * Did we miss an interrupt? + */ + if (__nvme_poll(nvmeq, req->tag)) { + dev_warn(dev->ctrl.device, + "I/O %d QID %d timeout, completion polled\n", + req->tag, nvmeq->qid); + return BLK_EH_HANDLED; + } + + /* * Shutdown immediately if controller times out while starting. The * reset work will see the pci device disabled when it gets the forced * cancellation error. 
All outstanding requests are completed on @@ -869,7 +986,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) "I/O %d QID %d timeout, disable controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); - req->errors = NVME_SC_CANCELLED; + nvme_req(req)->flags |= NVME_REQ_CANCELLED; return BLK_EH_HANDLED; } @@ -889,7 +1006,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) * Mark the request as handled, since the inline shutdown * forces all outstanding requests to complete. */ - req->errors = NVME_SC_CANCELLED; + nvme_req(req)->flags |= NVME_REQ_CANCELLED; return BLK_EH_HANDLED; } @@ -1097,6 +1214,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); + nvme_dbbuf_init(dev, nvmeq, qid); dev->online_queues++; spin_unlock_irq(&nvmeq->q_lock); } @@ -1129,18 +1247,18 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) return result; } -static struct blk_mq_ops nvme_mq_admin_ops = { +static const struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, - .complete = nvme_complete_rq, + .complete = nvme_pci_complete_rq, .init_hctx = nvme_admin_init_hctx, .exit_hctx = nvme_admin_exit_hctx, .init_request = nvme_admin_init_request, .timeout = nvme_timeout, }; -static struct blk_mq_ops nvme_mq_ops = { +static const struct blk_mq_ops nvme_mq_ops = { .queue_rq = nvme_queue_rq, - .complete = nvme_complete_rq, + .complete = nvme_pci_complete_rq, .init_hctx = nvme_init_hctx, .init_request = nvme_init_request, .map_queues = nvme_pci_map_queues, @@ -1569,6 +1687,8 @@ static int nvme_dev_add(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->tagset)) return 0; dev->ctrl.tagset = &dev->tagset; + + nvme_dbbuf_set(dev); } else { blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); @@ -1755,6 +1875,7 @@ static void 
nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) { struct nvme_dev *dev = to_nvme_dev(ctrl); + nvme_dbbuf_dma_free(dev); put_device(dev->dev); if (dev->tagset.tags) blk_mq_free_tag_set(&dev->tagset); @@ -1822,6 +1943,13 @@ static void nvme_reset_work(struct work_struct *work) dev->ctrl.opal_dev = NULL; } + if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) { + result = nvme_dbbuf_dma_alloc(dev); + if (result) + dev_warn(dev->dev, + "unable to allocate dma for dbbuf\n"); + } + result = nvme_setup_io_queues(dev); if (result) goto out; @@ -1943,10 +2071,31 @@ static int nvme_dev_map(struct nvme_dev *dev) return -ENODEV; } +static unsigned long check_dell_samsung_bug(struct pci_dev *pdev) +{ + if (pdev->vendor == 0x144d && pdev->device == 0xa802) { + /* + * Several Samsung devices seem to drop off the PCIe bus + * randomly when APST is on and uses the deepest sleep state. + * This has been observed on a Samsung "SM951 NVMe SAMSUNG + * 256GB", a "PM951 NVMe SAMSUNG 512GB", and a "Samsung SSD + * 950 PRO 256GB", but it seems to be restricted to two Dell + * laptops. 
+ */ + if (dmi_match(DMI_SYS_VENDOR, "Dell Inc.") && + (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") || + dmi_match(DMI_PRODUCT_NAME, "Precision 5510"))) + return NVME_QUIRK_NO_DEEPEST_PS; + } + + return 0; +} + static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int node, result = -ENOMEM; struct nvme_dev *dev; + unsigned long quirks = id->driver_data; node = dev_to_node(&pdev->dev); if (node == NUMA_NO_NODE) @@ -1978,8 +2127,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto put_pci; + quirks |= check_dell_samsung_bug(pdev); + result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, - id->driver_data); + quirks); if (result) goto release_pools; @@ -2135,13 +2286,13 @@ static const struct pci_error_handlers nvme_err_handler = { static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0x0953), .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DISCARD_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a53), .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DISCARD_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a54), .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DISCARD_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 47a479f26e5d..29cf88ac3f61 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -34,7 +34,7 @@ #include "fabrics.h" -#define NVME_RDMA_CONNECT_TIMEOUT_MS 1000 /* 1 second */ +#define NVME_RDMA_CONNECT_TIMEOUT_MS 3000 /* 3 second */ #define NVME_RDMA_MAX_SEGMENT_SIZE 0xffffff /* 24-bit SGL field */ @@ -118,7 +118,6 @@ struct nvme_rdma_ctrl { struct nvme_rdma_qe async_event_sqe; - int reconnect_delay; struct delayed_work reconnect_work; struct list_head list; @@ 
-129,14 +128,8 @@ struct nvme_rdma_ctrl { u64 cap; u32 max_fr_pages; - union { - struct sockaddr addr; - struct sockaddr_in addr_in; - }; - union { - struct sockaddr src_addr; - struct sockaddr_in src_addr_in; - }; + struct sockaddr_storage addr; + struct sockaddr_storage src_addr; struct nvme_ctrl ctrl; }; @@ -569,11 +562,12 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, return PTR_ERR(queue->cm_id); } - queue->cm_error = -ETIMEDOUT; if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) - src_addr = &ctrl->src_addr; + src_addr = (struct sockaddr *)&ctrl->src_addr; - ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr, + queue->cm_error = -ETIMEDOUT; + ret = rdma_resolve_addr(queue->cm_id, src_addr, + (struct sockaddr *)&ctrl->addr, NVME_RDMA_CONNECT_TIMEOUT_MS); if (ret) { dev_info(ctrl->ctrl.device, @@ -712,6 +706,26 @@ free_ctrl: kfree(ctrl); } +static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) +{ + /* If we are resetting/deleting then do nothing */ + if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { + WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || + ctrl->ctrl.state == NVME_CTRL_LIVE); + return; + } + + if (nvmf_should_reconnect(&ctrl->ctrl)) { + dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n", + ctrl->ctrl.opts->reconnect_delay); + queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, + ctrl->ctrl.opts->reconnect_delay * HZ); + } else { + dev_info(ctrl->ctrl.device, "Removing controller...\n"); + queue_work(nvme_rdma_wq, &ctrl->delete_work); + } +} + static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work), @@ -719,6 +733,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) bool changed; int ret; + ++ctrl->ctrl.opts->nr_reconnects; + if (ctrl->queue_count > 1) { nvme_rdma_free_io_queues(ctrl); @@ -763,6 +779,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) changed = 
nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); + ctrl->ctrl.opts->nr_reconnects = 0; if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); @@ -777,13 +794,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) stop_admin_q: blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); requeue: - /* Make sure we are not resetting/deleting */ - if (ctrl->ctrl.state == NVME_CTRL_RECONNECTING) { - dev_info(ctrl->ctrl.device, - "Failed reconnect attempt, requeueing...\n"); - queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, - ctrl->reconnect_delay * HZ); - } + dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", + ctrl->ctrl.opts->nr_reconnects); + nvme_rdma_reconnect_or_remove(ctrl); } static void nvme_rdma_error_recovery_work(struct work_struct *work) @@ -810,11 +823,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); - dev_info(ctrl->ctrl.device, "reconnecting in %d seconds\n", - ctrl->reconnect_delay); - - queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, - ctrl->reconnect_delay * HZ); + nvme_rdma_reconnect_or_remove(ctrl); } static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl) @@ -1169,8 +1178,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, wc->ex.invalidate_rkey == req->mr->rkey) req->mr->need_inval = false; - req->req.result = cqe->result; - blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1); + nvme_end_request(rq, cqe->status, cqe->result); return ret; } @@ -1407,7 +1415,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved) nvme_rdma_error_recovery(req->queue->ctrl); /* fail with DNR on cmd timeout */ - rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR; + nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; return BLK_EH_HANDLED; } @@ -1509,27 +1517,12 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) static void 
nvme_rdma_complete_rq(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); - struct nvme_rdma_queue *queue = req->queue; - int error = 0; - - nvme_rdma_unmap_data(queue, rq); - if (unlikely(rq->errors)) { - if (nvme_req_needs_retry(rq, rq->errors)) { - nvme_requeue_req(rq); - return; - } - - if (blk_rq_is_passthrough(rq)) - error = rq->errors; - else - error = nvme_error_status(rq->errors); - } - - blk_mq_end_request(rq, error); + nvme_rdma_unmap_data(req->queue, rq); + nvme_complete_rq(rq); } -static struct blk_mq_ops nvme_rdma_mq_ops = { +static const struct blk_mq_ops nvme_rdma_mq_ops = { .queue_rq = nvme_rdma_queue_rq, .complete = nvme_rdma_complete_rq, .init_request = nvme_rdma_init_request, @@ -1540,7 +1533,7 @@ static struct blk_mq_ops nvme_rdma_mq_ops = { .timeout = nvme_rdma_timeout, }; -static struct blk_mq_ops nvme_rdma_admin_mq_ops = { +static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { .queue_rq = nvme_rdma_queue_rq, .complete = nvme_rdma_complete_rq, .init_request = nvme_rdma_init_admin_request, @@ -1606,7 +1599,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize); error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); if (error) @@ -1857,27 +1850,13 @@ out_free_io_queues: return ret; } -static int nvme_rdma_parse_ipaddr(struct sockaddr_in *in_addr, char *p) -{ - u8 *addr = (u8 *)&in_addr->sin_addr.s_addr; - size_t buflen = strlen(p); - - /* XXX: handle IPv6 addresses */ - - if (buflen > INET_ADDRSTRLEN) - return -EINVAL; - if (in4_pton(p, buflen, addr, '\0', NULL) == 0) - return -EINVAL; - in_addr->sin_family = AF_INET; - return 0; -} - static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { struct nvme_rdma_ctrl *ctrl; int ret; bool changed; + char *port; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) @@ 
-1885,40 +1864,33 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - ret = nvme_rdma_parse_ipaddr(&ctrl->addr_in, opts->traddr); + if (opts->mask & NVMF_OPT_TRSVCID) + port = opts->trsvcid; + else + port = __stringify(NVME_RDMA_IP_PORT); + + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, + opts->traddr, port, &ctrl->addr); if (ret) { - pr_err("malformed IP address passed: %s\n", opts->traddr); + pr_err("malformed address passed: %s:%s\n", opts->traddr, port); goto out_free_ctrl; } if (opts->mask & NVMF_OPT_HOST_TRADDR) { - ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in, - opts->host_traddr); + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, + opts->host_traddr, NULL, &ctrl->src_addr); if (ret) { - pr_err("malformed src IP address passed: %s\n", + pr_err("malformed src address passed: %s\n", opts->host_traddr); goto out_free_ctrl; } } - if (opts->mask & NVMF_OPT_TRSVCID) { - u16 port; - - ret = kstrtou16(opts->trsvcid, 0, &port); - if (ret) - goto out_free_ctrl; - - ctrl->addr_in.sin_port = cpu_to_be16(port); - } else { - ctrl->addr_in.sin_port = cpu_to_be16(NVME_RDMA_IP_PORT); - } - ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops, 0 /* no quirks, we're perfect! 
*/); if (ret) goto out_free_ctrl; - ctrl->reconnect_delay = opts->reconnect_delay; INIT_DELAYED_WORK(&ctrl->reconnect_work, nvme_rdma_reconnect_ctrl_work); INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); @@ -1977,7 +1949,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", + dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); kref_get(&ctrl->ctrl.kref); @@ -2013,7 +1985,7 @@ static struct nvmf_transport_ops nvme_rdma_transport = { .name = "rdma", .required_opts = NVMF_OPT_TRADDR, .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | - NVMF_OPT_HOST_TRADDR, + NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO, .create_ctrl = nvme_rdma_create_ctrl, }; @@ -2055,12 +2027,20 @@ static int __init nvme_rdma_init_module(void) return -ENOMEM; ret = ib_register_client(&nvme_rdma_ib_client); - if (ret) { - destroy_workqueue(nvme_rdma_wq); - return ret; - } + if (ret) + goto err_destroy_wq; + + ret = nvmf_register_transport(&nvme_rdma_transport); + if (ret) + goto err_unreg_client; + + return 0; - return nvmf_register_transport(&nvme_rdma_transport); +err_unreg_client: + ib_unregister_client(&nvme_rdma_ib_client); +err_destroy_wq: + destroy_workqueue(nvme_rdma_wq); + return ret; } static void __exit nvme_rdma_cleanup_module(void) diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index f49ae2758bb7..1f7671e631dd 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -1609,7 +1609,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_command c; u8 opcode = (is_write ? 
nvme_cmd_write : nvme_cmd_read); u16 control; - u32 max_blocks = queue_max_hw_sectors(ns->queue); + u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9); num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks); @@ -2138,15 +2138,6 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, return res; } -static int nvme_trans_security_protocol(struct nvme_ns *ns, - struct sg_io_hdr *hdr, - u8 *cmd) -{ - return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, - ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND, - SCSI_ASCQ_CAUSE_NOT_REPORTABLE); -} - static int nvme_trans_synchronize_cache(struct nvme_ns *ns, struct sg_io_hdr *hdr) { @@ -2414,10 +2405,6 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) case REQUEST_SENSE: retcode = nvme_trans_request_sense(ns, hdr, cmd); break; - case SECURITY_PROTOCOL_IN: - case SECURITY_PROTOCOL_OUT: - retcode = nvme_trans_security_protocol(ns, hdr, cmd); - break; case SYNCHRONIZE_CACHE: retcode = nvme_trans_synchronize_cache(ns, hdr); break; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 76450b0c55f1..ff1f97006322 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -121,7 +121,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) } switch (req->cmd->get_log_page.lid) { - case 0x01: + case NVME_LOG_ERROR: /* * We currently never set the More bit in the status field, * so all error log entries are invalid and can be zeroed out. @@ -129,7 +129,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) * mandatory log page. 
*/ break; - case 0x02: + case NVME_LOG_SMART: /* * XXX: fill out actual smart log * @@ -149,7 +149,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) goto err; } break; - case 0x03: + case NVME_LOG_FW_SLOT: /* * We only support a single firmware slot which always is * active, so we can zero out the whole firmware slot log and @@ -480,31 +480,25 @@ static void nvmet_execute_keep_alive(struct nvmet_req *req) nvmet_req_complete(req, 0); } -int nvmet_parse_admin_cmd(struct nvmet_req *req) +u16 nvmet_parse_admin_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; + u16 ret; req->ns = NULL; - if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { - pr_err("nvmet: got admin cmd %d while CC.EN == 0\n", - cmd->common.opcode); - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; - } - if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("nvmet: got admin cmd %d while CSTS.RDY == 0\n", - cmd->common.opcode); - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; - } + ret = nvmet_check_ctrl_status(req, cmd); + if (unlikely(ret)) + return ret; switch (cmd->common.opcode) { case nvme_admin_get_log_page: req->data_len = nvmet_get_log_page_len(cmd); switch (cmd->get_log_page.lid) { - case 0x01: - case 0x02: - case 0x03: + case NVME_LOG_ERROR: + case NVME_LOG_SMART: + case NVME_LOG_FW_SLOT: req->execute = nvmet_execute_get_log_page; return 0; } @@ -545,6 +539,7 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req) return 0; } - pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, + req->sq->qid); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 798653b329b2..cf90713043da 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -273,8 +273,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns) ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, NULL); if (IS_ERR(ns->bdev)) { - pr_err("nvmet: 
failed to open block device %s: (%ld)\n", - ns->device_path, PTR_ERR(ns->bdev)); + pr_err("failed to open block device %s: (%ld)\n", + ns->device_path, PTR_ERR(ns->bdev)); ret = PTR_ERR(ns->bdev); ns->bdev = NULL; goto out_unlock; @@ -661,6 +661,23 @@ out: return status; } +u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) +{ + if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { + pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n", + cmd->common.opcode, req->sq->qid); + return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + } + + if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { + pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n", + cmd->common.opcode, req->sq->qid); + req->ns = NULL; + return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + } + return 0; +} + static bool __nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn) { diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index af8aabf05335..1aaf597e81fc 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -159,15 +159,15 @@ out: nvmet_req_complete(req, status); } -int nvmet_parse_discovery_cmd(struct nvmet_req *req) +u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; req->ns = NULL; if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("nvmet: got cmd %d while not ready\n", - cmd->common.opcode); + pr_err("got cmd %d while not ready\n", + cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } @@ -180,8 +180,8 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req) req->execute = nvmet_execute_get_disc_log_page; return 0; default: - pr_err("nvmet: unsupported get_log_page lid %d\n", - cmd->get_log_page.lid); + pr_err("unsupported get_log_page lid %d\n", + cmd->get_log_page.lid); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } case nvme_admin_identify: @@ -192,17 +192,16 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req) 
nvmet_execute_identify_disc_ctrl; return 0; default: - pr_err("nvmet: unsupported identify cns %d\n", - cmd->identify.cns); + pr_err("unsupported identify cns %d\n", + cmd->identify.cns); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } default: - pr_err("nvmet: unsupported cmd %d\n", - cmd->common.opcode); + pr_err("unsupported cmd %d\n", cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } - pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d\n", cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 8bd022af3df6..3cc17269504b 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -73,7 +73,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) nvmet_req_complete(req, status); } -int nvmet_parse_fabrics_cmd(struct nvmet_req *req) +u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; @@ -122,7 +122,15 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmet_ctrl *ctrl = NULL; u16 status = 0; - d = kmap(sg_page(req->sg)) + req->sg->offset; + d = kmalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + status = NVME_SC_INTERNAL; + goto complete; + } + + status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); + if (status) + goto out; /* zero out initial completion result, assign values as needed */ req->rsp->result.u32 = 0; @@ -143,7 +151,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) } status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req, - le32_to_cpu(c->kato), &ctrl); + le32_to_cpu(c->kato), &ctrl); if (status) goto out; @@ -158,7 +166,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); out: - kunmap(sg_page(req->sg)); + kfree(d); +complete: nvmet_req_complete(req, status); } @@ -170,7 +179,15 @@ static void nvmet_execute_io_connect(struct 
nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 status = 0; - d = kmap(sg_page(req->sg)) + req->sg->offset; + d = kmalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + status = NVME_SC_INTERNAL; + goto complete; + } + + status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); + if (status) + goto out; /* zero out initial completion result, assign values as needed */ req->rsp->result.u32 = 0; @@ -183,8 +200,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) } status = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn, - le16_to_cpu(d->cntlid), - req, &ctrl); + le16_to_cpu(d->cntlid), + req, &ctrl); if (status) goto out; @@ -205,7 +222,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) pr_info("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); out: - kunmap(sg_page(req->sg)); + kfree(d); +complete: nvmet_req_complete(req, status); return; @@ -214,7 +232,7 @@ out_ctrl_put: goto out; } -int nvmet_parse_connect_cmd(struct nvmet_req *req) +u16 nvmet_parse_connect_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 8f483ee7868c..62eba29c85fb 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -82,10 +82,13 @@ struct nvmet_fc_fcp_iod { enum nvmet_fcp_datadir io_dir; bool active; bool abort; + bool aborted; + bool writedataactive; spinlock_t flock; struct nvmet_req req; struct work_struct work; + struct work_struct done_work; struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_tgt_queue *queue; @@ -116,7 +119,7 @@ struct nvmet_fc_tgt_queue { u16 qid; u16 sqsize; u16 ersp_ratio; - u16 sqhd; + __le16 sqhd; int cpu; atomic_t connected; atomic_t sqtail; @@ -213,6 +216,7 @@ static DEFINE_IDA(nvmet_fc_tgtport_cnt); static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work); +static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work); static void nvmet_fc_tgt_a_put(struct 
nvmet_fc_tgt_assoc *assoc); static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); @@ -414,9 +418,13 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, for (i = 0; i < queue->sqsize; fod++, i++) { INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work); + INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work); fod->tgtport = tgtport; fod->queue = queue; fod->active = false; + fod->abort = false; + fod->aborted = false; + fod->fcpreq = NULL; list_add_tail(&fod->fcp_list, &queue->fod_list); spin_lock_init(&fod->flock); @@ -463,7 +471,6 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) if (fod) { list_del(&fod->fcp_list); fod->active = true; - fod->abort = false; /* * no queue reference is taken, as it was taken by the * queue lookup just prior to the allocation. The iod @@ -479,17 +486,30 @@ static void nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, struct nvmet_fc_fcp_iod *fod) { + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct nvmet_fc_tgtport *tgtport = fod->tgtport; unsigned long flags; + fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, + sizeof(fod->rspiubuf), DMA_TO_DEVICE); + + fcpreq->nvmet_fc_private = NULL; + spin_lock_irqsave(&queue->qlock, flags); list_add_tail(&fod->fcp_list, &fod->queue->fod_list); fod->active = false; + fod->abort = false; + fod->aborted = false; + fod->writedataactive = false; + fod->fcpreq = NULL; spin_unlock_irqrestore(&queue->qlock, flags); /* * release the reference taken at queue lookup and fod allocation */ nvmet_fc_tgt_q_put(queue); + + tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); } static int @@ -616,32 +636,12 @@ nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue) static void -nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport, - struct nvmefc_tgt_fcp_req *fcpreq) -{ - int ret; - - fcpreq->op = NVMET_FCOP_ABORT; - fcpreq->offset = 0; - fcpreq->timeout = 0; - fcpreq->transfer_length = 
0; - fcpreq->transferred_length = 0; - fcpreq->fcp_error = 0; - fcpreq->sg_cnt = 0; - - ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fcpreq); - if (ret) - /* should never reach here !! */ - WARN_ON(1); -} - - -static void nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) { + struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport; struct nvmet_fc_fcp_iod *fod = queue->fod; unsigned long flags; - int i; + int i, writedataactive; bool disconnect; disconnect = atomic_xchg(&queue->connected, 0); @@ -652,7 +652,20 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) if (fod->active) { spin_lock(&fod->flock); fod->abort = true; + writedataactive = fod->writedataactive; spin_unlock(&fod->flock); + /* + * only call lldd abort routine if waiting for + * writedata. other outstanding ops should finish + * on their own. + */ + if (writedataactive) { + spin_lock(&fod->flock); + fod->aborted = true; + spin_unlock(&fod->flock); + tgtport->ops->fcp_abort( + &tgtport->fc_target_port, fod->fcpreq); + } } } spin_unlock_irqrestore(&queue->qlock, flags); @@ -846,7 +859,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, int ret, idx; if (!template->xmt_ls_rsp || !template->fcp_op || - !template->targetport_delete || + !template->fcp_abort || + !template->fcp_req_release || !template->targetport_delete || !template->max_hw_queues || !template->max_sgl_segments || !template->max_dif_sgl_segments || !template->dma_boundary) { ret = -EINVAL; @@ -1044,7 +1058,7 @@ EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport); static void -nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, u32 desc_len, u8 rqst_ls_cmd) +nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) { struct fcnvme_ls_acc_hdr *acc = buf; @@ -1189,8 +1203,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, validation_errors[ret]); iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, - ELS_RJT_LOGIC, - 
ELS_EXPL_NONE, 0); + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); return; } @@ -1281,8 +1295,9 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? - ELS_RJT_PROT : ELS_RJT_LOGIC, - ELS_EXPL_NONE, 0); + FCNVME_RJT_RC_INV_ASSOC : + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); return; } @@ -1369,8 +1384,12 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, validation_errors[ret]); iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, - (ret == 8) ? ELS_RJT_PROT : ELS_RJT_LOGIC, - ELS_EXPL_NONE, 0); + (ret == VERR_NO_ASSOC) ? + FCNVME_RJT_RC_INV_ASSOC : + (ret == VERR_NO_CONN) ? + FCNVME_RJT_RC_INV_CONN : + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); return; } @@ -1479,7 +1498,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, default: iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf, NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd, - ELS_RJT_INVAL, ELS_EXPL_NONE, 0); + FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); } nvmet_fc_xmt_ls_rsp(tgtport, iod); @@ -1619,6 +1638,8 @@ nvmet_fc_free_tgt_pgs(struct nvmet_fc_fcp_iod *fod) for_each_sg(fod->data_sg, sg, fod->data_sg_cnt, count) __free_page(sg_page(sg)); kfree(fod->data_sg); + fod->data_sg = NULL; + fod->data_sg_cnt = 0; } @@ -1679,7 +1700,7 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport, xfr_length != fod->total_length || (le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] || (sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) || - queue_90percent_full(fod->queue, cqe->sq_head)) + queue_90percent_full(fod->queue, le16_to_cpu(cqe->sq_head))) send_ersp = true; /* re-set the fields */ @@ -1704,6 +1725,26 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport, static void nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq); static void +nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport, + 
struct nvmet_fc_fcp_iod *fod) +{ + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + + /* data no longer needed */ + nvmet_fc_free_tgt_pgs(fod); + + /* + * if an ABTS was received or we issued the fcp_abort early + * don't call abort routine again. + */ + /* no need to take lock - lock was taken earlier to get here */ + if (!fod->aborted) + tgtport->ops->fcp_abort(&tgtport->fc_target_port, fcpreq); + + nvmet_fc_free_fcp_iod(fod->queue, fod); +} + +static void nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod) { @@ -1716,7 +1757,7 @@ nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport, ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fod->fcpreq); if (ret) - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); } static void @@ -1725,6 +1766,7 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, { struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; struct scatterlist *sg, *datasg; + unsigned long flags; u32 tlen, sg_off; int ret; @@ -1789,10 +1831,13 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, */ fod->abort = true; - if (op == NVMET_FCOP_WRITEDATA) + if (op == NVMET_FCOP_WRITEDATA) { + spin_lock_irqsave(&fod->flock, flags); + fod->writedataactive = false; + spin_unlock_irqrestore(&fod->flock, flags); nvmet_req_complete(&fod->req, NVME_SC_FC_TRANSPORT_ERROR); - else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ { + } else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ { fcpreq->fcp_error = ret; fcpreq->transferred_length = 0; nvmet_fc_xmt_fcp_op_done(fod->fcpreq); @@ -1800,32 +1845,54 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, } } +static inline bool +__nvmet_fc_fod_op_abort(struct nvmet_fc_fcp_iod *fod, bool abort) +{ + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct nvmet_fc_tgtport *tgtport = fod->tgtport; + + /* if in the middle of an io and we need to tear down */ + if (abort) { + if (fcpreq->op == NVMET_FCOP_WRITEDATA) { + 
nvmet_req_complete(&fod->req, + NVME_SC_FC_TRANSPORT_ERROR); + return true; + } + + nvmet_fc_abort_op(tgtport, fod); + return true; + } + + return false; +} + +/* + * actual done handler for FCP operations when completed by the lldd + */ static void -nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) +nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) { - struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; struct nvmet_fc_tgtport *tgtport = fod->tgtport; unsigned long flags; bool abort; spin_lock_irqsave(&fod->flock, flags); abort = fod->abort; + fod->writedataactive = false; spin_unlock_irqrestore(&fod->flock, flags); - /* if in the middle of an io and we need to tear down */ - if (abort && fcpreq->op != NVMET_FCOP_ABORT) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_req_complete(&fod->req, fcpreq->fcp_error); - return; - } - switch (fcpreq->op) { case NVMET_FCOP_WRITEDATA: + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { + spin_lock(&fod->flock); + fod->abort = true; + spin_unlock(&fod->flock); + nvmet_req_complete(&fod->req, NVME_SC_FC_TRANSPORT_ERROR); return; @@ -1833,6 +1900,10 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) fod->offset += fcpreq->transferred_length; if (fod->offset != fod->total_length) { + spin_lock_irqsave(&fod->flock, flags); + fod->writedataactive = true; + spin_unlock_irqrestore(&fod->flock, flags); + /* transfer the next chunk */ nvmet_fc_transfer_fcp_data(tgtport, fod, NVMET_FCOP_WRITEDATA); @@ -1847,12 +1918,11 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - 
nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); return; } @@ -1861,8 +1931,6 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) if (fcpreq->op == NVMET_FCOP_READDATA_RSP) { /* data no longer needed */ nvmet_fc_free_tgt_pgs(fod); - fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, - sizeof(fod->rspiubuf), DMA_TO_DEVICE); nvmet_fc_free_fcp_iod(fod->queue, fod); return; } @@ -1885,19 +1953,38 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) break; case NVMET_FCOP_RSP: - case NVMET_FCOP_ABORT: - fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, - sizeof(fod->rspiubuf), DMA_TO_DEVICE); + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; nvmet_fc_free_fcp_iod(fod->queue, fod); break; default: - nvmet_fc_free_tgt_pgs(fod); - nvmet_fc_abort_op(tgtport, fod->fcpreq); break; } } +static void +nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work) +{ + struct nvmet_fc_fcp_iod *fod = + container_of(work, struct nvmet_fc_fcp_iod, done_work); + + nvmet_fc_fod_op_done(fod); +} + +static void +nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmet_fc_tgt_queue *queue = fod->queue; + + if (fod->tgtport->ops->target_features & NVMET_FCTGTFEAT_OPDONE_IN_ISR) + /* context switch so completion is not in ISR context */ + queue_work_on(queue->cpu, queue->work_q, &fod->done_work); + else + nvmet_fc_fod_op_done(fod); +} + /* * actual completion handler after execution by the nvmet layer */ @@ -1919,10 +2006,7 @@ __nvmet_fc_fcp_nvme_cmd_done(struct nvmet_fc_tgtport *tgtport, fod->queue->sqhd = cqe->sq_head; if (abort) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); return; } @@ -1971,7 +2055,7 @@ nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req) /* * Actual processing routine for received FC-NVME LS Requests from the LLD */ -void +static void 
nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod) { @@ -2018,8 +2102,8 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, &fod->queue->nvme_cq, &fod->queue->nvme_sq, &nvmet_fc_tgt_fcp_ops); - if (!ret) { /* bad SQE content */ - nvmet_fc_abort_op(tgtport, fod->fcpreq); + if (!ret) { /* bad SQE content or invalid ctrl state */ + nvmet_fc_abort_op(tgtport, fod); return; } @@ -2059,7 +2143,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, return; transport_error: - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); } /* @@ -2089,7 +2173,7 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work) * If this routine returns error, the lldd should abort the exchange. * * @target_port: pointer to the (registered) target port the FCP CMD IU - * was receive on. + * was received on. * @fcpreq: pointer to a fcpreq request structure to be used to reference * the exchange corresponding to the FCP Exchange. * @cmdiubuf: pointer to the buffer containing the FCP CMD IU @@ -2112,7 +2196,6 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, (be16_to_cpu(cmdiu->iu_len) != (sizeof(*cmdiu)/4))) return -EIO; - queue = nvmet_fc_find_target_queue(tgtport, be64_to_cpu(cmdiu->connection_id)); if (!queue) @@ -2142,12 +2225,68 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len); - queue_work_on(queue->cpu, queue->work_q, &fod->work); + if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) + queue_work_on(queue->cpu, queue->work_q, &fod->work); + else + nvmet_fc_handle_fcp_rqst(tgtport, fod); return 0; } EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req); +/** + * nvmet_fc_rcv_fcp_abort - transport entry point called by an LLDD + * upon the reception of an ABTS for a FCP command + * + * Notify the transport that an ABTS has been received for a FCP command + * that had been given to 
the transport via nvmet_fc_rcv_fcp_req(). The + * LLDD believes the command is still being worked on + * (template_ops->fcp_req_release() has not been called). + * + * The transport will wait for any outstanding work (an op to the LLDD, + * which the lldd should complete with error due to the ABTS; or the + * completion from the nvmet layer of the nvme command), then will + * stop processing and call the fcp_req_release() callback to + * return the i/o context to the LLDD. The LLDD may send the BA_ACC + * to the ABTS either after return from this function (assuming any + * outstanding op work has been terminated) or upon the callback being + * called. + * + * @target_port: pointer to the (registered) target port the FCP CMD IU + * was received on. + * @fcpreq: pointer to the fcpreq request structure that corresponds + * to the exchange that received the ABTS. + */ +void +nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *target_port, + struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmet_fc_tgt_queue *queue; + unsigned long flags; + + if (!fod || fod->fcpreq != fcpreq) + /* job appears to have already completed, ignore abort */ + return; + + queue = fod->queue; + + spin_lock_irqsave(&queue->qlock, flags); + if (fod->active) { + /* + * mark as abort. The abort handler, invoked upon completion + * of any work, will detect the aborted status and do the + * callback. 
+ */ + spin_lock(&fod->flock); + fod->abort = true; + fod->aborted = true; + spin_unlock(&fod->flock); + } + spin_unlock_irqrestore(&queue->qlock, flags); +} +EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_abort); + enum { FCT_TRADDR_ERR = 0, FCT_TRADDR_WWNN = 1 << 0, @@ -2177,7 +2316,7 @@ nvmet_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf) if (!options) return -ENOMEM; - while ((p = strsep(&o, ",\n")) != NULL) { + while ((p = strsep(&o, ":\n")) != NULL) { if (!*p) continue; @@ -2238,6 +2377,7 @@ nvmet_fc_add_port(struct nvmet_port *port) if (!tgtport->port) { tgtport->port = port; port->priv = tgtport; + nvmet_fc_tgtport_get(tgtport); ret = 0; } else ret = -EALREADY; diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 4e8e6a22bce1..15551ef79c8c 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -246,11 +246,19 @@ struct fcloop_lsreq { struct fcloop_fcpreq { struct fcloop_tport *tport; struct nvmefc_fcp_req *fcpreq; + spinlock_t reqlock; u16 status; + bool active; + bool aborted; struct work_struct work; struct nvmefc_tgt_fcp_req tgt_fcp_req; }; +struct fcloop_ini_fcpreq { + struct nvmefc_fcp_req *fcpreq; + struct fcloop_fcpreq *tfcp_req; + struct work_struct iniwork; +}; static inline struct fcloop_lsreq * tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq) @@ -341,7 +349,21 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport, } /* - * FCP IO operation done. call back up initiator "done" flows. + * FCP IO operation done by initiator abort. + * call back up initiator "done" flows. + */ +static void +fcloop_tgt_fcprqst_ini_done_work(struct work_struct *work) +{ + struct fcloop_ini_fcpreq *inireq = + container_of(work, struct fcloop_ini_fcpreq, iniwork); + + inireq->fcpreq->done(inireq->fcpreq); +} + +/* + * FCP IO operation done by target completion. + * call back up initiator "done" flows. 
*/ static void fcloop_tgt_fcprqst_done_work(struct work_struct *work) @@ -349,12 +371,18 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) struct fcloop_fcpreq *tfcp_req = container_of(work, struct fcloop_fcpreq, work); struct fcloop_tport *tport = tfcp_req->tport; - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; - if (tport->remoteport) { + spin_lock(&tfcp_req->reqlock); + fcpreq = tfcp_req->fcpreq; + spin_unlock(&tfcp_req->reqlock); + + if (tport->remoteport && fcpreq) { fcpreq->status = tfcp_req->status; fcpreq->done(fcpreq); } + + kfree(tfcp_req); } @@ -364,20 +392,25 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, void *hw_queue_handle, struct nvmefc_fcp_req *fcpreq) { - struct fcloop_fcpreq *tfcp_req = fcpreq->private; struct fcloop_rport *rport = remoteport->private; + struct fcloop_ini_fcpreq *inireq = fcpreq->private; + struct fcloop_fcpreq *tfcp_req; int ret = 0; - INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); + if (!rport->targetport) + return -ECONNREFUSED; - if (!rport->targetport) { - tfcp_req->status = NVME_SC_FC_TRANSPORT_ERROR; - schedule_work(&tfcp_req->work); - return ret; - } + tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_KERNEL); + if (!tfcp_req) + return -ENOMEM; + inireq->fcpreq = fcpreq; + inireq->tfcp_req = tfcp_req; + INIT_WORK(&inireq->iniwork, fcloop_tgt_fcprqst_ini_done_work); tfcp_req->fcpreq = fcpreq; tfcp_req->tport = rport->targetport->private; + spin_lock_init(&tfcp_req->reqlock); + INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); ret = nvmet_fc_rcv_fcp_req(rport->targetport, &tfcp_req->tgt_fcp_req, fcpreq->cmdaddr, fcpreq->cmdlen); @@ -444,63 +477,129 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *tgt_fcpreq) { struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; u32 rsplen = 0, xfrlen = 0; - int fcp_err = 0; + int fcp_err = 0, 
active, aborted; u8 op = tgt_fcpreq->op; + spin_lock(&tfcp_req->reqlock); + fcpreq = tfcp_req->fcpreq; + active = tfcp_req->active; + aborted = tfcp_req->aborted; + tfcp_req->active = true; + spin_unlock(&tfcp_req->reqlock); + + if (unlikely(active)) + /* illegal - call while i/o active */ + return -EALREADY; + + if (unlikely(aborted)) { + /* target transport has aborted i/o prior */ + spin_lock(&tfcp_req->reqlock); + tfcp_req->active = false; + spin_unlock(&tfcp_req->reqlock); + tgt_fcpreq->transferred_length = 0; + tgt_fcpreq->fcp_error = -ECANCELED; + tgt_fcpreq->done(tgt_fcpreq); + return 0; + } + + /* + * if fcpreq is NULL, the I/O has been aborted (from + * initiator side). For the target side, act as if all is well + * but don't actually move data. + */ + switch (op) { case NVMET_FCOP_WRITEDATA: xfrlen = tgt_fcpreq->transfer_length; - fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl, - tgt_fcpreq->offset, xfrlen); - fcpreq->transferred_length += xfrlen; + if (fcpreq) { + fcloop_fcp_copy_data(op, tgt_fcpreq->sg, + fcpreq->first_sgl, tgt_fcpreq->offset, + xfrlen); + fcpreq->transferred_length += xfrlen; + } break; case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: xfrlen = tgt_fcpreq->transfer_length; - fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl, - tgt_fcpreq->offset, xfrlen); - fcpreq->transferred_length += xfrlen; + if (fcpreq) { + fcloop_fcp_copy_data(op, tgt_fcpreq->sg, + fcpreq->first_sgl, tgt_fcpreq->offset, + xfrlen); + fcpreq->transferred_length += xfrlen; + } if (op == NVMET_FCOP_READDATA) break; /* Fall-Thru to RSP handling */ case NVMET_FCOP_RSP: - rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ? - fcpreq->rsplen : tgt_fcpreq->rsplen); - memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen); - if (rsplen < tgt_fcpreq->rsplen) - fcp_err = -E2BIG; - fcpreq->rcv_rsplen = rsplen; - fcpreq->status = 0; + if (fcpreq) { + rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ? 
+ fcpreq->rsplen : tgt_fcpreq->rsplen); + memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen); + if (rsplen < tgt_fcpreq->rsplen) + fcp_err = -E2BIG; + fcpreq->rcv_rsplen = rsplen; + fcpreq->status = 0; + } tfcp_req->status = 0; break; - case NVMET_FCOP_ABORT: - tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED; - break; - default: fcp_err = -EINVAL; break; } + spin_lock(&tfcp_req->reqlock); + tfcp_req->active = false; + spin_unlock(&tfcp_req->reqlock); + tgt_fcpreq->transferred_length = xfrlen; tgt_fcpreq->fcp_error = fcp_err; tgt_fcpreq->done(tgt_fcpreq); - if ((!fcp_err) && (op == NVMET_FCOP_RSP || - op == NVMET_FCOP_READDATA_RSP || - op == NVMET_FCOP_ABORT)) - schedule_work(&tfcp_req->work); - return 0; } static void +fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *tgt_fcpreq) +{ + struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); + int active; + + /* + * mark aborted only in case there were 2 threads in transport + * (one doing io, other doing abort) and only kills ops posted + * after the abort request + */ + spin_lock(&tfcp_req->reqlock); + active = tfcp_req->active; + tfcp_req->aborted = true; + spin_unlock(&tfcp_req->reqlock); + + tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED; + + /* + * nothing more to do. If io wasn't active, the transport should + * immediately call the req_release. If it was active, the op + * will complete, and the lldd should call req_release. 
+ */ +} + +static void +fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *tgt_fcpreq) +{ + struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); + + schedule_work(&tfcp_req->work); +} + +static void fcloop_ls_abort(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, struct nvmefc_ls_req *lsreq) @@ -513,6 +612,27 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, void *hw_queue_handle, struct nvmefc_fcp_req *fcpreq) { + struct fcloop_rport *rport = remoteport->private; + struct fcloop_ini_fcpreq *inireq = fcpreq->private; + struct fcloop_fcpreq *tfcp_req = inireq->tfcp_req; + + if (!tfcp_req) + /* abort has already been called */ + return; + + if (rport->targetport) + nvmet_fc_rcv_fcp_abort(rport->targetport, + &tfcp_req->tgt_fcp_req); + + /* break initiator/target relationship for io */ + spin_lock(&tfcp_req->reqlock); + inireq->tfcp_req = NULL; + tfcp_req->fcpreq = NULL; + spin_unlock(&tfcp_req->reqlock); + + /* post the aborted io completion */ + fcpreq->status = -ECANCELED; + schedule_work(&inireq->iniwork); } static void @@ -546,7 +666,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) #define FCLOOP_SGL_SEGS 256 #define FCLOOP_DMABOUND_4G 0xFFFFFFFF -struct nvme_fc_port_template fctemplate = { +static struct nvme_fc_port_template fctemplate = { .localport_delete = fcloop_localport_delete, .remoteport_delete = fcloop_remoteport_delete, .create_queue = fcloop_create_queue, @@ -563,20 +683,23 @@ struct nvme_fc_port_template fctemplate = { .local_priv_sz = sizeof(struct fcloop_lport), .remote_priv_sz = sizeof(struct fcloop_rport), .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), - .fcprqst_priv_sz = sizeof(struct fcloop_fcpreq), + .fcprqst_priv_sz = sizeof(struct fcloop_ini_fcpreq), }; -struct nvmet_fc_target_template tgttemplate = { +static struct nvmet_fc_target_template tgttemplate = { .targetport_delete = fcloop_targetport_delete, .xmt_ls_rsp = 
fcloop_xmt_ls_rsp, .fcp_op = fcloop_fcp_op, + .fcp_abort = fcloop_tgt_fcp_abort, + .fcp_req_release = fcloop_fcp_req_release, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, .max_dif_sgl_segments = FCLOOP_SGL_SEGS, .dma_boundary = FCLOOP_DMABOUND_4G, /* optional features */ - .target_features = NVMET_FCTGTFEAT_READDATA_RSP | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED, + .target_features = NVMET_FCTGTFEAT_CMD_IN_ISR | + NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | + NVMET_FCTGTFEAT_OPDONE_IN_ISR, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct fcloop_tport), }; diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 6b0baa9caab9..c77940d80fc8 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -184,7 +184,7 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req) (req->ns->blksize_shift - 9)) + 1; if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, - GFP_KERNEL, &bio, true)) + GFP_KERNEL, &bio, 0)) status = NVME_SC_INTERNAL | NVME_SC_DNR; if (bio) { @@ -196,26 +196,19 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req) } } -int nvmet_parse_io_cmd(struct nvmet_req *req) +u16 nvmet_parse_io_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; + u16 ret; - if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { - pr_err("nvmet: got io cmd %d while CC.EN == 0\n", - cmd->common.opcode); + ret = nvmet_check_ctrl_status(req, cmd); + if (unlikely(ret)) { req->ns = NULL; - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; - } - - if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("nvmet: got io cmd %d while CSTS.RDY == 0\n", - cmd->common.opcode); - req->ns = NULL; - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + return ret; } req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); - if (!req->ns) + if (unlikely(!req->ns)) return NVME_SC_INVALID_NS | NVME_SC_DNR; switch (cmd->common.opcode) { @@ -237,7 +230,8 @@ int 
nvmet_parse_io_cmd(struct nvmet_req *req) req->execute = nvmet_execute_write_zeroes; return 0; default: - pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, + req->sq->qid); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 22f7bc6bac7f..304f1c87c160 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -13,12 +13,10 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/scatterlist.h> -#include <linux/delay.h> #include <linux/blk-mq.h> #include <linux/nvme.h> #include <linux/module.h> #include <linux/parser.h> -#include <linux/t10-pi.h> #include "nvmet.h" #include "../host/nvme.h" #include "../host/fabrics.h" @@ -93,31 +91,26 @@ static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue) static void nvme_loop_complete_rq(struct request *req) { struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); - int error = 0; nvme_cleanup_cmd(req); sg_free_table_chained(&iod->sg_table, true); + nvme_complete_rq(req); +} - if (unlikely(req->errors)) { - if (nvme_req_needs_retry(req, req->errors)) { - nvme_requeue_req(req); - return; - } - - if (blk_rq_is_passthrough(req)) - error = req->errors; - else - error = nvme_error_status(req->errors); - } +static struct blk_mq_tags *nvme_loop_tagset(struct nvme_loop_queue *queue) +{ + u32 queue_idx = nvme_loop_queue_idx(queue); - blk_mq_end_request(req, error); + if (queue_idx == 0) + return queue->ctrl->admin_tag_set.tags[queue_idx]; + return queue->ctrl->tag_set.tags[queue_idx - 1]; } static void nvme_loop_queue_response(struct nvmet_req *req) { - struct nvme_loop_iod *iod = - container_of(req, struct nvme_loop_iod, req); - struct nvme_completion *cqe = &iod->rsp; + struct nvme_loop_queue *queue = + container_of(req->sq, struct nvme_loop_queue, nvme_sq); + struct nvme_completion *cqe = req->rsp; /* * AEN requests are special as they don't time out and can @@ 
-125,15 +118,22 @@ static void nvme_loop_queue_response(struct nvmet_req *req) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. */ - if (unlikely(nvme_loop_queue_idx(iod->queue) == 0 && + if (unlikely(nvme_loop_queue_idx(queue) == 0 && cqe->command_id >= NVME_LOOP_AQ_BLKMQ_DEPTH)) { - nvme_complete_async_event(&iod->queue->ctrl->ctrl, cqe->status, + nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); } else { - struct request *rq = blk_mq_rq_from_pdu(iod); + struct request *rq; + + rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id); + if (!rq) { + dev_err(queue->ctrl->ctrl.device, + "tag 0x%x on queue %d not found\n", + cqe->command_id, nvme_loop_queue_idx(queue)); + return; + } - iod->nvme_req.result = cqe->result; - blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1); + nvme_end_request(rq, cqe->status, cqe->result); } } @@ -154,7 +154,7 @@ nvme_loop_timeout(struct request *rq, bool reserved) schedule_work(&iod->queue->ctrl->reset_work); /* fail with DNR on admin cmd timeout */ - rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR; + nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; return BLK_EH_HANDLED; } @@ -268,7 +268,7 @@ static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } -static struct blk_mq_ops nvme_loop_mq_ops = { +static const struct blk_mq_ops nvme_loop_mq_ops = { .queue_rq = nvme_loop_queue_rq, .complete = nvme_loop_complete_rq, .init_request = nvme_loop_init_request, @@ -276,7 +276,7 @@ static struct blk_mq_ops nvme_loop_mq_ops = { .timeout = nvme_loop_timeout, }; -static struct blk_mq_ops nvme_loop_admin_mq_ops = { +static const struct blk_mq_ops nvme_loop_admin_mq_ops = { .queue_rq = nvme_loop_queue_rq, .complete = nvme_loop_complete_rq, .init_request = nvme_loop_init_admin_request, @@ -349,6 +349,19 @@ out_destroy_queues: return ret; } +static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) +{ + int 
i, ret; + + for (i = 1; i < ctrl->queue_count; i++) { + ret = nvmf_connect_io_queue(&ctrl->ctrl, i); + if (ret) + return ret; + } + + return 0; +} + static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) { int error; @@ -392,7 +405,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize); error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); if (error) @@ -490,7 +503,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) struct nvme_loop_ctrl *ctrl = container_of(work, struct nvme_loop_ctrl, reset_work); bool changed; - int i, ret; + int ret; nvme_loop_shutdown_ctrl(ctrl); @@ -502,11 +515,9 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) if (ret) goto out_destroy_admin; - for (i = 1; i < ctrl->queue_count; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) - goto out_destroy_io; - } + ret = nvme_loop_connect_io_queues(ctrl); + if (ret) + goto out_destroy_io; changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); @@ -559,7 +570,7 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) { - int ret, i; + int ret; ret = nvme_loop_init_io_queues(ctrl); if (ret) @@ -588,11 +599,9 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) goto out_free_tagset; } - for (i = 1; i < ctrl->queue_count; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) - goto out_cleanup_connect_q; - } + ret = nvme_loop_connect_io_queues(ctrl); + if (ret) + goto out_cleanup_connect_q; return 0; @@ -736,7 +745,12 @@ static int __init nvme_loop_init_module(void) ret = nvmet_register_transport(&nvme_loop_ops); if (ret) return ret; - return nvmf_register_transport(&nvme_loop_transport); + + ret = nvmf_register_transport(&nvme_loop_transport); + if 
(ret) + nvmet_unregister_transport(&nvme_loop_ops); + + return ret; } static void __exit nvme_loop_cleanup_module(void) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index f7ff15f17ca9..7cb77ba5993b 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -253,11 +253,11 @@ struct nvmet_async_event { u8 log_page; }; -int nvmet_parse_connect_cmd(struct nvmet_req *req); -int nvmet_parse_io_cmd(struct nvmet_req *req); -int nvmet_parse_admin_cmd(struct nvmet_req *req); -int nvmet_parse_discovery_cmd(struct nvmet_req *req); -int nvmet_parse_fabrics_cmd(struct nvmet_req *req); +u16 nvmet_parse_connect_cmd(struct nvmet_req *req); +u16 nvmet_parse_io_cmd(struct nvmet_req *req); +u16 nvmet_parse_admin_cmd(struct nvmet_req *req); +u16 nvmet_parse_discovery_cmd(struct nvmet_req *req); +u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops); @@ -278,6 +278,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, struct nvmet_req *req, struct nvmet_ctrl **ret); void nvmet_ctrl_put(struct nvmet_ctrl *ctrl); +u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd); struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, enum nvme_subsys_type type); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index ecc4fe862561..99c69018a35f 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1199,6 +1199,11 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, } queue->port = cm_id->context; + if (queue->host_qid == 0) { + /* Let inflight controller teardown complete */ + flush_scheduled_work(); + } + ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); if (ret) goto release_queue; @@ -1427,12 +1432,16 @@ restart: static int nvmet_rdma_add_port(struct 
nvmet_port *port) { struct rdma_cm_id *cm_id; - struct sockaddr_in addr_in; - u16 port_in; + struct sockaddr_storage addr = { }; + __kernel_sa_family_t af; int ret; switch (port->disc_addr.adrfam) { case NVMF_ADDR_FAMILY_IP4: + af = AF_INET; + break; + case NVMF_ADDR_FAMILY_IP6: + af = AF_INET6; break; default: pr_err("address family %d not supported\n", @@ -1440,13 +1449,13 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) return -EINVAL; } - ret = kstrtou16(port->disc_addr.trsvcid, 0, &port_in); - if (ret) + ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr, + port->disc_addr.trsvcid, &addr); + if (ret) { + pr_err("malformed ip/port passed: %s:%s\n", + port->disc_addr.traddr, port->disc_addr.trsvcid); return ret; - - addr_in.sin_family = AF_INET; - addr_in.sin_addr.s_addr = in_aton(port->disc_addr.traddr); - addr_in.sin_port = htons(port_in); + } cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port, RDMA_PS_TCP, IB_QPT_RC); @@ -1455,20 +1464,32 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) return PTR_ERR(cm_id); } - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr_in); + /* + * Allow both IPv4 and IPv6 sockets to bind a single port + * at the same time. 
+ */ + ret = rdma_set_afonly(cm_id, 1); + if (ret) { + pr_err("rdma_set_afonly failed (%d)\n", ret); + goto out_destroy_id; + } + + ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr); if (ret) { - pr_err("binding CM ID to %pISpc failed (%d)\n", &addr_in, ret); + pr_err("binding CM ID to %pISpcs failed (%d)\n", + (struct sockaddr *)&addr, ret); goto out_destroy_id; } ret = rdma_listen(cm_id, 128); if (ret) { - pr_err("listening to %pISpc failed (%d)\n", &addr_in, ret); + pr_err("listening to %pISpcs failed (%d)\n", + (struct sockaddr *)&addr, ret); goto out_destroy_id; } - pr_info("enabling port %d (%pISpc)\n", - le16_to_cpu(port->disc_addr.portid), &addr_in); + pr_info("enabling port %d (%pISpcs)\n", + le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr); port->priv = cm_id; return 0; diff --git a/drivers/pci/dwc/pcie-hisi.c b/drivers/pci/dwc/pcie-hisi.c index fd66a3199db7..cf9d6a9d9fd4 100644 --- a/drivers/pci/dwc/pcie-hisi.c +++ b/drivers/pci/dwc/pcie-hisi.c @@ -380,9 +380,13 @@ struct pci_ecam_ops hisi_pcie_platform_ops = { static const struct of_device_id hisi_pcie_almost_ecam_of_match[] = { { - .compatible = "hisilicon,pcie-almost-ecam", + .compatible = "hisilicon,hip06-pcie-ecam", .data = (void *) &hisi_pcie_platform_ops, }, + { + .compatible = "hisilicon,hip07-pcie-ecam", + .data = (void *) &hisi_pcie_platform_ops, + }, {}, }; diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index f80134e3e0b6..9ff790174906 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -13,6 +13,7 @@ * published by the Free Software Foundation. */ +#include <linux/dmi.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -1524,10 +1525,31 @@ static void chv_gpio_irq_handler(struct irq_desc *desc) chained_irq_exit(chip, desc); } +/* + * Certain machines seem to hardcode Linux IRQ numbers in their ACPI + * tables. 
Since we leave GPIOs that are not capable of generating + * interrupts out of the irqdomain the numbering will be different and + * cause devices using the hardcoded IRQ numbers fail. In order not to + * break such machines we will only mask pins from irqdomain if the machine + * is not listed below. + */ +static const struct dmi_system_id chv_no_valid_mask[] = { + { + /* See https://bugzilla.kernel.org/show_bug.cgi?id=194945 */ + .ident = "Acer Chromebook (CYAN)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Edgar"), + DMI_MATCH(DMI_BIOS_DATE, "05/21/2016"), + }, + } +}; + static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) { const struct chv_gpio_pinrange *range; struct gpio_chip *chip = &pctrl->chip; + bool need_valid_mask = !dmi_check_system(chv_no_valid_mask); int ret, i, offset; *chip = chv_gpio_chip; @@ -1536,7 +1558,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) chip->label = dev_name(pctrl->dev); chip->parent = pctrl->dev; chip->base = -1; - chip->irq_need_valid_mask = true; + chip->irq_need_valid_mask = need_valid_mask; ret = devm_gpiochip_add_data(pctrl->dev, chip, pctrl); if (ret) { @@ -1567,7 +1589,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) intsel &= CHV_PADCTRL0_INTSEL_MASK; intsel >>= CHV_PADCTRL0_INTSEL_SHIFT; - if (intsel >= pctrl->community->nirqs) + if (need_valid_mask && intsel >= pctrl->community->nirqs) clear_bit(i, chip->irq_valid_mask); } diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index f9b49967f512..63e51b56a22a 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -1468,82 +1468,82 @@ const struct samsung_pin_ctrl exynos5420_pin_ctrl[] __initconst = { /* pin banks of exynos5433 pin-controller - ALIVE */ static const struct samsung_pin_bank_data exynos5433_pin_banks0[] __initconst = { - EXYNOS_PIN_BANK_EINTW(8, 0x000, "gpa0", 0x00), - 
EXYNOS_PIN_BANK_EINTW(8, 0x020, "gpa1", 0x04), - EXYNOS_PIN_BANK_EINTW(8, 0x040, "gpa2", 0x08), - EXYNOS_PIN_BANK_EINTW(8, 0x060, "gpa3", 0x0c), - EXYNOS_PIN_BANK_EINTW_EXT(8, 0x020, "gpf1", 0x1004, 1), - EXYNOS_PIN_BANK_EINTW_EXT(4, 0x040, "gpf2", 0x1008, 1), - EXYNOS_PIN_BANK_EINTW_EXT(4, 0x060, "gpf3", 0x100c, 1), - EXYNOS_PIN_BANK_EINTW_EXT(8, 0x080, "gpf4", 0x1010, 1), - EXYNOS_PIN_BANK_EINTW_EXT(8, 0x0a0, "gpf5", 0x1014, 1), + EXYNOS5433_PIN_BANK_EINTW(8, 0x000, "gpa0", 0x00), + EXYNOS5433_PIN_BANK_EINTW(8, 0x020, "gpa1", 0x04), + EXYNOS5433_PIN_BANK_EINTW(8, 0x040, "gpa2", 0x08), + EXYNOS5433_PIN_BANK_EINTW(8, 0x060, "gpa3", 0x0c), + EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x020, "gpf1", 0x1004, 1), + EXYNOS5433_PIN_BANK_EINTW_EXT(4, 0x040, "gpf2", 0x1008, 1), + EXYNOS5433_PIN_BANK_EINTW_EXT(4, 0x060, "gpf3", 0x100c, 1), + EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x080, "gpf4", 0x1010, 1), + EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x0a0, "gpf5", 0x1014, 1), }; /* pin banks of exynos5433 pin-controller - AUD */ static const struct samsung_pin_bank_data exynos5433_pin_banks1[] __initconst = { - EXYNOS_PIN_BANK_EINTG(7, 0x000, "gpz0", 0x00), - EXYNOS_PIN_BANK_EINTG(4, 0x020, "gpz1", 0x04), + EXYNOS5433_PIN_BANK_EINTG(7, 0x000, "gpz0", 0x00), + EXYNOS5433_PIN_BANK_EINTG(4, 0x020, "gpz1", 0x04), }; /* pin banks of exynos5433 pin-controller - CPIF */ static const struct samsung_pin_bank_data exynos5433_pin_banks2[] __initconst = { - EXYNOS_PIN_BANK_EINTG(2, 0x000, "gpv6", 0x00), + EXYNOS5433_PIN_BANK_EINTG(2, 0x000, "gpv6", 0x00), }; /* pin banks of exynos5433 pin-controller - eSE */ static const struct samsung_pin_bank_data exynos5433_pin_banks3[] __initconst = { - EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj2", 0x00), + EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj2", 0x00), }; /* pin banks of exynos5433 pin-controller - FINGER */ static const struct samsung_pin_bank_data exynos5433_pin_banks4[] __initconst = { - EXYNOS_PIN_BANK_EINTG(4, 0x000, "gpd5", 0x00), + EXYNOS5433_PIN_BANK_EINTG(4, 0x000, 
"gpd5", 0x00), }; /* pin banks of exynos5433 pin-controller - FSYS */ static const struct samsung_pin_bank_data exynos5433_pin_banks5[] __initconst = { - EXYNOS_PIN_BANK_EINTG(6, 0x000, "gph1", 0x00), - EXYNOS_PIN_BANK_EINTG(7, 0x020, "gpr4", 0x04), - EXYNOS_PIN_BANK_EINTG(5, 0x040, "gpr0", 0x08), - EXYNOS_PIN_BANK_EINTG(8, 0x060, "gpr1", 0x0c), - EXYNOS_PIN_BANK_EINTG(2, 0x080, "gpr2", 0x10), - EXYNOS_PIN_BANK_EINTG(8, 0x0a0, "gpr3", 0x14), + EXYNOS5433_PIN_BANK_EINTG(6, 0x000, "gph1", 0x00), + EXYNOS5433_PIN_BANK_EINTG(7, 0x020, "gpr4", 0x04), + EXYNOS5433_PIN_BANK_EINTG(5, 0x040, "gpr0", 0x08), + EXYNOS5433_PIN_BANK_EINTG(8, 0x060, "gpr1", 0x0c), + EXYNOS5433_PIN_BANK_EINTG(2, 0x080, "gpr2", 0x10), + EXYNOS5433_PIN_BANK_EINTG(8, 0x0a0, "gpr3", 0x14), }; /* pin banks of exynos5433 pin-controller - IMEM */ static const struct samsung_pin_bank_data exynos5433_pin_banks6[] __initconst = { - EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpf0", 0x00), + EXYNOS5433_PIN_BANK_EINTG(8, 0x000, "gpf0", 0x00), }; /* pin banks of exynos5433 pin-controller - NFC */ static const struct samsung_pin_bank_data exynos5433_pin_banks7[] __initconst = { - EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj0", 0x00), + EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj0", 0x00), }; /* pin banks of exynos5433 pin-controller - PERIC */ static const struct samsung_pin_bank_data exynos5433_pin_banks8[] __initconst = { - EXYNOS_PIN_BANK_EINTG(6, 0x000, "gpv7", 0x00), - EXYNOS_PIN_BANK_EINTG(5, 0x020, "gpb0", 0x04), - EXYNOS_PIN_BANK_EINTG(8, 0x040, "gpc0", 0x08), - EXYNOS_PIN_BANK_EINTG(2, 0x060, "gpc1", 0x0c), - EXYNOS_PIN_BANK_EINTG(6, 0x080, "gpc2", 0x10), - EXYNOS_PIN_BANK_EINTG(8, 0x0a0, "gpc3", 0x14), - EXYNOS_PIN_BANK_EINTG(2, 0x0c0, "gpg0", 0x18), - EXYNOS_PIN_BANK_EINTG(4, 0x0e0, "gpd0", 0x1c), - EXYNOS_PIN_BANK_EINTG(6, 0x100, "gpd1", 0x20), - EXYNOS_PIN_BANK_EINTG(8, 0x120, "gpd2", 0x24), - EXYNOS_PIN_BANK_EINTG(5, 0x140, "gpd4", 0x28), - EXYNOS_PIN_BANK_EINTG(2, 0x160, "gpd8", 0x2c), - EXYNOS_PIN_BANK_EINTG(7, 
0x180, "gpd6", 0x30), - EXYNOS_PIN_BANK_EINTG(3, 0x1a0, "gpd7", 0x34), - EXYNOS_PIN_BANK_EINTG(5, 0x1c0, "gpg1", 0x38), - EXYNOS_PIN_BANK_EINTG(2, 0x1e0, "gpg2", 0x3c), - EXYNOS_PIN_BANK_EINTG(8, 0x200, "gpg3", 0x40), + EXYNOS5433_PIN_BANK_EINTG(6, 0x000, "gpv7", 0x00), + EXYNOS5433_PIN_BANK_EINTG(5, 0x020, "gpb0", 0x04), + EXYNOS5433_PIN_BANK_EINTG(8, 0x040, "gpc0", 0x08), + EXYNOS5433_PIN_BANK_EINTG(2, 0x060, "gpc1", 0x0c), + EXYNOS5433_PIN_BANK_EINTG(6, 0x080, "gpc2", 0x10), + EXYNOS5433_PIN_BANK_EINTG(8, 0x0a0, "gpc3", 0x14), + EXYNOS5433_PIN_BANK_EINTG(2, 0x0c0, "gpg0", 0x18), + EXYNOS5433_PIN_BANK_EINTG(4, 0x0e0, "gpd0", 0x1c), + EXYNOS5433_PIN_BANK_EINTG(6, 0x100, "gpd1", 0x20), + EXYNOS5433_PIN_BANK_EINTG(8, 0x120, "gpd2", 0x24), + EXYNOS5433_PIN_BANK_EINTG(5, 0x140, "gpd4", 0x28), + EXYNOS5433_PIN_BANK_EINTG(2, 0x160, "gpd8", 0x2c), + EXYNOS5433_PIN_BANK_EINTG(7, 0x180, "gpd6", 0x30), + EXYNOS5433_PIN_BANK_EINTG(3, 0x1a0, "gpd7", 0x34), + EXYNOS5433_PIN_BANK_EINTG(5, 0x1c0, "gpg1", 0x38), + EXYNOS5433_PIN_BANK_EINTG(2, 0x1e0, "gpg2", 0x3c), + EXYNOS5433_PIN_BANK_EINTG(8, 0x200, "gpg3", 0x40), }; /* pin banks of exynos5433 pin-controller - TOUCH */ static const struct samsung_pin_bank_data exynos5433_pin_banks9[] __initconst = { - EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj1", 0x00), + EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj1", 0x00), }; /* diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.h b/drivers/pinctrl/samsung/pinctrl-exynos.h index a473092fb8d2..cd046eb7d705 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.h +++ b/drivers/pinctrl/samsung/pinctrl-exynos.h @@ -79,17 +79,6 @@ .name = id \ } -#define EXYNOS_PIN_BANK_EINTW_EXT(pins, reg, id, offs, pctl_idx) \ - { \ - .type = &bank_type_alive, \ - .pctl_offset = reg, \ - .nr_pins = pins, \ - .eint_type = EINT_TYPE_WKUP, \ - .eint_offset = offs, \ - .name = id, \ - .pctl_res_idx = pctl_idx, \ - } \ - #define EXYNOS5433_PIN_BANK_EINTG(pins, reg, id, offs) \ { \ .type = &exynos5433_bank_type_off, \ diff --git 
a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 4bc88eb52712..e1bffc9bb194 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -141,6 +141,14 @@ config DELL_WMI_AIO To compile this driver as a module, choose M here: the module will be called dell-wmi-aio. +config DELL_WMI_LED + tristate "External LED on Dell Business Netbooks" + depends on LEDS_CLASS + depends on ACPI_WMI + help + This adds support for the Latitude 2100 and similar + notebooks that have an external LED. + config DELL_SMO8800 tristate "Dell Latitude freefall driver (ACPI SMO88XX)" depends on ACPI diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index 299d0f9e40f7..776b3a7a4984 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_DELL_SMBIOS) += dell-smbios.o obj-$(CONFIG_DELL_LAPTOP) += dell-laptop.o obj-$(CONFIG_DELL_WMI) += dell-wmi.o obj-$(CONFIG_DELL_WMI_AIO) += dell-wmi-aio.o +obj-$(CONFIG_DELL_WMI_LED) += dell-wmi-led.o obj-$(CONFIG_DELL_SMO8800) += dell-smo8800.o obj-$(CONFIG_DELL_RBTN) += dell-rbtn.o obj-$(CONFIG_ACER_WMI) += acer-wmi.o diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index f57dd282a002..2e237bad4995 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -29,6 +29,7 @@ #include <linux/mm.h> #include <linux/i8042.h> #include <linux/debugfs.h> +#include <linux/dell-led.h> #include <linux/seq_file.h> #include <acpi/video.h> #include "dell-rbtn.h" @@ -42,6 +43,8 @@ #define KBD_LED_AUTO_50_TOKEN 0x02EB #define KBD_LED_AUTO_75_TOKEN 0x02EC #define KBD_LED_AUTO_100_TOKEN 0x02F6 +#define GLOBAL_MIC_MUTE_ENABLE 0x0364 +#define GLOBAL_MIC_MUTE_DISABLE 0x0365 struct quirk_entry { u8 touchpad_led; @@ -1978,6 +1981,31 @@ static void kbd_led_exit(void) led_classdev_unregister(&kbd_led); } +int dell_micmute_led_set(int state) +{ + struct calling_interface_buffer *buffer; + struct 
calling_interface_token *token; + + if (state == 0) + token = dell_smbios_find_token(GLOBAL_MIC_MUTE_DISABLE); + else if (state == 1) + token = dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE); + else + return -EINVAL; + + if (!token) + return -ENODEV; + + buffer = dell_smbios_get_buffer(); + buffer->input[0] = token->location; + buffer->input[1] = token->value; + dell_smbios_send_request(1, 0); + dell_smbios_release_buffer(); + + return state; +} +EXPORT_SYMBOL_GPL(dell_micmute_led_set); + static int __init dell_init(void) { struct calling_interface_buffer *buffer; diff --git a/drivers/leds/dell-led.c b/drivers/platform/x86/dell-wmi-led.c index b3d6e9c15cf9..a0c7e99530ef 100644 --- a/drivers/leds/dell-led.c +++ b/drivers/platform/x86/dell-wmi-led.c @@ -1,6 +1,4 @@ /* - * dell_led.c - Dell LED Driver - * * Copyright (C) 2010 Dell Inc. * Louis Davis <louis_davis@dell.com> * Jim Dailey <jim_dailey@dell.com> @@ -15,16 +13,12 @@ #include <linux/leds.h> #include <linux/slab.h> #include <linux/module.h> -#include <linux/dmi.h> -#include <linux/dell-led.h> -#include "../platform/x86/dell-smbios.h" MODULE_AUTHOR("Louis Davis/Jim Dailey"); MODULE_DESCRIPTION("Dell LED Control Driver"); MODULE_LICENSE("GPL"); #define DELL_LED_BIOS_GUID "F6E4FE6E-909D-47cb-8BAB-C9F6F2F8D396" -#define DELL_APP_GUID "A80593CE-A997-11DA-B012-B622A1EF5492" MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID); /* Error Result Codes: */ @@ -43,53 +37,6 @@ MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID); #define CMD_LED_OFF 17 #define CMD_LED_BLINK 18 -#define GLOBAL_MIC_MUTE_ENABLE 0x364 -#define GLOBAL_MIC_MUTE_DISABLE 0x365 - -static int dell_micmute_led_set(int state) -{ - struct calling_interface_buffer *buffer; - struct calling_interface_token *token; - - if (!wmi_has_guid(DELL_APP_GUID)) - return -ENODEV; - - if (state == 0) - token = dell_smbios_find_token(GLOBAL_MIC_MUTE_DISABLE); - else if (state == 1) - token = dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE); - else - return -EINVAL; - - if (!token) - return 
-ENODEV; - - buffer = dell_smbios_get_buffer(); - buffer->input[0] = token->location; - buffer->input[1] = token->value; - dell_smbios_send_request(1, 0); - dell_smbios_release_buffer(); - - return state; -} - -int dell_app_wmi_led_set(int whichled, int on) -{ - int state = 0; - - switch (whichled) { - case DELL_LED_MICMUTE: - state = dell_micmute_led_set(on); - break; - default: - pr_warn("led type %x is not supported\n", whichled); - break; - } - - return state; -} -EXPORT_SYMBOL_GPL(dell_app_wmi_led_set); - struct bios_args { unsigned char length; unsigned char result_code; @@ -99,37 +46,29 @@ struct bios_args { unsigned char off_time; }; -static int dell_led_perform_fn(u8 length, - u8 result_code, - u8 device_id, - u8 command, - u8 on_time, - u8 off_time) +static int dell_led_perform_fn(u8 length, u8 result_code, u8 device_id, + u8 command, u8 on_time, u8 off_time) { - struct bios_args *bios_return; - u8 return_code; - union acpi_object *obj; struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; + struct bios_args *bios_return; struct acpi_buffer input; + union acpi_object *obj; acpi_status status; + u8 return_code; - struct bios_args args; - args.length = length; - args.result_code = result_code; - args.device_id = device_id; - args.command = command; - args.on_time = on_time; - args.off_time = off_time; + struct bios_args args = { + .length = length, + .result_code = result_code, + .device_id = device_id, + .command = command, + .on_time = on_time, + .off_time = off_time + }; input.length = sizeof(struct bios_args); input.pointer = &args; - status = wmi_evaluate_method(DELL_LED_BIOS_GUID, - 1, - 1, - &input, - &output); - + status = wmi_evaluate_method(DELL_LED_BIOS_GUID, 1, 1, &input, &output); if (ACPI_FAILURE(status)) return status; @@ -137,7 +76,7 @@ static int dell_led_perform_fn(u8 length, if (!obj) return -EINVAL; - else if (obj->type != ACPI_TYPE_BUFFER) { + if (obj->type != ACPI_TYPE_BUFFER) { kfree(obj); return -EINVAL; } @@ -170,8 +109,7 @@ 
static int led_off(void) 0); /* not used */ } -static int led_blink(unsigned char on_eighths, - unsigned char off_eighths) +static int led_blink(unsigned char on_eighths, unsigned char off_eighths) { return dell_led_perform_fn(5, /* Length of command */ INTERFACE_ERROR, /* Init to INTERFACE_ERROR */ @@ -182,7 +120,7 @@ static int led_blink(unsigned char on_eighths, } static void dell_led_set(struct led_classdev *led_cdev, - enum led_brightness value) + enum led_brightness value) { if (value == LED_OFF) led_off(); @@ -191,27 +129,22 @@ static void dell_led_set(struct led_classdev *led_cdev, } static int dell_led_blink(struct led_classdev *led_cdev, - unsigned long *delay_on, - unsigned long *delay_off) + unsigned long *delay_on, unsigned long *delay_off) { unsigned long on_eighths; unsigned long off_eighths; - /* The Dell LED delay is based on 125ms intervals. - Need to round up to next interval. */ + /* + * The Dell LED delay is based on 125ms intervals. + * Need to round up to next interval. 
+ */ - on_eighths = (*delay_on + 124) / 125; - if (0 == on_eighths) - on_eighths = 1; - if (on_eighths > 255) - on_eighths = 255; + on_eighths = DIV_ROUND_UP(*delay_on, 125); + on_eighths = clamp_t(unsigned long, on_eighths, 1, 255); *delay_on = on_eighths * 125; - off_eighths = (*delay_off + 124) / 125; - if (0 == off_eighths) - off_eighths = 1; - if (off_eighths > 255) - off_eighths = 255; + off_eighths = DIV_ROUND_UP(*delay_off, 125); + off_eighths = clamp_t(unsigned long, off_eighths, 1, 255); *delay_off = off_eighths * 125; led_blink(on_eighths, off_eighths); @@ -232,29 +165,21 @@ static int __init dell_led_init(void) { int error = 0; - if (!wmi_has_guid(DELL_LED_BIOS_GUID) && !wmi_has_guid(DELL_APP_GUID)) + if (!wmi_has_guid(DELL_LED_BIOS_GUID)) return -ENODEV; - if (wmi_has_guid(DELL_LED_BIOS_GUID)) { - error = led_off(); - if (error != 0) - return -ENODEV; - - error = led_classdev_register(NULL, &dell_led); - } + error = led_off(); + if (error != 0) + return -ENODEV; - return error; + return led_classdev_register(NULL, &dell_led); } static void __exit dell_led_exit(void) { - int error = 0; + led_classdev_unregister(&dell_led); - if (wmi_has_guid(DELL_LED_BIOS_GUID)) { - error = led_off(); - if (error == 0) - led_classdev_unregister(&dell_led); - } + led_off(); } module_init(dell_led_init); diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c index 56bce1908be2..85812521b6ba 100644 --- a/drivers/power/avs/rockchip-io-domain.c +++ b/drivers/power/avs/rockchip-io-domain.c @@ -43,6 +43,10 @@ #define RK3288_SOC_CON2_FLASH0 BIT(7) #define RK3288_SOC_FLASH_SUPPLY_NUM 2 +#define RK3328_SOC_CON4 0x410 +#define RK3328_SOC_CON4_VCCIO2 BIT(7) +#define RK3328_SOC_VCCIO2_SUPPLY_NUM 1 + #define RK3368_SOC_CON15 0x43c #define RK3368_SOC_CON15_FLASH0 BIT(14) #define RK3368_SOC_FLASH_SUPPLY_NUM 2 @@ -166,6 +170,25 @@ static void rk3288_iodomain_init(struct rockchip_iodomain *iod) dev_warn(iod->dev, "couldn't update flash0 ctrl\n"); } 
+static void rk3328_iodomain_init(struct rockchip_iodomain *iod) +{ + int ret; + u32 val; + + /* if no vccio2 supply we should leave things alone */ + if (!iod->supplies[RK3328_SOC_VCCIO2_SUPPLY_NUM].reg) + return; + + /* + * set vccio2 iodomain to also use this framework + * instead of a special gpio. + */ + val = RK3328_SOC_CON4_VCCIO2 | (RK3328_SOC_CON4_VCCIO2 << 16); + ret = regmap_write(iod->grf, RK3328_SOC_CON4, val); + if (ret < 0) + dev_warn(iod->dev, "couldn't update vccio2 vsel ctrl\n"); +} + static void rk3368_iodomain_init(struct rockchip_iodomain *iod) { int ret; @@ -247,6 +270,20 @@ static const struct rockchip_iodomain_soc_data soc_data_rk3288 = { .init = rk3288_iodomain_init, }; +static const struct rockchip_iodomain_soc_data soc_data_rk3328 = { + .grf_offset = 0x410, + .supply_names = { + "vccio1", + "vccio2", + "vccio3", + "vccio4", + "vccio5", + "vccio6", + "pmuio", + }, + .init = rk3328_iodomain_init, +}; + static const struct rockchip_iodomain_soc_data soc_data_rk3368 = { .grf_offset = 0x900, .supply_names = { @@ -312,6 +349,10 @@ static const struct of_device_id rockchip_iodomain_match[] = { .data = (void *)&soc_data_rk3288 }, { + .compatible = "rockchip,rk3328-io-voltage-domain", + .data = (void *)&soc_data_rk3328 + }, + { .compatible = "rockchip,rk3368-io-voltage-domain", .data = (void *)&soc_data_rk3368 }, diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig index b8cacccf18c8..13f1714cf6f7 100644 --- a/drivers/power/reset/Kconfig +++ b/drivers/power/reset/Kconfig @@ -67,6 +67,15 @@ config POWER_RESET_BRCMSTB Say Y here if you have a Broadcom STB board and you wish to have restart support. +config POWER_RESET_GEMINI_POWEROFF + bool "Cortina Gemini power-off driver" + depends on ARCH_GEMINI || COMPILE_TEST + depends on OF && HAS_IOMEM + default ARCH_GEMINI + help + This driver supports turning off the Cortina Gemini SoC. + Select this if you're building a kernel with Gemini SoC support. 
+ config POWER_RESET_GPIO bool "GPIO power-off driver" depends on OF_GPIO diff --git a/drivers/power/reset/Makefile b/drivers/power/reset/Makefile index 11dae3b56ff9..58cf5b30559f 100644 --- a/drivers/power/reset/Makefile +++ b/drivers/power/reset/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_POWER_RESET_AT91_SAMA5D2_SHDWC) += at91-sama5d2_shdwc.o obj-$(CONFIG_POWER_RESET_AXXIA) += axxia-reset.o obj-$(CONFIG_POWER_RESET_BRCMKONA) += brcm-kona-reset.o obj-$(CONFIG_POWER_RESET_BRCMSTB) += brcmstb-reboot.o +obj-$(CONFIG_POWER_RESET_GEMINI_POWEROFF) += gemini-poweroff.o obj-$(CONFIG_POWER_RESET_GPIO) += gpio-poweroff.o obj-$(CONFIG_POWER_RESET_GPIO_RESTART) += gpio-restart.o obj-$(CONFIG_POWER_RESET_HISI) += hisi-reboot.o diff --git a/drivers/power/reset/gemini-poweroff.c b/drivers/power/reset/gemini-poweroff.c new file mode 100644 index 000000000000..de878fd26f27 --- /dev/null +++ b/drivers/power/reset/gemini-poweroff.c @@ -0,0 +1,160 @@ +/* + * Gemini power management controller + * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org> + * + * Inspired by code from the SL3516 board support by Jason Lee + * Inspired by code from Janos Laube <janos.dev@gmail.com> + */ +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/pm.h> +#include <linux/bitops.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/reboot.h> + +#define GEMINI_PWC_ID 0x00010500 +#define GEMINI_PWC_IDREG 0x00 +#define GEMINI_PWC_CTRLREG 0x04 +#define GEMINI_PWC_STATREG 0x08 + +#define GEMINI_CTRL_SHUTDOWN BIT(0) +#define GEMINI_CTRL_ENABLE BIT(1) +#define GEMINI_CTRL_IRQ_CLR BIT(2) + +#define GEMINI_STAT_CIR BIT(4) +#define GEMINI_STAT_RTC BIT(5) +#define GEMINI_STAT_POWERBUTTON BIT(6) + +struct gemini_powercon { + struct device *dev; + void __iomem *base; +}; + +static irqreturn_t gemini_powerbutton_interrupt(int irq, void *data) +{ + struct gemini_powercon *gpw = data; + u32 val; + + /* ACK the IRQ */ + val = readl(gpw->base + 
GEMINI_PWC_CTRLREG); + val |= GEMINI_CTRL_IRQ_CLR; + writel(val, gpw->base + GEMINI_PWC_CTRLREG); + + val = readl(gpw->base + GEMINI_PWC_STATREG); + val &= 0x70U; + switch (val) { + case GEMINI_STAT_CIR: + dev_info(gpw->dev, "infrared poweroff\n"); + orderly_poweroff(true); + break; + case GEMINI_STAT_RTC: + dev_info(gpw->dev, "RTC poweroff\n"); + orderly_poweroff(true); + break; + case GEMINI_STAT_POWERBUTTON: + dev_info(gpw->dev, "poweroff button pressed\n"); + orderly_poweroff(true); + break; + default: + dev_info(gpw->dev, "other power management IRQ\n"); + break; + } + + return IRQ_HANDLED; +} + +/* This callback needs this static local as it has void as argument */ +static struct gemini_powercon *gpw_poweroff; + +static void gemini_poweroff(void) +{ + struct gemini_powercon *gpw = gpw_poweroff; + u32 val; + + dev_crit(gpw->dev, "Gemini power off\n"); + val = readl(gpw->base + GEMINI_PWC_CTRLREG); + val |= GEMINI_CTRL_ENABLE | GEMINI_CTRL_IRQ_CLR; + writel(val, gpw->base + GEMINI_PWC_CTRLREG); + + val &= ~GEMINI_CTRL_ENABLE; + val |= GEMINI_CTRL_SHUTDOWN; + writel(val, gpw->base + GEMINI_PWC_CTRLREG); +} + +static int gemini_poweroff_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *res; + struct gemini_powercon *gpw; + u32 val; + int irq; + int ret; + + gpw = devm_kzalloc(dev, sizeof(*gpw), GFP_KERNEL); + if (!gpw) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + gpw->base = devm_ioremap_resource(dev, res); + if (IS_ERR(gpw->base)) + return PTR_ERR(gpw->base); + + irq = platform_get_irq(pdev, 0); + if (!irq) + return -EINVAL; + + gpw->dev = dev; + + val = readl(gpw->base + GEMINI_PWC_IDREG); + val &= 0xFFFFFF00U; + if (val != GEMINI_PWC_ID) { + dev_err(dev, "wrong power controller ID: %08x\n", + val); + return -ENODEV; + } + + /* Clear the power management IRQ */ + val = readl(gpw->base + GEMINI_PWC_CTRLREG); + val |= GEMINI_CTRL_IRQ_CLR; + writel(val, gpw->base + GEMINI_PWC_CTRLREG); + + 
ret = devm_request_irq(dev, irq, gemini_powerbutton_interrupt, 0, + "poweroff", gpw); + if (ret) + return ret; + + pm_power_off = gemini_poweroff; + gpw_poweroff = gpw; + + /* + * Enable the power controller. This is crucial on Gemini + * systems: if this is not done, pressing the power button + * will result in unconditional poweroff without any warning. + * This makes the kernel handle the poweroff. + */ + val = readl(gpw->base + GEMINI_PWC_CTRLREG); + val |= GEMINI_CTRL_ENABLE; + writel(val, gpw->base + GEMINI_PWC_CTRLREG); + + dev_info(dev, "Gemini poweroff driver registered\n"); + + return 0; +} + +static const struct of_device_id gemini_poweroff_of_match[] = { + { + .compatible = "cortina,gemini-power-controller", + }, + {} +}; + +static struct platform_driver gemini_poweroff_driver = { + .probe = gemini_poweroff_probe, + .driver = { + .name = "gemini-poweroff", + .of_match_table = gemini_poweroff_of_match, + }, +}; +builtin_platform_driver(gemini_poweroff_driver); diff --git a/drivers/power/reset/syscon-poweroff.c b/drivers/power/reset/syscon-poweroff.c index b68338399e5e..f9f1cb54fbf9 100644 --- a/drivers/power/reset/syscon-poweroff.c +++ b/drivers/power/reset/syscon-poweroff.c @@ -28,12 +28,13 @@ static struct regmap *map; static u32 offset; +static u32 value; static u32 mask; static void syscon_poweroff(void) { /* Issue the poweroff */ - regmap_write(map, offset, mask); + regmap_update_bits(map, offset, mask, value); mdelay(1000); @@ -43,6 +44,7 @@ static void syscon_poweroff(void) static int syscon_poweroff_probe(struct platform_device *pdev) { char symname[KSYM_NAME_LEN]; + int mask_err, value_err; map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "regmap"); if (IS_ERR(map)) { @@ -55,11 +57,22 @@ static int syscon_poweroff_probe(struct platform_device *pdev) return -EINVAL; } - if (of_property_read_u32(pdev->dev.of_node, "mask", &mask)) { - dev_err(&pdev->dev, "unable to read 'mask'"); + value_err = of_property_read_u32(pdev->dev.of_node, 
"value", &value); + mask_err = of_property_read_u32(pdev->dev.of_node, "mask", &mask); + if (value_err && mask_err) { + dev_err(&pdev->dev, "unable to read 'value' and 'mask'"); return -EINVAL; } + if (value_err) { + /* support old binding */ + value = mask; + mask = 0xFFFFFFFF; + } else if (mask_err) { + /* support value without mask*/ + mask = 0xFFFFFFFF; + } + if (pm_power_off) { lookup_symbol_name((ulong)pm_power_off, symname); dev_err(&pdev->dev, diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig index da54ac88f068..da922756149f 100644 --- a/drivers/power/supply/Kconfig +++ b/drivers/power/supply/Kconfig @@ -117,6 +117,12 @@ config BATTERY_DS2782 Say Y here to enable support for the DS2782/DS2786 standalone battery gas-gauge. +config BATTERY_LEGO_EV3 + tristate "LEGO MINDSTORMS EV3 battery" + depends on OF && IIO && GPIOLIB + help + Say Y here to enable support for the LEGO MINDSTORMS EV3 battery. + config BATTERY_PMU tristate "Apple PMU battery" depends on PPC32 && ADB_PMU @@ -317,6 +323,14 @@ config BATTERY_RX51 Say Y here to enable support for battery information on Nokia RX-51, also known as N900 tablet. +config CHARGER_CPCAP + tristate "CPCAP PMIC Charger Driver" + depends on MFD_CPCAP && IIO + default MFD_CPCAP + help + Say Y to enable support for CPCAP PMIC charger driver for Motorola + mobile devices such as Droid 4. + config CHARGER_ISP1704 tristate "ISP1704 USB Charger Detection" depends on USB_PHY @@ -438,6 +452,7 @@ config CHARGER_BQ2415X config CHARGER_BQ24190 tristate "TI BQ24190 battery charger driver" depends on I2C + depends on EXTCON depends on GPIOLIB || COMPILE_TEST help Say Y to enable support for the TI BQ24190 battery charger. 
diff --git a/drivers/power/supply/Makefile b/drivers/power/supply/Makefile index 3789a2c06fdf..39fc733e6cc4 100644 --- a/drivers/power/supply/Makefile +++ b/drivers/power/supply/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_BATTERY_DS2781) += ds2781_battery.o obj-$(CONFIG_BATTERY_DS2782) += ds2782_battery.o obj-$(CONFIG_BATTERY_GAUGE_LTC2941) += ltc2941-battery-gauge.o obj-$(CONFIG_BATTERY_GOLDFISH) += goldfish_battery.o +obj-$(CONFIG_BATTERY_LEGO_EV3) += lego_ev3_battery.o obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o obj-$(CONFIG_BATTERY_OLPC) += olpc_battery.o obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o @@ -51,6 +52,7 @@ obj-$(CONFIG_CHARGER_PCF50633) += pcf50633-charger.o obj-$(CONFIG_BATTERY_JZ4740) += jz4740-battery.o obj-$(CONFIG_BATTERY_RX51) += rx51_battery.o obj-$(CONFIG_AB8500_BM) += ab8500_bmdata.o ab8500_charger.o ab8500_fg.o ab8500_btemp.o abx500_chargalg.o pm2301_charger.o +obj-$(CONFIG_CHARGER_CPCAP) += cpcap-charger.o obj-$(CONFIG_CHARGER_ISP1704) += isp1704_charger.o obj-$(CONFIG_CHARGER_MAX8903) += max8903_charger.o obj-$(CONFIG_CHARGER_TWL4030) += twl4030_charger.o diff --git a/drivers/power/supply/ab8500_bmdata.c b/drivers/power/supply/ab8500_bmdata.c index d29864533093..8c49586015d0 100644 --- a/drivers/power/supply/ab8500_bmdata.c +++ b/drivers/power/supply/ab8500_bmdata.c @@ -430,10 +430,10 @@ static const struct abx500_maxim_parameters ab8500_maxi_params = { }; static const struct abx500_maxim_parameters abx540_maxi_params = { - .ena_maxi = true, - .chg_curr = 3000, - .wait_cycles = 10, - .charger_curr_step = 200, + .ena_maxi = true, + .chg_curr = 3000, + .wait_cycles = 10, + .charger_curr_step = 200, }; static const struct abx500_bm_charger_parameters chg = { diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index 6be2fe27bb07..d51ebd1da65e 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -14,6 +14,7 @@ * GNU General Public License for more details. 
*/ +#include <linux/acpi.h> #include <linux/module.h> #include <linux/device.h> #include <linux/regmap.h> @@ -113,7 +114,8 @@ #define ILIM_3000MA 3000 /* 3000mA */ #define AXP288_EXTCON_DEV_NAME "axp288_extcon" -#define USB_HOST_EXTCON_DEV_NAME "INT3496:00" +#define USB_HOST_EXTCON_HID "INT3496" +#define USB_HOST_EXTCON_NAME "INT3496:00" static const unsigned int cable_ids[] = { EXTCON_CHG_USB_SDP, EXTCON_CHG_USB_CDP, EXTCON_CHG_USB_DCP }; @@ -807,10 +809,14 @@ static int axp288_charger_probe(struct platform_device *pdev) return -EPROBE_DEFER; } - info->otg.cable = extcon_get_extcon_dev(USB_HOST_EXTCON_DEV_NAME); - if (info->otg.cable == NULL) { - dev_dbg(dev, "EXTCON_USB_HOST is not ready, probe deferred\n"); - return -EPROBE_DEFER; + if (acpi_dev_present(USB_HOST_EXTCON_HID, NULL, -1)) { + info->otg.cable = extcon_get_extcon_dev(USB_HOST_EXTCON_NAME); + if (info->otg.cable == NULL) { + dev_dbg(dev, "EXTCON_USB_HOST is not ready, probe deferred\n"); + return -EPROBE_DEFER; + } + dev_info(&pdev->dev, + "Using " USB_HOST_EXTCON_HID " extcon for usb-id\n"); } platform_set_drvdata(pdev, info); @@ -849,13 +855,15 @@ static int axp288_charger_probe(struct platform_device *pdev) /* Register for OTG notification */ INIT_WORK(&info->otg.work, axp288_charger_otg_evt_worker); info->otg.id_nb.notifier_call = axp288_charger_handle_otg_evt; - ret = devm_extcon_register_notifier(&pdev->dev, info->otg.cable, + if (info->otg.cable) { + ret = devm_extcon_register_notifier(&pdev->dev, info->otg.cable, EXTCON_USB_HOST, &info->otg.id_nb); - if (ret) { - dev_err(dev, "failed to register EXTCON_USB_HOST notifier\n"); - return ret; + if (ret) { + dev_err(dev, "failed to register EXTCON_USB_HOST notifier\n"); + return ret; + } + schedule_work(&info->otg.work); } - schedule_work(&info->otg.work); /* Register charger interrupts */ for (i = 0; i < CHRG_INTR_END; i++) { diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c index 
a4f08492abeb..bd9e5c3d8cc2 100644 --- a/drivers/power/supply/bq24190_charger.c +++ b/drivers/power/supply/bq24190_charger.c @@ -11,16 +11,15 @@ #include <linux/module.h> #include <linux/interrupt.h> #include <linux/delay.h> +#include <linux/extcon.h> #include <linux/of_irq.h> #include <linux/of_device.h> #include <linux/pm_runtime.h> #include <linux/power_supply.h> +#include <linux/workqueue.h> #include <linux/gpio.h> #include <linux/i2c.h> -#include <linux/power/bq24190_charger.h> - - #define BQ24190_MANUFACTURER "Texas Instruments" #define BQ24190_REG_ISC 0x00 /* Input Source Control */ @@ -39,6 +38,9 @@ #define BQ24190_REG_POC_WDT_RESET_SHIFT 6 #define BQ24190_REG_POC_CHG_CONFIG_MASK (BIT(5) | BIT(4)) #define BQ24190_REG_POC_CHG_CONFIG_SHIFT 4 +#define BQ24190_REG_POC_CHG_CONFIG_DISABLE 0x0 +#define BQ24190_REG_POC_CHG_CONFIG_CHARGE 0x1 +#define BQ24190_REG_POC_CHG_CONFIG_OTG 0x2 #define BQ24190_REG_POC_SYS_MIN_MASK (BIT(3) | BIT(2) | BIT(1)) #define BQ24190_REG_POC_SYS_MIN_SHIFT 1 #define BQ24190_REG_POC_BOOST_LIM_MASK BIT(0) @@ -151,10 +153,12 @@ struct bq24190_dev_info { struct device *dev; struct power_supply *charger; struct power_supply *battery; + struct extcon_dev *extcon; + struct notifier_block extcon_nb; + struct delayed_work extcon_work; char model_name[I2C_NAME_SIZE]; - kernel_ulong_t model; - unsigned int gpio_int; - unsigned int irq; + bool initialized; + bool irq_event; struct mutex f_reg_lock; u8 f_reg; u8 ss_reg; @@ -168,6 +172,12 @@ struct bq24190_dev_info { * number at that index in the array is the real-world value that it * represents. 
*/ + +/* REG00[2:0] (IINLIM) in uAh */ +static const int bq24190_isc_iinlim_values[] = { + 100000, 150000, 500000, 900000, 1200000, 1500000, 2000000, 3000000 +}; + /* REG02[7:2] (ICHG) in uAh */ static const int bq24190_ccc_ichg_values[] = { 512000, 576000, 640000, 704000, 768000, 832000, 896000, 960000, @@ -418,6 +428,7 @@ static ssize_t bq24190_sysfs_show(struct device *dev, struct power_supply *psy = dev_get_drvdata(dev); struct bq24190_dev_info *bdi = power_supply_get_drvdata(psy); struct bq24190_sysfs_field_info *info; + ssize_t count; int ret; u8 v; @@ -425,11 +436,20 @@ static ssize_t bq24190_sysfs_show(struct device *dev, if (!info) return -EINVAL; + ret = pm_runtime_get_sync(bdi->dev); + if (ret < 0) + return ret; + ret = bq24190_read_mask(bdi, info->reg, info->mask, info->shift, &v); if (ret) - return ret; + count = ret; + else + count = scnprintf(buf, PAGE_SIZE, "%hhx\n", v); + + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); - return scnprintf(buf, PAGE_SIZE, "%hhx\n", v); + return count; } static ssize_t bq24190_sysfs_store(struct device *dev, @@ -449,9 +469,16 @@ static ssize_t bq24190_sysfs_store(struct device *dev, if (ret < 0) return ret; + ret = pm_runtime_get_sync(bdi->dev); + if (ret < 0) + return ret; + ret = bq24190_write_mask(bdi, info->reg, info->mask, info->shift, v); if (ret) - return ret; + count = ret; + + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); return count; } @@ -523,16 +550,13 @@ static int bq24190_register_reset(struct bq24190_dev_info *bdi) if (ret < 0) return ret; - if (!v) - break; + if (v == 0) + return 0; - udelay(10); + usleep_range(100, 200); } while (--limit); - if (!limit) - return -EIO; - - return 0; + return -EIO; } /* Charger power supply property routines */ @@ -793,7 +817,9 @@ static int bq24190_charger_get_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); - pm_runtime_get_sync(bdi->dev); + ret = pm_runtime_get_sync(bdi->dev); + if 
(ret < 0) + return ret; switch (psp) { case POWER_SUPPLY_PROP_CHARGE_TYPE: @@ -833,7 +859,9 @@ static int bq24190_charger_get_property(struct power_supply *psy, ret = -ENODATA; } - pm_runtime_put_sync(bdi->dev); + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + return ret; } @@ -846,7 +874,9 @@ static int bq24190_charger_set_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); - pm_runtime_get_sync(bdi->dev); + ret = pm_runtime_get_sync(bdi->dev); + if (ret < 0) + return ret; switch (psp) { case POWER_SUPPLY_PROP_CHARGE_TYPE: @@ -862,7 +892,9 @@ static int bq24190_charger_set_property(struct power_supply *psy, ret = -EINVAL; } - pm_runtime_put_sync(bdi->dev); + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + return ret; } @@ -1063,7 +1095,9 @@ static int bq24190_battery_get_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); - pm_runtime_get_sync(bdi->dev); + ret = pm_runtime_get_sync(bdi->dev); + if (ret < 0) + return ret; switch (psp) { case POWER_SUPPLY_PROP_STATUS: @@ -1091,7 +1125,9 @@ static int bq24190_battery_get_property(struct power_supply *psy, ret = -ENODATA; } - pm_runtime_put_sync(bdi->dev); + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + return ret; } @@ -1104,7 +1140,9 @@ static int bq24190_battery_set_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); - pm_runtime_get_sync(bdi->dev); + ret = pm_runtime_get_sync(bdi->dev); + if (ret < 0) + return ret; switch (psp) { case POWER_SUPPLY_PROP_ONLINE: @@ -1117,7 +1155,9 @@ static int bq24190_battery_set_property(struct power_supply *psy, ret = -EINVAL; } - pm_runtime_put_sync(bdi->dev); + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + return ret; } @@ -1157,9 +1197,8 @@ static const struct power_supply_desc bq24190_battery_desc = { .property_is_writeable = bq24190_battery_property_is_writeable, }; -static irqreturn_t 
bq24190_irq_handler_thread(int irq, void *data) +static void bq24190_check_status(struct bq24190_dev_info *bdi) { - struct bq24190_dev_info *bdi = data; const u8 battery_mask_ss = BQ24190_REG_SS_CHRG_STAT_MASK; const u8 battery_mask_f = BQ24190_REG_F_BAT_FAULT_MASK | BQ24190_REG_F_NTC_FAULT_MASK; @@ -1167,12 +1206,10 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data) u8 ss_reg = 0, f_reg = 0; int i, ret; - pm_runtime_get_sync(bdi->dev); - ret = bq24190_read(bdi, BQ24190_REG_SS, &ss_reg); if (ret < 0) { dev_err(bdi->dev, "Can't read SS reg: %d\n", ret); - goto out; + return; } i = 0; @@ -1180,12 +1217,17 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data) ret = bq24190_read(bdi, BQ24190_REG_F, &f_reg); if (ret < 0) { dev_err(bdi->dev, "Can't read F reg: %d\n", ret); - goto out; + return; } } while (f_reg && ++i < 2); + /* ignore over/under voltage fault after disconnect */ + if (f_reg == (1 << BQ24190_REG_F_CHRG_FAULT_SHIFT) && + !(ss_reg & BQ24190_REG_SS_PG_STAT_MASK)) + f_reg = 0; + if (f_reg != bdi->f_reg) { - dev_info(bdi->dev, + dev_warn(bdi->dev, "Fault: boost %d, charge %d, battery %d, ntc %d\n", !!(f_reg & BQ24190_REG_F_BOOST_FAULT_MASK), !!(f_reg & BQ24190_REG_F_CHRG_FAULT_MASK), @@ -1229,90 +1271,126 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data) if (alert_battery) power_supply_changed(bdi->battery); -out: - pm_runtime_put_sync(bdi->dev); - dev_dbg(bdi->dev, "ss_reg: 0x%02x, f_reg: 0x%02x\n", ss_reg, f_reg); +} + +static irqreturn_t bq24190_irq_handler_thread(int irq, void *data) +{ + struct bq24190_dev_info *bdi = data; + int error; + + bdi->irq_event = true; + error = pm_runtime_get_sync(bdi->dev); + if (error < 0) { + dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error); + pm_runtime_put_noidle(bdi->dev); + return IRQ_NONE; + } + bq24190_check_status(bdi); + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + bdi->irq_event = false; return IRQ_HANDLED; } -static 
int bq24190_hw_init(struct bq24190_dev_info *bdi) +static void bq24190_extcon_work(struct work_struct *work) { + struct bq24190_dev_info *bdi = + container_of(work, struct bq24190_dev_info, extcon_work.work); + int error, iinlim = 0; u8 v; - int ret; - - pm_runtime_get_sync(bdi->dev); - /* First check that the device really is what its supposed to be */ - ret = bq24190_read_mask(bdi, BQ24190_REG_VPRS, - BQ24190_REG_VPRS_PN_MASK, - BQ24190_REG_VPRS_PN_SHIFT, - &v); - if (ret < 0) - goto out; + error = pm_runtime_get_sync(bdi->dev); + if (error < 0) { + dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error); + pm_runtime_put_noidle(bdi->dev); + return; + } - if (v != bdi->model) { - ret = -ENODEV; - goto out; + if (extcon_get_state(bdi->extcon, EXTCON_CHG_USB_SDP) == 1) + iinlim = 500000; + else if (extcon_get_state(bdi->extcon, EXTCON_CHG_USB_CDP) == 1 || + extcon_get_state(bdi->extcon, EXTCON_CHG_USB_ACA) == 1) + iinlim = 1500000; + else if (extcon_get_state(bdi->extcon, EXTCON_CHG_USB_DCP) == 1) + iinlim = 2000000; + + if (iinlim) { + error = bq24190_set_field_val(bdi, BQ24190_REG_ISC, + BQ24190_REG_ISC_IINLIM_MASK, + BQ24190_REG_ISC_IINLIM_SHIFT, + bq24190_isc_iinlim_values, + ARRAY_SIZE(bq24190_isc_iinlim_values), + iinlim); + if (error < 0) + dev_err(bdi->dev, "Can't set IINLIM: %d\n", error); } - ret = bq24190_register_reset(bdi); - if (ret < 0) - goto out; + /* if no charger found and in USB host mode, set OTG 5V boost, else normal */ + if (!iinlim && extcon_get_state(bdi->extcon, EXTCON_USB_HOST) == 1) + v = BQ24190_REG_POC_CHG_CONFIG_OTG; + else + v = BQ24190_REG_POC_CHG_CONFIG_CHARGE; - ret = bq24190_set_mode_host(bdi); - if (ret < 0) - goto out; + error = bq24190_write_mask(bdi, BQ24190_REG_POC, + BQ24190_REG_POC_CHG_CONFIG_MASK, + BQ24190_REG_POC_CHG_CONFIG_SHIFT, + v); + if (error < 0) + dev_err(bdi->dev, "Can't set CHG_CONFIG: %d\n", error); - ret = bq24190_read(bdi, BQ24190_REG_SS, &bdi->ss_reg); -out: - pm_runtime_put_sync(bdi->dev); - return ret; 
+ pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); } -#ifdef CONFIG_OF -static int bq24190_setup_dt(struct bq24190_dev_info *bdi) +static int bq24190_extcon_event(struct notifier_block *nb, unsigned long event, + void *param) { - bdi->irq = irq_of_parse_and_map(bdi->dev->of_node, 0); - if (bdi->irq <= 0) - return -1; + struct bq24190_dev_info *bdi = + container_of(nb, struct bq24190_dev_info, extcon_nb); - return 0; -} -#else -static int bq24190_setup_dt(struct bq24190_dev_info *bdi) -{ - return -1; + /* + * The Power-Good detection may take up to 220ms, sometimes + * the external charger detection is quicker, and the bq24190 will + * reset to iinlim based on its own charger detection (which is not + * hooked up when using external charger detection) resulting in + * a too low default 500mA iinlim. Delay applying the extcon value + * for 300ms to avoid this. + */ + queue_delayed_work(system_wq, &bdi->extcon_work, msecs_to_jiffies(300)); + + return NOTIFY_OK; } -#endif -static int bq24190_setup_pdata(struct bq24190_dev_info *bdi, - struct bq24190_platform_data *pdata) +static int bq24190_hw_init(struct bq24190_dev_info *bdi) { + u8 v; int ret; - if (!gpio_is_valid(pdata->gpio_int)) - return -1; - - ret = gpio_request(pdata->gpio_int, dev_name(bdi->dev)); + /* First check that the device really is what its supposed to be */ + ret = bq24190_read_mask(bdi, BQ24190_REG_VPRS, + BQ24190_REG_VPRS_PN_MASK, + BQ24190_REG_VPRS_PN_SHIFT, + &v); if (ret < 0) - return -1; + return ret; - ret = gpio_direction_input(pdata->gpio_int); - if (ret < 0) - goto out; + if (v != BQ24190_REG_VPRS_PN_24190 && + v != BQ24190_REG_VPRS_PN_24192I) { + dev_err(bdi->dev, "Error unknown model: 0x%02x\n", v); + return -ENODEV; + } - bdi->irq = gpio_to_irq(pdata->gpio_int); - if (!bdi->irq) - goto out; + ret = bq24190_register_reset(bdi); + if (ret < 0) + return ret; - bdi->gpio_int = pdata->gpio_int; - return 0; + ret = bq24190_set_mode_host(bdi); + if (ret < 0) + return 
ret; -out: - gpio_free(pdata->gpio_int); - return -1; + return bq24190_read(bdi, BQ24190_REG_SS, &bdi->ss_reg); } static int bq24190_probe(struct i2c_client *client, @@ -1320,9 +1398,9 @@ static int bq24190_probe(struct i2c_client *client, { struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); struct device *dev = &client->dev; - struct bq24190_platform_data *pdata = client->dev.platform_data; struct power_supply_config charger_cfg = {}, battery_cfg = {}; struct bq24190_dev_info *bdi; + const char *name; int ret; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { @@ -1338,7 +1416,6 @@ static int bq24190_probe(struct i2c_client *client, bdi->client = client; bdi->dev = dev; - bdi->model = id->driver_data; strncpy(bdi->model_name, id->name, I2C_NAME_SIZE); mutex_init(&bdi->f_reg_lock); bdi->f_reg = 0; @@ -1346,23 +1423,43 @@ static int bq24190_probe(struct i2c_client *client, i2c_set_clientdata(client, bdi); - if (dev->of_node) - ret = bq24190_setup_dt(bdi); - else - ret = bq24190_setup_pdata(bdi, pdata); - - if (ret) { + if (!client->irq) { dev_err(dev, "Can't get irq info\n"); return -EINVAL; } + /* + * Devicetree platforms should get extcon via phandle (not yet supported). + * On ACPI platforms, extcon clients may invoke us with: + * struct property_entry pe[] = + * { PROPERTY_ENTRY_STRING("extcon-name", client_name), ... }; + * struct i2c_board_info bi = + * { .type = "bq24190", .addr = 0x6b, .properties = pe, .irq = irq }; + * struct i2c_adapter ad = { ... 
}; + * i2c_add_adapter(&ad); + * i2c_new_device(&ad, &bi); + */ + if (device_property_read_string(dev, "extcon-name", &name) == 0) { + bdi->extcon = extcon_get_extcon_dev(name); + if (!bdi->extcon) + return -EPROBE_DEFER; + + dev_info(bdi->dev, "using extcon device %s\n", name); + } + pm_runtime_enable(dev); - pm_runtime_resume(dev); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, 600); + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + dev_err(dev, "pm_runtime_get failed: %i\n", ret); + goto out_pmrt; + } ret = bq24190_hw_init(bdi); if (ret < 0) { dev_err(dev, "Hardware init failed\n"); - goto out1; + goto out_pmrt; } charger_cfg.drv_data = bdi; @@ -1373,7 +1470,7 @@ static int bq24190_probe(struct i2c_client *client, if (IS_ERR(bdi->charger)) { dev_err(dev, "Can't register charger\n"); ret = PTR_ERR(bdi->charger); - goto out1; + goto out_pmrt; } battery_cfg.drv_data = bdi; @@ -1382,87 +1479,160 @@ static int bq24190_probe(struct i2c_client *client, if (IS_ERR(bdi->battery)) { dev_err(dev, "Can't register battery\n"); ret = PTR_ERR(bdi->battery); - goto out2; + goto out_charger; } ret = bq24190_sysfs_create_group(bdi); if (ret) { dev_err(dev, "Can't create sysfs entries\n"); - goto out3; + goto out_battery; } - ret = devm_request_threaded_irq(dev, bdi->irq, NULL, + bdi->initialized = true; + + ret = devm_request_threaded_irq(dev, client->irq, NULL, bq24190_irq_handler_thread, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "bq24190-charger", bdi); if (ret < 0) { dev_err(dev, "Can't set up irq handler\n"); - goto out4; + goto out_sysfs; + } + + if (bdi->extcon) { + INIT_DELAYED_WORK(&bdi->extcon_work, bq24190_extcon_work); + bdi->extcon_nb.notifier_call = bq24190_extcon_event; + ret = devm_extcon_register_notifier_all(dev, bdi->extcon, + &bdi->extcon_nb); + if (ret) { + dev_err(dev, "Can't register extcon\n"); + goto out_sysfs; + } + + /* Sync initial cable state */ + queue_delayed_work(system_wq, &bdi->extcon_work, 0); } + 
enable_irq_wake(client->irq); + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + return 0; -out4: +out_sysfs: bq24190_sysfs_remove_group(bdi); -out3: +out_battery: power_supply_unregister(bdi->battery); -out2: +out_charger: power_supply_unregister(bdi->charger); -out1: +out_pmrt: + pm_runtime_put_sync(dev); + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); - if (bdi->gpio_int) - gpio_free(bdi->gpio_int); return ret; } static int bq24190_remove(struct i2c_client *client) { struct bq24190_dev_info *bdi = i2c_get_clientdata(client); + int error; - pm_runtime_get_sync(bdi->dev); - bq24190_register_reset(bdi); - pm_runtime_put_sync(bdi->dev); + error = pm_runtime_get_sync(bdi->dev); + if (error < 0) { + dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error); + pm_runtime_put_noidle(bdi->dev); + } + bq24190_register_reset(bdi); bq24190_sysfs_remove_group(bdi); power_supply_unregister(bdi->battery); power_supply_unregister(bdi->charger); + if (error >= 0) + pm_runtime_put_sync(bdi->dev); + pm_runtime_dont_use_autosuspend(bdi->dev); pm_runtime_disable(bdi->dev); - if (bdi->gpio_int) - gpio_free(bdi->gpio_int); + return 0; +} + +static __maybe_unused int bq24190_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct bq24190_dev_info *bdi = i2c_get_clientdata(client); + + if (!bdi->initialized) + return 0; + + dev_dbg(bdi->dev, "%s\n", __func__); + + return 0; +} + +static __maybe_unused int bq24190_runtime_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct bq24190_dev_info *bdi = i2c_get_clientdata(client); + + if (!bdi->initialized) + return 0; + + if (!bdi->irq_event) { + dev_dbg(bdi->dev, "checking events on possible wakeirq\n"); + bq24190_check_status(bdi); + } return 0; } -#ifdef CONFIG_PM_SLEEP -static int bq24190_pm_suspend(struct device *dev) +static __maybe_unused int bq24190_pm_suspend(struct device *dev) { struct i2c_client *client = 
to_i2c_client(dev); struct bq24190_dev_info *bdi = i2c_get_clientdata(client); + int error; + + error = pm_runtime_get_sync(bdi->dev); + if (error < 0) { + dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error); + pm_runtime_put_noidle(bdi->dev); + } - pm_runtime_get_sync(bdi->dev); bq24190_register_reset(bdi); - pm_runtime_put_sync(bdi->dev); + + if (error >= 0) { + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + } return 0; } -static int bq24190_pm_resume(struct device *dev) +static __maybe_unused int bq24190_pm_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct bq24190_dev_info *bdi = i2c_get_clientdata(client); + int error; bdi->f_reg = 0; bdi->ss_reg = BQ24190_REG_SS_VBUS_STAT_MASK; /* impossible state */ - pm_runtime_get_sync(bdi->dev); + error = pm_runtime_get_sync(bdi->dev); + if (error < 0) { + dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error); + pm_runtime_put_noidle(bdi->dev); + } + bq24190_register_reset(bdi); bq24190_set_mode_host(bdi); bq24190_read(bdi, BQ24190_REG_SS, &bdi->ss_reg); - pm_runtime_put_sync(bdi->dev); + + if (error >= 0) { + pm_runtime_mark_last_busy(bdi->dev); + pm_runtime_put_autosuspend(bdi->dev); + } /* Things may have changed while suspended so alert upper layer */ power_supply_changed(bdi->charger); @@ -1470,17 +1640,16 @@ static int bq24190_pm_resume(struct device *dev) return 0; } -#endif -static SIMPLE_DEV_PM_OPS(bq24190_pm_ops, bq24190_pm_suspend, bq24190_pm_resume); +static const struct dev_pm_ops bq24190_pm_ops = { + SET_RUNTIME_PM_OPS(bq24190_runtime_suspend, bq24190_runtime_resume, + NULL) + SET_SYSTEM_SLEEP_PM_OPS(bq24190_pm_suspend, bq24190_pm_resume) +}; -/* - * Only support the bq24190 right now. The bq24192, bq24192i, and bq24193 - * are similar but not identical so the driver needs to be extended to - * support them. 
- */ static const struct i2c_device_id bq24190_i2c_ids[] = { - { "bq24190", BQ24190_REG_VPRS_PN_24190 }, + { "bq24190" }, + { "bq24192i" }, { }, }; MODULE_DEVICE_TABLE(i2c, bq24190_i2c_ids); diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index f993a55cde20..8e2c41ded171 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -723,7 +723,7 @@ static int bq25890_irq_probe(struct bq25890_device *bq) { struct gpio_desc *irq; - irq = devm_gpiod_get_index(bq->dev, BQ25890_IRQ_PIN, 0, GPIOD_IN); + irq = devm_gpiod_get(bq->dev, BQ25890_IRQ_PIN, GPIOD_IN); if (IS_ERR(irq)) { dev_err(bq->dev, "Could not probe irq pin.\n"); return PTR_ERR(irq); diff --git a/drivers/power/supply/charger-manager.c b/drivers/power/supply/charger-manager.c index e664ca7c0afd..adc3761831e1 100644 --- a/drivers/power/supply/charger-manager.c +++ b/drivers/power/supply/charger-manager.c @@ -1198,7 +1198,7 @@ static int charger_extcon_notifier(struct notifier_block *self, static int charger_extcon_init(struct charger_manager *cm, struct charger_cable *cable) { - int ret = 0; + int ret; /* * Charger manager use Extcon framework to identify @@ -1232,7 +1232,7 @@ static int charger_manager_register_extcon(struct charger_manager *cm) { struct charger_desc *desc = cm->desc; struct charger_regulator *charger; - int ret = 0; + int ret; int i; int j; @@ -1255,15 +1255,14 @@ static int charger_manager_register_extcon(struct charger_manager *cm) if (ret < 0) { dev_err(cm->dev, "Cannot initialize charger(%s)\n", charger->regulator_name); - goto err; + return ret; } cable->charger = charger; cable->cm = cm; } } -err: - return ret; + return 0; } /* help function of sysfs node to control charger(regulator) */ @@ -1372,7 +1371,7 @@ static int charger_manager_register_sysfs(struct charger_manager *cm) int chargers_externally_control = 1; char buf[11]; char *str; - int ret = 0; + int ret; int i; /* Create sysfs entry to control 
charger(regulator) */ @@ -1382,10 +1381,9 @@ static int charger_manager_register_sysfs(struct charger_manager *cm) snprintf(buf, 10, "charger.%d", i); str = devm_kzalloc(cm->dev, sizeof(char) * (strlen(buf) + 1), GFP_KERNEL); - if (!str) { - ret = -ENOMEM; - goto err; - } + if (!str) + return -ENOMEM; + strcpy(str, buf); charger->attrs[0] = &charger->attr_name.attr; @@ -1426,19 +1424,16 @@ static int charger_manager_register_sysfs(struct charger_manager *cm) if (ret < 0) { dev_err(cm->dev, "Cannot create sysfs entry of %s regulator\n", charger->regulator_name); - ret = -EINVAL; - goto err; + return ret; } } if (chargers_externally_control) { dev_err(cm->dev, "Cannot register regulator because charger-manager must need at least one charger for charging battery\n"); - ret = -EINVAL; - goto err; + return -EINVAL; } -err: - return ret; + return 0; } static int cm_init_thermal_data(struct charger_manager *cm, @@ -1626,7 +1621,7 @@ static int charger_manager_probe(struct platform_device *pdev) { struct charger_desc *desc = cm_get_drv_data(pdev); struct charger_manager *cm; - int ret = 0, i = 0; + int ret, i = 0; int j = 0; union power_supply_propval val; struct power_supply *fuel_gauge; @@ -1887,14 +1882,12 @@ MODULE_DEVICE_TABLE(platform, charger_manager_id); static int cm_suspend_noirq(struct device *dev) { - int ret = 0; - if (device_may_wakeup(dev)) { device_set_wakeup_capable(dev, false); - ret = -EAGAIN; + return -EAGAIN; } - return ret; + return 0; } static bool cm_need_to_awake(void) diff --git a/drivers/power/supply/cpcap-charger.c b/drivers/power/supply/cpcap-charger.c new file mode 100644 index 000000000000..543a1bd21ab9 --- /dev/null +++ b/drivers/power/supply/cpcap-charger.c @@ -0,0 +1,681 @@ +/* + * Motorola CPCAP PMIC battery charger driver + * + * Copyright (C) 2017 Tony Lindgren <tony@atomide.com> + * + * Rewritten for Linux power framework with some parts based on + * on earlier driver found in the Motorola Linux kernel: + * + * Copyright (C) 2009-2010 
Motorola, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/atomic.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/power_supply.h> +#include <linux/regmap.h> + +#include <linux/gpio/consumer.h> +#include <linux/usb/phy_companion.h> +#include <linux/phy/omap_usb.h> +#include <linux/usb/otg.h> +#include <linux/iio/consumer.h> +#include <linux/mfd/motorola-cpcap.h> + +/* CPCAP_REG_CRM register bits */ +#define CPCAP_REG_CRM_UNUSED_641_15 BIT(15) /* 641 = register number */ +#define CPCAP_REG_CRM_UNUSED_641_14 BIT(14) /* 641 = register number */ +#define CPCAP_REG_CRM_CHRG_LED_EN BIT(13) +#define CPCAP_REG_CRM_RVRSMODE BIT(12) +#define CPCAP_REG_CRM_ICHRG_TR1 BIT(11) +#define CPCAP_REG_CRM_ICHRG_TR0 BIT(10) +#define CPCAP_REG_CRM_FET_OVRD BIT(9) +#define CPCAP_REG_CRM_FET_CTRL BIT(8) +#define CPCAP_REG_CRM_VCHRG3 BIT(7) +#define CPCAP_REG_CRM_VCHRG2 BIT(6) +#define CPCAP_REG_CRM_VCHRG1 BIT(5) +#define CPCAP_REG_CRM_VCHRG0 BIT(4) +#define CPCAP_REG_CRM_ICHRG3 BIT(3) +#define CPCAP_REG_CRM_ICHRG2 BIT(2) +#define CPCAP_REG_CRM_ICHRG1 BIT(1) +#define CPCAP_REG_CRM_ICHRG0 BIT(0) + +/* CPCAP_REG_CRM trickle charge voltages */ +#define CPCAP_REG_CRM_TR(val) (((val) & 0x3) << 10) +#define CPCAP_REG_CRM_TR_0A00 CPCAP_REG_CRM_TR(0x0) +#define CPCAP_REG_CRM_TR_0A24 CPCAP_REG_CRM_TR(0x1) +#define CPCAP_REG_CRM_TR_0A48 CPCAP_REG_CRM_TR(0x2) 
+#define CPCAP_REG_CRM_TR_0A72 CPCAP_REG_CRM_TR(0x4) + +/* CPCAP_REG_CRM charge voltages */ +#define CPCAP_REG_CRM_VCHRG(val) (((val) & 0xf) << 4) +#define CPCAP_REG_CRM_VCHRG_3V80 CPCAP_REG_CRM_VCHRG(0x0) +#define CPCAP_REG_CRM_VCHRG_4V10 CPCAP_REG_CRM_VCHRG(0x1) +#define CPCAP_REG_CRM_VCHRG_4V15 CPCAP_REG_CRM_VCHRG(0x2) +#define CPCAP_REG_CRM_VCHRG_4V20 CPCAP_REG_CRM_VCHRG(0x3) +#define CPCAP_REG_CRM_VCHRG_4V22 CPCAP_REG_CRM_VCHRG(0x4) +#define CPCAP_REG_CRM_VCHRG_4V24 CPCAP_REG_CRM_VCHRG(0x5) +#define CPCAP_REG_CRM_VCHRG_4V26 CPCAP_REG_CRM_VCHRG(0x6) +#define CPCAP_REG_CRM_VCHRG_4V28 CPCAP_REG_CRM_VCHRG(0x7) +#define CPCAP_REG_CRM_VCHRG_4V30 CPCAP_REG_CRM_VCHRG(0x8) +#define CPCAP_REG_CRM_VCHRG_4V32 CPCAP_REG_CRM_VCHRG(0x9) +#define CPCAP_REG_CRM_VCHRG_4V34 CPCAP_REG_CRM_VCHRG(0xa) +#define CPCAP_REG_CRM_VCHRG_4V36 CPCAP_REG_CRM_VCHRG(0xb) +#define CPCAP_REG_CRM_VCHRG_4V38 CPCAP_REG_CRM_VCHRG(0xc) +#define CPCAP_REG_CRM_VCHRG_4V40 CPCAP_REG_CRM_VCHRG(0xd) +#define CPCAP_REG_CRM_VCHRG_4V42 CPCAP_REG_CRM_VCHRG(0xe) +#define CPCAP_REG_CRM_VCHRG_4V44 CPCAP_REG_CRM_VCHRG(0xf) + +/* CPCAP_REG_CRM charge currents */ +#define CPCAP_REG_CRM_ICHRG(val) (((val) & 0xf) << 0) +#define CPCAP_REG_CRM_ICHRG_0A000 CPCAP_REG_CRM_ICHRG(0x0) +#define CPCAP_REG_CRM_ICHRG_0A070 CPCAP_REG_CRM_ICHRG(0x1) +#define CPCAP_REG_CRM_ICHRG_0A176 CPCAP_REG_CRM_ICHRG(0x2) +#define CPCAP_REG_CRM_ICHRG_0A264 CPCAP_REG_CRM_ICHRG(0x3) +#define CPCAP_REG_CRM_ICHRG_0A352 CPCAP_REG_CRM_ICHRG(0x4) +#define CPCAP_REG_CRM_ICHRG_0A440 CPCAP_REG_CRM_ICHRG(0x5) +#define CPCAP_REG_CRM_ICHRG_0A528 CPCAP_REG_CRM_ICHRG(0x6) +#define CPCAP_REG_CRM_ICHRG_0A616 CPCAP_REG_CRM_ICHRG(0x7) +#define CPCAP_REG_CRM_ICHRG_0A704 CPCAP_REG_CRM_ICHRG(0x8) +#define CPCAP_REG_CRM_ICHRG_0A792 CPCAP_REG_CRM_ICHRG(0x9) +#define CPCAP_REG_CRM_ICHRG_0A880 CPCAP_REG_CRM_ICHRG(0xa) +#define CPCAP_REG_CRM_ICHRG_0A968 CPCAP_REG_CRM_ICHRG(0xb) +#define CPCAP_REG_CRM_ICHRG_1A056 CPCAP_REG_CRM_ICHRG(0xc) +#define CPCAP_REG_CRM_ICHRG_1A144 
CPCAP_REG_CRM_ICHRG(0xd) +#define CPCAP_REG_CRM_ICHRG_1A584 CPCAP_REG_CRM_ICHRG(0xe) +#define CPCAP_REG_CRM_ICHRG_NO_LIMIT CPCAP_REG_CRM_ICHRG(0xf) + +enum { + CPCAP_CHARGER_IIO_BATTDET, + CPCAP_CHARGER_IIO_VOLTAGE, + CPCAP_CHARGER_IIO_VBUS, + CPCAP_CHARGER_IIO_CHRG_CURRENT, + CPCAP_CHARGER_IIO_BATT_CURRENT, + CPCAP_CHARGER_IIO_NR, +}; + +struct cpcap_charger_ddata { + struct device *dev; + struct regmap *reg; + struct list_head irq_list; + struct delayed_work detect_work; + struct delayed_work vbus_work; + struct gpio_desc *gpio[2]; /* gpio_reven0 & 1 */ + + struct iio_channel *channels[CPCAP_CHARGER_IIO_NR]; + + struct power_supply *usb; + + struct phy_companion comparator; /* For USB VBUS */ + bool vbus_enabled; + atomic_t active; + + int status; +}; + +struct cpcap_interrupt_desc { + int irq; + struct list_head node; + const char *name; +}; + +struct cpcap_charger_ints_state { + bool chrg_det; + bool rvrs_chrg; + bool vbusov; + + bool chrg_se1b; + bool rvrs_mode; + bool chrgcurr1; + bool vbusvld; + + bool battdetb; +}; + +static enum power_supply_property cpcap_charger_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_ONLINE, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, +}; + +static bool cpcap_charger_battery_found(struct cpcap_charger_ddata *ddata) +{ + struct iio_channel *channel; + int error, value; + + channel = ddata->channels[CPCAP_CHARGER_IIO_BATTDET]; + error = iio_read_channel_raw(channel, &value); + if (error < 0) { + dev_warn(ddata->dev, "%s failed: %i\n", __func__, error); + + return false; + } + + return value == 1; +} + +static int cpcap_charger_get_charge_voltage(struct cpcap_charger_ddata *ddata) +{ + struct iio_channel *channel; + int error, value = 0; + + channel = ddata->channels[CPCAP_CHARGER_IIO_VOLTAGE]; + error = iio_read_channel_processed(channel, &value); + if (error < 0) { + dev_warn(ddata->dev, "%s failed: %i\n", __func__, error); + + return 0; + } + + return value; +} + +static int 
cpcap_charger_get_charge_current(struct cpcap_charger_ddata *ddata) +{ + struct iio_channel *channel; + int error, value = 0; + + channel = ddata->channels[CPCAP_CHARGER_IIO_CHRG_CURRENT]; + error = iio_read_channel_processed(channel, &value); + if (error < 0) { + dev_warn(ddata->dev, "%s failed: %i\n", __func__, error); + + return 0; + } + + return value; +} + +static int cpcap_charger_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) +{ + struct cpcap_charger_ddata *ddata = dev_get_drvdata(psy->dev.parent); + + switch (psp) { + case POWER_SUPPLY_PROP_STATUS: + val->intval = ddata->status; + break; + case POWER_SUPPLY_PROP_VOLTAGE_NOW: + if (ddata->status == POWER_SUPPLY_STATUS_CHARGING) + val->intval = cpcap_charger_get_charge_voltage(ddata) * + 1000; + else + val->intval = 0; + break; + case POWER_SUPPLY_PROP_CURRENT_NOW: + if (ddata->status == POWER_SUPPLY_STATUS_CHARGING) + val->intval = cpcap_charger_get_charge_current(ddata) * + 1000; + else + val->intval = 0; + break; + case POWER_SUPPLY_PROP_ONLINE: + val->intval = ddata->status == POWER_SUPPLY_STATUS_CHARGING; + break; + default: + return -EINVAL; + } + + return 0; +} + +static void cpcap_charger_set_cable_path(struct cpcap_charger_ddata *ddata, + bool enabled) +{ + if (!ddata->gpio[0]) + return; + + gpiod_set_value(ddata->gpio[0], enabled); +} + +static void cpcap_charger_set_inductive_path(struct cpcap_charger_ddata *ddata, + bool enabled) +{ + if (!ddata->gpio[1]) + return; + + gpiod_set_value(ddata->gpio[1], enabled); +} + +static int cpcap_charger_set_state(struct cpcap_charger_ddata *ddata, + int max_voltage, int charge_current, + int trickle_current) +{ + bool enable; + int error; + + enable = max_voltage && (charge_current || trickle_current); + dev_dbg(ddata->dev, "%s enable: %i\n", __func__, enable); + + if (!enable) { + error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM, + 0x3fff, + CPCAP_REG_CRM_FET_OVRD | + CPCAP_REG_CRM_FET_CTRL); + 
if (error) { + ddata->status = POWER_SUPPLY_STATUS_UNKNOWN; + goto out_err; + } + + ddata->status = POWER_SUPPLY_STATUS_DISCHARGING; + + return 0; + } + + error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM, 0x3fff, + CPCAP_REG_CRM_CHRG_LED_EN | + trickle_current | + CPCAP_REG_CRM_FET_OVRD | + CPCAP_REG_CRM_FET_CTRL | + max_voltage | + charge_current); + if (error) { + ddata->status = POWER_SUPPLY_STATUS_UNKNOWN; + goto out_err; + } + + ddata->status = POWER_SUPPLY_STATUS_CHARGING; + + return 0; + +out_err: + dev_err(ddata->dev, "%s failed with %i\n", __func__, error); + + return error; +} + +static bool cpcap_charger_vbus_valid(struct cpcap_charger_ddata *ddata) +{ + int error, value = 0; + struct iio_channel *channel = + ddata->channels[CPCAP_CHARGER_IIO_VBUS]; + + error = iio_read_channel_processed(channel, &value); + if (error >= 0) + return value > 3900 ? true : false; + + dev_err(ddata->dev, "error reading VBUS: %i\n", error); + + return false; +} + +/* VBUS control functions for the USB PHY companion */ + +static void cpcap_charger_vbus_work(struct work_struct *work) +{ + struct cpcap_charger_ddata *ddata; + bool vbus = false; + int error; + + ddata = container_of(work, struct cpcap_charger_ddata, + vbus_work.work); + + if (ddata->vbus_enabled) { + vbus = cpcap_charger_vbus_valid(ddata); + if (vbus) { + dev_info(ddata->dev, "VBUS already provided\n"); + + return; + } + + cpcap_charger_set_cable_path(ddata, false); + cpcap_charger_set_inductive_path(ddata, false); + + error = cpcap_charger_set_state(ddata, 0, 0, 0); + if (error) + goto out_err; + + error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM, + CPCAP_REG_CRM_RVRSMODE, + CPCAP_REG_CRM_RVRSMODE); + if (error) + goto out_err; + } else { + error = regmap_update_bits(ddata->reg, CPCAP_REG_CRM, + CPCAP_REG_CRM_RVRSMODE, 0); + if (error) + goto out_err; + + cpcap_charger_set_cable_path(ddata, true); + cpcap_charger_set_inductive_path(ddata, true); + } + + return; + +out_err: + dev_err(ddata->dev, "%s could 
not %s vbus: %i\n", __func__, + ddata->vbus_enabled ? "enable" : "disable", error); +} + +static int cpcap_charger_set_vbus(struct phy_companion *comparator, + bool enabled) +{ + struct cpcap_charger_ddata *ddata = + container_of(comparator, struct cpcap_charger_ddata, + comparator); + + ddata->vbus_enabled = enabled; + schedule_delayed_work(&ddata->vbus_work, 0); + + return 0; +} + +/* Charger interrupt handling functions */ + +static int cpcap_charger_get_ints_state(struct cpcap_charger_ddata *ddata, + struct cpcap_charger_ints_state *s) +{ + int val, error; + + error = regmap_read(ddata->reg, CPCAP_REG_INTS1, &val); + if (error) + return error; + + s->chrg_det = val & BIT(13); + s->rvrs_chrg = val & BIT(12); + s->vbusov = val & BIT(11); + + error = regmap_read(ddata->reg, CPCAP_REG_INTS2, &val); + if (error) + return error; + + s->chrg_se1b = val & BIT(13); + s->rvrs_mode = val & BIT(6); + s->chrgcurr1 = val & BIT(4); + s->vbusvld = val & BIT(3); + + error = regmap_read(ddata->reg, CPCAP_REG_INTS4, &val); + if (error) + return error; + + s->battdetb = val & BIT(6); + + return 0; +} + +static void cpcap_usb_detect(struct work_struct *work) +{ + struct cpcap_charger_ddata *ddata; + struct cpcap_charger_ints_state s; + int error; + + ddata = container_of(work, struct cpcap_charger_ddata, + detect_work.work); + + error = cpcap_charger_get_ints_state(ddata, &s); + if (error) + return; + + if (cpcap_charger_vbus_valid(ddata) && s.chrgcurr1) { + int max_current; + + if (cpcap_charger_battery_found(ddata)) + max_current = CPCAP_REG_CRM_ICHRG_1A584; + else + max_current = CPCAP_REG_CRM_ICHRG_0A528; + + error = cpcap_charger_set_state(ddata, + CPCAP_REG_CRM_VCHRG_4V20, + max_current, + CPCAP_REG_CRM_TR_0A72); + if (error) + goto out_err; + } else { + error = cpcap_charger_set_state(ddata, 0, 0, 0); + if (error) + goto out_err; + } + + return; + +out_err: + dev_err(ddata->dev, "%s failed with %i\n", __func__, error); +} + +static irqreturn_t cpcap_charger_irq_thread(int 
irq, void *data) +{ + struct cpcap_charger_ddata *ddata = data; + + if (!atomic_read(&ddata->active)) + return IRQ_NONE; + + schedule_delayed_work(&ddata->detect_work, 0); + + return IRQ_HANDLED; +} + +static int cpcap_usb_init_irq(struct platform_device *pdev, + struct cpcap_charger_ddata *ddata, + const char *name) +{ + struct cpcap_interrupt_desc *d; + int irq, error; + + irq = platform_get_irq_byname(pdev, name); + if (!irq) + return -ENODEV; + + error = devm_request_threaded_irq(ddata->dev, irq, NULL, + cpcap_charger_irq_thread, + IRQF_SHARED, + name, ddata); + if (error) { + dev_err(ddata->dev, "could not get irq %s: %i\n", + name, error); + + return error; + } + + d = devm_kzalloc(ddata->dev, sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->name = name; + d->irq = irq; + list_add(&d->node, &ddata->irq_list); + + return 0; +} + +static const char * const cpcap_charger_irqs[] = { + /* REG_INT_0 */ + "chrg_det", "rvrs_chrg", + + /* REG_INT1 */ + "chrg_se1b", "se0conn", "rvrs_mode", "chrgcurr1", "vbusvld", + + /* REG_INT_3 */ + "battdetb", +}; + +static int cpcap_usb_init_interrupts(struct platform_device *pdev, + struct cpcap_charger_ddata *ddata) +{ + int i, error; + + for (i = 0; i < ARRAY_SIZE(cpcap_charger_irqs); i++) { + error = cpcap_usb_init_irq(pdev, ddata, cpcap_charger_irqs[i]); + if (error) + return error; + } + + return 0; +} + +static void cpcap_charger_init_optional_gpios(struct cpcap_charger_ddata *ddata) +{ + int i; + + for (i = 0; i < 2; i++) { + ddata->gpio[i] = devm_gpiod_get_index(ddata->dev, "mode", + i, GPIOD_OUT_HIGH); + if (IS_ERR(ddata->gpio[i])) { + dev_info(ddata->dev, "no mode change GPIO%i: %li\n", + i, PTR_ERR(ddata->gpio[i])); + ddata->gpio[i] = NULL; + } + } +} + +static int cpcap_charger_init_iio(struct cpcap_charger_ddata *ddata) +{ + const char * const names[CPCAP_CHARGER_IIO_NR] = { + "battdetb", "battp", "vbus", "chg_isense", "batti", + }; + int error, i; + + for (i = 0; i < CPCAP_CHARGER_IIO_NR; i++) { + 
ddata->channels[i] = devm_iio_channel_get(ddata->dev, + names[i]); + if (IS_ERR(ddata->channels[i])) { + error = PTR_ERR(ddata->channels[i]); + goto out_err; + } + + if (!ddata->channels[i]->indio_dev) { + error = -ENXIO; + goto out_err; + } + } + + return 0; + +out_err: + dev_err(ddata->dev, "could not initialize VBUS or ID IIO: %i\n", + error); + + return error; +} + +static const struct power_supply_desc cpcap_charger_usb_desc = { + .name = "cpcap_usb", + .type = POWER_SUPPLY_TYPE_USB, + .properties = cpcap_charger_props, + .num_properties = ARRAY_SIZE(cpcap_charger_props), + .get_property = cpcap_charger_get_property, +}; + +#ifdef CONFIG_OF +static const struct of_device_id cpcap_charger_id_table[] = { + { + .compatible = "motorola,mapphone-cpcap-charger", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, cpcap_charger_id_table); +#endif + +static int cpcap_charger_probe(struct platform_device *pdev) +{ + struct cpcap_charger_ddata *ddata; + const struct of_device_id *of_id; + int error; + + of_id = of_match_device(of_match_ptr(cpcap_charger_id_table), + &pdev->dev); + if (!of_id) + return -EINVAL; + + ddata = devm_kzalloc(&pdev->dev, sizeof(*ddata), GFP_KERNEL); + if (!ddata) + return -ENOMEM; + + ddata->dev = &pdev->dev; + + ddata->reg = dev_get_regmap(ddata->dev->parent, NULL); + if (!ddata->reg) + return -ENODEV; + + INIT_LIST_HEAD(&ddata->irq_list); + INIT_DELAYED_WORK(&ddata->detect_work, cpcap_usb_detect); + INIT_DELAYED_WORK(&ddata->vbus_work, cpcap_charger_vbus_work); + platform_set_drvdata(pdev, ddata); + + error = cpcap_charger_init_iio(ddata); + if (error) + return error; + + atomic_set(&ddata->active, 1); + + ddata->usb = devm_power_supply_register(ddata->dev, + &cpcap_charger_usb_desc, + NULL); + if (IS_ERR(ddata->usb)) { + error = PTR_ERR(ddata->usb); + dev_err(ddata->dev, "failed to register USB charger: %i\n", + error); + + return error; + } + + error = cpcap_usb_init_interrupts(pdev, ddata); + if (error) + return error; + + ddata->comparator.set_vbus = 
cpcap_charger_set_vbus; + error = omap_usb2_set_comparator(&ddata->comparator); + if (error == -ENODEV) { + dev_info(ddata->dev, "charger needs phy, deferring probe\n"); + return -EPROBE_DEFER; + } + + cpcap_charger_init_optional_gpios(ddata); + + schedule_delayed_work(&ddata->detect_work, 0); + + return 0; +} + +static int cpcap_charger_remove(struct platform_device *pdev) +{ + struct cpcap_charger_ddata *ddata = platform_get_drvdata(pdev); + int error; + + atomic_set(&ddata->active, 0); + error = omap_usb2_set_comparator(NULL); + if (error) + dev_warn(ddata->dev, "could not clear USB comparator: %i\n", + error); + + error = cpcap_charger_set_state(ddata, 0, 0, 0); + if (error) + dev_warn(ddata->dev, "could not clear charger: %i\n", + error); + cancel_delayed_work_sync(&ddata->vbus_work); + cancel_delayed_work_sync(&ddata->detect_work); + + return 0; +} + +static struct platform_driver cpcap_charger_driver = { + .probe = cpcap_charger_probe, + .driver = { + .name = "cpcap-charger", + .of_match_table = of_match_ptr(cpcap_charger_id_table), + }, + .remove = cpcap_charger_remove, +}; +module_platform_driver(cpcap_charger_driver); + +MODULE_AUTHOR("Tony Lindgren <tony@atomide.com>"); +MODULE_DESCRIPTION("CPCAP Battery Charger Interface driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:cpcap-charger"); diff --git a/drivers/power/supply/lego_ev3_battery.c b/drivers/power/supply/lego_ev3_battery.c new file mode 100644 index 000000000000..7b993d669f7f --- /dev/null +++ b/drivers/power/supply/lego_ev3_battery.c @@ -0,0 +1,228 @@ +/* + * Battery driver for LEGO MINDSTORMS EV3 + * + * Copyright (C) 2017 David Lechner <david@lechnology.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/gpio/consumer.h> +#include <linux/iio/consumer.h> +#include <linux/iio/types.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/power_supply.h> + +struct lego_ev3_battery { + struct iio_channel *iio_v; + struct iio_channel *iio_i; + struct gpio_desc *rechargeable_gpio; + struct power_supply *psy; + int technology; + int v_max; + int v_min; +}; + +static int lego_ev3_battery_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) +{ + struct lego_ev3_battery *batt = power_supply_get_drvdata(psy); + int val2; + + switch (psp) { + case POWER_SUPPLY_PROP_TECHNOLOGY: + val->intval = batt->technology; + break; + case POWER_SUPPLY_PROP_VOLTAGE_NOW: + /* battery voltage is iio channel * 2 + Vce of transistor */ + iio_read_channel_processed(batt->iio_v, &val->intval); + val->intval *= 2000; + val->intval += 200000; + /* plus adjust for shunt resistor drop */ + iio_read_channel_processed(batt->iio_i, &val2); + val2 *= 1000; + val2 /= 15; + val->intval += val2; + break; + case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN: + val->intval = batt->v_max; + break; + case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN: + val->intval = batt->v_min; + break; + case POWER_SUPPLY_PROP_CURRENT_NOW: + /* battery current is iio channel / 15 / 0.05 ohms */ + iio_read_channel_processed(batt->iio_i, &val->intval); + val->intval *= 20000; + val->intval /= 15; + break; + case POWER_SUPPLY_PROP_SCOPE: + val->intval = POWER_SUPPLY_SCOPE_SYSTEM; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int 
lego_ev3_battery_set_property(struct power_supply *psy, + enum power_supply_property psp, + const union power_supply_propval *val) +{ + struct lego_ev3_battery *batt = power_supply_get_drvdata(psy); + + switch (psp) { + case POWER_SUPPLY_PROP_TECHNOLOGY: + /* + * Only allow changing technology from Unknown to NiMH. Li-ion + * batteries are automatically detected and should not be + * overridden. Rechargeable AA batteries, on the other hand, + * cannot be automatically detected, and so must be manually + * specified. This should only be set once during system init, + * so there is no mechanism to go back to Unknown. + */ + if (batt->technology != POWER_SUPPLY_TECHNOLOGY_UNKNOWN) + return -EINVAL; + switch (val->intval) { + case POWER_SUPPLY_TECHNOLOGY_NiMH: + batt->technology = POWER_SUPPLY_TECHNOLOGY_NiMH; + batt->v_max = 7800000; + batt->v_min = 5400000; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + return 0; +} + +static int lego_ev3_battery_property_is_writeable(struct power_supply *psy, + enum power_supply_property psp) +{ + struct lego_ev3_battery *batt = power_supply_get_drvdata(psy); + + return psp == POWER_SUPPLY_PROP_TECHNOLOGY && + batt->technology == POWER_SUPPLY_TECHNOLOGY_UNKNOWN; +} + +static enum power_supply_property lego_ev3_battery_props[] = { + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, + POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_SCOPE, +}; + +static const struct power_supply_desc lego_ev3_battery_desc = { + .name = "lego-ev3-battery", + .type = POWER_SUPPLY_TYPE_BATTERY, + .properties = lego_ev3_battery_props, + .num_properties = ARRAY_SIZE(lego_ev3_battery_props), + .get_property = lego_ev3_battery_get_property, + .set_property = lego_ev3_battery_set_property, + .property_is_writeable = lego_ev3_battery_property_is_writeable, +}; + +static int lego_ev3_battery_probe(struct platform_device *pdev) 
+{ + struct device *dev = &pdev->dev; + struct lego_ev3_battery *batt; + struct power_supply_config psy_cfg = {}; + int err; + + batt = devm_kzalloc(dev, sizeof(*batt), GFP_KERNEL); + if (!batt) + return -ENOMEM; + + platform_set_drvdata(pdev, batt); + + batt->iio_v = devm_iio_channel_get(dev, "voltage"); + err = PTR_ERR_OR_ZERO(batt->iio_v); + if (err) { + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get voltage iio channel\n"); + return err; + } + + batt->iio_i = devm_iio_channel_get(dev, "current"); + err = PTR_ERR_OR_ZERO(batt->iio_i); + if (err) { + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get current iio channel\n"); + return err; + } + + batt->rechargeable_gpio = devm_gpiod_get(dev, "rechargeable", GPIOD_IN); + err = PTR_ERR_OR_ZERO(batt->rechargeable_gpio); + if (err) { + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get rechargeable gpio\n"); + return err; + } + + /* + * The rechargeable battery indication switch cannot be changed without + * removing the battery, so we only need to read it once. 
+ */ + if (gpiod_get_value(batt->rechargeable_gpio)) { + /* 2-cell Li-ion, 7.4V nominal */ + batt->technology = POWER_SUPPLY_TECHNOLOGY_LION; + batt->v_max = 84000000; + batt->v_min = 60000000; + } else { + /* 6x AA Alkaline, 9V nominal */ + batt->technology = POWER_SUPPLY_TECHNOLOGY_UNKNOWN; + batt->v_max = 90000000; + batt->v_min = 48000000; + } + + psy_cfg.of_node = pdev->dev.of_node; + psy_cfg.drv_data = batt; + + batt->psy = devm_power_supply_register(dev, &lego_ev3_battery_desc, + &psy_cfg); + err = PTR_ERR_OR_ZERO(batt->psy); + if (err) { + dev_err(dev, "failed to register power supply\n"); + return err; + } + + return 0; +} + +static const struct of_device_id of_lego_ev3_battery_match[] = { + { .compatible = "lego,ev3-battery", }, + { } +}; +MODULE_DEVICE_TABLE(of, of_lego_ev3_battery_match); + +static struct platform_driver lego_ev3_battery_driver = { + .driver = { + .name = "lego-ev3-battery", + .of_match_table = of_lego_ev3_battery_match, + }, + .probe = lego_ev3_battery_probe, +}; +module_platform_driver(lego_ev3_battery_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("David Lechner <david@lechnology.com>"); +MODULE_DESCRIPTION("LEGO MINDSTORMS EV3 Battery Driver"); diff --git a/drivers/power/supply/lp8788-charger.c b/drivers/power/supply/lp8788-charger.c index 509e2b341bd6..677f7c40b25a 100644 --- a/drivers/power/supply/lp8788-charger.c +++ b/drivers/power/supply/lp8788-charger.c @@ -651,7 +651,7 @@ static ssize_t lp8788_show_eoc_time(struct device *dev, { struct lp8788_charger *pchg = dev_get_drvdata(dev); char *stime[] = { "400ms", "5min", "10min", "15min", - "20min", "25min", "30min" "No timeout" }; + "20min", "25min", "30min", "No timeout" }; u8 val; lp8788_read_byte(pchg->lp, LP8788_CHG_EOC, &val); diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c index 4adf2ba021ce..7efb908f4451 100644 --- a/drivers/power/supply/ltc2941-battery-gauge.c +++ b/drivers/power/supply/ltc2941-battery-gauge.c @@ 
-9,6 +9,7 @@ */ #include <linux/kernel.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/swab.h> @@ -61,7 +62,7 @@ struct ltc294x_info { struct power_supply *supply; /* Supply pointer */ struct power_supply_desc supply_desc; /* Supply description */ struct delayed_work work; /* Work scheduler */ - int num_regs; /* Number of registers (chip type) */ + unsigned long num_regs; /* Number of registers (chip type) */ int charge; /* Last charge register content */ int r_sense; /* mOhm */ int Qlsb; /* nAh */ @@ -387,7 +388,7 @@ static int ltc294x_i2c_probe(struct i2c_client *client, np = of_node_get(client->dev.of_node); - info->num_regs = id->driver_data; + info->num_regs = (unsigned long)of_device_get_match_data(&client->dev); info->supply_desc.name = np->name; /* r_sense can be negative, when sense+ is connected to the battery @@ -497,9 +498,23 @@ static const struct i2c_device_id ltc294x_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, ltc294x_i2c_id); +static const struct of_device_id ltc294x_i2c_of_match[] = { + { + .compatible = "lltc,ltc2941", + .data = (void *)LTC2941_NUM_REGS + }, + { + .compatible = "lltc,ltc2943", + .data = (void *)LTC2943_NUM_REGS + }, + { }, +}; +MODULE_DEVICE_TABLE(of, ltc294x_i2c_of_match); + static struct i2c_driver ltc294x_driver = { .driver = { .name = "LTC2941", + .of_match_table = ltc294x_i2c_of_match, .pm = LTC294X_PM_OPS, }, .probe = ltc294x_i2c_probe, diff --git a/drivers/power/supply/max17040_battery.c b/drivers/power/supply/max17040_battery.c index e7c3649b31a0..33c40f79d23d 100644 --- a/drivers/power/supply/max17040_battery.c +++ b/drivers/power/supply/max17040_battery.c @@ -277,9 +277,17 @@ static const struct i2c_device_id max17040_id[] = { }; MODULE_DEVICE_TABLE(i2c, max17040_id); +static const struct of_device_id max17040_of_match[] = { + { .compatible = "maxim,max17040" }, + { .compatible = "maxim,max77836-battery" }, + { }, +}; +MODULE_DEVICE_TABLE(of, 
max17040_of_match); + static struct i2c_driver max17040_i2c_driver = { .driver = { .name = "max17040", + .of_match_table = max17040_of_match, .pm = MAX17040_PM_OPS, }, .probe = max17040_probe, diff --git a/drivers/power/supply/sbs-charger.c b/drivers/power/supply/sbs-charger.c index 353765a5f44c..15947dbb511e 100644 --- a/drivers/power/supply/sbs-charger.c +++ b/drivers/power/supply/sbs-charger.c @@ -137,10 +137,7 @@ static enum power_supply_property sbs_properties[] = { static bool sbs_readable_reg(struct device *dev, unsigned int reg) { - if (reg < SBS_CHARGER_REG_SPEC_INFO) - return false; - else - return true; + return reg >= SBS_CHARGER_REG_SPEC_INFO; } static bool sbs_volatile_reg(struct device *dev, unsigned int reg) diff --git a/drivers/power/supply/tps65217_charger.c b/drivers/power/supply/tps65217_charger.c index 29b61e81b385..1f5234098aaf 100644 --- a/drivers/power/supply/tps65217_charger.c +++ b/drivers/power/supply/tps65217_charger.c @@ -58,8 +58,6 @@ static int tps65217_config_charger(struct tps65217_charger *charger) { int ret; - dev_dbg(charger->dev, "%s\n", __func__); - /* * tps65217 rev. G, p. 31 (see p. 
32 for NTC schematic) * @@ -205,8 +203,6 @@ static int tps65217_charger_probe(struct platform_device *pdev) int ret; int i; - dev_dbg(&pdev->dev, "%s\n", __func__); - charger = devm_kzalloc(&pdev->dev, sizeof(*charger), GFP_KERNEL); if (!charger) return -ENOMEM; diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c index bcd4dc304f27..990ff3d218bc 100644 --- a/drivers/power/supply/twl4030_charger.c +++ b/drivers/power/supply/twl4030_charger.c @@ -1117,7 +1117,7 @@ fail: return ret; } -static int __exit twl4030_bci_remove(struct platform_device *pdev) +static int twl4030_bci_remove(struct platform_device *pdev) { struct twl4030_bci *bci = platform_get_drvdata(pdev); @@ -1148,11 +1148,11 @@ MODULE_DEVICE_TABLE(of, twl_bci_of_match); static struct platform_driver twl4030_bci_driver = { .probe = twl4030_bci_probe, + .remove = twl4030_bci_remove, .driver = { .name = "twl4030_bci", .of_match_table = of_match_ptr(twl_bci_of_match), }, - .remove = __exit_p(twl4030_bci_remove), }; module_platform_driver(twl4030_bci_driver); diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index e8142803a1a7..b77435783ef3 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -97,30 +97,26 @@ static s32 scaled_ppm_to_ppb(long ppm) /* posix clock implementation */ -static int ptp_clock_getres(struct posix_clock *pc, struct timespec *tp) +static int ptp_clock_getres(struct posix_clock *pc, struct timespec64 *tp) { tp->tv_sec = 0; tp->tv_nsec = 1; return 0; } -static int ptp_clock_settime(struct posix_clock *pc, const struct timespec *tp) +static int ptp_clock_settime(struct posix_clock *pc, const struct timespec64 *tp) { struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); - struct timespec64 ts = timespec_to_timespec64(*tp); - return ptp->info->settime64(ptp->info, &ts); + return ptp->info->settime64(ptp->info, tp); } -static int ptp_clock_gettime(struct posix_clock *pc, struct timespec *tp) +static int 
ptp_clock_gettime(struct posix_clock *pc, struct timespec64 *tp) { struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); - struct timespec64 ts; int err; - err = ptp->info->gettime64(ptp->info, &ts); - if (!err) - *tp = timespec64_to_timespec(ts); + err = ptp->info->gettime64(ptp->info, tp); return err; } @@ -133,7 +129,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) ops = ptp->info; if (tx->modes & ADJ_SETOFFSET) { - struct timespec ts; + struct timespec64 ts; ktime_t kt; s64 delta; @@ -146,7 +142,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) if ((unsigned long) ts.tv_nsec >= NSEC_PER_SEC) return -EINVAL; - kt = timespec_to_ktime(ts); + kt = timespec64_to_ktime(ts); delta = ktime_to_ns(kt); err = ops->adjtime(ops, delta); } else if (tx->modes & ADJ_FREQUENCY) { diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c index 053088b9b66e..c1527cb645be 100644 --- a/drivers/pwm/pwm-lpss-pci.c +++ b/drivers/pwm/pwm-lpss-pci.c @@ -36,6 +36,14 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = { .clk_rate = 19200000, .npwm = 4, .base_unit_bits = 22, + .bypass = true, +}; + +/* Tangier */ +static const struct pwm_lpss_boardinfo pwm_lpss_tng_info = { + .clk_rate = 19200000, + .npwm = 4, + .base_unit_bits = 22, }; static int pwm_lpss_probe_pci(struct pci_dev *pdev, @@ -97,7 +105,7 @@ static const struct pci_device_id pwm_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x0ac8), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&pwm_lpss_byt_info}, { PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&pwm_lpss_byt_info}, - { PCI_VDEVICE(INTEL, 0x11a5), (unsigned long)&pwm_lpss_bxt_info}, + { PCI_VDEVICE(INTEL, 0x11a5), (unsigned long)&pwm_lpss_tng_info}, { PCI_VDEVICE(INTEL, 0x1ac8), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x2288), (unsigned long)&pwm_lpss_bsw_info}, { PCI_VDEVICE(INTEL, 0x2289), (unsigned long)&pwm_lpss_bsw_info}, diff --git 
a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c index b22b6fdadb9a..5d6ed1507d29 100644 --- a/drivers/pwm/pwm-lpss-platform.c +++ b/drivers/pwm/pwm-lpss-platform.c @@ -37,6 +37,7 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = { .clk_rate = 19200000, .npwm = 4, .base_unit_bits = 22, + .bypass = true, }; static int pwm_lpss_probe_platform(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 689d2c1cbead..8db0d40ccacd 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -57,7 +57,7 @@ static inline void pwm_lpss_write(const struct pwm_device *pwm, u32 value) writel(value, lpwm->regs + pwm->hwpwm * PWM_SIZE + PWM); } -static int pwm_lpss_update(struct pwm_device *pwm) +static int pwm_lpss_wait_for_update(struct pwm_device *pwm) { struct pwm_lpss_chip *lpwm = to_lpwm(pwm->chip); const void __iomem *addr = lpwm->regs + pwm->hwpwm * PWM_SIZE + PWM; @@ -65,8 +65,6 @@ static int pwm_lpss_update(struct pwm_device *pwm) u32 val; int err; - pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); - /* * PWM Configuration register has SW_UPDATE bit that is set when a new * configuration is written to the register. 
The bit is automatically @@ -122,6 +120,12 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm, pwm_lpss_write(pwm, ctrl); } +static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond) +{ + if (cond) + pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_ENABLE); +} + static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state) { @@ -137,18 +141,21 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, return ret; } pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); - ret = pwm_lpss_update(pwm); + pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); + pwm_lpss_cond_enable(pwm, lpwm->info->bypass == false); + ret = pwm_lpss_wait_for_update(pwm); if (ret) { pm_runtime_put(chip->dev); return ret; } - pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_ENABLE); + pwm_lpss_cond_enable(pwm, lpwm->info->bypass == true); } else { ret = pwm_lpss_is_updating(pwm); if (ret) return ret; pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); - return pwm_lpss_update(pwm); + pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); + return pwm_lpss_wait_for_update(pwm); } } else if (pwm_is_enabled(pwm)) { pwm_lpss_write(pwm, pwm_lpss_read(pwm) & ~PWM_ENABLE); diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h index c94cd7c2695d..98306bb02cfe 100644 --- a/drivers/pwm/pwm-lpss.h +++ b/drivers/pwm/pwm-lpss.h @@ -22,6 +22,7 @@ struct pwm_lpss_boardinfo { unsigned long clk_rate; unsigned int npwm; unsigned long base_unit_bits; + bool bypass; }; struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index ef89df1f7336..744d56197286 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -191,6 +191,28 @@ static int rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, return 0; } +static int rockchip_pwm_enable(struct pwm_chip *chip, + 
struct pwm_device *pwm, + bool enable, + enum pwm_polarity polarity) +{ + struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); + int ret; + + if (enable) { + ret = clk_enable(pc->clk); + if (ret) + return ret; + } + + pc->data->set_enable(chip, pwm, enable, polarity); + + if (!enable) + clk_disable(pc->clk); + + return 0; +} + static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state) { @@ -207,22 +229,26 @@ static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, return ret; if (state->polarity != curstate.polarity && enabled) { - pc->data->set_enable(chip, pwm, false, state->polarity); + ret = rockchip_pwm_enable(chip, pwm, false, state->polarity); + if (ret) + goto out; enabled = false; } ret = rockchip_pwm_config(chip, pwm, state->duty_cycle, state->period); if (ret) { if (enabled != curstate.enabled) - pc->data->set_enable(chip, pwm, !enabled, - state->polarity); - + rockchip_pwm_enable(chip, pwm, !enabled, + state->polarity); goto out; } - if (state->enabled != enabled) - pc->data->set_enable(chip, pwm, state->enabled, - state->polarity); + if (state->enabled != enabled) { + ret = rockchip_pwm_enable(chip, pwm, state->enabled, + state->polarity); + if (ret) + goto out; + } /* * Update the state with the real hardware, which can differ a bit diff --git a/drivers/reset/core.c b/drivers/reset/core.c index f1e5e65388bb..cd739d2fa160 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -275,7 +275,7 @@ int reset_control_status(struct reset_control *rstc) } EXPORT_SYMBOL_GPL(reset_control_status); -static struct reset_control *__reset_control_get( +static struct reset_control *__reset_control_get_internal( struct reset_controller_dev *rcdev, unsigned int index, bool shared) { @@ -308,7 +308,7 @@ static struct reset_control *__reset_control_get( return rstc; } -static void __reset_control_put(struct reset_control *rstc) +static void __reset_control_put_internal(struct reset_control 
*rstc) { lockdep_assert_held(&reset_list_mutex); @@ -377,7 +377,7 @@ struct reset_control *__of_reset_control_get(struct device_node *node, } /* reset_list_mutex also protects the rcdev's reset_control list */ - rstc = __reset_control_get(rcdev, rstc_id, shared); + rstc = __reset_control_get_internal(rcdev, rstc_id, shared); mutex_unlock(&reset_list_mutex); @@ -385,6 +385,17 @@ struct reset_control *__of_reset_control_get(struct device_node *node, } EXPORT_SYMBOL_GPL(__of_reset_control_get); +struct reset_control *__reset_control_get(struct device *dev, const char *id, + int index, bool shared, bool optional) +{ + if (dev->of_node) + return __of_reset_control_get(dev->of_node, id, index, shared, + optional); + + return optional ? NULL : ERR_PTR(-EINVAL); +} +EXPORT_SYMBOL_GPL(__reset_control_get); + /** * reset_control_put - free the reset controller * @rstc: reset controller @@ -396,7 +407,7 @@ void reset_control_put(struct reset_control *rstc) return; mutex_lock(&reset_list_mutex); - __reset_control_put(rstc); + __reset_control_put_internal(rstc); mutex_unlock(&reset_list_mutex); } EXPORT_SYMBOL_GPL(reset_control_put); @@ -417,8 +428,7 @@ struct reset_control *__devm_reset_control_get(struct device *dev, if (!ptr) return ERR_PTR(-ENOMEM); - rstc = __of_reset_control_get(dev ? 
dev->of_node : NULL, - id, index, shared, optional); + rstc = __reset_control_get(dev, id, index, shared, optional); if (!IS_ERR(rstc)) { *ptr = rstc; devres_add(dev, ptr); diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index 6ff61dad5e21..62fed9dc893e 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c @@ -183,11 +183,33 @@ static void jsfd_read(char *buf, unsigned long p, size_t togo) { } } -static void jsfd_do_request(struct request_queue *q) +static int jsfd_queue; + +static struct request *jsfd_next_request(void) +{ + struct request_queue *q; + struct request *rq; + int old_pos = jsfd_queue; + + do { + q = jsfd_disk[jsfd_queue]->queue; + if (++jsfd_queue == JSF_MAX) + jsfd_queue = 0; + if (q) { + rq = blk_fetch_request(q); + if (rq) + return rq; + } + } while (jsfd_queue != old_pos); + + return NULL; +} + +static void jsfd_request(void) { struct request *req; - req = blk_fetch_request(q); + req = jsfd_next_request(); while (req) { struct jsfd_part *jdp = req->rq_disk->private_data; unsigned long offset = blk_rq_pos(req) << 9; @@ -211,10 +233,15 @@ static void jsfd_do_request(struct request_queue *q) err = 0; end: if (!__blk_end_request_cur(req, err)) - req = blk_fetch_request(q); + req = jsfd_next_request(); } } +static void jsfd_do_request(struct request_queue *q) +{ + jsfd_request(); +} + /* * The memory devices use the full 32/64 bits of the offset, and so we cannot * check against negative addresses: they are ok. 
The return value is weird, @@ -544,8 +571,6 @@ static int jsflash_init(void) return 0; } -static struct request_queue *jsf_queue; - static int jsfd_init(void) { static DEFINE_SPINLOCK(lock); @@ -562,6 +587,11 @@ static int jsfd_init(void) struct gendisk *disk = alloc_disk(1); if (!disk) goto out; + disk->queue = blk_init_queue(jsfd_do_request, &lock); + if (!disk->queue) { + put_disk(disk); + goto out; + } jsfd_disk[i] = disk; } @@ -570,13 +600,6 @@ static int jsfd_init(void) goto out; } - jsf_queue = blk_init_queue(jsfd_do_request, &lock); - if (!jsf_queue) { - err = -ENOMEM; - unregister_blkdev(JSFD_MAJOR, "jsfd"); - goto out; - } - for (i = 0; i < JSF_MAX; i++) { struct gendisk *disk = jsfd_disk[i]; if ((i & JSF_PART_MASK) >= JSF_NPART) continue; @@ -589,7 +612,6 @@ static int jsfd_init(void) disk->fops = &jsfd_fops; set_capacity(disk, jdp->dsize >> 9); disk->private_data = jdp; - disk->queue = jsf_queue; add_disk(disk); set_disk_ro(disk, 1); } @@ -619,6 +641,7 @@ static void __exit jsflash_cleanup_module(void) for (i = 0; i < JSF_MAX; i++) { if ((i & JSF_PART_MASK) >= JSF_NPART) continue; del_gendisk(jsfd_disk[i]); + blk_cleanup_queue(jsfd_disk[i]->queue); put_disk(jsfd_disk[i]); } if (jsf0.busy) @@ -628,7 +651,6 @@ static void __exit jsflash_cleanup_module(void) misc_deregister(&jsf_dev); unregister_blkdev(JSFD_MAJOR, "jsfd"); - blk_cleanup_queue(jsf_queue); } module_init(jsflash_init_module); diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index fc2855565a51..93dbe58c47c8 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -166,6 +166,7 @@ scsi_mod-y += scsi_scan.o scsi_sysfs.o scsi_devinfo.o scsi_mod-$(CONFIG_SCSI_NETLINK) += scsi_netlink.o scsi_mod-$(CONFIG_SYSCTL) += scsi_sysctl.o scsi_mod-$(CONFIG_SCSI_PROC_FS) += scsi_proc.o +scsi_mod-$(CONFIG_BLK_DEBUG_FS) += scsi_debugfs.o scsi_mod-y += scsi_trace.o scsi_logging.o scsi_mod-$(CONFIG_PM) += scsi_pm.o scsi_mod-$(CONFIG_SCSI_DH) += scsi_dh.o diff --git 
a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index d036a806f31c..d281492009fb 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1690,9 +1690,6 @@ struct aac_dev #define aac_adapter_sync_cmd(dev, command, p1, p2, p3, p4, p5, p6, status, r1, r2, r3, r4) \ (dev)->a_ops.adapter_sync_cmd(dev, command, p1, p2, p3, p4, p5, p6, status, r1, r2, r3, r4) -#define aac_adapter_check_health(dev) \ - (dev)->a_ops.adapter_check_health(dev) - #define aac_adapter_restart(dev, bled, reset_type) \ ((dev)->a_ops.adapter_restart(dev, bled, reset_type)) @@ -2615,6 +2612,14 @@ static inline unsigned int cap_to_cyls(sector_t capacity, unsigned divisor) return capacity; } +static inline int aac_adapter_check_health(struct aac_dev *dev) +{ + if (unlikely(pci_channel_offline(dev->pdev))) + return -1; + + return (dev)->a_ops.adapter_check_health(dev); +} + /* SCp.phase values */ #define AAC_OWNER_MIDLEVEL 0x101 #define AAC_OWNER_LOWLEVEL 0x102 diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index c8172f16cf33..1f4918355fdb 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1873,7 +1873,8 @@ int aac_check_health(struct aac_dev * aac) spin_unlock_irqrestore(&aac->fib_lock, flagv); if (BlinkLED < 0) { - printk(KERN_ERR "%s: Host adapter dead %d\n", aac->name, BlinkLED); + printk(KERN_ERR "%s: Host adapter is dead (or got a PCI error) %d\n", + aac->name, BlinkLED); goto out; } diff --git a/drivers/scsi/esas2r/esas2r_ioctl.c b/drivers/scsi/esas2r/esas2r_ioctl.c index b35ed3829421..2d4b7f049a68 100644 --- a/drivers/scsi/esas2r/esas2r_ioctl.c +++ b/drivers/scsi/esas2r/esas2r_ioctl.c @@ -1289,32 +1289,13 @@ int esas2r_ioctl_handler(void *hostdata, int cmd, void __user *arg) || (cmd > EXPRESS_IOCTL_MAX)) return -ENOTSUPP; - if (!access_ok(VERIFY_WRITE, arg, sizeof(struct atto_express_ioctl))) { + ioctl = memdup_user(arg, sizeof(struct atto_express_ioctl)); + if (IS_ERR(ioctl)) { 
esas2r_log(ESAS2R_LOG_WARN, "ioctl_handler access_ok failed for cmd %d, " "address %p", cmd, arg); - return -EFAULT; - } - - /* allocate a kernel memory buffer for the IOCTL data */ - ioctl = kzalloc(sizeof(struct atto_express_ioctl), GFP_KERNEL); - if (ioctl == NULL) { - esas2r_log(ESAS2R_LOG_WARN, - "ioctl_handler kzalloc failed for %zu bytes", - sizeof(struct atto_express_ioctl)); - return -ENOMEM; - } - - err = __copy_from_user(ioctl, arg, sizeof(struct atto_express_ioctl)); - if (err != 0) { - esas2r_log(ESAS2R_LOG_WARN, - "copy_from_user didn't copy everything (err %d, cmd %d)", - err, - cmd); - kfree(ioctl); - - return -EFAULT; + return PTR_ERR(ioctl); } /* verify the signature */ diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index b29afafc2885..5d5e272fd815 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6293,7 +6293,12 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg, break; case IPR_IOASC_MED_DO_NOT_REALLOC: /* prevent retries */ case IPR_IOASA_IR_DUAL_IOA_DISABLED: - scsi_cmd->result |= (DID_PASSTHROUGH << 16); + /* + * exception: do not set DID_PASSTHROUGH on CHECK CONDITION + * so SCSI mid-layer and upper layers handle it accordingly. 
+ */ + if (scsi_cmd->result != SAM_STAT_CHECK_CONDITION) + scsi_cmd->result |= (DID_PASSTHROUGH << 16); break; case IPR_IOASC_BUS_WAS_RESET: case IPR_IOASC_BUS_WAS_RESET_BY_OTHER: diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 257bbdd0f0b8..6d7840b096e6 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -56,7 +56,7 @@ struct lpfc_sli2_slim; #define LPFC_MAX_SG_SEG_CNT 4096 /* sg element count per scsi cmnd */ #define LPFC_MAX_SGL_SEG_CNT 512 /* SGL element count per scsi cmnd */ #define LPFC_MAX_BPL_SEG_CNT 4096 /* BPL element count per scsi cmnd */ -#define LPFC_MIN_NVME_SEG_CNT 254 +#define LPFC_MAX_NVME_SEG_CNT 128 /* max SGL element cnt per NVME cmnd */ #define LPFC_MAX_SGE_SIZE 0x80000000 /* Maximum data allowed in a SGE */ #define LPFC_IOCB_LIST_CNT 2250 /* list of IOCBs for fast-path usage. */ @@ -474,6 +474,8 @@ struct lpfc_vport { unsigned long rcv_buffer_time_stamp; uint32_t vport_flag; #define STATIC_VPORT 1 +#define FAWWPN_SET 2 +#define FAWWPN_PARAM_CHG 4 uint16_t fdmi_num_disc; uint32_t fdmi_hba_mask; @@ -781,6 +783,7 @@ struct lpfc_hba { uint32_t cfg_nvmet_fb_size; uint32_t cfg_total_seg_cnt; uint32_t cfg_sg_seg_cnt; + uint32_t cfg_nvme_seg_cnt; uint32_t cfg_sg_dma_buf_size; uint64_t cfg_soft_wwnn; uint64_t cfg_soft_wwpn; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 22819afbaef5..513fd07715cd 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -2292,6 +2292,8 @@ lpfc_soft_wwn_enable_store(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; unsigned int cnt = count; + uint8_t vvvl = vport->fc_sparam.cmn.valid_vendor_ver_level; + u32 *fawwpn_key = (uint32_t *)&vport->fc_sparam.un.vendorVersion[0]; /* * We're doing a simple sanity check for soft_wwpn setting. 
@@ -2305,6 +2307,12 @@ lpfc_soft_wwn_enable_store(struct device *dev, struct device_attribute *attr, * here. The intent is to protect against the random user or * application that is just writing attributes. */ + if (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0051 "LPFC_DRIVER_NAME" soft wwpn can not" + " be enabled: fawwpn is enabled\n"); + return -EINVAL; + } /* count may include a LF at end of string */ if (buf[cnt-1] == '\n') @@ -3335,7 +3343,7 @@ LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP, * percentage will go to NVME. */ LPFC_ATTR_R(xri_split, 50, 10, 90, - "Division of XRI resources between SCSI and NVME"); + "Division of XRI resources between SCSI and NVME"); /* # lpfc_log_verbose: Only turn this flag on if you are willing to risk being diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 18157d2840a3..a1686c2d863c 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -2486,6 +2486,10 @@ static int lpfcdiag_loop_self_reg(struct lpfc_hba *phba, uint16_t *rpi) mbox, *rpi); else { *rpi = lpfc_sli4_alloc_rpi(phba); + if (*rpi == LPFC_RPI_ALLOC_ERROR) { + mempool_free(mbox, phba->mbox_mem_pool); + return -EBUSY; + } status = lpfc_reg_rpi(phba, phba->pport->vpi, phba->pport->fc_myDID, (uint8_t *)&phba->pport->fc_sparam, diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 54e6ac42fbcd..944b32ca4931 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -24,6 +24,7 @@ typedef int (*node_filter)(struct lpfc_nodelist *, void *); struct fc_rport; struct fc_frame_header; +struct lpfc_nvmet_rcv_ctx; void lpfc_down_link(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_sli_read_link_ste(struct lpfc_hba *); void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t, uint16_t); @@ -99,7 +100,7 @@ void lpfc_issue_reg_vpi(struct lpfc_hba *, struct lpfc_vport *); int lpfc_check_sli_ndlp(struct lpfc_hba 
*, struct lpfc_sli_ring *, struct lpfc_iocbq *, struct lpfc_nodelist *); -void lpfc_nlp_init(struct lpfc_vport *, struct lpfc_nodelist *, uint32_t); +struct lpfc_nodelist *lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did); struct lpfc_nodelist *lpfc_nlp_get(struct lpfc_nodelist *); int lpfc_nlp_put(struct lpfc_nodelist *); int lpfc_nlp_not_used(struct lpfc_nodelist *ndlp); @@ -245,6 +246,10 @@ struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *); void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *); struct rqb_dmabuf *lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba); void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab); +void lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_dmabuf *mp); +int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr); void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *, uint16_t); int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, @@ -302,6 +307,8 @@ int lpfc_sli_check_eratt(struct lpfc_hba *); void lpfc_sli_handle_slow_ring_event(struct lpfc_hba *, struct lpfc_sli_ring *, uint32_t); void lpfc_sli4_handle_received_buffer(struct lpfc_hba *, struct hbq_dmabuf *); +void lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr, bool aborted); void lpfc_sli_def_mbox_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *, LPFC_MBOXQ_t *); int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t, diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index d3e9af983015..1487406aea77 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -537,19 +537,53 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) } } +static void +lpfc_ns_rsp_audit_did(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) +{ + struct lpfc_hba *phba = vport->phba; + struct lpfc_nodelist *ndlp = 
NULL; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + /* + * To conserve rpi's, filter out addresses for other + * vports on the same physical HBAs. + */ + if (Did != vport->fc_myDID && + (!lpfc_find_vport_by_did(phba, Did) || + vport->cfg_peer_port_login)) { + if (!phba->nvmet_support) { + /* FCPI/NVMEI path. Process Did */ + lpfc_prep_node_fc4type(vport, Did, fc4_type); + return; + } + /* NVMET path. NVMET only cares about NVMEI nodes. */ + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_type != NLP_NVME_INITIATOR || + ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) + continue; + spin_lock_irq(shost->host_lock); + if (ndlp->nlp_DID == Did) + ndlp->nlp_flag &= ~NLP_NVMET_RECOV; + else + ndlp->nlp_flag |= NLP_NVMET_RECOV; + spin_unlock_irq(shost->host_lock); + } + } +} + static int lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type, uint32_t Size) { - struct lpfc_hba *phba = vport->phba; struct lpfc_sli_ct_request *Response = (struct lpfc_sli_ct_request *) mp->virt; - struct lpfc_nodelist *ndlp = NULL; struct lpfc_dmabuf *mlast, *next_mp; uint32_t *ctptr = (uint32_t *) & Response->un.gid.PortType; uint32_t Did, CTentry; int Cnt; struct list_head head; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp = NULL; lpfc_set_disctmo(vport); vport->num_disc_nodes = 0; @@ -574,19 +608,7 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type, /* Get next DID from NameServer List */ CTentry = *ctptr++; Did = ((be32_to_cpu(CTentry)) & Mask_DID); - - ndlp = NULL; - - /* - * Check for rscn processing or not - * To conserve rpi's, filter out addresses for other - * vports on the same physical HBAs. 
- */ - if ((Did != vport->fc_myDID) && - ((lpfc_find_vport_by_did(phba, Did) == NULL) || - vport->cfg_peer_port_login)) - lpfc_prep_node_fc4type(vport, Did, fc4_type); - + lpfc_ns_rsp_audit_did(vport, Did, fc4_type); if (CTentry & (cpu_to_be32(SLI_CT_LAST_ENTRY))) goto nsout1; @@ -596,6 +618,22 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type, } + /* All GID_FT entries processed. If the driver is running + * in target mode, put impacted nodes into recovery and drop + * the RPI to flush outstanding IO. + */ + if (vport->phba->nvmet_support) { + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + if (!(ndlp->nlp_flag & NLP_NVMET_RECOV)) + continue; + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RECOVERY); + spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_NVMET_RECOV; + spin_unlock_irq(shost->host_lock); + } + } + nsout1: list_del(&head); return 0; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 913eed822cb8..fce549a91911 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -745,73 +745,102 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) { struct lpfc_hba *phba = vport->phba; struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; int len = 0; + int cnt; if (phba->nvmet_support) { if (!phba->targetport) return len; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "\nNVME Targetport Statistics\n"); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "LS: Rcv %08x Drop %08x Abort %08x\n", atomic_read(&tgtp->rcv_ls_req_in), atomic_read(&tgtp->rcv_ls_req_drop), atomic_read(&tgtp->xmt_ls_abort)); if (atomic_read(&tgtp->rcv_ls_req_in) != atomic_read(&tgtp->rcv_ls_req_out)) { - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "Rcv LS: in %08x
!= out %08x\n", atomic_read(&tgtp->rcv_ls_req_in), atomic_read(&tgtp->rcv_ls_req_out)); } - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "LS: Xmt %08x Drop %08x Cmpl %08x Err %08x\n", atomic_read(&tgtp->xmt_ls_rsp), atomic_read(&tgtp->xmt_ls_drop), atomic_read(&tgtp->xmt_ls_rsp_cmpl), atomic_read(&tgtp->xmt_ls_rsp_error)); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "FCP: Rcv %08x Drop %08x\n", atomic_read(&tgtp->rcv_fcp_cmd_in), atomic_read(&tgtp->rcv_fcp_cmd_drop)); if (atomic_read(&tgtp->rcv_fcp_cmd_in) != atomic_read(&tgtp->rcv_fcp_cmd_out)) { - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "Rcv FCP: in %08x != out %08x\n", atomic_read(&tgtp->rcv_fcp_cmd_in), atomic_read(&tgtp->rcv_fcp_cmd_out)); } - len += snprintf(buf+len, size-len, - "FCP Rsp: read %08x readrsp %08x write %08x rsp %08x\n", + len += snprintf(buf + len, size - len, + "FCP Rsp: read %08x readrsp %08x " + "write %08x rsp %08x\n", atomic_read(&tgtp->xmt_fcp_read), atomic_read(&tgtp->xmt_fcp_read_rsp), atomic_read(&tgtp->xmt_fcp_write), atomic_read(&tgtp->xmt_fcp_rsp)); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "FCP Rsp: abort %08x drop %08x\n", atomic_read(&tgtp->xmt_fcp_abort), atomic_read(&tgtp->xmt_fcp_drop)); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "FCP Rsp Cmpl: %08x err %08x drop %08x\n", atomic_read(&tgtp->xmt_fcp_rsp_cmpl), atomic_read(&tgtp->xmt_fcp_rsp_error), atomic_read(&tgtp->xmt_fcp_rsp_drop)); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "ABORT: Xmt %08x Err %08x Cmpl %08x", atomic_read(&tgtp->xmt_abort_rsp), atomic_read(&tgtp->xmt_abort_rsp_error), atomic_read(&tgtp->xmt_abort_cmpl)); - len += snprintf(buf+len, size-len, "\n"); + len += snprintf(buf + len, size - len, "\n"); + + cnt = 0; + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(ctxp, 
next_ctxp, + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + list) { + cnt++; + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + if (cnt) { + len += snprintf(buf + len, size - len, + "ABORT: %d ctx entries\n", cnt); + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(ctxp, next_ctxp, + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + list) { + if (len >= (size - LPFC_DEBUG_OUT_LINE_SZ)) + break; + len += snprintf(buf + len, size - len, + "Entry: oxid %x state %x " + "flag %x\n", + ctxp->oxid, ctxp->state, + ctxp->flag); + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + } } else { if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) return len; @@ -3128,8 +3157,6 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp, datqp->queue_id, datqp->entry_count, datqp->entry_size, datqp->host_index, datqp->hba_index); - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); - return len; } @@ -5700,10 +5727,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) #ifdef CONFIG_SCSI_LPFC_DEBUG_FS struct lpfc_hba *phba = vport->phba; - if (vport->disc_trc) { - kfree(vport->disc_trc); - vport->disc_trc = NULL; - } + kfree(vport->disc_trc); + vport->disc_trc = NULL; debugfs_remove(vport->debug_disc_trc); /* discovery_trace */ vport->debug_disc_trc = NULL; @@ -5770,10 +5795,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) debugfs_remove(phba->debug_readRef); /* readRef */ phba->debug_readRef = NULL; - if (phba->slow_ring_trc) { - kfree(phba->slow_ring_trc); - phba->slow_ring_trc = NULL; - } + kfree(phba->slow_ring_trc); + phba->slow_ring_trc = NULL; /* slow_ring_trace */ debugfs_remove(phba->debug_slow_ring_trc); diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index f4ff99d95db3..9d5a379f4b15 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -157,6 +157,7 @@ struct lpfc_node_rrq { #define NLP_LOGO_SND 0x00000100 /* sent LOGO request for 
this entry */ #define NLP_RNID_SND 0x00000400 /* sent RNID request for this entry */ #define NLP_ELS_SND_MASK 0x000007e0 /* sent ELS request for this entry */ +#define NLP_NVMET_RECOV 0x00001000 /* NVMET auditing node for recovery. */ #define NLP_DEFER_RM 0x00010000 /* Remove this ndlp if no longer used */ #define NLP_DELAY_TMO 0x00020000 /* delay timeout is running for node */ #define NLP_NPR_2B_DISC 0x00040000 /* node is included in num_disc_nodes */ diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index a5ca37e45fb6..67827e397431 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -603,9 +603,11 @@ lpfc_check_clean_addr_bit(struct lpfc_vport *vport, memcmp(&vport->fabric_portname, &sp->portName, sizeof(struct lpfc_name)) || memcmp(&vport->fabric_nodename, &sp->nodeName, - sizeof(struct lpfc_name))) + sizeof(struct lpfc_name)) || + (vport->vport_flag & FAWWPN_PARAM_CHG)) { fabric_param_changed = 1; - + vport->vport_flag &= ~FAWWPN_PARAM_CHG; + } /* * Word 1 Bit 31 in common service parameter is overloaded. 
* Word 1 Bit 31 in FLOGI request is multiple NPort request @@ -895,10 +897,9 @@ lpfc_cmpl_els_flogi_nport(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, * Cannot find existing Fabric ndlp, so allocate a * new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, PT2PT_RemoteID); if (!ndlp) goto fail; - lpfc_nlp_init(vport, ndlp, PT2PT_RemoteID); } else if (!NLP_CHK_NODE_ACT(ndlp)) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); @@ -1364,7 +1365,6 @@ lpfc_els_abort_flogi(struct lpfc_hba *phba) int lpfc_initial_flogi(struct lpfc_vport *vport) { - struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp; vport->port_state = LPFC_FLOGI; @@ -1374,10 +1374,9 @@ lpfc_initial_flogi(struct lpfc_vport *vport) ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, Fabric_DID); if (!ndlp) return 0; - lpfc_nlp_init(vport, ndlp, Fabric_DID); /* Set the node type */ ndlp->nlp_type |= NLP_FABRIC; /* Put ndlp onto node list */ @@ -1418,17 +1417,15 @@ lpfc_initial_flogi(struct lpfc_vport *vport) int lpfc_initial_fdisc(struct lpfc_vport *vport) { - struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp; /* First look for the Fabric ndlp */ ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, Fabric_DID); if (!ndlp) return 0; - lpfc_nlp_init(vport, ndlp, Fabric_DID); /* Put ndlp onto node list */ lpfc_enqueue_node(vport, ndlp); } else if (!NLP_CHK_NODE_ACT(ndlp)) { @@ -1564,14 +1561,13 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, phba->active_rrq_pool); return ndlp; } - new_ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC); + new_ndlp = lpfc_nlp_init(vport, ndlp->nlp_DID); if (!new_ndlp) 
{ if (active_rrqs_xri_bitmap) mempool_free(active_rrqs_xri_bitmap, phba->active_rrq_pool); return ndlp; } - lpfc_nlp_init(vport, new_ndlp, ndlp->nlp_DID); } else if (!NLP_CHK_NODE_ACT(new_ndlp)) { rc = memcmp(&ndlp->nlp_portname, name, sizeof(struct lpfc_name)); @@ -2845,10 +2841,9 @@ lpfc_issue_els_scr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry) ndlp = lpfc_findnode_did(vport, nportid); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, nportid); if (!ndlp) return 1; - lpfc_nlp_init(vport, ndlp, nportid); lpfc_enqueue_node(vport, ndlp); } else if (!NLP_CHK_NODE_ACT(ndlp)) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); @@ -2938,10 +2933,9 @@ lpfc_issue_els_farpr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry) ndlp = lpfc_findnode_did(vport, nportid); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, nportid); if (!ndlp) return 1; - lpfc_nlp_init(vport, ndlp, nportid); lpfc_enqueue_node(vport, ndlp); } else if (!NLP_CHK_NODE_ACT(ndlp)) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); @@ -4403,7 +4397,7 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); memset(pcmd, 0, cmdsize); - *((uint32_t *) (pcmd)) = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK)); + *((uint32_t *)(pcmd)) = elsrspcmd; pcmd += sizeof(uint32_t); /* For PRLI, remainder of payload is PRLI parameter page */ @@ -5867,8 +5861,11 @@ lpfc_rscn_recovery_check(struct lpfc_vport *vport) (ndlp->nlp_state == NLP_STE_UNUSED_NODE) || !lpfc_rscn_payload_check(vport, ndlp->nlp_DID)) continue; + + /* NVME Target mode does not do RSCN Recovery. 
*/ if (vport->phba->nvmet_support) continue; + lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); lpfc_cancel_retry_delay_tmo(vport, ndlp); @@ -6133,7 +6130,6 @@ int lpfc_els_handle_rscn(struct lpfc_vport *vport) { struct lpfc_nodelist *ndlp; - struct lpfc_hba *phba = vport->phba; /* Ignore RSCN if the port is being torn down. */ if (vport->load_flag & FC_UNLOADING) { @@ -6157,22 +6153,16 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport) ndlp = lpfc_findnode_did(vport, NameServer_DID); if (ndlp && NLP_CHK_NODE_ACT(ndlp) && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { - /* Good ndlp, issue CT Request to NameServer */ + /* Good ndlp, issue CT Request to NameServer. Need to + * know how many gidfts were issued. If none, then just + * flush the RSCN. Otherwise, the outstanding requests + * need to complete. + */ vport->gidft_inp = 0; - if (lpfc_issue_gidft(vport) == 0) - /* Wait for NameServer query cmpl before we can - * continue - */ + if (lpfc_issue_gidft(vport) > 0) return 1; } else { - /* If login to NameServer does not exist, issue one */ - /* Good status, issue PLOGI to NameServer */ - ndlp = lpfc_findnode_did(vport, NameServer_DID); - if (ndlp && NLP_CHK_NODE_ACT(ndlp)) - /* Wait for NameServer login cmpl before we can - continue */ - return 1; - + /* Nameserver login in question. Revalidate. 
*/ if (ndlp) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_PLOGI_ISSUE); @@ -6182,12 +6172,11 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport) } ndlp->nlp_prev_state = NLP_STE_UNUSED_NODE; } else { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, NameServer_DID); if (!ndlp) { lpfc_els_flush_rscn(vport); return 0; } - lpfc_nlp_init(vport, ndlp, NameServer_DID); ndlp->nlp_prev_state = ndlp->nlp_state; lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); } @@ -7746,11 +7735,9 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ndlp = lpfc_findnode_did(vport, did); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, did); if (!ndlp) goto dropit; - - lpfc_nlp_init(vport, ndlp, did); lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); newnode = 1; if ((did & Fabric_DID_MASK) == Fabric_DID_MASK) @@ -8193,7 +8180,6 @@ lpfc_els_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, static void lpfc_start_fdmi(struct lpfc_vport *vport) { - struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp; /* If this is the first time, allocate an ndlp and initialize @@ -8202,9 +8188,8 @@ lpfc_start_fdmi(struct lpfc_vport *vport) */ ndlp = lpfc_findnode_did(vport, FDMI_DID); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, FDMI_DID); if (ndlp) { - lpfc_nlp_init(vport, ndlp, FDMI_DID); ndlp->nlp_type |= NLP_FABRIC; } else { return; @@ -8257,7 +8242,7 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport) ndlp = lpfc_findnode_did(vport, NameServer_DID); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, NameServer_DID); if (!ndlp) { if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) { lpfc_disc_start(vport); @@ -8268,7 +8253,6 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport 
*vport) "0251 NameServer login: no memory\n"); return; } - lpfc_nlp_init(vport, ndlp, NameServer_DID); } else if (!NLP_CHK_NODE_ACT(ndlp)) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); if (!ndlp) { @@ -8771,7 +8755,7 @@ lpfc_issue_els_fdisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, pcmd += sizeof(uint32_t); /* Node Name */ pcmd += sizeof(uint32_t); /* Node Name */ memcpy(pcmd, &vport->fc_nodename, 8); - + memset(sp->un.vendorVersion, 0, sizeof(sp->un.vendorVersion)); lpfc_set_disctmo(vport); phba->fc_stat.elsXmitFDISC++; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 180b072beef6..0482c5580331 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -3002,6 +3002,7 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) MAILBOX_t *mb = &pmb->u.mb; struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1; struct lpfc_vport *vport = pmb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct serv_parm *sp = &vport->fc_sparam; uint32_t ed_tov; @@ -3031,6 +3032,7 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } lpfc_update_vport_wwn(vport); + fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn); if (vport->port_type == LPFC_PHYSICAL_PORT) { memcpy(&phba->wwnn, &vport->fc_nodename, sizeof(phba->wwnn)); memcpy(&phba->wwpn, &vport->fc_portname, sizeof(phba->wwnn)); @@ -3309,6 +3311,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) struct lpfc_sli_ring *pring; MAILBOX_t *mb = &pmb->u.mb; struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); + uint8_t attn_type; /* Unblock ELS traffic */ pring = lpfc_phba_elsring(phba); @@ -3325,6 +3328,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } la = (struct lpfc_mbx_read_top *) &pmb->u.mb.un.varReadTop; + attn_type = bf_get(lpfc_mbx_read_top_att_type, la); memcpy(&phba->alpa_map[0], mp->virt, 128); @@ 
-3337,7 +3341,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) if (phba->fc_eventTag <= la->eventTag) { phba->fc_stat.LinkMultiEvent++; - if (bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP) + if (attn_type == LPFC_ATT_LINK_UP) if (phba->fc_eventTag != 0) lpfc_linkdown(phba); } @@ -3353,7 +3357,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } phba->link_events++; - if ((bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP) && + if ((attn_type == LPFC_ATT_LINK_UP) && !(phba->sli.sli_flag & LPFC_MENLO_MAINT)) { phba->fc_stat.LinkUp++; if (phba->link_flag & LS_LOOPBACK_MODE) { @@ -3379,8 +3383,8 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) phba->wait_4_mlo_maint_flg); } lpfc_mbx_process_link_up(phba, la); - } else if (bf_get(lpfc_mbx_read_top_att_type, la) == - LPFC_ATT_LINK_DOWN) { + } else if (attn_type == LPFC_ATT_LINK_DOWN || + attn_type == LPFC_ATT_UNEXP_WWPN) { phba->fc_stat.LinkDown++; if (phba->link_flag & LS_LOOPBACK_MODE) lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, @@ -3389,6 +3393,14 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) "Data: x%x x%x x%x\n", la->eventTag, phba->fc_eventTag, phba->pport->port_state, vport->fc_flag); + else if (attn_type == LPFC_ATT_UNEXP_WWPN) + lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, + "1313 Link Down UNEXP WWPN Event x%x received " + "Data: x%x x%x x%x x%x x%x\n", + la->eventTag, phba->fc_eventTag, + phba->pport->port_state, vport->fc_flag, + bf_get(lpfc_mbx_read_top_mm, la), + bf_get(lpfc_mbx_read_top_fa, la)); else lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, "1305 Link Down Event x%x received " @@ -3399,8 +3411,8 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) bf_get(lpfc_mbx_read_top_fa, la)); lpfc_mbx_issue_link_down(phba); } - if ((phba->sli.sli_flag & LPFC_MENLO_MAINT) && - ((bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP))) { + if 
(phba->sli.sli_flag & LPFC_MENLO_MAINT && + attn_type == LPFC_ATT_LINK_UP) { if (phba->link_state != LPFC_LINK_DOWN) { phba->fc_stat.LinkDown++; lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, @@ -4136,7 +4148,6 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int old_state, int new_state) { struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - struct lpfc_hba *phba = vport->phba; if (new_state == NLP_STE_UNMAPPED_NODE) { ndlp->nlp_flag &= ~NLP_NODEV_REMOVE; @@ -4155,14 +4166,14 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_unregister_remote_port(ndlp); } - /* Notify the NVME transport of this rport's loss */ - if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) && - (vport->phba->nvmet_support == 0) && - ((ndlp->nlp_fc4_type & NLP_FC4_NVME) || - (ndlp->nlp_DID == Fabric_DID))) { + /* Notify the NVME transport of this rport's loss on the + * Initiator. For NVME Target, should upcall transport + * in the else clause when API available. 
+ */ + if (ndlp->nlp_fc4_type & NLP_FC4_NVME) { vport->phba->nport_event_cnt++; - lpfc_nvme_unregister_port(vport, ndlp); + if (vport->phba->nvmet_support == 0) + lpfc_nvme_unregister_port(vport, ndlp); } } @@ -4368,10 +4379,17 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, uint32_t did; unsigned long flags; unsigned long *active_rrqs_xri_bitmap = NULL; + int rpi = LPFC_RPI_ALLOC_ERROR; if (!ndlp) return NULL; + if (phba->sli_rev == LPFC_SLI_REV4) { + rpi = lpfc_sli4_alloc_rpi(vport->phba); + if (rpi == LPFC_RPI_ALLOC_ERROR) + return NULL; + } + spin_lock_irqsave(&phba->ndlp_lock, flags); /* The ndlp should not be in memory free mode */ if (NLP_CHK_FREE_REQ(ndlp)) { @@ -4381,7 +4399,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, kref_read(&ndlp->kref)); - return NULL; + goto free_rpi; } /* The ndlp should not already be in active mode */ if (NLP_CHK_NODE_ACT(ndlp)) { @@ -4391,7 +4409,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, kref_read(&ndlp->kref)); - return NULL; + goto free_rpi; } /* Keep the original DID */ @@ -4409,7 +4427,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, spin_unlock_irqrestore(&phba->ndlp_lock, flags); if (vport->phba->sli_rev == LPFC_SLI_REV4) { - ndlp->nlp_rpi = lpfc_sli4_alloc_rpi(vport->phba); + ndlp->nlp_rpi = rpi; lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, "0008 rpi:%x DID:%x flg:%x refcnt:%d " "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, @@ -4426,6 +4444,11 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "node enable: did:x%x", ndlp->nlp_DID, 0, 0); return ndlp; + +free_rpi: + if (phba->sli_rev == LPFC_SLI_REV4) + lpfc_sli4_free_rpi(vport->phba, rpi); + return NULL; } void @@ -5104,65 +5127,82 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) ndlp = 
lpfc_findnode_did(vport, did); if (!ndlp) { + if (vport->phba->nvmet_support) + return NULL; if ((vport->fc_flag & FC_RSCN_MODE) != 0 && lpfc_rscn_payload_check(vport, did) == 0) return NULL; - ndlp = (struct lpfc_nodelist *) - mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, did); if (!ndlp) return NULL; - lpfc_nlp_init(vport, ndlp, did); lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); - if (vport->phba->nvmet_support) - return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); return ndlp; } else if (!NLP_CHK_NODE_ACT(ndlp)) { + if (vport->phba->nvmet_support) + return NULL; ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_NPR_NODE); if (!ndlp) return NULL; - if (vport->phba->nvmet_support) - return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); return ndlp; } + /* The NVME Target does not want to actively manage an rport. + * The goal is to allow the target to reset its state and clear + * pending IO in preparation for the initiator to recover. + */ if ((vport->fc_flag & FC_RSCN_MODE) && !(vport->fc_flag & FC_NDISC_ACTIVE)) { if (lpfc_rscn_payload_check(vport, did)) { - /* If we've already received a PLOGI from this NPort - * we don't need to try to discover it again. - */ - if (ndlp->nlp_flag & NLP_RCV_PLOGI) - return NULL; /* Since this node is marked for discovery, * delay timeout is not needed. */ lpfc_cancel_retry_delay_tmo(vport, ndlp); + + /* NVME Target mode waits until rport is known to be + * impacted by the RSCN before it transitions. No + * active management - just go to NPR provided the + * node had a valid login. + */ if (vport->phba->nvmet_support) return ndlp; + + /* If we've already received a PLOGI from this NPort + * we don't need to try to discover it again. 
+ */ + if (ndlp->nlp_flag & NLP_RCV_PLOGI) + return NULL; + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); } else ndlp = NULL; } else { - /* If we've already received a PLOGI from this NPort, - * or we are already in the process of discovery on it, - * we don't need to try to discover it again. + /* If the initiator received a PLOGI from this NPort or if the + * initiator is already in the process of discovery on it, + * there's no need to try to discover it again. */ if (ndlp->nlp_state == NLP_STE_ADISC_ISSUE || ndlp->nlp_state == NLP_STE_PLOGI_ISSUE || - ndlp->nlp_flag & NLP_RCV_PLOGI) + (!vport->phba->nvmet_support && + ndlp->nlp_flag & NLP_RCV_PLOGI)) return NULL; - lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + if (vport->phba->nvmet_support) return ndlp; + + /* Moving to NPR state clears unsolicited flags and + * allows for rediscovery + */ + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); @@ -5887,16 +5927,31 @@ lpfc_find_vport_by_vpid(struct lpfc_hba *phba, uint16_t vpi) return NULL; } -void -lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - uint32_t did) +struct lpfc_nodelist * +lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did) { + struct lpfc_nodelist *ndlp; + int rpi = LPFC_RPI_ALLOC_ERROR; + + if (vport->phba->sli_rev == LPFC_SLI_REV4) { + rpi = lpfc_sli4_alloc_rpi(vport->phba); + if (rpi == LPFC_RPI_ALLOC_ERROR) + return NULL; + } + + ndlp = mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) { + if (vport->phba->sli_rev == LPFC_SLI_REV4) + lpfc_sli4_free_rpi(vport->phba, rpi); + return NULL; + } + memset(ndlp, 0, sizeof (struct lpfc_nodelist)); lpfc_initialize_node(vport, ndlp, did); INIT_LIST_HEAD(&ndlp->nlp_listp); if (vport->phba->sli_rev == LPFC_SLI_REV4) { - ndlp->nlp_rpi = lpfc_sli4_alloc_rpi(vport->phba); + ndlp->nlp_rpi = rpi; 
lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, "0007 rpi:%x DID:%x flg:%x refcnt:%d " "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, @@ -5918,7 +5973,7 @@ lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "node init: did:x%x", ndlp->nlp_DID, 0, 0); - return; + return ndlp; } /* This routine releases all resources associated with a specifc NPort's ndlp diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 15ca21484150..26a5647e057e 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -509,6 +509,8 @@ struct class_parms { uint8_t word3Reserved2; /* Fc Word 3, bit 0: 7 */ }; +#define FAPWWN_KEY_VENDOR 0x42524344 /*valid vendor version fawwpn key*/ + struct serv_parm { /* Structure is in Big Endian format */ struct csp cmn; struct lpfc_name portName; @@ -2885,6 +2887,7 @@ struct lpfc_mbx_read_top { #define LPFC_ATT_RESERVED 0x00 /* Reserved - attType */ #define LPFC_ATT_LINK_UP 0x01 /* Link is up */ #define LPFC_ATT_LINK_DOWN 0x02 /* Link is down */ +#define LPFC_ATT_UNEXP_WWPN 0x06 /* Link is down Unexpected WWPN */ uint32_t word3; #define lpfc_mbx_read_top_alpa_granted_SHIFT 24 #define lpfc_mbx_read_top_alpa_granted_MASK 0x000000FF diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 15277705cb6b..1d12f2be36bc 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -2720,6 +2720,9 @@ struct lpfc_mbx_request_features { #define lpfc_mbx_rq_ftr_rq_ifip_SHIFT 7 #define lpfc_mbx_rq_ftr_rq_ifip_MASK 0x00000001 #define lpfc_mbx_rq_ftr_rq_ifip_WORD word2 +#define lpfc_mbx_rq_ftr_rq_iaar_SHIFT 9 +#define lpfc_mbx_rq_ftr_rq_iaar_MASK 0x00000001 +#define lpfc_mbx_rq_ftr_rq_iaar_WORD word2 #define lpfc_mbx_rq_ftr_rq_perfh_SHIFT 11 #define lpfc_mbx_rq_ftr_rq_perfh_MASK 0x00000001 #define lpfc_mbx_rq_ftr_rq_perfh_WORD word2 @@ -3853,6 +3856,7 @@ struct lpfc_acqe_fc_la { #define LPFC_FC_LA_TYPE_NO_HARD_ALPA 0x3 #define LPFC_FC_LA_TYPE_MDS_LINK_DOWN 0x4 #define
LPFC_FC_LA_TYPE_MDS_LOOPBACK 0x5 +#define LPFC_FC_LA_TYPE_UNEXP_WWPN 0x6 #define lpfc_acqe_fc_la_port_type_SHIFT 6 #define lpfc_acqe_fc_la_port_type_MASK 0x00000003 #define lpfc_acqe_fc_la_port_type_WORD word0 diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 6cc561b04211..90ae354a9c45 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -42,6 +42,10 @@ #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include <scsi/scsi_transport_fc.h> +#include <scsi/scsi_tcq.h> +#include <scsi/fc/fc_fs.h> + +#include <linux/nvme-fc-driver.h> #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -52,6 +56,7 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" +#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -335,6 +340,9 @@ lpfc_dump_wakeup_param_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq) void lpfc_update_vport_wwn(struct lpfc_vport *vport) { + uint8_t vvvl = vport->fc_sparam.cmn.valid_vendor_ver_level; + u32 *fawwpn_key = (u32 *)&vport->fc_sparam.un.vendorVersion[0]; + /* If the soft name exists then update it using the service params */ if (vport->phba->cfg_soft_wwnn) u64_to_wwn(vport->phba->cfg_soft_wwnn, @@ -354,9 +362,25 @@ lpfc_update_vport_wwn(struct lpfc_vport *vport) memcpy(&vport->fc_sparam.nodeName, &vport->fc_nodename, sizeof(struct lpfc_name)); - if (vport->fc_portname.u.wwn[0] == 0 || vport->phba->cfg_soft_wwpn) + /* + * If the port name has changed, then set the Param changes flag + * to unreg the login + */ + if (vport->fc_portname.u.wwn[0] != 0 && + memcmp(&vport->fc_portname, &vport->fc_sparam.portName, + sizeof(struct lpfc_name))) + vport->vport_flag |= FAWWPN_PARAM_CHG; + + if (vport->fc_portname.u.wwn[0] == 0 || + vport->phba->cfg_soft_wwpn || + (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR) || + vport->vport_flag & FAWWPN_SET) { memcpy(&vport->fc_portname, &vport->fc_sparam.portName, sizeof(struct lpfc_name)); + 
vport->vport_flag &= ~FAWWPN_SET; + if (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR) + vport->vport_flag |= FAWWPN_SET; + } else memcpy(&vport->fc_sparam.portName, &vport->fc_portname, sizeof(struct lpfc_name)); @@ -1003,8 +1027,10 @@ static int lpfc_hba_down_post_s4(struct lpfc_hba *phba) { struct lpfc_scsi_buf *psb, *psb_next; + struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next; LIST_HEAD(aborts); LIST_HEAD(nvme_aborts); + LIST_HEAD(nvmet_aborts); unsigned long iflag = 0; struct lpfc_sglq *sglq_entry = NULL; @@ -1027,16 +1053,10 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) list_for_each_entry(sglq_entry, &phba->sli4_hba.lpfc_abts_els_sgl_list, list) sglq_entry->state = SGL_FREED; - list_for_each_entry(sglq_entry, - &phba->sli4_hba.lpfc_abts_nvmet_sgl_list, list) - sglq_entry->state = SGL_FREED; list_splice_init(&phba->sli4_hba.lpfc_abts_els_sgl_list, &phba->sli4_hba.lpfc_els_sgl_list); - if (phba->sli4_hba.nvme_wq) - list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list, - &phba->sli4_hba.lpfc_nvmet_sgl_list); spin_unlock(&phba->sli4_hba.sgl_list_lock); /* abts_scsi_buf_list_lock required because worker thread uses this @@ -1053,6 +1073,8 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); list_splice_init(&phba->sli4_hba.lpfc_abts_nvme_buf_list, &nvme_aborts); + list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + &nvmet_aborts); spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); } @@ -1066,13 +1088,20 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) list_splice(&aborts, &phba->lpfc_scsi_buf_list_put); spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag); - list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) { - psb->pCmd = NULL; - psb->status = IOSTAT_SUCCESS; + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) { + psb->pCmd = NULL; + psb->status = IOSTAT_SUCCESS; + } + 
spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag); + list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put); + spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag); + + list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) { + ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP); + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + } } - spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag); - list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put); - spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag); lpfc_sli4_free_sp_events(phba); return 0; @@ -2874,34 +2903,38 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba) { struct lpfc_nodelist *ndlp, *next_ndlp; struct lpfc_vport **vports; - int i; + int i, rpi; + unsigned long flags; if (phba->sli_rev != LPFC_SLI_REV4) return; vports = lpfc_create_vport_work_array(phba); - if (vports != NULL) { - for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) { - if (vports[i]->load_flag & FC_UNLOADING) - continue; + if (vports == NULL) + return; - list_for_each_entry_safe(ndlp, next_ndlp, - &vports[i]->fc_nodes, - nlp_listp) { - if (NLP_CHK_NODE_ACT(ndlp)) { - ndlp->nlp_rpi = - lpfc_sli4_alloc_rpi(phba); - lpfc_printf_vlog(ndlp->vport, KERN_INFO, - LOG_NODE, - "0009 rpi:%x DID:%x " - "flg:%x map:%x %p\n", - ndlp->nlp_rpi, - ndlp->nlp_DID, - ndlp->nlp_flag, - ndlp->nlp_usg_map, - ndlp); - } + for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) { + if (vports[i]->load_flag & FC_UNLOADING) + continue; + + list_for_each_entry_safe(ndlp, next_ndlp, + &vports[i]->fc_nodes, + nlp_listp) { + if (!NLP_CHK_NODE_ACT(ndlp)) + continue; + rpi = lpfc_sli4_alloc_rpi(phba); + if (rpi == LPFC_RPI_ALLOC_ERROR) { + spin_lock_irqsave(&phba->ndlp_lock, flags); + NLP_CLR_NODE_ACT(ndlp); + spin_unlock_irqrestore(&phba->ndlp_lock, flags); + continue; } + ndlp->nlp_rpi = rpi; + lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, + "0009 rpi:%x DID:%x " + "flg:%x map:%x %p\n", ndlp->nlp_rpi, + 
ndlp->nlp_DID, ndlp->nlp_flag, + ndlp->nlp_usg_map, ndlp); } } lpfc_destroy_vport_work_array(phba, vports); @@ -3508,6 +3541,12 @@ lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba) spin_unlock(&phba->scsi_buf_list_put_lock); spin_unlock_irq(&phba->scsi_buf_list_get_lock); + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6060 Current allocated SCSI xri-sgl count:%d, " + "maximum SCSI xri count:%d (split:%d)\n", + phba->sli4_hba.scsi_xri_cnt, + phba->sli4_hba.scsi_xri_max, phba->cfg_xri_split); + if (phba->sli4_hba.scsi_xri_cnt > phba->sli4_hba.scsi_xri_max) { /* max scsi xri shrinked below the allocated scsi buffers */ scsi_xri_cnt = phba->sli4_hba.scsi_xri_cnt - @@ -4508,9 +4547,15 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) /* Parse and translate link attention fields */ la = (struct lpfc_mbx_read_top *)&pmb->u.mb.un.varReadTop; la->eventTag = acqe_fc->event_tag; - bf_set(lpfc_mbx_read_top_att_type, la, - LPFC_FC_LA_TYPE_LINK_DOWN); + if (phba->sli4_hba.link_state.status == + LPFC_FC_LA_TYPE_UNEXP_WWPN) { + bf_set(lpfc_mbx_read_top_att_type, la, + LPFC_FC_LA_TYPE_UNEXP_WWPN); + } else { + bf_set(lpfc_mbx_read_top_att_type, la, + LPFC_FC_LA_TYPE_LINK_DOWN); + } /* Invoke the mailbox command callback function */ lpfc_mbx_cmpl_read_topology(phba, pmb); @@ -4716,10 +4761,9 @@ lpfc_sli4_perform_vport_cvl(struct lpfc_vport *vport) ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, Fabric_DID); if (!ndlp) return 0; - lpfc_nlp_init(vport, ndlp, Fabric_DID); /* Set the node type */ ndlp->nlp_type |= NLP_FABRIC; /* Put ndlp onto node list */ @@ -5778,6 +5822,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Initialize the Abort nvme buffer list used by driver */ spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list); 
+ INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); /* Fast-path XRI aborted CQ Event work queue list */ INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue); } @@ -5809,6 +5854,12 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->sli4_hba.lpfc_vfi_blk_list); INIT_LIST_HEAD(&phba->lpfc_vpi_blk_list); + /* Initialize mboxq lists. If the early init routines fail + * these lists need to be correctly initialized. + */ + INIT_LIST_HEAD(&phba->sli.mboxq); + INIT_LIST_HEAD(&phba->sli.mboxq_cmpl); + /* initialize optic_state to 0xFF */ phba->sli4_hba.lnk_info.optic_state = 0xff; @@ -5874,6 +5925,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) "READ_NV, mbxStatus x%x\n", bf_get(lpfc_mqe_command, &mboxq->u.mqe), bf_get(lpfc_mqe_status, &mboxq->u.mqe)); + mempool_free(mboxq, phba->mbox_mem_pool); rc = -EIO; goto out_free_bsmbx; } @@ -6398,7 +6450,7 @@ lpfc_init_sgl_list(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->sli4_hba.lpfc_els_sgl_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_els_sgl_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_sgl_list); - INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); /* els xri-sgl book keeping */ phba->sli4_hba.els_xri_cnt = 0; @@ -7799,7 +7851,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx) /* Create Fast Path FCP WQs */ wqesize = (phba->fcp_embed_io) ? - LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; + LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; qdesc = lpfc_sli4_queue_alloc(phba, wqesize, phba->sli4_hba.wq_ecount); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -7830,7 +7882,7 @@ int lpfc_sli4_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; - int idx, io_channel, max; + int idx, io_channel; /* * Create HBA Record arrays. 
@@ -7991,15 +8043,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) if (lpfc_alloc_nvme_wq_cq(phba, idx)) goto out_error; - /* allocate MRQ CQs */ - max = phba->cfg_nvme_io_channel; - if (max < phba->cfg_nvmet_mrq) - max = phba->cfg_nvmet_mrq; - - for (idx = 0; idx < max; idx++) - if (lpfc_alloc_nvme_wq_cq(phba, idx)) - goto out_error; - if (phba->nvmet_support) { for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) { qdesc = lpfc_sli4_queue_alloc(phba, @@ -8221,11 +8264,11 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) /* Release FCP cqs */ lpfc_sli4_release_queues(&phba->sli4_hba.fcp_cq, - phba->cfg_fcp_io_channel); + phba->cfg_fcp_io_channel); /* Release FCP wqs */ lpfc_sli4_release_queues(&phba->sli4_hba.fcp_wq, - phba->cfg_fcp_io_channel); + phba->cfg_fcp_io_channel); /* Release FCP CQ mapping array */ lpfc_sli4_release_queue_map(&phba->sli4_hba.fcp_cq_map); @@ -8571,15 +8614,15 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0528 %s not allocated\n", phba->sli4_hba.mbx_cq ? 
- "Mailbox WQ" : "Mailbox CQ"); + "Mailbox WQ" : "Mailbox CQ"); rc = -ENOMEM; goto out_destroy; } rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0], - phba->sli4_hba.mbx_cq, - phba->sli4_hba.mbx_wq, - NULL, 0, LPFC_MBOX); + phba->sli4_hba.mbx_cq, + phba->sli4_hba.mbx_wq, + NULL, 0, LPFC_MBOX); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0529 Failed setup of mailbox WQ/CQ: rc = 0x%x\n", @@ -9934,17 +9977,19 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) { int wait_time = 0; int nvme_xri_cmpl = 1; + int nvmet_xri_cmpl = 1; int fcp_xri_cmpl = 1; int els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); - int nvmet_xri_cmpl = - list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) fcp_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_scsi_buf_list); - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { nvme_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_nvme_buf_list); + nvmet_xri_cmpl = + list_empty(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); + } while (!fcp_xri_cmpl || !els_xri_cmpl || !nvme_xri_cmpl || !nvmet_xri_cmpl) { @@ -9970,9 +10015,12 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) msleep(LPFC_XRI_EXCH_BUSY_WAIT_T1); wait_time += LPFC_XRI_EXCH_BUSY_WAIT_T1; } - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { nvme_xri_cmpl = list_empty( &phba->sli4_hba.lpfc_abts_nvme_buf_list); + nvmet_xri_cmpl = list_empty( + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list); + } if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) fcp_xri_cmpl = list_empty( @@ -9981,8 +10029,6 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); - nvmet_xri_cmpl = - list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); } } @@ -10048,9 +10094,14 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) /* Stop kthread signal shall trigger 
work_done one more time */ kthread_stop(phba->worker_thread); + /* Unset the queues shared with the hardware then release all + * allocated resources. + */ + lpfc_sli4_queue_unset(phba); + lpfc_sli4_queue_destroy(phba); + /* Reset SLI4 HBA FCoE function */ lpfc_pci_function_reset(phba); - lpfc_sli4_queue_destroy(phba); /* Stop the SLI4 device port */ phba->pport->work_port_events = 0; @@ -10306,6 +10357,7 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid) } /* Initialize and populate the iocb list per host */ + error = lpfc_init_iocb_list(phba, LPFC_IOCB_LIST_CNT); if (error) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -11051,7 +11103,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) struct lpfc_hba *phba; struct lpfc_vport *vport = NULL; struct Scsi_Host *shost = NULL; - int error; + int error, cnt; uint32_t cfg_mode, intr_mode; /* Allocate memory for HBA structure */ @@ -11085,12 +11137,15 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) goto out_unset_pci_mem_s4; } - /* Initialize and populate the iocb list per host */ + cnt = phba->cfg_iocb_cnt * 1024; + if (phba->nvmet_support) + cnt += phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq; + /* Initialize and populate the iocb list per host */ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2821 initialize iocb list %d.\n", - phba->cfg_iocb_cnt*1024); - error = lpfc_init_iocb_list(phba, phba->cfg_iocb_cnt*1024); + "2821 initialize iocb list %d total %d\n", + phba->cfg_iocb_cnt, cnt); + error = lpfc_init_iocb_list(phba, cnt); if (error) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -11177,7 +11232,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) if ((phba->nvmet_support == 0) && (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) { /* Create NVME binding with nvme_fc_transport. This - * ensures the vport is initialized. + * ensures the vport is initialized. 
If the localport + * create fails, it should not unload the driver to + * support field issues. */ error = lpfc_nvme_create_localport(vport); if (error) { @@ -11185,7 +11242,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) "6004 NVME registration failed, " "error x%x\n", error); - goto out_disable_intr; } } @@ -11984,6 +12040,7 @@ int lpfc_fof_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; + uint32_t wqesize; /* Create FOF EQ */ qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.eq_esize, @@ -12004,8 +12061,11 @@ lpfc_fof_queue_create(struct lpfc_hba *phba) phba->sli4_hba.oas_cq = qdesc; /* Create OAS WQ */ - qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.wq_esize, + wqesize = (phba->fcp_embed_io) ? + LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; + qdesc = lpfc_sli4_queue_alloc(phba, wqesize, phba->sli4_hba.wq_ecount); + if (!qdesc) goto out_error; diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index a928f5187fa4..ce25a18367b5 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -2083,9 +2083,12 @@ lpfc_request_features(struct lpfc_hba *phba, struct lpfcMboxq *mboxq) if (phba->max_vpi && phba->cfg_enable_npiv) bf_set(lpfc_mbx_rq_ftr_rq_npiv, &mboxq->u.mqe.un.req_ftrs, 1); - if (phba->nvmet_support) + if (phba->nvmet_support) { bf_set(lpfc_mbx_rq_ftr_rq_mrqp, &mboxq->u.mqe.un.req_ftrs, 1); - + /* iaab/iaar NOT set for now */ + bf_set(lpfc_mbx_rq_ftr_rq_iaab, &mboxq->u.mqe.un.req_ftrs, 0); + bf_set(lpfc_mbx_rq_ftr_rq_iaar, &mboxq->u.mqe.un.req_ftrs, 0); + } return; } diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 061626bdf701..8777c2d5f50d 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -361,8 +361,12 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, case NLP_STE_PRLI_ISSUE: case NLP_STE_UNMAPPED_NODE: case NLP_STE_MAPPED_NODE: - /* 
lpfc_plogi_confirm_nport skips fabric did, handle it here */ - if (!(ndlp->nlp_type & NLP_FABRIC)) { + /* For initiators, lpfc_plogi_confirm_nport skips fabric did. + * For target mode, execute implicit logo. + * Fabric nodes go into NPR. + */ + if (!(ndlp->nlp_type & NLP_FABRIC) && + !(phba->nvmet_support)) { lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL); return 1; diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 0024de1c6c1f..8008c8205fb6 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -401,6 +401,7 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, struct lpfc_nodelist *ndlp; struct ulp_bde64 *bpl; struct lpfc_dmabuf *bmp; + uint16_t ntype, nstate; /* there are two dma buf in the request, actually there is one and * the second one is just the start address + cmd size. @@ -417,11 +418,26 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, vport = lport->vport; ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); - if (!ndlp) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6043 Could not find node for DID %x\n", + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6051 DID x%06x not an active rport.\n", pnvme_rport->port_id); - return 1; + return -ENODEV; + } + + /* The remote node has to be a mapped nvme target or an + * unmapped nvme initiator or it's an error. + */ + ntype = ndlp->nlp_type; + nstate = ndlp->nlp_state; + if ((ntype & NLP_NVME_TARGET && nstate != NLP_STE_MAPPED_NODE) || + (ntype & NLP_NVME_INITIATOR && nstate != NLP_STE_UNMAPPED_NODE)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6088 DID x%06x not ready for " + "IO. 
State x%x, Type x%x\n", + pnvme_rport->port_id, + ndlp->nlp_state, ndlp->nlp_type); + return -ENODEV; } bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); if (!bmp) { @@ -456,7 +472,7 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, /* Expand print to include key fields. */ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, - "6051 ENTER. lport %p, rport %p lsreq%p rqstlen:%d " + "6149 ENTER. lport %p, rport %p lsreq%p rqstlen:%d " "rsplen:%d %pad %pad\n", pnvme_lport, pnvme_rport, pnvme_lsreq, pnvme_lsreq->rqstlen, @@ -745,6 +761,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, struct nvme_fc_cmd_iu *cp; struct lpfc_nvme_rport *rport; struct lpfc_nodelist *ndlp; + struct lpfc_nvme_fcpreq_priv *freqpriv; unsigned long flags; uint32_t code; uint16_t cid, sqhd, data; @@ -772,9 +789,8 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, ndlp = rport->ndlp; if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, - "6061 rport %p, ndlp %p, DID x%06x ndlp " - "not ready.\n", - rport, ndlp, rport->remoteport->port_id); + "6061 rport %p, DID x%06x node not ready.\n", + rport, rport->remoteport->port_id); ndlp = lpfc_findnode_did(vport, rport->remoteport->port_id); if (!ndlp) { @@ -853,15 +869,18 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, break; lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, "6081 NVME Completion Protocol Error: " - "status x%x result x%x placed x%x\n", + "xri %x status x%x result x%x " + "placed x%x\n", + lpfc_ncmd->cur_iocbq.sli4_xritag, lpfc_ncmd->status, lpfc_ncmd->result, wcqe->total_data_placed); break; default: out_err: lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, - "6072 NVME Completion Error: " + "6072 NVME Completion Error: xri %x " "status x%x result x%x placed x%x\n", + lpfc_ncmd->cur_iocbq.sli4_xritag, lpfc_ncmd->status, lpfc_ncmd->result, wcqe->total_data_placed); 
nCmd->transferred_length = 0; @@ -900,6 +919,8 @@ out_err: phba->cpucheck_cmpl_io[lpfc_ncmd->cpu]++; } #endif + freqpriv = nCmd->private; + freqpriv->nvme_buf = NULL; nCmd->done(nCmd); spin_lock_irqsave(&phba->hbalock, flags); @@ -1099,12 +1120,12 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport, first_data_sgl = sgl; lpfc_ncmd->seg_cnt = nCmd->sg_cnt; - if (lpfc_ncmd->seg_cnt > phba->cfg_sg_seg_cnt) { + if (lpfc_ncmd->seg_cnt > phba->cfg_nvme_seg_cnt) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6058 Too many sg segments from " "NVME Transport. Max %d, " "nvmeIO sg_cnt %d\n", - phba->cfg_sg_seg_cnt, + phba->cfg_nvme_seg_cnt, lpfc_ncmd->seg_cnt); lpfc_ncmd->seg_cnt = 0; return 1; @@ -1196,6 +1217,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, struct lpfc_nvme_buf *lpfc_ncmd; struct lpfc_nvme_rport *rport; struct lpfc_nvme_qhandle *lpfc_queue_info; + struct lpfc_nvme_fcpreq_priv *freqpriv = pnvme_fcreq->private; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint64_t start = 0; #endif @@ -1274,7 +1296,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, * Do not let the IO hang out forever. There is no midlayer issuing * an abort so inform the FW of the maximum IO pending time. */ - pnvme_fcreq->private = (void *)lpfc_ncmd; + freqpriv->nvme_buf = lpfc_ncmd; lpfc_ncmd->nvmeCmd = pnvme_fcreq; lpfc_ncmd->nrport = rport; lpfc_ncmd->ndlp = ndlp; @@ -1404,6 +1426,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, struct lpfc_nvme_buf *lpfc_nbuf; struct lpfc_iocbq *abts_buf; struct lpfc_iocbq *nvmereq_wqe; + struct lpfc_nvme_fcpreq_priv *freqpriv = pnvme_fcreq->private; union lpfc_wqe *abts_wqe; unsigned long flags; int ret_val; @@ -1414,7 +1437,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, phba = vport->phba; /* Announce entry to new IO submit field. 
*/ - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, "6002 Abort Request to rport DID x%06x " "for nvme_fc_req %p\n", pnvme_rport->port_id, @@ -1444,7 +1467,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* The remote node has to be ready to send an abort. */ if ((ndlp->nlp_state != NLP_STE_MAPPED_NODE) && !(ndlp->nlp_type & NLP_NVME_TARGET)) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_ABTS, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6048 rport %p, DID x%06x not ready for " "IO. State x%x, Type x%x\n", rport, pnvme_rport->port_id, @@ -1459,27 +1482,28 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* driver queued commands are in process of being flushed */ if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6139 Driver in reset cleanup - flushing " "NVME Req now. hba_flag x%x\n", phba->hba_flag); return; } - lpfc_nbuf = (struct lpfc_nvme_buf *)pnvme_fcreq->private; + lpfc_nbuf = freqpriv->nvme_buf; if (!lpfc_nbuf) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6140 NVME IO req has no matching lpfc nvme " "io buffer. Skipping abort req.\n"); return; } else if (!lpfc_nbuf->nvmeCmd) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6141 lpfc NVME IO req has no nvme_fcreq " "io buffer. 
Skipping abort req.\n"); return; } + nvmereq_wqe = &lpfc_nbuf->cur_iocbq; /* * The lpfc_nbuf and the mapped nvme_fcreq in the driver's @@ -1490,23 +1514,22 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, */ if (lpfc_nbuf->nvmeCmd != pnvme_fcreq) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6143 NVME req mismatch: " "lpfc_nbuf %p nvmeCmd %p, " - "pnvme_fcreq %p. Skipping Abort\n", + "pnvme_fcreq %p. Skipping Abort xri x%x\n", lpfc_nbuf, lpfc_nbuf->nvmeCmd, - pnvme_fcreq); + pnvme_fcreq, nvmereq_wqe->sli4_xritag); return; } /* Don't abort IOs no longer on the pending queue. */ - nvmereq_wqe = &lpfc_nbuf->cur_iocbq; if (!(nvmereq_wqe->iocb_flag & LPFC_IO_ON_TXCMPLQ)) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6142 NVME IO req %p not queued - skipping " - "abort req\n", - pnvme_fcreq); + "abort req xri x%x\n", + pnvme_fcreq, nvmereq_wqe->sli4_xritag); return; } @@ -1517,21 +1540,22 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* Outstanding abort is in progress */ if (nvmereq_wqe->iocb_flag & LPFC_DRIVER_ABORTED) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6144 Outstanding NVME I/O Abort Request " "still pending on nvme_fcreq %p, " - "lpfc_ncmd %p\n", - pnvme_fcreq, lpfc_nbuf); + "lpfc_ncmd %p xri x%x\n", + pnvme_fcreq, lpfc_nbuf, + nvmereq_wqe->sli4_xritag); return; } abts_buf = __lpfc_sli_get_iocbq(phba); if (!abts_buf) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6136 No available abort wqes. 
Skipping " - "Abts req for nvme_fcreq %p.\n", - pnvme_fcreq); + "Abts req for nvme_fcreq %p xri x%x\n", + pnvme_fcreq, nvmereq_wqe->sli4_xritag); return; } @@ -1580,7 +1604,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, ret_val = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_buf); spin_unlock_irqrestore(&phba->hbalock, flags); if (ret_val == IOCB_ERROR) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6137 Failed abts issue_wqe with status x%x " "for nvme_fcreq %p.\n", ret_val, pnvme_fcreq); @@ -1588,8 +1612,8 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, return; } - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, - "6138 Transport Abort NVME Request Issued for\n" + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, + "6138 Transport Abort NVME Request Issued for " "ox_id x%x on reqtag x%x\n", nvmereq_wqe->sli4_xritag, abts_buf->iotag); @@ -1618,7 +1642,7 @@ static struct nvme_fc_port_template lpfc_nvme_template = { .local_priv_sz = sizeof(struct lpfc_nvme_lport), .remote_priv_sz = sizeof(struct lpfc_nvme_rport), .lsrqst_priv_sz = 0, - .fcprqst_priv_sz = 0, + .fcprqst_priv_sz = sizeof(struct lpfc_nvme_fcpreq_priv), }; /** @@ -2049,7 +2073,7 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) if (lpfc_test_rrq_active(phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag)) continue; - list_del(&lpfc_ncmd->list); + list_del_init(&lpfc_ncmd->list); found = 1; break; } @@ -2064,7 +2088,7 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) if (lpfc_test_rrq_active( phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag)) continue; - list_del(&lpfc_ncmd->list); + list_del_init(&lpfc_ncmd->list); found = 1; break; } @@ -2092,6 +2116,12 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd) lpfc_ncmd->nonsg_phys = 0; if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) { + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6310 XB release deferred for " + 
"ox_id x%x on reqtag x%x\n", + lpfc_ncmd->cur_iocbq.sli4_xritag, + lpfc_ncmd->cur_iocbq.iotag); + spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag); lpfc_ncmd->nvmeCmd = NULL; @@ -2142,8 +2172,18 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) nfcp_info.node_name = wwn_to_u64(vport->fc_nodename.u.wwn); nfcp_info.port_name = wwn_to_u64(vport->fc_portname.u.wwn); - /* For now need + 1 to get around NVME transport logic */ - lpfc_nvme_template.max_sgl_segments = phba->cfg_sg_seg_cnt + 1; + /* Limit to LPFC_MAX_NVME_SEG_CNT. + * For now need + 1 to get around NVME transport logic. + */ + if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_INIT, + "6300 Reducing sg segment cnt to %d\n", + LPFC_MAX_NVME_SEG_CNT); + phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT; + } else { + phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt; + } + lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1; lpfc_nvme_template.max_hw_queues = phba->cfg_nvme_io_channel; /* localport is allocated from the stack, but the registration @@ -2249,12 +2289,23 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) void lpfc_nvme_update_localport(struct lpfc_vport *vport) { +#if (IS_ENABLED(CONFIG_NVME_FC)) struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; localport = vport->localport; + if (!localport) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME, + "6710 Update NVME fail. No localport\n"); + return; + } lport = (struct lpfc_nvme_lport *)localport->private; - + if (!lport) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME, + "6171 Update NVME fail. 
localP %p, No lport\n", + localport); + return; + } lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, "6012 Update NVME lport %p did x%x\n", localport, vport->fc_myDID); @@ -2268,7 +2319,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport) lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, "6030 bound lport %p to DID x%06x\n", lport, localport->port_id); - +#endif } int @@ -2409,6 +2460,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport; struct nvme_fc_remote_port *remoteport; + unsigned long wait_tmo; localport = vport->localport; @@ -2451,11 +2503,12 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) * before proceeding. This guarantees the transport and driver * have completed the unreg process. */ - ret = wait_for_completion_timeout(&rport->rport_unreg_done, 5); + wait_tmo = msecs_to_jiffies(5000); + ret = wait_for_completion_timeout(&rport->rport_unreg_done, + wait_tmo); if (ret == 0) { lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6169 Unreg nvme wait failed %d\n", - ret); + "6169 Unreg nvme wait timeout\n"); } } return; @@ -2463,7 +2516,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) input_err: #endif lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6168: State error: lport %p, rport%p FCID x%06x\n", + "6168 State error: lport %p, rport%p FCID x%06x\n", vport->localport, ndlp->rport, ndlp->nlp_DID); } @@ -2494,7 +2547,7 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, &phba->sli4_hba.lpfc_abts_nvme_buf_list, list) { if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) { - list_del(&lpfc_ncmd->list); + list_del_init(&lpfc_ncmd->list); lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; lpfc_ncmd->status = IOSTAT_SUCCESS; spin_unlock( @@ -2510,6 +2563,12 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, rxid, 1); lpfc_sli4_abts_err_handler(phba, ndlp, axri); } + + lpfc_printf_log(phba, KERN_INFO, 
LOG_NVME_ABTS, + "6311 XRI Aborted xri x%x tag x%x " + "released\n", + xri, lpfc_ncmd->cur_iocbq.iotag); + lpfc_release_nvme_buf(phba, lpfc_ncmd); if (rrq_empty) lpfc_worker_wake_up(phba); @@ -2518,4 +2577,8 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, } spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6312 XRI Aborted xri x%x not found\n", xri); + } diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 1347deb8dd6c..ec32f45daa66 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -21,12 +21,7 @@ * included with this package. * ********************************************************************/ -#define LPFC_NVME_MIN_SEGS 16 -#define LPFC_NVME_DEFAULT_SEGS 66 /* 256K IOs - 64 + 2 */ -#define LPFC_NVME_MAX_SEGS 510 -#define LPFC_NVMET_MIN_POSTBUF 16 -#define LPFC_NVMET_DEFAULT_POSTBUF 1024 -#define LPFC_NVMET_MAX_POSTBUF 4096 +#define LPFC_NVME_DEFAULT_SEGS (64 + 1) /* 256K IOs */ #define LPFC_NVME_WQSIZE 256 #define LPFC_NVME_ERSP_LEN 0x20 @@ -102,3 +97,7 @@ struct lpfc_nvme_buf { uint64_t ts_data_nvme; #endif }; + +struct lpfc_nvme_fcpreq_priv { + struct lpfc_nvme_buf *nvme_buf; +}; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index acba1b67e505..94434e621c33 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -71,6 +71,26 @@ static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *, struct lpfc_nvmet_rcv_ctx *, uint32_t, uint16_t); +void +lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp) +{ + unsigned long iflag; + + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6313 NVMET Defer ctx release xri x%x flg x%x\n", + ctxp->oxid, ctxp->flag); + + spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag); + if (ctxp->flag & LPFC_NVMET_CTX_RLS) { + 
spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock, + iflag); + return; + } + ctxp->flag |= LPFC_NVMET_CTX_RLS; + list_add_tail(&ctxp->list, &phba->sli4_hba.lpfc_abts_nvmet_ctx_list); + spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag); +} + /** * lpfc_nvmet_xmt_ls_rsp_cmp - Completion handler for LS Response * @phba: Pointer to HBA context object. @@ -139,6 +159,11 @@ lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, struct lpfc_dmabuf *mp) { if (ctxp) { + if (ctxp->flag) + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6314 rq_post ctx xri x%x flag x%x\n", + ctxp->oxid, ctxp->flag); + if (ctxp->txrdy) { pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy, ctxp->txrdy_phys); @@ -337,39 +362,55 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, #endif ctxp = cmdwqe->context2; + ctxp->flag &= ~LPFC_NVMET_IO_INP; + rsp = &ctxp->ctx.fcp_req; op = rsp->op; - ctxp->flag &= ~LPFC_NVMET_IO_INP; status = bf_get(lpfc_wcqe_c_status, wcqe); result = wcqe->parameter; - if (!phba->targetport) - goto out; + if (phba->targetport) + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + else + tgtp = NULL; lpfc_nvmeio_data(phba, "NVMET FCP CMPL: xri x%x op x%x status x%x\n", ctxp->oxid, op, status); - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; if (status) { rsp->fcp_error = NVME_SC_DATA_XFER_ERROR; rsp->transferred_length = 0; - atomic_inc(&tgtp->xmt_fcp_rsp_error); + if (tgtp) + atomic_inc(&tgtp->xmt_fcp_rsp_error); + + /* pick up SLI4 exhange busy condition */ + if (bf_get(lpfc_wcqe_c_xb, wcqe)) { + ctxp->flag |= LPFC_NVMET_XBUSY; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6315 IO Cmpl XBUSY: xri x%x: %x/%x\n", + ctxp->oxid, status, result); + } else { + ctxp->flag &= ~LPFC_NVMET_XBUSY; + } + } else { rsp->fcp_error = NVME_SC_SUCCESS; if (op == NVMET_FCOP_RSP) rsp->transferred_length = rsp->rsplen; else rsp->transferred_length = 
rsp->transfer_length; - atomic_inc(&tgtp->xmt_fcp_rsp_cmpl); + if (tgtp) + atomic_inc(&tgtp->xmt_fcp_rsp_cmpl); } -out: if ((op == NVMET_FCOP_READDATA_RSP) || (op == NVMET_FCOP_RSP)) { /* Sanity check */ ctxp->state = LPFC_NVMET_STE_DONE; ctxp->entry_cnt++; + #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (phba->ktime_on) { if (rsp->op == NVMET_FCOP_READDATA_RSP) { @@ -408,9 +449,7 @@ out: if (phba->ktime_on) lpfc_nvmet_ktime(phba, ctxp); #endif - /* Let Abort cmpl repost the context */ - if (!(ctxp->flag & LPFC_NVMET_ABORT_OP)) - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + /* lpfc_nvmet_xmt_fcp_release() will recycle the context */ } else { ctxp->entry_cnt++; start_clean = offsetof(struct lpfc_iocbq, wqe); @@ -519,7 +558,6 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); struct lpfc_hba *phba = ctxp->phba; struct lpfc_iocbq *nvmewqeq; - unsigned long iflags; int rc; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -544,32 +582,12 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, } #endif - if (rsp->op == NVMET_FCOP_ABORT) { - lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6103 Abort op: oxri x%x %d cnt %d\n", - ctxp->oxid, ctxp->state, ctxp->entry_cnt); - - lpfc_nvmeio_data(phba, "NVMET FCP ABRT: " - "xri x%x state x%x cnt x%x\n", - ctxp->oxid, ctxp->state, ctxp->entry_cnt); - - atomic_inc(&lpfc_nvmep->xmt_fcp_abort); - ctxp->entry_cnt++; - ctxp->flag |= LPFC_NVMET_ABORT_OP; - if (ctxp->flag & LPFC_NVMET_IO_INP) - lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid, - ctxp->oxid); - else - lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, - ctxp->oxid); - return 0; - } - /* Sanity check */ - if (ctxp->state == LPFC_NVMET_STE_ABORT) { + if ((ctxp->flag & LPFC_NVMET_ABTS_RCV) || + (ctxp->state == LPFC_NVMET_STE_ABORT)) { atomic_inc(&lpfc_nvmep->xmt_fcp_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6102 Bad state IO x%x aborted\n", + "6102 IO xri x%x aborted\n", 
ctxp->oxid); rc = -ENXIO; goto aerr; @@ -594,10 +612,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n", ctxp->oxid, rsp->op, rsp->rsplen); - /* For now we take hbalock */ - spin_lock_irqsave(&phba->hbalock, iflags); rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq); - spin_unlock_irqrestore(&phba->hbalock, iflags); if (rc == WQE_SUCCESS) { ctxp->flag |= LPFC_NVMET_IO_INP; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -634,10 +649,79 @@ lpfc_nvmet_targetport_delete(struct nvmet_fc_target_port *targetport) complete(&tport->tport_unreg_done); } +static void +lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *req) +{ + struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; + struct lpfc_nvmet_rcv_ctx *ctxp = + container_of(req, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_hba *phba = ctxp->phba; + unsigned long flags; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6103 Abort op: oxri x%x flg x%x cnt %d\n", + ctxp->oxid, ctxp->flag, ctxp->entry_cnt); + + lpfc_nvmeio_data(phba, "NVMET FCP ABRT: " + "xri x%x flg x%x cnt x%x\n", + ctxp->oxid, ctxp->flag, ctxp->entry_cnt); + + atomic_inc(&lpfc_nvmep->xmt_fcp_abort); + ctxp->entry_cnt++; + spin_lock_irqsave(&ctxp->ctxlock, flags); + + /* Since iaab/iaar are NOT set, we need to check + * if the firmware is in process of aborting IO + */ + if (ctxp->flag & LPFC_NVMET_XBUSY) { + spin_unlock_irqrestore(&ctxp->ctxlock, flags); + return; + } + ctxp->flag |= LPFC_NVMET_ABORT_OP; + if (ctxp->flag & LPFC_NVMET_IO_INP) + lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid, + ctxp->oxid); + else + lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, + ctxp->oxid); + spin_unlock_irqrestore(&ctxp->ctxlock, flags); +} + +static void +lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *rsp) +{ + struct lpfc_nvmet_rcv_ctx *ctxp = + container_of(rsp, struct 
lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_hba *phba = ctxp->phba; + unsigned long flags; + bool aborting = false; + + spin_lock_irqsave(&ctxp->ctxlock, flags); + if ((ctxp->flag & LPFC_NVMET_ABORT_OP) || + (ctxp->flag & LPFC_NVMET_XBUSY)) { + aborting = true; + /* let the abort path do the real release */ + lpfc_nvmet_defer_release(phba, ctxp); + } + spin_unlock_irqrestore(&ctxp->ctxlock, flags); + + lpfc_nvmeio_data(phba, "NVMET FCP FREE: xri x%x ste %d\n", ctxp->oxid, + ctxp->state, 0); + + if (aborting) + return; + + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); +} + static struct nvmet_fc_target_template lpfc_tgttemplate = { .targetport_delete = lpfc_nvmet_targetport_delete, .xmt_ls_rsp = lpfc_nvmet_xmt_ls_rsp, .fcp_op = lpfc_nvmet_xmt_fcp_op, + .fcp_abort = lpfc_nvmet_xmt_fcp_abort, + .fcp_req_release = lpfc_nvmet_xmt_fcp_release, .max_hw_queues = 1, .max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS, @@ -666,10 +750,23 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) pinfo.port_name = wwn_to_u64(vport->fc_portname.u.wwn); pinfo.port_id = vport->fc_myDID; + /* Limit to LPFC_MAX_NVME_SEG_CNT. + * For now need + 1 to get around NVME transport logic. 
+ */ + if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) { + lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT, + "6400 Reducing sg segment cnt to %d\n", + LPFC_MAX_NVME_SEG_CNT); + phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT; + } else { + phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt; + } + lpfc_tgttemplate.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1; lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel; - lpfc_tgttemplate.max_sgl_segments = phba->cfg_sg_seg_cnt; lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; + NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | + NVMET_FCTGTFEAT_CMD_IN_ISR | + NVMET_FCTGTFEAT_OPDONE_IN_ISR; #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, @@ -750,7 +847,120 @@ void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, struct sli4_wcqe_xri_aborted *axri) { - /* TODO: work in progress */ + uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri); + uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri); + struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; + struct lpfc_nodelist *ndlp; + unsigned long iflag = 0; + int rrq_empty = 0; + bool released = false; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6317 XB aborted xri x%x rxid x%x\n", xri, rxid); + + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) + return; + spin_lock_irqsave(&phba->hbalock, iflag); + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(ctxp, next_ctxp, + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + list) { + if (ctxp->rqb_buffer->sglq->sli4_xritag != xri) + continue; + + /* Check if we already received a free context call + * and we have completed processing an abort situation. 
+ */ + if (ctxp->flag & LPFC_NVMET_CTX_RLS && + !(ctxp->flag & LPFC_NVMET_ABORT_OP)) { + list_del(&ctxp->list); + released = true; + } + ctxp->flag &= ~LPFC_NVMET_XBUSY; + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + + rrq_empty = list_empty(&phba->active_rrq_list); + spin_unlock_irqrestore(&phba->hbalock, iflag); + ndlp = lpfc_findnode_did(phba->pport, ctxp->sid); + if (ndlp && NLP_CHK_NODE_ACT(ndlp) && + (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE || + ndlp->nlp_state == NLP_STE_MAPPED_NODE)) { + lpfc_set_rrq_active(phba, ndlp, + ctxp->rqb_buffer->sglq->sli4_lxritag, + rxid, 1); + lpfc_sli4_abts_err_handler(phba, ndlp, axri); + } + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6318 XB aborted %x flg x%x (%x)\n", + ctxp->oxid, ctxp->flag, released); + if (released) + lpfc_nvmet_rq_post(phba, ctxp, + &ctxp->rqb_buffer->hbuf); + if (rrq_empty) + lpfc_worker_wake_up(phba); + return; + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + spin_unlock_irqrestore(&phba->hbalock, iflag); +} + +int +lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr) + +{ +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) + struct lpfc_hba *phba = vport->phba; + struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; + struct nvmefc_tgt_fcp_req *rsp; + uint16_t xri; + unsigned long iflag = 0; + + xri = be16_to_cpu(fc_hdr->fh_ox_id); + + spin_lock_irqsave(&phba->hbalock, iflag); + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(ctxp, next_ctxp, + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + list) { + if (ctxp->rqb_buffer->sglq->sli4_xritag != xri) + continue; + + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + spin_unlock_irqrestore(&phba->hbalock, iflag); + + spin_lock_irqsave(&ctxp->ctxlock, iflag); + ctxp->flag |= LPFC_NVMET_ABTS_RCV; + spin_unlock_irqrestore(&ctxp->ctxlock, iflag); + + lpfc_nvmeio_data(phba, + "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n", + xri, smp_processor_id(), 0); + + lpfc_printf_log(phba, 
KERN_INFO, LOG_NVME_ABTS, + "6319 NVMET Rcv ABTS:acc xri x%x\n", xri); + + rsp = &ctxp->ctx.fcp_req; + nvmet_fc_rcv_fcp_abort(phba->targetport, rsp); + + /* Respond with BA_ACC accordingly */ + lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 1); + return 0; + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + spin_unlock_irqrestore(&phba->hbalock, iflag); + + lpfc_nvmeio_data(phba, "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n", + xri, smp_processor_id(), 1); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6320 NVMET Rcv ABTS:rjt xri x%x\n", xri); + + /* Respond with BA_RJT accordingly */ + lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 0); +#endif + return 0; } void @@ -940,6 +1150,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, ctxp->rqb_buffer = nvmebuf; ctxp->entry_cnt = 1; ctxp->flag = 0; + spin_lock_init(&ctxp->ctxlock); #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (phba->ktime_on) { @@ -962,8 +1173,8 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, } #endif - lpfc_nvmeio_data(phba, "NVMET FCP RCV: xri x%x sz %d from %06x\n", - oxid, size, sid); + lpfc_nvmeio_data(phba, "NVMET FCP RCV: xri x%x sz %d CPU %02x\n", + oxid, size, smp_processor_id()); atomic_inc(&tgtp->rcv_fcp_cmd_in); /* @@ -1237,11 +1448,11 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, return NULL; } - if (rsp->sg_cnt > phba->cfg_sg_seg_cnt) { + if (rsp->sg_cnt > phba->cfg_nvme_seg_cnt) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6109 lpfc_nvmet_prep_fcp_wqe: seg cnt err: " - "NPORT x%x oxid:x%x\n", - ctxp->sid, ctxp->oxid); + "NPORT x%x oxid:x%x cnt %d\n", + ctxp->sid, ctxp->oxid, phba->cfg_nvme_seg_cnt); return NULL; } @@ -1593,6 +1804,8 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; uint32_t status, result; + unsigned long flags; + bool released = false; ctxp = cmdwqe->context2; status = bf_get(lpfc_wcqe_c_status, wcqe); @@ -1601,21 +1814,46 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct 
lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; atomic_inc(&tgtp->xmt_abort_cmpl); + ctxp->state = LPFC_NVMET_STE_DONE; + + /* Check if we already received a free context call + * and we have completed processing an abort situation. + */ + spin_lock_irqsave(&ctxp->ctxlock, flags); + if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && + !(ctxp->flag & LPFC_NVMET_XBUSY)) { + list_del(&ctxp->list); + released = true; + } + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + spin_unlock_irqrestore(&ctxp->ctxlock, flags); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, - "6165 Abort cmpl: xri x%x WCQE: %08x %08x %08x %08x\n", - ctxp->oxid, wcqe->word0, wcqe->total_data_placed, + "6165 ABORT cmpl: xri x%x flg x%x (%d) " + "WCQE: %08x %08x %08x %08x\n", + ctxp->oxid, ctxp->flag, released, + wcqe->word0, wcqe->total_data_placed, result, wcqe->word3); - ctxp->state = LPFC_NVMET_STE_DONE; - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + /* + * if transport has released ctx, then can reuse it. Otherwise, + * will be recycled by transport release call. + */ + if (released) + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); cmdwqe->context2 = NULL; cmdwqe->context3 = NULL; lpfc_sli_release_iocbq(phba, cmdwqe); + + /* Since iaab/iaar are NOT set, there is no work left. + * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted + * should have been called already. + */ } /** - * lpfc_nvmet_xmt_fcp_abort_cmp - Completion handler for ABTS + * lpfc_nvmet_unsol_fcp_abort_cmp - Completion handler for ABTS * @phba: Pointer to HBA context object. * @cmdwqe: Pointer to driver command WQE object. * @wcqe: Pointer to driver response CQE object. @@ -1625,12 +1863,14 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, * The function frees memory resources used for the NVME commands. 
**/ static void -lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, - struct lpfc_wcqe_complete *wcqe) +lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) { struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; + unsigned long flags; uint32_t status, result; + bool released = false; ctxp = cmdwqe->context2; status = bf_get(lpfc_wcqe_c_status, wcqe); @@ -1639,23 +1879,55 @@ lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; atomic_inc(&tgtp->xmt_abort_cmpl); + if (!ctxp) { + /* if context is clear, related io alrady complete */ + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6070 ABTS cmpl: WCQE: %08x %08x %08x %08x\n", + wcqe->word0, wcqe->total_data_placed, + result, wcqe->word3); + return; + } + + /* Sanity check */ + if (ctxp->state != LPFC_NVMET_STE_ABORT) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6112 ABTS Wrong state:%d oxid x%x\n", + ctxp->state, ctxp->oxid); + } + + /* Check if we already received a free context call + * and we have completed processing an abort situation. 
+ */ + ctxp->state = LPFC_NVMET_STE_DONE; + spin_lock_irqsave(&ctxp->ctxlock, flags); + if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && + !(ctxp->flag & LPFC_NVMET_XBUSY)) { + list_del(&ctxp->list); + released = true; + } + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + spin_unlock_irqrestore(&ctxp->ctxlock, flags); + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6070 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n", - ctxp, wcqe->word0, wcqe->total_data_placed, + "6316 ABTS cmpl xri x%x flg x%x (%x) " + "WCQE: %08x %08x %08x %08x\n", + ctxp->oxid, ctxp->flag, released, + wcqe->word0, wcqe->total_data_placed, result, wcqe->word3); - - if (ctxp) { - /* Sanity check */ - if (ctxp->state != LPFC_NVMET_STE_ABORT) { - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, - "6112 ABORT Wrong state:%d oxid x%x\n", - ctxp->state, ctxp->oxid); - } - ctxp->state = LPFC_NVMET_STE_DONE; + /* + * if transport has released ctx, then can reuse it. Otherwise, + * will be recycled by transport release call. + */ + if (released) lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); - cmdwqe->context2 = NULL; - cmdwqe->context3 = NULL; - } + + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; + + /* Since iaab/iaar are NOT set, there is no work left. + * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted + * should have been called already. 
+ */ } /** @@ -1708,10 +1980,14 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp; lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6067 Abort: sid %x xri x%x/x%x\n", + "6067 ABTS: sid %x xri x%x/x%x\n", sid, xri, ctxp->wqeq->sli4_xritag); tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (!ctxp->wqeq) { + ctxp->wqeq = ctxp->rqb_buffer->iocbq; + ctxp->wqeq->hba_wqidx = 0; + } ndlp = lpfc_findnode_did(phba->pport, sid); if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || @@ -1817,10 +2093,11 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, - "6160 Drop ABTS - wrong NDLP state x%x.\n", + "6160 Drop ABORT - wrong NDLP state x%x.\n", (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE); /* No failure to an ABTS request. */ + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; return 0; } @@ -1828,9 +2105,10 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, ctxp->abort_wqeq = lpfc_sli_get_iocbq(phba); if (!ctxp->abort_wqeq) { lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, - "6161 Abort failed: No wqeqs: " + "6161 ABORT failed: No wqeqs: " "xri: x%x\n", ctxp->oxid); /* No failure to an ABTS request. */ + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; return 0; } abts_wqeq = ctxp->abort_wqeq; @@ -1838,8 +2116,8 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, ctxp->state = LPFC_NVMET_STE_ABORT; /* Announce entry to new IO submit field. */ - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, - "6162 Abort Request to rport DID x%06x " + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6162 ABORT Request to rport DID x%06x " "for xri x%x x%x\n", ctxp->sid, ctxp->oxid, ctxp->wqeq->sli4_xritag); @@ -1855,6 +2133,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, "NVME Req now. 
hba_flag x%x oxid x%x\n", phba->hba_flag, ctxp->oxid); lpfc_sli_release_iocbq(phba, abts_wqeq); + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; return 0; } @@ -1866,6 +2145,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, "still pending on oxid x%x\n", ctxp->oxid); lpfc_sli_release_iocbq(phba, abts_wqeq); + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; return 0; } @@ -1913,9 +2193,10 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, if (rc == WQE_SUCCESS) return 0; + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; lpfc_sli_release_iocbq(phba, abts_wqeq); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME, - "6166 Failed abts issue_wqe with status x%x " + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6166 Failed ABORT issue_wqe with status x%x " "for oxid x%x.\n", rc, ctxp->oxid); return 1; @@ -1944,8 +2225,8 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, spin_lock_irqsave(&phba->hbalock, flags); abts_wqeq = ctxp->wqeq; - abts_wqeq->wqe_cmpl = lpfc_nvmet_xmt_fcp_abort_cmp; - abts_wqeq->iocb_cmpl = 0; + abts_wqeq->wqe_cmpl = lpfc_nvmet_unsol_fcp_abort_cmp; + abts_wqeq->iocb_cmpl = NULL; abts_wqeq->iocb_flag |= LPFC_IO_NVMET; rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq); spin_unlock_irqrestore(&phba->hbalock, flags); @@ -1955,7 +2236,7 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, } aerr: - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, "6135 Failed to Issue ABTS for oxid x%x. Status x%x\n", diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index ca96f05c1604..128759fe6650 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -21,9 +21,7 @@ * included with this package. 
* ********************************************************************/ -#define LPFC_NVMET_MIN_SEGS 16 -#define LPFC_NVMET_DEFAULT_SEGS 64 /* 256K IOs */ -#define LPFC_NVMET_MAX_SEGS 510 +#define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */ #define LPFC_NVMET_SUCCESS_LEN 12 /* Used for NVME Target */ @@ -77,10 +75,12 @@ struct lpfc_nvmet_rcv_ctx { struct nvmefc_tgt_ls_req ls_req; struct nvmefc_tgt_fcp_req fcp_req; } ctx; + struct list_head list; struct lpfc_hba *phba; struct lpfc_iocbq *wqeq; struct lpfc_iocbq *abort_wqeq; dma_addr_t txrdy_phys; + spinlock_t ctxlock; /* protect flag access */ uint32_t *txrdy; uint32_t sid; uint32_t offset; @@ -97,8 +97,11 @@ struct lpfc_nvmet_rcv_ctx { #define LPFC_NVMET_STE_RSP 4 #define LPFC_NVMET_STE_DONE 5 uint16_t flag; -#define LPFC_NVMET_IO_INP 1 -#define LPFC_NVMET_ABORT_OP 2 +#define LPFC_NVMET_IO_INP 0x1 /* IO is in progress on exchange */ +#define LPFC_NVMET_ABORT_OP 0x2 /* Abort WQE issued on exchange */ +#define LPFC_NVMET_XBUSY 0x4 /* XB bit set on IO cmpl */ +#define LPFC_NVMET_CTX_RLS 0x8 /* ctx free requested */ +#define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */ struct rqb_dmabuf *rqb_buffer; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 1c9fa45df7eb..cf19f4976f5f 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -6338,7 +6338,7 @@ lpfc_sli4_get_allocated_extnts(struct lpfc_hba *phba, uint16_t type, } /** - * lpfc_sli4_repost_sgl_list - Repsot the buffers sgl pages as block + * lpfc_sli4_repost_sgl_list - Repost the buffers sgl pages as block * @phba: pointer to lpfc hba data structure. * @pring: Pointer to driver SLI ring object. 
* @sgl_list: linked link of sgl buffers to post @@ -13758,7 +13758,10 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue) lpfc_free_rq_buffer(queue->phba, queue); kfree(queue->rqbp); } - kfree(queue->pring); + + if (!list_empty(&queue->wq_list)) + list_del(&queue->wq_list); + kfree(queue); return; } @@ -14738,6 +14741,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, case LPFC_Q_CREATE_VERSION_1: bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1, wq->entry_count); + bf_set(lpfc_mbox_hdr_version, &shdr->request, + LPFC_Q_CREATE_VERSION_1); + switch (wq->entry_size) { default: case 64: @@ -15561,6 +15567,8 @@ lpfc_wq_destroy(struct lpfc_hba *phba, struct lpfc_queue *wq) } /* Remove wq from any list */ list_del_init(&wq->list); + kfree(wq->pring); + wq->pring = NULL; mempool_free(mbox, wq->phba->mbox_mem_pool); return status; } @@ -16513,7 +16521,7 @@ lpfc_sli4_xri_inrange(struct lpfc_hba *phba, * This function sends a basic response to a previous unsol sequence abort * event after aborting the sequence handling. 
**/ -static void +void lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, struct fc_frame_header *fc_hdr, bool aborted) { @@ -16534,14 +16542,13 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, ndlp = lpfc_findnode_did(vport, sid); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, sid); if (!ndlp) { lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, "1268 Failed to allocate ndlp for " "oxid:x%x SID:x%x\n", oxid, sid); return; } - lpfc_nlp_init(vport, ndlp, sid); /* Put ndlp onto pport node list */ lpfc_enqueue_node(vport, ndlp); } else if (!NLP_CHK_NODE_ACT(ndlp)) { @@ -16690,6 +16697,11 @@ lpfc_sli4_handle_unsol_abort(struct lpfc_vport *vport, } lpfc_in_buf_free(phba, &dmabuf->dbuf); + if (phba->nvmet_support) { + lpfc_nvmet_rcv_unsol_abort(vport, &fc_hdr); + return; + } + /* Respond with BA_ACC or BA_RJT accordingly */ lpfc_sli4_seq_abort_rsp(vport, &fc_hdr, aborted); } diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 710458cf11d6..da46471337c8 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -620,7 +620,7 @@ struct lpfc_sli4_hba { struct list_head lpfc_els_sgl_list; struct list_head lpfc_abts_els_sgl_list; struct list_head lpfc_nvmet_sgl_list; - struct list_head lpfc_abts_nvmet_sgl_list; + struct list_head lpfc_abts_nvmet_ctx_list; struct list_head lpfc_abts_scsi_buf_list; struct list_head lpfc_abts_nvme_buf_list; struct lpfc_sglq **lpfc_sglq_active_list; diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index d4e95e28f4e3..1c26dc67151b 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. 
* *******************************************************************/ -#define LPFC_DRIVER_VERSION "11.2.0.10" +#define LPFC_DRIVER_VERSION "11.2.0.12" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 9a0339dbc024..c714482bf4c5 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -738,10 +738,9 @@ lpfc_vport_delete(struct fc_vport *fc_vport) ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) { /* Cannot find existing Fabric ndlp, allocate one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, Fabric_DID); if (!ndlp) goto skip_logo; - lpfc_nlp_init(vport, ndlp, Fabric_DID); /* Indicate free memory when release */ NLP_SET_FREE_REQ(ndlp); } else { diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index 6903f03c88af..8a1b94816419 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -477,7 +477,7 @@ static void _set_error_resid(struct osd_request *or, struct request *req, int error) { or->async_error = error; - or->req_errors = req->errors ? : error; + or->req_errors = scsi_req(req)->result ? : error; or->sense_len = scsi_req(req)->sense_len; if (or->sense_len) memcpy(or->sense, scsi_req(req)->sense, or->sense_len); @@ -489,7 +489,10 @@ static void _set_error_resid(struct osd_request *or, struct request *req, int osd_execute_request(struct osd_request *or) { - int error = blk_execute_rq(or->request->q, NULL, or->request, 0); + int error; + + blk_execute_rq(or->request->q, NULL, or->request, 0); + error = scsi_req(or->request)->result ? 
-EIO : 0; _set_error_resid(or, or->request, error); return error; @@ -1602,7 +1605,7 @@ static int _init_blk_request(struct osd_request *or, req->rq_flags |= RQF_QUIET; req->timeout = or->timeout; - req->retries = or->retries; + scsi_req(req)->retries = or->retries; if (has_out) { or->out.req = req; diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index c47f4b349bac..67cbed92f07d 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -327,7 +327,7 @@ static void osst_end_async(struct request *req, int update) struct osst_tape *STp = SRpnt->stp; struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data; - STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors; + STp->buffer->cmdstat.midlevel_result = SRpnt->result = rq->result; #if DEBUG STp->write_pending = 0; #endif @@ -414,7 +414,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd, memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ memcpy(rq->cmd, cmd, rq->cmd_len); req->timeout = timeout; - req->retries = retries; + rq->retries = retries; req->end_io_data = SRpnt; blk_execute_rq_nowait(req->q, NULL, req, 1, osst_end_async); diff --git a/drivers/scsi/qedf/qedf_fip.c b/drivers/scsi/qedf/qedf_fip.c index ed58b9104f58..e10b91cc3c62 100644 --- a/drivers/scsi/qedf/qedf_fip.c +++ b/drivers/scsi/qedf/qedf_fip.c @@ -99,7 +99,8 @@ static void qedf_fcoe_process_vlan_resp(struct qedf_ctx *qedf, qedf_set_vlan_id(qedf, vid); /* Inform waiter that it's ok to call fcoe_ctlr_link up() */ - complete(&qedf->fipvlan_compl); + if (!completion_done(&qedf->fipvlan_compl)) + complete(&qedf->fipvlan_compl); } } diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 8e2a160490e6..cceddd995a4b 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -2803,6 +2803,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) atomic_set(&qedf->num_offloads, 0); qedf->stop_io_on_error = false; pci_set_drvdata(pdev, 
qedf); + init_completion(&qedf->fipvlan_compl); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_INFO, "QLogic FastLinQ FCoE Module qedf %s, " diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 84c9098cc089..b6e40fd4c3c1 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -2553,13 +2553,13 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job) ql_log(ql_log_warn, vha, 0x7089, "mbx abort_command " "failed.\n"); - bsg_job->req->errors = + scsi_req(bsg_job->req)->result = bsg_reply->result = -EIO; } else { ql_dbg(ql_dbg_user, vha, 0x708a, "mbx abort_command " "success.\n"); - bsg_job->req->errors = + scsi_req(bsg_job->req)->result = bsg_reply->result = 0; } spin_lock_irqsave(&ha->hardware_lock, flags); @@ -2570,7 +2570,7 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job) } spin_unlock_irqrestore(&ha->hardware_lock, flags); ql_log(ql_log_info, vha, 0x708b, "SRB not found to abort.\n"); - bsg_job->req->errors = bsg_reply->result = -ENXIO; + scsi_req(bsg_job->req)->result = bsg_reply->result = -ENXIO; return 0; done: diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 3e7011757c82..83d61d2142e9 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1160,8 +1160,13 @@ static inline uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha) { struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; + struct device_reg_82xx __iomem *reg82 = &ha->iobase->isp82; - return ((RD_REG_DWORD(®->host_status)) == ISP_REG_DISCONNECT); + if (IS_P3P_TYPE(ha)) + return ((RD_REG_DWORD(®82->host_int)) == ISP_REG_DISCONNECT); + else + return ((RD_REG_DWORD(®->host_status)) == + ISP_REG_DISCONNECT); } /************************************************************************** diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c new file mode 100644 index 000000000000..a97c9507103d --- /dev/null +++ b/drivers/scsi/scsi_debugfs.c @@ -0,0 +1,13 @@ +#include <linux/seq_file.h> 
+#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_dbg.h> +#include "scsi_debugfs.h" + +void scsi_show_rq(struct seq_file *m, struct request *rq) +{ + struct scsi_cmnd *cmd = container_of(scsi_req(rq), typeof(*cmd), req); + char buf[80]; + + __scsi_format_command(buf, sizeof(buf), cmd->cmnd, cmd->cmd_len); + seq_printf(m, ", .cmd=%s", buf); +} diff --git a/drivers/scsi/scsi_debugfs.h b/drivers/scsi/scsi_debugfs.h new file mode 100644 index 000000000000..951b043e82d0 --- /dev/null +++ b/drivers/scsi/scsi_debugfs.h @@ -0,0 +1,4 @@ +struct request; +struct seq_file; + +void scsi_show_rq(struct seq_file *m, struct request *rq); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index f2cafae150bc..2db412dd4b44 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1988,7 +1988,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) req->rq_flags |= RQF_QUIET; req->timeout = 10 * HZ; - req->retries = 5; + rq->retries = 5; blk_execute_rq_nowait(req->q, NULL, req, 1, eh_lock_door_done); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e5a2d590a104..1c3e87d6c48f 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -34,6 +34,7 @@ #include <trace/events/scsi.h> +#include "scsi_debugfs.h" #include "scsi_priv.h" #include "scsi_logging.h" @@ -229,8 +230,8 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) * @rq_flags: flags for ->rq_flags * @resid: optional residual length * - * returns the req->errors value which is the scsi_cmnd result - * field. + * Returns the scsi_cmnd result field if a command was executed, or a negative + * Linux error code if we didn't get that far. 
*/ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, @@ -256,7 +257,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, rq->cmd_len = COMMAND_SIZE(cmd[0]); memcpy(rq->cmd, cmd, rq->cmd_len); - req->retries = retries; + rq->retries = retries; req->timeout = timeout; req->cmd_flags |= flags; req->rq_flags |= rq_flags | RQF_QUIET | RQF_PREEMPT; @@ -281,7 +282,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, memcpy(sense, rq->sense, SCSI_SENSE_BUFFERSIZE); if (sshdr) scsi_normalize_sense(rq->sense, rq->sense_len, sshdr); - ret = req->errors; + ret = rq->result; out: blk_put_request(req); @@ -797,8 +798,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) /* * __scsi_error_from_host_byte may have reset the host_byte */ - req->errors = cmd->result; - + scsi_req(req)->result = cmd->result; scsi_req(req)->resid_len = scsi_get_resid(cmd); if (scsi_bidi_cmnd(cmd)) { @@ -835,7 +835,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) /* * Recovered errors need reporting, but they're always treated as * success, so fiddle the result code here. 
For passthrough requests - * we already took a copy of the original into rq->errors which + * we already took a copy of the original into sreq->result which * is what gets returned to the user */ if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) { @@ -1061,10 +1061,10 @@ int scsi_init_io(struct scsi_cmnd *cmd) struct scsi_device *sdev = cmd->device; struct request *rq = cmd->request; bool is_mq = (rq->mq_ctx != NULL); - int error; + int error = BLKPREP_KILL; if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq))) - return -EINVAL; + goto err_exit; error = scsi_init_sgtable(rq, &cmd->sdb); if (error) @@ -1177,7 +1177,7 @@ static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req) cmd->cmd_len = scsi_req(req)->cmd_len; cmd->cmnd = scsi_req(req)->cmd; cmd->transfersize = blk_rq_bytes(req); - cmd->allowed = req->retries; + cmd->allowed = scsi_req(req)->retries; return BLKPREP_OK; } @@ -1281,7 +1281,7 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret) switch (ret) { case BLKPREP_KILL: case BLKPREP_INVALID: - req->errors = DID_NO_CONNECT << 16; + scsi_req(req)->result = DID_NO_CONNECT << 16; /* release the command and kill it */ if (req->special) { struct scsi_cmnd *cmd = req->special; @@ -1905,7 +1905,7 @@ static int scsi_mq_prep_fn(struct request *req) static void scsi_mq_done(struct scsi_cmnd *cmd) { trace_scsi_dispatch_cmd_done(cmd); - blk_mq_complete_request(cmd->request, cmd->request->errors); + blk_mq_complete_request(cmd->request); } static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, @@ -2154,10 +2154,13 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) return q; } -static struct blk_mq_ops scsi_mq_ops = { +static const struct blk_mq_ops scsi_mq_ops = { .queue_rq = scsi_queue_rq, .complete = scsi_softirq_done, .timeout = scsi_timeout, +#ifdef CONFIG_BLK_DEBUG_FS + .show_rq = scsi_show_rq, +#endif .init_request = scsi_init_request, .exit_request = scsi_exit_request, .map_queues = scsi_map_queues, diff 
--git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index cdbb293aca08..9fdbd50c31b4 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -184,9 +184,9 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost, blk_rq_bytes(req->next_rq); handler = to_sas_internal(shost->transportt)->f->smp_handler; ret = handler(shost, rphy, req); - req->errors = ret; + scsi_req(req)->result = ret; - blk_end_request_all(req, ret); + blk_end_request_all(req, 0); spin_lock_irq(q->queue_lock); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index fcfeddc79331..0dc95e102e69 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -418,6 +418,46 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(provisioning_mode); +static const char *zeroing_mode[] = { + [SD_ZERO_WRITE] = "write", + [SD_ZERO_WS] = "writesame", + [SD_ZERO_WS16_UNMAP] = "writesame_16_unmap", + [SD_ZERO_WS10_UNMAP] = "writesame_10_unmap", +}; + +static ssize_t +zeroing_mode_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + + return snprintf(buf, 20, "%s\n", zeroing_mode[sdkp->zeroing_mode]); +} + +static ssize_t +zeroing_mode_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (!strncmp(buf, zeroing_mode[SD_ZERO_WRITE], 20)) + sdkp->zeroing_mode = SD_ZERO_WRITE; + else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS], 20)) + sdkp->zeroing_mode = SD_ZERO_WS; + else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS16_UNMAP], 20)) + sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP; + else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS10_UNMAP], 20)) + sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP; + else + return -EINVAL; + + return count; +} +static DEVICE_ATTR_RW(zeroing_mode); + static ssize_t 
max_medium_access_timeouts_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -496,6 +536,7 @@ static struct attribute *sd_disk_attrs[] = { &dev_attr_app_tag_own.attr, &dev_attr_thin_provisioning.attr, &dev_attr_provisioning_mode.attr, + &dev_attr_zeroing_mode.attr, &dev_attr_max_write_same_blocks.attr, &dev_attr_max_medium_access_timeouts.attr, NULL, @@ -644,26 +685,11 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) unsigned int logical_block_size = sdkp->device->sector_size; unsigned int max_blocks = 0; - q->limits.discard_zeroes_data = 0; - - /* - * When LBPRZ is reported, discard alignment and granularity - * must be fixed to the logical block size. Otherwise the block - * layer will drop misaligned portions of the request which can - * lead to data corruption. If LBPRZ is not set, we honor the - * device preference. - */ - if (sdkp->lbprz) { - q->limits.discard_alignment = 0; - q->limits.discard_granularity = logical_block_size; - } else { - q->limits.discard_alignment = sdkp->unmap_alignment * - logical_block_size; - q->limits.discard_granularity = - max(sdkp->physical_block_size, - sdkp->unmap_granularity * logical_block_size); - } - + q->limits.discard_alignment = + sdkp->unmap_alignment * logical_block_size; + q->limits.discard_granularity = + max(sdkp->physical_block_size, + sdkp->unmap_granularity * logical_block_size); sdkp->provisioning_mode = mode; switch (mode) { @@ -681,19 +707,16 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) case SD_LBP_WS16: max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)SD_MAX_WS16_BLOCKS); - q->limits.discard_zeroes_data = sdkp->lbprz; break; case SD_LBP_WS10: max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)SD_MAX_WS10_BLOCKS); - q->limits.discard_zeroes_data = sdkp->lbprz; break; case SD_LBP_ZERO: max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)SD_MAX_WS10_BLOCKS); - q->limits.discard_zeroes_data = 1; break; } @@ -701,93 +724,122 @@ static 
void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); } -/** - * sd_setup_discard_cmnd - unmap blocks on thinly provisioned device - * @sdp: scsi device to operate on - * @rq: Request to prepare - * - * Will issue either UNMAP or WRITE SAME(16) depending on preference - * indicated by target device. - **/ -static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd) +static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) { - struct request *rq = cmd->request; struct scsi_device *sdp = cmd->device; - struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); - sector_t sector = blk_rq_pos(rq); - unsigned int nr_sectors = blk_rq_sectors(rq); - unsigned int len; - int ret; + struct request *rq = cmd->request; + u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9); + u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); + unsigned int data_len = 24; char *buf; - struct page *page; - - sector >>= ilog2(sdp->sector_size) - 9; - nr_sectors >>= ilog2(sdp->sector_size) - 9; - page = alloc_page(GFP_ATOMIC | __GFP_ZERO); - if (!page) + rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); + if (!rq->special_vec.bv_page) return BLKPREP_DEFER; + rq->special_vec.bv_offset = 0; + rq->special_vec.bv_len = data_len; + rq->rq_flags |= RQF_SPECIAL_PAYLOAD; - switch (sdkp->provisioning_mode) { - case SD_LBP_UNMAP: - buf = page_address(page); + cmd->cmd_len = 10; + cmd->cmnd[0] = UNMAP; + cmd->cmnd[8] = 24; - cmd->cmd_len = 10; - cmd->cmnd[0] = UNMAP; - cmd->cmnd[8] = 24; + buf = page_address(rq->special_vec.bv_page); + put_unaligned_be16(6 + 16, &buf[0]); + put_unaligned_be16(16, &buf[2]); + put_unaligned_be64(sector, &buf[8]); + put_unaligned_be32(nr_sectors, &buf[16]); - put_unaligned_be16(6 + 16, &buf[0]); - put_unaligned_be16(16, &buf[2]); - put_unaligned_be64(sector, &buf[8]); - put_unaligned_be32(nr_sectors, &buf[16]); + cmd->allowed = SD_MAX_RETRIES; + cmd->transfersize = data_len; + rq->timeout = 
SD_TIMEOUT; + scsi_req(rq)->resid_len = data_len; - len = 24; - break; + return scsi_init_io(cmd); +} - case SD_LBP_WS16: - cmd->cmd_len = 16; - cmd->cmnd[0] = WRITE_SAME_16; +static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap) +{ + struct scsi_device *sdp = cmd->device; + struct request *rq = cmd->request; + u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9); + u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); + u32 data_len = sdp->sector_size; + + rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); + if (!rq->special_vec.bv_page) + return BLKPREP_DEFER; + rq->special_vec.bv_offset = 0; + rq->special_vec.bv_len = data_len; + rq->rq_flags |= RQF_SPECIAL_PAYLOAD; + + cmd->cmd_len = 16; + cmd->cmnd[0] = WRITE_SAME_16; + if (unmap) cmd->cmnd[1] = 0x8; /* UNMAP */ - put_unaligned_be64(sector, &cmd->cmnd[2]); - put_unaligned_be32(nr_sectors, &cmd->cmnd[10]); + put_unaligned_be64(sector, &cmd->cmnd[2]); + put_unaligned_be32(nr_sectors, &cmd->cmnd[10]); - len = sdkp->device->sector_size; - break; + cmd->allowed = SD_MAX_RETRIES; + cmd->transfersize = data_len; + rq->timeout = unmap ? 
SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT; + scsi_req(rq)->resid_len = data_len; - case SD_LBP_WS10: - case SD_LBP_ZERO: - cmd->cmd_len = 10; - cmd->cmnd[0] = WRITE_SAME; - if (sdkp->provisioning_mode == SD_LBP_WS10) - cmd->cmnd[1] = 0x8; /* UNMAP */ - put_unaligned_be32(sector, &cmd->cmnd[2]); - put_unaligned_be16(nr_sectors, &cmd->cmnd[7]); + return scsi_init_io(cmd); +} - len = sdkp->device->sector_size; - break; +static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap) +{ + struct scsi_device *sdp = cmd->device; + struct request *rq = cmd->request; + u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9); + u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); + u32 data_len = sdp->sector_size; - default: - ret = BLKPREP_INVALID; - goto out; - } + rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); + if (!rq->special_vec.bv_page) + return BLKPREP_DEFER; + rq->special_vec.bv_offset = 0; + rq->special_vec.bv_len = data_len; + rq->rq_flags |= RQF_SPECIAL_PAYLOAD; - rq->timeout = SD_TIMEOUT; + cmd->cmd_len = 10; + cmd->cmnd[0] = WRITE_SAME; + if (unmap) + cmd->cmnd[1] = 0x8; /* UNMAP */ + put_unaligned_be32(sector, &cmd->cmnd[2]); + put_unaligned_be16(nr_sectors, &cmd->cmnd[7]); - cmd->transfersize = len; cmd->allowed = SD_MAX_RETRIES; + cmd->transfersize = data_len; + rq->timeout = unmap ? 
SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT; + scsi_req(rq)->resid_len = data_len; - rq->special_vec.bv_page = page; - rq->special_vec.bv_offset = 0; - rq->special_vec.bv_len = len; + return scsi_init_io(cmd); +} - rq->rq_flags |= RQF_SPECIAL_PAYLOAD; - scsi_req(rq)->resid_len = len; +static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) +{ + struct request *rq = cmd->request; + struct scsi_device *sdp = cmd->device; + struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); + u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9); + u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); + + if (!(rq->cmd_flags & REQ_NOUNMAP)) { + switch (sdkp->zeroing_mode) { + case SD_ZERO_WS16_UNMAP: + return sd_setup_write_same16_cmnd(cmd, true); + case SD_ZERO_WS10_UNMAP: + return sd_setup_write_same10_cmnd(cmd, true); + } + } - ret = scsi_init_io(cmd); -out: - if (ret != BLKPREP_OK) - __free_page(page); - return ret; + if (sdp->no_write_same) + return BLKPREP_INVALID; + if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff) + return sd_setup_write_same16_cmnd(cmd, false); + return sd_setup_write_same10_cmnd(cmd, false); } static void sd_config_write_same(struct scsi_disk *sdkp) @@ -816,9 +868,20 @@ static void sd_config_write_same(struct scsi_disk *sdkp) sdkp->max_ws_blocks = 0; } + if (sdkp->lbprz && sdkp->lbpws) + sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP; + else if (sdkp->lbprz && sdkp->lbpws10) + sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP; + else if (sdkp->max_ws_blocks) + sdkp->zeroing_mode = SD_ZERO_WS; + else + sdkp->zeroing_mode = SD_ZERO_WRITE; + out: blk_queue_max_write_same_sectors(q, sdkp->max_ws_blocks * (logical_block_size >> 9)); + blk_queue_max_write_zeroes_sectors(q, sdkp->max_ws_blocks * + (logical_block_size >> 9)); } /** @@ -1155,7 +1218,20 @@ static int sd_init_command(struct scsi_cmnd *cmd) switch (req_op(rq)) { case REQ_OP_DISCARD: - return sd_setup_discard_cmnd(cmd); + switch (scsi_disk(rq->rq_disk)->provisioning_mode) { + case 
SD_LBP_UNMAP: + return sd_setup_unmap_cmnd(cmd); + case SD_LBP_WS16: + return sd_setup_write_same16_cmnd(cmd, true); + case SD_LBP_WS10: + return sd_setup_write_same10_cmnd(cmd, true); + case SD_LBP_ZERO: + return sd_setup_write_same10_cmnd(cmd, false); + default: + return BLKPREP_INVALID; + } + case REQ_OP_WRITE_ZEROES: + return sd_setup_write_zeroes_cmnd(cmd); case REQ_OP_WRITE_SAME: return sd_setup_write_same_cmnd(cmd); case REQ_OP_FLUSH: @@ -1795,6 +1871,7 @@ static int sd_done(struct scsi_cmnd *SCpnt) switch (req_op(req)) { case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE_SAME: case REQ_OP_ZONE_RESET: if (!result) { @@ -2102,6 +2179,22 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, #define READ_CAPACITY_RETRIES_ON_RESET 10 +/* + * Ensure that we don't overflow sector_t when CONFIG_LBDAF is not set + * and the reported logical block size is bigger than 512 bytes. Note + * that last_sector is a u64 and therefore logical_to_sectors() is not + * applicable. + */ +static bool sd_addressable_capacity(u64 lba, unsigned int sector_size) +{ + u64 last_sector = (lba + 1ULL) << (ilog2(sector_size) - 9); + + if (sizeof(sector_t) == 4 && last_sector > U32_MAX) + return false; + + return true; +} + static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, unsigned char *buffer) { @@ -2167,7 +2260,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, return -ENODEV; } - if ((sizeof(sdkp->capacity) == 4) && (lba >= 0xffffffffULL)) { + if (!sd_addressable_capacity(lba, sector_size)) { sd_printk(KERN_ERR, sdkp, "Too big for this kernel. 
Use a " "kernel compiled with support for large block " "devices.\n"); @@ -2256,7 +2349,7 @@ static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp, return sector_size; } - if ((sizeof(sdkp->capacity) == 4) && (lba == 0xffffffff)) { + if (!sd_addressable_capacity(lba, sector_size)) { sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a " "kernel compiled with support for large block " "devices.\n"); @@ -2752,7 +2845,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp) sd_config_discard(sdkp, SD_LBP_WS16); } else { /* LBP VPD page tells us what to use */ - if (sdkp->lbpu && sdkp->max_unmap_blocks && !sdkp->lbprz) + if (sdkp->lbpu && sdkp->max_unmap_blocks) sd_config_discard(sdkp, SD_LBP_UNMAP); else if (sdkp->lbpws) sd_config_discard(sdkp, SD_LBP_WS16); @@ -2956,7 +3049,8 @@ static int sd_revalidate_disk(struct gendisk *disk) q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks); rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks); } else - rw_max = BLK_DEF_MAX_SECTORS; + rw_max = min_not_zero(logical_to_sectors(sdp, dev_max), + (sector_t)BLK_DEF_MAX_SECTORS); /* Combine with controller limits */ q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q)); diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 4dac35e96a75..a2c4b5c35379 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -59,6 +59,13 @@ enum { SD_LBP_DISABLE, /* Discard disabled due to failed cmd */ }; +enum { + SD_ZERO_WRITE = 0, /* Use WRITE(10/16) command */ + SD_ZERO_WS, /* Use WRITE SAME(10/16) command */ + SD_ZERO_WS16_UNMAP, /* Use WRITE SAME(16) with UNMAP */ + SD_ZERO_WS10_UNMAP, /* Use WRITE SAME(10) with UNMAP */ +}; + struct scsi_disk { struct scsi_driver *driver; /* always &sd_template */ struct scsi_device *device; @@ -89,6 +96,7 @@ struct scsi_disk { u8 write_prot; u8 protection_type;/* Data Integrity Field */ u8 provisioning_mode; + u8 zeroing_mode; unsigned ATO : 1; /* state of disk ATO bit */ unsigned cache_override : 1; /* 
temp override of WCE,RCD */ unsigned WCE : 1; /* state of disk WCE bit */ diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 92620c8ea8ad..1994f7799fce 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -329,6 +329,7 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, switch (req_op(rq)) { case REQ_OP_WRITE: + case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE_SAME: case REQ_OP_ZONE_RESET: diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 225abaad4d1c..0b60245bd740 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -581,7 +581,7 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) sg_io_hdr_t *hp; unsigned char cmnd[SG_MAX_CDB_SIZE]; - if (unlikely(segment_eq(get_fs(), KERNEL_DS))) + if (unlikely(uaccess_kernel())) return -EINVAL; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) @@ -1300,7 +1300,7 @@ sg_rq_end_io(struct request *rq, int uptodate) pr_info("%s: device detaching\n", __func__); sense = req->sense; - result = rq->errors; + result = req->result; resid = req->resid_len; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sdp, @@ -1718,7 +1718,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) srp->rq = rq; rq->end_io_data = srp; - rq->retries = SG_DEFAULT_RETRIES; + req->retries = SG_DEFAULT_RETRIES; if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) return 0; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 0b29b9329b1c..a8f630213a1a 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -836,6 +836,7 @@ static void get_capabilities(struct scsi_cd *cd) unsigned char *buffer; struct scsi_mode_data data; struct scsi_sense_hdr sshdr; + unsigned int ms_len = 128; int rc, n; static const char *loadmech[] = @@ -862,10 +863,11 @@ static void get_capabilities(struct scsi_cd *cd) scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr); /* ask for mode page 0x2a */ - rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, 128, + rc = scsi_mode_sense(cd->device, 0, 
0x2a, buffer, ms_len, SR_TIMEOUT, 3, &data, NULL); - if (!scsi_status_is_good(rc)) { + if (!scsi_status_is_good(rc) || data.length > ms_len || + data.header_length + data.block_descriptor_length > data.length) { /* failed, drive doesn't have capabilities mode page */ cd->cdi.speed = 1; cd->cdi.mask |= (CDC_CD_R | CDC_CD_RW | CDC_DVD_R | diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index e5ef78a6848e..1ea34d6f5437 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -480,7 +480,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req) atomic64_add(ktime_to_ns(now), &STp->stats->tot_write_time); atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time); atomic64_inc(&STp->stats->write_cnt); - if (req->errors) { + if (scsi_req(req)->result) { atomic64_add(atomic_read(&STp->stats->last_write_size) - STp->buffer->cmdstat.residual, &STp->stats->write_byte_cnt); @@ -494,7 +494,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req) atomic64_add(ktime_to_ns(now), &STp->stats->tot_read_time); atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time); atomic64_inc(&STp->stats->read_cnt); - if (req->errors) { + if (scsi_req(req)->result) { atomic64_add(atomic_read(&STp->stats->last_read_size) - STp->buffer->cmdstat.residual, &STp->stats->read_byte_cnt); @@ -518,7 +518,7 @@ static void st_scsi_execute_end(struct request *req, int uptodate) struct scsi_tape *STp = SRpnt->stp; struct bio *tmp; - STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors; + STp->buffer->cmdstat.midlevel_result = SRpnt->result = rq->result; STp->buffer->cmdstat.residual = rq->resid_len; st_do_stats(STp, req); @@ -579,7 +579,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, memset(rq->cmd, 0, BLK_MAX_CDB); memcpy(rq->cmd, cmd, rq->cmd_len); req->timeout = timeout; - req->retries = retries; + rq->retries = retries; req->end_io_data = SRpnt; blk_execute_rq_nowait(req->q, NULL, req, 1, st_scsi_execute_end); diff --git 
a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h index 8886458748c1..a676bccabd43 100644 --- a/drivers/staging/lustre/lustre/include/lustre_disk.h +++ b/drivers/staging/lustre/lustre/include/lustre_disk.h @@ -133,13 +133,9 @@ struct lustre_sb_info { struct obd_export *lsi_osd_exp; char lsi_osd_type[16]; char lsi_fstype[16]; - struct backing_dev_info lsi_bdi; /* each client mountpoint needs - * own backing_dev_info - */ }; #define LSI_UMOUNT_FAILOVER 0x00200000 -#define LSI_BDI_INITIALIZED 0x00400000 #define s2lsi(sb) ((struct lustre_sb_info *)((sb)->s_fs_info)) #define s2lsi_nocast(sb) ((sb)->s_fs_info) diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index b229cbc7bb33..d483c44aafe5 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -863,15 +863,6 @@ void ll_lli_init(struct ll_inode_info *lli) mutex_init(&lli->lli_layout_mutex); } -static inline int ll_bdi_register(struct backing_dev_info *bdi) -{ - static atomic_t ll_bdi_num = ATOMIC_INIT(0); - - bdi->name = "lustre"; - return bdi_register(bdi, NULL, "lustre-%d", - atomic_inc_return(&ll_bdi_num)); -} - int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) { struct lustre_profile *lprof = NULL; @@ -881,6 +872,7 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) char *profilenm = get_profile_name(sb); struct config_llog_instance *cfg; int err; + static atomic_t ll_bdi_num = ATOMIC_INIT(0); CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb); @@ -903,16 +895,11 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) if (err) goto out_free; - err = bdi_init(&lsi->lsi_bdi); - if (err) - goto out_free; - lsi->lsi_flags |= LSI_BDI_INITIALIZED; - lsi->lsi_bdi.capabilities = 0; - err = ll_bdi_register(&lsi->lsi_bdi); + err = super_setup_bdi_name(sb, "lustre-%d", + atomic_inc_return(&ll_bdi_num)); if (err) goto 
out_free; - sb->s_bdi = &lsi->lsi_bdi; /* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */ sb->s_d_op = &ll_d_ops; @@ -1033,11 +1020,6 @@ void ll_put_super(struct super_block *sb) if (profilenm) class_del_profile(profilenm); - if (lsi->lsi_flags & LSI_BDI_INITIALIZED) { - bdi_destroy(&lsi->lsi_bdi); - lsi->lsi_flags &= ~LSI_BDI_INITIALIZED; - } - ll_free_sbi(sb); lsi->lsi_llsbi = NULL; diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index a91802432f2f..e3f9ed3690b7 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -485,8 +485,7 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *); int iscsit_queue_rsp(struct iscsi_conn *conn, struct iscsi_cmd *cmd) { - iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); - return 0; + return iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); } EXPORT_SYMBOL(iscsit_queue_rsp); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index bf40f03755dd..5798810197ec 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -167,10 +167,7 @@ static struct se_tpg_np *lio_target_call_addnptotpg( struct iscsi_portal_group *tpg; struct iscsi_tpg_np *tpg_np; char *str, *str2, *ip_str, *port_str; - struct sockaddr_storage sockaddr; - struct sockaddr_in *sock_in; - struct sockaddr_in6 *sock_in6; - unsigned long port; + struct sockaddr_storage sockaddr = { }; int ret; char buf[MAX_PORTAL_LEN + 1]; @@ -182,21 +179,19 @@ static struct se_tpg_np *lio_target_call_addnptotpg( memset(buf, 0, MAX_PORTAL_LEN + 1); snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name); - memset(&sockaddr, 0, sizeof(struct sockaddr_storage)); - str = strstr(buf, "["); if (str) { - const char *end; - str2 = strstr(str, "]"); if (!str2) { pr_err("Unable to locate trailing \"]\"" " in IPv6 iSCSI network portal address\n"); return ERR_PTR(-EINVAL); } - str++; /* 
Skip over leading "[" */ + + ip_str = str + 1; /* Skip over leading "[" */ *str2 = '\0'; /* Terminate the unbracketed IPv6 address */ str2++; /* Skip over the \0 */ + port_str = strstr(str2, ":"); if (!port_str) { pr_err("Unable to locate \":port\"" @@ -205,23 +200,8 @@ static struct se_tpg_np *lio_target_call_addnptotpg( } *port_str = '\0'; /* Terminate string for IP */ port_str++; /* Skip over ":" */ - - ret = kstrtoul(port_str, 0, &port); - if (ret < 0) { - pr_err("kstrtoul() failed for port_str: %d\n", ret); - return ERR_PTR(ret); - } - sock_in6 = (struct sockaddr_in6 *)&sockaddr; - sock_in6->sin6_family = AF_INET6; - sock_in6->sin6_port = htons((unsigned short)port); - ret = in6_pton(str, -1, - (void *)&sock_in6->sin6_addr.in6_u, -1, &end); - if (ret <= 0) { - pr_err("in6_pton returned: %d\n", ret); - return ERR_PTR(-EINVAL); - } } else { - str = ip_str = &buf[0]; + ip_str = &buf[0]; port_str = strstr(ip_str, ":"); if (!port_str) { pr_err("Unable to locate \":port\"" @@ -230,17 +210,15 @@ static struct se_tpg_np *lio_target_call_addnptotpg( } *port_str = '\0'; /* Terminate string for IP */ port_str++; /* Skip over ":" */ + } - ret = kstrtoul(port_str, 0, &port); - if (ret < 0) { - pr_err("kstrtoul() failed for port_str: %d\n", ret); - return ERR_PTR(ret); - } - sock_in = (struct sockaddr_in *)&sockaddr; - sock_in->sin_family = AF_INET; - sock_in->sin_port = htons((unsigned short)port); - sock_in->sin_addr.s_addr = in_aton(ip_str); + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, ip_str, + port_str, &sockaddr); + if (ret) { + pr_err("malformed ip/port passed: %s\n", name); + return ERR_PTR(ret); } + tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg); ret = iscsit_get_tpg(tpg); if (ret < 0) @@ -1398,11 +1376,10 @@ static u32 lio_sess_get_initiator_sid( static int lio_queue_data_in(struct se_cmd *se_cmd) { struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + struct iscsi_conn *conn = cmd->conn; cmd->i_state = 
ISTATE_SEND_DATAIN; - cmd->conn->conn_transport->iscsit_queue_data_in(cmd->conn, cmd); - - return 0; + return conn->conn_transport->iscsit_queue_data_in(conn, cmd); } static int lio_write_pending(struct se_cmd *se_cmd) @@ -1431,16 +1408,14 @@ static int lio_write_pending_status(struct se_cmd *se_cmd) static int lio_queue_status(struct se_cmd *se_cmd) { struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + struct iscsi_conn *conn = cmd->conn; cmd->i_state = ISTATE_SEND_STATUS; if (cmd->se_cmd.scsi_status || cmd->sense_reason) { - iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); - return 0; + return iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); } - cmd->conn->conn_transport->iscsit_queue_status(cmd->conn, cmd); - - return 0; + return conn->conn_transport->iscsit_queue_status(conn, cmd); } static void lio_queue_tm_rsp(struct se_cmd *se_cmd) diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index e65bf78ceef3..fce627628200 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -782,22 +782,6 @@ static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param) if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) SET_PSTATE_REPLY_OPTIONAL(param); /* - * The GlobalSAN iSCSI Initiator for MacOSX does - * not respond to MaxBurstLength, FirstBurstLength, - * DefaultTime2Wait or DefaultTime2Retain parameter keys. - * So, we set them to 'reply optional' here, and assume the - * the defaults from iscsi_parameters.h if the initiator - * is not RFC compliant and the keys are not negotiated. 
- */ - if (!strcmp(param->name, MAXBURSTLENGTH)) - SET_PSTATE_REPLY_OPTIONAL(param); - if (!strcmp(param->name, FIRSTBURSTLENGTH)) - SET_PSTATE_REPLY_OPTIONAL(param); - if (!strcmp(param->name, DEFAULTTIME2WAIT)) - SET_PSTATE_REPLY_OPTIONAL(param); - if (!strcmp(param->name, DEFAULTTIME2RETAIN)) - SET_PSTATE_REPLY_OPTIONAL(param); - /* * Required for gPXE iSCSI boot client */ if (!strcmp(param->name, MAXCONNECTIONS)) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 5041a9c8bdcb..7d3e2fcc26a0 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -567,7 +567,7 @@ static void iscsit_remove_cmd_from_immediate_queue( } } -void iscsit_add_cmd_to_response_queue( +int iscsit_add_cmd_to_response_queue( struct iscsi_cmd *cmd, struct iscsi_conn *conn, u8 state) @@ -578,7 +578,7 @@ void iscsit_add_cmd_to_response_queue( if (!qr) { pr_err("Unable to allocate memory for" " struct iscsi_queue_req\n"); - return; + return -ENOMEM; } INIT_LIST_HEAD(&qr->qr_list); qr->cmd = cmd; @@ -590,6 +590,7 @@ void iscsit_add_cmd_to_response_queue( spin_unlock_bh(&conn->response_queue_lock); wake_up(&conn->queues_wq); + return 0; } struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *conn) @@ -737,21 +738,23 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) { struct se_cmd *se_cmd = NULL; int rc; + bool op_scsi = false; /* * Determine if a struct se_cmd is associated with * this struct iscsi_cmd. 
*/ switch (cmd->iscsi_opcode) { case ISCSI_OP_SCSI_CMD: - se_cmd = &cmd->se_cmd; - __iscsit_free_cmd(cmd, true, shutdown); + op_scsi = true; /* * Fallthrough */ case ISCSI_OP_SCSI_TMFUNC: - rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown); - if (!rc && shutdown && se_cmd && se_cmd->se_sess) { - __iscsit_free_cmd(cmd, true, shutdown); + se_cmd = &cmd->se_cmd; + __iscsit_free_cmd(cmd, op_scsi, shutdown); + rc = transport_generic_free_cmd(se_cmd, shutdown); + if (!rc && shutdown && se_cmd->se_sess) { + __iscsit_free_cmd(cmd, op_scsi, shutdown); target_put_sess_cmd(se_cmd); } break; diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h index 8ff08856516a..9e4197af8708 100644 --- a/drivers/target/iscsi/iscsi_target_util.h +++ b/drivers/target/iscsi/iscsi_target_util.h @@ -31,7 +31,7 @@ extern int iscsit_find_cmd_for_recovery(struct iscsi_session *, struct iscsi_cmd struct iscsi_conn_recovery **, itt_t); extern void iscsit_add_cmd_to_immediate_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); extern struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *); -extern void iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); +extern int iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); extern struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *); extern void iscsit_remove_cmd_from_tx_queues(struct iscsi_cmd *, struct iscsi_conn *); extern bool iscsit_conn_all_queues_empty(struct iscsi_conn *); diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index fd7c16a7ca6e..fc4a9c303d55 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -197,8 +197,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) /* * Set the ASYMMETRIC ACCESS State */ - buf[off++] |= (atomic_read( - &tg_pt_gp->tg_pt_gp_alua_access_state) & 0xff); + buf[off++] |= 
tg_pt_gp->tg_pt_gp_alua_access_state & 0xff; /* * Set supported ASYMMETRIC ACCESS State bits */ @@ -710,7 +709,7 @@ target_alua_state_check(struct se_cmd *cmd) spin_lock(&lun->lun_tg_pt_gp_lock); tg_pt_gp = lun->lun_tg_pt_gp; - out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); + out_alua_state = tg_pt_gp->tg_pt_gp_alua_access_state; nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs; // XXX: keeps using tg_pt_gp witout reference after unlock @@ -911,7 +910,7 @@ static int core_alua_write_tpg_metadata( } /* - * Called with tg_pt_gp->tg_pt_gp_md_mutex held + * Called with tg_pt_gp->tg_pt_gp_transition_mutex held */ static int core_alua_update_tpg_primary_metadata( struct t10_alua_tg_pt_gp *tg_pt_gp) @@ -934,7 +933,7 @@ static int core_alua_update_tpg_primary_metadata( "alua_access_state=0x%02x\n" "alua_access_status=0x%02x\n", tg_pt_gp->tg_pt_gp_id, - tg_pt_gp->tg_pt_gp_alua_pending_state, + tg_pt_gp->tg_pt_gp_alua_access_state, tg_pt_gp->tg_pt_gp_alua_access_status); snprintf(path, ALUA_METADATA_PATH_LEN, @@ -1013,93 +1012,41 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) spin_unlock(&tg_pt_gp->tg_pt_gp_lock); } -static void core_alua_do_transition_tg_pt_work(struct work_struct *work) -{ - struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work, - struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work); - struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; - bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status == - ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG); - - /* - * Update the ALUA metadata buf that has been allocated in - * core_alua_do_port_transition(), this metadata will be written - * to struct file. - * - * Note that there is the case where we do not want to update the - * metadata when the saved metadata is being parsed in userspace - * when setting the existing port access state and access status. 
- * - * Also note that the failure to write out the ALUA metadata to - * struct file does NOT affect the actual ALUA transition. - */ - if (tg_pt_gp->tg_pt_gp_write_metadata) { - mutex_lock(&tg_pt_gp->tg_pt_gp_md_mutex); - core_alua_update_tpg_primary_metadata(tg_pt_gp); - mutex_unlock(&tg_pt_gp->tg_pt_gp_md_mutex); - } - /* - * Set the current primary ALUA access state to the requested new state - */ - atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, - tg_pt_gp->tg_pt_gp_alua_pending_state); - - pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu" - " from primary access state %s to %s\n", (explicit) ? "explicit" : - "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), - tg_pt_gp->tg_pt_gp_id, - core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_previous_state), - core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state)); - - core_alua_queue_state_change_ua(tg_pt_gp); - - spin_lock(&dev->t10_alua.tg_pt_gps_lock); - atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt); - spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - - if (tg_pt_gp->tg_pt_gp_transition_complete) - complete(tg_pt_gp->tg_pt_gp_transition_complete); -} - static int core_alua_do_transition_tg_pt( struct t10_alua_tg_pt_gp *tg_pt_gp, int new_state, int explicit) { - struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; - DECLARE_COMPLETION_ONSTACK(wait); + int prev_state; + mutex_lock(&tg_pt_gp->tg_pt_gp_transition_mutex); /* Nothing to be done here */ - if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state) + if (tg_pt_gp->tg_pt_gp_alua_access_state == new_state) { + mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex); return 0; + } - if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) + if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) { + mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex); return -EAGAIN; - - /* - * Flush any pending transitions - */ - if (!explicit) - flush_work(&tg_pt_gp->tg_pt_gp_transition_work); + } /* * Save the old primary ALUA access state, and 
set the current state * to ALUA_ACCESS_STATE_TRANSITION. */ - atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, - ALUA_ACCESS_STATE_TRANSITION); + prev_state = tg_pt_gp->tg_pt_gp_alua_access_state; + tg_pt_gp->tg_pt_gp_alua_access_state = ALUA_ACCESS_STATE_TRANSITION; tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ? ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG : ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA; core_alua_queue_state_change_ua(tg_pt_gp); - if (new_state == ALUA_ACCESS_STATE_TRANSITION) + if (new_state == ALUA_ACCESS_STATE_TRANSITION) { + mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex); return 0; - - tg_pt_gp->tg_pt_gp_alua_previous_state = - atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); - tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; + } /* * Check for the optional ALUA primary state transition delay @@ -1108,19 +1055,36 @@ static int core_alua_do_transition_tg_pt( msleep_interruptible(tg_pt_gp->tg_pt_gp_trans_delay_msecs); /* - * Take a reference for workqueue item + * Set the current primary ALUA access state to the requested new state */ - spin_lock(&dev->t10_alua.tg_pt_gps_lock); - atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt); - spin_unlock(&dev->t10_alua.tg_pt_gps_lock); + tg_pt_gp->tg_pt_gp_alua_access_state = new_state; - schedule_work(&tg_pt_gp->tg_pt_gp_transition_work); - if (explicit) { - tg_pt_gp->tg_pt_gp_transition_complete = &wait; - wait_for_completion(&wait); - tg_pt_gp->tg_pt_gp_transition_complete = NULL; + /* + * Update the ALUA metadata buf that has been allocated in + * core_alua_do_port_transition(), this metadata will be written + * to struct file. + * + * Note that there is the case where we do not want to update the + * metadata when the saved metadata is being parsed in userspace + * when setting the existing port access state and access status. + * + * Also note that the failure to write out the ALUA metadata to + * struct file does NOT affect the actual ALUA transition. 
+ */ + if (tg_pt_gp->tg_pt_gp_write_metadata) { + core_alua_update_tpg_primary_metadata(tg_pt_gp); } + pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu" + " from primary access state %s to %s\n", (explicit) ? "explicit" : + "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), + tg_pt_gp->tg_pt_gp_id, + core_alua_dump_state(prev_state), + core_alua_dump_state(new_state)); + + core_alua_queue_state_change_ua(tg_pt_gp); + + mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex); return 0; } @@ -1685,14 +1649,12 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev, } INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_list); INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_lun_list); - mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex); + mutex_init(&tg_pt_gp->tg_pt_gp_transition_mutex); spin_lock_init(&tg_pt_gp->tg_pt_gp_lock); atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0); - INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work, - core_alua_do_transition_tg_pt_work); tg_pt_gp->tg_pt_gp_dev = dev; - atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, - ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED); + tg_pt_gp->tg_pt_gp_alua_access_state = + ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED; /* * Enable both explicit and implicit ALUA support by default */ @@ -1797,8 +1759,6 @@ void core_alua_free_tg_pt_gp( dev->t10_alua.alua_tg_pt_gps_counter--; spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - flush_work(&tg_pt_gp->tg_pt_gp_transition_work); - /* * Allow a struct t10_alua_tg_pt_gp_member * referenced by * core_alua_get_tg_pt_gp_by_name() in @@ -1938,8 +1898,8 @@ ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *lun, char *page) "Primary Access Status: %s\nTG Port Secondary Access" " State: %s\nTG Port Secondary Access Status: %s\n", config_item_name(tg_pt_ci), tg_pt_gp->tg_pt_gp_id, - core_alua_dump_state(atomic_read( - &tg_pt_gp->tg_pt_gp_alua_access_state)), + core_alua_dump_state( + tg_pt_gp->tg_pt_gp_alua_access_state), core_alua_dump_status( tg_pt_gp->tg_pt_gp_alua_access_status), 
atomic_read(&lun->lun_tg_pt_secondary_offline) ? diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 38b5025e4c7a..70657fd56440 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -2392,7 +2392,7 @@ static ssize_t target_tg_pt_gp_alua_access_state_show(struct config_item *item, char *page) { return sprintf(page, "%d\n", - atomic_read(&to_tg_pt_gp(item)->tg_pt_gp_alua_access_state)); + to_tg_pt_gp(item)->tg_pt_gp_alua_access_state); } static ssize_t target_tg_pt_gp_alua_access_state_store(struct config_item *item, diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index c754ae33bf7b..d2f089cfa9ae 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -851,7 +851,7 @@ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, attrib->unmap_granularity = q->limits.discard_granularity / block_size; attrib->unmap_granularity_alignment = q->limits.discard_alignment / block_size; - attrib->unmap_zeroes_data = q->limits.discard_zeroes_data; + attrib->unmap_zeroes_data = 0; return true; } EXPORT_SYMBOL(target_configure_unmap_from_queue); diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index d8a16ca6baa5..d1e6cab8e3d3 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -92,6 +92,11 @@ static int target_fabric_mappedlun_link( pr_err("Source se_lun->lun_se_dev does not exist\n"); return -EINVAL; } + if (lun->lun_shutdown) { + pr_err("Unable to create mappedlun symlink because" + " lun->lun_shutdown=true\n"); + return -EINVAL; + } se_tpg = lun->lun_tpg; nacl_ci = &lun_acl_ci->ci_parent->ci_group->cg_item; diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 94cda7991e80..a93d94e68ab5 100644 --- a/drivers/target/target_core_pscsi.c +++ 
b/drivers/target/target_core_pscsi.c @@ -1008,7 +1008,7 @@ pscsi_execute_cmd(struct se_cmd *cmd) req->timeout = PS_TIMEOUT_DISK; else req->timeout = PS_TIMEOUT_OTHER; - req->retries = PS_RETRY; + scsi_req(req)->retries = PS_RETRY; blk_execute_rq_nowait(pdv->pdv_sd->request_queue, NULL, req, (cmd->sam_task_attr == TCM_HEAD_TAG), @@ -1050,7 +1050,7 @@ static void pscsi_req_done(struct request *req, int uptodate) struct se_cmd *cmd = req->end_io_data; struct pscsi_plugin_task *pt = cmd->priv; - pt->pscsi_result = req->errors; + pt->pscsi_result = scsi_req(req)->result; pt->pscsi_resid = scsi_req(req)->resid_len; cmd->scsi_status = status_byte(pt->pscsi_result) << 1; diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 6fb191914f45..dfaef4d3b2d2 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -642,6 +642,8 @@ void core_tpg_remove_lun( */ struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); + lun->lun_shutdown = true; + core_clear_lun_from_tpg(lun, tpg); /* * Wait for any active I/O references to percpu se_lun->lun_ref to @@ -663,6 +665,8 @@ void core_tpg_remove_lun( } if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) hlist_del_rcu(&lun->link); + + lun->lun_shutdown = false; mutex_unlock(&tpg->tpg_lun_mutex); percpu_ref_exit(&lun->lun_ref); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index b1a3cdb29468..a0cd56ee5fe9 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -64,8 +64,9 @@ struct kmem_cache *t10_alua_lba_map_cache; struct kmem_cache *t10_alua_lba_map_mem_cache; static void transport_complete_task_attr(struct se_cmd *cmd); +static int translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason); static void transport_handle_queue_full(struct se_cmd *cmd, - struct se_device *dev); + struct se_device *dev, int err, bool write_pending); static int transport_put_cmd(struct 
se_cmd *cmd); static void target_complete_ok_work(struct work_struct *work); @@ -804,7 +805,8 @@ void target_qf_do_work(struct work_struct *work) if (cmd->t_state == TRANSPORT_COMPLETE_QF_WP) transport_write_pending_qf(cmd); - else if (cmd->t_state == TRANSPORT_COMPLETE_QF_OK) + else if (cmd->t_state == TRANSPORT_COMPLETE_QF_OK || + cmd->t_state == TRANSPORT_COMPLETE_QF_ERR) transport_complete_qf(cmd); } } @@ -1719,7 +1721,7 @@ void transport_generic_request_failure(struct se_cmd *cmd, } trace_target_cmd_complete(cmd); ret = cmd->se_tfo->queue_status(cmd); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; goto check_stop; default: @@ -1730,7 +1732,7 @@ void transport_generic_request_failure(struct se_cmd *cmd, } ret = transport_send_check_condition_and_sense(cmd, sense_reason, 0); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; check_stop: @@ -1739,8 +1741,7 @@ check_stop: return; queue_full: - cmd->t_state = TRANSPORT_COMPLETE_QF_OK; - transport_handle_queue_full(cmd, cmd->se_dev); + transport_handle_queue_full(cmd, cmd->se_dev, ret, false); } EXPORT_SYMBOL(transport_generic_request_failure); @@ -1977,13 +1978,29 @@ static void transport_complete_qf(struct se_cmd *cmd) int ret = 0; transport_complete_task_attr(cmd); + /* + * If a fabric driver ->write_pending() or ->queue_data_in() callback + * has returned neither -ENOMEM or -EAGAIN, assume it's fatal and + * the same callbacks should not be retried. Return CHECK_CONDITION + * if a scsi_status is not already set. + * + * If a fabric driver ->queue_status() has returned non zero, always + * keep retrying no matter what.. 
+ */ + if (cmd->t_state == TRANSPORT_COMPLETE_QF_ERR) { + if (cmd->scsi_status) + goto queue_status; - if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) { - trace_target_cmd_complete(cmd); - ret = cmd->se_tfo->queue_status(cmd); - goto out; + cmd->se_cmd_flags |= SCF_EMULATED_TASK_SENSE; + cmd->scsi_status = SAM_STAT_CHECK_CONDITION; + cmd->scsi_sense_length = TRANSPORT_SENSE_BUFFER; + translate_sense_reason(cmd, TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE); + goto queue_status; } + if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) + goto queue_status; + switch (cmd->data_direction) { case DMA_FROM_DEVICE: if (cmd->scsi_status) @@ -2007,19 +2024,33 @@ queue_status: break; } -out: if (ret < 0) { - transport_handle_queue_full(cmd, cmd->se_dev); + transport_handle_queue_full(cmd, cmd->se_dev, ret, false); return; } transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); } -static void transport_handle_queue_full( - struct se_cmd *cmd, - struct se_device *dev) +static void transport_handle_queue_full(struct se_cmd *cmd, struct se_device *dev, + int err, bool write_pending) { + /* + * -EAGAIN or -ENOMEM signals retry of ->write_pending() and/or + * ->queue_data_in() callbacks from new process context. + * + * Otherwise for other errors, transport_complete_qf() will send + * CHECK_CONDITION via ->queue_status() instead of attempting to + * retry associated fabric driver data-transfer callbacks. + */ + if (err == -EAGAIN || err == -ENOMEM) { + cmd->t_state = (write_pending) ? 
TRANSPORT_COMPLETE_QF_WP : + TRANSPORT_COMPLETE_QF_OK; + } else { + pr_warn_ratelimited("Got unknown fabric queue status: %d\n", err); + cmd->t_state = TRANSPORT_COMPLETE_QF_ERR; + } + spin_lock_irq(&dev->qf_cmd_lock); list_add_tail(&cmd->se_qf_node, &cmd->se_dev->qf_cmd_list); atomic_inc_mb(&dev->dev_qf_count); @@ -2083,7 +2114,7 @@ static void target_complete_ok_work(struct work_struct *work) WARN_ON(!cmd->scsi_status); ret = transport_send_check_condition_and_sense( cmd, 0, 1); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; transport_lun_remove_cmd(cmd); @@ -2109,7 +2140,7 @@ static void target_complete_ok_work(struct work_struct *work) } else if (rc) { ret = transport_send_check_condition_and_sense(cmd, rc, 0); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; transport_lun_remove_cmd(cmd); @@ -2134,7 +2165,7 @@ queue_rsp: if (target_read_prot_action(cmd)) { ret = transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; transport_lun_remove_cmd(cmd); @@ -2144,7 +2175,7 @@ queue_rsp: trace_target_cmd_complete(cmd); ret = cmd->se_tfo->queue_data_in(cmd); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; break; case DMA_TO_DEVICE: @@ -2157,7 +2188,7 @@ queue_rsp: atomic_long_add(cmd->data_length, &cmd->se_lun->lun_stats.tx_data_octets); ret = cmd->se_tfo->queue_data_in(cmd); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; break; } @@ -2166,7 +2197,7 @@ queue_rsp: queue_status: trace_target_cmd_complete(cmd); ret = cmd->se_tfo->queue_status(cmd); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; break; default: @@ -2180,8 +2211,8 @@ queue_status: queue_full: pr_debug("Handling complete_ok QUEUE_FULL: se_cmd: %p," " data_direction: %d\n", cmd, cmd->data_direction); - cmd->t_state = TRANSPORT_COMPLETE_QF_OK; - transport_handle_queue_full(cmd, cmd->se_dev); + + transport_handle_queue_full(cmd, 
cmd->se_dev, ret, false); } void target_free_sgl(struct scatterlist *sgl, int nents) @@ -2449,18 +2480,14 @@ transport_generic_new_cmd(struct se_cmd *cmd) spin_unlock_irqrestore(&cmd->t_state_lock, flags); ret = cmd->se_tfo->write_pending(cmd); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; - /* fabric drivers should only return -EAGAIN or -ENOMEM as error */ - WARN_ON(ret); - - return (!ret) ? 0 : TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + return 0; queue_full: pr_debug("Handling write_pending QUEUE__FULL: se_cmd: %p\n", cmd); - cmd->t_state = TRANSPORT_COMPLETE_QF_WP; - transport_handle_queue_full(cmd, cmd->se_dev); + transport_handle_queue_full(cmd, cmd->se_dev, ret, true); return 0; } EXPORT_SYMBOL(transport_generic_new_cmd); @@ -2470,10 +2497,10 @@ static void transport_write_pending_qf(struct se_cmd *cmd) int ret; ret = cmd->se_tfo->write_pending(cmd); - if (ret == -EAGAIN || ret == -ENOMEM) { + if (ret) { pr_debug("Handling write_pending QUEUE__FULL: se_cmd: %p\n", cmd); - transport_handle_queue_full(cmd, cmd->se_dev); + transport_handle_queue_full(cmd, cmd->se_dev, ret, true); } } @@ -3011,6 +3038,8 @@ static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status) __releases(&cmd->t_state_lock) __acquires(&cmd->t_state_lock) { + int ret; + assert_spin_locked(&cmd->t_state_lock); WARN_ON_ONCE(!irqs_disabled()); @@ -3034,7 +3063,9 @@ static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status) trace_target_cmd_complete(cmd); spin_unlock_irq(&cmd->t_state_lock); - cmd->se_tfo->queue_status(cmd); + ret = cmd->se_tfo->queue_status(cmd); + if (ret) + transport_handle_queue_full(cmd, cmd->se_dev, ret, false); spin_lock_irq(&cmd->t_state_lock); return 1; @@ -3055,6 +3086,7 @@ EXPORT_SYMBOL(transport_check_aborted_status); void transport_send_task_abort(struct se_cmd *cmd) { unsigned long flags; + int ret; spin_lock_irqsave(&cmd->t_state_lock, flags); if (cmd->se_cmd_flags & (SCF_SENT_CHECK_CONDITION)) { @@ 
-3090,7 +3122,9 @@ send_abort: cmd->t_task_cdb[0], cmd->tag); trace_target_cmd_complete(cmd); - cmd->se_tfo->queue_status(cmd); + ret = cmd->se_tfo->queue_status(cmd); + if (ret) + transport_handle_queue_full(cmd, cmd->se_dev, ret, false); } static void target_tmr_work(struct work_struct *work) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index c6874c38a10b..f615c3bbb73e 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -311,24 +311,50 @@ static void free_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd) DATA_BLOCK_BITS); } -static void gather_data_area(struct tcmu_dev *udev, unsigned long *cmd_bitmap, - struct scatterlist *data_sg, unsigned int data_nents) +static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, + bool bidi) { + struct se_cmd *se_cmd = cmd->se_cmd; int i, block; int block_remaining = 0; void *from, *to; size_t copy_bytes, from_offset; - struct scatterlist *sg; + struct scatterlist *sg, *data_sg; + unsigned int data_nents; + DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS); + + bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS); + + if (!bidi) { + data_sg = se_cmd->t_data_sg; + data_nents = se_cmd->t_data_nents; + } else { + uint32_t count; + + /* + * For bidi case, the first count blocks are for Data-Out + * buffer blocks, and before gathering the Data-In buffer + * the Data-Out buffer blocks should be discarded. 
+ */ + count = DIV_ROUND_UP(se_cmd->data_length, DATA_BLOCK_SIZE); + while (count--) { + block = find_first_bit(bitmap, DATA_BLOCK_BITS); + clear_bit(block, bitmap); + } + + data_sg = se_cmd->t_bidi_data_sg; + data_nents = se_cmd->t_bidi_data_nents; + } for_each_sg(data_sg, sg, data_nents, i) { int sg_remaining = sg->length; to = kmap_atomic(sg_page(sg)) + sg->offset; while (sg_remaining > 0) { if (block_remaining == 0) { - block = find_first_bit(cmd_bitmap, + block = find_first_bit(bitmap, DATA_BLOCK_BITS); block_remaining = DATA_BLOCK_SIZE; - clear_bit(block, cmd_bitmap); + clear_bit(block, bitmap); } copy_bytes = min_t(size_t, sg_remaining, block_remaining); @@ -394,6 +420,27 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t d return true; } +static inline size_t tcmu_cmd_get_data_length(struct tcmu_cmd *tcmu_cmd) +{ + struct se_cmd *se_cmd = tcmu_cmd->se_cmd; + size_t data_length = round_up(se_cmd->data_length, DATA_BLOCK_SIZE); + + if (se_cmd->se_cmd_flags & SCF_BIDI) { + BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents)); + data_length += round_up(se_cmd->t_bidi_data_sg->length, + DATA_BLOCK_SIZE); + } + + return data_length; +} + +static inline uint32_t tcmu_cmd_get_block_cnt(struct tcmu_cmd *tcmu_cmd) +{ + size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd); + + return data_length / DATA_BLOCK_SIZE; +} + static sense_reason_t tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) { @@ -407,7 +454,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) uint32_t cmd_head; uint64_t cdb_off; bool copy_to_data_area; - size_t data_length; + size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd); DECLARE_BITMAP(old_bitmap, DATA_BLOCK_BITS); if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) @@ -421,8 +468,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) * expensive to tell how many regions are freed in the bitmap */ base_command_size = max(offsetof(struct tcmu_cmd_entry, - req.iov[se_cmd->t_bidi_data_nents + - 
se_cmd->t_data_nents]), + req.iov[tcmu_cmd_get_block_cnt(tcmu_cmd)]), sizeof(struct tcmu_cmd_entry)); command_size = base_command_size + round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE); @@ -433,11 +479,6 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) mb = udev->mb_addr; cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */ - data_length = se_cmd->data_length; - if (se_cmd->se_cmd_flags & SCF_BIDI) { - BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents)); - data_length += se_cmd->t_bidi_data_sg->length; - } if ((command_size > (udev->cmdr_size / 2)) || data_length > udev->data_size) { pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu " @@ -511,11 +552,14 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) entry->req.iov_dif_cnt = 0; /* Handle BIDI commands */ - iov_cnt = 0; - alloc_and_scatter_data_area(udev, se_cmd->t_bidi_data_sg, - se_cmd->t_bidi_data_nents, &iov, &iov_cnt, false); - entry->req.iov_bidi_cnt = iov_cnt; - + if (se_cmd->se_cmd_flags & SCF_BIDI) { + iov_cnt = 0; + iov++; + alloc_and_scatter_data_area(udev, se_cmd->t_bidi_data_sg, + se_cmd->t_bidi_data_nents, &iov, &iov_cnt, + false); + entry->req.iov_bidi_cnt = iov_cnt; + } /* cmd's data_bitmap is what changed in process */ bitmap_xor(tcmu_cmd->data_bitmap, old_bitmap, udev->data_bitmap, DATA_BLOCK_BITS); @@ -592,19 +636,11 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * se_cmd->scsi_sense_length); free_data_area(udev, cmd); } else if (se_cmd->se_cmd_flags & SCF_BIDI) { - DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS); - /* Get Data-In buffer before clean up */ - bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS); - gather_data_area(udev, bitmap, - se_cmd->t_bidi_data_sg, se_cmd->t_bidi_data_nents); + gather_data_area(udev, cmd, true); free_data_area(udev, cmd); } else if (se_cmd->data_direction == DMA_FROM_DEVICE) { - DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS); - - bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS); - 
gather_data_area(udev, bitmap, - se_cmd->t_data_sg, se_cmd->t_data_nents); + gather_data_area(udev, cmd, false); free_data_area(udev, cmd); } else if (se_cmd->data_direction == DMA_TO_DEVICE) { free_data_area(udev, cmd); @@ -1196,11 +1232,6 @@ static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *pag if (ret < 0) return ret; - if (!val) { - pr_err("Illegal value for cmd_time_out\n"); - return -EINVAL; - } - udev->cmd_time_out = val * MSEC_PER_SEC; return count; } diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 776b34396144..0a16cf4bed39 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -291,18 +291,6 @@ config ARMADA_THERMAL Enable this option if you want to have support for thermal management controller present in Armada 370 and Armada XP SoC. -config DB8500_CPUFREQ_COOLING - tristate "DB8500 cpufreq cooling" - depends on ARCH_U8500 || COMPILE_TEST - depends on HAS_IOMEM - depends on CPU_THERMAL - default y - help - Adds DB8500 cpufreq cooling devices, and these cooling devices can be - bound to thermal zone trip points. When a trip point reached, the - bound cpufreq cooling device turns active to set CPU frequency low to - cool down the CPU. 
- config INTEL_POWERCLAMP tristate "Intel PowerClamp idle injection driver" depends on THERMAL diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 7adae2029355..c2372f10dae5 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -41,7 +41,6 @@ obj-$(CONFIG_TANGO_THERMAL) += tango_thermal.o obj-$(CONFIG_IMX_THERMAL) += imx_thermal.o obj-$(CONFIG_MAX77620_THERMAL) += max77620_thermal.o obj-$(CONFIG_QORIQ_THERMAL) += qoriq_thermal.o -obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o obj-$(CONFIG_INTEL_SOC_DTS_IOSF_CORE) += intel_soc_dts_iosf.o diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c deleted file mode 100644 index e58bd0b658b5..000000000000 --- a/drivers/thermal/db8500_cpufreq_cooling.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * db8500_cpufreq_cooling.c - DB8500 cpufreq works as cooling device. - * - * Copyright (C) 2012 ST-Ericsson - * Copyright (C) 2012 Linaro Ltd. - * - * Author: Hongbo Zhang <hongbo.zhang@linaro.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#include <linux/cpu_cooling.h> -#include <linux/err.h> -#include <linux/module.h> -#include <linux/of.h> -#include <linux/platform_device.h> -#include <linux/slab.h> - -static int db8500_cpufreq_cooling_probe(struct platform_device *pdev) -{ - struct thermal_cooling_device *cdev; - - cdev = cpufreq_cooling_register(cpu_present_mask); - if (IS_ERR(cdev)) { - int ret = PTR_ERR(cdev); - - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, - "Failed to register cooling device %d\n", - ret); - - return ret; - } - - platform_set_drvdata(pdev, cdev); - - dev_info(&pdev->dev, "Cooling device registered: %s\n", cdev->type); - - return 0; -} - -static int db8500_cpufreq_cooling_remove(struct platform_device *pdev) -{ - struct thermal_cooling_device *cdev = platform_get_drvdata(pdev); - - cpufreq_cooling_unregister(cdev); - - return 0; -} - -static int db8500_cpufreq_cooling_suspend(struct platform_device *pdev, - pm_message_t state) -{ - return -ENOSYS; -} - -static int db8500_cpufreq_cooling_resume(struct platform_device *pdev) -{ - return -ENOSYS; -} - -#ifdef CONFIG_OF -static const struct of_device_id db8500_cpufreq_cooling_match[] = { - { .compatible = "stericsson,db8500-cpufreq-cooling" }, - {}, -}; -MODULE_DEVICE_TABLE(of, db8500_cpufreq_cooling_match); -#endif - -static struct platform_driver db8500_cpufreq_cooling_driver = { - .driver = { - .name = "db8500-cpufreq-cooling", - .of_match_table = of_match_ptr(db8500_cpufreq_cooling_match), - }, - .probe = db8500_cpufreq_cooling_probe, - .suspend = db8500_cpufreq_cooling_suspend, - .resume = db8500_cpufreq_cooling_resume, - .remove = db8500_cpufreq_cooling_remove, -}; - -static int __init db8500_cpufreq_cooling_init(void) -{ - return platform_driver_register(&db8500_cpufreq_cooling_driver); -} - -static void __exit db8500_cpufreq_cooling_exit(void) -{ - platform_driver_unregister(&db8500_cpufreq_cooling_driver); -} - -/* Should be later than db8500_cpufreq_register */ -late_initcall(db8500_cpufreq_cooling_init); 
-module_exit(db8500_cpufreq_cooling_exit); - -MODULE_AUTHOR("Hongbo Zhang <hongbo.zhang@stericsson.com>"); -MODULE_DESCRIPTION("DB8500 cpufreq cooling driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index b0500a0a87b8..e4603b09863a 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -492,6 +492,41 @@ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) } /** + * tty_ldisc_restore - helper for tty ldisc change + * @tty: tty to recover + * @old: previous ldisc + * + * Restore the previous line discipline or N_TTY when a line discipline + * change fails due to an open error + */ + +static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) +{ + struct tty_ldisc *new_ldisc; + int r; + + /* There is an outstanding reference here so this is safe */ + old = tty_ldisc_get(tty, old->ops->num); + WARN_ON(IS_ERR(old)); + tty->ldisc = old; + tty_set_termios_ldisc(tty, old->ops->num); + if (tty_ldisc_open(tty, old) < 0) { + tty_ldisc_put(old); + /* This driver is always present */ + new_ldisc = tty_ldisc_get(tty, N_TTY); + if (IS_ERR(new_ldisc)) + panic("n_tty: get"); + tty->ldisc = new_ldisc; + tty_set_termios_ldisc(tty, N_TTY); + r = tty_ldisc_open(tty, new_ldisc); + if (r < 0) + panic("Couldn't open N_TTY ldisc for " + "%s --- error %d.", + tty_name(tty), r); + } +} + +/** * tty_set_ldisc - set line discipline * @tty: the terminal to set * @ldisc: the line discipline @@ -504,7 +539,12 @@ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) int tty_set_ldisc(struct tty_struct *tty, int disc) { - int retval, old_disc; + int retval; + struct tty_ldisc *old_ldisc, *new_ldisc; + + new_ldisc = tty_ldisc_get(tty, disc); + if (IS_ERR(new_ldisc)) + return PTR_ERR(new_ldisc); tty_lock(tty); retval = tty_ldisc_lock(tty, 5 * HZ); @@ -517,8 +557,7 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) } /* Check the no-op case */ - old_disc = 
tty->ldisc->ops->num; - if (old_disc == disc) + if (tty->ldisc->ops->num == disc) goto out; if (test_bit(TTY_HUPPED, &tty->flags)) { @@ -527,25 +566,34 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) goto out; } - retval = tty_ldisc_reinit(tty, disc); + old_ldisc = tty->ldisc; + + /* Shutdown the old discipline. */ + tty_ldisc_close(tty, old_ldisc); + + /* Now set up the new line discipline. */ + tty->ldisc = new_ldisc; + tty_set_termios_ldisc(tty, disc); + + retval = tty_ldisc_open(tty, new_ldisc); if (retval < 0) { /* Back to the old one or N_TTY if we can't */ - if (tty_ldisc_reinit(tty, old_disc) < 0) { - pr_err("tty: TIOCSETD failed, reinitializing N_TTY\n"); - if (tty_ldisc_reinit(tty, N_TTY) < 0) { - /* At this point we have tty->ldisc == NULL. */ - pr_err("tty: reinitializing N_TTY failed\n"); - } - } + tty_ldisc_put(new_ldisc); + tty_ldisc_restore(tty, old_ldisc); } - if (tty->ldisc && tty->ldisc->ops->num != old_disc && - tty->ops->set_ldisc) { + if (tty->ldisc->ops->num != old_ldisc->ops->num && tty->ops->set_ldisc) { down_read(&tty->termios_rwsem); tty->ops->set_ldisc(tty); up_read(&tty->termios_rwsem); } + /* At this point we hold a reference to the new ldisc and a + reference to the old ldisc, or we hold two references to + the old ldisc (if it was restored as part of error cleanup + above). In either case, releasing a single reference from + the old ldisc is correct. 
*/ + new_ldisc = old_ldisc; out: tty_ldisc_unlock(tty); @@ -553,6 +601,7 @@ out: already running */ tty_buffer_restart_work(tty->port); err: + tty_ldisc_put(new_ldisc); /* drop the extra reference */ tty_unlock(tty); return retval; } @@ -613,8 +662,10 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) int retval; ld = tty_ldisc_get(tty, disc); - if (IS_ERR(ld)) + if (IS_ERR(ld)) { + BUG_ON(disc == N_TTY); return PTR_ERR(ld); + } if (tty->ldisc) { tty_ldisc_close(tty, tty->ldisc); @@ -626,8 +677,10 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) tty_set_termios_ldisc(tty, disc); retval = tty_ldisc_open(tty, tty->ldisc); if (retval) { - tty_ldisc_put(tty->ldisc); - tty->ldisc = NULL; + if (!WARN_ON(disc == N_TTY)) { + tty_ldisc_put(tty->ldisc); + tty->ldisc = NULL; + } } return retval; } diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c index d2351139342f..a82e2bd5ea34 100644 --- a/drivers/usb/gadget/function/f_tcm.c +++ b/drivers/usb/gadget/function/f_tcm.c @@ -373,7 +373,7 @@ static void bot_cleanup_old_alt(struct f_uas *fu) usb_ep_free_request(fu->ep_in, fu->bot_req_in); usb_ep_free_request(fu->ep_out, fu->bot_req_out); usb_ep_free_request(fu->ep_out, fu->cmd.req); - usb_ep_free_request(fu->ep_out, fu->bot_status.req); + usb_ep_free_request(fu->ep_in, fu->bot_status.req); kfree(fu->cmd.buf); diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index d7efcb632f7d..002f1ce22bd0 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -297,14 +297,15 @@ static int pwm_backlight_probe(struct platform_device *pdev) } /* - * If the GPIO is configured as input, change the direction to output - * and set the GPIO as active. + * If the GPIO is not known to be already configured as output, that + * is, if gpiod_get_direction returns either GPIOF_DIR_IN or -EINVAL, + * change the direction to output and set the GPIO as active. 
* Do not force the GPIO to active when it was already output as it * could cause backlight flickering or we would enable the backlight too * early. Leave the decision of the initial backlight state for later. */ if (pb->enable_gpio && - gpiod_get_direction(pb->enable_gpio) == GPIOF_DIR_IN) + gpiod_get_direction(pb->enable_gpio) != GPIOF_DIR_OUT) gpiod_direction_output(pb->enable_gpio, 1); pb->power_supply = devm_regulator_get(&pdev->dev, "power"); diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c index 8c4dc1e1f94f..b827a8113e26 100644 --- a/drivers/video/fbdev/efifb.c +++ b/drivers/video/fbdev/efifb.c @@ -10,6 +10,7 @@ #include <linux/efi.h> #include <linux/errno.h> #include <linux/fb.h> +#include <linux/pci.h> #include <linux/platform_device.h> #include <linux/screen_info.h> #include <video/vga.h> @@ -143,6 +144,8 @@ static struct attribute *efifb_attrs[] = { }; ATTRIBUTE_GROUPS(efifb); +static bool pci_dev_disabled; /* FB base matches BAR of a disabled device */ + static int efifb_probe(struct platform_device *dev) { struct fb_info *info; @@ -152,7 +155,7 @@ static int efifb_probe(struct platform_device *dev) unsigned int size_total; char *option = NULL; - if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) + if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || pci_dev_disabled) return -ENODEV; if (fb_get_options("efifb", &option)) @@ -360,3 +363,64 @@ static struct platform_driver efifb_driver = { }; builtin_platform_driver(efifb_driver); + +#if defined(CONFIG_PCI) && !defined(CONFIG_X86) + +static bool pci_bar_found; /* did we find a BAR matching the efifb base? 
*/ + +static void claim_efifb_bar(struct pci_dev *dev, int idx) +{ + u16 word; + + pci_bar_found = true; + + pci_read_config_word(dev, PCI_COMMAND, &word); + if (!(word & PCI_COMMAND_MEMORY)) { + pci_dev_disabled = true; + dev_err(&dev->dev, + "BAR %d: assigned to efifb but device is disabled!\n", + idx); + return; + } + + if (pci_claim_resource(dev, idx)) { + pci_dev_disabled = true; + dev_err(&dev->dev, + "BAR %d: failed to claim resource for efifb!\n", idx); + return; + } + + dev_info(&dev->dev, "BAR %d: assigned to efifb\n", idx); +} + +static void efifb_fixup_resources(struct pci_dev *dev) +{ + u64 base = screen_info.lfb_base; + u64 size = screen_info.lfb_size; + int i; + + if (pci_bar_found || screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) + return; + + if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE) + base |= (u64)screen_info.ext_lfb_base << 32; + + if (!base) + return; + + for (i = 0; i < PCI_STD_RESOURCE_END; i++) { + struct resource *res = &dev->resource[i]; + + if (!(res->flags & IORESOURCE_MEM)) + continue; + + if (res->start <= base && res->end >= base + size - 1) { + claim_efifb_bar(dev, i); + break; + } + } +} +DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID, PCI_BASE_CLASS_DISPLAY, + 16, efifb_fixup_resources); + +#endif diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index 1abba07b84b3..f4cbfb3b8a09 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -1608,19 +1608,6 @@ static int omapfb_find_ctrl(struct omapfb_device *fbdev) return 0; } -static void check_required_callbacks(struct omapfb_device *fbdev) -{ -#define _C(x) (fbdev->ctrl->x != NULL) -#define _P(x) (fbdev->panel->x != NULL) - BUG_ON(fbdev->ctrl == NULL || fbdev->panel == NULL); - BUG_ON(!(_C(init) && _C(cleanup) && _C(get_caps) && - _C(set_update_mode) && _C(setup_plane) && _C(enable_plane) && - _P(init) && _P(cleanup) && _P(enable) && _P(disable) && - _P(get_caps))); -#undef 
_P -#undef _C -} - /* * Called by LDM binding to probe and attach a new device. * Initialization sequence: @@ -1705,8 +1692,6 @@ static int omapfb_do_probe(struct platform_device *pdev, omapfb_ops.fb_mmap = omapfb_mmap; init_state++; - check_required_callbacks(fbdev); - r = planes_init(fbdev); if (r) goto cleanup; diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c index bd017b57c47f..f599520374dd 100644 --- a/drivers/video/fbdev/ssd1307fb.c +++ b/drivers/video/fbdev/ssd1307fb.c @@ -578,10 +578,14 @@ static int ssd1307fb_probe(struct i2c_client *client, par->vbat_reg = devm_regulator_get_optional(&client->dev, "vbat"); if (IS_ERR(par->vbat_reg)) { - dev_err(&client->dev, "failed to get VBAT regulator: %ld\n", - PTR_ERR(par->vbat_reg)); ret = PTR_ERR(par->vbat_reg); - goto fb_alloc_error; + if (ret == -ENODEV) { + par->vbat_reg = NULL; + } else { + dev_err(&client->dev, "failed to get VBAT regulator: %d\n", + ret); + goto fb_alloc_error; + } } if (of_property_read_u32(node, "solomon,width", &par->width)) @@ -668,10 +672,13 @@ static int ssd1307fb_probe(struct i2c_client *client, udelay(4); } - ret = regulator_enable(par->vbat_reg); - if (ret) { - dev_err(&client->dev, "failed to enable VBAT: %d\n", ret); - goto reset_oled_error; + if (par->vbat_reg) { + ret = regulator_enable(par->vbat_reg); + if (ret) { + dev_err(&client->dev, "failed to enable VBAT: %d\n", + ret); + goto reset_oled_error; + } } ret = ssd1307fb_init(par); @@ -710,7 +717,8 @@ panel_init_error: pwm_put(par->pwm); }; regulator_enable_error: - regulator_disable(par->vbat_reg); + if (par->vbat_reg) + regulator_disable(par->vbat_reg); reset_oled_error: fb_deferred_io_cleanup(info); fb_alloc_error: diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c index d0115a7af0a9..3ee309c50b2d 100644 --- a/drivers/video/fbdev/xen-fbfront.c +++ b/drivers/video/fbdev/xen-fbfront.c @@ -643,7 +643,6 @@ static void xenfb_backend_changed(struct xenbus_device *dev, 
break; case XenbusStateInitWait: -InitWait: xenbus_switch_state(dev, XenbusStateConnected); break; @@ -654,7 +653,8 @@ InitWait: * get Connected twice here. */ if (dev->state != XenbusStateConnected) - goto InitWait; /* no InitWait seen yet, fudge it */ + /* no InitWait seen yet, fudge it */ + xenbus_switch_state(dev, XenbusStateConnected); if (xenbus_read_unsigned(info->xbdev->otherend, "request-update", 0)) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 400d70b69379..48230a5e12f2 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -232,6 +232,12 @@ static int virtio_dev_probe(struct device *_d) if (device_features & (1ULL << i)) __virtio_set_bit(dev, i); + if (drv->validate) { + err = drv->validate(dev); + if (err) + goto err; + } + err = virtio_finalize_features(dev); if (err) goto err; diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 590534910dc6..698d5d06fa03 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -33,8 +33,10 @@ void vp_synchronize_vectors(struct virtio_device *vdev) struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; - synchronize_irq(pci_irq_vector(vp_dev->pci_dev, 0)); - for (i = 1; i < vp_dev->msix_vectors; i++) + if (vp_dev->intx_enabled) + synchronize_irq(vp_dev->pci_dev->irq); + + for (i = 0; i < vp_dev->msix_vectors; ++i) synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); } @@ -60,13 +62,16 @@ static irqreturn_t vp_config_changed(int irq, void *opaque) static irqreturn_t vp_vring_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; + struct virtio_pci_vq_info *info; irqreturn_t ret = IRQ_NONE; - struct virtqueue *vq; + unsigned long flags; - list_for_each_entry(vq, &vp_dev->vdev.vqs, list) { - if (vq->callback && vring_interrupt(irq, vq) == IRQ_HANDLED) + spin_lock_irqsave(&vp_dev->lock, flags); + list_for_each_entry(info, &vp_dev->virtqueues, node) { + if (vring_interrupt(irq, 
info->vq) == IRQ_HANDLED) ret = IRQ_HANDLED; } + spin_unlock_irqrestore(&vp_dev->lock, flags); return ret; } @@ -97,186 +102,244 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) return vp_vring_interrupt(irq, opaque); } -static void vp_remove_vqs(struct virtio_device *vdev) +static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, + bool per_vq_vectors, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtqueue *vq, *n; + const char *name = dev_name(&vp_dev->vdev.dev); + unsigned i, v; + int err = -ENOMEM; - list_for_each_entry_safe(vq, n, &vdev->vqs, list) { - if (vp_dev->msix_vector_map) { - int v = vp_dev->msix_vector_map[vq->index]; + vp_dev->msix_vectors = nvectors; - if (v != VIRTIO_MSI_NO_VECTOR) - free_irq(pci_irq_vector(vp_dev->pci_dev, v), - vq); - } - vp_dev->del_vq(vq); + vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, + GFP_KERNEL); + if (!vp_dev->msix_names) + goto error; + vp_dev->msix_affinity_masks + = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, + GFP_KERNEL); + if (!vp_dev->msix_affinity_masks) + goto error; + for (i = 0; i < nvectors; ++i) + if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], + GFP_KERNEL)) + goto error; + + err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, + nvectors, PCI_IRQ_MSIX | + (desc ? 
PCI_IRQ_AFFINITY : 0), + desc); + if (err < 0) + goto error; + vp_dev->msix_enabled = 1; + + /* Set the vector used for configuration */ + v = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, + "%s-config", name); + err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), + vp_config_changed, 0, vp_dev->msix_names[v], + vp_dev); + if (err) + goto error; + ++vp_dev->msix_used_vectors; + + v = vp_dev->config_vector(vp_dev, v); + /* Verify we had enough resources to assign the vector */ + if (v == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto error; } + + if (!per_vq_vectors) { + /* Shared vector for all VQs */ + v = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, + "%s-virtqueues", name); + err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), + vp_vring_interrupt, 0, vp_dev->msix_names[v], + vp_dev); + if (err) + goto error; + ++vp_dev->msix_used_vectors; + } + return 0; +error: + return err; +} + +static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, + void (*callback)(struct virtqueue *vq), + const char *name, + u16 msix_vec) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); + struct virtqueue *vq; + unsigned long flags; + + /* fill out our structure that represents an active queue */ + if (!info) + return ERR_PTR(-ENOMEM); + + vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, + msix_vec); + if (IS_ERR(vq)) + goto out_info; + + info->vq = vq; + if (callback) { + spin_lock_irqsave(&vp_dev->lock, flags); + list_add(&info->node, &vp_dev->virtqueues); + spin_unlock_irqrestore(&vp_dev->lock, flags); + } else { + INIT_LIST_HEAD(&info->node); + } + + vp_dev->vqs[index] = info; + return vq; + +out_info: + kfree(info); + return vq; +} + +static void vp_del_vq(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_vq_info *info = 
vp_dev->vqs[vq->index]; + unsigned long flags; + + spin_lock_irqsave(&vp_dev->lock, flags); + list_del(&info->node); + spin_unlock_irqrestore(&vp_dev->lock, flags); + + vp_dev->del_vq(info); + kfree(info); } /* the config->del_vqs() implementation */ void vp_del_vqs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtqueue *vq, *n; int i; - if (WARN_ON_ONCE(list_empty_careful(&vdev->vqs))) - return; + list_for_each_entry_safe(vq, n, &vdev->vqs, list) { + if (vp_dev->per_vq_vectors) { + int v = vp_dev->vqs[vq->index]->msix_vector; - vp_remove_vqs(vdev); + if (v != VIRTIO_MSI_NO_VECTOR) { + int irq = pci_irq_vector(vp_dev->pci_dev, v); + + irq_set_affinity_hint(irq, NULL); + free_irq(irq, vq); + } + } + vp_del_vq(vq); + } + vp_dev->per_vq_vectors = false; + + if (vp_dev->intx_enabled) { + free_irq(vp_dev->pci_dev->irq, vp_dev); + vp_dev->intx_enabled = 0; + } - if (vp_dev->pci_dev->msix_enabled) { - for (i = 0; i < vp_dev->msix_vectors; i++) + for (i = 0; i < vp_dev->msix_used_vectors; ++i) + free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); + + for (i = 0; i < vp_dev->msix_vectors; i++) + if (vp_dev->msix_affinity_masks[i]) free_cpumask_var(vp_dev->msix_affinity_masks[i]); + if (vp_dev->msix_enabled) { /* Disable the vector used for configuration */ vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); - kfree(vp_dev->msix_affinity_masks); - kfree(vp_dev->msix_names); - kfree(vp_dev->msix_vector_map); + pci_free_irq_vectors(vp_dev->pci_dev); + vp_dev->msix_enabled = 0; } - free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); - pci_free_irq_vectors(vp_dev->pci_dev); + vp_dev->msix_vectors = 0; + vp_dev->msix_used_vectors = 0; + kfree(vp_dev->msix_names); + vp_dev->msix_names = NULL; + kfree(vp_dev->msix_affinity_masks); + vp_dev->msix_affinity_masks = NULL; + kfree(vp_dev->vqs); + vp_dev->vqs = NULL; } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t 
*callbacks[], - const char * const names[], struct irq_affinity *desc) + const char * const names[], bool per_vq_vectors, + struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - const char *name = dev_name(&vp_dev->vdev.dev); - int i, j, err = -ENOMEM, allocated_vectors, nvectors; - unsigned flags = PCI_IRQ_MSIX; - bool shared = false; u16 msix_vec; + int i, err, nvectors, allocated_vectors; - if (desc) { - flags |= PCI_IRQ_AFFINITY; - desc->pre_vectors++; /* virtio config vector */ - } - - nvectors = 1; - for (i = 0; i < nvqs; i++) - if (callbacks[i]) - nvectors++; - - /* Try one vector per queue first. */ - err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, - nvectors, flags, desc); - if (err < 0) { - /* Fallback to one vector for config, one shared for queues. */ - shared = true; - err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2, - PCI_IRQ_MSIX); - if (err < 0) - return err; - } - if (err < 0) - return err; - - vp_dev->msix_vectors = nvectors; - vp_dev->msix_names = kmalloc_array(nvectors, - sizeof(*vp_dev->msix_names), GFP_KERNEL); - if (!vp_dev->msix_names) - goto out_free_irq_vectors; - - vp_dev->msix_affinity_masks = kcalloc(nvectors, - sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL); - if (!vp_dev->msix_affinity_masks) - goto out_free_msix_names; + vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); + if (!vp_dev->vqs) + return -ENOMEM; - for (i = 0; i < nvectors; ++i) { - if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], - GFP_KERNEL)) - goto out_free_msix_affinity_masks; + if (per_vq_vectors) { + /* Best option: one for change interrupt, one per vq. */ + nvectors = 1; + for (i = 0; i < nvqs; ++i) + if (callbacks[i]) + ++nvectors; + } else { + /* Second best: one for change, shared for all vqs. 
*/ + nvectors = 2; } - /* Set the vector used for configuration */ - snprintf(vp_dev->msix_names[0], sizeof(*vp_dev->msix_names), - "%s-config", name); - err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed, - 0, vp_dev->msix_names[0], vp_dev); + err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors, + per_vq_vectors ? desc : NULL); if (err) - goto out_free_msix_affinity_masks; + goto error_find; - /* Verify we had enough resources to assign the vector */ - if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) { - err = -EBUSY; - goto out_free_config_irq; - } - - vp_dev->msix_vector_map = kmalloc_array(nvqs, - sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); - if (!vp_dev->msix_vector_map) - goto out_disable_config_irq; - - allocated_vectors = j = 1; /* vector 0 is the config interrupt */ + vp_dev->per_vq_vectors = per_vq_vectors; + allocated_vectors = vp_dev->msix_used_vectors; for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; continue; } - if (callbacks[i]) - msix_vec = allocated_vectors; - else + if (!callbacks[i]) msix_vec = VIRTIO_MSI_NO_VECTOR; - - vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], - msix_vec); + else if (vp_dev->per_vq_vectors) + msix_vec = allocated_vectors++; + else + msix_vec = VP_MSIX_VQ_VECTOR; + vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], + msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); - goto out_remove_vqs; + goto error_find; } - if (msix_vec == VIRTIO_MSI_NO_VECTOR) { - vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; + if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) continue; - } - snprintf(vp_dev->msix_names[j], - sizeof(*vp_dev->msix_names), "%s-%s", + /* allocate per-vq irq if available and necessary */ + snprintf(vp_dev->msix_names[msix_vec], + sizeof *vp_dev->msix_names, + "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, IRQF_SHARED, - vp_dev->msix_names[j], 
vqs[i]); - if (err) { - /* don't free this irq on error */ - vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; - goto out_remove_vqs; - } - vp_dev->msix_vector_map[i] = msix_vec; - j++; - - /* - * Use a different vector for each queue if they are available, - * else share the same vector for all VQs. - */ - if (!shared) - allocated_vectors++; + vring_interrupt, 0, + vp_dev->msix_names[msix_vec], + vqs[i]); + if (err) + goto error_find; } - return 0; -out_remove_vqs: - vp_remove_vqs(vdev); - kfree(vp_dev->msix_vector_map); -out_disable_config_irq: - vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); -out_free_config_irq: - free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); -out_free_msix_affinity_masks: - for (i = 0; i < nvectors; i++) { - if (vp_dev->msix_affinity_masks[i]) - free_cpumask_var(vp_dev->msix_affinity_masks[i]); - } - kfree(vp_dev->msix_affinity_masks); -out_free_msix_names: - kfree(vp_dev->msix_names); -out_free_irq_vectors: - pci_free_irq_vectors(vp_dev->pci_dev); +error_find: + vp_del_vqs(vdev); return err; } @@ -287,29 +350,33 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i, err; + vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); + if (!vp_dev->vqs) + return -ENOMEM; + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) - return err; + goto out_del_vqs; + vp_dev->intx_enabled = 1; + vp_dev->per_vq_vectors = false; for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; continue; } - vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], - VIRTIO_MSI_NO_VECTOR); + vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], + VIRTIO_MSI_NO_VECTOR); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); - goto out_remove_vqs; + goto out_del_vqs; } } return 0; - -out_remove_vqs: - vp_remove_vqs(vdev); - free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); +out_del_vqs: + vp_del_vqs(vdev); return 
err; } @@ -320,9 +387,15 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, { int err; - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, desc); + /* Try MSI-X with one vector per queue. */ + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, desc); if (!err) return 0; + /* Fallback: MSI-X with one vector for config, one shared for queues. */ + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, desc); + if (!err) + return 0; + /* Finally fall back to regular interrupts. */ return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names); } @@ -342,15 +415,16 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) { struct virtio_device *vdev = vq->vdev; struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; + struct cpumask *mask; + unsigned int irq; if (!vq->callback) return -EINVAL; - if (vp_dev->pci_dev->msix_enabled) { - int vec = vp_dev->msix_vector_map[vq->index]; - struct cpumask *mask = vp_dev->msix_affinity_masks[vec]; - unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec); - + if (vp_dev->msix_enabled) { + mask = vp_dev->msix_affinity_masks[info->msix_vector]; + irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); if (cpu == -1) irq_set_affinity_hint(irq, NULL); else { @@ -365,12 +439,13 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - unsigned int *map = vp_dev->msix_vector_map; - if (!map || map[index] == VIRTIO_MSI_NO_VECTOR) + if (!vp_dev->per_vq_vectors || + vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR) return NULL; - return pci_irq_get_affinity(vp_dev->pci_dev, map[index]); + return pci_irq_get_affinity(vp_dev->pci_dev, + vp_dev->vqs[index]->msix_vector); } #ifdef CONFIG_PM_SLEEP @@ -441,6 +516,8 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, vp_dev->vdev.dev.parent = 
&pci_dev->dev; vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->pci_dev = pci_dev; + INIT_LIST_HEAD(&vp_dev->virtqueues); + spin_lock_init(&vp_dev->lock); /* enable the device */ rc = pci_enable_device(pci_dev); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index ac8c9d788964..e96334aec1e0 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -31,6 +31,17 @@ #include <linux/highmem.h> #include <linux/spinlock.h> +struct virtio_pci_vq_info { + /* the actual virtqueue */ + struct virtqueue *vq; + + /* the list node for the virtqueues list */ + struct list_head node; + + /* MSI-X vector (or none) */ + unsigned msix_vector; +}; + /* Our device structure */ struct virtio_pci_device { struct virtio_device vdev; @@ -64,25 +75,47 @@ struct virtio_pci_device { /* the IO mapping for the PCI config space */ void __iomem *ioaddr; + /* a list of queues so we can dispatch IRQs */ + spinlock_t lock; + struct list_head virtqueues; + + /* array of all queues for house-keeping */ + struct virtio_pci_vq_info **vqs; + + /* MSI-X support */ + int msix_enabled; + int intx_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ char (*msix_names)[256]; - /* Total Number of MSI-X vectors (including per-VQ ones). 
*/ - int msix_vectors; - /* Map of per-VQ MSI-X vectors, may be NULL */ - unsigned *msix_vector_map; + /* Number of available vectors */ + unsigned msix_vectors; + /* Vectors allocated, excluding per-vq vectors if any */ + unsigned msix_used_vectors; + + /* Whether we have vector per vq */ + bool per_vq_vectors; struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev, + struct virtio_pci_vq_info *info, unsigned idx, void (*callback)(struct virtqueue *vq), const char *name, u16 msix_vec); - void (*del_vq)(struct virtqueue *vq); + void (*del_vq)(struct virtio_pci_vq_info *info); u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); }; +/* Constants for MSI-X */ +/* Use first vector for configuration changes, second and the rest for + * virtqueues Thus, we need at least 2 vectors for MSI. */ +enum { + VP_MSIX_CONFIG_VECTOR = 0, + VP_MSIX_VQ_VECTOR = 1, +}; + /* Convert a generic virtio device to our structure */ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) { diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index f7362c5fe18a..4bfa48fb1324 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -112,6 +112,7 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) } static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, + struct virtio_pci_vq_info *info, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, @@ -129,6 +130,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) return ERR_PTR(-ENOENT); + info->msix_vector = msix_vec; + /* create the vring */ vq = vring_create_virtqueue(index, num, VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev, @@ -159,13 +162,14 @@ out_deactivate: return ERR_PTR(err); } -static void del_vq(struct virtqueue *vq) +static void del_vq(struct virtio_pci_vq_info *info) { + struct virtqueue *vq = 
info->vq; struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - if (vp_dev->pci_dev->msix_enabled) { + if (vp_dev->msix_enabled) { iowrite16(VIRTIO_MSI_NO_VECTOR, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); /* Flush the write out to device */ diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 7bc3004b840e..8978f109d2d7 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -293,6 +293,7 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) } static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, + struct virtio_pci_vq_info *info, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, @@ -322,6 +323,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, /* get offset of notification word for this vq */ off = vp_ioread16(&cfg->queue_notify_off); + info->msix_vector = msix_vec; + /* create the vring */ vq = vring_create_virtqueue(index, num, SMP_CACHE_BYTES, &vp_dev->vdev, @@ -405,13 +408,14 @@ static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, return 0; } -static void del_vq(struct virtqueue *vq) +static void del_vq(struct virtio_pci_vq_info *info) { + struct virtqueue *vq = info->vq; struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); vp_iowrite16(vq->index, &vp_dev->common->queue_select); - if (vp_dev->pci_dev->msix_enabled) { + if (vp_dev->msix_enabled) { vp_iowrite16(VIRTIO_MSI_NO_VECTOR, &vp_dev->common->queue_msix_vector); /* Flush the write out to device */ |