From c787f1baa5031c22cbe20af17b2ee36ad32957ea Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:34 -0500 Subject: block: Add PR callouts for read keys and reservation Add callouts for reading keys and reservations. This allows LIO to support the READ_KEYS and READ_RESERVATION commands so it can export devices to VMs for software like windows clustering. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-2-michael.christie@oracle.com Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- include/linux/pr.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pr.h b/include/linux/pr.h index 94ceec713afe..3003daec28a5 100644 --- a/include/linux/pr.h +++ b/include/linux/pr.h @@ -4,6 +4,18 @@ #include +struct pr_keys { + u32 generation; + u32 num_keys; + u64 keys[]; +}; + +struct pr_held_reservation { + u64 key; + u32 generation; + enum pr_type type; +}; + struct pr_ops { int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key, u32 flags); @@ -14,6 +26,19 @@ struct pr_ops { int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key, enum pr_type type, bool abort); int (*pr_clear)(struct block_device *bdev, u64 key); + /* + * pr_read_keys - Read the registered keys and return them in the + * pr_keys->keys array. The keys array will have been allocated at the + * end of the pr_keys struct, and pr_keys->num_keys must be set to the + * number of keys the array can hold. If there are more than can fit + * in the array, success will still be returned and pr_keys->num_keys + * will reflect the total number of keys the device contains, so the + * caller can retry with a larger array. + */ + int (*pr_read_keys)(struct block_device *bdev, + struct pr_keys *keys_info); + int (*pr_read_reservation)(struct block_device *bdev, + struct pr_held_reservation *rsv); }; #endif /* LINUX_PR_H */ -- cgit v1.2.3 From 7ba150834b840f6f5cdd07ca69a4ccf39df59a66 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:35 -0500 Subject: block: Rename BLK_STS_NEXUS to BLK_STS_RESV_CONFLICT BLK_STS_NEXUS is used for NVMe/SCSI reservation conflicts and DASD's locking feature which works similar to NVMe/SCSI reservations where a host can get a lock on a device and when the lock is taken it will get failures. This patch renames BLK_STS_NEXUS so it better reflects this type of use. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-3-michael.christie@oracle.com Acked-by: Stefan Haberland Reviewed-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- block/blk-core.c | 2 +- drivers/nvme/host/core.c | 2 +- drivers/s390/block/dasd.c | 7 ++++++- drivers/scsi/scsi_lib.c | 2 +- include/linux/blk_types.h | 4 ++-- 5 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 9e5e0277a4d9..ff8fb7a49389 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -155,7 +155,7 @@ static const struct { [BLK_STS_NOSPC] = { -ENOSPC, "critical space allocation" }, [BLK_STS_TRANSPORT] = { -ENOLINK, "recoverable transport" }, [BLK_STS_TARGET] = { -EREMOTEIO, "critical target" }, - [BLK_STS_NEXUS] = { -EBADE, "critical nexus" }, + [BLK_STS_RESV_CONFLICT] = { -EBADE, "reservation conflict" }, [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" }, [BLK_STS_PROTECTION] = { -EILSEQ, "protection" }, [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" }, diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c2730b116dc6..535a26ceb205 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -279,7 +279,7 @@ static blk_status_t nvme_error_status(u16 status) case NVME_SC_INVALID_PI: return BLK_STS_PROTECTION; case NVME_SC_RESERVATION_CONFLICT: - return BLK_STS_NEXUS; + return BLK_STS_RESV_CONFLICT; case NVME_SC_HOST_PATH_ERROR: return BLK_STS_TRANSPORT; case NVME_SC_ZONE_TOO_MANY_ACTIVE: diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a9c2a8d76c45..ca0df87fa8f4 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2723,7 +2723,12 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) else if (status == 0) { switch (cqr->intrc) { case -EPERM: - error = BLK_STS_NEXUS; + /* + * DASD doesn't implement SCSI/NVMe reservations, but it + * implements a locking scheme similar to them. We + * return this error when we no longer have the lock. + */ + error = BLK_STS_RESV_CONFLICT; break; case -ENOLINK: error = BLK_STS_TRANSPORT; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index b7c569a42aa4..e1468483ac7e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -599,7 +599,7 @@ static blk_status_t scsi_result_to_blk_status(int result) case SCSIML_STAT_OK: break; case SCSIML_STAT_RESV_CONFLICT: - return BLK_STS_NEXUS; + return BLK_STS_RESV_CONFLICT; case SCSIML_STAT_NOSPC: return BLK_STS_NOSPC; case SCSIML_STAT_MED_ERROR: diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 99be590f952f..2b2452086a2f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -96,7 +96,7 @@ typedef u16 blk_short_t; #define BLK_STS_NOSPC ((__force blk_status_t)3) #define BLK_STS_TRANSPORT ((__force blk_status_t)4) #define BLK_STS_TARGET ((__force blk_status_t)5) -#define BLK_STS_NEXUS ((__force blk_status_t)6) +#define BLK_STS_RESV_CONFLICT ((__force blk_status_t)6) #define BLK_STS_MEDIUM ((__force blk_status_t)7) #define BLK_STS_PROTECTION ((__force blk_status_t)8) #define BLK_STS_RESOURCE ((__force blk_status_t)9) @@ -184,7 +184,7 @@ static inline bool blk_path_error(blk_status_t error) case BLK_STS_NOTSUPP: case BLK_STS_NOSPC: case BLK_STS_TARGET: - case BLK_STS_NEXUS: + case BLK_STS_RESV_CONFLICT: case BLK_STS_MEDIUM: case BLK_STS_PROTECTION: return false; -- cgit v1.2.3 From f2bf2e7e2d526116aab942aaf1b71a949a570ba6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:40 -0500 Subject: nvme: Fix reservation status related structs This fixes the following issues with the reservation status structs: 1. resv10 is bytes 23:10 so it should be 14 bytes. 2. regctl_ds only supports 64 bit host IDs. These are not currently used, but will be in this patchset which adds support for the reservation report command. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-8-michael.christie@oracle.com Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- include/linux/nvme.h | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 4fad4aa245fb..57b5b2b8d95b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -759,20 +759,42 @@ enum { NVME_LBART_ATTRIB_HIDE = 1 << 1, }; +struct nvme_registered_ctrl { + __le16 cntlid; + __u8 rcsts; + __u8 rsvd3[5]; + __le64 hostid; + __le64 rkey; +}; + struct nvme_reservation_status { __le32 gen; __u8 rtype; __u8 regctl[2]; __u8 resv5[2]; __u8 ptpls; - __u8 resv10[13]; - struct { - __le16 cntlid; - __u8 rcsts; - __u8 resv3[5]; - __le64 hostid; - __le64 rkey; - } regctl_ds[]; + __u8 resv10[14]; + struct nvme_registered_ctrl regctl_ds[]; +}; + +struct nvme_registered_ctrl_ext { + __le16 cntlid; + __u8 rcsts; + __u8 rsvd3[5]; + __le64 rkey; + __u8 hostid[16]; + __u8 rsvd32[32]; +}; + +struct nvme_reservation_status_ext { + __le32 gen; + __u8 rtype; + __u8 regctl[2]; + __u8 resv5[2]; + __u8 ptpls; + __u8 resv10[14]; + __u8 rsvd24[40]; + struct nvme_registered_ctrl_ext regctl_eds[]; }; enum nvme_async_event_type { -- cgit v1.2.3 From 5fd96a4e15de8442915a912233d800c56f49001d Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:44 -0500 Subject: nvme: Add pr_ops read_keys support This patch adds support for the pr_ops read_keys callout by calling the NVMe Reservation Report helper, then parsing that info to get the controller's registered keys. Because the callout is only used in the kernel where the callers, like LIO, do not know about controller/host IDs, the callout just returns the registered keys which is required by the SCSI PR in READ KEYS command. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-12-michael.christie@oracle.com Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/nvme/host/pr.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nvme.h | 4 +++ 2 files changed, 73 insertions(+) (limited to 'include/linux') diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index cd93d2e5b340..0ee656404437 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -154,10 +154,79 @@ static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } +static int nvme_pr_resv_report(struct block_device *bdev, void *data, + u32 data_len, bool *eds) +{ + struct nvme_command c = { }; + int ret; + + c.common.opcode = nvme_cmd_resv_report; + c.common.cdw10 = cpu_to_le32(nvme_bytes_to_numd(data_len)); + c.common.cdw11 = cpu_to_le32(NVME_EXTENDED_DATA_STRUCT); + *eds = true; + +retry: + ret = nvme_send_pr_command(bdev, &c, data, data_len); + if (ret == NVME_SC_HOST_ID_INCONSIST && + c.common.cdw11 == cpu_to_le32(NVME_EXTENDED_DATA_STRUCT)) { + c.common.cdw11 = 0; + *eds = false; + goto retry; + } + + if (ret < 0) + return ret; + + return nvme_sc_to_pr_err(ret); +} + +static int nvme_pr_read_keys(struct block_device *bdev, + struct pr_keys *keys_info) +{ + u32 rse_len, num_keys = keys_info->num_keys; + struct nvme_reservation_status_ext *rse; + int ret, i; + bool eds; + + /* + * Assume we are using 128-bit host IDs and allocate a buffer large + * enough to get enough keys to fill the return keys buffer. + */ + rse_len = struct_size(rse, regctl_eds, num_keys); + rse = kzalloc(rse_len, GFP_KERNEL); + if (!rse) + return -ENOMEM; + + ret = nvme_pr_resv_report(bdev, rse, rse_len, &eds); + if (ret) + goto free_rse; + + keys_info->generation = le32_to_cpu(rse->gen); + keys_info->num_keys = get_unaligned_le16(&rse->regctl); + + num_keys = min(num_keys, keys_info->num_keys); + for (i = 0; i < num_keys; i++) { + if (eds) { + keys_info->keys[i] = + le64_to_cpu(rse->regctl_eds[i].rkey); + } else { + struct nvme_reservation_status *rs; + + rs = (struct nvme_reservation_status *)rse; + keys_info->keys[i] = le64_to_cpu(rs->regctl_ds[i].rkey); + } + } + +free_rse: + kfree(rse); + return ret; +} + const struct pr_ops nvme_pr_ops = { .pr_register = nvme_pr_register, .pr_reserve = nvme_pr_reserve, .pr_release = nvme_pr_release, .pr_preempt = nvme_pr_preempt, .pr_clear = nvme_pr_clear, + .pr_read_keys = nvme_pr_read_keys, }; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 57b5b2b8d95b..a617e250d629 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -759,6 +759,10 @@ enum { NVME_LBART_ATTRIB_HIDE = 1 << 1, }; +enum nvme_eds { + NVME_EXTENDED_DATA_STRUCT = 0x1, +}; + struct nvme_registered_ctrl { __le16 cntlid; __u8 rcsts; -- cgit v1.2.3 From be1a7cd2d0ed028ffdd60c65e3734e2a1d8b17df Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:45 -0500 Subject: nvme: Add a nvme_pr_type enum The next patch adds support to report the reservation type, so we need to be able to convert from the NVMe PR value we get from the device to the linux block layer PR value that will be returned to callers. To prepare for that, this patch adds a nvme_pr_type enum and renames the nvme_pr_type function. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-13-michael.christie@oracle.com Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/nvme/host/pr.c | 24 ++++++++++++------------ include/linux/nvme.h | 9 +++++++++ 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index 0ee656404437..732c56b417c2 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -9,24 +9,24 @@ #include "nvme.h" -static char nvme_pr_type(enum pr_type type) +static enum nvme_pr_type nvme_pr_type_from_blk(enum pr_type type) { switch (type) { case PR_WRITE_EXCLUSIVE: - return 1; + return NVME_PR_WRITE_EXCLUSIVE; case PR_EXCLUSIVE_ACCESS: - return 2; + return NVME_PR_EXCLUSIVE_ACCESS; case PR_WRITE_EXCLUSIVE_REG_ONLY: - return 3; + return NVME_PR_WRITE_EXCLUSIVE_REG_ONLY; case PR_EXCLUSIVE_ACCESS_REG_ONLY: - return 4; + return NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY; case PR_WRITE_EXCLUSIVE_ALL_REGS: - return 5; + return NVME_PR_WRITE_EXCLUSIVE_ALL_REGS; case PR_EXCLUSIVE_ACCESS_ALL_REGS: - return 6; - default: - return 0; + return NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS; } + + return 0; } static int nvme_send_ns_head_pr_command(struct block_device *bdev, @@ -127,7 +127,7 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key, if (flags & ~PR_FL_IGNORE_KEY) return -EOPNOTSUPP; - cdw10 = nvme_pr_type(type) << 8; + cdw10 = nvme_pr_type_from_blk(type) << 8; cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0); return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire); } @@ -135,7 +135,7 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key, static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, enum pr_type type, bool abort) { - u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1); + u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (abort ? 2 : 1); return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); } @@ -149,7 +149,7 @@ static int nvme_pr_clear(struct block_device *bdev, u64 key) static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { - u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 0 : 1 << 3); + u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (key ? 0 : 1 << 3); return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a617e250d629..4013abb86642 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -759,6 +759,15 @@ enum { NVME_LBART_ATTRIB_HIDE = 1 << 1, }; +enum nvme_pr_type { + NVME_PR_WRITE_EXCLUSIVE = 1, + NVME_PR_EXCLUSIVE_ACCESS = 2, + NVME_PR_WRITE_EXCLUSIVE_REG_ONLY = 3, + NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY = 4, + NVME_PR_WRITE_EXCLUSIVE_ALL_REGS = 5, + NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS = 6, +}; + enum nvme_eds { NVME_EXTENDED_DATA_STRUCT = 0x1, }; -- cgit v1.2.3