From 93315e46b000fc80fff5d53c3f444417fb3df6de Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 11 Aug 2022 18:00:00 +0530 Subject: perf/core: Add speculation info to branch entries Add a new "spec" bitfield to branch entries for providing speculation information. This will be populated using hints provided by branch sampling features on supported hardware. The following cases are covered: * No branch speculation information is available * Branch is speculative but taken on the wrong path * Branch is non-speculative but taken on the correct path * Branch is speculative and taken on the correct path Suggested-by: Stephane Eranian Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/834088c302faf21c7b665031dd111f424e509a64.1660211399.git.sandipan.das@amd.com --- include/uapi/linux/perf_event.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 03b370062741..30a4723aefd4 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -256,6 +256,17 @@ enum { PERF_BR_MAX, }; +/* + * Common branch speculation outcome classification + */ +enum { + PERF_BR_SPEC_NA = 0, /* Not available */ + PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ + PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ + PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ + PERF_BR_SPEC_MAX, +}; + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -1363,6 +1374,7 @@ union perf_mem_data_src { * abort: aborting a hardware transaction * cycles: cycles from last branch (or 0 if not supported) * type: branch type + * spec: branch speculation info (or 0 if not supported) */ struct perf_branch_entry { __u64 from; @@ -1373,7 +1385,8 @@ struct perf_branch_entry { abort:1, /* transaction abort */ cycles:16, /* cycle count to last branch */ type:4, /* branch type */ - reserved:40; + spec:2, /* branch speculation info */ + reserved:38; }; union perf_sample_weight { -- cgit v1.2.3 From a724ec82966d57e4b5d36341d3e3dc1a3c011564 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:15 +0530 Subject: perf: Add system error and not in transaction branch types This expands generic branch type classification by adding two more entries there in i.e system error and not in transaction. This also updates the x86 implementation to process X86_BR_NO_TX records as appropriate. This changes branch types reported to user space on x86 platform but it should not be a problem. The possible scenarios and impacts are enumerated here. -------------------------------------------------------------------------- | kernel | perf tool | Impact | -------------------------------------------------------------------------- | old | old | Works as before | -------------------------------------------------------------------------- | old | new | PERF_BR_UNKNOWN is processed | -------------------------------------------------------------------------- | new | old | PERF_BR_NO_TX is blocked via old PERF_BR_MAX | -------------------------------------------------------------------------- | new | new | PERF_BR_NO_TX is recognized | -------------------------------------------------------------------------- When PERF_BR_NO_TX is blocked via old PERF_BR_MAX (new kernel with old perf tool) the user space might throw up an warning complaining about an unrecognized branch types being reported, but it's expected. PERF_BR_SERROR & PERF_BR_NO_TX branch types will be used for BRBE implementation on arm64 platform. PERF_BR_NO_TX complements 'abort' and 'in_tx' elements in perf_branch_entry which represent other transaction states for a given branch record. Because this completes the transaction state classification. Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-2-anshuman.khandual@arm.com --- arch/x86/events/utils.c | 2 +- include/uapi/linux/perf_event.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c index e013243f360c..5f5617afde79 100644 --- a/arch/x86/events/utils.c +++ b/arch/x86/events/utils.c @@ -225,7 +225,7 @@ static int branch_map[X86_BR_TYPE_MAP_MAX] = { PERF_BR_IND_CALL, /* X86_BR_IND_CALL */ PERF_BR_UNKNOWN, /* X86_BR_ABORT */ PERF_BR_UNKNOWN, /* X86_BR_IN_TX */ - PERF_BR_UNKNOWN, /* X86_BR_NO_TX */ + PERF_BR_NO_TX, /* X86_BR_NO_TX */ PERF_BR_CALL, /* X86_BR_ZERO_CALL */ PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */ PERF_BR_IND, /* X86_BR_IND_JMP */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 30a4723aefd4..a79cc0eb4de7 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -253,6 +253,8 @@ enum { PERF_BR_COND_RET = 10, /* conditional function return */ PERF_BR_ERET = 11, /* exception return */ PERF_BR_IRQ = 12, /* irq */ + PERF_BR_SERROR = 13, /* system error */ + PERF_BR_NO_TX = 14, /* not in transaction */ PERF_BR_MAX, }; -- cgit v1.2.3 From b190bc4ac9e6d9763b61654c5a0c085ff77d7a09 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:16 +0530 Subject: perf: Extend branch type classification branch_entry.type now has ran out of space to accommodate more branch types classification. This will prevent perf branch stack implementation on arm64 (via BRBE) to capture all available branch types. Extending this bit field i.e branch_entry.type [4 bits] is not an option as it will break user space ABI both for little and big endian perf tools. Extend branch classification with a new field branch_entry.new_type via a new branch type PERF_BR_EXTEND_ABI in branch_entry.type. Perf tools which could decode PERF_BR_EXTEND_ABI, will then parse branch_entry.new_type as well. branch_entry.new_type is a 4 bit field which can hold upto 16 branch types. The first three branch types will hold various generic page faults followed by five architecture specific branch types, which can be overridden by the platform for specific use cases. These architecture specific branch types gets overridden on arm64 platform for BRBE implementation. New generic branch types - PERF_BR_NEW_FAULT_ALGN - PERF_BR_NEW_FAULT_DATA - PERF_BR_NEW_FAULT_INST New arch specific branch types - PERF_BR_NEW_ARCH_1 - PERF_BR_NEW_ARCH_2 - PERF_BR_NEW_ARCH_3 - PERF_BR_NEW_ARCH_4 - PERF_BR_NEW_ARCH_5 Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-3-anshuman.khandual@arm.com --- include/uapi/linux/perf_event.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index a79cc0eb4de7..fed60e6b10e5 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -255,6 +255,7 @@ enum { PERF_BR_IRQ = 12, /* irq */ PERF_BR_SERROR = 13, /* system error */ PERF_BR_NO_TX = 14, /* not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* extend ABI */ PERF_BR_MAX, }; @@ -269,6 +270,18 @@ enum { PERF_BR_SPEC_MAX, }; +enum { + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_MAX, +}; + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -1388,7 +1401,8 @@ struct perf_branch_entry { cycles:16, /* cycle count to last branch */ type:4, /* branch type */ spec:2, /* branch speculation info */ - reserved:38; + new_type:4, /* additional branch type */ + reserved:34; }; union perf_sample_weight { -- cgit v1.2.3 From 5402d25aa5710d240040f73fb13d7d5c303ef071 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:17 +0530 Subject: perf: Capture branch privilege information Platforms like arm64 could capture privilege level information for all the branch records. Hence this adds a new element in the struct branch_entry to record the privilege level information, which could be requested through a new event.attr.branch_sample_type based flag PERF_SAMPLE_BRANCH_PRIV_SAVE. This flag helps user choose whether privilege information is captured. Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-4-anshuman.khandual@arm.com --- include/uapi/linux/perf_event.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index fed60e6b10e5..1a258d45a3fa 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ + PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -233,6 +235,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -282,6 +286,13 @@ enum { PERF_BR_NEW_MAX, }; +enum { + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, +}; + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -1402,7 +1413,8 @@ struct perf_branch_entry { type:4, /* branch type */ spec:2, /* branch speculation info */ new_type:4, /* additional branch type */ - reserved:34; + priv:3, /* privilege level */ + reserved:31; }; union perf_sample_weight { -- cgit v1.2.3 From f4054e522531038354bea5c924f286fdd8ae77b5 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:18 +0530 Subject: perf: Add PERF_BR_NEW_ARCH_[N] map for BRBE on arm64 platform BRBE captured branch types will overflow perf_branch_entry.type and generic branch types in perf_branch_entry.new_type. So override each available arch specific branch type in the following manner to comprehensively process all reported branch types in BRBE. PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-5-anshuman.khandual@arm.com --- include/uapi/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1a258d45a3fa..dca16582885f 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -293,6 +293,12 @@ enum { PERF_BR_PRIV_HV = 3, }; +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ -- cgit v1.2.3 From 8f36b3b4e1b58dca7d05e1579019230437e55d43 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 23 Aug 2022 18:24:56 -0600 Subject: usbip: add USBIP_URB_* URB transfer flags USBIP driver packs URB transfer flags in network packets that are exchanged between Server (usbip_host) and Client (vhci_hcd). URB_* flags are internal to kernel and could change. Where as USBIP URB flags exchanged in network packets are USBIP user API must not change. Add USBIP_URB* flags to make this an explicit API and change the client and server to map them. Details as follows: Client tx path (USBIP_CMD_SUBMIT): - Maps URB_* to USBIP_URB_* when it sends USBIP_CMD_SUBMIT packet. Server rx path (USBIP_CMD_SUBMIT): - Maps USBIP_URB_* to URB_* when it receives USBIP_CMD_SUBMIT packet. Flags aren't included in USBIP_CMD_UNLINK and USBIP_RET_SUBMIT packets and no special handling is needed for them in the following cases: - Server rx path (USBIP_CMD_UNLINK) - Client rx path & Server tx path (USBIP_RET_SUBMIT) Update protocol documentation to reflect the change. Suggested-by: Hongren Zenithal Zheng Suggested-by: Alan Stern Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/20220824002456.94605-1-skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/usbip_protocol.rst | 13 +++--- drivers/usb/usbip/stub_rx.c | 4 +- drivers/usb/usbip/usbip_common.c | 91 ++++++++++++++++++++++++++++++++++-- include/uapi/linux/usbip.h | 26 +++++++++++ 4 files changed, 122 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/usb/usbip_protocol.rst b/Documentation/usb/usbip_protocol.rst index 0b8541fda4d8..adc158967cc6 100644 --- a/Documentation/usb/usbip_protocol.rst +++ b/Documentation/usb/usbip_protocol.rst @@ -340,13 +340,12 @@ USBIP_CMD_SUBMIT: | 0 | 20 | usbip_header_basic, 'command' shall be 0x00000001 | +-----------+--------+---------------------------------------------------+ | 0x14 | 4 | transfer_flags: possible values depend on the | -| | | URB transfer_flags (refer to URB doc in | -| | | Documentation/driver-api/usb/URB.rst) | -| | | but with URB_NO_TRANSFER_DMA_MAP masked. Refer to | -| | | function usbip_pack_cmd_submit and function | -| | | tweak_transfer_flags in drivers/usb/usbip/ | -| | | usbip_common.c. The following fields may also ref | -| | | to function usbip_pack_cmd_submit and URB doc | +| | | USBIP_URB transfer_flags. | +| | | Refer to include/uapi/linux/usbip.h and | +| | | Documentation/driver-api/usb/URB.rst. | +| | | Refer to usbip_pack_cmd_submit() and | +| | | tweak_transfer_flags() in drivers/usb/usbip/ | +| | | usbip_common.c. | +-----------+--------+---------------------------------------------------+ | 0x18 | 4 | transfer_buffer_length: | | | | use URB transfer_buffer_length | diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 5dd41e8215e0..fc01b31bbb87 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -464,7 +464,7 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, int nents; int num_urbs = 1; int pipe = get_pipe(sdev, pdu); - int use_sg = pdu->u.cmd_submit.transfer_flags & URB_DMA_MAP_SG; + int use_sg = pdu->u.cmd_submit.transfer_flags & USBIP_URB_DMA_MAP_SG; int support_sg = 1; int np = 0; int ret, i; @@ -514,7 +514,7 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, num_urbs = nents; priv->completed_urbs = 0; pdu->u.cmd_submit.transfer_flags &= - ~URB_DMA_MAP_SG; + ~USBIP_URB_DMA_MAP_SG; } } else { buffer = kzalloc(buf_len, GFP_KERNEL); diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index 2ab99244bc31..053a2bca4c47 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -344,6 +344,91 @@ static unsigned int tweak_transfer_flags(unsigned int flags) return flags; } +/* + * USBIP driver packs URB transfer flags in PDUs that are exchanged + * between Server (usbip_host) and Client (vhci_hcd). URB_* flags + * are internal to kernel and could change. Where as USBIP URB flags + * exchanged in PDUs are USBIP user API must not change. + * + * USBIP_URB* flags are exported as explicit API and client and server + * do mapping from kernel flags to USBIP_URB*. Details as follows: + * + * Client tx path (USBIP_CMD_SUBMIT): + * - Maps URB_* to USBIP_URB_* when it sends USBIP_CMD_SUBMIT packet. + * + * Server rx path (USBIP_CMD_SUBMIT): + * - Maps USBIP_URB_* to URB_* when it receives USBIP_CMD_SUBMIT packet. + * + * Flags aren't included in USBIP_CMD_UNLINK and USBIP_RET_SUBMIT packets + * and no special handling is needed for them in the following cases: + * - Server rx path (USBIP_CMD_UNLINK) + * - Client rx path & Server tx path (USBIP_RET_SUBMIT) + * + * Code paths: + * usbip_pack_pdu() is the common routine that handles packing pdu from + * urb and unpack pdu to an urb. + * + * usbip_pack_cmd_submit() and usbip_pack_ret_submit() handle + * USBIP_CMD_SUBMIT and USBIP_RET_SUBMIT respectively. + * + * usbip_map_urb_to_usbip() and usbip_map_usbip_to_urb() are used + * by usbip_pack_cmd_submit() and usbip_pack_ret_submit() to map + * flags. + */ + +struct urb_to_usbip_flags { + u32 urb_flag; + u32 usbip_flag; +}; + +#define NUM_USBIP_FLAGS 17 + +static const struct urb_to_usbip_flags flag_map[NUM_USBIP_FLAGS] = { + {URB_SHORT_NOT_OK, USBIP_URB_SHORT_NOT_OK}, + {URB_ISO_ASAP, USBIP_URB_ISO_ASAP}, + {URB_NO_TRANSFER_DMA_MAP, USBIP_URB_NO_TRANSFER_DMA_MAP}, + {URB_ZERO_PACKET, USBIP_URB_ZERO_PACKET}, + {URB_NO_INTERRUPT, USBIP_URB_NO_INTERRUPT}, + {URB_FREE_BUFFER, USBIP_URB_FREE_BUFFER}, + {URB_DIR_IN, USBIP_URB_DIR_IN}, + {URB_DIR_OUT, USBIP_URB_DIR_OUT}, + {URB_DIR_MASK, USBIP_URB_DIR_MASK}, + {URB_DMA_MAP_SINGLE, USBIP_URB_DMA_MAP_SINGLE}, + {URB_DMA_MAP_PAGE, USBIP_URB_DMA_MAP_PAGE}, + {URB_DMA_MAP_SG, USBIP_URB_DMA_MAP_SG}, + {URB_MAP_LOCAL, USBIP_URB_MAP_LOCAL}, + {URB_SETUP_MAP_SINGLE, USBIP_URB_SETUP_MAP_SINGLE}, + {URB_SETUP_MAP_LOCAL, USBIP_URB_SETUP_MAP_LOCAL}, + {URB_DMA_SG_COMBINED, USBIP_URB_DMA_SG_COMBINED}, + {URB_ALIGNED_TEMP_BUFFER, USBIP_URB_ALIGNED_TEMP_BUFFER}, +}; + +static unsigned int urb_to_usbip(unsigned int flags) +{ + unsigned int map_flags = 0; + int loop; + + for (loop = 0; loop < NUM_USBIP_FLAGS; loop++) { + if (flags & flag_map[loop].urb_flag) + map_flags |= flag_map[loop].usbip_flag; + } + + return map_flags; +} + +static unsigned int usbip_to_urb(unsigned int flags) +{ + unsigned int map_flags = 0; + int loop; + + for (loop = 0; loop < NUM_USBIP_FLAGS; loop++) { + if (flags & flag_map[loop].usbip_flag) + map_flags |= flag_map[loop].urb_flag; + } + + return map_flags; +} + static void usbip_pack_cmd_submit(struct usbip_header *pdu, struct urb *urb, int pack) { @@ -354,14 +439,14 @@ static void usbip_pack_cmd_submit(struct usbip_header *pdu, struct urb *urb, * will be discussed when usbip is ported to other operating systems. */ if (pack) { - spdu->transfer_flags = - tweak_transfer_flags(urb->transfer_flags); + /* map after tweaking the urb flags */ + spdu->transfer_flags = urb_to_usbip(tweak_transfer_flags(urb->transfer_flags)); spdu->transfer_buffer_length = urb->transfer_buffer_length; spdu->start_frame = urb->start_frame; spdu->number_of_packets = urb->number_of_packets; spdu->interval = urb->interval; } else { - urb->transfer_flags = spdu->transfer_flags; + urb->transfer_flags = usbip_to_urb(spdu->transfer_flags); urb->transfer_buffer_length = spdu->transfer_buffer_length; urb->start_frame = spdu->start_frame; urb->number_of_packets = spdu->number_of_packets; diff --git a/include/uapi/linux/usbip.h b/include/uapi/linux/usbip.h index fd393d908d8a..e4421ad55b2e 100644 --- a/include/uapi/linux/usbip.h +++ b/include/uapi/linux/usbip.h @@ -24,4 +24,30 @@ enum usbip_device_status { VDEV_ST_USED, VDEV_ST_ERROR }; + +/* USB URB Transfer flags: + * + * USBIP server and client (vchi) pack URBs in TCP packets. The following + * are the transfer type defines used in USBIP protocol. + */ + +#define USBIP_URB_SHORT_NOT_OK 0x0001 +#define USBIP_URB_ISO_ASAP 0x0002 +#define USBIP_URB_NO_TRANSFER_DMA_MAP 0x0004 +#define USBIP_URB_ZERO_PACKET 0x0040 +#define USBIP_URB_NO_INTERRUPT 0x0080 +#define USBIP_URB_FREE_BUFFER 0x0100 +#define USBIP_URB_DIR_IN 0x0200 +#define USBIP_URB_DIR_OUT 0 +#define USBIP_URB_DIR_MASK USBIP_URB_DIR_IN + +#define USBIP_URB_DMA_MAP_SINGLE 0x00010000 +#define USBIP_URB_DMA_MAP_PAGE 0x00020000 +#define USBIP_URB_DMA_MAP_SG 0x00040000 +#define USBIP_URB_MAP_LOCAL 0x00080000 +#define USBIP_URB_SETUP_MAP_SINGLE 0x00100000 +#define USBIP_URB_SETUP_MAP_LOCAL 0x00200000 +#define USBIP_URB_DMA_SG_COMBINED 0x00400000 +#define USBIP_URB_ALIGNED_TEMP_BUFFER 0x00800000 + #endif /* _UAPI_LINUX_USBIP_H */ -- cgit v1.2.3 From 385ecfdfb5d5ad0ff37e20381c70e18af8cf1bdb Mon Sep 17 00:00:00 2001 From: Abhishek Sahu Date: Mon, 29 Aug 2022 17:18:46 +0530 Subject: vfio: Add the device features for the low power entry and exit This patch adds the following new device features for the low power entry and exit in the header file. The implementation for the same will be added in the subsequent patches. - VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY - VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP - VFIO_DEVICE_FEATURE_LOW_POWER_EXIT For vfio-pci based devices, with the standard PCI PM registers, all power states cannot be achieved. The platform-based power management needs to be involved to go into the lowest power state. For doing low power entry and exit with platform-based power management, these device features can be used. The entry device feature has two variants. These two variants are mainly to support the different behaviour for the low power entry. If there is any access for the VFIO device on the host side, then the device will be moved out of the low power state without the user's guest driver involvement. Some devices (for example NVIDIA VGA or 3D controller) require the user's guest driver involvement for each low-power entry. In the first variant, the host can return the device to low power automatically. The device will continue to attempt to reach low power until the low power exit feature is called. In the second variant, if the device exits low power due to an access, the host kernel will signal the user via the provided eventfd and will not return the device to low power without a subsequent call to one of the low power entry features. A call to the low power exit feature is optional if the user provided eventfd is signaled. These device features only support VFIO_DEVICE_FEATURE_SET and VFIO_DEVICE_FEATURE_PROBE operations. Signed-off-by: Abhishek Sahu Link: https://lore.kernel.org/r/20220829114850.4341-2-abhsahu@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 56 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 733a1cddde30..76a173f973de 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -986,6 +986,62 @@ enum vfio_device_mig_state { VFIO_DEVICE_STATE_RUNNING_P2P = 5, }; +/* + * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power + * state with the platform-based power management. Device use of lower power + * states depends on factors managed by the runtime power management core, + * including system level support and coordinating support among dependent + * devices. Enabling device low power entry does not guarantee lower power + * usage by the device, nor is a mechanism provided through this feature to + * know the current power state of the device. If any device access happens + * (either from the host or through the vfio uAPI) when the device is in the + * low power state, then the host will move the device out of the low power + * state as necessary prior to the access. Once the access is completed, the + * device may re-enter the low power state. For single shot low power support + * with wake-up notification, see + * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below. Access to mmap'd + * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after + * calling LOW_POWER_EXIT. + */ +#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3 + +/* + * This device feature has the same behavior as + * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user + * provides an eventfd for wake-up notification. When the device moves out of + * the low power state for the wake-up, the host will not allow the device to + * re-enter a low power state without a subsequent user call to one of the low + * power entry device feature IOCTLs. Access to mmap'd device regions is + * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the + * low power exit. The low power exit can happen either through LOW_POWER_EXIT + * or through any other access (where the wake-up notification has been + * generated). The access to mmap'd device regions will not trigger low power + * exit. + * + * The notification through the provided eventfd will be generated only when + * the device has entered and is resumed from a low power state after + * calling this device feature IOCTL. A device that has not entered low power + * state, as managed through the runtime power management core, will not + * generate a notification through the provided eventfd on access. Calling the + * LOW_POWER_EXIT feature is optional in the case where notification has been + * signaled on the provided eventfd that a resume from low power has occurred. + */ +struct vfio_device_low_power_entry_with_wakeup { + __s32 wakeup_eventfd; + __u32 reserved; +}; + +#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4 + +/* + * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as + * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or + * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features. + * This device feature IOCTL may itself generate a wakeup eventfd notification + * in the latter case if the device had previously entered a low power state. + */ +#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5 + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- cgit v1.2.3 From 835e699ef82adfc85ac4cc3f1f237c1adfdefd20 Mon Sep 17 00:00:00 2001 From: Jagath Jog J Date: Wed, 31 Aug 2022 12:01:16 +0530 Subject: iio: Add new event type gesture and use direction for single and double tap Add new event type for tap called gesture and the direction can be used to differentiate single and double tap. This may be used by accelerometer sensors to express single and double tap events. For directional tap, modifiers like IIO_MOD_(X/Y/Z) can be used along with singletap and doubletap direction. Signed-off-by: Jagath Jog J Link: https://lore.kernel.org/r/20220831063117.4141-2-jagathjog1996@gmail.com Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio | 69 +++++++++++++++++++++++++++++++++ drivers/iio/industrialio-event.c | 7 +++- include/linux/iio/types.h | 2 + include/uapi/linux/iio/types.h | 3 ++ tools/iio/iio_event_monitor.c | 8 +++- 5 files changed, 87 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 80e8a38d1ee2..66e81c48ee21 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -2068,3 +2068,72 @@ Description: individual channels. If multiple channels are enabled in a scan, then the sampling_frequency of the scan may be computed from the per channel sampling frequencies. + +What: /sys/.../events/in_accel_gesture_singletap_en +What: /sys/.../events/in_accel_gesture_doubletap_en +KernelVersion: 6.1 +Contact: linux-iio@vger.kernel.org +Description: + Device generates an event on a single or double tap. + +What: /sys/.../events/in_accel_gesture_singletap_value +What: /sys/.../events/in_accel_gesture_doubletap_value +KernelVersion: 6.1 +Contact: linux-iio@vger.kernel.org +Description: + Specifies the threshold value that the device is comparing + against to generate the tap gesture event. The lower + threshold value increases the sensitivity of tap detection. + Units and the exact meaning of value are device-specific. + +What: /sys/.../events/in_accel_gesture_tap_value_available +KernelVersion: 6.1 +Contact: linux-iio@vger.kernel.org +Description: + Lists all available threshold values which can be used to + modify the sensitivity of the tap detection. + +What: /sys/.../events/in_accel_gesture_singletap_reset_timeout +What: /sys/.../events/in_accel_gesture_doubletap_reset_timeout +KernelVersion: 6.1 +Contact: linux-iio@vger.kernel.org +Description: + Specifies the timeout value in seconds for the tap detector + to not to look for another tap event after the event as + occurred. Basically the minimum quiet time between the two + single-tap's or two double-tap's. + +What: /sys/.../events/in_accel_gesture_tap_reset_timeout_available +KernelVersion: 6.1 +Contact: linux-iio@vger.kernel.org +Description: + Lists all available tap reset timeout values. Units in seconds. + +What: /sys/.../events/in_accel_gesture_doubletap_tap2_min_delay +KernelVersion: 6.1 +Contact: linux-iio@vger.kernel.org +Description: + Specifies the minimum quiet time in seconds between the two + taps of a double tap. + +What: /sys/.../events/in_accel_gesture_doubletap_tap2_min_delay_available +KernelVersion: 6.1 +Contact: linux-iio@vger.kernel.org +Description: + Lists all available delay values between two taps in the double + tap. Units in seconds. + +What: /sys/.../events/in_accel_gesture_tap_maxtomin_time +KernelVersion: 6.1 +Contact: linux-iio@vger.kernel.org +Description: + Specifies the maximum time difference allowed between upper + and lower peak of tap to consider it as the valid tap event. + Units in seconds. + +What: /sys/.../events/in_accel_gesture_tap_maxtomin_time_available +KernelVersion: 6.1 +Contact: linux-iio@vger.kernel.org +Description: + Lists all available time values between upper peak to lower + peak. Units in seconds. diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index 0e2056894965..3d78da2531a9 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -231,12 +231,15 @@ static const char * const iio_ev_type_text[] = { [IIO_EV_TYPE_MAG_ADAPTIVE] = "mag_adaptive", [IIO_EV_TYPE_CHANGE] = "change", [IIO_EV_TYPE_MAG_REFERENCED] = "mag_referenced", + [IIO_EV_TYPE_GESTURE] = "gesture", }; static const char * const iio_ev_dir_text[] = { [IIO_EV_DIR_EITHER] = "either", [IIO_EV_DIR_RISING] = "rising", - [IIO_EV_DIR_FALLING] = "falling" + [IIO_EV_DIR_FALLING] = "falling", + [IIO_EV_DIR_SINGLETAP] = "singletap", + [IIO_EV_DIR_DOUBLETAP] = "doubletap", }; static const char * const iio_ev_info_text[] = { @@ -247,6 +250,8 @@ static const char * const iio_ev_info_text[] = { [IIO_EV_INFO_HIGH_PASS_FILTER_3DB] = "high_pass_filter_3db", [IIO_EV_INFO_LOW_PASS_FILTER_3DB] = "low_pass_filter_3db", [IIO_EV_INFO_TIMEOUT] = "timeout", + [IIO_EV_INFO_RESET_TIMEOUT] = "reset_timeout", + [IIO_EV_INFO_TAP2_MIN_DELAY] = "tap2_min_delay", }; static enum iio_event_direction iio_ev_attr_dir(struct iio_dev_attr *attr) diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index 27143b03909d..82faa98c719a 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -17,6 +17,8 @@ enum iio_event_info { IIO_EV_INFO_HIGH_PASS_FILTER_3DB, IIO_EV_INFO_LOW_PASS_FILTER_3DB, IIO_EV_INFO_TIMEOUT, + IIO_EV_INFO_RESET_TIMEOUT, + IIO_EV_INFO_TAP2_MIN_DELAY, }; #define IIO_VAL_INT 1 diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 472cead10d8d..913864221ac4 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -105,6 +105,7 @@ enum iio_event_type { IIO_EV_TYPE_MAG_ADAPTIVE, IIO_EV_TYPE_CHANGE, IIO_EV_TYPE_MAG_REFERENCED, + IIO_EV_TYPE_GESTURE, }; enum iio_event_direction { @@ -112,6 +113,8 @@ enum iio_event_direction { IIO_EV_DIR_RISING, IIO_EV_DIR_FALLING, IIO_EV_DIR_NONE, + IIO_EV_DIR_SINGLETAP, + IIO_EV_DIR_DOUBLETAP, }; #endif /* _UAPI_IIO_TYPES_H_ */ diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index 2f4581658859..b3b3ea399f67 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -69,12 +69,15 @@ static const char * const iio_ev_type_text[] = { [IIO_EV_TYPE_MAG_ADAPTIVE] = "mag_adaptive", [IIO_EV_TYPE_CHANGE] = "change", [IIO_EV_TYPE_MAG_REFERENCED] = "mag_referenced", + [IIO_EV_TYPE_GESTURE] = "gesture", }; static const char * const iio_ev_dir_text[] = { [IIO_EV_DIR_EITHER] = "either", [IIO_EV_DIR_RISING] = "rising", - [IIO_EV_DIR_FALLING] = "falling" + [IIO_EV_DIR_FALLING] = "falling", + [IIO_EV_DIR_SINGLETAP] = "singletap", + [IIO_EV_DIR_DOUBLETAP] = "doubletap", }; static const char * const iio_modifier_names[] = { @@ -227,6 +230,7 @@ static bool event_is_known(struct iio_event_data *event) case IIO_EV_TYPE_THRESH_ADAPTIVE: case IIO_EV_TYPE_MAG_ADAPTIVE: case IIO_EV_TYPE_CHANGE: + case IIO_EV_TYPE_GESTURE: break; default: return false; @@ -236,6 +240,8 @@ static bool event_is_known(struct iio_event_data *event) case IIO_EV_DIR_EITHER: case IIO_EV_DIR_RISING: case IIO_EV_DIR_FALLING: + case IIO_EV_DIR_SINGLETAP: + case IIO_EV_DIR_DOUBLETAP: case IIO_EV_DIR_NONE: break; default: -- cgit v1.2.3 From 42ee53f9bfd3e4cf58ae7656e0d11075f5fe8489 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Sep 2022 21:34:41 +0300 Subject: vfio: Introduce DMA logging uAPIs DMA logging allows a device to internally record what DMAs the device is initiating and report them back to userspace. It is part of the VFIO migration infrastructure that allows implementing dirty page tracking during the pre copy phase of live migration. Only DMA WRITEs are logged, and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. This patch introduces the DMA logging involved uAPIs. It uses the FEATURE ioctl with its GET/SET/PROBE options as of below. It exposes a PROBE option to detect if the device supports DMA logging. It exposes a SET option to start device DMA logging in given IOVAs ranges. It exposes a SET option to stop device DMA logging that was previously started. It exposes a GET option to read back and clear the device DMA log. Extra details exist as part of vfio.h per a specific option. Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220908183448.195262-4-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 86 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 76a173f973de..d7d8e0922376 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1042,6 +1042,92 @@ struct vfio_device_low_power_entry_with_wakeup { */ #define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5 +/* + * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging. + * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports + * DMA logging. + * + * DMA logging allows a device to internally record what DMAs the device is + * initiating and report them back to userspace. It is part of the VFIO + * migration infrastructure that allows implementing dirty page tracking + * during the pre copy phase of live migration. Only DMA WRITEs are logged, + * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. + * + * When DMA logging is started a range of IOVAs to monitor is provided and the + * device can optimize its logging to cover only the IOVA range given. Each + * DMA that the device initiates inside the range will be logged by the device + * for later retrieval. + * + * page_size is an input that hints what tracking granularity the device + * should try to achieve. If the device cannot do the hinted page size then + * it's the driver choice which page size to pick based on its support. + * On output the device will return the page size it selected. + * + * ranges is a pointer to an array of + * struct vfio_device_feature_dma_logging_range. + * + * The core kernel code guarantees to support by minimum num_ranges that fit + * into a single kernel page. User space can try higher values but should give + * up if the above can't be achieved as of some driver limitations. + * + * A single call to start device DMA logging can be issued and a matching stop + * should follow at the end. Another start is not allowed in the meantime. + */ +struct vfio_device_feature_dma_logging_control { + __aligned_u64 page_size; + __u32 num_ranges; + __u32 __reserved; + __aligned_u64 ranges; +}; + +struct vfio_device_feature_dma_logging_range { + __aligned_u64 iova; + __aligned_u64 length; +}; + +#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6 + +/* + * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started + * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START + */ +#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7 + +/* + * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log + * + * Query the device's DMA log for written pages within the given IOVA range. + * During querying the log is cleared for the IOVA range. + * + * bitmap is a pointer to an array of u64s that will hold the output bitmap + * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits + * is given by: + * bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64)) + * + * The input page_size can be any power of two value and does not have to + * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver + * will format its internal logging to match the reporting page size, possibly + * by replicating bits if the internal page size is lower than requested. + * + * The LOGGING_REPORT will only set bits in the bitmap and never clear or + * perform any initialization of the user provided bitmap. + * + * If any error is returned userspace should assume that the dirty log is + * corrupted. Error recovery is to consider all memory dirty and try to + * restart the dirty tracking, or to abort/restart the whole migration. + * + * If DMA logging is not enabled, an error will be returned. + * + */ +struct vfio_device_feature_dma_logging_report { + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 page_size; + __aligned_u64 bitmap; +}; + +#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8 + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- cgit v1.2.3 From 2d5de004e009add27db76c5cdc9f1f7f7dc087e7 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Mon, 8 Aug 2022 10:56:11 -0700 Subject: userfaultfd: add /dev/userfaultfd for fine grained access control Historically, it has been shown that intercepting kernel faults with userfaultfd (thereby forcing the kernel to wait for an arbitrary amount of time) can be exploited, or at least can make some kinds of exploits easier. So, in 37cd0575b8 "userfaultfd: add UFFD_USER_MODE_ONLY" we changed things so, in order for kernel faults to be handled by userfaultfd, either the process needs CAP_SYS_PTRACE, or this sysctl must be configured so that any unprivileged user can do it. In a typical implementation of a hypervisor with live migration (take QEMU/KVM as one such example), we do indeed need to be able to handle kernel faults. But, both options above are less than ideal: - Toggling the sysctl increases attack surface by allowing any unprivileged user to do it. - Granting the live migration process CAP_SYS_PTRACE gives it this ability, but *also* the ability to "observe and control the execution of another process [...], and examine and change [its] memory and registers" (from ptrace(2)). This isn't something we need or want to be able to do, so granting this permission violates the "principle of least privilege". This is all a long winded way to say: we want a more fine-grained way to grant access to userfaultfd, without granting other additional permissions at the same time. To achieve this, add a /dev/userfaultfd misc device. This device provides an alternative to the userfaultfd(2) syscall for the creation of new userfaultfds. The idea is, any userfaultfds created this way will be able to handle kernel faults, without the caller having any special capabilities. Access to this mechanism is instead restricted using e.g. standard filesystem permissions. [axelrasmussen@google.com: Handle misc_register() failure properly] Link: https://lkml.kernel.org/r/20220819205201.658693-3-axelrasmussen@google.com Link: https://lkml.kernel.org/r/20220808175614.3885028-3-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Acked-by: Nadav Amit Acked-by: Peter Xu Acked-by: Mike Rapoport Cc: Al Viro Cc: Dave Hansen Cc: Dmitry V. Levin Cc: Gleb Fotengauer-Malinovskiy Cc: Hugh Dickins Cc: Jan Kara Cc: Jonathan Corbet Cc: Mel Gorman Cc: Mike Kravetz Cc: Shuah Khan Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zhang Yi Cc: Mike Rapoport Signed-off-by: Andrew Morton --- fs/userfaultfd.c | 71 +++++++++++++++++++++++++++++++--------- include/uapi/linux/userfaultfd.h | 4 +++ 2 files changed, 59 insertions(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 175de70e3adf..4de91ba9e85e 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -30,6 +30,7 @@ #include #include #include +#include int sysctl_unprivileged_userfaultfd __read_mostly; @@ -415,13 +416,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) if (ctx->features & UFFD_FEATURE_SIGBUS) goto out; - if ((vmf->flags & FAULT_FLAG_USER) == 0 && - ctx->flags & UFFD_USER_MODE_ONLY) { - printk_once(KERN_WARNING "uffd: Set unprivileged_userfaultfd " - "sysctl knob to 1 if kernel faults must be handled " - "without obtaining CAP_SYS_PTRACE capability\n"); + if (!(vmf->flags & FAULT_FLAG_USER) && (ctx->flags & UFFD_USER_MODE_ONLY)) goto out; - } /* * If it's already released don't get it. This avoids to loop @@ -2056,20 +2052,11 @@ static void init_once_userfaultfd_ctx(void *mem) seqcount_spinlock_init(&ctx->refile_seq, &ctx->fault_pending_wqh.lock); } -SYSCALL_DEFINE1(userfaultfd, int, flags) +static int new_userfaultfd(int flags) { struct userfaultfd_ctx *ctx; int fd; - if (!sysctl_unprivileged_userfaultfd && - (flags & UFFD_USER_MODE_ONLY) == 0 && - !capable(CAP_SYS_PTRACE)) { - printk_once(KERN_WARNING "uffd: Set unprivileged_userfaultfd " - "sysctl knob to 1 if kernel faults must be handled " - "without obtaining CAP_SYS_PTRACE capability\n"); - return -EPERM; - } - BUG_ON(!current->mm); /* Check the UFFD_* constants for consistency. */ @@ -2102,8 +2089,60 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) return fd; } +static inline bool userfaultfd_syscall_allowed(int flags) +{ + /* Userspace-only page faults are always allowed */ + if (flags & UFFD_USER_MODE_ONLY) + return true; + + /* + * The user is requesting a userfaultfd which can handle kernel faults. + * Privileged users are always allowed to do this. + */ + if (capable(CAP_SYS_PTRACE)) + return true; + + /* Otherwise, access to kernel fault handling is sysctl controlled. */ + return sysctl_unprivileged_userfaultfd; +} + +SYSCALL_DEFINE1(userfaultfd, int, flags) +{ + if (!userfaultfd_syscall_allowed(flags)) + return -EPERM; + + return new_userfaultfd(flags); +} + +static long userfaultfd_dev_ioctl(struct file *file, unsigned int cmd, unsigned long flags) +{ + if (cmd != USERFAULTFD_IOC_NEW) + return -EINVAL; + + return new_userfaultfd(flags); +} + +static const struct file_operations userfaultfd_dev_fops = { + .unlocked_ioctl = userfaultfd_dev_ioctl, + .compat_ioctl = userfaultfd_dev_ioctl, + .owner = THIS_MODULE, + .llseek = noop_llseek, +}; + +static struct miscdevice userfaultfd_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "userfaultfd", + .fops = &userfaultfd_dev_fops +}; + static int __init userfaultfd_init(void) { + int ret; + + ret = misc_register(&userfaultfd_misc); + if (ret) + return ret; + userfaultfd_ctx_cachep = kmem_cache_create("userfaultfd_ctx_cache", sizeof(struct userfaultfd_ctx), 0, diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 7d32b1e797fb..005e5e306266 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -12,6 +12,10 @@ #include +/* ioctls for /dev/userfaultfd */ +#define USERFAULTFD_IOC 0xAA +#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00) + /* * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In -- cgit v1.2.3 From b4e12b2d70fd9eccdb3cef8015dc1788ca38e3fd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Sep 2022 14:41:04 -0700 Subject: perf: Kill __PERF_SAMPLE_CALLCHAIN_EARLY There's no in-tree user anymore. Let's get rid of it. Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220908214104.3851807-3-namhyung@kernel.org --- arch/x86/events/amd/ibs.c | 10 ---------- arch/x86/events/intel/core.c | 3 --- include/uapi/linux/perf_event.h | 2 -- 3 files changed, 15 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index dab094166693..ce5720bfb350 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -300,16 +300,6 @@ static int perf_ibs_init(struct perf_event *event) hwc->config_base = perf_ibs->msr; hwc->config = config; - /* - * rip recorded by IbsOpRip will not be consistent with rsp and rbp - * recorded as part of interrupt regs. Thus we need to use rip from - * interrupt regs while unwinding call stack. Setting _EARLY flag - * makes sure we unwind call-stack before perf sample rip is set to - * IbsOpRip. - */ - if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) - event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY; - return 0; } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7f4e7e6b45f0..b16c91ac9219 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3868,9 +3868,6 @@ static int intel_pmu_hw_config(struct perf_event *event) } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); - - if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) - event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY; } if (needs_branch_stack(event)) { diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index dca16582885f..e639c74cf5fb 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -164,8 +164,6 @@ enum perf_event_sample_format { PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24, PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */ - - __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) -- cgit v1.2.3 From bcc57a48eaee63a71983996c4c9d89ce7cbf55d9 Mon Sep 17 00:00:00 2001 From: Andrea Merello Date: Wed, 7 Sep 2022 15:21:52 +0200 Subject: iio: add modifiers for linear acceleration Add IIO_MOD_LINEAR_X, IIO_MOD_LINEAR_Y and IIO_MOD_LINEAR_Z modifiers to te IIO core, which is preparatory for adding the Bosch BNO055 IMU driver. Bosch BNO055 IMU can report raw accelerations (among x, y and z axis) as well as the so called "linear accelerations" (again, among x, y and z axis) which is basically the acceleration after subtracting gravity and for which those new modifiers are for. Signed-off-by: Andrea Merello Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220907132205.28021-2-andrea.merello@iit.it Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 3 +++ include/uapi/linux/iio/types.h | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 583e0e5205a0..6f7b6fd0c6ef 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -134,6 +134,9 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_ETHANOL] = "ethanol", [IIO_MOD_H2] = "h2", [IIO_MOD_O2] = "o2", + [IIO_MOD_LINEAR_X] = "linear_x", + [IIO_MOD_LINEAR_Y] = "linear_y", + [IIO_MOD_LINEAR_Z] = "linear_z", }; /* relies on pairs of these shared then separate */ diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 913864221ac4..b7ba9861a24d 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -95,6 +95,9 @@ enum iio_modifier { IIO_MOD_ETHANOL, IIO_MOD_H2, IIO_MOD_O2, + IIO_MOD_LINEAR_X, + IIO_MOD_LINEAR_Y, + IIO_MOD_LINEAR_Z, }; enum iio_event_type { @@ -118,4 +121,3 @@ enum iio_event_direction { }; #endif /* _UAPI_IIO_TYPES_H_ */ - -- cgit v1.2.3 From dcedf14553810cd6bbf7227c995beb4548e0859d Mon Sep 17 00:00:00 2001 From: Andrea Merello Date: Wed, 7 Sep 2022 15:21:55 +0200 Subject: iio: add modifers for pitch, yaw, roll Add modifiers for reporting rotations as euler angles (i.e. yaw, pitch and roll). Signed-off-by: Andrea Merello Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220907132205.28021-5-andrea.merello@iit.it Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 3 +++ include/uapi/linux/iio/types.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 6f7b6fd0c6ef..8f12993f87be 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -137,6 +137,9 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_LINEAR_X] = "linear_x", [IIO_MOD_LINEAR_Y] = "linear_y", [IIO_MOD_LINEAR_Z] = "linear_z", + [IIO_MOD_PITCH] = "pitch", + [IIO_MOD_YAW] = "yaw", + [IIO_MOD_ROLL] = "roll", }; /* relies on pairs of these shared then separate */ diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index b7ba9861a24d..c79f2f046a0b 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -98,6 +98,9 @@ enum iio_modifier { IIO_MOD_LINEAR_X, IIO_MOD_LINEAR_Y, IIO_MOD_LINEAR_Z, + IIO_MOD_PITCH, + IIO_MOD_YAW, + IIO_MOD_ROLL, }; enum iio_event_type { -- cgit v1.2.3 From 7d37539037c2fca70346fbedc219f655253d5cff Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sat, 24 Sep 2022 07:00:00 +0200 Subject: fuse: implement ->tmpfile() This is basically equivalent to the FUSE_CREATE operation which creates and opens a regular file. Add a new FUSE_TMPFILE operation, otherwise just reuse the protocol and the code for FUSE_CREATE. Acked-by: Christian Brauner (Microsoft) Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 24 +++++++++++++++++++++--- fs/fuse/fuse_i.h | 3 +++ include/uapi/linux/fuse.h | 6 +++++- 3 files changed, 29 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b585b04e815e..bb97a384dc5d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -529,7 +529,7 @@ out_err: */ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct file *file, unsigned int flags, - umode_t mode) + umode_t mode, u32 opcode) { int err; struct inode *inode; @@ -573,7 +573,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; } - args.opcode = FUSE_CREATE; + args.opcode = opcode; args.nodeid = get_node_id(dir); args.in_numargs = 2; args.in_args[0].size = sizeof(inarg); @@ -676,7 +676,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, if (fc->no_create) goto mknod; - err = fuse_create_open(dir, entry, file, flags, mode); + err = fuse_create_open(dir, entry, file, flags, mode, FUSE_CREATE); if (err == -ENOSYS) { fc->no_create = 1; goto mknod; @@ -802,6 +802,23 @@ static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir, return fuse_mknod(&init_user_ns, dir, entry, mode, 0); } +static int fuse_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, + struct file *file, umode_t mode) +{ + struct fuse_conn *fc = get_fuse_conn(dir); + int err; + + if (fc->no_tmpfile) + return -EOPNOTSUPP; + + err = fuse_create_open(dir, file->f_path.dentry, file, file->f_flags, mode, FUSE_TMPFILE); + if (err == -ENOSYS) { + fc->no_tmpfile = 1; + err = -EOPNOTSUPP; + } + return err; +} + static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *entry, umode_t mode) { @@ -1913,6 +1930,7 @@ static const struct inode_operations fuse_dir_inode_operations = { .setattr = fuse_setattr, .create = fuse_create, .atomic_open = fuse_atomic_open, + .tmpfile = fuse_tmpfile, .mknod = fuse_mknod, .permission = fuse_permission, .getattr = fuse_getattr, diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 488b460e046f..98a9cf531873 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -784,6 +784,9 @@ struct fuse_conn { /* Does the filesystem support per inode DAX? */ unsigned int inode_dax:1; + /* Is tmpfile not implemented by fs? */ + unsigned int no_tmpfile:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index d6ccee961891..76ee8f9e024a 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -194,6 +194,9 @@ * - add FUSE_SECURITY_CTX init flag * - add security context to create, mkdir, symlink, and mknod requests * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX + * + * 7.37 + * - add FUSE_TMPFILE */ #ifndef _LINUX_FUSE_H @@ -229,7 +232,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 36 +#define FUSE_KERNEL_MINOR_VERSION 37 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -537,6 +540,7 @@ enum fuse_opcode { FUSE_SETUPMAPPING = 48, FUSE_REMOVEMAPPING = 49, FUSE_SYNCFS = 50, + FUSE_TMPFILE = 51, /* CUSE specific operations */ CUSE_INIT = 4096, -- cgit v1.2.3 From 1260cd04a601e0e02e09fa332111b8639611970d Mon Sep 17 00:00:00 2001 From: Nate Yocom Date: Wed, 28 Sep 2022 18:23:22 -0700 Subject: Input: add ABS_PROFILE to uapi and documentation Define new ABS_PROFILE axis for input devices which need it, e.g. X-Box Adaptive Controller and X-Box Elite 2. Signed-off-by: Nate Yocom Link: https://lore.kernel.org/r/20220908173930.28940-4-nate@yocom.org Signed-off-by: Dmitry Torokhov --- Documentation/input/event-codes.rst | 6 ++++++ Documentation/input/gamepad.rst | 6 ++++++ drivers/hid/hid-debug.c | 3 ++- include/uapi/linux/input-event-codes.h | 1 + 4 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/Documentation/input/event-codes.rst b/Documentation/input/event-codes.rst index 8741d390b184..b4557462edd7 100644 --- a/Documentation/input/event-codes.rst +++ b/Documentation/input/event-codes.rst @@ -235,6 +235,12 @@ A few EV_ABS codes have special meanings: BTN_TOOL_ signals the type of tool that is currently detected by the hardware and is otherwise independent of ABS_DISTANCE and/or BTN_TOUCH. +* ABS_PROFILE: + + - Used to describe the state of a multi-value profile switch. An event is + emitted only when the selected profile changes, indicating the newly + selected profile value. + * ABS_MT_: - Used to describe multitouch input events. Please see diff --git a/Documentation/input/gamepad.rst b/Documentation/input/gamepad.rst index 4d5e7fb80a84..71019de46036 100644 --- a/Documentation/input/gamepad.rst +++ b/Documentation/input/gamepad.rst @@ -189,3 +189,9 @@ Gamepads report the following events: - Rumble: Rumble is advertised as FF_RUMBLE. + +- Profile: + + Some pads provide a multi-value profile selection switch. An example is the + XBox Adaptive and the XBox Elite 2 controllers. When the active profile is + switched, its newly selected value is emitted as an ABS_PROFILE event. diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 81e7e404a5fc..2ca6ab600bc9 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -1014,7 +1014,8 @@ static const char *absolutes[ABS_CNT] = { [ABS_HAT3Y] = "Hat 3Y", [ABS_PRESSURE] = "Pressure", [ABS_DISTANCE] = "Distance", [ABS_TILT_X] = "XTilt", [ABS_TILT_Y] = "YTilt", [ABS_TOOL_WIDTH] = "ToolWidth", - [ABS_VOLUME] = "Volume", [ABS_MISC] = "Misc", + [ABS_VOLUME] = "Volume", [ABS_PROFILE] = "Profile", + [ABS_MISC] = "Misc", [ABS_MT_TOUCH_MAJOR] = "MTMajor", [ABS_MT_TOUCH_MINOR] = "MTMinor", [ABS_MT_WIDTH_MAJOR] = "MTMajorW", diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index dff8e7f17074..7ad931a32970 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -862,6 +862,7 @@ #define ABS_TOOL_WIDTH 0x1c #define ABS_VOLUME 0x20 +#define ABS_PROFILE 0x21 #define ABS_MISC 0x28 -- cgit v1.2.3 From 17601bfed909fa080fcfd227b57da2bd4dc2d2a6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Sep 2022 15:51:16 +0100 Subject: KVM: Add KVM_CAP_DIRTY_LOG_RING_ACQ_REL capability and config option In order to differenciate between architectures that require no extra synchronisation when accessing the dirty ring and those who do, add a new capability (KVM_CAP_DIRTY_LOG_RING_ACQ_REL) that identify the latter sort. TSO architectures can obviously advertise both, while relaxed architectures must only advertise the ACQ_REL version. This requires some configuration symbol rejigging, with HAVE_KVM_DIRTY_RING being only indirectly selected by two top-level config symbols: - HAVE_KVM_DIRTY_RING_TSO for strongly ordered architectures (x86) - HAVE_KVM_DIRTY_RING_ACQ_REL for weakly ordered architectures (arm64) Suggested-by: Paolo Bonzini Signed-off-by: Marc Zyngier Reviewed-by: Gavin Shan Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/20220926145120.27974-3-maz@kernel.org --- arch/x86/kvm/Kconfig | 2 +- include/uapi/linux/kvm.h | 1 + virt/kvm/Kconfig | 14 ++++++++++++++ virt/kvm/kvm_main.c | 9 ++++++++- 4 files changed, 24 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index e3cbd7706136..876748b236ff 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -28,7 +28,7 @@ config KVM select HAVE_KVM_IRQCHIP select HAVE_KVM_PFNCACHE select HAVE_KVM_IRQFD - select HAVE_KVM_DIRTY_RING + select HAVE_KVM_DIRTY_RING_TSO select IRQ_BYPASS_MANAGER select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_IRQ_ROUTING diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index eed0315a77a6..0d5d4419139a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1177,6 +1177,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 #define KVM_CAP_S390_ZPCI_OP 221 #define KVM_CAP_S390_CPU_TOPOLOGY 222 +#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index a8c5c9f06b3c..800f9470e36b 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -19,6 +19,20 @@ config HAVE_KVM_IRQ_ROUTING config HAVE_KVM_DIRTY_RING bool +# Only strongly ordered architectures can select this, as it doesn't +# put any explicit constraint on userspace ordering. They can also +# select the _ACQ_REL version. +config HAVE_KVM_DIRTY_RING_TSO + bool + select HAVE_KVM_DIRTY_RING + depends on X86 + +# Weakly ordered architectures can only select this, advertising +# to userspace the additional ordering requirements. +config HAVE_KVM_DIRTY_RING_ACQ_REL + bool + select HAVE_KVM_DIRTY_RING + config HAVE_KVM_EVENTFD bool select EVENTFD diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 584a5bab3af3..5b064dbadaf4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4475,7 +4475,13 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_NR_MEMSLOTS: return KVM_USER_MEM_SLOTS; case KVM_CAP_DIRTY_LOG_RING: -#ifdef CONFIG_HAVE_KVM_DIRTY_RING +#ifdef CONFIG_HAVE_KVM_DIRTY_RING_TSO + return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn); +#else + return 0; +#endif + case KVM_CAP_DIRTY_LOG_RING_ACQ_REL: +#ifdef CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn); #else return 0; @@ -4580,6 +4586,7 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, return 0; } case KVM_CAP_DIRTY_LOG_RING: + case KVM_CAP_DIRTY_LOG_RING_ACQ_REL: return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); default: return kvm_vm_ioctl_enable_cap(kvm, cap); -- cgit v1.2.3 From ee3e88dfec23153d0675b5d00522297b9adf657c Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 28 Sep 2022 15:27:51 +0530 Subject: perf/mem: Introduce PERF_MEM_LVLNUM_{EXTN_MEM|IO} PERF_MEM_LVLNUM_EXTN_MEM which can be used to indicate accesses to extension memory like CXL etc. PERF_MEM_LVL_IO can be used for IO accesses but it can not distinguish between local and remote IO. Introduce new field PERF_MEM_LVLNUM_IO which can be clubbed with PERF_MEM_REMOTE_REMOTE to indicate Remote IO accesses. Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220928095805.596-2-ravi.bangoria@amd.com --- include/uapi/linux/perf_event.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e639c74cf5fb..4ae3c249f675 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1336,7 +1336,9 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0xa available */ +/* 5-0x8 available */ +#define PERF_MEM_LVLNUM_EXTN_MEM 0x09 /* Extension memory */ +#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ #define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -- cgit v1.2.3 From cfef80bad4cf79cdc964a53c98254dfa462be83f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 28 Sep 2022 15:27:57 +0530 Subject: perf/uapi: Define PERF_MEM_SNOOPX_PEER in kernel header file PERF_MEM_SNOOPX_PEER is defined only in tools uapi header. Although it's used only by perf tool, not defining it in kernel header can create problems in future. Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220928095805.596-8-ravi.bangoria@amd.com --- include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 4ae3c249f675..85be78e0e7f6 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1356,7 +1356,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOP_SHIFT 19 #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -/* 1 free */ +#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ #define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ -- cgit v1.2.3 From 650ae67bbf7ba5ac193f053969612fbb93247b64 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Tue, 27 Sep 2022 18:53:38 -0400 Subject: counter: Introduce the Signal polarity component The Signal polarity component represents the active level of a respective Signal. There are two possible states: positive (rising edge) and negative (falling edge); enum counter_signal_polarity represents these states. A convenience macro COUNTER_COMP_POLARITY() is provided for driver authors to declare a Signal polarity component. Cc: Julien Panis Link: https://lore.kernel.org/r/8f47d6e1db71a11bb1e2666f8e2a6e9d256d4131.1664204990.git.william.gray@linaro.org/ Signed-off-by: William Breathitt Gray Link: https://lore.kernel.org/r/b6e53438badcb6318997d13dd2fc052f97d808ac.1664318353.git.william.gray@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-bus-counter | 14 ++++++++++++++ drivers/counter/counter-chrdev.c | 1 + drivers/counter/counter-sysfs.c | 12 ++++++++++++ include/linux/counter.h | 10 ++++++++++ include/uapi/linux/counter.h | 6 ++++++ 5 files changed, 43 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-counter b/Documentation/ABI/testing/sysfs-bus-counter index 06c2b3e27e0b..2a996deabe9e 100644 --- a/Documentation/ABI/testing/sysfs-bus-counter +++ b/Documentation/ABI/testing/sysfs-bus-counter @@ -217,6 +217,7 @@ What: /sys/bus/counter/devices/counterX/signalY/cable_fault_component_id What: /sys/bus/counter/devices/counterX/signalY/cable_fault_enable_component_id What: /sys/bus/counter/devices/counterX/signalY/filter_clock_prescaler_component_id What: /sys/bus/counter/devices/counterX/signalY/index_polarity_component_id +What: /sys/bus/counter/devices/counterX/signalY/polarity_component_id What: /sys/bus/counter/devices/counterX/signalY/synchronous_mode_component_id KernelVersion: 5.16 Contact: linux-iio@vger.kernel.org @@ -303,6 +304,19 @@ Description: Discrete set of available values for the respective Signal Y configuration are listed in this file. +What: /sys/bus/counter/devices/counterX/signalY/polarity +KernelVersion: 6.1 +Contact: linux-iio@vger.kernel.org +Description: + Active level of Signal Y. The following polarity values are + available: + + positive: + Signal high state considered active level (rising edge). + + negative: + Signal low state considered active level (falling edge). + What: /sys/bus/counter/devices/counterX/signalY/name KernelVersion: 5.2 Contact: linux-iio@vger.kernel.org diff --git a/drivers/counter/counter-chrdev.c b/drivers/counter/counter-chrdev.c index 4e71a19d7e6a..120879ee2e87 100644 --- a/drivers/counter/counter-chrdev.c +++ b/drivers/counter/counter-chrdev.c @@ -487,6 +487,7 @@ static int counter_get_data(struct counter_device *const counter, case COUNTER_COMP_ENUM: case COUNTER_COMP_COUNT_DIRECTION: case COUNTER_COMP_COUNT_MODE: + case COUNTER_COMP_SIGNAL_POLARITY: switch (comp_node->component.scope) { case COUNTER_SCOPE_DEVICE: ret = comp->device_u32_read(counter, &value_u32); diff --git a/drivers/counter/counter-sysfs.c b/drivers/counter/counter-sysfs.c index 04eac41dad33..e5dd36e1a45f 100644 --- a/drivers/counter/counter-sysfs.c +++ b/drivers/counter/counter-sysfs.c @@ -91,6 +91,11 @@ static const char *const counter_count_mode_str[] = { [COUNTER_COUNT_MODE_MODULO_N] = "modulo-n" }; +static const char *const counter_signal_polarity_str[] = { + [COUNTER_SIGNAL_POLARITY_POSITIVE] = "positive", + [COUNTER_SIGNAL_POLARITY_NEGATIVE] = "negative" +}; + static ssize_t counter_comp_u8_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -201,6 +206,8 @@ static ssize_t counter_comp_u32_show(struct device *dev, return sysfs_emit(buf, "%s\n", counter_count_direction_str[data]); case COUNTER_COMP_COUNT_MODE: return sysfs_emit(buf, "%s\n", counter_count_mode_str[data]); + case COUNTER_COMP_SIGNAL_POLARITY: + return sysfs_emit(buf, "%s\n", counter_signal_polarity_str[data]); default: return sysfs_emit(buf, "%u\n", (unsigned int)data); } @@ -252,6 +259,10 @@ static ssize_t counter_comp_u32_store(struct device *dev, err = counter_find_enum(&data, avail->enums, avail->num_items, buf, counter_count_mode_str); break; + case COUNTER_COMP_SIGNAL_POLARITY: + err = counter_find_enum(&data, avail->enums, avail->num_items, + buf, counter_signal_polarity_str); + break; default: err = kstrtou32(buf, 0, &data); break; @@ -469,6 +480,7 @@ static int counter_attr_create(struct device *const dev, case COUNTER_COMP_ENUM: case COUNTER_COMP_COUNT_DIRECTION: case COUNTER_COMP_COUNT_MODE: + case COUNTER_COMP_SIGNAL_POLARITY: if (comp->device_u32_read) { dev_attr->attr.mode |= 0444; dev_attr->show = counter_comp_u32_show; diff --git a/include/linux/counter.h b/include/linux/counter.h index a81234bc8ea8..b3fb6b68881a 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -31,6 +31,7 @@ enum counter_comp_type { COUNTER_COMP_ENUM, COUNTER_COMP_COUNT_DIRECTION, COUNTER_COMP_COUNT_MODE, + COUNTER_COMP_SIGNAL_POLARITY, }; /** @@ -477,6 +478,15 @@ struct counter_available { #define COUNTER_COMP_FLOOR(_read, _write) \ COUNTER_COMP_COUNT_U64("floor", _read, _write) +#define COUNTER_COMP_POLARITY(_read, _write, _available) \ +{ \ + .type = COUNTER_COMP_SIGNAL_POLARITY, \ + .name = "polarity", \ + .signal_u32_read = (_read), \ + .signal_u32_write = (_write), \ + .priv = &(_available), \ +} + #define COUNTER_COMP_PRESET(_read, _write) \ COUNTER_COMP_COUNT_U64("preset", _read, _write) diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h index 96c5ffd368ad..e9610e1944dc 100644 --- a/include/uapi/linux/counter.h +++ b/include/uapi/linux/counter.h @@ -153,4 +153,10 @@ enum counter_synapse_action { COUNTER_SYNAPSE_ACTION_BOTH_EDGES, }; +/* Signal polarity values */ +enum counter_signal_polarity { + COUNTER_SIGNAL_POLARITY_POSITIVE, + COUNTER_SIGNAL_POLARITY_NEGATIVE, +}; + #endif /* _UAPI_COUNTER_H_ */ -- cgit v1.2.3 From 45d2918520b2d8e640e4fb3fbf664dfb823dc520 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Tue, 27 Sep 2022 18:53:40 -0400 Subject: counter: Introduce the Count capture component Some devices provide a latch function to save historic Count values. This patch standardizes exposure of such functionality as Count capture components. A COUNTER_COMP_CAPTURE macro is provided for driver authors to define a capture component. A new event COUNTER_EVENT_CAPTURE is introduced to represent Count value capture events. Cc: Julien Panis Link: https://lore.kernel.org/r/c239572ab4208d0d6728136e82a88ad464369a7a.1664204990.git.william.gray@linaro.org/ Signed-off-by: William Breathitt Gray Link: https://lore.kernel.org/r/3cebaa0b807a225eb277d771504fe6dba7269ffd.1664318353.git.william.gray@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-bus-counter | 7 +++++++ include/linux/counter.h | 3 +++ include/uapi/linux/counter.h | 2 ++ 3 files changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-counter b/Documentation/ABI/testing/sysfs-bus-counter index 2a996deabe9e..3eb6b063970a 100644 --- a/Documentation/ABI/testing/sysfs-bus-counter +++ b/Documentation/ABI/testing/sysfs-bus-counter @@ -4,6 +4,12 @@ Contact: linux-iio@vger.kernel.org Description: Count data of Count Y represented as a string. +What: /sys/bus/counter/devices/counterX/countY/capture +KernelVersion: 6.1 +Contact: linux-iio@vger.kernel.org +Description: + Historical capture of the Count Y count data. + What: /sys/bus/counter/devices/counterX/countY/ceiling KernelVersion: 5.2 Contact: linux-iio@vger.kernel.org @@ -203,6 +209,7 @@ Description: both edges: Any state transition. +What: /sys/bus/counter/devices/counterX/countY/capture_component_id What: /sys/bus/counter/devices/counterX/countY/ceiling_component_id What: /sys/bus/counter/devices/counterX/countY/floor_component_id What: /sys/bus/counter/devices/counterX/countY/count_mode_component_id diff --git a/include/linux/counter.h b/include/linux/counter.h index b3fb6b68881a..e160197971dd 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -453,6 +453,9 @@ struct counter_available { .priv = &(_available), \ } +#define COUNTER_COMP_CAPTURE(_read, _write) \ + COUNTER_COMP_COUNT_U64("capture", _read, _write) + #define COUNTER_COMP_CEILING(_read, _write) \ COUNTER_COMP_COUNT_U64("ceiling", _read, _write) diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h index e9610e1944dc..8ab12d731e3b 100644 --- a/include/uapi/linux/counter.h +++ b/include/uapi/linux/counter.h @@ -63,6 +63,8 @@ enum counter_event_type { COUNTER_EVENT_INDEX, /* State of counter is changed */ COUNTER_EVENT_CHANGE_OF_STATE, + /* Count value captured */ + COUNTER_EVENT_CAPTURE, }; /** -- cgit v1.2.3 From 90fea5a800c3dd80fb8ad9a02929bcef5fde42b8 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 27 Sep 2022 15:48:08 +0800 Subject: vdpa: device feature provisioning This patch allows the device features to be provisioned through netlink. A new attribute is introduced to allow the userspace to pass a 64bit device features during device adding. This provides several advantages: - Allow to provision a subset of the features to ease the cross vendor live migration. - Better debug-ability for vDPA framework and parent. Reviewed-by: Eli Cohen Signed-off-by: Jason Wang Message-Id: <20220927074810.28627-2-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 5 +++++ include/linux/vdpa.h | 1 + include/uapi/linux/vdpa.h | 2 ++ 3 files changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index c06c02704461..278e26bfa492 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -600,6 +600,11 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i } config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); } + if (nl_attrs[VDPA_ATTR_DEV_FEATURES]) { + config.device_features = + nla_get_u64(nl_attrs[VDPA_ATTR_DEV_FEATURES]); + config.mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES); + } /* Skip checking capability if user didn't prefer to configure any * device networking attributes. It is likely that user might have used diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index d282f464d2f1..6d0f5e4e82c2 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -104,6 +104,7 @@ struct vdpa_iova_range { }; struct vdpa_dev_set_config { + u64 device_features; struct { u8 mac[ETH_ALEN]; u16 mtu; diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 25c55cab3d7c..9dc855f37c59 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -52,6 +52,8 @@ enum vdpa_attr { VDPA_ATTR_DEV_VENDOR_ATTR_NAME, /* string */ VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, /* u64 */ + VDPA_ATTR_DEV_FEATURES, /* u64 */ + /* new attributes must be added above here */ VDPA_ATTR_MAX, }; -- cgit v1.2.3 From e60d64074214db7207fc13c25ee39d8d47cb4a34 Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Wed, 21 Sep 2022 11:27:29 +0300 Subject: virtio_blk: add SECURE ERASE command support Support for the VIRTIO_BLK_F_SECURE_ERASE VirtIO feature. A device that offers this feature can receive VIRTIO_BLK_T_SECURE_ERASE commands. A device which supports this feature has the following fields in the virtio config: - max_secure_erase_sectors - max_secure_erase_seg - secure_erase_sector_alignment max_secure_erase_sectors and secure_erase_sector_alignment are expressed in 512-byte units. Every secure erase command has the following fields: - sectors: The starting offset in 512-byte units. - num_sectors: The number of sectors. Signed-off-by: Alvaro Karsz Message-Id: <20220921082729.2516779-1-alvaro.karsz@solid-run.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- drivers/block/virtio_blk.c | 110 +++++++++++++++++++++++++++++++++------- include/uapi/linux/virtio_blk.h | 19 +++++++ 2 files changed, 111 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index dd9a05174726..f03505a65d08 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -130,7 +130,7 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr) return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); } -static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap) +static int virtblk_setup_discard_write_zeroes_erase(struct request *req, bool unmap) { unsigned short segments = blk_rq_nr_discard_segments(req); unsigned short n = 0; @@ -240,6 +240,9 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, type = VIRTIO_BLK_T_WRITE_ZEROES; unmap = !(req->cmd_flags & REQ_NOUNMAP); break; + case REQ_OP_SECURE_ERASE: + type = VIRTIO_BLK_T_SECURE_ERASE; + break; case REQ_OP_DRV_IN: type = VIRTIO_BLK_T_GET_ID; break; @@ -251,8 +254,9 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, vbr->out_hdr.type = cpu_to_virtio32(vdev, type); vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); - if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { - if (virtblk_setup_discard_write_zeroes(req, unmap)) + if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES || + type == VIRTIO_BLK_T_SECURE_ERASE) { + if (virtblk_setup_discard_write_zeroes_erase(req, unmap)) return BLK_STS_RESOURCE; } @@ -888,6 +892,8 @@ static int virtblk_probe(struct virtio_device *vdev) int err, index; u32 v, blk_size, max_size, sg_elems, opt_io_size; + u32 max_discard_segs = 0; + u32 discard_granularity = 0; u16 min_io_size; u8 physical_block_exp, alignment_offset; unsigned int queue_depth; @@ -1045,27 +1051,14 @@ static int virtblk_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { virtio_cread(vdev, struct virtio_blk_config, - discard_sector_alignment, &v); - if (v) - q->limits.discard_granularity = v << SECTOR_SHIFT; - else - q->limits.discard_granularity = blk_size; + discard_sector_alignment, &discard_granularity); virtio_cread(vdev, struct virtio_blk_config, max_discard_sectors, &v); blk_queue_max_discard_sectors(q, v ? v : UINT_MAX); virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, - &v); - - /* - * max_discard_seg == 0 is out of spec but we always - * handled it. - */ - if (!v) - v = sg_elems; - blk_queue_max_discard_segments(q, - min(v, MAX_DISCARD_SEGMENTS)); + &max_discard_segs); } if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) { @@ -1074,6 +1067,85 @@ static int virtblk_probe(struct virtio_device *vdev) blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX); } + /* The discard and secure erase limits are combined since the Linux + * block layer uses the same limit for both commands. + * + * If both VIRTIO_BLK_F_SECURE_ERASE and VIRTIO_BLK_F_DISCARD features + * are negotiated, we will use the minimum between the limits. + * + * discard sector alignment is set to the minimum between discard_sector_alignment + * and secure_erase_sector_alignment. + * + * max discard sectors is set to the minimum between max_discard_seg and + * max_secure_erase_seg. + */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) { + + virtio_cread(vdev, struct virtio_blk_config, + secure_erase_sector_alignment, &v); + + /* secure_erase_sector_alignment should not be zero, the device should set a + * valid number of sectors. + */ + if (!v) { + dev_err(&vdev->dev, + "virtio_blk: secure_erase_sector_alignment can't be 0\n"); + err = -EINVAL; + goto out_cleanup_disk; + } + + discard_granularity = min_not_zero(discard_granularity, v); + + virtio_cread(vdev, struct virtio_blk_config, + max_secure_erase_sectors, &v); + + /* max_secure_erase_sectors should not be zero, the device should set a + * valid number of sectors. + */ + if (!v) { + dev_err(&vdev->dev, + "virtio_blk: max_secure_erase_sectors can't be 0\n"); + err = -EINVAL; + goto out_cleanup_disk; + } + + blk_queue_max_secure_erase_sectors(q, v); + + virtio_cread(vdev, struct virtio_blk_config, + max_secure_erase_seg, &v); + + /* max_secure_erase_seg should not be zero, the device should set a + * valid number of segments + */ + if (!v) { + dev_err(&vdev->dev, + "virtio_blk: max_secure_erase_seg can't be 0\n"); + err = -EINVAL; + goto out_cleanup_disk; + } + + max_discard_segs = min_not_zero(max_discard_segs, v); + } + + if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD) || + virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) { + /* max_discard_seg and discard_granularity will be 0 only + * if max_discard_seg and discard_sector_alignment fields in the virtio + * config are 0 and VIRTIO_BLK_F_SECURE_ERASE feature is not negotiated. + * In this case, we use default values. + */ + if (!max_discard_segs) + max_discard_segs = sg_elems; + + blk_queue_max_discard_segments(q, + min(max_discard_segs, MAX_DISCARD_SEGMENTS)); + + if (discard_granularity) + q->limits.discard_granularity = discard_granularity << SECTOR_SHIFT; + else + q->limits.discard_granularity = blk_size; + } + virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); @@ -1169,6 +1241,7 @@ static unsigned int features_legacy[] = { VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, + VIRTIO_BLK_F_SECURE_ERASE, } ; static unsigned int features[] = { @@ -1176,6 +1249,7 @@ static unsigned int features[] = { VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, + VIRTIO_BLK_F_SECURE_ERASE, }; static struct virtio_driver virtio_blk = { diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index d888f013d9ff..58e70b24b504 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -40,6 +40,7 @@ #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ #define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ #define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ +#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */ /* Legacy feature bits */ #ifndef VIRTIO_BLK_NO_LEGACY @@ -121,6 +122,21 @@ struct virtio_blk_config { __u8 write_zeroes_may_unmap; __u8 unused1[3]; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */ + /* + * The maximum secure erase sectors (in 512-byte sectors) for + * one segment. + */ + __virtio32 max_secure_erase_sectors; + /* + * The maximum number of secure erase segments in a + * secure erase command. + */ + __virtio32 max_secure_erase_seg; + /* Secure erase commands must be aligned to this number of sectors. */ + __virtio32 secure_erase_sector_alignment; + } __attribute__((packed)); /* @@ -155,6 +171,9 @@ struct virtio_blk_config { /* Write zeroes command */ #define VIRTIO_BLK_T_WRITE_ZEROES 13 +/* Secure erase command */ +#define VIRTIO_BLK_T_SECURE_ERASE 14 + #ifndef VIRTIO_BLK_NO_LEGACY /* Barrier before this op. */ #define VIRTIO_BLK_T_BARRIER 0x80000000 -- cgit v1.2.3 From 228565100def593df0f26ee07d5fb810039454d5 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Thu, 29 Sep 2022 09:45:50 +0800 Subject: vDPA: allow userspace to query features of a vDPA device This commit adds a new vDPA netlink attribution VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES. Userspace can query features of vDPA devices through this new attr. This commit invokes vdpa_config_ops.get_config() rather than vdpa_get_config_unlocked() to read the device config spcae, so no races in vdpa_set_features_unlocked() Userspace tool iproute2 example: $ vdpa dev config show vdpa0 vdpa0: mac 00:e8:ca:11:be:05 link up link_announce false max_vq_pairs 4 mtu 1500 negotiated_features MRG_RXBUF CTRL_VQ MQ VERSION_1 ACCESS_PLATFORM dev_features MTU MAC MRG_RXBUF CTRL_VQ MQ ANY_LAYOUT VERSION_1 ACCESS_PLATFORM Signed-off-by: Zhu Lingshan Message-Id: <20220929014555.112323-2-lingshan.zhu@intel.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 16 +++++++++++----- include/uapi/linux/vdpa.h | 4 ++++ 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 278e26bfa492..d0a7b76d9163 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -820,10 +820,10 @@ static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev, static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg) { struct virtio_net_config config = {}; - u64 features; + u64 features_device, features_driver; u16 val_u16; - vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config)); + vdev->config->get_config(vdev, 0, &config, sizeof(config)); if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac), config.mac)) @@ -837,12 +837,18 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16)) return -EMSGSIZE; - features = vdev->config->get_driver_features(vdev); - if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features, + features_driver = vdev->config->get_driver_features(vdev); + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features_driver, + VDPA_ATTR_PAD)) + return -EMSGSIZE; + + features_device = vdev->config->get_device_features(vdev); + + if (nla_put_u64_64bit(msg, VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, features_device, VDPA_ATTR_PAD)) return -EMSGSIZE; - return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config); + return vdpa_dev_net_mq_config_fill(vdev, msg, features_driver, &config); } static int diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 9dc855f37c59..9bd79235c875 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -46,6 +46,7 @@ enum vdpa_attr { VDPA_ATTR_DEV_NEGOTIATED_FEATURES, /* u64 */ VDPA_ATTR_DEV_MGMTDEV_MAX_VQS, /* u32 */ + /* virtio features that are supported by the vDPA management device */ VDPA_ATTR_DEV_SUPPORTED_FEATURES, /* u64 */ VDPA_ATTR_DEV_QUEUE_INDEX, /* u32 */ @@ -54,6 +55,9 @@ enum vdpa_attr { VDPA_ATTR_DEV_FEATURES, /* u64 */ + /* virtio features that are supported by the vDPA device */ + VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, /* u64 */ + /* new attributes must be added above here */ VDPA_ATTR_MAX, }; -- cgit v1.2.3