From 65074d43fc77bcae32776724b7fa2696923c78e4 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 6 Dec 2017 14:45:13 -0800 Subject: perf/core: Prepare perf_event.h for new types: 'perf_kprobe' and 'perf_uprobe' Two new perf types, perf_kprobe and perf_uprobe, will be added to allow creating [k,u]probe with perf_event_open. These [k,u]probe are associated with the file decriptor created by perf_event_open(), thus are easy to clean when the file descriptor is destroyed. kprobe_func and uprobe_path are added to union config1 for pointers to function name for kprobe or binary path for uprobe. kprobe_addr and probe_offset are added to union config2 for kernel address (when kprobe_func is NULL), or [k,u]probe offset. Signed-off-by: Song Liu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Yonghong Song Reviewed-by: Josef Bacik Acked-by: Alexei Starovoitov Cc: Cc: Cc: Cc: Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171206224518.3598254-4-songliubraving@fb.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c77c9a2ebbbb..5d49cfc509e7 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -380,10 +380,14 @@ struct perf_event_attr { __u32 bp_type; union { __u64 bp_addr; + __u64 kprobe_func; /* for perf_kprobe */ + __u64 uprobe_path; /* for perf_uprobe */ __u64 config1; /* extension of config */ }; union { __u64 bp_len; + __u64 kprobe_addr; /* when kprobe_func == NULL */ + __u64 probe_offset; /* for perf_[k,u]probe */ __u64 config2; /* extension of config1 */ }; __u64 branch_sample_type; /* enum perf_branch_sample_type */ -- cgit v1.2.3 From a11024457d348672b26b3d4581ed19c793399b48 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 6 Feb 2018 20:32:46 -0500 Subject: uapi: Fix type used in ioctl parameter structures Use __u32 and __u64 instead of POSIX types that may not be defined in user mode builds. Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/linux/kfd_ioctl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index f4cab5b3ba9a..111d73ba2d96 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -263,10 +263,10 @@ struct kfd_ioctl_get_tile_config_args { }; struct kfd_ioctl_set_trap_handler_args { - uint64_t tba_addr; /* to KFD */ - uint64_t tma_addr; /* to KFD */ - uint32_t gpu_id; /* to KFD */ - uint32_t pad; + __u64 tba_addr; /* to KFD */ + __u64 tma_addr; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; }; #define AMDKFD_IOCTL_BASE 'K' -- cgit v1.2.3 From 5f171577b4f35b44795a73bde8cf2c49b4073925 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 24 Oct 2017 16:52:32 +0100 Subject: Drop a bunch of metag references Now that arch/metag/ has been removed, drop a bunch of metag references in various codes across the whole tree: - VM_GROWSUP and __VM_ARCH_SPECIFIC_1. - MT_METAG_* ELF note types. - METAG Kconfig dependencies (FRAME_POINTER) and ranges (MAX_STACK_SIZE_MB). - metag cases in tools (checkstack.pl, recordmcount.c, perf). Signed-off-by: James Hogan Acked-by: Steven Rostedt (VMware) Acked-by: Peter Zijlstra (Intel) Reviewed-by: Guenter Roeck Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: linux-mm@kvack.org Cc: linux-metag@vger.kernel.org --- include/linux/cpuhotplug.h | 1 - include/linux/mm.h | 2 -- include/trace/events/mmflags.h | 2 +- include/uapi/linux/elf.h | 3 --- lib/Kconfig.debug | 2 +- mm/Kconfig | 7 +++---- scripts/checkstack.pl | 4 ---- scripts/recordmcount.c | 20 -------------------- tools/perf/perf-sys.h | 4 ---- 9 files changed, 5 insertions(+), 40 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 5172ad0daa7c..c7a950681f3a 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -108,7 +108,6 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_CQM_STARTING, CPUHP_AP_PERF_X86_CSTATE_STARTING, CPUHP_AP_PERF_XTENSA_STARTING, - CPUHP_AP_PERF_METAG_STARTING, CPUHP_AP_MIPS_OP_LOONGSON3_STARTING, CPUHP_AP_ARM_SDEI_STARTING, CPUHP_AP_ARM_VFP_STARTING, diff --git a/include/linux/mm.h b/include/linux/mm.h index ad06d42adb1a..ccac10682ce5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -241,8 +241,6 @@ extern unsigned int kobjsize(const void *objp); # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 -#elif defined(CONFIG_METAG) -# define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_IA64) # define VM_GROWSUP VM_ARCH_1 #elif !defined(CONFIG_MMU) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index dbe1bb058c09..a81cffb76d89 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -115,7 +115,7 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) #define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } #elif defined(CONFIG_PPC) #define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" } -#elif defined(CONFIG_PARISC) || defined(CONFIG_METAG) || defined(CONFIG_IA64) +#elif defined(CONFIG_PARISC) || defined(CONFIG_IA64) #define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" } #elif !defined(CONFIG_MMU) #define __VM_ARCH_SPECIFIC_1 {VM_MAPPED_COPY,"mappedcopy" } diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 3bf73fb58045..e2535d6dcec7 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -420,9 +420,6 @@ typedef struct elf64_shdr { #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ #define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */ -#define NT_METAG_CBUF 0x500 /* Metag catch buffer registers */ -#define NT_METAG_RPIPE 0x501 /* Metag read pipeline state */ -#define NT_METAG_TLS 0x502 /* Metag TLS pointer */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ /* Note header in a PT_NOTE section */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6088408ef26c..d1c523e408e9 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -356,7 +356,7 @@ config FRAME_POINTER bool "Compile the kernel with frame pointers" depends on DEBUG_KERNEL && \ (CRIS || M68K || FRV || UML || \ - SUPERH || BLACKFIN || MN10300 || METAG) || \ + SUPERH || BLACKFIN || MN10300) || \ ARCH_WANT_FRAME_POINTERS default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS help diff --git a/mm/Kconfig b/mm/Kconfig index c782e8fb7235..abefa573bcd8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -627,15 +627,14 @@ config GENERIC_EARLY_IOREMAP config MAX_STACK_SIZE_MB int "Maximum user stack size for 32-bit processes (MB)" default 80 - range 8 256 if METAG range 8 2048 depends on STACK_GROWSUP && (!64BIT || COMPAT) help This is the maximum stack size in Megabytes in the VM layout of 32-bit user processes when the stack grows upwards (currently only on parisc - and metag arch). The stack will be located at the highest memory - address minus the given value, unless the RLIMIT_STACK hard limit is - changed to a smaller value in which case that is used. + arch). The stack will be located at the highest memory address minus + the given value, unless the RLIMIT_STACK hard limit is changed to a + smaller value in which case that is used. A sane initial value is 80 MB. diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index cb993801e4b2..eeb9ac8dbcfb 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -64,10 +64,6 @@ my (@stack, $re, $dre, $x, $xs, $funcre); # 2b6c: 4e56 fb70 linkw %fp,#-1168 # 1df770: defc ffe4 addaw #-28,%sp $re = qr/.*(?:linkw %fp,|addaw )#-([0-9]{1,4})(?:,%sp)?$/o; - } elsif ($arch eq 'metag') { - #400026fc: 40 00 00 82 ADD A0StP,A0StP,#0x8 - $re = qr/.*ADD.*A0StP,A0StP,\#(0x$x{1,8})/o; - $funcre = qr/^$x* <[^\$](.*)>:$/; } elsif ($arch eq 'mips64') { #8800402c: 67bdfff0 daddiu sp,sp,-16 $re = qr/.*daddiu.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o; diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 16e086dcc567..8c9691c3329e 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -33,20 +33,6 @@ #include #include -/* - * glibc synced up and added the metag number but didn't add the relocations. - * Work around this in a crude manner for now. - */ -#ifndef EM_METAG -#define EM_METAG 174 -#endif -#ifndef R_METAG_ADDR32 -#define R_METAG_ADDR32 2 -#endif -#ifndef R_METAG_NONE -#define R_METAG_NONE 3 -#endif - #ifndef EM_AARCH64 #define EM_AARCH64 183 #define R_AARCH64_NONE 0 @@ -538,12 +524,6 @@ do_file(char const *const fname) gpfx = '_'; break; case EM_IA_64: reltype = R_IA64_IMM64; gpfx = '_'; break; - case EM_METAG: reltype = R_METAG_ADDR32; - altmcount = "_mcount_wrapper"; - rel_type_nop = R_METAG_NONE; - /* We happen to have the same requirement as MIPS */ - is_fake_mcount32 = MIPS32_is_fake_mcount; - break; case EM_MIPS: /* reltype: e_class */ gpfx = '_'; break; case EM_PPC: reltype = R_PPC_ADDR32; gpfx = '_'; break; case EM_PPC64: reltype = R_PPC64_ADDR64; gpfx = '_'; break; diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index 36673f98d66b..3eb7a39169f6 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -46,10 +46,6 @@ #define CPUINFO_PROC {"Processor"} #endif -#ifdef __metag__ -#define CPUINFO_PROC {"CPU"} -#endif - #ifdef __xtensa__ #define CPUINFO_PROC {"core ID"} #endif -- cgit v1.2.3 From 2fe4c22c53fc2e3f35be2cd0033cb3d15ebd41b1 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Tue, 27 Feb 2018 08:03:56 -0500 Subject: media: rc: lirc does not use LIRC_CAN_SEND_SCANCODE feature Since commit 02d742f4b209 ("media: lirc: lirc daemon fails to detect raw IR device"), the feature LIRC_CAN_SEND_SCANCODE is no longer used as it tripped up lircd. The ability to send scancodes for IR Tx is implied by LIRC_CAN_SEND_PULSE (i.e. any device that can send can use IR Tx encoders). So, remove LIRC_CAN_SEND_SCANCODE since it never used. This fixes: Documentation/output/lirc.h.rst:6: WARNING: undefined label: lirc-can-send-scancode (if the link has no caption the label must precede a section header As this flag was added for kernel 4.16, let's remove it, while not too late. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index 4fe580d36e41..f5bf06ecd87d 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -54,7 +54,6 @@ #define LIRC_CAN_SEND_RAW LIRC_MODE2SEND(LIRC_MODE_RAW) #define LIRC_CAN_SEND_PULSE LIRC_MODE2SEND(LIRC_MODE_PULSE) #define LIRC_CAN_SEND_MODE2 LIRC_MODE2SEND(LIRC_MODE_MODE2) -#define LIRC_CAN_SEND_SCANCODE LIRC_MODE2SEND(LIRC_MODE_SCANCODE) #define LIRC_CAN_SEND_LIRCCODE LIRC_MODE2SEND(LIRC_MODE_LIRCCODE) #define LIRC_CAN_SEND_MASK 0x0000003f -- cgit v1.2.3 From 32ff77e8cc9e66cc4fb38098f64fd54cc8f54573 Mon Sep 17 00:00:00 2001 From: Milind Chabbi Date: Mon, 12 Mar 2018 14:45:47 +0100 Subject: perf/core: Implement fast breakpoint modification via _IOC_MODIFY_ATTRIBUTES Problem and motivation: Once a breakpoint perf event (PERF_TYPE_BREAKPOINT) is created, there is no flexibility to change the breakpoint type (bp_type), breakpoint address (bp_addr), or breakpoint length (bp_len). The only option is to close the perf event and configure a new breakpoint event. This inflexibility has a significant performance overhead. For example, sampling-based, lightweight performance profilers (and also concurrency bug detection tools), monitor different addresses for a short duration using PERF_TYPE_BREAKPOINT and change the address (bp_addr) to another address or change the kind of breakpoint (bp_type) from "write" to a "read" or vice-versa or change the length (bp_len) of the address being monitored. The cost of these modifications is prohibitive since it involves unmapping the circular buffer associated with the perf event, closing the perf event, opening another perf event and mmaping another circular buffer. Solution: The new ioctl flag for perf events, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, introduced in this patch takes a pointer to a struct perf_event_attr as an argument to update an old breakpoint event with new address, type, and size. This facility allows retaining a previous mmaped perf events ring buffer and avoids having to close and reopen another perf event. This patch supports only changing PERF_TYPE_BREAKPOINT event type; future implementations can extend this feature. The patch replicates some of its functionality of modify_user_hw_breakpoint() in kernel/events/hw_breakpoint.c. modify_user_hw_breakpoint cannot be called directly since perf_event_ctx_lock() is already held in _perf_ioctl(). Evidence: Experiments show that the baseline (not able to modify an already created breakpoint) costs an order of magnitude (~10x) more than the suggested optimization (having the ability to dynamically modifying a configured breakpoint via ioctl). When the breakpoints typically do not trap, the speedup due to the suggested optimization is ~10x; even when the breakpoints always trap, the speedup is ~4x due to the suggested optimization. Testing: tests posted at https://github.com/linux-contrib/perf_event_modify_bp demonstrate the performance significance of this patch. Tests also check the functional correctness of the patch. Signed-off-by: Milind Chabbi [ Using modify_user_hw_breakpoint_check function. ] [ Reformated PERF_EVENT_IOC_*, so the values are all in one column. ] Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Frederic Weisbecker Cc: Hari Bathini Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Michael Ellerman Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20180312134548.31532-8-jolsa@kernel.org Signed-off-by: Ingo Molnar --- include/linux/hw_breakpoint.h | 7 +++++ include/uapi/linux/perf_event.h | 23 +++++++++-------- kernel/events/core.c | 48 +++++++++++++++++++++++++++++++++++ kernel/events/hw_breakpoint.c | 2 +- tools/include/uapi/linux/perf_event.h | 23 +++++++++-------- 5 files changed, 80 insertions(+), 23 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index cf045885a499..6058c3844a76 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -53,6 +53,9 @@ register_user_hw_breakpoint(struct perf_event_attr *attr, /* FIXME: only change from the attr, and don't unregister */ extern int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr); +extern int +modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr, + bool check); /* * Kernel breakpoints are not associated with any particular thread. @@ -97,6 +100,10 @@ register_user_hw_breakpoint(struct perf_event_attr *attr, static inline int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { return -ENOSYS; } +static inline int +modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr, + bool check) { return -ENOSYS; } + static inline struct perf_event * register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_overflow_handler_t triggered, diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 6f873503552d..912b85b52344 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -448,17 +448,18 @@ struct perf_event_query_bpf { /* * Ioctls that can be done on a perf event fd: */ -#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) -#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) -#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) -#define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) -#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) -#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) -#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) -#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) +#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/kernel/events/core.c b/kernel/events/core.c index ee145bdee6ed..3b4c7792a6ac 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2846,6 +2846,41 @@ int perf_event_refresh(struct perf_event *event, int refresh) } EXPORT_SYMBOL_GPL(perf_event_refresh); +static int perf_event_modify_breakpoint(struct perf_event *bp, + struct perf_event_attr *attr) +{ + int err; + + _perf_event_disable(bp); + + err = modify_user_hw_breakpoint_check(bp, attr, true); + if (err) { + if (!bp->attr.disabled) + _perf_event_enable(bp); + + return err; + } + + if (!attr->disabled) + _perf_event_enable(bp); + return 0; +} + +static int perf_event_modify_attr(struct perf_event *event, + struct perf_event_attr *attr) +{ + if (event->attr.type != attr->type) + return -EINVAL; + + switch (event->attr.type) { + case PERF_TYPE_BREAKPOINT: + return perf_event_modify_breakpoint(event, attr); + default: + /* Place holder for future additions. */ + return -EOPNOTSUPP; + } +} + static void ctx_sched_out(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, enum event_type_t event_type) @@ -4952,6 +4987,8 @@ static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event); static int perf_event_set_filter(struct perf_event *event, void __user *arg); static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd); +static int perf_copy_attr(struct perf_event_attr __user *uattr, + struct perf_event_attr *attr); static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) { @@ -5024,6 +5061,17 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon case PERF_EVENT_IOC_QUERY_BPF: return perf_event_query_prog_array(event, (void __user *)arg); + + case PERF_EVENT_IOC_MODIFY_ATTRIBUTES: { + struct perf_event_attr new_attr; + int err = perf_copy_attr((struct perf_event_attr __user *)arg, + &new_attr); + + if (err) + return err; + + return perf_event_modify_attr(event, &new_attr); + } default: return -ENOTTY; } diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 0c82663395f7..6253d5519cd8 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -456,7 +456,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr, } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); -static int +int modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr, bool check) { diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 6f873503552d..912b85b52344 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -448,17 +448,18 @@ struct perf_event_query_bpf { /* * Ioctls that can be done on a perf event fd: */ -#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) -#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) -#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) -#define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) -#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) -#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) -#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) -#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) +#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, -- cgit v1.2.3 From c7bcbfa4f8d1e0e1078adfe959d4b65542bccf66 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 15 Mar 2018 17:27:46 -0400 Subject: drm/amdkfd: Remove limit on number of GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the number of GPUs is limited by aperture placement options available on GFX7 and GFX8 hardware. This limitation is not necessary. Scratch and LDS represent per-work-item and per-work-group storage respectively. Different work-items and work-groups use the same virtual address to access their own data. Work running on different GPUs is by definition in different work-groups (different dispatches, in fact). That means the same virtual addresses can be used for these apertures on different GPUs. Add a new AMDKFD_IOC_GET_PROCESS_APERTURES_NEW ioctl that removes the artificial limitation on the number of GPUs that can be supported. The new ioctl allows user mode to query the number of GPUs to allocate enough memory for all GPUs to be reported. This deprecates AMDKFD_IOC_GET_PROCESS_APERTURES. Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 94 ++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 22 +++---- include/uapi/linux/kfd_ioctl.h | 27 +++++++- 3 files changed, 128 insertions(+), 15 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 6fe24964540b..7d4009418ec3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -825,6 +825,97 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, return 0; } +static int kfd_ioctl_get_process_apertures_new(struct file *filp, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_process_apertures_new_args *args = data; + struct kfd_process_device_apertures *pa; + struct kfd_process_device *pdd; + uint32_t nodes = 0; + int ret; + + dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid); + + if (args->num_of_nodes == 0) { + /* Return number of nodes, so that user space can alloacate + * sufficient memory + */ + mutex_lock(&p->mutex); + + if (!kfd_has_process_device_data(p)) + goto out_unlock; + + /* Run over all pdd of the process */ + pdd = kfd_get_first_process_device_data(p); + do { + args->num_of_nodes++; + pdd = kfd_get_next_process_device_data(p, pdd); + } while (pdd); + + goto out_unlock; + } + + /* Fill in process-aperture information for all available + * nodes, but not more than args->num_of_nodes as that is + * the amount of memory allocated by user + */ + pa = kzalloc((sizeof(struct kfd_process_device_apertures) * + args->num_of_nodes), GFP_KERNEL); + if (!pa) + return -ENOMEM; + + mutex_lock(&p->mutex); + + if (!kfd_has_process_device_data(p)) { + args->num_of_nodes = 0; + kfree(pa); + goto out_unlock; + } + + /* Run over all pdd of the process */ + pdd = kfd_get_first_process_device_data(p); + do { + pa[nodes].gpu_id = pdd->dev->id; + pa[nodes].lds_base = pdd->lds_base; + pa[nodes].lds_limit = pdd->lds_limit; + pa[nodes].gpuvm_base = pdd->gpuvm_base; + pa[nodes].gpuvm_limit = pdd->gpuvm_limit; + pa[nodes].scratch_base = pdd->scratch_base; + pa[nodes].scratch_limit = pdd->scratch_limit; + + dev_dbg(kfd_device, + "gpu id %u\n", pdd->dev->id); + dev_dbg(kfd_device, + "lds_base %llX\n", pdd->lds_base); + dev_dbg(kfd_device, + "lds_limit %llX\n", pdd->lds_limit); + dev_dbg(kfd_device, + "gpuvm_base %llX\n", pdd->gpuvm_base); + dev_dbg(kfd_device, + "gpuvm_limit %llX\n", pdd->gpuvm_limit); + dev_dbg(kfd_device, + "scratch_base %llX\n", pdd->scratch_base); + dev_dbg(kfd_device, + "scratch_limit %llX\n", pdd->scratch_limit); + nodes++; + + pdd = kfd_get_next_process_device_data(p, pdd); + } while (pdd && (nodes < args->num_of_nodes)); + mutex_unlock(&p->mutex); + + args->num_of_nodes = nodes; + ret = copy_to_user( + (void __user *)args->kfd_process_device_apertures_ptr, + pa, + (nodes * sizeof(struct kfd_process_device_apertures))); + kfree(pa); + return ret ? -EFAULT : 0; + +out_unlock: + mutex_unlock(&p->mutex); + return 0; +} + static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, void *data) { @@ -1017,6 +1108,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, kfd_ioctl_set_trap_handler, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, + kfd_ioctl_get_process_apertures_new, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 7377513050e6..a06b0100af96 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -282,14 +282,14 @@ (((uint64_t)(base) & \ 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL) -#define MAKE_SCRATCH_APP_BASE(gpu_num) \ - (((uint64_t)(gpu_num) << 61) + 0x100000000L) +#define MAKE_SCRATCH_APP_BASE() \ + (((uint64_t)(0x1UL) << 61) + 0x100000000L) #define MAKE_SCRATCH_APP_LIMIT(base) \ (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) -#define MAKE_LDS_APP_BASE(gpu_num) \ - (((uint64_t)(gpu_num) << 61) + 0x0) +#define MAKE_LDS_APP_BASE() \ + (((uint64_t)(0x1UL) << 61) + 0x0) #define MAKE_LDS_APP_LIMIT(base) \ (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) @@ -314,7 +314,7 @@ int kfd_init_apertures(struct kfd_process *process) return -1; } /* - * For 64 bit process aperture will be statically reserved in + * For 64 bit process apertures will be statically reserved in * the x86_64 non canonical process address space * amdkfd doesn't currently support apertures for 32 bit process */ @@ -323,12 +323,11 @@ int kfd_init_apertures(struct kfd_process *process) pdd->gpuvm_base = pdd->gpuvm_limit = 0; pdd->scratch_base = pdd->scratch_limit = 0; } else { - /* - * node id couldn't be 0 - the three MSB bits of - * aperture shoudn't be 0 + /* Same LDS and scratch apertures can be used + * on all GPUs. This allows using more dGPUs + * than placement options for apertures. */ - pdd->lds_base = MAKE_LDS_APP_BASE(id + 1); - + pdd->lds_base = MAKE_LDS_APP_BASE(); pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1); @@ -336,8 +335,7 @@ int kfd_init_apertures(struct kfd_process *process) pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base); - pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1); - + pdd->scratch_base = MAKE_SCRATCH_APP_BASE(); pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); } diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 111d73ba2d96..52014370e2e5 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -107,8 +107,6 @@ struct kfd_ioctl_get_clock_counters_args { __u32 pad; }; -#define NUM_OF_SUPPORTED_GPUS 7 - struct kfd_process_device_apertures { __u64 lds_base; /* from KFD */ __u64 lds_limit; /* from KFD */ @@ -120,6 +118,12 @@ struct kfd_process_device_apertures { __u32 pad; }; +/* + * AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use + * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an + * unlimited number of GPUs. + */ +#define NUM_OF_SUPPORTED_GPUS 7 struct kfd_ioctl_get_process_apertures_args { struct kfd_process_device_apertures process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */ @@ -129,6 +133,19 @@ struct kfd_ioctl_get_process_apertures_args { __u32 pad; }; +struct kfd_ioctl_get_process_apertures_new_args { + /* User allocated. Pointer to struct kfd_process_device_apertures + * filled in by Kernel + */ + __u64 kfd_process_device_apertures_ptr; + /* to KFD - indicates amount of memory present in + * kfd_process_device_apertures_ptr + * from KFD - Number of entries filled by KFD. + */ + __u32 num_of_nodes; + __u32 pad; +}; + #define MAX_ALLOWED_NUM_POINTS 100 #define MAX_ALLOWED_AW_BUFF_SIZE 4096 #define MAX_ALLOWED_WAC_BUFF_SIZE 128 @@ -332,7 +349,11 @@ struct kfd_ioctl_set_trap_handler_args { #define AMDKFD_IOC_SET_TRAP_HANDLER \ AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args) +#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \ + AMDKFD_IOWR(0x14, \ + struct kfd_ioctl_get_process_apertures_new_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x14 +#define AMDKFD_COMMAND_END 0x15 #endif -- cgit v1.2.3 From 5ec7e02854b3b9b55936c3b44b8acfb85e333f49 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 15 Mar 2018 17:27:51 -0400 Subject: drm/amdkfd: Add ioctls for GPUVM memory management MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: * Fix error handling after kfd_bind_process_to_device in kfd_ioctl_map_memory_to_gpu v3: * Add ioctl to acquire VM from a DRM FD v4: * Return number of successful map/unmap operations in failure cases * Facilitate partial retry after failed map/unmap * Added comments with parameter descriptions to new APIs * Defined AMDKFD_IOC_FREE_MEMORY_OF_GPU write-only Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 377 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 8 + drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 2 + include/uapi/linux/kfd_ioctl.h | 97 +++++- 4 files changed, 483 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 7d4009418ec3..a563ff2ca7dd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1046,6 +1047,366 @@ static int kfd_ioctl_get_tile_config(struct file *filep, return 0; } +static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p, + void *data) +{ + struct kfd_ioctl_acquire_vm_args *args = data; + struct kfd_process_device *pdd; + struct kfd_dev *dev; + struct file *drm_file; + int ret; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + drm_file = fget(args->drm_fd); + if (!drm_file) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) { + ret = -EINVAL; + goto err_unlock; + } + + if (pdd->drm_file) { + ret = pdd->drm_file == drm_file ? 0 : -EBUSY; + goto err_unlock; + } + + ret = kfd_process_device_init_vm(pdd, drm_file); + if (ret) + goto err_unlock; + /* On success, the PDD keeps the drm_file reference */ + mutex_unlock(&p->mutex); + + return 0; + +err_unlock: + mutex_unlock(&p->mutex); + fput(drm_file); + return ret; +} + +bool kfd_dev_is_large_bar(struct kfd_dev *dev) +{ + struct kfd_local_mem_info mem_info; + + if (dev->device_info->needs_iommu_device) + return false; + + dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info); + if (mem_info.local_mem_size_private == 0 && + mem_info.local_mem_size_public > 0) + return true; + return false; +} + +static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_alloc_memory_of_gpu_args *args = data; + struct kfd_process_device *pdd; + void *mem; + struct kfd_dev *dev; + int idr_handle; + long err; + uint64_t offset = args->mmap_offset; + uint32_t flags = args->flags; + + if (args->size == 0) + return -EINVAL; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) && + (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) && + !kfd_dev_is_large_bar(dev)) { + pr_err("Alloc host visible vram on small bar is not allowed\n"); + return -EINVAL; + } + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = PTR_ERR(pdd); + goto err_unlock; + } + + err = dev->kfd2kgd->alloc_memory_of_gpu( + dev->kgd, args->va_addr, args->size, + pdd->vm, (struct kgd_mem **) &mem, &offset, + flags); + + if (err) + goto err_unlock; + + idr_handle = kfd_process_device_create_obj_handle(pdd, mem); + if (idr_handle < 0) { + err = -EFAULT; + goto err_free; + } + + mutex_unlock(&p->mutex); + + args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); + args->mmap_offset = offset; + + return 0; + +err_free: + dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); +err_unlock: + mutex_unlock(&p->mutex); + return err; +} + +static int kfd_ioctl_free_memory_of_gpu(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_free_memory_of_gpu_args *args = data; + struct kfd_process_device *pdd; + void *mem; + struct kfd_dev *dev; + int ret; + + dev = kfd_device_by_id(GET_GPU_ID(args->handle)); + if (!dev) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); + ret = -EINVAL; + goto err_unlock; + } + + mem = kfd_process_device_translate_handle( + pdd, GET_IDR_HANDLE(args->handle)); + if (!mem) { + ret = -EINVAL; + goto err_unlock; + } + + ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); + + /* If freeing the buffer failed, leave the handle in place for + * clean-up during process tear-down. + */ + if (!ret) + kfd_process_device_remove_obj_handle( + pdd, GET_IDR_HANDLE(args->handle)); + +err_unlock: + mutex_unlock(&p->mutex); + return ret; +} + +static int kfd_ioctl_map_memory_to_gpu(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_map_memory_to_gpu_args *args = data; + struct kfd_process_device *pdd, *peer_pdd; + void *mem; + struct kfd_dev *dev, *peer; + long err = 0; + int i; + uint32_t *devices_arr = NULL; + + dev = kfd_device_by_id(GET_GPU_ID(args->handle)); + if (!dev) + return -EINVAL; + + if (!args->n_devices) { + pr_debug("Device IDs array empty\n"); + return -EINVAL; + } + if (args->n_success > args->n_devices) { + pr_debug("n_success exceeds n_devices\n"); + return -EINVAL; + } + + devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), + GFP_KERNEL); + if (!devices_arr) + return -ENOMEM; + + err = copy_from_user(devices_arr, + (void __user *)args->device_ids_array_ptr, + args->n_devices * sizeof(*devices_arr)); + if (err != 0) { + err = -EFAULT; + goto copy_from_user_failed; + } + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = PTR_ERR(pdd); + goto bind_process_to_device_failed; + } + + mem = kfd_process_device_translate_handle(pdd, + GET_IDR_HANDLE(args->handle)); + if (!mem) { + err = -ENOMEM; + goto get_mem_obj_from_handle_failed; + } + + for (i = args->n_success; i < args->n_devices; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (!peer) { + pr_debug("Getting device by id failed for 0x%x\n", + devices_arr[i]); + err = -EINVAL; + goto get_mem_obj_from_handle_failed; + } + + peer_pdd = kfd_bind_process_to_device(peer, p); + if (IS_ERR(peer_pdd)) { + err = PTR_ERR(peer_pdd); + goto get_mem_obj_from_handle_failed; + } + err = peer->kfd2kgd->map_memory_to_gpu( + peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); + if (err) { + pr_err("Failed to map to gpu %d/%d\n", + i, args->n_devices); + goto map_memory_to_gpu_failed; + } + args->n_success = i+1; + } + + mutex_unlock(&p->mutex); + + err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true); + if (err) { + pr_debug("Sync memory failed, wait interrupted by user signal\n"); + goto sync_memory_failed; + } + + /* Flush TLBs after waiting for the page table updates to complete */ + for (i = 0; i < args->n_devices; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (WARN_ON_ONCE(!peer)) + continue; + peer_pdd = kfd_get_process_device_data(peer, p); + if (WARN_ON_ONCE(!peer_pdd)) + continue; + kfd_flush_tlb(peer_pdd); + } + + kfree(devices_arr); + + return err; + +bind_process_to_device_failed: +get_mem_obj_from_handle_failed: +map_memory_to_gpu_failed: + mutex_unlock(&p->mutex); +copy_from_user_failed: +sync_memory_failed: + kfree(devices_arr); + + return err; +} + +static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_unmap_memory_from_gpu_args *args = data; + struct kfd_process_device *pdd, *peer_pdd; + void *mem; + struct kfd_dev *dev, *peer; + long err = 0; + uint32_t *devices_arr = NULL, i; + + dev = kfd_device_by_id(GET_GPU_ID(args->handle)); + if (!dev) + return -EINVAL; + + if (!args->n_devices) { + pr_debug("Device IDs array empty\n"); + return -EINVAL; + } + if (args->n_success > args->n_devices) { + pr_debug("n_success exceeds n_devices\n"); + return -EINVAL; + } + + devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), + GFP_KERNEL); + if (!devices_arr) + return -ENOMEM; + + err = copy_from_user(devices_arr, + (void __user *)args->device_ids_array_ptr, + args->n_devices * sizeof(*devices_arr)); + if (err != 0) { + err = -EFAULT; + goto copy_from_user_failed; + } + + mutex_lock(&p->mutex); + + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) { + err = PTR_ERR(pdd); + goto bind_process_to_device_failed; + } + + mem = kfd_process_device_translate_handle(pdd, + GET_IDR_HANDLE(args->handle)); + if (!mem) { + err = -ENOMEM; + goto get_mem_obj_from_handle_failed; + } + + for (i = args->n_success; i < args->n_devices; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (!peer) { + err = -EINVAL; + goto get_mem_obj_from_handle_failed; + } + + peer_pdd = kfd_get_process_device_data(peer, p); + if (!peer_pdd) { + err = -ENODEV; + goto get_mem_obj_from_handle_failed; + } + err = dev->kfd2kgd->unmap_memory_to_gpu( + peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); + if (err) { + pr_err("Failed to unmap from gpu %d/%d\n", + i, args->n_devices); + goto unmap_memory_from_gpu_failed; + } + args->n_success = i+1; + } + kfree(devices_arr); + + mutex_unlock(&p->mutex); + + return 0; + +bind_process_to_device_failed: +get_mem_obj_from_handle_failed: +unmap_memory_from_gpu_failed: + mutex_unlock(&p->mutex); +copy_from_user_failed: + kfree(devices_arr); + return err; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -1111,6 +1472,22 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, kfd_ioctl_get_process_apertures_new, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM, + kfd_ioctl_acquire_vm, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, + kfd_ioctl_alloc_memory_of_gpu, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU, + kfd_ioctl_free_memory_of_gpu, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU, + kfd_ioctl_map_memory_to_gpu, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, + kfd_ioctl_unmap_memory_from_gpu, 0), + }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index aaed005ce1f5..1542807373d7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -509,6 +509,14 @@ struct qcm_process_device { int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, struct dma_fence *fence); +/* 8 byte handle containing GPU ID in the most significant 4 bytes and + * idr_handle in the least significant 4 bytes + */ +#define MAKE_HANDLE(gpu_id, idr_handle) \ + (((uint64_t)(gpu_id) << 32) + idr_handle) +#define GET_GPU_ID(handle) (handle >> 32) +#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF) + enum kfd_pdd_bound { PDD_UNBOUND = 0, PDD_BOUND, diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index b1f35c8be2cf..237289a72bb7 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -130,6 +130,7 @@ struct tile_config { /* * Allocation flag domains + * NOTE: This must match the corresponding definitions in kfd_ioctl.h. */ #define ALLOC_MEM_FLAGS_VRAM (1 << 0) #define ALLOC_MEM_FLAGS_GTT (1 << 1) @@ -138,6 +139,7 @@ struct tile_config { /* * Allocation flags attributes/access options. + * NOTE: This must match the corresponding definitions in kfd_ioctl.h. */ #define ALLOC_MEM_FLAGS_WRITABLE (1 << 31) #define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 52014370e2e5..b4f5073dbac2 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -286,6 +286,86 @@ struct kfd_ioctl_set_trap_handler_args { __u32 pad; }; +struct kfd_ioctl_acquire_vm_args { + __u32 drm_fd; /* to KFD */ + __u32 gpu_id; /* to KFD */ +}; + +/* Allocation flags: memory types */ +#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0) +#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1) +#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2) +#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3) +/* Allocation flags: attributes/access options */ +#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31) +#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) +#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29) +#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) +#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27) +#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26) + +/* Allocate memory for later SVM (shared virtual memory) mapping. + * + * @va_addr: virtual address of the memory to be allocated + * all later mappings on all GPUs will use this address + * @size: size in bytes + * @handle: buffer handle returned to user mode, used to refer to + * this allocation for mapping, unmapping and freeing + * @mmap_offset: for CPU-mapping the allocation by mmapping a render node + * for userptrs this is overloaded to specify the CPU address + * @gpu_id: device identifier + * @flags: memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above + */ +struct kfd_ioctl_alloc_memory_of_gpu_args { + __u64 va_addr; /* to KFD */ + __u64 size; /* to KFD */ + __u64 handle; /* from KFD */ + __u64 mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */ + __u32 gpu_id; /* to KFD */ + __u32 flags; +}; + +/* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu + * + * @handle: memory handle returned by alloc + */ +struct kfd_ioctl_free_memory_of_gpu_args { + __u64 handle; /* to KFD */ +}; + +/* Map memory to one or more GPUs + * + * @handle: memory handle returned by alloc + * @device_ids_array_ptr: array of gpu_ids (__u32 per device) + * @n_devices: number of devices in the array + * @n_success: number of devices mapped successfully + * + * @n_success returns information to the caller how many devices from + * the start of the array have mapped the buffer successfully. It can + * be passed into a subsequent retry call to skip those devices. For + * the first call the caller should initialize it to 0. + * + * If the ioctl completes with return code 0 (success), n_success == + * n_devices. + */ +struct kfd_ioctl_map_memory_to_gpu_args { + __u64 handle; /* to KFD */ + __u64 device_ids_array_ptr; /* to KFD */ + __u32 n_devices; /* to KFD */ + __u32 n_success; /* to/from KFD */ +}; + +/* Unmap memory from one or more GPUs + * + * same arguments as for mapping + */ +struct kfd_ioctl_unmap_memory_from_gpu_args { + __u64 handle; /* to KFD */ + __u64 device_ids_array_ptr; /* to KFD */ + __u32 n_devices; /* to KFD */ + __u32 n_success; /* to/from KFD */ +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -353,7 +433,22 @@ struct kfd_ioctl_set_trap_handler_args { AMDKFD_IOWR(0x14, \ struct kfd_ioctl_get_process_apertures_new_args) +#define AMDKFD_IOC_ACQUIRE_VM \ + AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args) + +#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \ + AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args) + +#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \ + AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args) + +#define AMDKFD_IOC_MAP_MEMORY_TO_GPU \ + AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args) + +#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \ + AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x15 +#define AMDKFD_COMMAND_END 0x1A #endif -- cgit v1.2.3 From 4bbb3e0e8239f9079bf1fe20b3c0cb598714ae61 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 13 Mar 2018 14:51:27 +0900 Subject: net: Fix vlan untag for bridge and vlan_dev with reorder_hdr off When we have a bridge with vlan_filtering on and a vlan device on top of it, packets would be corrupted in skb_vlan_untag() called from br_dev_xmit(). The problem sits in skb_reorder_vlan_header() used in skb_vlan_untag(), which makes use of skb->mac_len. In this function mac_len is meant for handling rx path with vlan devices with reorder_header disabled, but in tx path mac_len is typically 0 and cannot be used, which is the problem in this case. The current code even does not properly handle rx path (skb_vlan_untag() called from __netif_receive_skb_core()) with reorder_header off actually. In rx path single tag case, it works as follows: - Before skb_reorder_vlan_header() mac_header data v v +-------------------+-------------+------+---- | ETH | VLAN | ETH | | ADDRS | TPID | TCI | TYPE | +-------------------+-------------+------+---- <-------- mac_len ---------> <-------------> to be removed - After skb_reorder_vlan_header() mac_header data v v +-------------------+------+---- | ETH | ETH | | ADDRS | TYPE | +-------------------+------+---- <-------- mac_len ---------> This is ok, but in rx double tag case, it corrupts packets: - Before skb_reorder_vlan_header() mac_header data v v +-------------------+-------------+-------------+------+---- | ETH | VLAN | VLAN | ETH | | ADDRS | TPID | TCI | TPID | TCI | TYPE | +-------------------+-------------+-------------+------+---- <--------------- mac_len ----------------> <-------------> should be removed <---------------------------> actually will be removed - After skb_reorder_vlan_header() mac_header data v v +-------------------+------+---- | ETH | ETH | | ADDRS | TYPE | +-------------------+------+---- <--------------- mac_len ----------------> So, two of vlan tags are both removed while only inner one should be removed and mac_header (and mac_len) is broken. skb_vlan_untag() is meant for removing the vlan header at (skb->data - 2), so use skb->data and skb->mac_header to calculate the right offset. Reported-by: Brandon Carpenter Fixes: a6e18ff11170 ("vlan: Fix untag operations of stacked vlans with REORDER_HEADER off") Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + net/core/skbuff.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 8bbbcb5cd94b..820de5d222d2 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -30,6 +30,7 @@ */ #define ETH_ALEN 6 /* Octets in one ethernet addr */ +#define ETH_TLEN 2 /* Octets in ethernet type field */ #define ETH_HLEN 14 /* Total octets in header. */ #define ETH_ZLEN 60 /* Min. octets in frame sans FCS */ #define ETH_DATA_LEN 1500 /* Max. octets in payload */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index baf990528943..b103f46ec512 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5020,13 +5020,16 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { + int mac_len; + if (skb_cow(skb, skb_headroom(skb)) < 0) { kfree_skb(skb); return NULL; } - memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN, - 2 * ETH_ALEN); + mac_len = skb->data - skb_mac_header(skb); + memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), + mac_len - VLAN_HLEN - ETH_TLEN); skb->mac_header += VLAN_HLEN; return skb; } -- cgit v1.2.3 From a6618f4aedb2b60932d766bd82ae7ce866e842aa Mon Sep 17 00:00:00 2001 From: Kirill Marinushkin Date: Mon, 19 Mar 2018 07:11:08 +0100 Subject: ALSA: usb-audio: Fix parsing descriptor of UAC2 processing unit Currently, the offsets in the UAC2 processing unit descriptor are calculated incorrectly. It causes an issue when connecting the device which provides such a feature: ~~~~ [84126.724420] usb 1-1.3.1: invalid Processing Unit descriptor (id 18) ~~~~ After this patch is applied, the UAC2 processing unit inits w/o this error. Fixes: 23caaf19b11e ("ALSA: usb-mixer: Add support for Audio Class v2.0") Signed-off-by: Kirill Marinushkin Cc: Signed-off-by: Takashi Iwai --- include/uapi/linux/usb/audio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h index 17a022c5b414..da3315ed1bcd 100644 --- a/include/uapi/linux/usb/audio.h +++ b/include/uapi/linux/usb/audio.h @@ -370,7 +370,7 @@ static inline __u8 uac_processing_unit_bControlSize(struct uac_processing_unit_d { return (protocol == UAC_VERSION_1) ? desc->baSourceID[desc->bNrInPins + 4] : - desc->baSourceID[desc->bNrInPins + 6]; + 2; /* in UAC2, this value is constant */ } static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_descriptor *desc, @@ -378,7 +378,7 @@ static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_de { return (protocol == UAC_VERSION_1) ? &desc->baSourceID[desc->bNrInPins + 5] : - &desc->baSourceID[desc->bNrInPins + 7]; + &desc->baSourceID[desc->bNrInPins + 6]; } static inline __u8 uac_processing_unit_iProcessing(struct uac_processing_unit_descriptor *desc, -- cgit v1.2.3