From a11024457d348672b26b3d4581ed19c793399b48 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Tue, 6 Feb 2018 20:32:46 -0500
Subject: uapi: Fix type used in ioctl parameter structures

Use __u32 and __u64 instead of POSIX types that may not be defined
in user mode builds.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 include/uapi/linux/kfd_ioctl.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index f4cab5b3ba9a..111d73ba2d96 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -263,10 +263,10 @@ struct kfd_ioctl_get_tile_config_args {
 };
 
 struct kfd_ioctl_set_trap_handler_args {
-	uint64_t tba_addr;		/* to KFD */
-	uint64_t tma_addr;		/* to KFD */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u64 tba_addr;		/* to KFD */
+	__u64 tma_addr;		/* to KFD */
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 #define AMDKFD_IOCTL_BASE 'K'
-- 
cgit v1.2.3


From 2fe4c22c53fc2e3f35be2cd0033cb3d15ebd41b1 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Tue, 27 Feb 2018 08:03:56 -0500
Subject: media: rc: lirc does not use LIRC_CAN_SEND_SCANCODE feature

Since commit 02d742f4b209 ("media: lirc: lirc daemon fails to detect raw
IR device"), the feature LIRC_CAN_SEND_SCANCODE is no longer used as it
tripped up lircd. The ability to send scancodes for IR Tx is implied by
LIRC_CAN_SEND_PULSE (i.e. any device that can send can use IR Tx encoders).

So, remove LIRC_CAN_SEND_SCANCODE since it never used. This fixes:

Documentation/output/lirc.h.rst:6: WARNING: undefined label:
lirc-can-send-scancode (if the link has no caption the label must precede
a section header

As this flag was added for kernel 4.16, let's remove it, while not too
late.

Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 include/uapi/linux/lirc.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h
index 4fe580d36e41..f5bf06ecd87d 100644
--- a/include/uapi/linux/lirc.h
+++ b/include/uapi/linux/lirc.h
@@ -54,7 +54,6 @@
 #define LIRC_CAN_SEND_RAW              LIRC_MODE2SEND(LIRC_MODE_RAW)
 #define LIRC_CAN_SEND_PULSE            LIRC_MODE2SEND(LIRC_MODE_PULSE)
 #define LIRC_CAN_SEND_MODE2            LIRC_MODE2SEND(LIRC_MODE_MODE2)
-#define LIRC_CAN_SEND_SCANCODE         LIRC_MODE2SEND(LIRC_MODE_SCANCODE)
 #define LIRC_CAN_SEND_LIRCCODE         LIRC_MODE2SEND(LIRC_MODE_LIRCCODE)
 
 #define LIRC_CAN_SEND_MASK             0x0000003f
-- 
cgit v1.2.3


From c7bcbfa4f8d1e0e1078adfe959d4b65542bccf66 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Thu, 15 Mar 2018 17:27:46 -0400
Subject: drm/amdkfd: Remove limit on number of GPUs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the number of GPUs is limited by aperture placement options
available on GFX7 and GFX8 hardware. This limitation is not necessary.
Scratch and LDS represent per-work-item and per-work-group storage
respectively. Different work-items and work-groups use the same virtual
address to access their own data. Work running on different GPUs is by
definition in different work-groups (different dispatches, in fact).
That means the same virtual addresses can be used for these apertures
on different GPUs.

Add a new AMDKFD_IOC_GET_PROCESS_APERTURES_NEW ioctl that removes the
artificial limitation on the number of GPUs that can be supported. The
new ioctl allows user mode to query the number of GPUs to allocate
enough memory for all GPUs to be reported.

This deprecates AMDKFD_IOC_GET_PROCESS_APERTURES.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c     | 94 ++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 22 +++----
 include/uapi/linux/kfd_ioctl.h               | 27 +++++++-
 3 files changed, 128 insertions(+), 15 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6fe24964540b..7d4009418ec3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -825,6 +825,97 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
 	return 0;
 }
 
+static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_process_apertures_new_args *args = data;
+	struct kfd_process_device_apertures *pa;
+	struct kfd_process_device *pdd;
+	uint32_t nodes = 0;
+	int ret;
+
+	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+
+	if (args->num_of_nodes == 0) {
+		/* Return number of nodes, so that user space can alloacate
+		 * sufficient memory
+		 */
+		mutex_lock(&p->mutex);
+
+		if (!kfd_has_process_device_data(p))
+			goto out_unlock;
+
+		/* Run over all pdd of the process */
+		pdd = kfd_get_first_process_device_data(p);
+		do {
+			args->num_of_nodes++;
+			pdd = kfd_get_next_process_device_data(p, pdd);
+		} while (pdd);
+
+		goto out_unlock;
+	}
+
+	/* Fill in process-aperture information for all available
+	 * nodes, but not more than args->num_of_nodes as that is
+	 * the amount of memory allocated by user
+	 */
+	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
+				args->num_of_nodes), GFP_KERNEL);
+	if (!pa)
+		return -ENOMEM;
+
+	mutex_lock(&p->mutex);
+
+	if (!kfd_has_process_device_data(p)) {
+		args->num_of_nodes = 0;
+		kfree(pa);
+		goto out_unlock;
+	}
+
+	/* Run over all pdd of the process */
+	pdd = kfd_get_first_process_device_data(p);
+	do {
+		pa[nodes].gpu_id = pdd->dev->id;
+		pa[nodes].lds_base = pdd->lds_base;
+		pa[nodes].lds_limit = pdd->lds_limit;
+		pa[nodes].gpuvm_base = pdd->gpuvm_base;
+		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
+		pa[nodes].scratch_base = pdd->scratch_base;
+		pa[nodes].scratch_limit = pdd->scratch_limit;
+
+		dev_dbg(kfd_device,
+			"gpu id %u\n", pdd->dev->id);
+		dev_dbg(kfd_device,
+			"lds_base %llX\n", pdd->lds_base);
+		dev_dbg(kfd_device,
+			"lds_limit %llX\n", pdd->lds_limit);
+		dev_dbg(kfd_device,
+			"gpuvm_base %llX\n", pdd->gpuvm_base);
+		dev_dbg(kfd_device,
+			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
+		dev_dbg(kfd_device,
+			"scratch_base %llX\n", pdd->scratch_base);
+		dev_dbg(kfd_device,
+			"scratch_limit %llX\n", pdd->scratch_limit);
+		nodes++;
+
+		pdd = kfd_get_next_process_device_data(p, pdd);
+	} while (pdd && (nodes < args->num_of_nodes));
+	mutex_unlock(&p->mutex);
+
+	args->num_of_nodes = nodes;
+	ret = copy_to_user(
+			(void __user *)args->kfd_process_device_apertures_ptr,
+			pa,
+			(nodes * sizeof(struct kfd_process_device_apertures)));
+	kfree(pa);
+	return ret ? -EFAULT : 0;
+
+out_unlock:
+	mutex_unlock(&p->mutex);
+	return 0;
+}
+
 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 					void *data)
 {
@@ -1017,6 +1108,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
 			kfd_ioctl_set_trap_handler, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
+			kfd_ioctl_get_process_apertures_new, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 7377513050e6..a06b0100af96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -282,14 +282,14 @@
 	(((uint64_t)(base) & \
 		0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL)
 
-#define MAKE_SCRATCH_APP_BASE(gpu_num) \
-	(((uint64_t)(gpu_num) << 61) + 0x100000000L)
+#define MAKE_SCRATCH_APP_BASE() \
+	(((uint64_t)(0x1UL) << 61) + 0x100000000L)
 
 #define MAKE_SCRATCH_APP_LIMIT(base) \
 	(((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
 
-#define MAKE_LDS_APP_BASE(gpu_num) \
-	(((uint64_t)(gpu_num) << 61) + 0x0)
+#define MAKE_LDS_APP_BASE() \
+	(((uint64_t)(0x1UL) << 61) + 0x0)
 #define MAKE_LDS_APP_LIMIT(base) \
 	(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
 
@@ -314,7 +314,7 @@ int kfd_init_apertures(struct kfd_process *process)
 			return -1;
 		}
 		/*
-		 * For 64 bit process aperture will be statically reserved in
+		 * For 64 bit process apertures will be statically reserved in
 		 * the x86_64 non canonical process address space
 		 * amdkfd doesn't currently support apertures for 32 bit process
 		 */
@@ -323,12 +323,11 @@ int kfd_init_apertures(struct kfd_process *process)
 			pdd->gpuvm_base = pdd->gpuvm_limit = 0;
 			pdd->scratch_base = pdd->scratch_limit = 0;
 		} else {
-			/*
-			 * node id couldn't be 0 - the three MSB bits of
-			 * aperture shoudn't be 0
+			/* Same LDS and scratch apertures can be used
+			 * on all GPUs. This allows using more dGPUs
+			 * than placement options for apertures.
 			 */
-			pdd->lds_base = MAKE_LDS_APP_BASE(id + 1);
-
+			pdd->lds_base = MAKE_LDS_APP_BASE();
 			pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
 
 			pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
@@ -336,8 +335,7 @@ int kfd_init_apertures(struct kfd_process *process)
 			pdd->gpuvm_limit =
 					MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base);
 
-			pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1);
-
+			pdd->scratch_base = MAKE_SCRATCH_APP_BASE();
 			pdd->scratch_limit =
 				MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
 		}
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 111d73ba2d96..52014370e2e5 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -107,8 +107,6 @@ struct kfd_ioctl_get_clock_counters_args {
 	__u32 pad;
 };
 
-#define NUM_OF_SUPPORTED_GPUS 7
-
 struct kfd_process_device_apertures {
 	__u64 lds_base;		/* from KFD */
 	__u64 lds_limit;		/* from KFD */
@@ -120,6 +118,12 @@ struct kfd_process_device_apertures {
 	__u32 pad;
 };
 
+/*
+ * AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use
+ * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an
+ * unlimited number of GPUs.
+ */
+#define NUM_OF_SUPPORTED_GPUS 7
 struct kfd_ioctl_get_process_apertures_args {
 	struct kfd_process_device_apertures
 			process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
@@ -129,6 +133,19 @@ struct kfd_ioctl_get_process_apertures_args {
 	__u32 pad;
 };
 
+struct kfd_ioctl_get_process_apertures_new_args {
+	/* User allocated. Pointer to struct kfd_process_device_apertures
+	 * filled in by Kernel
+	 */
+	__u64 kfd_process_device_apertures_ptr;
+	/* to KFD - indicates amount of memory present in
+	 *  kfd_process_device_apertures_ptr
+	 * from KFD - Number of entries filled by KFD.
+	 */
+	__u32 num_of_nodes;
+	__u32 pad;
+};
+
 #define MAX_ALLOWED_NUM_POINTS    100
 #define MAX_ALLOWED_AW_BUFF_SIZE 4096
 #define MAX_ALLOWED_WAC_BUFF_SIZE  128
@@ -332,7 +349,11 @@ struct kfd_ioctl_set_trap_handler_args {
 #define AMDKFD_IOC_SET_TRAP_HANDLER		\
 		AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
 
+#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW	\
+		AMDKFD_IOWR(0x14,		\
+			struct kfd_ioctl_get_process_apertures_new_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x14
+#define AMDKFD_COMMAND_END		0x15
 
 #endif
-- 
cgit v1.2.3


From 5ec7e02854b3b9b55936c3b44b8acfb85e333f49 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Thu, 15 Mar 2018 17:27:51 -0400
Subject: drm/amdkfd: Add ioctls for GPUVM memory management
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
* Fix error handling after kfd_bind_process_to_device in
  kfd_ioctl_map_memory_to_gpu
v3:
* Add ioctl to acquire VM from a DRM FD
v4:
* Return number of successful map/unmap operations in failure cases
* Facilitate partial retry after failed map/unmap
* Added comments with parameter descriptions to new APIs
* Defined AMDKFD_IOC_FREE_MEMORY_OF_GPU write-only

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c        | 377 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h           |   8 +
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h |   2 +
 include/uapi/linux/kfd_ioctl.h                  |  97 +++++-
 4 files changed, 483 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 7d4009418ec3..a563ff2ca7dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/err.h>
 #include <linux/fs.h>
+#include <linux/file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -1046,6 +1047,366 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
 	return 0;
 }
 
+static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
+				void *data)
+{
+	struct kfd_ioctl_acquire_vm_args *args = data;
+	struct kfd_process_device *pdd;
+	struct kfd_dev *dev;
+	struct file *drm_file;
+	int ret;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	drm_file = fget(args->drm_fd);
+	if (!drm_file)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	if (pdd->drm_file) {
+		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
+		goto err_unlock;
+	}
+
+	ret = kfd_process_device_init_vm(pdd, drm_file);
+	if (ret)
+		goto err_unlock;
+	/* On success, the PDD keeps the drm_file reference */
+	mutex_unlock(&p->mutex);
+
+	return 0;
+
+err_unlock:
+	mutex_unlock(&p->mutex);
+	fput(drm_file);
+	return ret;
+}
+
+bool kfd_dev_is_large_bar(struct kfd_dev *dev)
+{
+	struct kfd_local_mem_info mem_info;
+
+	if (dev->device_info->needs_iommu_device)
+		return false;
+
+	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
+	if (mem_info.local_mem_size_private == 0 &&
+			mem_info.local_mem_size_public > 0)
+		return true;
+	return false;
+}
+
+static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_dev *dev;
+	int idr_handle;
+	long err;
+	uint64_t offset = args->mmap_offset;
+	uint32_t flags = args->flags;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
+		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
+		!kfd_dev_is_large_bar(dev)) {
+		pr_err("Alloc host visible vram on small bar is not allowed\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto err_unlock;
+	}
+
+	err = dev->kfd2kgd->alloc_memory_of_gpu(
+		dev->kgd, args->va_addr, args->size,
+		pdd->vm, (struct kgd_mem **) &mem, &offset,
+		flags);
+
+	if (err)
+		goto err_unlock;
+
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+	if (idr_handle < 0) {
+		err = -EFAULT;
+		goto err_free;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+	args->mmap_offset = offset;
+
+	return 0;
+
+err_free:
+	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+err_unlock:
+	mutex_unlock(&p->mutex);
+	return err;
+}
+
+static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_dev *dev;
+	int ret;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	mem = kfd_process_device_translate_handle(
+		pdd, GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+
+	/* If freeing the buffer failed, leave the handle in place for
+	 * clean-up during process tear-down.
+	 */
+	if (!ret)
+		kfd_process_device_remove_obj_handle(
+			pdd, GET_IDR_HANDLE(args->handle));
+
+err_unlock:
+	mutex_unlock(&p->mutex);
+	return ret;
+}
+
+static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	struct kfd_dev *dev, *peer;
+	long err = 0;
+	int i;
+	uint32_t *devices_arr = NULL;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	if (!args->n_devices) {
+		pr_debug("Device IDs array empty\n");
+		return -EINVAL;
+	}
+	if (args->n_success > args->n_devices) {
+		pr_debug("n_success exceeds n_devices\n");
+		return -EINVAL;
+	}
+
+	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+			      GFP_KERNEL);
+	if (!devices_arr)
+		return -ENOMEM;
+
+	err = copy_from_user(devices_arr,
+			     (void __user *)args->device_ids_array_ptr,
+			     args->n_devices * sizeof(*devices_arr));
+	if (err != 0) {
+		err = -EFAULT;
+		goto copy_from_user_failed;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		err = -ENOMEM;
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	for (i = args->n_success; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (!peer) {
+			pr_debug("Getting device by id failed for 0x%x\n",
+				 devices_arr[i]);
+			err = -EINVAL;
+			goto get_mem_obj_from_handle_failed;
+		}
+
+		peer_pdd = kfd_bind_process_to_device(peer, p);
+		if (IS_ERR(peer_pdd)) {
+			err = PTR_ERR(peer_pdd);
+			goto get_mem_obj_from_handle_failed;
+		}
+		err = peer->kfd2kgd->map_memory_to_gpu(
+			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+		if (err) {
+			pr_err("Failed to map to gpu %d/%d\n",
+			       i, args->n_devices);
+			goto map_memory_to_gpu_failed;
+		}
+		args->n_success = i+1;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
+	if (err) {
+		pr_debug("Sync memory failed, wait interrupted by user signal\n");
+		goto sync_memory_failed;
+	}
+
+	/* Flush TLBs after waiting for the page table updates to complete */
+	for (i = 0; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (WARN_ON_ONCE(!peer))
+			continue;
+		peer_pdd = kfd_get_process_device_data(peer, p);
+		if (WARN_ON_ONCE(!peer_pdd))
+			continue;
+		kfd_flush_tlb(peer_pdd);
+	}
+
+	kfree(devices_arr);
+
+	return err;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+map_memory_to_gpu_failed:
+	mutex_unlock(&p->mutex);
+copy_from_user_failed:
+sync_memory_failed:
+	kfree(devices_arr);
+
+	return err;
+}
+
+static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	struct kfd_dev *dev, *peer;
+	long err = 0;
+	uint32_t *devices_arr = NULL, i;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	if (!args->n_devices) {
+		pr_debug("Device IDs array empty\n");
+		return -EINVAL;
+	}
+	if (args->n_success > args->n_devices) {
+		pr_debug("n_success exceeds n_devices\n");
+		return -EINVAL;
+	}
+
+	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+			      GFP_KERNEL);
+	if (!devices_arr)
+		return -ENOMEM;
+
+	err = copy_from_user(devices_arr,
+			     (void __user *)args->device_ids_array_ptr,
+			     args->n_devices * sizeof(*devices_arr));
+	if (err != 0) {
+		err = -EFAULT;
+		goto copy_from_user_failed;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		err = -ENOMEM;
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	for (i = args->n_success; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (!peer) {
+			err = -EINVAL;
+			goto get_mem_obj_from_handle_failed;
+		}
+
+		peer_pdd = kfd_get_process_device_data(peer, p);
+		if (!peer_pdd) {
+			err = -ENODEV;
+			goto get_mem_obj_from_handle_failed;
+		}
+		err = dev->kfd2kgd->unmap_memory_to_gpu(
+			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+		if (err) {
+			pr_err("Failed to unmap from gpu %d/%d\n",
+			       i, args->n_devices);
+			goto unmap_memory_from_gpu_failed;
+		}
+		args->n_success = i+1;
+	}
+	kfree(devices_arr);
+
+	mutex_unlock(&p->mutex);
+
+	return 0;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+unmap_memory_from_gpu_failed:
+	mutex_unlock(&p->mutex);
+copy_from_user_failed:
+	kfree(devices_arr);
+	return err;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
 			    .cmd_drv = 0, .name = #ioctl}
@@ -1111,6 +1472,22 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
 			kfd_ioctl_get_process_apertures_new, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
+			kfd_ioctl_acquire_vm, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
+			kfd_ioctl_alloc_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
+			kfd_ioctl_free_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
+			kfd_ioctl_map_memory_to_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
+			kfd_ioctl_unmap_memory_from_gpu, 0),
+
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index aaed005ce1f5..1542807373d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -509,6 +509,14 @@ struct qcm_process_device {
 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
 					       struct dma_fence *fence);
 
+/* 8 byte handle containing GPU ID in the most significant 4 bytes and
+ * idr_handle in the least significant 4 bytes
+ */
+#define MAKE_HANDLE(gpu_id, idr_handle) \
+	(((uint64_t)(gpu_id) << 32) + idr_handle)
+#define GET_GPU_ID(handle) (handle >> 32)
+#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)
+
 enum kfd_pdd_bound {
 	PDD_UNBOUND = 0,
 	PDD_BOUND,
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index b1f35c8be2cf..237289a72bb7 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -130,6 +130,7 @@ struct tile_config {
 
 /*
  * Allocation flag domains
+ * NOTE: This must match the corresponding definitions in kfd_ioctl.h.
  */
 #define ALLOC_MEM_FLAGS_VRAM		(1 << 0)
 #define ALLOC_MEM_FLAGS_GTT		(1 << 1)
@@ -138,6 +139,7 @@ struct tile_config {
 
 /*
  * Allocation flags attributes/access options.
+ * NOTE: This must match the corresponding definitions in kfd_ioctl.h.
  */
 #define ALLOC_MEM_FLAGS_WRITABLE	(1 << 31)
 #define ALLOC_MEM_FLAGS_EXECUTABLE	(1 << 30)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 52014370e2e5..b4f5073dbac2 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -286,6 +286,86 @@ struct kfd_ioctl_set_trap_handler_args {
 	__u32 pad;
 };
 
+struct kfd_ioctl_acquire_vm_args {
+	__u32 drm_fd;	/* to KFD */
+	__u32 gpu_id;	/* to KFD */
+};
+
+/* Allocation flags: memory types */
+#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM		(1 << 0)
+#define KFD_IOC_ALLOC_MEM_FLAGS_GTT		(1 << 1)
+#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR		(1 << 2)
+#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL	(1 << 3)
+/* Allocation flags: attributes/access options */
+#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE	(1 << 31)
+#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE	(1 << 30)
+#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC		(1 << 29)
+#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE	(1 << 28)
+#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM	(1 << 27)
+#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT	(1 << 26)
+
+/* Allocate memory for later SVM (shared virtual memory) mapping.
+ *
+ * @va_addr:     virtual address of the memory to be allocated
+ *               all later mappings on all GPUs will use this address
+ * @size:        size in bytes
+ * @handle:      buffer handle returned to user mode, used to refer to
+ *               this allocation for mapping, unmapping and freeing
+ * @mmap_offset: for CPU-mapping the allocation by mmapping a render node
+ *               for userptrs this is overloaded to specify the CPU address
+ * @gpu_id:      device identifier
+ * @flags:       memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above
+ */
+struct kfd_ioctl_alloc_memory_of_gpu_args {
+	__u64 va_addr;		/* to KFD */
+	__u64 size;		/* to KFD */
+	__u64 handle;		/* from KFD */
+	__u64 mmap_offset;	/* to KFD (userptr), from KFD (mmap offset) */
+	__u32 gpu_id;		/* to KFD */
+	__u32 flags;
+};
+
+/* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu
+ *
+ * @handle: memory handle returned by alloc
+ */
+struct kfd_ioctl_free_memory_of_gpu_args {
+	__u64 handle;		/* to KFD */
+};
+
+/* Map memory to one or more GPUs
+ *
+ * @handle:                memory handle returned by alloc
+ * @device_ids_array_ptr:  array of gpu_ids (__u32 per device)
+ * @n_devices:             number of devices in the array
+ * @n_success:             number of devices mapped successfully
+ *
+ * @n_success returns information to the caller how many devices from
+ * the start of the array have mapped the buffer successfully. It can
+ * be passed into a subsequent retry call to skip those devices. For
+ * the first call the caller should initialize it to 0.
+ *
+ * If the ioctl completes with return code 0 (success), n_success ==
+ * n_devices.
+ */
+struct kfd_ioctl_map_memory_to_gpu_args {
+	__u64 handle;			/* to KFD */
+	__u64 device_ids_array_ptr;	/* to KFD */
+	__u32 n_devices;		/* to KFD */
+	__u32 n_success;		/* to/from KFD */
+};
+
+/* Unmap memory from one or more GPUs
+ *
+ * same arguments as for mapping
+ */
+struct kfd_ioctl_unmap_memory_from_gpu_args {
+	__u64 handle;			/* to KFD */
+	__u64 device_ids_array_ptr;	/* to KFD */
+	__u32 n_devices;		/* to KFD */
+	__u32 n_success;		/* to/from KFD */
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -353,7 +433,22 @@ struct kfd_ioctl_set_trap_handler_args {
 		AMDKFD_IOWR(0x14,		\
 			struct kfd_ioctl_get_process_apertures_new_args)
 
+#define AMDKFD_IOC_ACQUIRE_VM			\
+		AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args)
+
+#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU		\
+		AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)
+
+#define AMDKFD_IOC_FREE_MEMORY_OF_GPU		\
+		AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args)
+
+#define AMDKFD_IOC_MAP_MEMORY_TO_GPU		\
+		AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args)
+
+#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU	\
+		AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x15
+#define AMDKFD_COMMAND_END		0x1A
 
 #endif
-- 
cgit v1.2.3


From 4bbb3e0e8239f9079bf1fe20b3c0cb598714ae61 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Tue, 13 Mar 2018 14:51:27 +0900
Subject: net: Fix vlan untag for bridge and vlan_dev with reorder_hdr off

When we have a bridge with vlan_filtering on and a vlan device on top of
it, packets would be corrupted in skb_vlan_untag() called from
br_dev_xmit().

The problem sits in skb_reorder_vlan_header() used in skb_vlan_untag(),
which makes use of skb->mac_len. In this function mac_len is meant for
handling rx path with vlan devices with reorder_header disabled, but in
tx path mac_len is typically 0 and cannot be used, which is the problem
in this case.

The current code even does not properly handle rx path (skb_vlan_untag()
called from __netif_receive_skb_core()) with reorder_header off actually.

In rx path single tag case, it works as follows:

- Before skb_reorder_vlan_header()

 mac_header                                data
   v                                        v
   +-------------------+-------------+------+----
   |        ETH        |    VLAN     | ETH  |
   |       ADDRS       | TPID | TCI  | TYPE |
   +-------------------+-------------+------+----
   <-------- mac_len --------->
                       <------------->
                        to be removed

- After skb_reorder_vlan_header()

            mac_header                     data
                 v                          v
                 +-------------------+------+----
                 |        ETH        | ETH  |
                 |       ADDRS       | TYPE |
                 +-------------------+------+----
                 <-------- mac_len --------->

This is ok, but in rx double tag case, it corrupts packets:

- Before skb_reorder_vlan_header()

 mac_header                                              data
   v                                                      v
   +-------------------+-------------+-------------+------+----
   |        ETH        |    VLAN     |    VLAN     | ETH  |
   |       ADDRS       | TPID | TCI  | TPID | TCI  | TYPE |
   +-------------------+-------------+-------------+------+----
   <--------------- mac_len ---------------->
                                     <------------->
                                    should be removed
                       <--------------------------->
                         actually will be removed

- After skb_reorder_vlan_header()

            mac_header                                   data
                 v                                        v
                               +-------------------+------+----
                               |        ETH        | ETH  |
                               |       ADDRS       | TYPE |
                               +-------------------+------+----
                 <--------------- mac_len ---------------->

So, two of vlan tags are both removed while only inner one should be
removed and mac_header (and mac_len) is broken.

skb_vlan_untag() is meant for removing the vlan header at (skb->data - 2),
so use skb->data and skb->mac_header to calculate the right offset.

Reported-by: Brandon Carpenter <brandon.carpenter@cypherpath.com>
Fixes: a6e18ff11170 ("vlan: Fix untag operations of stacked vlans with REORDER_HEADER off")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h | 1 +
 net/core/skbuff.c             | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 8bbbcb5cd94b..820de5d222d2 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -30,6 +30,7 @@
  */
 
 #define ETH_ALEN	6		/* Octets in one ethernet addr	 */
+#define ETH_TLEN	2		/* Octets in ethernet type field */
 #define ETH_HLEN	14		/* Total octets in header.	 */
 #define ETH_ZLEN	60		/* Min. octets in frame sans FCS */
 #define ETH_DATA_LEN	1500		/* Max. octets in payload	 */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index baf990528943..b103f46ec512 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5020,13 +5020,16 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
 
 static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
 {
+	int mac_len;
+
 	if (skb_cow(skb, skb_headroom(skb)) < 0) {
 		kfree_skb(skb);
 		return NULL;
 	}
 
-	memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN,
-		2 * ETH_ALEN);
+	mac_len = skb->data - skb_mac_header(skb);
+	memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
+		mac_len - VLAN_HLEN - ETH_TLEN);
 	skb->mac_header += VLAN_HLEN;
 	return skb;
 }
-- 
cgit v1.2.3


From a6618f4aedb2b60932d766bd82ae7ce866e842aa Mon Sep 17 00:00:00 2001
From: Kirill Marinushkin <k.marinushkin@gmail.com>
Date: Mon, 19 Mar 2018 07:11:08 +0100
Subject: ALSA: usb-audio: Fix parsing descriptor of UAC2 processing unit

Currently, the offsets in the UAC2 processing unit descriptor are
calculated incorrectly. It causes an issue when connecting the device which
provides such a feature:

~~~~
[84126.724420] usb 1-1.3.1: invalid Processing Unit descriptor (id 18)
~~~~

After this patch is applied, the UAC2 processing unit inits w/o this error.

Fixes: 23caaf19b11e ("ALSA: usb-mixer: Add support for Audio Class v2.0")
Signed-off-by: Kirill Marinushkin <k.marinushkin@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/uapi/linux/usb/audio.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h
index 17a022c5b414..da3315ed1bcd 100644
--- a/include/uapi/linux/usb/audio.h
+++ b/include/uapi/linux/usb/audio.h
@@ -370,7 +370,7 @@ static inline __u8 uac_processing_unit_bControlSize(struct uac_processing_unit_d
 {
 	return (protocol == UAC_VERSION_1) ?
 		desc->baSourceID[desc->bNrInPins + 4] :
-		desc->baSourceID[desc->bNrInPins + 6];
+		2; /* in UAC2, this value is constant */
 }
 
 static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_descriptor *desc,
@@ -378,7 +378,7 @@ static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_de
 {
 	return (protocol == UAC_VERSION_1) ?
 		&desc->baSourceID[desc->bNrInPins + 5] :
-		&desc->baSourceID[desc->bNrInPins + 7];
+		&desc->baSourceID[desc->bNrInPins + 6];
 }
 
 static inline __u8 uac_processing_unit_iProcessing(struct uac_processing_unit_descriptor *desc,
-- 
cgit v1.2.3