From e1aaf311dbe82221910cc0e0809c988de210cc3c Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Fri, 5 Aug 2016 10:39:34 -0300 Subject: dma-buf/fence-array: add fence_is_array() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add helper to check if fence is array. v2: Comments from Chris Wilson - remove ternary if from ops comparison - add EXPORT_SYMBOL(fence_array_ops) Cc: Chris Wilson Cc: Christian König Signed-off-by: Gustavo Padovan Reviewed-by: Chris Wilson Reviewed-by: Christian König Signed-off-by: Sumit Semwal --- include/linux/fence-array.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fence-array.h b/include/linux/fence-array.h index 86baaa45567c..a44794e508df 100644 --- a/include/linux/fence-array.h +++ b/include/linux/fence-array.h @@ -51,6 +51,16 @@ struct fence_array { extern const struct fence_ops fence_array_ops; +/** + * fence_is_array - check if a fence is from the array subsclass + * + * Return true if it is a fence_array and false otherwise. + */ +static inline bool fence_is_array(struct fence *fence) +{ + return fence->ops == &fence_array_ops; +} + /** * to_fence_array - cast a fence to a fence_array * @fence: fence to cast to a fence_array -- cgit v1.2.3 From a02b9dc90d844cc7df7b63264e7920cc425052d9 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Fri, 5 Aug 2016 10:39:35 -0300 Subject: dma-buf/sync_file: refactor fence storage in struct sync_file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create sync_file->fence to abstract the type of fence we are using for each sync_file. If only one fence is present we use a normal struct fence but if there is more fences to be added to the sync_file a fence_array is created. This change cleans up sync_file a bit. We don't need to have sync_file_cb array anymore. Instead, as we always have one fence, only one fence callback is registered per sync_file. v2: Comments from Chris Wilson and Christian König - Not using fence_ops anymore - fence_is_array() was created to differentiate fence from fence_array - fence_array_teardown() is now exported and used under fence_is_array() - struct sync_file lost num_fences member v3: Comments from Chris Wilson and Christian König - struct sync_file lost status member in favor of fence_is_signaled() - drop use of fence_array_teardown() - use sizeof(*fence) to allocate only an array on fence pointers v4: Comments from Chris Wilson - use sizeof(*fence) to reallocate array - fix typo in comments - protect num_fences sum against overflows - use array->base instead of casting the to struct fence v5: fixes checkpatch warnings v6: fix case where all fences are signaled. Signed-off-by: Gustavo Padovan Reviewed-by: Chris Wilson Acked-by: Christian König Acked-by: Greg Kroah-Hartman Signed-off-by: Sumit Semwal --- drivers/dma-buf/sync_file.c | 174 +++++++++++++++++++++++------------ drivers/staging/android/sync_debug.c | 12 ++- include/linux/sync_file.h | 17 +--- 3 files changed, 129 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 9aaa608dfe01..ac9c250af302 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -28,11 +28,11 @@ static const struct file_operations sync_file_fops; -static struct sync_file *sync_file_alloc(int size) +static struct sync_file *sync_file_alloc(void) { struct sync_file *sync_file; - sync_file = kzalloc(size, GFP_KERNEL); + sync_file = kzalloc(sizeof(*sync_file), GFP_KERNEL); if (!sync_file) return NULL; @@ -45,6 +45,8 @@ static struct sync_file *sync_file_alloc(int size) init_waitqueue_head(&sync_file->wq); + INIT_LIST_HEAD(&sync_file->cb.node); + return sync_file; err: @@ -54,14 +56,11 @@ err: static void fence_check_cb_func(struct fence *f, struct fence_cb *cb) { - struct sync_file_cb *check; struct sync_file *sync_file; - check = container_of(cb, struct sync_file_cb, cb); - sync_file = check->sync_file; + sync_file = container_of(cb, struct sync_file, cb); - if (atomic_dec_and_test(&sync_file->status)) - wake_up_all(&sync_file->wq); + wake_up_all(&sync_file->wq); } /** @@ -76,22 +75,18 @@ struct sync_file *sync_file_create(struct fence *fence) { struct sync_file *sync_file; - sync_file = sync_file_alloc(offsetof(struct sync_file, cbs[1])); + sync_file = sync_file_alloc(); if (!sync_file) return NULL; - sync_file->num_fences = 1; - atomic_set(&sync_file->status, 1); + sync_file->fence = fence; + snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d", fence->ops->get_driver_name(fence), fence->ops->get_timeline_name(fence), fence->context, fence->seqno); - sync_file->cbs[0].fence = fence; - sync_file->cbs[0].sync_file = sync_file; - if (fence_add_callback(fence, &sync_file->cbs[0].cb, - fence_check_cb_func)) - atomic_dec(&sync_file->status); + fence_add_callback(fence, &sync_file->cb, fence_check_cb_func); return sync_file; } @@ -121,14 +116,49 @@ err: return NULL; } -static void sync_file_add_pt(struct sync_file *sync_file, int *i, - struct fence *fence) +static int sync_file_set_fence(struct sync_file *sync_file, + struct fence **fences, int num_fences) { - sync_file->cbs[*i].fence = fence; - sync_file->cbs[*i].sync_file = sync_file; + struct fence_array *array; + + /* + * The reference for the fences in the new sync_file and held + * in add_fence() during the merge procedure, so for num_fences == 1 + * we already own a new reference to the fence. For num_fence > 1 + * we own the reference of the fence_array creation. + */ + if (num_fences == 1) { + sync_file->fence = fences[0]; + } else { + array = fence_array_create(num_fences, fences, + fence_context_alloc(1), 1, false); + if (!array) + return -ENOMEM; + + sync_file->fence = &array->base; + } - if (!fence_add_callback(fence, &sync_file->cbs[*i].cb, - fence_check_cb_func)) { + return 0; +} + +static struct fence **get_fences(struct sync_file *sync_file, int *num_fences) +{ + if (fence_is_array(sync_file->fence)) { + struct fence_array *array = to_fence_array(sync_file->fence); + + *num_fences = array->num_fences; + return array->fences; + } + + *num_fences = 1; + return &sync_file->fence; +} + +static void add_fence(struct fence **fences, int *i, struct fence *fence) +{ + fences[*i] = fence; + + if (!fence_is_signaled(fence)) { fence_get(fence); (*i)++; } @@ -147,16 +177,24 @@ static void sync_file_add_pt(struct sync_file *sync_file, int *i, static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, struct sync_file *b) { - int num_fences = a->num_fences + b->num_fences; struct sync_file *sync_file; - int i, i_a, i_b; - unsigned long size = offsetof(struct sync_file, cbs[num_fences]); + struct fence **fences, **nfences, **a_fences, **b_fences; + int i, i_a, i_b, num_fences, a_num_fences, b_num_fences; - sync_file = sync_file_alloc(size); + sync_file = sync_file_alloc(); if (!sync_file) return NULL; - atomic_set(&sync_file->status, num_fences); + a_fences = get_fences(a, &a_num_fences); + b_fences = get_fences(b, &b_num_fences); + if (a_num_fences > INT_MAX - b_num_fences) + return NULL; + + num_fences = a_num_fences + b_num_fences; + + fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL); + if (!fences) + goto err; /* * Assume sync_file a and b are both ordered and have no @@ -165,55 +203,73 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, * If a sync_file can only be created with sync_file_merge * and sync_file_create, this is a reasonable assumption. */ - for (i = i_a = i_b = 0; i_a < a->num_fences && i_b < b->num_fences; ) { - struct fence *pt_a = a->cbs[i_a].fence; - struct fence *pt_b = b->cbs[i_b].fence; + for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { + struct fence *pt_a = a_fences[i_a]; + struct fence *pt_b = b_fences[i_b]; if (pt_a->context < pt_b->context) { - sync_file_add_pt(sync_file, &i, pt_a); + add_fence(fences, &i, pt_a); i_a++; } else if (pt_a->context > pt_b->context) { - sync_file_add_pt(sync_file, &i, pt_b); + add_fence(fences, &i, pt_b); i_b++; } else { if (pt_a->seqno - pt_b->seqno <= INT_MAX) - sync_file_add_pt(sync_file, &i, pt_a); + add_fence(fences, &i, pt_a); else - sync_file_add_pt(sync_file, &i, pt_b); + add_fence(fences, &i, pt_b); i_a++; i_b++; } } - for (; i_a < a->num_fences; i_a++) - sync_file_add_pt(sync_file, &i, a->cbs[i_a].fence); + for (; i_a < a_num_fences; i_a++) + add_fence(fences, &i, a_fences[i_a]); + + for (; i_b < b_num_fences; i_b++) + add_fence(fences, &i, b_fences[i_b]); + + if (i == 0) { + add_fence(fences, &i, a_fences[0]); + i++; + } - for (; i_b < b->num_fences; i_b++) - sync_file_add_pt(sync_file, &i, b->cbs[i_b].fence); + if (num_fences > i) { + nfences = krealloc(fences, i * sizeof(*fences), + GFP_KERNEL); + if (!nfences) + goto err; + + fences = nfences; + } + + if (sync_file_set_fence(sync_file, fences, i) < 0) { + kfree(fences); + goto err; + } - if (num_fences > i) - atomic_sub(num_fences - i, &sync_file->status); - sync_file->num_fences = i; + fence_add_callback(sync_file->fence, &sync_file->cb, + fence_check_cb_func); strlcpy(sync_file->name, name, sizeof(sync_file->name)); return sync_file; + +err: + fput(sync_file->file); + return NULL; + } static void sync_file_free(struct kref *kref) { struct sync_file *sync_file = container_of(kref, struct sync_file, kref); - int i; - - for (i = 0; i < sync_file->num_fences; ++i) { - fence_remove_callback(sync_file->cbs[i].fence, - &sync_file->cbs[i].cb); - fence_put(sync_file->cbs[i].fence); - } + fence_remove_callback(sync_file->fence, &sync_file->cb); + fence_put(sync_file->fence); kfree(sync_file); } @@ -232,9 +288,9 @@ static unsigned int sync_file_poll(struct file *file, poll_table *wait) poll_wait(file, &sync_file->wq, wait); - status = atomic_read(&sync_file->status); + status = fence_is_signaled(sync_file->fence); - if (!status) + if (status) return POLLIN; if (status < 0) return POLLERR; @@ -315,8 +371,9 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, { struct sync_file_info info; struct sync_fence_info *fence_info = NULL; + struct fence **fences; __u32 size; - int ret, i; + int num_fences, ret, i; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) return -EFAULT; @@ -324,6 +381,8 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, if (info.flags || info.pad) return -EINVAL; + fences = get_fences(sync_file, &num_fences); + /* * Passing num_fences = 0 means that userspace doesn't want to * retrieve any sync_fence_info. If num_fences = 0 we skip filling @@ -333,16 +392,16 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, if (!info.num_fences) goto no_fences; - if (info.num_fences < sync_file->num_fences) + if (info.num_fences < num_fences) return -EINVAL; - size = sync_file->num_fences * sizeof(*fence_info); + size = num_fences * sizeof(*fence_info); fence_info = kzalloc(size, GFP_KERNEL); if (!fence_info) return -ENOMEM; - for (i = 0; i < sync_file->num_fences; ++i) - sync_fill_fence_info(sync_file->cbs[i].fence, &fence_info[i]); + for (i = 0; i < num_fences; i++) + sync_fill_fence_info(fences[i], &fence_info[i]); if (copy_to_user(u64_to_user_ptr(info.sync_fence_info), fence_info, size)) { @@ -352,11 +411,8 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, no_fences: strlcpy(info.name, sync_file->name, sizeof(info.name)); - info.status = atomic_read(&sync_file->status); - if (info.status >= 0) - info.status = !info.status; - - info.num_fences = sync_file->num_fences; + info.status = fence_is_signaled(sync_file->fence); + info.num_fences = num_fences; if (copy_to_user((void __user *)arg, &info, sizeof(info))) ret = -EFAULT; diff --git a/drivers/staging/android/sync_debug.c b/drivers/staging/android/sync_debug.c index 4c5a85595a85..961a94bf156c 100644 --- a/drivers/staging/android/sync_debug.c +++ b/drivers/staging/android/sync_debug.c @@ -135,10 +135,16 @@ static void sync_print_sync_file(struct seq_file *s, int i; seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name, - sync_status_str(atomic_read(&sync_file->status))); + sync_status_str(!fence_is_signaled(sync_file->fence))); - for (i = 0; i < sync_file->num_fences; ++i) - sync_print_fence(s, sync_file->cbs[i].fence, true); + if (fence_is_array(sync_file->fence)) { + struct fence_array *array = to_fence_array(sync_file->fence); + + for (i = 0; i < array->num_fences; ++i) + sync_print_fence(s, array->fences[i], true); + } else { + sync_print_fence(s, sync_file->fence, true); + } } static int sync_debugfs_show(struct seq_file *s, void *unused) diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h index c6ffe8b0725c..2efc5ec60575 100644 --- a/include/linux/sync_file.h +++ b/include/linux/sync_file.h @@ -19,12 +19,7 @@ #include #include #include - -struct sync_file_cb { - struct fence_cb cb; - struct fence *fence; - struct sync_file *sync_file; -}; +#include /** * struct sync_file - sync file to export to the userspace @@ -32,10 +27,9 @@ struct sync_file_cb { * @kref: reference count on fence. * @name: name of sync_file. Useful for debugging * @sync_file_list: membership in global file list - * @num_fences: number of sync_pts in the fence * @wq: wait queue for fence signaling - * @status: 0: signaled, >0:active, <0: error - * @cbs: sync_pts callback information + * @fence: fence with the fences in the sync_file + * @cb: fence callback information */ struct sync_file { struct file *file; @@ -44,12 +38,11 @@ struct sync_file { #ifdef CONFIG_DEBUG_FS struct list_head sync_file_list; #endif - int num_fences; wait_queue_head_t wq; - atomic_t status; - struct sync_file_cb cbs[]; + struct fence *fence; + struct fence_cb cb; }; struct sync_file *sync_file_create(struct fence *fence); -- cgit v1.2.3 From 972526a4093243fdaf77dd7c6f8b11fba5b15864 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Fri, 5 Aug 2016 10:39:36 -0300 Subject: dma-buf/sync_file: add sync_file_get_fence() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creates a function that given an sync file descriptor returns a fence containing all fences in the sync_file. v2: Comments by Daniel Vetter - Adapt to new version of fence_collection_init() - Hold a reference for the fence we return v3: - Adapt to use fput() directly - rename to sync_file_get_fence() as we always return one fence v4: Adapt to use fence_array v5: set fence through fence_get() Signed-off-by: Gustavo Padovan Reviewed-by: Chris Wilson Acked-by: Christian König Signed-off-by: Sumit Semwal --- drivers/dma-buf/sync_file.c | 23 +++++++++++++++++++++++ include/linux/sync_file.h | 1 + 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index ac9c250af302..2873760e02a9 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -116,6 +116,29 @@ err: return NULL; } +/** + * sync_file_get_fence - get the fence related to the sync_file fd + * @fd: sync_file fd to get the fence from + * + * Ensures @fd references a valid sync_file and returns a fence that + * represents all fence in the sync_file. On error NULL is returned. + */ +struct fence *sync_file_get_fence(int fd) +{ + struct sync_file *sync_file; + struct fence *fence; + + sync_file = sync_file_fdget(fd); + if (!sync_file) + return NULL; + + fence = fence_get(sync_file->fence); + fput(sync_file->file); + + return fence; +} +EXPORT_SYMBOL(sync_file_get_fence); + static int sync_file_set_fence(struct sync_file *sync_file, struct fence **fences, int num_fences) { diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h index 2efc5ec60575..f7de5a0b3d12 100644 --- a/include/linux/sync_file.h +++ b/include/linux/sync_file.h @@ -46,5 +46,6 @@ struct sync_file { }; struct sync_file *sync_file_create(struct fence *fence); +struct fence *sync_file_get_fence(int fd); #endif /* _LINUX_SYNC_H */ -- cgit v1.2.3 From e24165537312723e2900831dd6e7415b8d85278c Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Fri, 5 Aug 2016 10:39:38 -0300 Subject: dma-buf/sync_file: only enable fence signalling on poll() Signalling doesn't need to be enabled at sync_file creation, it is only required if userspace waiting the fence to signal through poll(). Thus we delay fence_add_callback() until poll is called. It only adds the callback the first time poll() is called. This avoid re-adding the same callback multiple times. v2: rebase and update to work with new fence support for sync_file v3: use atomic operation to set enabled and protect fence_add_callback() v4: use user bit from fence flags (comment from Chris Wilson) v5: use ternary if on poll return (comment from Chris Wilson) Signed-off-by: Gustavo Padovan Reviewed-by: Chris Wilson Signed-off-by: Sumit Semwal [sumits: remove unused var status] Link: http://patchwork.freedesktop.org/patch/msgid/1470404378-27961-1-git-send-email-gustavo@padovan.org --- drivers/dma-buf/sync_file.c | 21 ++++++++------------- include/linux/sync_file.h | 2 ++ 2 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 2873760e02a9..486d29c1a830 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -86,8 +86,6 @@ struct sync_file *sync_file_create(struct fence *fence) fence->ops->get_timeline_name(fence), fence->context, fence->seqno); - fence_add_callback(fence, &sync_file->cb, fence_check_cb_func); - return sync_file; } EXPORT_SYMBOL(sync_file_create); @@ -274,9 +272,6 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, goto err; } - fence_add_callback(sync_file->fence, &sync_file->cb, - fence_check_cb_func); - strlcpy(sync_file->name, name, sizeof(sync_file->name)); return sync_file; @@ -291,7 +286,8 @@ static void sync_file_free(struct kref *kref) struct sync_file *sync_file = container_of(kref, struct sync_file, kref); - fence_remove_callback(sync_file->fence, &sync_file->cb); + if (test_bit(POLL_ENABLED, &sync_file->fence->flags)) + fence_remove_callback(sync_file->fence, &sync_file->cb); fence_put(sync_file->fence); kfree(sync_file); } @@ -307,17 +303,16 @@ static int sync_file_release(struct inode *inode, struct file *file) static unsigned int sync_file_poll(struct file *file, poll_table *wait) { struct sync_file *sync_file = file->private_data; - int status; poll_wait(file, &sync_file->wq, wait); - status = fence_is_signaled(sync_file->fence); + if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) { + if (fence_add_callback(sync_file->fence, &sync_file->cb, + fence_check_cb_func) < 0) + wake_up_all(&sync_file->wq); + } - if (status) - return POLLIN; - if (status < 0) - return POLLERR; - return 0; + return fence_is_signaled(sync_file->fence) ? POLLIN : 0; } static long sync_file_ioctl_merge(struct sync_file *sync_file, diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h index f7de5a0b3d12..aa17ccfc2f57 100644 --- a/include/linux/sync_file.h +++ b/include/linux/sync_file.h @@ -45,6 +45,8 @@ struct sync_file { struct fence_cb cb; }; +#define POLL_ENABLED FENCE_FLAG_USER_BITS + struct sync_file *sync_file_create(struct fence *fence); struct fence *sync_file_get_fence(int fd); -- cgit v1.2.3 From 0951e458d2ebd06e5e83fbb8a67f52dd3146d594 Mon Sep 17 00:00:00 2001 From: Sumit Semwal Date: Thu, 11 Aug 2016 16:17:57 +0530 Subject: dma-buf/fence: kerneldoc: remove unused struct members Commit 0431b9065f28ecf6c320fefebe0241620049984f ("staging/android: bring struct sync_pt back") removed child_list and active_list from struct fence, but left it in kernel doc. Delete them. Fixes: 0431b9065f28 ("staging/android: bring struct sync_pt back") Signed-off-by: Sumit Semwal Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470912480-32304-2-git-send-email-sumit.semwal@linaro.org --- include/linux/fence.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fence.h b/include/linux/fence.h index 8cc719a63728..2ac6fa5f4712 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -49,8 +49,6 @@ struct fence_cb; * @timestamp: Timestamp when the fence was signaled. * @status: Optional, only valid if < 0, must be set before calling * fence_signal, indicates that the fence has completed with an error. - * @child_list: list of children fences - * @active_list: list of active fences * * the flags member must be manipulated and read using the appropriate * atomic ops (bit_*), so taking the spinlock will not be needed most -- cgit v1.2.3 From 3590d50e2313644cd192ff55e83df76dea232319 Mon Sep 17 00:00:00 2001 From: Sumit Semwal Date: Thu, 11 Aug 2016 16:17:58 +0530 Subject: dma-buf/fence: kerneldoc: remove spurious section header Commit e941759c74a44d6ac2eed21bb0a38b21fe4559e2 ("fence: dma-buf cross-device synchronization (v18)") had a spurious kerneldoc section header that caused Sphinx to complain. Fix it. Fixes: e941759c74a4 ("fence: dma-buf cross-device synchronization (v18)") Signed-off-by: Sumit Semwal Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470912480-32304-3-git-send-email-sumit.semwal@linaro.org --- include/linux/fence.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fence.h b/include/linux/fence.h index 2ac6fa5f4712..0d763053f97a 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -60,7 +60,7 @@ struct fence_cb; * implementer of the fence for its own purposes. Can be used in different * ways by different fence implementers, so do not rely on this. * - * *) Since atomic bitops are used, this is not guaranteed to be the case. + * Since atomic bitops are used, this is not guaranteed to be the case. * Particularly, if the bit was set, but fence_signal was called right * before this bit was set, it would have been able to set the * FENCE_FLAG_SIGNALED_BIT, before enable_signaling was called. -- cgit v1.2.3 From b754b35b089ddfea3ff7b9b1d2e99e61d726d177 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 12 Aug 2016 22:48:56 +0200 Subject: vgaarbiter: rst-ifiy and polish kerneldoc Move the documentation into Documentation/gpu, link it up and pull in the kernel doc. No actual text changes except that I did polish the kerneldoc a bit, especially for vga_client_register(). v2: Remove some rst from vga-switcheroo.rst that I don't understand, but which seems to be the reason why the new vgaarbiter.rst sometimes drops out of the sidebar index. v3: Drop one level of headings and clarify the vgaarb one a bit. v4: Fix some typos (Sean). Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: Sean Paul Reviewed-by: Sean Paul Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1471034937-651-20-git-send-email-daniel.vetter@ffwll.ch --- Documentation/gpu/index.rst | 1 + Documentation/gpu/vga-switcheroo.rst | 2 - Documentation/gpu/vgaarbiter.rst | 191 ++++++++++++++++++++++++++++++++++ Documentation/vgaarbiter.txt | 192 ----------------------------------- drivers/gpu/vga/vgaarb.c | 110 +++++++++++++++++++- include/linux/vgaarb.h | 128 +++-------------------- 6 files changed, 316 insertions(+), 308 deletions(-) create mode 100644 Documentation/gpu/vgaarbiter.rst delete mode 100644 Documentation/vgaarbiter.txt (limited to 'include/linux') diff --git a/Documentation/gpu/index.rst b/Documentation/gpu/index.rst index fcac0fa72056..ba92f45abb76 100644 --- a/Documentation/gpu/index.rst +++ b/Documentation/gpu/index.rst @@ -12,3 +12,4 @@ Linux GPU Driver Developer's Guide drm-uapi i915 vga-switcheroo + vgaarbiter diff --git a/Documentation/gpu/vga-switcheroo.rst b/Documentation/gpu/vga-switcheroo.rst index cbbdb994f1dd..463a74fc40d1 100644 --- a/Documentation/gpu/vga-switcheroo.rst +++ b/Documentation/gpu/vga-switcheroo.rst @@ -1,5 +1,3 @@ -.. _vga_switcheroo: - ============== VGA Switcheroo ============== diff --git a/Documentation/gpu/vgaarbiter.rst b/Documentation/gpu/vgaarbiter.rst new file mode 100644 index 000000000000..0b41b051d021 --- /dev/null +++ b/Documentation/gpu/vgaarbiter.rst @@ -0,0 +1,191 @@ +=========== +VGA Arbiter +=========== + +Graphic devices are accessed through ranges in I/O or memory space. While most +modern devices allow relocation of such ranges, some "Legacy" VGA devices +implemented on PCI will typically have the same "hard-decoded" addresses as +they did on ISA. For more details see "PCI Bus Binding to IEEE Std 1275-1994 +Standard for Boot (Initialization Configuration) Firmware Revision 2.1" +Section 7, Legacy Devices. + +The Resource Access Control (RAC) module inside the X server [0] existed for +the legacy VGA arbitration task (besides other bus management tasks) when more +than one legacy device co-exists on the same machine. But the problem happens +when these devices are trying to be accessed by different userspace clients +(e.g. two server in parallel). Their address assignments conflict. Moreover, +ideally, being a userspace application, it is not the role of the X server to +control bus resources. Therefore an arbitration scheme outside of the X server +is needed to control the sharing of these resources. This document introduces +the operation of the VGA arbiter implemented for the Linux kernel. + +vgaarb kernel/userspace ABI +--------------------------- + +The vgaarb is a module of the Linux Kernel. When it is initially loaded, it +scans all PCI devices and adds the VGA ones inside the arbitration. The +arbiter then enables/disables the decoding on different devices of the VGA +legacy instructions. Devices which do not want/need to use the arbiter may +explicitly tell it by calling vga_set_legacy_decoding(). + +The kernel exports a char device interface (/dev/vga_arbiter) to the clients, +which has the following semantics: + +open + Opens a user instance of the arbiter. By default, it's attached to the + default VGA device of the system. + +close + Close a user instance. Release locks made by the user + +read + Return a string indicating the status of the target like: + + ",decodes=,owns=,locks= (ic,mc)" + + An IO state string is of the form {io,mem,io+mem,none}, mc and + ic are respectively mem and io lock counts (for debugging/ + diagnostic only). "decodes" indicate what the card currently + decodes, "owns" indicates what is currently enabled on it, and + "locks" indicates what is locked by this card. If the card is + unplugged, we get "invalid" then for card_ID and an -ENODEV + error is returned for any command until a new card is targeted. + + +write + Write a command to the arbiter. List of commands: + + target + switch target to card (see below) + lock + acquires locks on target ("none" is an invalid io_state) + trylock + non-blocking acquire locks on target (returns EBUSY if + unsuccessful) + unlock + release locks on target + unlock all + release all locks on target held by this user (not implemented + yet) + decodes + set the legacy decoding attributes for the card + + poll + event if something changes on any card (not just the target) + + card_ID is of the form "PCI:domain:bus:dev.fn". It can be set to "default" + to go back to the system default card (TODO: not implemented yet). Currently, + only PCI is supported as a prefix, but the userland API may support other bus + types in the future, even if the current kernel implementation doesn't. + +Note about locks: + +The driver keeps track of which user has which locks on which card. It +supports stacking, like the kernel one. This complexifies the implementation +a bit, but makes the arbiter more tolerant to user space problems and able +to properly cleanup in all cases when a process dies. +Currently, a max of 16 cards can have locks simultaneously issued from +user space for a given user (file descriptor instance) of the arbiter. + +In the case of devices hot-{un,}plugged, there is a hook - pci_notify() - to +notify them being added/removed in the system and automatically added/removed +in the arbiter. + +There is also an in-kernel API of the arbiter in case DRM, vgacon, or other +drivers want to use it. + +In-kernel interface +------------------- + +.. kernel-doc:: include/linux/vgaarb.h + :internal: + +.. kernel-doc:: drivers/gpu/vga/vgaarb.c + :export: + +libpciaccess +------------ + +To use the vga arbiter char device it was implemented an API inside the +libpciaccess library. One field was added to struct pci_device (each device +on the system):: + + /* the type of resource decoded by the device */ + int vgaarb_rsrc; + +Besides it, in pci_system were added:: + + int vgaarb_fd; + int vga_count; + struct pci_device *vga_target; + struct pci_device *vga_default_dev; + +The vga_count is used to track how many cards are being arbitrated, so for +instance, if there is only one card, then it can completely escape arbitration. + +These functions below acquire VGA resources for the given card and mark those +resources as locked. If the resources requested are "normal" (and not legacy) +resources, the arbiter will first check whether the card is doing legacy +decoding for that type of resource. If yes, the lock is "converted" into a +legacy resource lock. The arbiter will first look for all VGA cards that +might conflict and disable their IOs and/or Memory access, including VGA +forwarding on P2P bridges if necessary, so that the requested resources can +be used. Then, the card is marked as locking these resources and the IO and/or +Memory access is enabled on the card (including VGA forwarding on parent +P2P bridges if any). In the case of vga_arb_lock(), the function will block +if some conflicting card is already locking one of the required resources (or +any resource on a different bus segment, since P2P bridges don't differentiate +VGA memory and IO afaik). If the card already owns the resources, the function +succeeds. vga_arb_trylock() will return (-EBUSY) instead of blocking. Nested +calls are supported (a per-resource counter is maintained). + +Set the target device of this client. :: + + int pci_device_vgaarb_set_target (struct pci_device *dev); + +For instance, in x86 if two devices on the same bus want to lock different +resources, both will succeed (lock). If devices are in different buses and +trying to lock different resources, only the first who tried succeeds. :: + + int pci_device_vgaarb_lock (void); + int pci_device_vgaarb_trylock (void); + +Unlock resources of device. :: + + int pci_device_vgaarb_unlock (void); + +Indicates to the arbiter if the card decodes legacy VGA IOs, legacy VGA +Memory, both, or none. All cards default to both, the card driver (fbdev for +example) should tell the arbiter if it has disabled legacy decoding, so the +card can be left out of the arbitration process (and can be safe to take +interrupts at any time. :: + + int pci_device_vgaarb_decodes (int new_vgaarb_rsrc); + +Connects to the arbiter device, allocates the struct :: + + int pci_device_vgaarb_init (void); + +Close the connection :: + + void pci_device_vgaarb_fini (void); + +xf86VGAArbiter (X server implementation) +---------------------------------------- + +X server basically wraps all the functions that touch VGA registers somehow. + +References +---------- + +Benjamin Herrenschmidt (IBM?) started this work when he discussed such design +with the Xorg community in 2005 [1, 2]. In the end of 2007, Paulo Zanoni and +Tiago Vignatti (both of C3SL/Federal University of Paraná) proceeded his work +enhancing the kernel code to adapt as a kernel module and also did the +implementation of the user space side [3]. Now (2009) Tiago Vignatti and Dave +Airlie finally put this work in shape and queued to Jesse Barnes' PCI tree. + +0) http://cgit.freedesktop.org/xorg/xserver/commit/?id=4b42448a2388d40f257774fbffdccaea87bd0347 +1) http://lists.freedesktop.org/archives/xorg/2005-March/006663.html +2) http://lists.freedesktop.org/archives/xorg/2005-March/006745.html +3) http://lists.freedesktop.org/archives/xorg/2007-October/029507.html diff --git a/Documentation/vgaarbiter.txt b/Documentation/vgaarbiter.txt deleted file mode 100644 index 014423e2824c..000000000000 --- a/Documentation/vgaarbiter.txt +++ /dev/null @@ -1,192 +0,0 @@ - -VGA Arbiter -=========== - -Graphic devices are accessed through ranges in I/O or memory space. While most -modern devices allow relocation of such ranges, some "Legacy" VGA devices -implemented on PCI will typically have the same "hard-decoded" addresses as -they did on ISA. For more details see "PCI Bus Binding to IEEE Std 1275-1994 -Standard for Boot (Initialization Configuration) Firmware Revision 2.1" -Section 7, Legacy Devices. - -The Resource Access Control (RAC) module inside the X server [0] existed for -the legacy VGA arbitration task (besides other bus management tasks) when more -than one legacy device co-exists on the same machine. But the problem happens -when these devices are trying to be accessed by different userspace clients -(e.g. two server in parallel). Their address assignments conflict. Moreover, -ideally, being a userspace application, it is not the role of the X server to -control bus resources. Therefore an arbitration scheme outside of the X server -is needed to control the sharing of these resources. This document introduces -the operation of the VGA arbiter implemented for the Linux kernel. - ----------------------------------------------------------------------------- - -I. Details and Theory of Operation - I.1 vgaarb - I.2 libpciaccess - I.3 xf86VGAArbiter (X server implementation) -II. Credits -III.References - - -I. Details and Theory of Operation -================================== - -I.1 vgaarb ----------- - -The vgaarb is a module of the Linux Kernel. When it is initially loaded, it -scans all PCI devices and adds the VGA ones inside the arbitration. The -arbiter then enables/disables the decoding on different devices of the VGA -legacy instructions. Devices which do not want/need to use the arbiter may -explicitly tell it by calling vga_set_legacy_decoding(). - -The kernel exports a char device interface (/dev/vga_arbiter) to the clients, -which has the following semantics: - - open : open user instance of the arbiter. By default, it's attached to - the default VGA device of the system. - - close : close user instance. Release locks made by the user - - read : return a string indicating the status of the target like: - - ",decodes=,owns=,locks= (ic,mc)" - - An IO state string is of the form {io,mem,io+mem,none}, mc and - ic are respectively mem and io lock counts (for debugging/ - diagnostic only). "decodes" indicate what the card currently - decodes, "owns" indicates what is currently enabled on it, and - "locks" indicates what is locked by this card. If the card is - unplugged, we get "invalid" then for card_ID and an -ENODEV - error is returned for any command until a new card is targeted. - - - write : write a command to the arbiter. List of commands: - - target : switch target to card (see below) - lock : acquires locks on target ("none" is an invalid io_state) - trylock : non-blocking acquire locks on target (returns EBUSY if - unsuccessful) - unlock : release locks on target - unlock all : release all locks on target held by this user (not - implemented yet) - decodes : set the legacy decoding attributes for the card - - poll : event if something changes on any card (not just the - target) - - card_ID is of the form "PCI:domain:bus:dev.fn". It can be set to "default" - to go back to the system default card (TODO: not implemented yet). Currently, - only PCI is supported as a prefix, but the userland API may support other bus - types in the future, even if the current kernel implementation doesn't. - -Note about locks: - -The driver keeps track of which user has which locks on which card. It -supports stacking, like the kernel one. This complexifies the implementation -a bit, but makes the arbiter more tolerant to user space problems and able -to properly cleanup in all cases when a process dies. -Currently, a max of 16 cards can have locks simultaneously issued from -user space for a given user (file descriptor instance) of the arbiter. - -In the case of devices hot-{un,}plugged, there is a hook - pci_notify() - to -notify them being added/removed in the system and automatically added/removed -in the arbiter. - -There is also an in-kernel API of the arbiter in case DRM, vgacon, or other -drivers want to use it. - - -I.2 libpciaccess ----------------- - -To use the vga arbiter char device it was implemented an API inside the -libpciaccess library. One field was added to struct pci_device (each device -on the system): - - /* the type of resource decoded by the device */ - int vgaarb_rsrc; - -Besides it, in pci_system were added: - - int vgaarb_fd; - int vga_count; - struct pci_device *vga_target; - struct pci_device *vga_default_dev; - - -The vga_count is used to track how many cards are being arbitrated, so for -instance, if there is only one card, then it can completely escape arbitration. - - -These functions below acquire VGA resources for the given card and mark those -resources as locked. If the resources requested are "normal" (and not legacy) -resources, the arbiter will first check whether the card is doing legacy -decoding for that type of resource. If yes, the lock is "converted" into a -legacy resource lock. The arbiter will first look for all VGA cards that -might conflict and disable their IOs and/or Memory access, including VGA -forwarding on P2P bridges if necessary, so that the requested resources can -be used. Then, the card is marked as locking these resources and the IO and/or -Memory access is enabled on the card (including VGA forwarding on parent -P2P bridges if any). In the case of vga_arb_lock(), the function will block -if some conflicting card is already locking one of the required resources (or -any resource on a different bus segment, since P2P bridges don't differentiate -VGA memory and IO afaik). If the card already owns the resources, the function -succeeds. vga_arb_trylock() will return (-EBUSY) instead of blocking. Nested -calls are supported (a per-resource counter is maintained). - - -Set the target device of this client. - int pci_device_vgaarb_set_target (struct pci_device *dev); - - -For instance, in x86 if two devices on the same bus want to lock different -resources, both will succeed (lock). If devices are in different buses and -trying to lock different resources, only the first who tried succeeds. - int pci_device_vgaarb_lock (void); - int pci_device_vgaarb_trylock (void); - -Unlock resources of device. - int pci_device_vgaarb_unlock (void); - -Indicates to the arbiter if the card decodes legacy VGA IOs, legacy VGA -Memory, both, or none. All cards default to both, the card driver (fbdev for -example) should tell the arbiter if it has disabled legacy decoding, so the -card can be left out of the arbitration process (and can be safe to take -interrupts at any time. - int pci_device_vgaarb_decodes (int new_vgaarb_rsrc); - -Connects to the arbiter device, allocates the struct - int pci_device_vgaarb_init (void); - -Close the connection - void pci_device_vgaarb_fini (void); - - -I.3 xf86VGAArbiter (X server implementation) --------------------------------------------- - -(TODO) - -X server basically wraps all the functions that touch VGA registers somehow. - - -II. Credits -=========== - -Benjamin Herrenschmidt (IBM?) started this work when he discussed such design -with the Xorg community in 2005 [1, 2]. In the end of 2007, Paulo Zanoni and -Tiago Vignatti (both of C3SL/Federal University of Paraná) proceeded his work -enhancing the kernel code to adapt as a kernel module and also did the -implementation of the user space side [3]. Now (2009) Tiago Vignatti and Dave -Airlie finally put this work in shape and queued to Jesse Barnes' PCI tree. - - -III. References -============== - -[0] http://cgit.freedesktop.org/xorg/xserver/commit/?id=4b42448a2388d40f257774fbffdccaea87bd0347 -[1] http://lists.freedesktop.org/archives/xorg/2005-March/006663.html -[2] http://lists.freedesktop.org/archives/xorg/2005-March/006745.html -[3] http://lists.freedesktop.org/archives/xorg/2007-October/029507.html diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index f17cb0431833..1887f199ccb7 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -131,7 +131,24 @@ static struct vga_device *vgadev_find(struct pci_dev *pdev) return NULL; } -/* Returns the default VGA device (vgacon's babe) */ +/** + * vga_default_device - return the default VGA device, for vgacon + * + * This can be defined by the platform. The default implementation + * is rather dumb and will probably only work properly on single + * vga card setups and/or x86 platforms. + * + * If your VGA default device is not PCI, you'll have to return + * NULL here. In this case, I assume it will not conflict with + * any PCI card. If this is not true, I'll have to define two archs + * hooks for enabling/disabling the VGA default device if that is + * possible. This may be a problem with real _ISA_ VGA cards, in + * addition to a PCI one. I don't know at this point how to deal + * with that card. Can theirs IOs be disabled at all ? If not, then + * I suppose it's a matter of having the proper arch hook telling + * us about it, so we basically never allow anybody to succeed a + * vga_get()... + */ struct pci_dev *vga_default_device(void) { return vga_default; @@ -356,6 +373,40 @@ static void __vga_put(struct vga_device *vgadev, unsigned int rsrc) wake_up_all(&vga_wait_queue); } +/** + * vga_get - acquire & locks VGA resources + * @pdev: pci device of the VGA card or NULL for the system default + * @rsrc: bit mask of resources to acquire and lock + * @interruptible: blocking should be interruptible by signals ? + * + * This function acquires VGA resources for the given card and mark those + * resources locked. If the resource requested are "normal" (and not legacy) + * resources, the arbiter will first check whether the card is doing legacy + * decoding for that type of resource. If yes, the lock is "converted" into a + * legacy resource lock. + * + * The arbiter will first look for all VGA cards that might conflict and disable + * their IOs and/or Memory access, including VGA forwarding on P2P bridges if + * necessary, so that the requested resources can be used. Then, the card is + * marked as locking these resources and the IO and/or Memory accesses are + * enabled on the card (including VGA forwarding on parent P2P bridges if any). + * + * This function will block if some conflicting card is already locking one of + * the required resources (or any resource on a different bus segment, since P2P + * bridges don't differentiate VGA memory and IO afaik). You can indicate + * whether this blocking should be interruptible by a signal (for userland + * interface) or not. + * + * Must not be called at interrupt time or in atomic context. If the card + * already owns the resources, the function succeeds. Nested calls are + * supported (a per-resource counter is maintained) + * + * On success, release the VGA resource again with vga_put(). + * + * Returns: + * + * 0 on success, negative error code on failure. + */ int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible) { struct vga_device *vgadev, *conflict; @@ -408,6 +459,21 @@ int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible) } EXPORT_SYMBOL(vga_get); +/** + * vga_tryget - try to acquire & lock legacy VGA resources + * @pdev: pci devivce of VGA card or NULL for system default + * @rsrc: bit mask of resources to acquire and lock + * + * This function performs the same operation as vga_get(), but will return an + * error (-EBUSY) instead of blocking if the resources are already locked by + * another card. It can be called in any context + * + * On success, release the VGA resource again with vga_put(). + * + * Returns: + * + * 0 on success, negative error code on failure. + */ int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) { struct vga_device *vgadev; @@ -435,6 +501,16 @@ bail: } EXPORT_SYMBOL(vga_tryget); +/** + * vga_put - release lock on legacy VGA resources + * @pdev: pci device of VGA card or NULL for system default + * @rsrc: but mask of resource to release + * + * This fuction releases resources previously locked by vga_get() or + * vga_tryget(). The resources aren't disabled right away, so that a subsequence + * vga_get() on the same card will succeed immediately. Resources have a + * counter, so locks are only released if the counter reaches 0. + */ void vga_put(struct pci_dev *pdev, unsigned int rsrc) { struct vga_device *vgadev; @@ -716,7 +792,37 @@ void vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes) } EXPORT_SYMBOL(vga_set_legacy_decoding); -/* call with NULL to unregister */ +/** + * vga_client_register - register or unregister a VGA arbitration client + * @pdev: pci device of the VGA client + * @cookie: client cookie to be used in callbacks + * @irq_set_state: irq state change callback + * @set_vga_decode: vga decode change callback + * + * Clients have two callback mechanisms they can use. + * + * @irq_set_state callback: If a client can't disable its GPUs VGA + * resources, then we need to be able to ask it to turn off its irqs when we + * turn off its mem and io decoding. + * + * @set_vga_decode callback: If a client can disable its GPU VGA resource, it + * will get a callback from this to set the encode/decode state. + * + * Rationale: we cannot disable VGA decode resources unconditionally some single + * GPU laptops seem to require ACPI or BIOS access to the VGA registers to + * control things like backlights etc. Hopefully newer multi-GPU laptops do + * something saner, and desktops won't have any special ACPI for this. The + * driver will get a callback when VGA arbitration is first used by userspace + * since some older X servers have issues. + * + * This function does not check whether a client for @pdev has been registered + * already. + * + * To unregister just call this function with @irq_set_state and @set_vga_decode + * both set to NULL for the same @pdev as originally used to register them. + * + * Returns: 0 on success, -1 on failure + */ int vga_client_register(struct pci_dev *pdev, void *cookie, void (*irq_set_state)(void *cookie, bool state), unsigned int (*set_vga_decode)(void *cookie, diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index 8c3b412d84df..ee162e3e879b 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -73,34 +73,6 @@ static inline void vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes) { }; #endif -/** - * vga_get - acquire & locks VGA resources - * - * @pdev: pci device of the VGA card or NULL for the system default - * @rsrc: bit mask of resources to acquire and lock - * @interruptible: blocking should be interruptible by signals ? - * - * This function acquires VGA resources for the given - * card and mark those resources locked. If the resource requested - * are "normal" (and not legacy) resources, the arbiter will first check - * whether the card is doing legacy decoding for that type of resource. If - * yes, the lock is "converted" into a legacy resource lock. - * The arbiter will first look for all VGA cards that might conflict - * and disable their IOs and/or Memory access, including VGA forwarding - * on P2P bridges if necessary, so that the requested resources can - * be used. Then, the card is marked as locking these resources and - * the IO and/or Memory accesse are enabled on the card (including - * VGA forwarding on parent P2P bridges if any). - * This function will block if some conflicting card is already locking - * one of the required resources (or any resource on a different bus - * segment, since P2P bridges don't differenciate VGA memory and IO - * afaik). You can indicate whether this blocking should be interruptible - * by a signal (for userland interface) or not. - * Must not be called at interrupt time or in atomic context. - * If the card already owns the resources, the function succeeds. - * Nested calls are supported (a per-resource counter is maintained) - */ - #if defined(CONFIG_VGA_ARB) extern int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible); #else @@ -108,11 +80,14 @@ static inline int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interrupt #endif /** - * vga_get_interruptible + * vga_get_interruptible + * @pdev: pci device of the VGA card or NULL for the system default + * @rsrc: bit mask of resources to acquire and lock * - * Shortcut to vga_get + * Shortcut to vga_get with interruptible set to true. + * + * On success, release the VGA resource again with vga_put(). */ - static inline int vga_get_interruptible(struct pci_dev *pdev, unsigned int rsrc) { @@ -120,47 +95,26 @@ static inline int vga_get_interruptible(struct pci_dev *pdev, } /** - * vga_get_uninterruptible + * vga_get_uninterruptible - shortcut to vga_get() + * @pdev: pci device of the VGA card or NULL for the system default + * @rsrc: bit mask of resources to acquire and lock * - * Shortcut to vga_get + * Shortcut to vga_get with interruptible set to false. + * + * On success, release the VGA resource again with vga_put(). */ - static inline int vga_get_uninterruptible(struct pci_dev *pdev, unsigned int rsrc) { return vga_get(pdev, rsrc, 0); } -/** - * vga_tryget - try to acquire & lock legacy VGA resources - * - * @pdev: pci devivce of VGA card or NULL for system default - * @rsrc: bit mask of resources to acquire and lock - * - * This function performs the same operation as vga_get(), but - * will return an error (-EBUSY) instead of blocking if the resources - * are already locked by another card. It can be called in any context - */ - #if defined(CONFIG_VGA_ARB) extern int vga_tryget(struct pci_dev *pdev, unsigned int rsrc); #else static inline int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) { return 0; } #endif -/** - * vga_put - release lock on legacy VGA resources - * - * @pdev: pci device of VGA card or NULL for system default - * @rsrc: but mask of resource to release - * - * This function releases resources previously locked by vga_get() - * or vga_tryget(). The resources aren't disabled right away, so - * that a subsequence vga_get() on the same card will succeed - * immediately. Resources have a counter, so locks are only - * released if the counter reaches 0. - */ - #if defined(CONFIG_VGA_ARB) extern void vga_put(struct pci_dev *pdev, unsigned int rsrc); #else @@ -168,25 +122,6 @@ extern void vga_put(struct pci_dev *pdev, unsigned int rsrc); #endif -/** - * vga_default_device - * - * This can be defined by the platform. The default implementation - * is rather dumb and will probably only work properly on single - * vga card setups and/or x86 platforms. - * - * If your VGA default device is not PCI, you'll have to return - * NULL here. In this case, I assume it will not conflict with - * any PCI card. If this is not true, I'll have to define two archs - * hooks for enabling/disabling the VGA default device if that is - * possible. This may be a problem with real _ISA_ VGA cards, in - * addition to a PCI one. I don't know at this point how to deal - * with that card. Can theirs IOs be disabled at all ? If not, then - * I suppose it's a matter of having the proper arch hook telling - * us about it, so we basically never allow anybody to succeed a - * vga_get()... - */ - #ifdef CONFIG_VGA_ARB extern struct pci_dev *vga_default_device(void); extern void vga_set_default_device(struct pci_dev *pdev); @@ -195,14 +130,11 @@ static inline struct pci_dev *vga_default_device(void) { return NULL; }; static inline void vga_set_default_device(struct pci_dev *pdev) { }; #endif -/** - * vga_conflicts - * - * Architectures should define this if they have several - * independent PCI domains that can afford concurrent VGA - * decoding +/* + * Architectures should define this if they have several + * independent PCI domains that can afford concurrent VGA + * decoding */ - #ifndef __ARCH_HAS_VGA_CONFLICT static inline int vga_conflicts(struct pci_dev *p1, struct pci_dev *p2) { @@ -210,34 +142,6 @@ static inline int vga_conflicts(struct pci_dev *p1, struct pci_dev *p2) } #endif -/** - * vga_client_register - * - * @pdev: pci device of the VGA client - * @cookie: client cookie to be used in callbacks - * @irq_set_state: irq state change callback - * @set_vga_decode: vga decode change callback - * - * return value: 0 on success, -1 on failure - * Register a client with the VGA arbitration logic - * - * Clients have two callback mechanisms they can use. - * irq enable/disable callback - - * If a client can't disable its GPUs VGA resources, then we - * need to be able to ask it to turn off its irqs when we - * turn off its mem and io decoding. - * set_vga_decode - * If a client can disable its GPU VGA resource, it will - * get a callback from this to set the encode/decode state - * - * Rationale: we cannot disable VGA decode resources unconditionally - * some single GPU laptops seem to require ACPI or BIOS access to the - * VGA registers to control things like backlights etc. - * Hopefully newer multi-GPU laptops do something saner, and desktops - * won't have any special ACPI for this. - * They driver will get a callback when VGA arbitration is first used - * by userspace since we some older X servers have issues. - */ #if defined(CONFIG_VGA_ARB) int vga_client_register(struct pci_dev *pdev, void *cookie, void (*irq_set_state)(void *cookie, bool state), -- cgit v1.2.3 From cafaf14a5d8f152ed3c984ecd48dee6e824446bc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 19 Aug 2016 16:54:26 +0100 Subject: io-mapping: Always create a struct to hold metadata about the io-mapping Currently, we only allocate a structure to hold metadata if we need to allocate an ioremap for every access, such as on x86-32. However, it would be useful to store basic information about the io-mapping, such as its page protection, on all platforms. Signed-off-by: Chris Wilson Cc: linux-mm@kvack.org Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20160819155428.1670-4-chris@chris-wilson.co.uk --- include/linux/io-mapping.h | 92 ++++++++++++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 645ad06b5d52..b4c4b5c4216d 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -31,16 +31,16 @@ * See Documentation/io-mapping.txt */ -#ifdef CONFIG_HAVE_ATOMIC_IOMAP - -#include - struct io_mapping { resource_size_t base; unsigned long size; pgprot_t prot; + void __iomem *iomem; }; +#ifdef CONFIG_HAVE_ATOMIC_IOMAP + +#include /* * For small address space machines, mapping large objects * into the kernel virtual space isn't practical. Where @@ -49,34 +49,25 @@ struct io_mapping { */ static inline struct io_mapping * -io_mapping_create_wc(resource_size_t base, unsigned long size) +io_mapping_init_wc(struct io_mapping *iomap, + resource_size_t base, + unsigned long size) { - struct io_mapping *iomap; pgprot_t prot; - iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); - if (!iomap) - goto out_err; - if (iomap_create_wc(base, size, &prot)) - goto out_free; + return NULL; iomap->base = base; iomap->size = size; iomap->prot = prot; return iomap; - -out_free: - kfree(iomap); -out_err: - return NULL; } static inline void -io_mapping_free(struct io_mapping *mapping) +io_mapping_fini(struct io_mapping *mapping) { iomap_free(mapping->base, mapping->size); - kfree(mapping); } /* Atomic map/unmap */ @@ -121,21 +112,40 @@ io_mapping_unmap(void __iomem *vaddr) #else #include - -/* this struct isn't actually defined anywhere */ -struct io_mapping; +#include /* Create the io_mapping object*/ static inline struct io_mapping * -io_mapping_create_wc(resource_size_t base, unsigned long size) +io_mapping_init_wc(struct io_mapping *iomap, + resource_size_t base, + unsigned long size) +{ + iomap->base = base; + iomap->size = size; + iomap->iomem = ioremap_wc(base, size); + iomap->prot = pgprot_writecombine(PAGE_KERNEL_IO); + + return iomap; +} + +static inline void +io_mapping_fini(struct io_mapping *mapping) +{ + iounmap(mapping->iomem); +} + +/* Non-atomic map/unmap */ +static inline void __iomem * +io_mapping_map_wc(struct io_mapping *mapping, + unsigned long offset, + unsigned long size) { - return (struct io_mapping __force *) ioremap_wc(base, size); + return mapping->iomem + offset; } static inline void -io_mapping_free(struct io_mapping *mapping) +io_mapping_unmap(void __iomem *vaddr) { - iounmap((void __force __iomem *) mapping); } /* Atomic map/unmap */ @@ -145,30 +155,42 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, { preempt_disable(); pagefault_disable(); - return ((char __force __iomem *) mapping) + offset; + return io_mapping_map_wc(mapping, offset, PAGE_SIZE); } static inline void io_mapping_unmap_atomic(void __iomem *vaddr) { + io_mapping_unmap(vaddr); pagefault_enable(); preempt_enable(); } -/* Non-atomic map/unmap */ -static inline void __iomem * -io_mapping_map_wc(struct io_mapping *mapping, - unsigned long offset, - unsigned long size) +#endif /* HAVE_ATOMIC_IOMAP */ + +static inline struct io_mapping * +io_mapping_create_wc(resource_size_t base, + unsigned long size) { - return ((char __force __iomem *) mapping) + offset; + struct io_mapping *iomap; + + iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); + if (!iomap) + return NULL; + + if (!io_mapping_init_wc(iomap, base, size)) { + kfree(iomap); + return NULL; + } + + return iomap; } static inline void -io_mapping_unmap(void __iomem *vaddr) +io_mapping_free(struct io_mapping *iomap) { + io_mapping_fini(iomap); + kfree(iomap); } -#endif /* HAVE_ATOMIC_IOMAP */ - #endif /* _LINUX_IO_MAPPING_H */ -- cgit v1.2.3 From bcaaa0c4310ebe1ab04274594916bbb2d82a49c8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 23 Aug 2016 16:50:24 +0100 Subject: io-mapping.h: s/PAGE_KERNEL_IO/PAGE_KERNEL/ PAGE_KERNEL_IO is an x86-ism. Though it is used to define the pgprot_t used for the iomapped region, it itself is just PAGE_KERNEL. On all other arches, PAGE_KERNEL_IO is undefined so in a general header we must refrain from using it. v2: include pgtable for pgprot_combine() Reported-by: Stephen Rothwell Fixes: cafaf14a5d8f ("io-mapping: Always create a struct to hold metadata about the io-mapping") Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Joonas Lahtinen Cc: linux-mm@kvack.org Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20160823155024.22379-1-chris@chris-wilson.co.uk (cherry picked from commit ac96b5566926af83463ddcf4655856033c092f26) --- include/linux/io-mapping.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index b4c4b5c4216d..a87dd7fffc0a 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -112,7 +112,7 @@ io_mapping_unmap(void __iomem *vaddr) #else #include -#include +#include /* Create the io_mapping object*/ static inline struct io_mapping * @@ -123,7 +123,7 @@ io_mapping_init_wc(struct io_mapping *iomap, iomap->base = base; iomap->size = size; iomap->iomem = ioremap_wc(base, size); - iomap->prot = pgprot_writecombine(PAGE_KERNEL_IO); + iomap->prot = pgprot_writecombine(PAGE_KERNEL); return iomap; } -- cgit v1.2.3 From 351243897b15aba02ad15317724d616aeaf00c7d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 23 Aug 2016 22:15:02 +0200 Subject: io-mapping: Fixup for different names of writecombine Somehow architectures can't agree on this. And for good measure make sure we have a fallback which should work everywhere (fingers crossed). v2: Make it compile properly, needs a defined() for the #elif. Fixes: ac96b5566926 ("io-mapping.h: s/PAGE_KERNEL_IO/PAGE_KERNEL/") Cc: Chris Wilson Cc: Daniel Vetter Cc: Joonas Lahtinen Cc: linux-mm@kvack.org Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20160823202233.4681-1-daniel.vetter@ffwll.ch (cherry picked from commit 80c33624e4723c4e22d9917cd676067ebf652dc2) --- include/linux/io-mapping.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index a87dd7fffc0a..58df02bd93c9 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -123,7 +123,13 @@ io_mapping_init_wc(struct io_mapping *iomap, iomap->base = base; iomap->size = size; iomap->iomem = ioremap_wc(base, size); +#if defined(pgprot_noncached_wc) /* archs can't agree on a name ... */ + iomap->prot = pgprot_noncached_wc(PAGE_KERNEL); +#elif defined(pgprot_writecombine) iomap->prot = pgprot_writecombine(PAGE_KERNEL); +#else + iomap->prot = pgprot_noncached(PAGE_KERNEL); +#endif return iomap; } -- cgit v1.2.3