From 122020af856181c24fe45903e43e3cc987c175f7 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 15 Nov 2016 15:46:42 +0000
Subject: dma-buf: Provide wrappers for reservation's lock

Joonas complained that writing ww_mutex_lock(&resv->lock, ctx) was too
intrusive compared to reservation_object_lock(resv, ctx);

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161115154642.31850-1-chris@chris-wilson.co.uk
---
 include/linux/reservation.h | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'include/linux')
diff --git a/include/linux/reservation.h b/include/linux/reservation.h
index d9706a6f5ae2..2b5a4679daea 100644
--- a/include/linux/reservation.h
+++ b/include/linux/reservation.h
@@ -144,6 +144,40 @@ reservation_object_get_list(struct reservation_object *obj)
 					 reservation_object_held(obj));
 }
 
+/**
+ * reservation_object_lock - lock the reservation object
+ * @obj: the reservation object
+ * @ctx: the locking context
+ *
+ * Locks the reservation object for exclusive access and modification. Note,
+ * that the lock is only against other writers, readers will run concurrently
+ * with a writer under RCU. The seqlock is used to notify readers if they
+ * overlap with a writer.
+ *
+ * As the reservation object may be locked by multiple parties in an
+ * undefined order, a #ww_acquire_ctx is passed to unwind if a cycle
+ * is detected. See ww_mutex_lock() and ww_acquire_init(). A reservation
+ * object may be locked by itself by passing NULL as @ctx.
+ */
+static inline int
+reservation_object_lock(struct reservation_object *obj,
+			struct ww_acquire_ctx *ctx)
+{
+	return ww_mutex_lock(&obj->lock, ctx);
+}
+
+/**
+ * reservation_object_unlock - unlock the reservation object
+ * @obj: the reservation object
+ *
+ * Unlocks the reservation object following exclusive access.
+ */
+static inline void
+reservation_object_unlock(struct reservation_object *obj)
+{
+	ww_mutex_unlock(&obj->lock);
+}
+
 /**
  * reservation_object_get_excl - get the reservation object's
  * exclusive fence, with update-side lock held
-- 
cgit v1.2.3


From 2904a8c1311f02896635fd35744262413a0b2726 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 9 Dec 2016 19:53:07 +0100
Subject: dma-buf: Reorganize device dma access docs

- Put the initial overview for dma-buf into dma-buf.rst.
- Put all the comments about detailed semantics into the right
  kernel-doc comment for functions or ops structure member.
- To allow that detail, switch the reworked kerneldoc to inline style
  for dma_buf_ops.
- Tie everything together into a much more streamlined overview
  comment, relying on the hyperlinks for all the details.
- Also sprinkle some links into the kerneldoc for dma_buf and
  dma_buf_attachment to tie it all together.

Cc: linux-doc@vger.kernel.org
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20161209185309.1682-4-daniel.vetter@ffwll.ch
---
 Documentation/dma-buf-sharing.txt    | 222 -----------------------------------
 Documentation/driver-api/dma-buf.rst |  38 ++++++
 drivers/dma-buf/dma-buf.c            |  64 +++++++++-
 drivers/dma-buf/sync_file.c          |   1 -
 include/linux/dma-buf.h              | 133 +++++++++++++++++----
 5 files changed, 207 insertions(+), 251 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
index ca44c5820585..dca2fb7ac3b4 100644
--- a/Documentation/dma-buf-sharing.txt
+++ b/Documentation/dma-buf-sharing.txt
@@ -5,228 +5,6 @@
                 <sumit dot semwal at linaro dot org>
                  <sumit dot semwal at ti dot com>
 
-This document serves as a guide to device-driver writers on what is the dma-buf
-buffer sharing API, how to use it for exporting and using shared buffers.
-
-Any device driver which wishes to be a part of DMA buffer sharing, can do so as
-either the 'exporter' of buffers, or the 'user' of buffers.
-
-Say a driver A wants to use buffers created by driver B, then we call B as the
-exporter, and A as buffer-user.
-
-The exporter
-- implements and manages operations[1] for the buffer
-- allows other users to share the buffer by using dma_buf sharing APIs,
-- manages the details of buffer allocation,
-- decides about the actual backing storage where this allocation happens,
-- takes care of any migration of scatterlist - for all (shared) users of this
-   buffer,
-
-The buffer-user
-- is one of (many) sharing users of the buffer.
-- doesn't need to worry about how the buffer is allocated, or where.
-- needs a mechanism to get access to the scatterlist that makes up this buffer
-   in memory, mapped into its own address space, so it can access the same area
-   of memory.
-
-dma-buf operations for device dma only
---------------------------------------
-
-The dma_buf buffer sharing API usage contains the following steps:
-
-1. Exporter announces that it wishes to export a buffer
-2. Userspace gets the file descriptor associated with the exported buffer, and
-   passes it around to potential buffer-users based on use case
-3. Each buffer-user 'connects' itself to the buffer
-4. When needed, buffer-user requests access to the buffer from exporter
-5. When finished with its use, the buffer-user notifies end-of-DMA to exporter
-6. when buffer-user is done using this buffer completely, it 'disconnects'
-   itself from the buffer.
-
-
-1. Exporter's announcement of buffer export
-
-   The buffer exporter announces its wish to export a buffer. In this, it
-   connects its own private buffer data, provides implementation for operations
-   that can be performed on the exported dma_buf, and flags for the file
-   associated with this buffer. All these fields are filled in struct
-   dma_buf_export_info, defined via the DEFINE_DMA_BUF_EXPORT_INFO macro.
-
-   Interface:
-      DEFINE_DMA_BUF_EXPORT_INFO(exp_info)
-      struct dma_buf *dma_buf_export(struct dma_buf_export_info *exp_info)
-
-   If this succeeds, dma_buf_export allocates a dma_buf structure, and
-   returns a pointer to the same. It also associates an anonymous file with this
-   buffer, so it can be exported. On failure to allocate the dma_buf object,
-   it returns NULL.
-
-   'exp_name' in struct dma_buf_export_info is the name of exporter - to
-   facilitate information while debugging. It is set to KBUILD_MODNAME by
-   default, so exporters don't have to provide a specific name, if they don't
-   wish to.
-
-   DEFINE_DMA_BUF_EXPORT_INFO macro defines the struct dma_buf_export_info,
-   zeroes it out and pre-populates exp_name in it.
-
-
-2. Userspace gets a handle to pass around to potential buffer-users
-
-   Userspace entity requests for a file-descriptor (fd) which is a handle to the
-   anonymous file associated with the buffer. It can then share the fd with other
-   drivers and/or processes.
-
-   Interface:
-      int dma_buf_fd(struct dma_buf *dmabuf, int flags)
-
-   This API installs an fd for the anonymous file associated with this buffer;
-   returns either 'fd', or error.
-
-3. Each buffer-user 'connects' itself to the buffer
-
-   Each buffer-user now gets a reference to the buffer, using the fd passed to
-   it.
-
-   Interface:
-      struct dma_buf *dma_buf_get(int fd)
-
-   This API will return a reference to the dma_buf, and increment refcount for
-   it.
-
-   After this, the buffer-user needs to attach its device with the buffer, which
-   helps the exporter to know of device buffer constraints.
-
-   Interface:
-      struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
-                                                struct device *dev)
-
-   This API returns reference to an attachment structure, which is then used
-   for scatterlist operations. It will optionally call the 'attach' dma_buf
-   operation, if provided by the exporter.
-
-   The dma-buf sharing framework does the bookkeeping bits related to managing
-   the list of all attachments to a buffer.
-
-Until this stage, the buffer-exporter has the option to choose not to actually
-allocate the backing storage for this buffer, but wait for the first buffer-user
-to request use of buffer for allocation.
-
-
-4. When needed, buffer-user requests access to the buffer
-
-   Whenever a buffer-user wants to use the buffer for any DMA, it asks for
-   access to the buffer using dma_buf_map_attachment API. At least one attach to
-   the buffer must have happened before map_dma_buf can be called.
-
-   Interface:
-      struct sg_table * dma_buf_map_attachment(struct dma_buf_attachment *,
-                                         enum dma_data_direction);
-
-   This is a wrapper to dma_buf->ops->map_dma_buf operation, which hides the
-   "dma_buf->ops->" indirection from the users of this interface.
-
-   In struct dma_buf_ops, map_dma_buf is defined as
-      struct sg_table * (*map_dma_buf)(struct dma_buf_attachment *,
-                                                enum dma_data_direction);
-
-   It is one of the buffer operations that must be implemented by the exporter.
-   It should return the sg_table containing scatterlist for this buffer, mapped
-   into caller's address space.
-
-   If this is being called for the first time, the exporter can now choose to
-   scan through the list of attachments for this buffer, collate the requirements
-   of the attached devices, and choose an appropriate backing storage for the
-   buffer.
-
-   Based on enum dma_data_direction, it might be possible to have multiple users
-   accessing at the same time (for reading, maybe), or any other kind of sharing
-   that the exporter might wish to make available to buffer-users.
-
-   map_dma_buf() operation can return -EINTR if it is interrupted by a signal.
-
-
-5. When finished, the buffer-user notifies end-of-DMA to exporter
-
-   Once the DMA for the current buffer-user is over, it signals 'end-of-DMA' to
-   the exporter using the dma_buf_unmap_attachment API.
-
-   Interface:
-      void dma_buf_unmap_attachment(struct dma_buf_attachment *,
-                                    struct sg_table *);
-
-   This is a wrapper to dma_buf->ops->unmap_dma_buf() operation, which hides the
-   "dma_buf->ops->" indirection from the users of this interface.
-
-   In struct dma_buf_ops, unmap_dma_buf is defined as
-      void (*unmap_dma_buf)(struct dma_buf_attachment *,
-                            struct sg_table *,
-                            enum dma_data_direction);
-
-   unmap_dma_buf signifies the end-of-DMA for the attachment provided. Like
-   map_dma_buf, this API also must be implemented by the exporter.
-
-
-6. when buffer-user is done using this buffer, it 'disconnects' itself from the
-   buffer.
-
-   After the buffer-user has no more interest in using this buffer, it should
-   disconnect itself from the buffer:
-
-   - it first detaches itself from the buffer.
-
-   Interface:
-      void dma_buf_detach(struct dma_buf *dmabuf,
-                          struct dma_buf_attachment *dmabuf_attach);
-
-   This API removes the attachment from the list in dmabuf, and optionally calls
-   dma_buf->ops->detach(), if provided by exporter, for any housekeeping bits.
-
-   - Then, the buffer-user returns the buffer reference to exporter.
-
-   Interface:
-     void dma_buf_put(struct dma_buf *dmabuf);
-
-   This API then reduces the refcount for this buffer.
-
-   If, as a result of this call, the refcount becomes 0, the 'release' file
-   operation related to this fd is called. It calls the dmabuf->ops->release()
-   operation in turn, and frees the memory allocated for dmabuf when exported.
-
-NOTES:
-- Importance of attach-detach and {map,unmap}_dma_buf operation pairs
-   The attach-detach calls allow the exporter to figure out backing-storage
-   constraints for the currently-interested devices. This allows preferential
-   allocation, and/or migration of pages across different types of storage
-   available, if possible.
-
-   Bracketing of DMA access with {map,unmap}_dma_buf operations is essential
-   to allow just-in-time backing of storage, and migration mid-way through a
-   use-case.
-
-- Migration of backing storage if needed
-   If after
-   - at least one map_dma_buf has happened,
-   - and the backing storage has been allocated for this buffer,
-   another new buffer-user intends to attach itself to this buffer, it might
-   be allowed, if possible for the exporter.
-
-   In case it is allowed by the exporter:
-    if the new buffer-user has stricter 'backing-storage constraints', and the
-    exporter can handle these constraints, the exporter can just stall on the
-    map_dma_buf until all outstanding access is completed (as signalled by
-    unmap_dma_buf).
-    Once all users have finished accessing and have unmapped this buffer, the
-    exporter could potentially move the buffer to the stricter backing-storage,
-    and then allow further {map,unmap}_dma_buf operations from any buffer-user
-    from the migrated backing-storage.
-
-   If the exporter cannot fulfill the backing-storage constraints of the new
-   buffer-user device as requested, dma_buf_attach() would return an error to
-   denote non-compatibility of the new buffer-sharing request with the current
-   buffer.
-
-   If the exporter chooses not to allow an attach() operation once a
-   map_dma_buf() API has been called, it simply returns an error.
 
 Kernel cpu access to a dma-buf buffer object
 --------------------------------------------
diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst
index a9b457a4b949..906d1532efad 100644
--- a/Documentation/driver-api/dma-buf.rst
+++ b/Documentation/driver-api/dma-buf.rst
@@ -17,6 +17,44 @@ shared or exclusive fence(s) associated with the buffer.
 Shared DMA Buffers
 ------------------
 
+This document serves as a guide to device-driver writers on what is the dma-buf
+buffer sharing API, how to use it for exporting and using shared buffers.
+
+Any device driver which wishes to be a part of DMA buffer sharing, can do so as
+either the 'exporter' of buffers, or the 'user' or 'importer' of buffers.
+
+Say a driver A wants to use buffers created by driver B, then we call B as the
+exporter, and A as buffer-user/importer.
+
+The exporter
+
+ - implements and manages operations in :c:type:`struct dma_buf_ops
+   <dma_buf_ops>` for the buffer,
+ - allows other users to share the buffer by using dma_buf sharing APIs,
+ - manages the details of buffer allocation, wrapped int a :c:type:`struct
+   dma_buf <dma_buf>`,
+ - decides about the actual backing storage where this allocation happens,
+ - and takes care of any migration of scatterlist - for all (shared) users of
+   this buffer.
+
+The buffer-user
+
+ - is one of (many) sharing users of the buffer.
+ - doesn't need to worry about how the buffer is allocated, or where.
+ - and needs a mechanism to get access to the scatterlist that makes up this
+   buffer in memory, mapped into its own address space, so it can access the
+   same area of memory. This interface is provided by :c:type:`struct
+   dma_buf_attachment <dma_buf_attachment>`.
+
+Basic Operation and Device DMA Access
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/dma-buf/dma-buf.c
+   :doc: dma buf device access
+
+Kernel Functions and Structures Reference
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 .. kernel-doc:: drivers/dma-buf/dma-buf.c
    :export:
 
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index e72e64484131..09f948fd62ad 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -313,6 +313,37 @@ static inline int is_dma_buf_file(struct file *file)
 	return file->f_op == &dma_buf_fops;
 }
 
+/**
+ * DOC: dma buf device access
+ *
+ * For device DMA access to a shared DMA buffer the usual sequence of operations
+ * is fairly simple:
+ *
+ * 1. The exporter defines his exporter instance using
+ *    DEFINE_DMA_BUF_EXPORT_INFO() and calls dma_buf_export() to wrap a private
+ *    buffer object into a &dma_buf. It then exports that &dma_buf to userspace
+ *    as a file descriptor by calling dma_buf_fd().
+ *
+ * 2. Userspace passes this file-descriptors to all drivers it wants this buffer
+ *    to share with: First the filedescriptor is converted to a &dma_buf using
+ *    dma_buf_get(). The the buffer is attached to the device using
+ *    dma_buf_attach().
+ *
+ *    Up to this stage the exporter is still free to migrate or reallocate the
+ *    backing storage.
+ *
+ * 3. Once the buffer is attached to all devices userspace can inniate DMA
+ *    access to the shared buffer. In the kernel this is done by calling
+ *    dma_buf_map_attachment() and dma_buf_unmap_attachment().
+ *
+ * 4. Once a driver is done with a shared buffer it needs to call
+ *    dma_buf_detach() (after cleaning up any mappings) and then release the
+ *    reference acquired with dma_buf_get by calling dma_buf_put().
+ *
+ * For the detailed semantics exporters are expected to implement see
+ * &dma_buf_ops.
+ */
+
 /**
  * dma_buf_export - Creates a new dma_buf, and associates an anon file
  * with this buffer, so it can be exported.
@@ -320,13 +351,15 @@ static inline int is_dma_buf_file(struct file *file)
  * Additionally, provide a name string for exporter; useful in debugging.
  *
  * @exp_info:	[in]	holds all the export related information provided
- *			by the exporter. see struct dma_buf_export_info
+ *			by the exporter. see struct &dma_buf_export_info
  *			for further details.
  *
  * Returns, on success, a newly created dma_buf object, which wraps the
  * supplied private data and operations for dma_buf_ops. On either missing
  * ops, or error in allocating struct dma_buf, will return negative error.
  *
+ * For most cases the easiest way to create @exp_info is through the
+ * %DEFINE_DMA_BUF_EXPORT_INFO macro.
  */
 struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 {
@@ -458,7 +491,12 @@ EXPORT_SYMBOL_GPL(dma_buf_get);
  * dma_buf_put - decreases refcount of the buffer
  * @dmabuf:	[in]	buffer to reduce refcount of
  *
- * Uses file's refcounting done implicitly by fput()
+ * Uses file's refcounting done implicitly by fput().
+ *
+ * If, as a result of this call, the refcount becomes 0, the 'release' file
+ * operation related to this fd is called. It calls the release operation of
+ * struct &dma_buf_ops in turn, and frees the memory allocated for dmabuf when
+ * exported.
  */
 void dma_buf_put(struct dma_buf *dmabuf)
 {
@@ -475,8 +513,17 @@ EXPORT_SYMBOL_GPL(dma_buf_put);
  * @dmabuf:	[in]	buffer to attach device to.
  * @dev:	[in]	device to be attached.
  *
- * Returns struct dma_buf_attachment * for this attachment; returns ERR_PTR on
- * error.
+ * Returns struct dma_buf_attachment pointer for this attachment. Attachments
+ * must be cleaned up by calling dma_buf_detach().
+ *
+ * Returns:
+ *
+ * A pointer to newly created &dma_buf_attachment on success, or a negative
+ * error code wrapped into a pointer on failure.
+ *
+ * Note that this can fail if the backing storage of @dmabuf is in a place not
+ * accessible to @dev, and cannot be moved to a more suitable place. This is
+ * indicated with the error code -EBUSY.
  */
 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 					  struct device *dev)
@@ -519,6 +566,7 @@ EXPORT_SYMBOL_GPL(dma_buf_attach);
  * @dmabuf:	[in]	buffer to detach from.
  * @attach:	[in]	attachment to be detached; is free'd after this call.
  *
+ * Clean up a device attachment obtained by calling dma_buf_attach().
  */
 void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 {
@@ -543,7 +591,12 @@ EXPORT_SYMBOL_GPL(dma_buf_detach);
  * @direction:	[in]	direction of DMA transfer
  *
  * Returns sg_table containing the scatterlist to be returned; returns ERR_PTR
- * on error.
+ * on error. May return -EINTR if it is interrupted by a signal.
+ *
+ * A mapping must be unmapped again using dma_buf_map_attachment(). Note that
+ * the underlying backing storage is pinned for as long as a mapping exists,
+ * therefore users/importers should not hold onto a mapping for undue amounts of
+ * time.
  */
 struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 					enum dma_data_direction direction)
@@ -571,6 +624,7 @@ EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
  * @sg_table:	[in]	scatterlist info of the buffer to unmap
  * @direction:  [in]    direction of DMA transfer
  *
+ * This unmaps a DMA mapping for @attached obtained by dma_buf_map_attachment().
  */
 void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
 				struct sg_table *sg_table,
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index d5179d7e8575..07cb9b908f30 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -462,4 +462,3 @@ static const struct file_operations sync_file_fops = {
 	.unlocked_ioctl = sync_file_ioctl,
 	.compat_ioctl = sync_file_ioctl,
 };
-
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 8daeb3ce0016..6df170fb243f 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -39,19 +39,6 @@ struct dma_buf_attachment;
 
 /**
  * struct dma_buf_ops - operations possible on struct dma_buf
- * @attach: [optional] allows different devices to 'attach' themselves to the
- *	    given buffer. It might return -EBUSY to signal that backing storage
- *	    is already allocated and incompatible with the requirements
- *	    of requesting device.
- * @detach: [optional] detach a given device from this buffer.
- * @map_dma_buf: returns list of scatter pages allocated, increases usecount
- *		 of the buffer. Requires atleast one attach to be called
- *		 before. Returned sg list should already be mapped into
- *		 _device_ address space. This call may sleep. May also return
- *		 -EINTR. Should return -EINVAL if attach hasn't been called yet.
- * @unmap_dma_buf: decreases usecount of buffer, might deallocate scatter
- *		   pages.
- * @release: release this buffer; to be called after the last dma_buf_put.
  * @begin_cpu_access: [optional] called before cpu access to invalidate cpu
  * 		      caches and allocate backing storage (if not yet done)
  * 		      respectively pin the object into memory.
@@ -72,25 +59,109 @@ struct dma_buf_attachment;
  * @vunmap: [optional] unmaps a vmap from the buffer
  */
 struct dma_buf_ops {
+	/**
+	 * @attach:
+	 *
+	 * This is called from dma_buf_attach() to make sure that a given
+	 * &device can access the provided &dma_buf. Exporters which support
+	 * buffer objects in special locations like VRAM or device-specific
+	 * carveout areas should check whether the buffer could be move to
+	 * system memory (or directly accessed by the provided device), and
+	 * otherwise need to fail the attach operation.
+	 *
+	 * The exporter should also in general check whether the current
+	 * allocation fullfills the DMA constraints of the new device. If this
+	 * is not the case, and the allocation cannot be moved, it should also
+	 * fail the attach operation.
+	 *
+	 * Any exporter-private housekeeping data can be stored in the priv
+	 * pointer of &dma_buf_attachment structure.
+	 *
+	 * This callback is optional.
+	 *
+	 * Returns:
+	 *
+	 * 0 on success, negative error code on failure. It might return -EBUSY
+	 * to signal that backing storage is already allocated and incompatible
+	 * with the requirements of requesting device.
+	 */
 	int (*attach)(struct dma_buf *, struct device *,
-			struct dma_buf_attachment *);
+		      struct dma_buf_attachment *);
 
+	/**
+	 * @detach:
+	 *
+	 * This is called by dma_buf_detach() to release a &dma_buf_attachment.
+	 * Provided so that exporters can clean up any housekeeping for an
+	 * &dma_buf_attachment.
+	 *
+	 * This callback is optional.
+	 */
 	void (*detach)(struct dma_buf *, struct dma_buf_attachment *);
 
-	/* For {map,unmap}_dma_buf below, any specific buffer attributes
-	 * required should get added to device_dma_parameters accessible
-	 * via dev->dma_params.
+	/**
+	 * @map_dma_buf:
+	 *
+	 * This is called by dma_buf_map_attachment() and is used to map a
+	 * shared &dma_buf into device address space, and it is mandatory. It
+	 * can only be called if @attach has been called successfully. This
+	 * essentially pins the DMA buffer into place, and it cannot be moved
+	 * any more
+	 *
+	 * This call may sleep, e.g. when the backing storage first needs to be
+	 * allocated, or moved to a location suitable for all currently attached
+	 * devices.
+	 *
+	 * Note that any specific buffer attributes required for this function
+	 * should get added to device_dma_parameters accessible via
+	 * device->dma_params from the &dma_buf_attachment. The @attach callback
+	 * should also check these constraints.
+	 *
+	 * If this is being called for the first time, the exporter can now
+	 * choose to scan through the list of attachments for this buffer,
+	 * collate the requirements of the attached devices, and choose an
+	 * appropriate backing storage for the buffer.
+	 *
+	 * Based on enum dma_data_direction, it might be possible to have
+	 * multiple users accessing at the same time (for reading, maybe), or
+	 * any other kind of sharing that the exporter might wish to make
+	 * available to buffer-users.
+	 *
+	 * Returns:
+	 *
+	 * A &sg_table scatter list of or the backing storage of the DMA buffer,
+	 * already mapped into the device address space of the &device attached
+	 * with the provided &dma_buf_attachment.
+	 *
+	 * On failure, returns a negative error value wrapped into a pointer.
+	 * May also return -EINTR when a signal was received while being
+	 * blocked.
 	 */
 	struct sg_table * (*map_dma_buf)(struct dma_buf_attachment *,
-						enum dma_data_direction);
+					 enum dma_data_direction);
+	/**
+	 * @unmap_dma_buf:
+	 *
+	 * This is called by dma_buf_unmap_attachment() and should unmap and
+	 * release the &sg_table allocated in @map_dma_buf, and it is mandatory.
+	 * It should also unpin the backing storage if this is the last mapping
+	 * of the DMA buffer, it the exporter supports backing storage
+	 * migration.
+	 */
 	void (*unmap_dma_buf)(struct dma_buf_attachment *,
-						struct sg_table *,
-						enum dma_data_direction);
+			      struct sg_table *,
+			      enum dma_data_direction);
+
 	/* TODO: Add try_map_dma_buf version, to return immed with -EBUSY
 	 * if the call would block.
 	 */
 
-	/* after final dma_buf_put() */
+	/**
+	 * @release:
+	 *
+	 * Called after the last dma_buf_put to release the &dma_buf, and
+	 * mandatory.
+	 */
 	void (*release)(struct dma_buf *);
 
 	int (*begin_cpu_access)(struct dma_buf *, enum dma_data_direction);
@@ -124,6 +195,15 @@ struct dma_buf_ops {
  * @poll: for userspace poll support
  * @cb_excl: for userspace poll support
  * @cb_shared: for userspace poll support
+ *
+ * This represents a shared buffer, created by calling dma_buf_export(). The
+ * userspace representation is a normal file descriptor, which can be created by
+ * calling dma_buf_fd().
+ *
+ * Shared dma buffers are reference counted using dma_buf_put() and
+ * get_dma_buf().
+ *
+ * Device DMA access is handled by the separate struct &dma_buf_attachment.
  */
 struct dma_buf {
 	size_t size;
@@ -160,6 +240,11 @@ struct dma_buf {
  * This structure holds the attachment information between the dma_buf buffer
  * and its user device(s). The list contains one attachment struct per device
  * attached to the buffer.
+ *
+ * An attachment is created by calling dma_buf_attach(), and released again by
+ * calling dma_buf_detach(). The DMA mapping itself needed to initiate a
+ * transfer is created by dma_buf_map_attachment() and freed again by calling
+ * dma_buf_unmap_attachment().
  */
 struct dma_buf_attachment {
 	struct dma_buf *dmabuf;
@@ -192,9 +277,11 @@ struct dma_buf_export_info {
 };
 
 /**
- * helper macro for exporters; zeros and fills in most common values
- *
+ * DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters
  * @name: export-info name
+ *
+ * DEFINE_DMA_BUF_EXPORT_INFO macro defines the struct &dma_buf_export_info,
+ * zeroes it out and pre-populates exp_name in it.
  */
 #define DEFINE_DMA_BUF_EXPORT_INFO(name)	\
 	struct dma_buf_export_info name = { .exp_name = KBUILD_MODNAME, \
-- 
cgit v1.2.3


From 0959a1683d78270bab6381d498707fb8655ae11c Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 9 Dec 2016 19:53:08 +0100
Subject: dma-buf: Update cpu access documentation

- Again move the information relevant for driver writers next to the
  callbacks.
- Put the overview and userspace interface documentation into a DOC:
  section within the code.
- Remove the text that mmap needs to be coherent - since the
  DMA_BUF_IOCTL_SYNC landed that's no longer the case. But keep the text
  that for pte zapping exporters need to adjust the address space.
- Add a FIXME that kmap and the new begin/end stuff used by the SYNC
  ioctl don't really mix correctly. That's something I just realized
  while doing this doc rework.
- Augment function and structure docs like usual.

Cc: linux-doc@vger.kernel.org
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
  [sumits: fix cosmetic issues]
Link: http://patchwork.freedesktop.org/patch/msgid/20161209185309.1682-5-daniel.vetter@ffwll.ch
---
 Documentation/dma-buf-sharing.txt    | 213 -----------------------------------
 Documentation/driver-api/dma-buf.rst |   6 +
 drivers/dma-buf/dma-buf.c            | 122 ++++++++++++++++++++
 include/linux/dma-buf.h              |  91 +++++++++++++--
 4 files changed, 211 insertions(+), 221 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
index dca2fb7ac3b4..74c99edb7976 100644
--- a/Documentation/dma-buf-sharing.txt
+++ b/Documentation/dma-buf-sharing.txt
@@ -6,205 +6,6 @@
                  <sumit dot semwal at ti dot com>
 
 
-Kernel cpu access to a dma-buf buffer object
---------------------------------------------
-
-The motivation to allow cpu access from the kernel to a dma-buf object from the
-importers side are:
-- fallback operations, e.g. if the devices is connected to a usb bus and the
-  kernel needs to shuffle the data around first before sending it away.
-- full transparency for existing users on the importer side, i.e. userspace
-  should not notice the difference between a normal object from that subsystem
-  and an imported one backed by a dma-buf. This is really important for drm
-  opengl drivers that expect to still use all the existing upload/download
-  paths.
-
-Access to a dma_buf from the kernel context involves three steps:
-
-1. Prepare access, which invalidate any necessary caches and make the object
-   available for cpu access.
-2. Access the object page-by-page with the dma_buf map apis
-3. Finish access, which will flush any necessary cpu caches and free reserved
-   resources.
-
-1. Prepare access
-
-   Before an importer can access a dma_buf object with the cpu from the kernel
-   context, it needs to notify the exporter of the access that is about to
-   happen.
-
-   Interface:
-      int dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
-				   enum dma_data_direction direction)
-
-   This allows the exporter to ensure that the memory is actually available for
-   cpu access - the exporter might need to allocate or swap-in and pin the
-   backing storage. The exporter also needs to ensure that cpu access is
-   coherent for the access direction. The direction can be used by the exporter
-   to optimize the cache flushing, i.e. access with a different direction (read
-   instead of write) might return stale or even bogus data (e.g. when the
-   exporter needs to copy the data to temporary storage).
-
-   This step might fail, e.g. in oom conditions.
-
-2. Accessing the buffer
-
-   To support dma_buf objects residing in highmem cpu access is page-based using
-   an api similar to kmap. Accessing a dma_buf is done in aligned chunks of
-   PAGE_SIZE size. Before accessing a chunk it needs to be mapped, which returns
-   a pointer in kernel virtual address space. Afterwards the chunk needs to be
-   unmapped again. There is no limit on how often a given chunk can be mapped
-   and unmapped, i.e. the importer does not need to call begin_cpu_access again
-   before mapping the same chunk again.
-
-   Interfaces:
-      void *dma_buf_kmap(struct dma_buf *, unsigned long);
-      void dma_buf_kunmap(struct dma_buf *, unsigned long, void *);
-
-   There are also atomic variants of these interfaces. Like for kmap they
-   facilitate non-blocking fast-paths. Neither the importer nor the exporter (in
-   the callback) is allowed to block when using these.
-
-   Interfaces:
-      void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long);
-      void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *);
-
-   For importers all the restrictions of using kmap apply, like the limited
-   supply of kmap_atomic slots. Hence an importer shall only hold onto at most 2
-   atomic dma_buf kmaps at the same time (in any given process context).
-
-   dma_buf kmap calls outside of the range specified in begin_cpu_access are
-   undefined. If the range is not PAGE_SIZE aligned, kmap needs to succeed on
-   the partial chunks at the beginning and end but may return stale or bogus
-   data outside of the range (in these partial chunks).
-
-   Note that these calls need to always succeed. The exporter needs to complete
-   any preparations that might fail in begin_cpu_access.
-
-   For some cases the overhead of kmap can be too high, a vmap interface
-   is introduced. This interface should be used very carefully, as vmalloc
-   space is a limited resources on many architectures.
-
-   Interfaces:
-      void *dma_buf_vmap(struct dma_buf *dmabuf)
-      void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
-
-   The vmap call can fail if there is no vmap support in the exporter, or if it
-   runs out of vmalloc space. Fallback to kmap should be implemented. Note that
-   the dma-buf layer keeps a reference count for all vmap access and calls down
-   into the exporter's vmap function only when no vmapping exists, and only
-   unmaps it once. Protection against concurrent vmap/vunmap calls is provided
-   by taking the dma_buf->lock mutex.
-
-3. Finish access
-
-   When the importer is done accessing the CPU, it needs to announce this to
-   the exporter (to facilitate cache flushing and unpinning of any pinned
-   resources). The result of any dma_buf kmap calls after end_cpu_access is
-   undefined.
-
-   Interface:
-      void dma_buf_end_cpu_access(struct dma_buf *dma_buf,
-				  enum dma_data_direction dir);
-
-
-Direct Userspace Access/mmap Support
-------------------------------------
-
-Being able to mmap an export dma-buf buffer object has 2 main use-cases:
-- CPU fallback processing in a pipeline and
-- supporting existing mmap interfaces in importers.
-
-1. CPU fallback processing in a pipeline
-
-   In many processing pipelines it is sometimes required that the cpu can access
-   the data in a dma-buf (e.g. for thumbnail creation, snapshots, ...). To avoid
-   the need to handle this specially in userspace frameworks for buffer sharing
-   it's ideal if the dma_buf fd itself can be used to access the backing storage
-   from userspace using mmap.
-
-   Furthermore Android's ION framework already supports this (and is otherwise
-   rather similar to dma-buf from a userspace consumer side with using fds as
-   handles, too). So it's beneficial to support this in a similar fashion on
-   dma-buf to have a good transition path for existing Android userspace.
-
-   No special interfaces, userspace simply calls mmap on the dma-buf fd, making
-   sure that the cache synchronization ioctl (DMA_BUF_IOCTL_SYNC) is *always*
-   used when the access happens. Note that DMA_BUF_IOCTL_SYNC can fail with
-   -EAGAIN or -EINTR, in which case it must be restarted.
-
-   Some systems might need some sort of cache coherency management e.g. when
-   CPU and GPU domains are being accessed through dma-buf at the same time. To
-   circumvent this problem there are begin/end coherency markers, that forward
-   directly to existing dma-buf device drivers vfunc hooks. Userspace can make
-   use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence
-   would be used like following:
-     - mmap dma-buf fd
-     - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
-       to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
-       want (with the new data being consumed by the GPU or say scanout device)
-     - munmap once you don't need the buffer any more
-
-    For correctness and optimal performance, it is always required to use
-    SYNC_START and SYNC_END before and after, respectively, when accessing the
-    mapped address. Userspace cannot rely on coherent access, even when there
-    are systems where it just works without calling these ioctls.
-
-2. Supporting existing mmap interfaces in importers
-
-   Similar to the motivation for kernel cpu access it is again important that
-   the userspace code of a given importing subsystem can use the same interfaces
-   with a imported dma-buf buffer object as with a native buffer object. This is
-   especially important for drm where the userspace part of contemporary OpenGL,
-   X, and other drivers is huge, and reworking them to use a different way to
-   mmap a buffer rather invasive.
-
-   The assumption in the current dma-buf interfaces is that redirecting the
-   initial mmap is all that's needed. A survey of some of the existing
-   subsystems shows that no driver seems to do any nefarious thing like syncing
-   up with outstanding asynchronous processing on the device or allocating
-   special resources at fault time. So hopefully this is good enough, since
-   adding interfaces to intercept pagefaults and allow pte shootdowns would
-   increase the complexity quite a bit.
-
-   Interface:
-      int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
-		       unsigned long);
-
-   If the importing subsystem simply provides a special-purpose mmap call to set
-   up a mapping in userspace, calling do_mmap with dma_buf->file will equally
-   achieve that for a dma-buf object.
-
-3. Implementation notes for exporters
-
-   Because dma-buf buffers have invariant size over their lifetime, the dma-buf
-   core checks whether a vma is too large and rejects such mappings. The
-   exporter hence does not need to duplicate this check.
-
-   Because existing importing subsystems might presume coherent mappings for
-   userspace, the exporter needs to set up a coherent mapping. If that's not
-   possible, it needs to fake coherency by manually shooting down ptes when
-   leaving the cpu domain and flushing caches at fault time. Note that all the
-   dma_buf files share the same anon inode, hence the exporter needs to replace
-   the dma_buf file stored in vma->vm_file with it's own if pte shootdown is
-   required. This is because the kernel uses the underlying inode's address_space
-   for vma tracking (and hence pte tracking at shootdown time with
-   unmap_mapping_range).
-
-   If the above shootdown dance turns out to be too expensive in certain
-   scenarios, we can extend dma-buf with a more explicit cache tracking scheme
-   for userspace mappings. But the current assumption is that using mmap is
-   always a slower path, so some inefficiencies should be acceptable.
-
-   Exporters that shoot down mappings (for any reasons) shall not do any
-   synchronization at fault time with outstanding device operations.
-   Synchronization is an orthogonal issue to sharing the backing storage of a
-   buffer and hence should not be handled by dma-buf itself. This is explicitly
-   mentioned here because many people seem to want something like this, but if
-   different exporters handle this differently, buffer sharing can fail in
-   interesting ways depending upong the exporter (if userspace starts depending
-   upon this implicit synchronization).
-
 Other Interfaces Exposed to Userspace on the dma-buf FD
 ------------------------------------------------------
 
@@ -240,20 +41,6 @@ Miscellaneous notes
   the exporting driver to create a dmabuf fd must provide a way to let
   userspace control setting of O_CLOEXEC flag passed in to dma_buf_fd().
 
-- If an exporter needs to manually flush caches and hence needs to fake
-  coherency for mmap support, it needs to be able to zap all the ptes pointing
-  at the backing storage. Now linux mm needs a struct address_space associated
-  with the struct file stored in vma->vm_file to do that with the function
-  unmap_mapping_range. But the dma_buf framework only backs every dma_buf fd
-  with the anon_file struct file, i.e. all dma_bufs share the same file.
-
-  Hence exporters need to setup their own file (and address_space) association
-  by setting vma->vm_file and adjusting vma->vm_pgoff in the dma_buf mmap
-  callback. In the specific case of a gem driver the exporter could use the
-  shmem file already provided by gem (and set vm_pgoff = 0). Exporters can then
-  zap ptes by unmapping the corresponding range of the struct address_space
-  associated with their own file.
-
 References:
 [1] struct dma_buf_ops in include/linux/dma-buf.h
 [2] All interfaces mentioned above defined in include/linux/dma-buf.h
diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst
index 906d1532efad..92e417035e16 100644
--- a/Documentation/driver-api/dma-buf.rst
+++ b/Documentation/driver-api/dma-buf.rst
@@ -52,6 +52,12 @@ Basic Operation and Device DMA Access
 .. kernel-doc:: drivers/dma-buf/dma-buf.c
    :doc: dma buf device access
 
+CPU Access to DMA Buffer Objects
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/dma-buf/dma-buf.c
+   :doc: cpu access
+
 Kernel Functions and Structures Reference
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 09f948fd62ad..eae0846cbd95 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -640,6 +640,122 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
 }
 EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
 
+/**
+ * DOC: cpu access
+ *
+ * There are mutliple reasons for supporting CPU access to a dma buffer object:
+ *
+ * - Fallback operations in the kernel, for example when a device is connected
+ *   over USB and the kernel needs to shuffle the data around first before
+ *   sending it away. Cache coherency is handled by braketing any transactions
+ *   with calls to dma_buf_begin_cpu_access() and dma_buf_end_cpu_access()
+ *   access.
+ *
+ *   To support dma_buf objects residing in highmem cpu access is page-based
+ *   using an api similar to kmap. Accessing a dma_buf is done in aligned chunks
+ *   of PAGE_SIZE size. Before accessing a chunk it needs to be mapped, which
+ *   returns a pointer in kernel virtual address space. Afterwards the chunk
+ *   needs to be unmapped again. There is no limit on how often a given chunk
+ *   can be mapped and unmapped, i.e. the importer does not need to call
+ *   begin_cpu_access again before mapping the same chunk again.
+ *
+ *   Interfaces::
+ *      void \*dma_buf_kmap(struct dma_buf \*, unsigned long);
+ *      void dma_buf_kunmap(struct dma_buf \*, unsigned long, void \*);
+ *
+ *   There are also atomic variants of these interfaces. Like for kmap they
+ *   facilitate non-blocking fast-paths. Neither the importer nor the exporter
+ *   (in the callback) is allowed to block when using these.
+ *
+ *   Interfaces::
+ *      void \*dma_buf_kmap_atomic(struct dma_buf \*, unsigned long);
+ *      void dma_buf_kunmap_atomic(struct dma_buf \*, unsigned long, void \*);
+ *
+ *   For importers all the restrictions of using kmap apply, like the limited
+ *   supply of kmap_atomic slots. Hence an importer shall only hold onto at
+ *   max 2 atomic dma_buf kmaps at the same time (in any given process context).
+ *
+ *   dma_buf kmap calls outside of the range specified in begin_cpu_access are
+ *   undefined. If the range is not PAGE_SIZE aligned, kmap needs to succeed on
+ *   the partial chunks at the beginning and end but may return stale or bogus
+ *   data outside of the range (in these partial chunks).
+ *
+ *   Note that these calls need to always succeed. The exporter needs to
+ *   complete any preparations that might fail in begin_cpu_access.
+ *
+ *   For some cases the overhead of kmap can be too high, a vmap interface
+ *   is introduced. This interface should be used very carefully, as vmalloc
+ *   space is a limited resources on many architectures.
+ *
+ *   Interfaces::
+ *      void \*dma_buf_vmap(struct dma_buf \*dmabuf)
+ *      void dma_buf_vunmap(struct dma_buf \*dmabuf, void \*vaddr)
+ *
+ *   The vmap call can fail if there is no vmap support in the exporter, or if
+ *   it runs out of vmalloc space. Fallback to kmap should be implemented. Note
+ *   that the dma-buf layer keeps a reference count for all vmap access and
+ *   calls down into the exporter's vmap function only when no vmapping exists,
+ *   and only unmaps it once. Protection against concurrent vmap/vunmap calls is
+ *   provided by taking the dma_buf->lock mutex.
+ *
+ * - For full compatibility on the importer side with existing userspace
+ *   interfaces, which might already support mmap'ing buffers. This is needed in
+ *   many processing pipelines (e.g. feeding a software rendered image into a
+ *   hardware pipeline, thumbnail creation, snapshots, ...). Also, Android's ION
+ *   framework already supported this and for DMA buffer file descriptors to
+ *   replace ION buffers mmap support was needed.
+ *
+ *   There is no special interfaces, userspace simply calls mmap on the dma-buf
+ *   fd. But like for CPU access there's a need to braket the actual access,
+ *   which is handled by the ioctl (DMA_BUF_IOCTL_SYNC). Note that
+ *   DMA_BUF_IOCTL_SYNC can fail with -EAGAIN or -EINTR, in which case it must
+ *   be restarted.
+ *
+ *   Some systems might need some sort of cache coherency management e.g. when
+ *   CPU and GPU domains are being accessed through dma-buf at the same time.
+ *   To circumvent this problem there are begin/end coherency markers, that
+ *   forward directly to existing dma-buf device drivers vfunc hooks. Userspace
+ *   can make use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The
+ *   sequence would be used like following:
+ *
+ *     - mmap dma-buf fd
+ *     - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
+ *       to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
+ *       want (with the new data being consumed by say the GPU or the scanout
+ *       device)
+ *     - munmap once you don't need the buffer any more
+ *
+ *    For correctness and optimal performance, it is always required to use
+ *    SYNC_START and SYNC_END before and after, respectively, when accessing the
+ *    mapped address. Userspace cannot rely on coherent access, even when there
+ *    are systems where it just works without calling these ioctls.
+ *
+ * - And as a CPU fallback in userspace processing pipelines.
+ *
+ *   Similar to the motivation for kernel cpu access it is again important that
+ *   the userspace code of a given importing subsystem can use the same
+ *   interfaces with a imported dma-buf buffer object as with a native buffer
+ *   object. This is especially important for drm where the userspace part of
+ *   contemporary OpenGL, X, and other drivers is huge, and reworking them to
+ *   use a different way to mmap a buffer rather invasive.
+ *
+ *   The assumption in the current dma-buf interfaces is that redirecting the
+ *   initial mmap is all that's needed. A survey of some of the existing
+ *   subsystems shows that no driver seems to do any nefarious thing like
+ *   syncing up with outstanding asynchronous processing on the device or
+ *   allocating special resources at fault time. So hopefully this is good
+ *   enough, since adding interfaces to intercept pagefaults and allow pte
+ *   shootdowns would increase the complexity quite a bit.
+ *
+ *   Interface::
+ *      int dma_buf_mmap(struct dma_buf \*, struct vm_area_struct \*,
+ *		       unsigned long);
+ *
+ *   If the importing subsystem simply provides a special-purpose mmap call to
+ *   set up a mapping in userspace, calling do_mmap with dma_buf->file will
+ *   equally achieve that for a dma-buf object.
+ */
+
 static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
 				      enum dma_data_direction direction)
 {
@@ -665,6 +781,10 @@ static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
  * @dmabuf:	[in]	buffer to prepare cpu access for.
  * @direction:	[in]	length of range for cpu access.
  *
+ * After the cpu access is complete the caller should call
+ * dma_buf_end_cpu_access(). Only when cpu access is braketed by both calls is
+ * it guaranteed to be coherent with other DMA access.
+ *
  * Can return negative error values, returns 0 on success.
  */
 int dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
@@ -697,6 +817,8 @@ EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access);
  * @dmabuf:	[in]	buffer to complete cpu access for.
  * @direction:	[in]	length of range for cpu access.
  *
+ * This terminates CPU access started with dma_buf_begin_cpu_access().
+ *
  * Can return negative error values, returns 0 on success.
  */
 int dma_buf_end_cpu_access(struct dma_buf *dmabuf,
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 6df170fb243f..57828154e440 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -39,10 +39,6 @@ struct dma_buf_attachment;
 
 /**
  * struct dma_buf_ops - operations possible on struct dma_buf
- * @begin_cpu_access: [optional] called before cpu access to invalidate cpu
- * 		      caches and allocate backing storage (if not yet done)
- * 		      respectively pin the object into memory.
- * @end_cpu_access: [optional] called after cpu access to flush caches.
  * @kmap_atomic: maps a page from the buffer into kernel address
  * 		 space, users may not block until the subsequent unmap call.
  * 		 This callback must not sleep.
@@ -50,10 +46,6 @@ struct dma_buf_attachment;
  * 		   This Callback must not sleep.
  * @kmap: maps a page from the buffer into kernel address space.
  * @kunmap: [optional] unmaps a page from the buffer.
- * @mmap: used to expose the backing storage to userspace. Note that the
- * 	  mapping needs to be coherent - if the exporter doesn't directly
- * 	  support this, it needs to fake coherency by shooting down any ptes
- * 	  when transitioning away from the cpu domain.
  * @vmap: [optional] creates a virtual mapping for the buffer into kernel
  *	  address space. Same restrictions as for vmap and friends apply.
  * @vunmap: [optional] unmaps a vmap from the buffer
@@ -164,13 +156,96 @@ struct dma_buf_ops {
 	 */
 	void (*release)(struct dma_buf *);
 
+	/**
+	 * @begin_cpu_access:
+	 *
+	 * This is called from dma_buf_begin_cpu_access() and allows the
+	 * exporter to ensure that the memory is actually available for cpu
+	 * access - the exporter might need to allocate or swap-in and pin the
+	 * backing storage. The exporter also needs to ensure that cpu access is
+	 * coherent for the access direction. The direction can be used by the
+	 * exporter to optimize the cache flushing, i.e. access with a different
+	 * direction (read instead of write) might return stale or even bogus
+	 * data (e.g. when the exporter needs to copy the data to temporary
+	 * storage).
+	 *
+	 * This callback is optional.
+	 *
+	 * FIXME: This is both called through the DMA_BUF_IOCTL_SYNC command
+	 * from userspace (where storage shouldn't be pinned to avoid handing
+	 * de-factor mlock rights to userspace) and for the kernel-internal
+	 * users of the various kmap interfaces, where the backing storage must
+	 * be pinned to guarantee that the atomic kmap calls can succeed. Since
+	 * there's no in-kernel users of the kmap interfaces yet this isn't a
+	 * real problem.
+	 *
+	 * Returns:
+	 *
+	 * 0 on success or a negative error code on failure. This can for
+	 * example fail when the backing storage can't be allocated. Can also
+	 * return -ERESTARTSYS or -EINTR when the call has been interrupted and
+	 * needs to be restarted.
+	 */
 	int (*begin_cpu_access)(struct dma_buf *, enum dma_data_direction);
+
+	/**
+	 * @end_cpu_access:
+	 *
+	 * This is called from dma_buf_end_cpu_access() when the importer is
+	 * done accessing the CPU. The exporter can use this to flush caches and
+	 * unpin any resources pinned in @begin_cpu_access.
+	 * The result of any dma_buf kmap calls after end_cpu_access is
+	 * undefined.
+	 *
+	 * This callback is optional.
+	 *
+	 * Returns:
+	 *
+	 * 0 on success or a negative error code on failure. Can return
+	 * -ERESTARTSYS or -EINTR when the call has been interrupted and needs
+	 * to be restarted.
+	 */
 	int (*end_cpu_access)(struct dma_buf *, enum dma_data_direction);
 	void *(*kmap_atomic)(struct dma_buf *, unsigned long);
 	void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *);
 	void *(*kmap)(struct dma_buf *, unsigned long);
 	void (*kunmap)(struct dma_buf *, unsigned long, void *);
 
+	/**
+	 * @mmap:
+	 *
+	 * This callback is used by the dma_buf_mmap() function
+	 *
+	 * Note that the mapping needs to be incoherent, userspace is expected
+	 * to braket CPU access using the DMA_BUF_IOCTL_SYNC interface.
+	 *
+	 * Because dma-buf buffers have invariant size over their lifetime, the
+	 * dma-buf core checks whether a vma is too large and rejects such
+	 * mappings. The exporter hence does not need to duplicate this check.
+	 * Drivers do not need to check this themselves.
+	 *
+	 * If an exporter needs to manually flush caches and hence needs to fake
+	 * coherency for mmap support, it needs to be able to zap all the ptes
+	 * pointing at the backing storage. Now linux mm needs a struct
+	 * address_space associated with the struct file stored in vma->vm_file
+	 * to do that with the function unmap_mapping_range. But the dma_buf
+	 * framework only backs every dma_buf fd with the anon_file struct file,
+	 * i.e. all dma_bufs share the same file.
+	 *
+	 * Hence exporters need to setup their own file (and address_space)
+	 * association by setting vma->vm_file and adjusting vma->vm_pgoff in
+	 * the dma_buf mmap callback. In the specific case of a gem driver the
+	 * exporter could use the shmem file already provided by gem (and set
+	 * vm_pgoff = 0). Exporters can then zap ptes by unmapping the
+	 * corresponding range of the struct address_space associated with their
+	 * own file.
+	 *
+	 * This callback is optional.
+	 *
+	 * Returns:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
 	int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
 
 	void *(*vmap)(struct dma_buf *);
-- 
cgit v1.2.3


From 56e3d1cd05cc7b24cfcae8714b0661bf607aaba3 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 15 Dec 2016 06:01:10 +0100
Subject: kref: prefer atomic_inc_not_zero to atomic_add_unless

On most platforms, there exists this ifdef:

 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)

This makes this patch functionally useless. However, on PPC, there is
actually an explicit definition of atomic_inc_not_zero with its own
assembly that is slightly more optimized than atomic_add_unless. So,
this patch changes kref to use atomic_inc_not_zero instead, for PPC and
any future platforms that might provide an explicit implementation.

This also puts this usage of kref more in line with a verbatim reading
of the examples in Paul McKenney's paper [1] in the section titled "2.4
Atomic Counting With Check and Release Memory Barrier", which uses
atomic_inc_not_zero.

[1] http://open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2167.pdf

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161215050110.3241-1-Jason@zx2c4.com
---
 include/linux/kref.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index e15828fd71f1..62f0a84ae94e 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -133,6 +133,6 @@ static inline int kref_put_mutex(struct kref *kref,
  */
 static inline int __must_check kref_get_unless_zero(struct kref *kref)
 {
-	return atomic_add_unless(&kref->refcount, 1, 0);
+	return atomic_inc_not_zero(&kref->refcount);
 }
 #endif /* _KREF_H_ */
-- 
cgit v1.2.3


From cf4a7207b1cb4a3c3fe3aa11a83c9d673722a7f5 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 22 Dec 2016 14:45:14 +0000
Subject: lib: Add a simple prime number generator

Prime numbers are interesting for testing components that use multiplies
and divides, such as testing DRM's struct drm_mm alignment computations.

v2: Move to lib/, add selftest
v3: Fix initial constants (exclude 0/1 from being primes)
v4: More RCU markup to keep 0day/sparse happy
v5: Fix RCU unwind on module exit, add to kselftests
v6: Tidy computation of bitmap size
v7: for_each_prime_number_from()
v8: Compose small-primes using BIT() for easier verification
v9: Move rcu dance entirely into callers.
v10: Improve quote for Betrand's Postulate (aka Chebyshev's theorem)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161222144514.3911-1-chris@chris-wilson.co.uk
---
 include/linux/prime_numbers.h                |  37 ++++
 lib/Kconfig                                  |   7 +
 lib/Makefile                                 |   2 +
 lib/prime_numbers.c                          | 314 +++++++++++++++++++++++++++
 tools/testing/selftests/lib/prime_numbers.sh |  15 ++
 5 files changed, 375 insertions(+)
 create mode 100644 include/linux/prime_numbers.h
 create mode 100644 lib/prime_numbers.c
 create mode 100755 tools/testing/selftests/lib/prime_numbers.sh

(limited to 'include/linux')

diff --git a/include/linux/prime_numbers.h b/include/linux/prime_numbers.h
new file mode 100644
index 000000000000..14ec4f567342
--- /dev/null
+++ b/include/linux/prime_numbers.h
@@ -0,0 +1,37 @@
+#ifndef __LINUX_PRIME_NUMBERS_H
+#define __LINUX_PRIME_NUMBERS_H
+
+#include <linux/types.h>
+
+bool is_prime_number(unsigned long x);
+unsigned long next_prime_number(unsigned long x);
+
+/**
+ * for_each_prime_number - iterate over each prime upto a value
+ * @prime: the current prime number in this iteration
+ * @max: the upper limit
+ *
+ * Starting from the first prime number 2 iterate over each prime number up to
+ * the @max value. On each iteration, @prime is set to the current prime number.
+ * @max should be less than ULONG_MAX to ensure termination. To begin with
+ * @prime set to 1 on the first iteration use for_each_prime_number_from()
+ * instead.
+ */
+#define for_each_prime_number(prime, max) \
+	for_each_prime_number_from((prime), 2, (max))
+
+/**
+ * for_each_prime_number_from - iterate over each prime upto a value
+ * @prime: the current prime number in this iteration
+ * @from: the initial value
+ * @max: the upper limit
+ *
+ * Starting from @from iterate over each successive prime number up to the
+ * @max value. On each iteration, @prime is set to the current prime number.
+ * @max should be less than ULONG_MAX, and @from less than @max, to ensure
+ * termination.
+ */
+#define for_each_prime_number_from(prime, from, max) \
+	for (prime = (from); prime <= (max); prime = next_prime_number(prime))
+
+#endif /* !__LINUX_PRIME_NUMBERS_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 260a80e313b9..1788a1f50d28 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -550,4 +550,11 @@ config STACKDEPOT
 config SBITMAP
 	bool
 
+config PRIME_NUMBERS
+	tristate "Prime number generator"
+	default n
+	help
+	  Provides a helper module to generate prime numbers. Useful for writing
+	  test code, especially when checking multiplication and divison.
+
 endmenu
diff --git a/lib/Makefile b/lib/Makefile
index 50144a3aeebd..c664143fd917 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -197,6 +197,8 @@ obj-$(CONFIG_ASN1) += asn1_decoder.o
 
 obj-$(CONFIG_FONT_SUPPORT) += fonts/
 
+obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/prime_numbers.c b/lib/prime_numbers.c
new file mode 100644
index 000000000000..c9b3c29614aa
--- /dev/null
+++ b/lib/prime_numbers.c
@@ -0,0 +1,314 @@
+#define pr_fmt(fmt) "prime numbers: " fmt "\n"
+
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/prime_numbers.h>
+#include <linux/slab.h>
+
+#define bitmap_size(nbits) (BITS_TO_LONGS(nbits) * sizeof(unsigned long))
+
+struct primes {
+	struct rcu_head rcu;
+	unsigned long last, sz;
+	unsigned long primes[];
+};
+
+#if BITS_PER_LONG == 64
+static const struct primes small_primes = {
+	.last = 61,
+	.sz = 64,
+	.primes = {
+		BIT(2) |
+		BIT(3) |
+		BIT(5) |
+		BIT(7) |
+		BIT(11) |
+		BIT(13) |
+		BIT(17) |
+		BIT(19) |
+		BIT(23) |
+		BIT(29) |
+		BIT(31) |
+		BIT(37) |
+		BIT(41) |
+		BIT(43) |
+		BIT(47) |
+		BIT(53) |
+		BIT(59) |
+		BIT(61)
+	}
+};
+#elif BITS_PER_LONG == 32
+static const struct primes small_primes = {
+	.last = 31,
+	.sz = 32,
+	.primes = {
+		BIT(2) |
+		BIT(3) |
+		BIT(5) |
+		BIT(7) |
+		BIT(11) |
+		BIT(13) |
+		BIT(17) |
+		BIT(19) |
+		BIT(23) |
+		BIT(29) |
+		BIT(31)
+	}
+};
+#else
+#error "unhandled BITS_PER_LONG"
+#endif
+
+static DEFINE_MUTEX(lock);
+static const struct primes __rcu *primes = RCU_INITIALIZER(&small_primes);
+
+static unsigned long selftest_max;
+
+static bool slow_is_prime_number(unsigned long x)
+{
+	unsigned long y = int_sqrt(x);
+
+	while (y > 1) {
+		if ((x % y) == 0)
+			break;
+		y--;
+	}
+
+	return y == 1;
+}
+
+static unsigned long slow_next_prime_number(unsigned long x)
+{
+	while (x < ULONG_MAX && !slow_is_prime_number(++x))
+		;
+
+	return x;
+}
+
+static unsigned long clear_multiples(unsigned long x,
+				     unsigned long *p,
+				     unsigned long start,
+				     unsigned long end)
+{
+	unsigned long m;
+
+	m = 2 * x;
+	if (m < start)
+		m = roundup(start, x);
+
+	while (m < end) {
+		__clear_bit(m, p);
+		m += x;
+	}
+
+	return x;
+}
+
+static bool expand_to_next_prime(unsigned long x)
+{
+	const struct primes *p;
+	struct primes *new;
+	unsigned long sz, y;
+
+	/* Betrand's Postulate (or Chebyshev's theorem) states that if n > 3,
+	 * there is always at least one prime p between n and 2n - 2.
+	 * Equivalently, if n > 1, then there is always at least one prime p
+	 * such that n < p < 2n.
+	 *
+	 * http://mathworld.wolfram.com/BertrandsPostulate.html
+	 * https://en.wikipedia.org/wiki/Bertrand's_postulate
+	 */
+	sz = 2 * x;
+	if (sz < x)
+		return false;
+
+	sz = round_up(sz, BITS_PER_LONG);
+	new = kmalloc(sizeof(*new) + bitmap_size(sz), GFP_KERNEL);
+	if (!new)
+		return false;
+
+	mutex_lock(&lock);
+	p = rcu_dereference_protected(primes, lockdep_is_held(&lock));
+	if (x < p->last) {
+		kfree(new);
+		goto unlock;
+	}
+
+	/* Where memory permits, track the primes using the
+	 * Sieve of Eratosthenes. The sieve is to remove all multiples of known
+	 * primes from the set, what remains in the set is therefore prime.
+	 */
+	bitmap_fill(new->primes, sz);
+	bitmap_copy(new->primes, p->primes, p->sz);
+	for (y = 2UL; y < sz; y = find_next_bit(new->primes, sz, y + 1))
+		new->last = clear_multiples(y, new->primes, p->sz, sz);
+	new->sz = sz;
+
+	BUG_ON(new->last <= x);
+
+	rcu_assign_pointer(primes, new);
+	if (p != &small_primes)
+		kfree_rcu((struct primes *)p, rcu);
+
+unlock:
+	mutex_unlock(&lock);
+	return true;
+}
+
+static void free_primes(void)
+{
+	const struct primes *p;
+
+	mutex_lock(&lock);
+	p = rcu_dereference_protected(primes, lockdep_is_held(&lock));
+	if (p != &small_primes) {
+		rcu_assign_pointer(primes, &small_primes);
+		kfree_rcu((struct primes *)p, rcu);
+	}
+	mutex_unlock(&lock);
+}
+
+/**
+ * next_prime_number - return the next prime number
+ * @x: the starting point for searching to test
+ *
+ * A prime number is an integer greater than 1 that is only divisible by
+ * itself and 1.  The set of prime numbers is computed using the Sieve of
+ * Eratoshenes (on finding a prime, all multiples of that prime are removed
+ * from the set) enabling a fast lookup of the next prime number larger than
+ * @x. If the sieve fails (memory limitation), the search falls back to using
+ * slow trial-divison, up to the value of ULONG_MAX (which is reported as the
+ * final prime as a sentinel).
+ *
+ * Returns: the next prime number larger than @x
+ */
+unsigned long next_prime_number(unsigned long x)
+{
+	const struct primes *p;
+
+	rcu_read_lock();
+	p = rcu_dereference(primes);
+	while (x >= p->last) {
+		rcu_read_unlock();
+
+		if (!expand_to_next_prime(x))
+			return slow_next_prime_number(x);
+
+		rcu_read_lock();
+		p = rcu_dereference(primes);
+	}
+	x = find_next_bit(p->primes, p->last, x + 1);
+	rcu_read_unlock();
+
+	return x;
+}
+EXPORT_SYMBOL(next_prime_number);
+
+/**
+ * is_prime_number - test whether the given number is prime
+ * @x: the number to test
+ *
+ * A prime number is an integer greater than 1 that is only divisible by
+ * itself and 1. Internally a cache of prime numbers is kept (to speed up
+ * searching for sequential primes, see next_prime_number()), but if the number
+ * falls outside of that cache, its primality is tested using trial-divison.
+ *
+ * Returns: true if @x is prime, false for composite numbers.
+ */
+bool is_prime_number(unsigned long x)
+{
+	const struct primes *p;
+	bool result;
+
+	rcu_read_lock();
+	p = rcu_dereference(primes);
+	while (x >= p->sz) {
+		rcu_read_unlock();
+
+		if (!expand_to_next_prime(x))
+			return slow_is_prime_number(x);
+
+		rcu_read_lock();
+		p = rcu_dereference(primes);
+	}
+	result = test_bit(x, p->primes);
+	rcu_read_unlock();
+
+	return result;
+}
+EXPORT_SYMBOL(is_prime_number);
+
+static void dump_primes(void)
+{
+	const struct primes *p;
+	char *buf;
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+
+	rcu_read_lock();
+	p = rcu_dereference(primes);
+
+	if (buf)
+		bitmap_print_to_pagebuf(true, buf, p->primes, p->sz);
+	pr_info("primes.{last=%lu, .sz=%lu, .primes[]=...x%lx} = %s",
+		p->last, p->sz, p->primes[BITS_TO_LONGS(p->sz) - 1], buf);
+
+	rcu_read_unlock();
+
+	kfree(buf);
+}
+
+static int selftest(unsigned long max)
+{
+	unsigned long x, last;
+
+	if (!max)
+		return 0;
+
+	for (last = 0, x = 2; x < max; x++) {
+		bool slow = slow_is_prime_number(x);
+		bool fast = is_prime_number(x);
+
+		if (slow != fast) {
+			pr_err("inconsistent result for is-prime(%lu): slow=%s, fast=%s!",
+			       x, slow ? "yes" : "no", fast ? "yes" : "no");
+			goto err;
+		}
+
+		if (!slow)
+			continue;
+
+		if (next_prime_number(last) != x) {
+			pr_err("incorrect result for next-prime(%lu): expected %lu, got %lu",
+			       last, x, next_prime_number(last));
+			goto err;
+		}
+		last = x;
+	}
+
+	pr_info("selftest(%lu) passed, last prime was %lu", x, last);
+	return 0;
+
+err:
+	dump_primes();
+	return -EINVAL;
+}
+
+static int __init primes_init(void)
+{
+	return selftest(selftest_max);
+}
+
+static void __exit primes_exit(void)
+{
+	free_primes();
+}
+
+module_init(primes_init);
+module_exit(primes_exit);
+
+module_param_named(selftest, selftest_max, ulong, 0400);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
new file mode 100755
index 000000000000..da4cbcd766f5
--- /dev/null
+++ b/tools/testing/selftests/lib/prime_numbers.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Checks fast/slow prime_number generation for inconsistencies
+
+if ! /sbin/modprobe -q -r prime_numbers; then
+	echo "prime_numbers: [SKIP]"
+	exit 77
+fi
+
+if /sbin/modprobe -q prime_numbers selftest=65536; then
+	/sbin/modprobe -q -r prime_numbers
+	echo "prime_numbers: ok"
+else
+	echo "prime_numbers: [FAIL]"
+	exit 1
+fi
-- 
cgit v1.2.3


From f641d3b536e559dd2bae9245ec2e7d86cdf623fd Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 29 Dec 2016 21:48:24 +0100
Subject: dma-buf: use preferred struct reference in kernel-doc

sed -e 's/\( \* .*\)struct &\([_a-z]*\)/\1\&struct \2/' -i

Originally I wasnt a friend of this style because I thought a
line-break between the "&struct" and "foo" part would break it. But a
quick test shows that " * &struct \n * foo\n" works pefectly well with
current kernel-doc. So time to mass-apply these changes!

Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1483044517-5770-4-git-send-email-daniel.vetter@ffwll.ch
---
 drivers/dma-buf/dma-buf.c | 6 +++---
 include/linux/dma-buf.h   | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 91aff74ed092..ab814aff0a5b 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -128,7 +128,7 @@ static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence)
  * DOC: fence polling
  *
  * To support cross-device and cross-driver synchronization of buffer access
- * implicit fences (represented internally in the kernel with struct &fence) can
+ * implicit fences (represented internally in the kernel with &struct fence) can
  * be attached to a &dma_buf. The glue for that and a few related things are
  * provided in the &reservation_object structure.
  *
@@ -373,7 +373,7 @@ static inline int is_dma_buf_file(struct file *file)
  * Additionally, provide a name string for exporter; useful in debugging.
  *
  * @exp_info:	[in]	holds all the export related information provided
- *			by the exporter. see struct &dma_buf_export_info
+ *			by the exporter. see &struct dma_buf_export_info
  *			for further details.
  *
  * Returns, on success, a newly created dma_buf object, which wraps the
@@ -517,7 +517,7 @@ EXPORT_SYMBOL_GPL(dma_buf_get);
  *
  * If, as a result of this call, the refcount becomes 0, the 'release' file
  * operation related to this fd is called. It calls the release operation of
- * struct &dma_buf_ops in turn, and frees the memory allocated for dmabuf when
+ * &struct dma_buf_ops in turn, and frees the memory allocated for dmabuf when
  * exported.
  */
 void dma_buf_put(struct dma_buf *dmabuf)
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 57828154e440..4d61fc55278b 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -278,7 +278,7 @@ struct dma_buf_ops {
  * Shared dma buffers are reference counted using dma_buf_put() and
  * get_dma_buf().
  *
- * Device DMA access is handled by the separate struct &dma_buf_attachment.
+ * Device DMA access is handled by the separate &struct dma_buf_attachment.
  */
 struct dma_buf {
 	size_t size;
@@ -355,7 +355,7 @@ struct dma_buf_export_info {
  * DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters
  * @name: export-info name
  *
- * DEFINE_DMA_BUF_EXPORT_INFO macro defines the struct &dma_buf_export_info,
+ * DEFINE_DMA_BUF_EXPORT_INFO macro defines the &struct dma_buf_export_info,
  * zeroes it out and pre-populates exp_name in it.
  */
 #define DEFINE_DMA_BUF_EXPORT_INFO(name)	\
-- 
cgit v1.2.3


From e9b4d7b56f293ed4de9ff7d16759d33492f83180 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 29 Dec 2016 21:48:25 +0100
Subject: dma-buf: Use recommended structure member reference

I just learned that &struct_name.member_name works and looks pretty
even. It doesn't (yet) link to the member directly though, which would
be really good for big structures or vfunc tables (where the
per-member kerneldoc tends to be long).

Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1483044517-5770-5-git-send-email-daniel.vetter@ffwll.ch
---
 drivers/dma-buf/dma-buf.c | 5 ++---
 include/linux/dma-buf.h   | 6 +++---
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index ab814aff0a5b..718f832a5c71 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -516,9 +516,8 @@ EXPORT_SYMBOL_GPL(dma_buf_get);
  * Uses file's refcounting done implicitly by fput().
  *
  * If, as a result of this call, the refcount becomes 0, the 'release' file
- * operation related to this fd is called. It calls the release operation of
- * &struct dma_buf_ops in turn, and frees the memory allocated for dmabuf when
- * exported.
+ * operation related to this fd is called. It calls &dma_buf_ops.release vfunc
+ * in turn, and frees the memory allocated for dmabuf when exported.
  */
 void dma_buf_put(struct dma_buf *dmabuf)
 {
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 4d61fc55278b..bfb3704fc6fc 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -66,8 +66,8 @@ struct dma_buf_ops {
 	 * is not the case, and the allocation cannot be moved, it should also
 	 * fail the attach operation.
 	 *
-	 * Any exporter-private housekeeping data can be stored in the priv
-	 * pointer of &dma_buf_attachment structure.
+	 * Any exporter-private housekeeping data can be stored in the
+	 * &dma_buf_attachment.priv pointer.
 	 *
 	 * This callback is optional.
 	 *
@@ -106,7 +106,7 @@ struct dma_buf_ops {
 	 *
 	 * Note that any specific buffer attributes required for this function
 	 * should get added to device_dma_parameters accessible via
-	 * device->dma_params from the &dma_buf_attachment. The @attach callback
+	 * &device.dma_params from the &dma_buf_attachment. The @attach callback
 	 * should also check these constraints.
 	 *
 	 * If this is being called for the first time, the exporter can now
-- 
cgit v1.2.3


From d6c99f4bf093a58d3ab47caaec74b81f18bc4e3f Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 4 Jan 2017 14:12:21 +0000
Subject: dma-fence: Wrap querying the fence->status

The fence->status is an optional field that is only valid once the fence
has been signaled. (Driver may fill the fence->status with an error code
prior to calling dma_fence_signal().) Given the restriction upon its
validity, wrap querying of the fence->status into a helper
dma_fence_get_status().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20170104141222.6992-2-chris@chris-wilson.co.uk
---
 drivers/dma-buf/dma-fence.c  | 25 +++++++++++++++++++++++++
 drivers/dma-buf/sync_debug.c | 17 ++++++++---------
 drivers/dma-buf/sync_file.c  |  6 ++----
 include/linux/dma-fence.h    | 24 ++++++++++++++++++++++++
 4 files changed, 59 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index cb6eef308cd9..5e66dfece03c 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -281,6 +281,31 @@ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
 }
 EXPORT_SYMBOL(dma_fence_add_callback);
 
+/**
+ * dma_fence_get_status - returns the status upon completion
+ * @fence: [in]	the dma_fence to query
+ *
+ * This wraps dma_fence_get_status_locked() to return the error status
+ * condition on a signaled fence. See dma_fence_get_status_locked() for more
+ * details.
+ *
+ * Returns 0 if the fence has not yet been signaled, 1 if the fence has
+ * been signaled without an error condition, or a negative error code
+ * if the fence has been completed in err.
+ */
+int dma_fence_get_status(struct dma_fence *fence)
+{
+	unsigned long flags;
+	int status;
+
+	spin_lock_irqsave(fence->lock, flags);
+	status = dma_fence_get_status_locked(fence);
+	spin_unlock_irqrestore(fence->lock, flags);
+
+	return status;
+}
+EXPORT_SYMBOL(dma_fence_get_status);
+
 /**
  * dma_fence_remove_callback - remove a callback from the signaling list
  * @fence:	[in]	the fence to wait on
diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c
index 48b20e34fb6d..c769dc653b34 100644
--- a/drivers/dma-buf/sync_debug.c
+++ b/drivers/dma-buf/sync_debug.c
@@ -62,30 +62,29 @@ void sync_file_debug_remove(struct sync_file *sync_file)
 
 static const char *sync_status_str(int status)
 {
-	if (status == 0)
-		return "signaled";
+	if (status < 0)
+		return "error";
 
 	if (status > 0)
-		return "active";
+		return "signaled";
 
-	return "error";
+	return "active";
 }
 
 static void sync_print_fence(struct seq_file *s,
 			     struct dma_fence *fence, bool show)
 {
-	int status = 1;
 	struct sync_timeline *parent = dma_fence_parent(fence);
+	int status;
 
-	if (dma_fence_is_signaled_locked(fence))
-		status = fence->status;
+	status = dma_fence_get_status_locked(fence);
 
 	seq_printf(s, "  %s%sfence %s",
 		   show ? parent->name : "",
 		   show ? "_" : "",
 		   sync_status_str(status));
 
-	if (status <= 0) {
+	if (status) {
 		struct timespec64 ts64 =
 			ktime_to_timespec64(fence->timestamp);
 
@@ -136,7 +135,7 @@ static void sync_print_sync_file(struct seq_file *s,
 	int i;
 
 	seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name,
-		   sync_status_str(!dma_fence_is_signaled(sync_file->fence)));
+		   sync_status_str(dma_fence_get_status(sync_file->fence)));
 
 	if (dma_fence_is_array(sync_file->fence)) {
 		struct dma_fence_array *array = to_dma_fence_array(sync_file->fence);
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 07cb9b908f30..2321035f6204 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -373,10 +373,8 @@ static void sync_fill_fence_info(struct dma_fence *fence,
 		sizeof(info->obj_name));
 	strlcpy(info->driver_name, fence->ops->get_driver_name(fence),
 		sizeof(info->driver_name));
-	if (dma_fence_is_signaled(fence))
-		info->status = fence->status >= 0 ? 1 : fence->status;
-	else
-		info->status = 0;
+
+	info->status = dma_fence_get_status(fence);
 	info->timestamp_ns = ktime_to_ns(fence->timestamp);
 }
 
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index d51a7d23c358..19f7af905182 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -378,6 +378,30 @@ static inline struct dma_fence *dma_fence_later(struct dma_fence *f1,
 		return dma_fence_is_signaled(f2) ? NULL : f2;
 }
 
+/**
+ * dma_fence_get_status_locked - returns the status upon completion
+ * @fence: [in]	the dma_fence to query
+ *
+ * Drivers can supply an optional error status condition before they signal
+ * the fence (to indicate whether the fence was completed due to an error
+ * rather than success). The value of the status condition is only valid
+ * if the fence has been signaled, dma_fence_get_status_locked() first checks
+ * the signal state before reporting the error status.
+ *
+ * Returns 0 if the fence has not yet been signaled, 1 if the fence has
+ * been signaled without an error condition, or a negative error code
+ * if the fence has been completed in err.
+ */
+static inline int dma_fence_get_status_locked(struct dma_fence *fence)
+{
+	if (dma_fence_is_signaled_locked(fence))
+		return fence->status < 0 ? fence->status : 1;
+	else
+		return 0;
+}
+
+int dma_fence_get_status(struct dma_fence *fence);
+
 signed long dma_fence_wait_timeout(struct dma_fence *,
 				   bool intr, signed long timeout);
 signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
-- 
cgit v1.2.3


From a009e975da5c7d42a7f5eaadc54946eb5f76c9af Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 4 Jan 2017 14:12:22 +0000
Subject: dma-fence: Introduce drm_fence_set_error() helper

The dma_fence.error field (formerly known as dma_fence.status) is an
optional field that may be set by drivers before calling
dma_fence_signal(). The field can be used to indicate that the fence was
completed in err rather than with success, and is visible to other
consumers of the fence and to userspace via sync_file.

This patch renames the field from status to error so that its meaning is
hopefully more clear (and distinct from dma_fence_get_status() which is
a composite between the error state and signal state) and adds a helper
that validates the preconditions of when it is suitable to adjust the
error field.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20170104141222.6992-3-chris@chris-wilson.co.uk
---
 drivers/dma-buf/dma-fence.c |  2 +-
 include/linux/dma-fence.h   | 30 +++++++++++++++++++++++++-----
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 5e66dfece03c..a1bfc098ea10 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -566,7 +566,7 @@ dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
 	fence->context = context;
 	fence->seqno = seqno;
 	fence->flags = 0UL;
-	fence->status = 0;
+	fence->error = 0;
 
 	trace_dma_fence_init(fence);
 }
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 19f7af905182..6048fa404e57 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -47,7 +47,7 @@ struct dma_fence_cb;
  * can be compared to decide which fence would be signaled later.
  * @flags: A mask of DMA_FENCE_FLAG_* defined below
  * @timestamp: Timestamp when the fence was signaled.
- * @status: Optional, only valid if < 0, must be set before calling
+ * @error: Optional, only valid if < 0, must be set before calling
  * dma_fence_signal, indicates that the fence has completed with an error.
  *
  * the flags member must be manipulated and read using the appropriate
@@ -79,7 +79,7 @@ struct dma_fence {
 	unsigned seqno;
 	unsigned long flags;
 	ktime_t timestamp;
-	int status;
+	int error;
 };
 
 enum dma_fence_flag_bits {
@@ -133,7 +133,7 @@ struct dma_fence_cb {
  * or some failure occurred that made it impossible to enable
  * signaling. True indicates successful enabling.
  *
- * fence->status may be set in enable_signaling, but only when false is
+ * fence->error may be set in enable_signaling, but only when false is
  * returned.
  *
  * Calling dma_fence_signal before enable_signaling is called allows
@@ -145,7 +145,7 @@ struct dma_fence_cb {
  * the second time will be a noop since it was already signaled.
  *
  * Notes on signaled:
- * May set fence->status if returning true.
+ * May set fence->error if returning true.
  *
  * Notes on wait:
  * Must not be NULL, set to dma_fence_default_wait for default implementation.
@@ -395,13 +395,33 @@ static inline struct dma_fence *dma_fence_later(struct dma_fence *f1,
 static inline int dma_fence_get_status_locked(struct dma_fence *fence)
 {
 	if (dma_fence_is_signaled_locked(fence))
-		return fence->status < 0 ? fence->status : 1;
+		return fence->error ?: 1;
 	else
 		return 0;
 }
 
 int dma_fence_get_status(struct dma_fence *fence);
 
+/**
+ * dma_fence_set_error - flag an error condition on the fence
+ * @fence: [in]	the dma_fence
+ * @error: [in]	the error to store
+ *
+ * Drivers can supply an optional error status condition before they signal
+ * the fence, to indicate that the fence was completed due to an error
+ * rather than success. This must be set before signaling (so that the value
+ * is visible before any waiters on the signal callback are woken). This
+ * helper exists to help catching erroneous setting of #dma_fence.error.
+ */
+static inline void dma_fence_set_error(struct dma_fence *fence,
+				       int error)
+{
+	BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
+	BUG_ON(error >= 0 || error < -MAX_ERRNO);
+
+	fence->error = error;
+}
+
 signed long dma_fence_wait_timeout(struct dma_fence *,
 				   bool intr, signed long timeout);
 signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
-- 
cgit v1.2.3