From 702adba22433c175e8429a47760f35ca16caf1cd Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 17 Nov 2010 12:28:29 +0000 Subject: drm/ttm/radeon/nouveau: Kill the bo lock in favour of a bo device fence_lock The bo lock used only to protect the bo sync object members, and since it is a per bo lock, fencing a buffer list will see a lot of locks and unlocks. Replace it with a per-device lock that protects the sync object members on *all* bos. Reading and setting these members will always be very quick, so the risc of heavy lock contention is microscopic. Note that waiting for sync objects will always take place outside of this lock. The bo device fence lock will eventually be replaced with a seqlock / rcu mechanism so we can determine that a bo is idle under a rcu / read seqlock. However this change will allow us to batch fencing and unreserving of buffers with a minimal amount of locking. Signed-off-by: Thomas Hellstrom Reviewed-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/radeon/radeon_object.c') diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 1d067743fee0..e939cb6a91cc 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -369,11 +369,11 @@ void radeon_bo_list_fence(struct list_head *head, void *fence) list_for_each_entry(lobj, head, list) { bo = lobj->bo; - spin_lock(&bo->tbo.lock); + spin_lock(&bo->tbo.bdev->fence_lock); old_fence = (struct radeon_fence *)bo->tbo.sync_obj; bo->tbo.sync_obj = radeon_fence_ref(fence); bo->tbo.sync_obj_arg = NULL; - spin_unlock(&bo->tbo.lock); + spin_unlock(&bo->tbo.bdev->fence_lock); if (old_fence) { radeon_fence_unref(&old_fence); } -- cgit v1.2.3 From 147666fb3b93b8c484f562da33a37f886ddff768 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 17 Nov 2010 12:38:32 +0000 Subject: drm/radeon: Use the ttm execbuf utilities Rather than re-implementing in the Radeon driver, Use the execbuf / cs / pushbuf utilities that comes with TTM. This comes with an even greater benefit now that many spinlocks have been optimized away... Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon.h | 4 +-- drivers/gpu/drm/radeon/radeon_cs.c | 17 ++++++----- drivers/gpu/drm/radeon/radeon_object.c | 55 +++------------------------------- drivers/gpu/drm/radeon/radeon_object.h | 3 -- 4 files changed, 16 insertions(+), 63 deletions(-) (limited to 'drivers/gpu/drm/radeon/radeon_object.c') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 3a7095743d44..b1e073b7381f 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -69,6 +69,7 @@ #include #include #include +#include #include "radeon_family.h" #include "radeon_mode.h" @@ -259,13 +260,12 @@ struct radeon_bo { }; struct radeon_bo_list { - struct list_head list; + struct ttm_validate_buffer tv; struct radeon_bo *bo; uint64_t gpu_offset; unsigned rdomain; unsigned wdomain; u32 tiling_flags; - bool reserved; }; /* diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 6d64a2705f12..35b5eb8fbe2a 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -77,13 +77,13 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs_ptr[i] = &p->relocs[i]; p->relocs[i].robj = p->relocs[i].gobj->driver_private; p->relocs[i].lobj.bo = p->relocs[i].robj; - p->relocs[i].lobj.rdomain = r->read_domains; p->relocs[i].lobj.wdomain = r->write_domain; + p->relocs[i].lobj.rdomain = r->read_domains; + p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; p->relocs[i].handle = r->handle; p->relocs[i].flags = r->flags; - INIT_LIST_HEAD(&p->relocs[i].lobj.list); radeon_bo_list_add_object(&p->relocs[i].lobj, - &p->validated); + &p->validated); } } return radeon_bo_list_validate(&p->validated); @@ -189,10 +189,13 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) { unsigned i; - if (!error && parser->ib) { - radeon_bo_list_fence(&parser->validated, parser->ib->fence); - } - radeon_bo_list_unreserve(&parser->validated); + + if (!error && parser->ib) + ttm_eu_fence_buffer_objects(&parser->validated, + parser->ib->fence); + else + ttm_eu_backoff_reservation(&parser->validated); + if (parser->relocs != NULL) { for (i = 0; i < parser->nrelocs; i++) { if (parser->relocs[i].gobj) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index e939cb6a91cc..a8594d289bcf 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -293,34 +293,9 @@ void radeon_bo_list_add_object(struct radeon_bo_list *lobj, struct list_head *head) { if (lobj->wdomain) { - list_add(&lobj->list, head); + list_add(&lobj->tv.head, head); } else { - list_add_tail(&lobj->list, head); - } -} - -int radeon_bo_list_reserve(struct list_head *head) -{ - struct radeon_bo_list *lobj; - int r; - - list_for_each_entry(lobj, head, list){ - r = radeon_bo_reserve(lobj->bo, false); - if (unlikely(r != 0)) - return r; - lobj->reserved = true; - } - return 0; -} - -void radeon_bo_list_unreserve(struct list_head *head) -{ - struct radeon_bo_list *lobj; - - list_for_each_entry(lobj, head, list) { - /* only unreserve object we successfully reserved */ - if (lobj->reserved && radeon_bo_is_reserved(lobj->bo)) - radeon_bo_unreserve(lobj->bo); + list_add_tail(&lobj->tv.head, head); } } @@ -331,14 +306,11 @@ int radeon_bo_list_validate(struct list_head *head) u32 domain; int r; - list_for_each_entry(lobj, head, list) { - lobj->reserved = false; - } - r = radeon_bo_list_reserve(head); + r = ttm_eu_reserve_buffers(head); if (unlikely(r != 0)) { return r; } - list_for_each_entry(lobj, head, list) { + list_for_each_entry(lobj, head, tv.head) { bo = lobj->bo; if (!bo->pin_count) { domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain; @@ -361,25 +333,6 @@ int radeon_bo_list_validate(struct list_head *head) return 0; } -void radeon_bo_list_fence(struct list_head *head, void *fence) -{ - struct radeon_bo_list *lobj; - struct radeon_bo *bo; - struct radeon_fence *old_fence = NULL; - - list_for_each_entry(lobj, head, list) { - bo = lobj->bo; - spin_lock(&bo->tbo.bdev->fence_lock); - old_fence = (struct radeon_fence *)bo->tbo.sync_obj; - bo->tbo.sync_obj = radeon_fence_ref(fence); - bo->tbo.sync_obj_arg = NULL; - spin_unlock(&bo->tbo.bdev->fence_lock); - if (old_fence) { - radeon_fence_unref(&old_fence); - } - } -} - int radeon_bo_fbdev_mmap(struct radeon_bo *bo, struct vm_area_struct *vma) { diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index fd536751f865..22d4c237dea5 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -152,10 +152,7 @@ extern int radeon_bo_init(struct radeon_device *rdev); extern void radeon_bo_fini(struct radeon_device *rdev); extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, struct list_head *head); -extern int radeon_bo_list_reserve(struct list_head *head); -extern void radeon_bo_list_unreserve(struct list_head *head); extern int radeon_bo_list_validate(struct list_head *head); -extern void radeon_bo_list_fence(struct list_head *head, void *fence); extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, struct vm_area_struct *vma); extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo, -- cgit v1.2.3 From 99ee7fac189893c90145a22b86bbcfdc98f69a9c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 23 Nov 2010 11:47:49 +1000 Subject: drm/radeon: add initial tracepoint support. this adds a bo create, and fence seq tracking tracepoints. This is just an initial set to play around with, we should investigate what others we need would be useful. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/Makefile | 5 +- drivers/gpu/drm/radeon/radeon_fence.c | 4 ++ drivers/gpu/drm/radeon/radeon_object.c | 2 + drivers/gpu/drm/radeon/radeon_trace.h | 82 ++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_trace_points.c | 9 +++ 5 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/radeon/radeon_trace.h create mode 100644 drivers/gpu/drm/radeon/radeon_trace_points.c (limited to 'drivers/gpu/drm/radeon/radeon_object.c') diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 6cae4f2028d2..e97e6f842699 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -65,10 +65,13 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \ r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \ r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \ - evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o + evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \ + radeon_trace_points.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o radeon-$(CONFIG_ACPI) += radeon_acpi.o obj-$(CONFIG_DRM_RADEON)+= radeon.o + +CFLAGS_radeon_trace_points.o := -I$(src) \ No newline at end of file diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index daacb281dfaf..171b0b2e3a64 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -38,6 +38,7 @@ #include "drm.h" #include "radeon_reg.h" #include "radeon.h" +#include "radeon_trace.h" int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) { @@ -57,6 +58,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) } else radeon_fence_ring_emit(rdev, fence); + trace_radeon_fence_emit(rdev->ddev, fence->seq); fence->emited = true; list_del(&fence->list); list_add_tail(&fence->list, &rdev->fence_drv.emited); @@ -213,6 +215,7 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) retry: /* save current sequence used to check for GPU lockup */ seq = rdev->fence_drv.last_seq; + trace_radeon_fence_wait_begin(rdev->ddev, seq); if (intr) { radeon_irq_kms_sw_irq_get(rdev); r = wait_event_interruptible_timeout(rdev->fence_drv.queue, @@ -227,6 +230,7 @@ retry: radeon_fence_signaled(fence), timeout); radeon_irq_kms_sw_irq_put(rdev); } + trace_radeon_fence_wait_end(rdev->ddev, seq); if (unlikely(!radeon_fence_signaled(fence))) { /* we were interrupted for some reason and fence isn't * isn't signaled yet, resume wait diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index a8594d289bcf..8bdf0ba2983a 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -34,6 +34,7 @@ #include #include "radeon_drm.h" #include "radeon.h" +#include "radeon_trace.h" int radeon_ttm_init(struct radeon_device *rdev); @@ -137,6 +138,7 @@ retry: list_add_tail(&bo->list, &rdev->gem.objects); mutex_unlock(&bo->rdev->gem.mutex); } + trace_radeon_bo_create(bo); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h new file mode 100644 index 000000000000..eafd8160a155 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_trace.h @@ -0,0 +1,82 @@ +#if !defined(_RADEON_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _RADEON_TRACE_H_ + +#include +#include +#include + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM radeon +#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM) +#define TRACE_INCLUDE_FILE radeon_trace + +TRACE_EVENT(radeon_bo_create, + TP_PROTO(struct radeon_bo *bo), + TP_ARGS(bo), + TP_STRUCT__entry( + __field(struct radeon_bo *, bo) + __field(u32, pages) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->pages = bo->tbo.num_pages; + ), + TP_printk("bo=%p, pages=%u", __entry->bo, __entry->pages) +); + +DECLARE_EVENT_CLASS(radeon_fence_request, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->seqno = seqno; + ), + + TP_printk("dev=%u, seqno=%u", __entry->dev, __entry->seqno) +); + +DEFINE_EVENT(radeon_fence_request, radeon_fence_emit, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno) +); + +DEFINE_EVENT(radeon_fence_request, radeon_fence_retire, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno) +); + +DEFINE_EVENT(radeon_fence_request, radeon_fence_wait_begin, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno) +); + +DEFINE_EVENT(radeon_fence_request, radeon_fence_wait_end, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno) +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include diff --git a/drivers/gpu/drm/radeon/radeon_trace_points.c b/drivers/gpu/drm/radeon/radeon_trace_points.c new file mode 100644 index 000000000000..8175993df84d --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_trace_points.c @@ -0,0 +1,9 @@ +/* Copyright Red Hat Inc 2010. + * Author : Dave Airlie + */ +#include +#include "radeon_drm.h" +#include "radeon.h" + +#define CREATE_TRACE_POINTS +#include "radeon_trace.h" -- cgit v1.2.3