2019-09-19 16:43:19 +02:00
|
|
|
From 7713f79b0a5473eb0b8456d36b99ae00815dd8a1 Mon Sep 17 00:00:00 2001
|
2019-07-09 20:32:28 +02:00
|
|
|
From: Eric Anholt <eric@anholt.net>
|
|
|
|
Date: Wed, 27 Mar 2019 17:44:40 -0700
|
2019-12-23 13:42:55 +01:00
|
|
|
Subject: [PATCH] drm/v3d: Add missing implicit synchronization.
|
2019-07-09 20:32:28 +02:00
|
|
|
|
|
|
|
It is the expectation of existing userspace (X11 + Mesa, in
|
|
|
|
particular) that jobs submitted to the kernel against a shared BO will
|
|
|
|
get implicitly synchronized by their submission order. If we want to
|
|
|
|
allow clever userspace to disable implicit synchronization, we should
|
|
|
|
do that under its own submit flag (as amdgpu and lima do).
|
|
|
|
|
|
|
|
Note that we currently only implicitly sync for the rendering pass,
|
|
|
|
not binning -- if you texture-from-pixmap in the binning vertex shader
|
|
|
|
(vertex coordinate generation), you'll miss out on synchronization.
|
|
|
|
|
|
|
|
Fixes flickering when multiple clients are running in parallel,
|
|
|
|
particularly GL apps and compositors.
|
|
|
|
|
|
|
|
Signed-off-by: Eric Anholt <eric@anholt.net>
|
|
|
|
---
|
|
|
|
drivers/gpu/drm/v3d/v3d_drv.h | 10 +---
|
|
|
|
drivers/gpu/drm/v3d/v3d_gem.c | 98 ++++++++++++++++++++++++++++++---
|
|
|
|
drivers/gpu/drm/v3d/v3d_sched.c | 45 ++-------------
|
|
|
|
3 files changed, 96 insertions(+), 57 deletions(-)
|
|
|
|
|
|
|
|
--- a/drivers/gpu/drm/v3d/v3d_drv.h
|
|
|
|
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
|
|
|
|
@@ -186,8 +186,9 @@ struct v3d_job {
|
|
|
|
struct v3d_bo **bo;
|
|
|
|
u32 bo_count;
|
|
|
|
|
|
|
|
- /* An optional fence userspace can pass in for the job to depend on. */
|
|
|
|
- struct dma_fence *in_fence;
|
|
|
|
+ struct dma_fence **deps;
|
|
|
|
+ int deps_count;
|
|
|
|
+ int deps_size;
|
|
|
|
|
|
|
|
/* v3d fence to be signaled by IRQ handler when the job is complete. */
|
|
|
|
struct dma_fence *irq_fence;
|
|
|
|
@@ -219,11 +220,6 @@ struct v3d_bin_job {
|
|
|
|
struct v3d_render_job {
|
|
|
|
struct v3d_job base;
|
|
|
|
|
|
|
|
- /* Optional fence for the binner, to depend on before starting
|
|
|
|
- * our job.
|
|
|
|
- */
|
|
|
|
- struct dma_fence *bin_done_fence;
|
|
|
|
-
|
|
|
|
/* GPU virtual addresses of the start/end of the CL job. */
|
|
|
|
u32 start, end;
|
|
|
|
|
|
|
|
--- a/drivers/gpu/drm/v3d/v3d_gem.c
|
|
|
|
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
|
|
|
|
@@ -218,6 +218,71 @@ v3d_unlock_bo_reservations(struct v3d_bo
|
|
|
|
ww_acquire_fini(acquire_ctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
+static int
|
|
|
|
+v3d_add_dep(struct v3d_job *job, struct dma_fence *fence)
|
|
|
|
+{
|
|
|
|
+ if (!fence)
|
|
|
|
+ return 0;
|
|
|
|
+
|
|
|
|
+ if (job->deps_size == job->deps_count) {
|
|
|
|
+ int new_deps_size = max(job->deps_size * 2, 4);
|
|
|
|
+ struct dma_fence **new_deps =
|
|
|
|
+ krealloc(job->deps, new_deps_size * sizeof(*new_deps),
|
|
|
|
+ GFP_KERNEL);
|
|
|
|
+ if (!new_deps) {
|
|
|
|
+ dma_fence_put(fence);
|
|
|
|
+ return -ENOMEM;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ job->deps = new_deps;
|
|
|
|
+ job->deps_size = new_deps_size;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ job->deps[job->deps_count++] = fence;
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/**
|
|
|
|
+ * Adds the required implicit fences before executing the job
|
|
|
|
+ *
|
|
|
|
+ * Userspace (X11 + Mesa) requires that a job submitted against a shared BO
|
|
|
|
+ * from one fd will implicitly synchronize against previous jobs submitted
|
|
|
|
+ * against that BO from other fds.
|
|
|
|
+ *
|
|
|
|
+ * Currently we don't bother trying to track the shared BOs, and instead just
|
|
|
|
+ * sync everything. However, our synchronization is only for the render pass
|
|
|
|
+ * -- the binning stage (VS coordinate calculations) ignores implicit sync,
|
|
|
|
+ * since using shared buffers for texture coordinates seems unlikely, and
|
|
|
|
+ * implicitly syncing them would break bin/render parallelism. If we want to
|
|
|
|
+ * fix that, we should introduce a flag when VS texturing has been used in the
|
|
|
|
+ * binning stage, or a set of flags for which BOs are sampled during binning.
|
|
|
|
+ */
|
|
|
|
+static int
|
|
|
|
+v3d_add_implicit_fences(struct v3d_job *job, struct v3d_bo *bo)
|
|
|
|
+{
|
|
|
|
+ int i, ret, nr_fences;
|
|
|
|
+ struct dma_fence **fences;
|
|
|
|
+
|
|
|
|
+ ret = reservation_object_get_fences_rcu(bo->resv, NULL,
|
|
|
|
+ &nr_fences, &fences);
|
|
|
|
+ if (ret || !nr_fences)
|
|
|
|
+ return ret;
|
|
|
|
+
|
|
|
|
+ for (i = 0; i < nr_fences; i++) {
|
|
|
|
+ ret = v3d_add_dep(job, fences[i]);
|
|
|
|
+ if (ret)
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /* Free any remaining fences after error. */
|
|
|
|
+ for (; i < nr_fences; i++)
|
|
|
|
+ dma_fence_put(fences[i]);
|
|
|
|
+ kfree(fences);
|
|
|
|
+
|
|
|
|
+ return ret;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
/* Takes the reservation lock on all the BOs being referenced, so that
|
|
|
|
* at queue submit time we can update the reservations.
|
|
|
|
*
|
|
|
|
@@ -226,10 +291,11 @@ v3d_unlock_bo_reservations(struct v3d_bo
|
|
|
|
* to v3d, so we don't attach dma-buf fences to them.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
-v3d_lock_bo_reservations(struct v3d_bo **bos,
|
|
|
|
- int bo_count,
|
|
|
|
+v3d_lock_bo_reservations(struct v3d_job *job,
|
|
|
|
struct ww_acquire_ctx *acquire_ctx)
|
|
|
|
{
|
|
|
|
+ struct v3d_bo **bos = job->bo;
|
|
|
|
+ int bo_count = job->bo_count;
|
|
|
|
int contended_lock = -1;
|
|
|
|
int i, ret;
|
|
|
|
|
|
|
|
@@ -281,6 +347,13 @@ retry:
|
|
|
|
* before we commit the CL to the hardware.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < bo_count; i++) {
|
|
|
|
+ ret = v3d_add_implicit_fences(job, bos[i]);
|
|
|
|
+ if (ret) {
|
|
|
|
+ v3d_unlock_bo_reservations(bos, bo_count,
|
|
|
|
+ acquire_ctx);
|
|
|
|
+ return ret;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
ret = reservation_object_reserve_shared(bos[i]->resv);
|
|
|
|
if (ret) {
|
|
|
|
v3d_unlock_bo_reservations(bos, bo_count,
|
|
|
|
@@ -383,7 +456,10 @@ v3d_job_free(struct kref *ref)
|
|
|
|
}
|
|
|
|
kvfree(job->bo);
|
|
|
|
|
|
|
|
- dma_fence_put(job->in_fence);
|
|
|
|
+ for (i = 0; i < job->deps_count; i++)
|
|
|
|
+ dma_fence_put(job->deps[i]);
|
|
|
|
+ kfree(job->deps);
|
|
|
|
+
|
|
|
|
dma_fence_put(job->irq_fence);
|
|
|
|
dma_fence_put(job->done_fence);
|
|
|
|
|
|
|
|
@@ -464,15 +540,20 @@ v3d_job_init(struct v3d_dev *v3d, struct
|
|
|
|
struct v3d_job *job, void (*free)(struct kref *ref),
|
|
|
|
u32 in_sync)
|
|
|
|
{
|
|
|
|
+ struct dma_fence *in_fence = NULL;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
job->v3d = v3d;
|
|
|
|
job->free = free;
|
|
|
|
|
|
|
|
- ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &job->in_fence);
|
|
|
|
+ ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &in_fence);
|
|
|
|
if (ret == -EINVAL)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
+ ret = v3d_add_dep(job, in_fence);
|
|
|
|
+ if (ret)
|
|
|
|
+ return ret;
|
|
|
|
+
|
|
|
|
kref_init(&job->refcount);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
@@ -590,8 +671,7 @@ v3d_submit_cl_ioctl(struct drm_device *d
|
|
|
|
if (ret)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
- ret = v3d_lock_bo_reservations(render->base.bo, render->base.bo_count,
|
|
|
|
- &acquire_ctx);
|
|
|
|
+ ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
|
|
|
|
if (ret)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
@@ -601,7 +681,8 @@ v3d_submit_cl_ioctl(struct drm_device *d
|
|
|
|
if (ret)
|
|
|
|
goto fail_unreserve;
|
|
|
|
|
|
|
|
- render->bin_done_fence = dma_fence_get(bin->base.done_fence);
|
|
|
|
+ ret = v3d_add_dep(&render->base,
|
|
|
|
+ dma_fence_get(bin->base.done_fence));
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
|
|
|
|
@@ -692,8 +773,7 @@ v3d_submit_tfu_ioctl(struct drm_device *
|
|
|
|
}
|
|
|
|
spin_unlock(&file_priv->table_lock);
|
|
|
|
|
|
|
|
- ret = v3d_lock_bo_reservations(job->base.bo, job->base.bo_count,
|
|
|
|
- &acquire_ctx);
|
|
|
|
+ ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
|
|
|
|
if (ret)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
--- a/drivers/gpu/drm/v3d/v3d_sched.c
|
|
|
|
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
|
|
|
|
@@ -67,47 +67,10 @@ v3d_job_dependency(struct drm_sched_job
|
|
|
|
struct drm_sched_entity *s_entity)
|
|
|
|
{
|
|
|
|
struct v3d_job *job = to_v3d_job(sched_job);
|
|
|
|
- struct dma_fence *fence;
|
|
|
|
-
|
|
|
|
- fence = job->in_fence;
|
|
|
|
- if (fence) {
|
|
|
|
- job->in_fence = NULL;
|
|
|
|
- return fence;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return NULL;
|
|
|
|
-}
|
|
|
|
|
|
|
|
-/**
|
|
|
|
- * Returns the fences that the render job depends on, one by one.
|
|
|
|
- * v3d_job_run() won't be called until all of them have been signaled.
|
|
|
|
- */
|
|
|
|
-static struct dma_fence *
|
|
|
|
-v3d_render_job_dependency(struct drm_sched_job *sched_job,
|
|
|
|
- struct drm_sched_entity *s_entity)
|
|
|
|
-{
|
|
|
|
- struct v3d_render_job *job = to_render_job(sched_job);
|
|
|
|
- struct dma_fence *fence;
|
|
|
|
-
|
|
|
|
- fence = v3d_job_dependency(sched_job, s_entity);
|
|
|
|
- if (fence)
|
|
|
|
- return fence;
|
|
|
|
-
|
|
|
|
- /* If we had a bin job, the render job definitely depends on
|
|
|
|
- * it. We first have to wait for bin to be scheduled, so that
|
|
|
|
- * its done_fence is created.
|
|
|
|
- */
|
|
|
|
- fence = job->bin_done_fence;
|
|
|
|
- if (fence) {
|
|
|
|
- job->bin_done_fence = NULL;
|
|
|
|
- return fence;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /* XXX: Wait on a fence for switching the GMP if necessary,
|
|
|
|
- * and then do so.
|
|
|
|
- */
|
|
|
|
-
|
|
|
|
- return fence;
|
|
|
|
+ if (!job->deps_count)
|
|
|
|
+ return NULL;
|
|
|
|
+ return job->deps[--job->deps_count];
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
|
|
|
|
@@ -329,7 +292,7 @@ static const struct drm_sched_backend_op
|
|
|
|
};
|
|
|
|
|
|
|
|
static const struct drm_sched_backend_ops v3d_render_sched_ops = {
|
|
|
|
- .dependency = v3d_render_job_dependency,
|
|
|
|
+ .dependency = v3d_job_dependency,
|
|
|
|
.run_job = v3d_render_job_run,
|
|
|
|
.timedout_job = v3d_render_job_timedout,
|
|
|
|
.free_job = v3d_job_free,
|