summary | refs | log | tree | commit | diff
author: Chris Wilson <chris@chris-wilson.co.uk> 2012-04-15 23:35:06 (GMT)
committer: Chris Wilson <chris@chris-wilson.co.uk> 2012-04-16 09:05:39 (GMT)
commit: b377b69ade5dc3c4af049f7220a6209599a8f854 (patch) (side-by-side diff)
tree: 09af54f7a0dd707d6c610e995dfade8ff31cc516
parent: cfeb329bba022c06b11dc11ef759e1e25434bb77 (diff)
download: linux-2.6-845g.zip
download: linux-2.6-845g.tar.gz
drm/i915: Perform a chipset flush just once before execution [branch: 845g]
Accumulate all the CPU flushes over the course of pwriting upload buffers into a single flush before the submission of the next batch. Anything that is pinned into the display plane (or assumed to be) is immediately flushed.

Note that one functional change in this patch is *not* to issue a chipset flush after flushing the CPU write domain for a snoopable object.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h 8
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c 50
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_execbuffer.c 6
3 files changed, 48 insertions, 16 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 85aa9bc..dee22dd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -707,6 +707,12 @@ typedef struct drm_i915_private {
bool interruptible;
/**
+ * Do we have pending writes from the CPU to the GPU
+ * that need to be flushed?
+ */
+ bool gtt_chipset_flush;
+
+ /**
* Flag if the X Server, and thus DRM, is not currently in
* control of the device.
*
@@ -1309,7 +1315,7 @@ void i915_gem_retire_requests(struct drm_device *dev);
void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
void i915_gem_reset(struct drm_device *dev);
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
+bool i915_gem_clflush_object(struct drm_i915_gem_object *obj);
int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
uint32_t read_domains,
uint32_t write_domain);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b453471..964d15c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -38,7 +38,8 @@
static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
-static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
+static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+ bool flush);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
unsigned alignment,
bool map_and_fenceable,
@@ -730,6 +731,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
struct drm_i915_gem_pwrite *args,
struct drm_file *file)
{
+ struct drm_i915_private *dev_priv = dev->dev_private;
struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
ssize_t remain;
loff_t offset;
@@ -851,13 +853,18 @@ out:
/* and flush dirty cachelines in case the object isn't in the cpu write
* domain anymore. */
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
- i915_gem_clflush_object(obj);
- intel_gtt_chipset_flush();
+ if (i915_gem_clflush_object(obj))
+ needs_clflush_after = true;
}
}
- if (needs_clflush_after)
- intel_gtt_chipset_flush();
+ if (needs_clflush_after) {
+ if (obj->pin_count) {
+ intel_gtt_chipset_flush();
+ dev_priv->mm.gtt_chipset_flush = false;
+ } else
+ dev_priv->mm.gtt_chipset_flush = true;
+ }
return ret;
}
@@ -1023,7 +1030,7 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
/* Pinned buffers may be scanout, so flush the cache */
if (obj->pin_count)
- i915_gem_object_flush_cpu_write_domain(obj);
+ i915_gem_object_flush_cpu_write_domain(obj, true);
drm_gem_object_unreference(&obj->base);
unlock:
@@ -2871,7 +2878,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
return 0;
}
-void
+bool
i915_gem_clflush_object(struct drm_i915_gem_object *obj)
{
/* If we don't have a page list set up, then we're not pinned
@@ -2879,7 +2886,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
* again at bind time.
*/
if (obj->pages == NULL)
- return;
+ return false;
/* If the GPU is snooping the contents of the CPU cache,
* we do not need to manually clear the CPU cache lines. However,
@@ -2891,13 +2898,14 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
*/
if (obj->cache_level != I915_CACHE_NONE) {
obj->cache_dirty = obj->cache_level;
- return;
+ return false;
}
trace_i915_gem_object_clflush(obj);
drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
obj->cache_dirty = I915_CACHE_NONE;
+ return true;
}
/** Flushes any GPU write domain for the object if it's dirty. */
@@ -2940,15 +2948,22 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
/** Flushes the CPU write domain for the object if it's dirty. */
static void
-i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
+i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+ bool flush)
{
uint32_t old_write_domain;
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
return;
- i915_gem_clflush_object(obj);
- intel_gtt_chipset_flush();
+ if (i915_gem_clflush_object(obj)) {
+ struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+ if (flush)
+ intel_gtt_chipset_flush();
+ else
+ dev_priv->mm.gtt_chipset_flush = true;
+ }
+
old_write_domain = obj->base.write_domain;
obj->base.write_domain = 0;
@@ -2987,7 +3002,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
return ret;
}
- i915_gem_object_flush_cpu_write_domain(obj);
+ i915_gem_object_flush_cpu_write_domain(obj, obj->pin_count);
old_write_domain = obj->base.write_domain;
old_read_domains = obj->base.read_domains;
@@ -3130,7 +3145,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- i915_gem_object_flush_cpu_write_domain(obj);
+ i915_gem_object_flush_cpu_write_domain(obj, true);
old_write_domain = obj->base.write_domain;
old_read_domains = obj->base.read_domains;
@@ -3388,7 +3403,7 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
/* XXX - flush the CPU caches for pinned objects
* as the X server doesn't manage domains yet
*/
- i915_gem_object_flush_cpu_write_domain(obj);
+ i915_gem_object_flush_cpu_write_domain(obj, true);
args->offset = obj->gtt_offset;
out:
drm_gem_object_unreference(&obj->base);
@@ -4034,9 +4049,11 @@ void i915_gem_free_all_phys_object(struct drm_device *dev)
i915_gem_free_phys_object(dev, i);
}
+
void i915_gem_detach_phys_object(struct drm_device *dev,
struct drm_i915_gem_object *obj)
{
+ struct drm_i915_private *dev_priv = dev->dev_private;
struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
char *vaddr;
int i;
@@ -4062,6 +4079,7 @@ void i915_gem_detach_phys_object(struct drm_device *dev,
}
}
intel_gtt_chipset_flush();
+ dev_priv->mm.gtt_chipset_flush = false;
obj->phys_obj->cur_obj = NULL;
obj->phys_obj = NULL;
@@ -4131,6 +4149,7 @@ i915_gem_phys_pwrite(struct drm_device *dev,
struct drm_i915_gem_pwrite *args,
struct drm_file *file_priv)
{
+ struct drm_i915_private *dev_priv = dev->dev_private;
void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
@@ -4149,6 +4168,7 @@ i915_gem_phys_pwrite(struct drm_device *dev,
}
intel_gtt_chipset_flush();
+ dev_priv->mm.gtt_chipset_flush = false;
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b825c06..61d8daa 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -878,12 +878,18 @@ static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
struct list_head *objects)
{
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
struct drm_i915_gem_object *obj;
struct change_domains cd;
int ret;
memset(&cd, 0, sizeof(cd));
+ if (dev_priv->mm.gtt_chipset_flush) {
+ cd.flush_domains = I915_GEM_DOMAIN_CPU;
+ dev_priv->mm.gtt_chipset_flush = false;
+ }
+
/* We need to invalidate the BLT's prefetched entries after
* updating the GATT (as the hardware invalidates the wrong PTEs).
*/