intel-gem: Speed up tiled readpixels by tracking which pages have been flushed.

This is around a 3x speedup, since we would read wide rows at a time and
clflush each tile 8 times as a result.  We'll want code related to this anyway
when we do fault-based per-page clflushing for sw fallbacks.
main
Eric Anholt 2008-07-26 19:28:14 -07:00
parent 04ae66db1c
commit f85fd1b42d
2 changed files with 79 additions and 26 deletions

View File

@ -36,6 +36,12 @@ static int
i915_gem_object_set_domain(struct drm_gem_object *obj,
uint32_t read_domains,
uint32_t write_domain);
static int
i915_gem_object_set_domain_range(struct drm_gem_object *obj,
uint64_t offset,
uint64_t size,
uint32_t read_domains,
uint32_t write_domain);
int
i915_gem_set_domain(struct drm_gem_object *obj,
struct drm_file *file_priv,
@ -136,32 +142,11 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
mutex_lock(&dev->struct_mutex);
/* Do a partial equivalent of i915_gem_set_domain(CPU, 0), as
* we don't want to clflush whole objects to read a portion of them.
*
* The side effect of doing this is that repeated preads of the same
* contents would take extra clflush overhead, since we don't track
* flushedness on a page basis.
*/
if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) {
ret = i915_gem_object_wait_rendering(obj);
if (ret) {
drm_gem_object_unreference(obj);
mutex_unlock(&dev->struct_mutex);
return ret;
}
}
if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
int first_page = args->offset / PAGE_SIZE;
int last_page = (args->offset + args->size - 1) / PAGE_SIZE;
/* If we don't have the page list, the pages are unpinned
* and swappable, and thus should already be in the CPU domain.
*/
BUG_ON(obj_priv->page_list == NULL);
drm_ttm_cache_flush(&obj_priv->page_list[first_page],
last_page - first_page + 1);
ret = i915_gem_object_set_domain_range(obj, args->offset, args->size,
I915_GEM_DOMAIN_CPU, 0);
if (ret != 0) {
drm_gem_object_unreference(obj);
mutex_unlock(&dev->struct_mutex);
}
offset = args->offset;
@ -1383,7 +1368,17 @@ i915_gem_object_set_domain(struct drm_gem_object *obj,
if ((write_domain | flush_domains) != 0)
obj->write_domain = write_domain;
/* If we're invalidating the CPU domain, clear the per-page CPU
* domain list as well.
*/
if (obj_priv->page_cpu_valid != NULL &&
(obj->read_domains & I915_GEM_DOMAIN_CPU) &&
((read_domains & I915_GEM_DOMAIN_CPU) == 0)) {
memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
}
obj->read_domains = read_domains;
dev->invalidate_domains |= invalidate_domains;
dev->flush_domains |= flush_domains;
#if WATCH_BUF
@ -1395,6 +1390,57 @@ i915_gem_object_set_domain(struct drm_gem_object *obj,
return 0;
}
/**
* Set the read/write domain on a range of the object.
*
* Currently only implemented for CPU reads, otherwise drops to normal
* i915_gem_object_set_domain().
*/
/**
 * Set the read/write domain on a range of the object, tracking per-page
 * CPU validity so repeated partial reads don't re-clflush already-valid
 * pages.
 *
 * Currently only implemented for CPU reads (read_domains == CPU,
 * write_domain == 0); any other request drops to the whole-object
 * i915_gem_object_set_domain().
 *
 * Returns 0 on success, -ENOMEM if the per-page valid bitmap can't be
 * allocated, or the error from waiting on outstanding GPU rendering.
 */
static int
i915_gem_object_set_domain_range(struct drm_gem_object *obj,
				 uint64_t offset,
				 uint64_t size,
				 uint32_t read_domains,
				 uint32_t write_domain)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int ret, i;

	/* If the whole object is already CPU-readable, every page is
	 * valid and there's nothing to flush.
	 */
	if (obj->read_domains & I915_GEM_DOMAIN_CPU)
		return 0;

	/* Only CPU-read transitions are handled per-page; fall back to
	 * the full-object domain change for anything else.
	 */
	if (read_domains != I915_GEM_DOMAIN_CPU ||
	    write_domain != 0)
		return i915_gem_object_set_domain(obj,
						  read_domains, write_domain);

	/* Wait on any GPU rendering to the object to be flushed. */
	if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) {
		ret = i915_gem_object_wait_rendering(obj);
		if (ret)
			return ret;
	}

	if (obj_priv->page_cpu_valid == NULL) {
		obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
						      DRM_MEM_DRIVER);
		/* Fix: an unchecked allocation failure here would NULL-deref
		 * in the loop below.
		 */
		if (obj_priv->page_cpu_valid == NULL)
			return -ENOMEM;
	}

	/* Flush the cache on any pages that are still invalid from the CPU's
	 * perspective.  Fix: the loop bound must be '<=' — with '<' the last
	 * page of the range was skipped, and a range contained within a
	 * single page flushed nothing at all.
	 */
	for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; i++) {
		if (obj_priv->page_cpu_valid[i])
			continue;

		drm_ttm_cache_flush(obj_priv->page_list + i, 1);

		obj_priv->page_cpu_valid[i] = 1;
	}

	return 0;
}
/**
* Once all of the objects have been set in the proper domain,
* perform the necessary flush and invalidate operations.
@ -2097,6 +2143,7 @@ void i915_gem_free_object(struct drm_gem_object *obj)
i915_gem_object_unbind(obj);
drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
}

View File

@ -393,6 +393,12 @@ struct drm_i915_gem_object {
/** Current tiling mode for the object. */
uint32_t tiling_mode;
/**
* Flagging of which individual pages are valid in GEM_DOMAIN_CPU when
* GEM_DOMAIN_CPU is not in the object's read domain.
*/
uint8_t *page_cpu_valid;
};
/**