Skip to content

Commit

Permalink
drm/i915/ttm: Implement asynchronous TTM moves
Browse files Browse the repository at this point in the history
Don't wait synchronously while migrating; instead, make the GPU blit await
the dependencies and add a moving fence to the object.

This also enables asynchronous VRAM management in that on eviction,
rather than waiting for the moving fence to expire before freeing VRAM,
it is freed immediately and the fence is stored with the VRAM manager and
handed out to newly allocated objects to await before clears and swapins,
or for kernel objects before setting up gpu vmas or mapping.

To collect dependencies before migrating, add a set of utilities that
coalesce these to a single dma_fence.

What is still missing for fully asynchronous operation is asynchronous vma
unbinding, which is still to be implemented.

This commit substantially reduces execution time in the gem_lmem_swapping
test.

v2:
- Make a couple of functions static.
v4:
- Fix some style issues (Matthew Auld)
- Audit and add more checks for ghost objects (Matthew Auld)
- Add more documentation for the i915_deps utility (Matthew Auld)
- Simplify the i915_deps_sync() function
v6:
- Re-check for fence signaled before returning -EBUSY (Matthew Auld)
- Use dma_resv_iter_is_exclusive() (Matthew Auld)
- Await all dma-resv fences before a migration blit (Matthew Auld)

Signed-off-by: Thomas Hellström <[email protected]>
Reviewed-by: Matthew Auld <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
  • Loading branch information
Thomas Hellström committed Nov 25, 2021
1 parent 004746e commit 6385eb7
Show file tree
Hide file tree
Showing 4 changed files with 348 additions and 32 deletions.
32 changes: 26 additions & 6 deletions drivers/gpu/drm/i915/gem/i915_gem_ttm.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,10 +248,13 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
struct ttm_resource_manager *man =
ttm_manager_type(bo->bdev, bo->resource->mem_type);
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
enum ttm_caching caching = i915_ttm_select_tt_caching(obj);
enum ttm_caching caching;
struct i915_ttm_tt *i915_tt;
int ret;

if (!obj)
return NULL;

i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL);
if (!i915_tt)
return NULL;
Expand All @@ -260,6 +263,7 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
man->use_tt)
page_flags |= TTM_TT_FLAG_ZERO_ALLOC;

caching = i915_ttm_select_tt_caching(obj);
if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) {
page_flags |= TTM_TT_FLAG_EXTERNAL |
TTM_TT_FLAG_EXTERNAL_MAPPABLE;
Expand Down Expand Up @@ -326,6 +330,9 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);

if (!obj)
return false;

/*
* EXTERNAL objects should never be swapped out by TTM, instead we need
* to handle that ourselves. TTM will already skip such objects for us,
Expand Down Expand Up @@ -552,8 +559,12 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
int ret = i915_ttm_move_notify(bo);
int ret;

if (!obj)
return;

ret = i915_ttm_move_notify(bo);
GEM_WARN_ON(ret);
GEM_WARN_ON(obj->ttm.cached_io_rsgt);
if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
Expand All @@ -575,17 +586,23 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
unsigned long page_offset)
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
unsigned long base = obj->mm.region->iomap.base - obj->mm.region->region.start;
struct scatterlist *sg;
unsigned long base;
unsigned int ofs;

GEM_BUG_ON(!obj);
GEM_WARN_ON(bo->ttm);

base = obj->mm.region->iomap.base - obj->mm.region->region.start;
sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true);

return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs;
}

/*
* All callbacks need to take care not to downcast a struct ttm_buffer_object
* without checking its subclass, since it might be a TTM ghost object.
*/
static struct ttm_device_funcs i915_ttm_bo_driver = {
.ttm_tt_create = i915_ttm_tt_create,
.ttm_tt_populate = i915_ttm_tt_populate,
Expand Down Expand Up @@ -847,13 +864,16 @@ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj)
static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
{
struct vm_area_struct *area = vmf->vma;
struct drm_i915_gem_object *obj =
i915_ttm_to_gem(area->vm_private_data);
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
struct ttm_buffer_object *bo = area->vm_private_data;
struct drm_device *dev = bo->base.dev;
struct drm_i915_gem_object *obj;
vm_fault_t ret;
int idx;

obj = i915_ttm_to_gem(bo);
if (!obj)
return VM_FAULT_SIGBUS;

/* Sanity check that we allow writing into this object */
if (unlikely(i915_gem_object_is_readonly(obj) &&
area->vm_flags & VM_WRITE))
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/gem/i915_gem_ttm.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo);
static inline struct drm_i915_gem_object *
i915_ttm_to_gem(struct ttm_buffer_object *bo)
{
if (GEM_WARN_ON(bo->destroy != i915_ttm_bo_destroy))
if (bo->destroy != i915_ttm_bo_destroy)
return NULL;

return container_of(bo, struct drm_i915_gem_object, __do_not_access);
Expand Down
Loading

0 comments on commit 6385eb7

Please sign in to comment.