drm/i915/cmdparser: Only cache the dst vmap
For simplicity, we want to continue using a contiguous mapping of the command buffer, but we can reduce the number of vmappings we hold by switching over to a page-by-page copy from the user batch buffer to the shadow. The cost of saving one linear mapping is about 5% in trivial workloads, which is more or less the overhead of calling kmap_atomic().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-34-chris@chris-wilson.co.uk
parent 0b5372727b
commit ed13033f02
1 changed file with 19 additions and 14 deletions
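The hunks below replace the single memcpy() from a vmapped source with a per-page copy through kmap_atomic(). As a rough standalone sketch of that pattern (not the driver's code: it assumes a hypothetical get_src_page() callback standing in for i915_gem_object_get_page(), an already-mapped destination buffer, and it omits the clflush handling present in the real patch):

#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/types.h>

/*
 * Copy 'len' bytes starting at byte 'start' of a page-backed source object
 * into an already-mapped destination, touching one source page at a time.
 * get_src_page() is a hypothetical helper standing in for
 * i915_gem_object_get_page(); the loop structure mirrors the patch below.
 */
static void copy_from_pages(void *dst, void *src_obj,
                            struct page *(*get_src_page)(void *obj, int n),
                            u32 start, u32 len)
{
        unsigned int offset = offset_in_page(start);
        int n = start >> PAGE_SHIFT;

        while (len) {
                /* Never copy past the end of the current source page. */
                int chunk = min_t(int, len, PAGE_SIZE - offset);
                void *vaddr = kmap_atomic(get_src_page(src_obj, n++));

                memcpy(dst, vaddr + offset, chunk);
                kunmap_atomic(vaddr);

                dst += chunk;
                len -= chunk;
                offset = 0; /* subsequent pages are copied from their start */
        }
}

Each iteration holds at most one atomic kmap at a time, which is where the roughly 5% overhead mentioned in the commit message comes from.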
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -946,7 +946,8 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 {
         unsigned int src_needs_clflush;
         unsigned int dst_needs_clflush;
-        void *src, *dst;
+        void *dst, *ptr;
+        int offset, n;
         int ret;
 
         ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
@@ -959,19 +960,12 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
                 goto unpin_src;
         }
 
-        src = i915_gem_object_pin_map(src_obj, I915_MAP_WB);
-        if (IS_ERR(src)) {
-                dst = src;
-                goto unpin_dst;
-        }
-
         dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
         if (IS_ERR(dst))
-                goto unmap_src;
+                goto unpin_dst;
 
-        src += batch_start_offset;
-        if (src_needs_clflush)
-                drm_clflush_virt_range(src, batch_len);
+        ptr = dst;
+        offset = offset_in_page(batch_start_offset);
 
         /* We can avoid clflushing partial cachelines before the write if we
          * only every write full cache-lines. Since we know that both the
@@ -982,13 +976,24 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
         if (dst_needs_clflush & CLFLUSH_BEFORE)
                 batch_len = roundup(batch_len, boot_cpu_data.x86_clflush_size);
 
-        memcpy(dst, src, batch_len);
+        for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
+                int len = min_t(int, batch_len, PAGE_SIZE - offset);
+                void *vaddr;
+
+                vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+                if (src_needs_clflush)
+                        drm_clflush_virt_range(vaddr + offset, len);
+                memcpy(ptr, vaddr + offset, len);
+                kunmap_atomic(vaddr);
+
+                ptr += len;
+                batch_len -= len;
+                offset = 0;
+        }
 
         /* dst_obj is returned with vmap pinned */
         *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
 
-unmap_src:
-        i915_gem_object_unpin_map(src_obj);
 unpin_dst:
         i915_gem_obj_finish_shmem_access(dst_obj);
 unpin_src:
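A note on the CLFLUSH_BEFORE rounding kept by this patch: roundup(batch_len, boot_cpu_data.x86_clflush_size) pads the copy to whole cachelines, so with a typical 64-byte clflush size a 100-byte batch is copied as 128 bytes; as the in-code comment explains, copying slightly too much is harmless because validation only runs to the end of the batch.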