author    Dave Airlie <airlied@redhat.com>    2010-07-06 16:54:16 +1000
committer Dave Airlie <airlied@redhat.com>    2010-07-06 16:54:16 +1000
commit    77668d621f48cb638b1408430b64cc7bd614a5db (patch)
tree      b0fa790a200c2d1b25157b5c46c8d40e9ebd0239
parent    ba7daacc1438eac912101d642ec328ae5e63fe20 (diff)
plan b: multi-cs submits (radeon-cs-setup)
-rw-r--r--   drivers/gpu/drm/radeon/r300.c          4
-rw-r--r--   drivers/gpu/drm/radeon/r600_cs.c       2
-rw-r--r--   drivers/gpu/drm/radeon/radeon.h        6
-rw-r--r--   drivers/gpu/drm/radeon/radeon_cs.c    75
-rw-r--r--   drivers/gpu/drm/radeon/radeon_fence.c   1
-rw-r--r--   include/drm/radeon_drm.h                1
6 files changed, 44 insertions, 45 deletions
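
This "plan b" drops the separate RADEON_CHUNK_ID_IB_SETUP chunk, and with it the last_cs_filp bookkeeping that decided when a setup stream had to be replayed. Instead, one CS ioctl can now carry several ordinary RADEON_CHUNK_ID_IB chunks (the parser tracks up to four), each capped at 16K dwords. The kernel parses and schedules the IBs in submission order and sets the flush flag only on the final IB's fence, so intermediate fences skip the scissor/cache-flush sequence in r300_fence_ring_emit(); fences created outside the CS path default to flush = true and keep the old behaviour.

A minimal sketch of what a two-IB submission could look like from userspace, assuming libdrm's drmCommandWriteRead() wrapper and the structures from radeon_drm.h; the helper name and include paths are illustrative, and packet contents, relocation entries, and error handling are elided:

	#include <stdint.h>
	#include <string.h>
	#include <xf86drm.h>
	#include <radeon_drm.h>

	int submit_two_ibs(int fd, uint32_t *relocs, uint32_t relocs_dw,
			   uint32_t *ib0, uint32_t ib0_dw,
			   uint32_t *ib1, uint32_t ib1_dw)
	{
		struct drm_radeon_cs_chunk chunks[3];
		uint64_t chunk_ptrs[3];
		struct drm_radeon_cs cs;
		int i;

		memset(&cs, 0, sizeof(cs));
		memset(chunks, 0, sizeof(chunks));

		/* One relocation chunk is shared by every IB in the submission. */
		chunks[0].chunk_id = RADEON_CHUNK_ID_RELOCS;
		chunks[0].length_dw = relocs_dw;
		chunks[0].chunk_data = (uint64_t)(uintptr_t)relocs;

		/* Both payload chunks are plain RADEON_CHUNK_ID_IB now; the
		 * kernel schedules them in order and only requests a full
		 * flush on the last one.  Each must be 1..16384 dwords. */
		chunks[1].chunk_id = RADEON_CHUNK_ID_IB;
		chunks[1].length_dw = ib0_dw;
		chunks[1].chunk_data = (uint64_t)(uintptr_t)ib0;

		chunks[2].chunk_id = RADEON_CHUNK_ID_IB;
		chunks[2].length_dw = ib1_dw;
		chunks[2].chunk_data = (uint64_t)(uintptr_t)ib1;

		/* The ioctl takes a user pointer to an array of chunk pointers. */
		for (i = 0; i < 3; i++)
			chunk_ptrs[i] = (uint64_t)(uintptr_t)&chunks[i];

		cs.num_chunks = 3;
		cs.chunks = (uint64_t)(uintptr_t)chunk_ptrs;

		return drmCommandWriteRead(fd, DRM_RADEON_CS, &cs, sizeof(cs));
	}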
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index dfd80d505dd6..c47fd37d4fc1 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -182,11 +182,11 @@ void r300_fence_ring_emit(struct radeon_device *rdev,
/* Whoever calls radeon_fence_emit should call ring_lock and ask
* for enough space (today the callers are ib schedule and buffer move) */
/* Write SC register so SC & US assert idle */
- if (fence->flush == false)
- goto just_fence;
radeon_ring_write(rdev, PACKET0(R300_RE_SCISSORS_TL, 0));
radeon_ring_write(rdev, 0);
+ if (fence->flush == false)
+ goto just_fence;
radeon_ring_write(rdev, PACKET0(R300_RE_SCISSORS_BR, 0));
radeon_ring_write(rdev, 0);
/* Flush 3D cache */
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 9f6df777ee61..ba4c5314d386 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -1498,7 +1498,7 @@ int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
/* Copy the packet into the IB, the parser will read from the
* input memory (cached) and write to the IB (which can be
* uncached). */
- ib_chunk = &parser.chunks[parser.chunk_ib_idx];
+ ib_chunk = &parser.chunks[parser.chunk_final_ib_idx];
ib_chunk->ib = &fake_ib;
ib_chunk->ib->length_dw = ib_chunk->length_dw;
*l = ib_chunk->length_dw;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5ac36e048c5b..2d7363081174 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -523,9 +523,10 @@ struct radeon_cs_parser {
struct radeon_cs_reloc **relocs_ptr;
struct list_head validated;
/* indices of various chunks */
- int chunk_ib_idx;
int chunk_relocs_idx;
- int chunk_setup_idx;
+ int chunk_final_ib_idx;
+ int chunk_ib_idx[4];
+ int num_ib_chunks;
void *track;
unsigned family;
int parser_error;
@@ -1084,7 +1085,6 @@ struct radeon_device {
bool powered_down;
struct notifier_block acpi_nb;
- struct drm_file *last_cs_filp;
void *current_track;
};
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index fd6d485bef70..2e3e77080cc2 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -100,21 +100,23 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
}
/* get chunks */
INIT_LIST_HEAD(&p->validated);
- p->chunk_ib_idx = -1;
- p->chunk_setup_idx = -1;
+ p->chunk_final_ib_idx = -1;
p->chunk_relocs_idx = -1;
p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
if (p->chunks_array == NULL) {
+ DRM_ERROR("fail alloc chunks array %d\n", cs->num_chunks);
return -ENOMEM;
}
chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
if (DRM_COPY_FROM_USER(p->chunks_array, chunk_array_ptr,
sizeof(uint64_t)*cs->num_chunks)) {
+ DRM_ERROR("failed copy chun array %d\n", cs->num_chunks);
return -EFAULT;
}
p->nchunks = cs->num_chunks;
p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
if (p->chunks == NULL) {
+ DRM_ERROR("failed calloc cunks %d\n", cs->num_chunks);
return -ENOMEM;
}
for (i = 0; i < p->nchunks; i++) {
@@ -137,23 +139,23 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
p->chunk_relocs_idx = i;
}
if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
- p->chunk_ib_idx = i;
/* zero length IB isn't useful */
- if (p->chunks[i].length_dw == 0)
+ if ((p->chunks[i].length_dw == 0) || (p->chunks[i].length_dw > (16 * 1024))) {
+ DRM_ERROR("ib chunk %d/%d too large %d\n", i, p->num_ib_chunks, p->chunks[i].length_dw);
return -EINVAL;
+ }
+ p->chunk_ib_idx[p->num_ib_chunks++] = i;
+ p->chunk_final_ib_idx = i;
}
- if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB_SETUP)
- if (p->chunks[i].length_dw > 0)
- p->chunk_setup_idx = i;
- p->chunks[i].length_dw = user_chunk.length_dw;
p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data;
cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data;
- if (p->chunks[i].chunk_id != RADEON_CHUNK_ID_IB && p->chunks[i].chunk_id != RADEON_CHUNK_ID_IB_SETUP) {
+ if (p->chunks[i].chunk_id != RADEON_CHUNK_ID_IB) {
size = p->chunks[i].length_dw * sizeof(uint32_t);
p->chunks[i].kdata = kmalloc(size, GFP_KERNEL);
if (p->chunks[i].kdata == NULL) {
+ DRM_ERROR("failed kmalloc non-ib chunk failed %d\n", size);
return -ENOMEM;
}
if (DRM_COPY_FROM_USER(p->chunks[i].kdata,
@@ -166,6 +168,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
if (p->chunks[i].kpage[0] == NULL || p->chunks[i].kpage[1] == NULL) {
kfree(p->chunks[i].kpage[0]);
kfree(p->chunks[i].kpage[1]);
+ DRM_ERROR("failed alloc pages\n");
return -ENOMEM;
}
p->chunks[i].kpage_idx[0] = -1;
@@ -174,11 +177,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
p->chunks[i].last_page_index = ((p->chunks[i].length_dw * 4) - 1) / PAGE_SIZE;
}
}
- if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) {
- DRM_ERROR("cs IB too big: %d\n",
- p->chunks[p->chunk_ib_idx].length_dw);
- return -EINVAL;
- }
+
return 0;
}
@@ -194,8 +193,8 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
{
unsigned i;
- if (!error && parser->chunks[parser->chunk_ib_idx].ib)
- radeon_bo_list_fence(&parser->validated, parser->chunks[parser->chunk_ib_idx].ib->fence);
+ if (!error && parser->chunks[parser->chunk_final_ib_idx].ib)
+ radeon_bo_list_fence(&parser->validated, parser->chunks[parser->chunk_final_ib_idx].ib->fence);
radeon_bo_list_unreserve(&parser->validated);
if (parser->relocs != NULL) {
@@ -219,32 +218,34 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
static int radeon_cs_parser_chunk(struct radeon_cs_parser *parser, int chunk_id, bool flush)
{
struct radeon_device *rdev = parser->rdev;
- struct radeon_cs_chunk *ib_chunk;
+ struct radeon_cs_chunk *chunk;
int r;
- ib_chunk = &parser->chunks[chunk_id];
- r = radeon_ib_get(parser->rdev, &ib_chunk->ib);
+ chunk = &parser->chunks[chunk_id];
+ r = radeon_ib_get(parser->rdev, &chunk->ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
}
- ib_chunk->ib->fence->flush = flush;
+ chunk->ib->fence->flush = flush;
- ib_chunk->ib->length_dw = ib_chunk->length_dw;
+ if (parser->num_ib_chunks > 1)
+ DRM_INFO("ib chunk %d length %d\n", chunk_id, chunk->length_dw);
+ chunk->ib->length_dw = chunk->length_dw;
/* Copy the packet into the IB, the parser will read from the
* input memory (cached) and write to the IB (which can be
* uncached). */
- r = radeon_cs_parse(parser, ib_chunk);
+ r = radeon_cs_parse(parser, chunk);
if (r || parser->parser_error) {
DRM_ERROR("Invalid command stream !\n");
return r;
}
- r = radeon_cs_finish_pages(ib_chunk);
+ r = radeon_cs_finish_pages(chunk);
if (r) {
DRM_ERROR("Invalid command stream !\n");
return r;
}
- r = radeon_ib_schedule(rdev, ib_chunk->ib);
+ r = radeon_ib_schedule(rdev, chunk->ib);
if (r) {
DRM_ERROR("Faild to schedule IB !\n");
}
@@ -256,6 +257,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
struct radeon_device *rdev = dev->dev_private;
struct radeon_cs_parser parser;
int r;
+ int i;
mutex_lock(&rdev->cs_mutex);
@@ -282,23 +284,20 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
goto out;
}
- if (parser.chunk_setup_idx != -1) {
- if (rdev->last_cs_filp != filp) {
- radeon_cs_clear_tracker(rdev, parser.track);
- r = radeon_cs_parser_chunk(&parser, parser.chunk_setup_idx, false);
- if (r)
- goto out;
- }
- } else
- radeon_cs_clear_tracker(rdev, parser.track);
+ radeon_cs_clear_tracker(rdev, parser.track);
- r = radeon_cs_parser_chunk(&parser, parser.chunk_ib_idx, true);
- if (r)
- goto out;
+ if (parser.num_ib_chunks > 1)
+ DRM_INFO("submitting chunks %d\n", parser.num_ib_chunks);
- /* verify trackers */
+ for (i = 0; i < parser.num_ib_chunks; i++) {
+ bool flush = (parser.chunk_ib_idx[i] == parser.chunk_final_ib_idx);
+ if (parser.num_ib_chunks > 1)
+ DRM_INFO("submitting chunks %d %d %d\n", i, parser.chunk_ib_idx[i], flush);
+ r = radeon_cs_parser_chunk(&parser, parser.chunk_ib_idx[i], flush);
+ if (r)
+ goto out;
+ }
out:
- rdev->last_cs_filp = filp;
radeon_cs_parser_fini(&parser, r);
mutex_unlock(&rdev->cs_mutex);
return r;
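
One caveat in radeon_cs_parser_init() above: chunk_ib_idx is now a fixed four-entry array (see the radeon.h hunk), but num_ib_chunks is incremented without any bound, so a command stream carrying more than four IB chunks would overflow it. A minimal guard, sketched against the patched code (the error message is illustrative, and this assumes num_ib_chunks starts at zero because radeon_cs_ioctl() zeroes the parser before init):

	if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
		...
		if (p->num_ib_chunks >= ARRAY_SIZE(p->chunk_ib_idx)) {
			DRM_ERROR("too many ib chunks\n");
			return -EINVAL;
		}
		p->chunk_ib_idx[p->num_ib_chunks++] = i;
		p->chunk_final_ib_idx = i;
	}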
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index b1f9a81b5d1d..9f739611fb75 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -148,6 +148,7 @@ int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence)
(*fence)->emited = false;
(*fence)->signaled = false;
(*fence)->seq = 0;
+ (*fence)->flush = true;
INIT_LIST_HEAD(&(*fence)->list);
write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index eeb4eff56a34..fad580801f01 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -874,7 +874,6 @@ struct drm_radeon_gem_pwrite {
#define RADEON_CHUNK_ID_RELOCS 0x01
#define RADEON_CHUNK_ID_IB 0x02
-#define RADEON_CHUNK_ID_IB_SETUP 0x03
struct drm_radeon_cs_chunk {
uint32_t chunk_id;