Rewrite radeon_cp_dispatch_texture() to avoid pingponging back to userspace

when issuing large (multi-buffer) uploads.
2002-12-06 12:22:43 +00:00 · 2002-12-06 12:22:43 +00:00 · b03fa556b2
parent a885d6786f
commit b03fa556b2
2 changed files with 200 additions and 206 deletions
--- a/shared-core/radeon_state.c
+++ b/shared-core/radeon_state.c
@ -1074,19 +1074,30 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
 	const u8 *data;
 	int size, dwords, tex_width, blit_width;
 	u32 y, height;
-	int ret = 0, i;
+	int i;
 	RING_LOCALS;

 	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

-	/* FIXME: Be smarter about this...
+	/* Flush the pixel cache.  This ensures no pixel data gets mixed
+	 * up with the texture data from the host data blit, otherwise
+	 * part of the texture image may be corrupted.
 	 */
-	buf = radeon_freelist_get( dev );
-	if ( !buf ) return DRM_ERR(EAGAIN);
+	BEGIN_RING( 4 );
+	RADEON_FLUSH_CACHE();
+	RADEON_WAIT_UNTIL_IDLE();
+	ADVANCE_RING();
+
+#ifdef __BIG_ENDIAN
+	/* The Mesa texture functions provide the data in little endian as the
+	 * chip wants it, but we need to compensate for the fact that the CP
+	 * ring gets byte-swapped
+	 */
+	BEGIN_RING( 2 );
+	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
+	ADVANCE_RING();
+#endif

-	DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
-		   tex->offset >> 10, tex->pitch, tex->format,
-		   image->x, image->y, image->width, image->height );

 	/* The compiler won't optimize away a division by a variable,
 	 * even if the only legal values are powers of two.  Thus, we'll
@ -1120,127 +1131,113 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
 		return DRM_ERR(EINVAL);
 	}

-	DRM_DEBUG( "   tex=%dx%d  blit=%d\n",
-		   tex_width, tex->height, blit_width );
+	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );

-	/* Flush the pixel cache.  This ensures no pixel data gets mixed
-	 * up with the texture data from the host data blit, otherwise
-	 * part of the texture image may be corrupted.
-	 */
-	BEGIN_RING( 4 );
+	do {
+		DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
+			   tex->offset >> 10, tex->pitch, tex->format,
+			   image->x, image->y, image->width, image->height );

-	RADEON_FLUSH_CACHE();
-	RADEON_WAIT_UNTIL_IDLE();
-
-	ADVANCE_RING();
-
-#ifdef __BIG_ENDIAN
-	/* The Mesa texture functions provide the data in little endian as the
-	 * chip wants it, but we need to compensate for the fact that the CP
-	 * ring gets byte-swapped
-	 */
-	BEGIN_RING( 2 );
-	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
-	ADVANCE_RING();
-#endif
-
-	/* Make a copy of the parameters in case we have to update them
-	 * for a multi-pass texture blit.
-	 */
-	y = image->y;
-	height = image->height;
-	data = (const u8 *)image->data;
-
-	size = height * blit_width;
-
-	if ( size > RADEON_MAX_TEXTURE_SIZE ) {
-		/* Texture image is too large, do a multipass upload */
-		ret = DRM_ERR(EAGAIN);
-
-		/* Adjust the blit size to fit the indirect buffer */
-		height = RADEON_MAX_TEXTURE_SIZE / blit_width;
+		/* Make a copy of the parameters in case we have to
+		 * update them for a multi-pass texture blit.
+		 */
+		y = image->y;
+		height = image->height;
+		data = (const u8 *)image->data;
+		
 		size = height * blit_width;

+		if ( size > RADEON_MAX_TEXTURE_SIZE ) {
+			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
+			size = height * blit_width;
+		} else if ( size < 4 && size > 0 ) {
+			size = 4;
+		} else if ( size == 0 ) {
+			return 0;
+		}
+
 		/* Update the input parameters for next time */
 		image->y += height;
 		image->height -= height;
-		image->data = (const char *)image->data + size;
+		image->data += size;

-		if ( DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ) ) {
-			DRM_ERROR( "EFAULT on tex->image\n" );
-			return DRM_ERR(EFAULT);
+		buf = radeon_freelist_get( dev );
+		if ( 0 && !buf ) {
+			radeon_do_cp_idle( dev_priv );
+			buf = radeon_freelist_get( dev );
 		}
-	} else if ( size < 4 && size > 0 ) {
-		size = 4;
-	}
-
-	dwords = size / 4;
-
-	/* Dispatch the indirect buffer.
-	 */
-	buffer = (u32 *)((char *)dev_priv->buffers->handle + buf->offset);
-
-	buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
-	buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
-		     RADEON_GMC_BRUSH_NONE |
-		     (format << 8) |
-		     RADEON_GMC_SRC_DATATYPE_COLOR |
-		     RADEON_ROP3_S |
-		     RADEON_DP_SRC_SOURCE_HOST_DATA |
-		     RADEON_GMC_CLR_CMP_CNTL_DIS |
-		     RADEON_GMC_WR_MSK_DIS);
-
-	buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
-	buffer[3] = 0xffffffff;
-	buffer[4] = 0xffffffff;
-	buffer[5] = (y << 16) | image->x;
-	buffer[6] = (height << 16) | image->width;
-	buffer[7] = dwords;
-
-	buffer += 8;
-
-	if ( tex_width >= 32 ) {
-		/* Texture image width is larger than the minimum, so we
-		 * can upload it directly.
-		 */
-		if ( DRM_COPY_FROM_USER( buffer, data, dwords * sizeof(u32) ) ) {
-			DRM_ERROR( "EFAULT on data, %d dwords\n", dwords );
-			return DRM_ERR(EFAULT);
+		if ( !buf ) {
+			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
+			DRM_COPY_TO_USER( tex->image, image, sizeof(*image) );
+			return DRM_ERR(EAGAIN);
 		}
-	} else {
-		/* Texture image width is less than the minimum, so we
-		 * need to pad out each image scanline to the minimum
-		 * width.
+
+
+		/* Dispatch the indirect buffer.
 		 */
-		for ( i = 0 ; i < tex->height ; i++ ) {
-			if ( DRM_COPY_FROM_USER( buffer, data, tex_width ) ) {
-				DRM_ERROR( "EFAULT on pad, %d bytes\n",
-					   tex_width );
+		buffer = (u32*)((char*)dev_priv->buffers->handle + buf->offset);
+		dwords = size / 4;
+		buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
+		buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+			     RADEON_GMC_BRUSH_NONE |
+			     (format << 8) |
+			     RADEON_GMC_SRC_DATATYPE_COLOR |
+			     RADEON_ROP3_S |
+			     RADEON_DP_SRC_SOURCE_HOST_DATA |
+			     RADEON_GMC_CLR_CMP_CNTL_DIS |
+			     RADEON_GMC_WR_MSK_DIS);
+		
+		buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
+		buffer[3] = 0xffffffff;
+		buffer[4] = 0xffffffff;
+		buffer[5] = (y << 16) | image->x;
+		buffer[6] = (height << 16) | image->width;
+		buffer[7] = dwords;
+		buffer += 8;
+
+		if ( tex_width >= 32 ) {
+			/* Texture image width is larger than the minimum, so we
+			 * can upload it directly.
+			 */
+			if ( DRM_COPY_FROM_USER( buffer, data, 
+						 dwords * sizeof(u32) ) ) {
+				DRM_ERROR( "EFAULT on data, %d dwords\n", 
+					   dwords );
 				return DRM_ERR(EFAULT);
 			}
-			buffer += 8;
-			data += tex_width;
+		} else {
+			/* Texture image width is less than the minimum, so we
+			 * need to pad out each image scanline to the minimum
+			 * width.
+			 */
+			for ( i = 0 ; i < tex->height ; i++ ) {
+				if ( DRM_COPY_FROM_USER( buffer, data, 
+							 tex_width ) ) {
+					DRM_ERROR( "EFAULT on pad, %d bytes\n",
+						   tex_width );
+					return DRM_ERR(EFAULT);
+				}
+				buffer += 8;
+				data += tex_width;
+			}
 		}
-	}

-	buf->pid = DRM_CURRENTPID;
-	buf->used = (dwords + 8) * sizeof(u32);
+		buf->pid = DRM_CURRENTPID;
+		buf->used = (dwords + 8) * sizeof(u32);
+		radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
+		radeon_cp_discard_buffer( dev, buf );

-	radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
-	radeon_cp_discard_buffer( dev, buf );
+	} while (image->height > 0);

 	/* Flush the pixel cache after the blit completes.  This ensures
 	 * the texture data is written out to memory before rendering
 	 * continues.
 	 */
 	BEGIN_RING( 4 );
-
 	RADEON_FLUSH_CACHE();
 	RADEON_WAIT_UNTIL_2D_IDLE();
-
 	ADVANCE_RING();
-
-	return ret;
+	return 0;
 }


--- a/shared/radeon_state.c
+++ b/shared/radeon_state.c
@ -1074,19 +1074,30 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
 	const u8 *data;
 	int size, dwords, tex_width, blit_width;
 	u32 y, height;
-	int ret = 0, i;
+	int i;
 	RING_LOCALS;

 	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

-	/* FIXME: Be smarter about this...
+	/* Flush the pixel cache.  This ensures no pixel data gets mixed
+	 * up with the texture data from the host data blit, otherwise
+	 * part of the texture image may be corrupted.
 	 */
-	buf = radeon_freelist_get( dev );
-	if ( !buf ) return DRM_ERR(EAGAIN);
+	BEGIN_RING( 4 );
+	RADEON_FLUSH_CACHE();
+	RADEON_WAIT_UNTIL_IDLE();
+	ADVANCE_RING();
+
+#ifdef __BIG_ENDIAN
+	/* The Mesa texture functions provide the data in little endian as the
+	 * chip wants it, but we need to compensate for the fact that the CP
+	 * ring gets byte-swapped
+	 */
+	BEGIN_RING( 2 );
+	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
+	ADVANCE_RING();
+#endif

-	DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
-		   tex->offset >> 10, tex->pitch, tex->format,
-		   image->x, image->y, image->width, image->height );

 	/* The compiler won't optimize away a division by a variable,
 	 * even if the only legal values are powers of two.  Thus, we'll
@ -1120,127 +1131,113 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
 		return DRM_ERR(EINVAL);
 	}

-	DRM_DEBUG( "   tex=%dx%d  blit=%d\n",
-		   tex_width, tex->height, blit_width );
+	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );

-	/* Flush the pixel cache.  This ensures no pixel data gets mixed
-	 * up with the texture data from the host data blit, otherwise
-	 * part of the texture image may be corrupted.
-	 */
-	BEGIN_RING( 4 );
+	do {
+		DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
+			   tex->offset >> 10, tex->pitch, tex->format,
+			   image->x, image->y, image->width, image->height );

-	RADEON_FLUSH_CACHE();
-	RADEON_WAIT_UNTIL_IDLE();
-
-	ADVANCE_RING();
-
-#ifdef __BIG_ENDIAN
-	/* The Mesa texture functions provide the data in little endian as the
-	 * chip wants it, but we need to compensate for the fact that the CP
-	 * ring gets byte-swapped
-	 */
-	BEGIN_RING( 2 );
-	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
-	ADVANCE_RING();
-#endif
-
-	/* Make a copy of the parameters in case we have to update them
-	 * for a multi-pass texture blit.
-	 */
-	y = image->y;
-	height = image->height;
-	data = (const u8 *)image->data;
-
-	size = height * blit_width;
-
-	if ( size > RADEON_MAX_TEXTURE_SIZE ) {
-		/* Texture image is too large, do a multipass upload */
-		ret = DRM_ERR(EAGAIN);
-
-		/* Adjust the blit size to fit the indirect buffer */
-		height = RADEON_MAX_TEXTURE_SIZE / blit_width;
+		/* Make a copy of the parameters in case we have to
+		 * update them for a multi-pass texture blit.
+		 */
+		y = image->y;
+		height = image->height;
+		data = (const u8 *)image->data;
+		
 		size = height * blit_width;

+		if ( size > RADEON_MAX_TEXTURE_SIZE ) {
+			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
+			size = height * blit_width;
+		} else if ( size < 4 && size > 0 ) {
+			size = 4;
+		} else if ( size == 0 ) {
+			return 0;
+		}
+
 		/* Update the input parameters for next time */
 		image->y += height;
 		image->height -= height;
-		image->data = (const char *)image->data + size;
+		image->data += size;

-		if ( DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ) ) {
-			DRM_ERROR( "EFAULT on tex->image\n" );
-			return DRM_ERR(EFAULT);
+		buf = radeon_freelist_get( dev );
+		if ( 0 && !buf ) {
+			radeon_do_cp_idle( dev_priv );
+			buf = radeon_freelist_get( dev );
 		}
-	} else if ( size < 4 && size > 0 ) {
-		size = 4;
-	}
-
-	dwords = size / 4;
-
-	/* Dispatch the indirect buffer.
-	 */
-	buffer = (u32 *)((char *)dev_priv->buffers->handle + buf->offset);
-
-	buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
-	buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
-		     RADEON_GMC_BRUSH_NONE |
-		     (format << 8) |
-		     RADEON_GMC_SRC_DATATYPE_COLOR |
-		     RADEON_ROP3_S |
-		     RADEON_DP_SRC_SOURCE_HOST_DATA |
-		     RADEON_GMC_CLR_CMP_CNTL_DIS |
-		     RADEON_GMC_WR_MSK_DIS);
-
-	buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
-	buffer[3] = 0xffffffff;
-	buffer[4] = 0xffffffff;
-	buffer[5] = (y << 16) | image->x;
-	buffer[6] = (height << 16) | image->width;
-	buffer[7] = dwords;
-
-	buffer += 8;
-
-	if ( tex_width >= 32 ) {
-		/* Texture image width is larger than the minimum, so we
-		 * can upload it directly.
-		 */
-		if ( DRM_COPY_FROM_USER( buffer, data, dwords * sizeof(u32) ) ) {
-			DRM_ERROR( "EFAULT on data, %d dwords\n", dwords );
-			return DRM_ERR(EFAULT);
+		if ( !buf ) {
+			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
+			DRM_COPY_TO_USER( tex->image, image, sizeof(*image) );
+			return DRM_ERR(EAGAIN);
 		}
-	} else {
-		/* Texture image width is less than the minimum, so we
-		 * need to pad out each image scanline to the minimum
-		 * width.
+
+
+		/* Dispatch the indirect buffer.
 		 */
-		for ( i = 0 ; i < tex->height ; i++ ) {
-			if ( DRM_COPY_FROM_USER( buffer, data, tex_width ) ) {
-				DRM_ERROR( "EFAULT on pad, %d bytes\n",
-					   tex_width );
+		buffer = (u32*)((char*)dev_priv->buffers->handle + buf->offset);
+		dwords = size / 4;
+		buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
+		buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+			     RADEON_GMC_BRUSH_NONE |
+			     (format << 8) |
+			     RADEON_GMC_SRC_DATATYPE_COLOR |
+			     RADEON_ROP3_S |
+			     RADEON_DP_SRC_SOURCE_HOST_DATA |
+			     RADEON_GMC_CLR_CMP_CNTL_DIS |
+			     RADEON_GMC_WR_MSK_DIS);
+		
+		buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
+		buffer[3] = 0xffffffff;
+		buffer[4] = 0xffffffff;
+		buffer[5] = (y << 16) | image->x;
+		buffer[6] = (height << 16) | image->width;
+		buffer[7] = dwords;
+		buffer += 8;
+
+		if ( tex_width >= 32 ) {
+			/* Texture image width is larger than the minimum, so we
+			 * can upload it directly.
+			 */
+			if ( DRM_COPY_FROM_USER( buffer, data, 
+						 dwords * sizeof(u32) ) ) {
+				DRM_ERROR( "EFAULT on data, %d dwords\n", 
+					   dwords );
 				return DRM_ERR(EFAULT);
 			}
-			buffer += 8;
-			data += tex_width;
+		} else {
+			/* Texture image width is less than the minimum, so we
+			 * need to pad out each image scanline to the minimum
+			 * width.
+			 */
+			for ( i = 0 ; i < tex->height ; i++ ) {
+				if ( DRM_COPY_FROM_USER( buffer, data, 
+							 tex_width ) ) {
+					DRM_ERROR( "EFAULT on pad, %d bytes\n",
+						   tex_width );
+					return DRM_ERR(EFAULT);
+				}
+				buffer += 8;
+				data += tex_width;
+			}
 		}
-	}

-	buf->pid = DRM_CURRENTPID;
-	buf->used = (dwords + 8) * sizeof(u32);
+		buf->pid = DRM_CURRENTPID;
+		buf->used = (dwords + 8) * sizeof(u32);
+		radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
+		radeon_cp_discard_buffer( dev, buf );

-	radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
-	radeon_cp_discard_buffer( dev, buf );
+	} while (image->height > 0);

 	/* Flush the pixel cache after the blit completes.  This ensures
 	 * the texture data is written out to memory before rendering
 	 * continues.
 	 */
 	BEGIN_RING( 4 );
-
 	RADEON_FLUSH_CACHE();
 	RADEON_WAIT_UNTIL_2D_IDLE();
-
 	ADVANCE_RING();
-
-	return ret;
+	return 0;
 }