Rewrite radeon_cp_dispatch_texture() to avoid pingponging back to userspace

when issuing large (multi-buffer) uploads.
main
Keith Whitwell 2002-12-06 12:22:43 +00:00
parent a885d6786f
commit b03fa556b2
2 changed files with 200 additions and 206 deletions

View File

@ -1074,19 +1074,30 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
const u8 *data;
int size, dwords, tex_width, blit_width;
u32 y, height;
int ret = 0, i;
int i;
RING_LOCALS;
dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
/* FIXME: Be smarter about this...
/* Flush the pixel cache. This ensures no pixel data gets mixed
* up with the texture data from the host data blit, otherwise
* part of the texture image may be corrupted.
*/
buf = radeon_freelist_get( dev );
if ( !buf ) return DRM_ERR(EAGAIN);
BEGIN_RING( 4 );
RADEON_FLUSH_CACHE();
RADEON_WAIT_UNTIL_IDLE();
ADVANCE_RING();
#ifdef __BIG_ENDIAN
/* The Mesa texture functions provide the data in little endian as the
* chip wants it, but we need to compensate for the fact that the CP
* ring gets byte-swapped
*/
BEGIN_RING( 2 );
OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
ADVANCE_RING();
#endif
DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
tex->offset >> 10, tex->pitch, tex->format,
image->x, image->y, image->width, image->height );
/* The compiler won't optimize away a division by a variable,
* even if the only legal values are powers of two. Thus, we'll
@ -1120,127 +1131,113 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
return DRM_ERR(EINVAL);
}
DRM_DEBUG( " tex=%dx%d blit=%d\n",
tex_width, tex->height, blit_width );
DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );
/* Flush the pixel cache. This ensures no pixel data gets mixed
* up with the texture data from the host data blit, otherwise
* part of the texture image may be corrupted.
*/
BEGIN_RING( 4 );
do {
DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
tex->offset >> 10, tex->pitch, tex->format,
image->x, image->y, image->width, image->height );
RADEON_FLUSH_CACHE();
RADEON_WAIT_UNTIL_IDLE();
ADVANCE_RING();
#ifdef __BIG_ENDIAN
/* The Mesa texture functions provide the data in little endian as the
* chip wants it, but we need to compensate for the fact that the CP
* ring gets byte-swapped
*/
BEGIN_RING( 2 );
OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
ADVANCE_RING();
#endif
/* Make a copy of the parameters in case we have to update them
* for a multi-pass texture blit.
*/
y = image->y;
height = image->height;
data = (const u8 *)image->data;
size = height * blit_width;
if ( size > RADEON_MAX_TEXTURE_SIZE ) {
/* Texture image is too large, do a multipass upload */
ret = DRM_ERR(EAGAIN);
/* Adjust the blit size to fit the indirect buffer */
height = RADEON_MAX_TEXTURE_SIZE / blit_width;
/* Make a copy of the parameters in case we have to
* update them for a multi-pass texture blit.
*/
y = image->y;
height = image->height;
data = (const u8 *)image->data;
size = height * blit_width;
if ( size > RADEON_MAX_TEXTURE_SIZE ) {
height = RADEON_MAX_TEXTURE_SIZE / blit_width;
size = height * blit_width;
} else if ( size < 4 && size > 0 ) {
size = 4;
} else if ( size == 0 ) {
return 0;
}
/* Update the input parameters for next time */
image->y += height;
image->height -= height;
image->data = (const char *)image->data + size;
image->data += size;
if ( DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ) ) {
DRM_ERROR( "EFAULT on tex->image\n" );
return DRM_ERR(EFAULT);
buf = radeon_freelist_get( dev );
if ( 0 && !buf ) {
radeon_do_cp_idle( dev_priv );
buf = radeon_freelist_get( dev );
}
} else if ( size < 4 && size > 0 ) {
size = 4;
}
dwords = size / 4;
/* Dispatch the indirect buffer.
*/
buffer = (u32 *)((char *)dev_priv->buffers->handle + buf->offset);
buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
RADEON_GMC_BRUSH_NONE |
(format << 8) |
RADEON_GMC_SRC_DATATYPE_COLOR |
RADEON_ROP3_S |
RADEON_DP_SRC_SOURCE_HOST_DATA |
RADEON_GMC_CLR_CMP_CNTL_DIS |
RADEON_GMC_WR_MSK_DIS);
buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
buffer[3] = 0xffffffff;
buffer[4] = 0xffffffff;
buffer[5] = (y << 16) | image->x;
buffer[6] = (height << 16) | image->width;
buffer[7] = dwords;
buffer += 8;
if ( tex_width >= 32 ) {
/* Texture image width is larger than the minimum, so we
* can upload it directly.
*/
if ( DRM_COPY_FROM_USER( buffer, data, dwords * sizeof(u32) ) ) {
DRM_ERROR( "EFAULT on data, %d dwords\n", dwords );
return DRM_ERR(EFAULT);
if ( !buf ) {
DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
DRM_COPY_TO_USER( tex->image, image, sizeof(*image) );
return DRM_ERR(EAGAIN);
}
} else {
/* Texture image width is less than the minimum, so we
* need to pad out each image scanline to the minimum
* width.
/* Dispatch the indirect buffer.
*/
for ( i = 0 ; i < tex->height ; i++ ) {
if ( DRM_COPY_FROM_USER( buffer, data, tex_width ) ) {
DRM_ERROR( "EFAULT on pad, %d bytes\n",
tex_width );
buffer = (u32*)((char*)dev_priv->buffers->handle + buf->offset);
dwords = size / 4;
buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
RADEON_GMC_BRUSH_NONE |
(format << 8) |
RADEON_GMC_SRC_DATATYPE_COLOR |
RADEON_ROP3_S |
RADEON_DP_SRC_SOURCE_HOST_DATA |
RADEON_GMC_CLR_CMP_CNTL_DIS |
RADEON_GMC_WR_MSK_DIS);
buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
buffer[3] = 0xffffffff;
buffer[4] = 0xffffffff;
buffer[5] = (y << 16) | image->x;
buffer[6] = (height << 16) | image->width;
buffer[7] = dwords;
buffer += 8;
if ( tex_width >= 32 ) {
/* Texture image width is larger than the minimum, so we
* can upload it directly.
*/
if ( DRM_COPY_FROM_USER( buffer, data,
dwords * sizeof(u32) ) ) {
DRM_ERROR( "EFAULT on data, %d dwords\n",
dwords );
return DRM_ERR(EFAULT);
}
buffer += 8;
data += tex_width;
} else {
/* Texture image width is less than the minimum, so we
* need to pad out each image scanline to the minimum
* width.
*/
for ( i = 0 ; i < tex->height ; i++ ) {
if ( DRM_COPY_FROM_USER( buffer, data,
tex_width ) ) {
DRM_ERROR( "EFAULT on pad, %d bytes\n",
tex_width );
return DRM_ERR(EFAULT);
}
buffer += 8;
data += tex_width;
}
}
}
buf->pid = DRM_CURRENTPID;
buf->used = (dwords + 8) * sizeof(u32);
buf->pid = DRM_CURRENTPID;
buf->used = (dwords + 8) * sizeof(u32);
radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
radeon_cp_discard_buffer( dev, buf );
radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
radeon_cp_discard_buffer( dev, buf );
} while (image->height > 0);
/* Flush the pixel cache after the blit completes. This ensures
* the texture data is written out to memory before rendering
* continues.
*/
BEGIN_RING( 4 );
RADEON_FLUSH_CACHE();
RADEON_WAIT_UNTIL_2D_IDLE();
ADVANCE_RING();
return ret;
return 0;
}

View File

@ -1074,19 +1074,30 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
const u8 *data;
int size, dwords, tex_width, blit_width;
u32 y, height;
int ret = 0, i;
int i;
RING_LOCALS;
dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
/* FIXME: Be smarter about this...
/* Flush the pixel cache. This ensures no pixel data gets mixed
* up with the texture data from the host data blit, otherwise
* part of the texture image may be corrupted.
*/
buf = radeon_freelist_get( dev );
if ( !buf ) return DRM_ERR(EAGAIN);
BEGIN_RING( 4 );
RADEON_FLUSH_CACHE();
RADEON_WAIT_UNTIL_IDLE();
ADVANCE_RING();
#ifdef __BIG_ENDIAN
/* The Mesa texture functions provide the data in little endian as the
* chip wants it, but we need to compensate for the fact that the CP
* ring gets byte-swapped
*/
BEGIN_RING( 2 );
OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
ADVANCE_RING();
#endif
DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
tex->offset >> 10, tex->pitch, tex->format,
image->x, image->y, image->width, image->height );
/* The compiler won't optimize away a division by a variable,
* even if the only legal values are powers of two. Thus, we'll
@ -1120,127 +1131,113 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
return DRM_ERR(EINVAL);
}
DRM_DEBUG( " tex=%dx%d blit=%d\n",
tex_width, tex->height, blit_width );
DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );
/* Flush the pixel cache. This ensures no pixel data gets mixed
* up with the texture data from the host data blit, otherwise
* part of the texture image may be corrupted.
*/
BEGIN_RING( 4 );
do {
DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
tex->offset >> 10, tex->pitch, tex->format,
image->x, image->y, image->width, image->height );
RADEON_FLUSH_CACHE();
RADEON_WAIT_UNTIL_IDLE();
ADVANCE_RING();
#ifdef __BIG_ENDIAN
/* The Mesa texture functions provide the data in little endian as the
* chip wants it, but we need to compensate for the fact that the CP
* ring gets byte-swapped
*/
BEGIN_RING( 2 );
OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
ADVANCE_RING();
#endif
/* Make a copy of the parameters in case we have to update them
* for a multi-pass texture blit.
*/
y = image->y;
height = image->height;
data = (const u8 *)image->data;
size = height * blit_width;
if ( size > RADEON_MAX_TEXTURE_SIZE ) {
/* Texture image is too large, do a multipass upload */
ret = DRM_ERR(EAGAIN);
/* Adjust the blit size to fit the indirect buffer */
height = RADEON_MAX_TEXTURE_SIZE / blit_width;
/* Make a copy of the parameters in case we have to
* update them for a multi-pass texture blit.
*/
y = image->y;
height = image->height;
data = (const u8 *)image->data;
size = height * blit_width;
if ( size > RADEON_MAX_TEXTURE_SIZE ) {
height = RADEON_MAX_TEXTURE_SIZE / blit_width;
size = height * blit_width;
} else if ( size < 4 && size > 0 ) {
size = 4;
} else if ( size == 0 ) {
return 0;
}
/* Update the input parameters for next time */
image->y += height;
image->height -= height;
image->data = (const char *)image->data + size;
image->data += size;
if ( DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ) ) {
DRM_ERROR( "EFAULT on tex->image\n" );
return DRM_ERR(EFAULT);
buf = radeon_freelist_get( dev );
if ( 0 && !buf ) {
radeon_do_cp_idle( dev_priv );
buf = radeon_freelist_get( dev );
}
} else if ( size < 4 && size > 0 ) {
size = 4;
}
dwords = size / 4;
/* Dispatch the indirect buffer.
*/
buffer = (u32 *)((char *)dev_priv->buffers->handle + buf->offset);
buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
RADEON_GMC_BRUSH_NONE |
(format << 8) |
RADEON_GMC_SRC_DATATYPE_COLOR |
RADEON_ROP3_S |
RADEON_DP_SRC_SOURCE_HOST_DATA |
RADEON_GMC_CLR_CMP_CNTL_DIS |
RADEON_GMC_WR_MSK_DIS);
buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
buffer[3] = 0xffffffff;
buffer[4] = 0xffffffff;
buffer[5] = (y << 16) | image->x;
buffer[6] = (height << 16) | image->width;
buffer[7] = dwords;
buffer += 8;
if ( tex_width >= 32 ) {
/* Texture image width is larger than the minimum, so we
* can upload it directly.
*/
if ( DRM_COPY_FROM_USER( buffer, data, dwords * sizeof(u32) ) ) {
DRM_ERROR( "EFAULT on data, %d dwords\n", dwords );
return DRM_ERR(EFAULT);
if ( !buf ) {
DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
DRM_COPY_TO_USER( tex->image, image, sizeof(*image) );
return DRM_ERR(EAGAIN);
}
} else {
/* Texture image width is less than the minimum, so we
* need to pad out each image scanline to the minimum
* width.
/* Dispatch the indirect buffer.
*/
for ( i = 0 ; i < tex->height ; i++ ) {
if ( DRM_COPY_FROM_USER( buffer, data, tex_width ) ) {
DRM_ERROR( "EFAULT on pad, %d bytes\n",
tex_width );
buffer = (u32*)((char*)dev_priv->buffers->handle + buf->offset);
dwords = size / 4;
buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
RADEON_GMC_BRUSH_NONE |
(format << 8) |
RADEON_GMC_SRC_DATATYPE_COLOR |
RADEON_ROP3_S |
RADEON_DP_SRC_SOURCE_HOST_DATA |
RADEON_GMC_CLR_CMP_CNTL_DIS |
RADEON_GMC_WR_MSK_DIS);
buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
buffer[3] = 0xffffffff;
buffer[4] = 0xffffffff;
buffer[5] = (y << 16) | image->x;
buffer[6] = (height << 16) | image->width;
buffer[7] = dwords;
buffer += 8;
if ( tex_width >= 32 ) {
/* Texture image width is larger than the minimum, so we
* can upload it directly.
*/
if ( DRM_COPY_FROM_USER( buffer, data,
dwords * sizeof(u32) ) ) {
DRM_ERROR( "EFAULT on data, %d dwords\n",
dwords );
return DRM_ERR(EFAULT);
}
buffer += 8;
data += tex_width;
} else {
/* Texture image width is less than the minimum, so we
* need to pad out each image scanline to the minimum
* width.
*/
for ( i = 0 ; i < tex->height ; i++ ) {
if ( DRM_COPY_FROM_USER( buffer, data,
tex_width ) ) {
DRM_ERROR( "EFAULT on pad, %d bytes\n",
tex_width );
return DRM_ERR(EFAULT);
}
buffer += 8;
data += tex_width;
}
}
}
buf->pid = DRM_CURRENTPID;
buf->used = (dwords + 8) * sizeof(u32);
buf->pid = DRM_CURRENTPID;
buf->used = (dwords + 8) * sizeof(u32);
radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
radeon_cp_discard_buffer( dev, buf );
radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
radeon_cp_discard_buffer( dev, buf );
} while (image->height > 0);
/* Flush the pixel cache after the blit completes. This ensures
* the texture data is written out to memory before rendering
* continues.
*/
BEGIN_RING( 4 );
RADEON_FLUSH_CACHE();
RADEON_WAIT_UNTIL_2D_IDLE();
ADVANCE_RING();
return ret;
return 0;
}