diff --git a/linux-core/Makefile.kernel b/linux-core/Makefile.kernel index 82d200b3..abfad0a1 100644 --- a/linux-core/Makefile.kernel +++ b/linux-core/Makefile.kernel @@ -20,7 +20,8 @@ r128-objs := r128_drv.o r128_cce.o r128_state.o r128_irq.o mga-objs := mga_drv.o mga_dma.o mga_state.o mga_warp.o mga_irq.o i810-objs := i810_drv.o i810_dma.o i915-objs := i915_drv.o i915_dma.o i915_irq.o i915_mem.o i915_fence.o \ - i915_buffer.o i915_compat.o i915_execbuf.o i915_gem.o + i915_buffer.o i915_compat.o i915_execbuf.o \ + i915_gem.o i915_gem_tiling.o nouveau-objs := nouveau_drv.o nouveau_state.o nouveau_fifo.o nouveau_mem.o \ nouveau_object.o nouveau_irq.o nouveau_notifier.o nouveau_swmthd.o \ nouveau_sgdma.o nouveau_dma.o nouveau_bo.o nouveau_fence.o \ diff --git a/linux-core/i915_gem.c b/linux-core/i915_gem.c index 446c9ba9..236203a5 100644 --- a/linux-core/i915_gem.c +++ b/linux-core/i915_gem.c @@ -2702,3 +2702,18 @@ i915_gem_lastclose(struct drm_device *dev) mutex_unlock(&dev->struct_mutex); } + +void i915_gem_load(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + + INIT_LIST_HEAD(&dev_priv->mm.active_list); + INIT_LIST_HEAD(&dev_priv->mm.flushing_list); + INIT_LIST_HEAD(&dev_priv->mm.inactive_list); + INIT_LIST_HEAD(&dev_priv->mm.request_list); + INIT_DELAYED_WORK(&dev_priv->mm.retire_work, + i915_gem_retire_work_handler); + dev_priv->mm.next_gem_seqno = 1; + + i915_gem_detect_bit_6_swizzle(dev); +} diff --git a/linux-core/i915_gem_tiling.c b/linux-core/i915_gem_tiling.c new file mode 100644 index 00000000..d66276d6 --- /dev/null +++ b/linux-core/i915_gem_tiling.c @@ -0,0 +1,290 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#include "drmP.h" +#include "drm.h" +#include "i915_drm.h" +#include "i915_drv.h" + +/** @file i915_gem_tiling.c + * + * Support for managing tiling state of buffer objects. + * + * The idea behind tiling is to increase cache hit rates by rearranging + * pixel data so that a group of pixel accesses are in the same cacheline. + * Performance improvement from doing this on the back/depth buffer are on + * the order of 30%. + * + * Intel architectures make this somewhat more complicated, though, by + * adjustments made to addressing of data when the memory is in interleaved + * mode (matched pairs of DIMMS) to improve memory bandwidth. + * For interleaved memory, the CPU sends every sequential 64 bytes + * to an alternate memory channel so it can get the bandwidth from both. + * + * The GPU also rearranges its accesses for increased bandwidth to interleaved + * memory, and it matches what the CPU does for non-tiled. However, when tiled + * it does it a little differently, since one walks addresses not just in the + * X direction but also Y. So, along with alternating channels when bit + * 6 of the address flips, it also alternates when other bits flip -- Bits 9 + * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) + * are common to both the 915 and 965-class hardware. + * + * The CPU also sometimes XORs in higher bits as well, to improve + * bandwidth doing strided access like we do so frequently in graphics. This + * is called "Channel XOR Randomization" in the MCH documentation. The result + * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address + * decode. + * + * All of this bit 6 XORing has an effect on our memory management, + * as we need to make sure that the 3d driver can correctly address object + * contents. + * + * If we don't have interleaved memory, all tiling is safe and no swizzling is + * required. + * + * When bit 17 is XORed in, we simply refuse to tile at all. Bit + * 17 is not just a page offset, so as we page an objet out and back in, + * individual pages in it will have different bit 17 addresses, resulting in + * each 64 bytes being swapped with its neighbor! + * + * Otherwise, if interleaved, we have to tell the 3d driver what the address + * swizzling it needs to do is, since it's writing with the CPU to the pages + * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the + * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling + * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order + * to match what the GPU expects. + */ + +/** + * Detects bit 6 swizzling of address lookup between IGD access and CPU + * access through main memory. + */ +void +i915_gem_detect_bit_6_swizzle(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct pci_dev *bridge; + uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + int mchbar_offset; + volatile char *mchbar; + int ret; + + bridge = pci_get_bus_and_slot(0, PCI_DEVFN(0,0)); + if (bridge == NULL) { + DRM_ERROR("Couldn't get bridge device\n"); + return; + } + + ret = pci_enable_device(bridge); + if (ret != 0) { + DRM_ERROR("pci_enable_device failed: %d\n", ret); + return; + } + + if (IS_I965G(dev)) + mchbar_offset = 0x48; + else + mchbar_offset = 0x44; + + /* Use resource 2 for our BAR that's stashed in a nonstandard location, + * since the bridge would only ever use standard BARs 0-1 (though it + * doesn't anyway) + */ + pci_read_base(bridge, mchbar_offset, &bridge->resource[2]); + + mchbar = ioremap(pci_resource_start(bridge, 2), + pci_resource_len(bridge, 2)); + if (mchbar == NULL) { + DRM_ERROR("Couldn't map MCHBAR to determine tile swizzling\n"); + return; + } + + if (IS_I965G(dev) && !IS_I965GM(dev)) { + uint32_t chdecmisc; + + /* On the 965, channel interleave appears to be determined by + * the flex bit. If flex is set, then the ranks (sides of a + * DIMM) of memory will be "stacked" (physical addresses walk + * through one rank then move on to the next, flipping channels + * or not depending on rank configuration). The GPU in this + * case does exactly the same addressing as the CPU. + * + * Unlike the 945, channel randomization based does not + * appear to be available. + * + * XXX: While the G965 doesn't appear to do any interleaving + * when the DIMMs are not exactly matched, the G4x chipsets + * might be for "L-shaped" configurations, and will need to be + * detected. + * + * L-shaped configuration: + * + * +-----+ + * | | + * |DIMM2| <-- non-interleaved + * +-----+ + * +-----+ +-----+ + * | | | | + * |DIMM0| |DIMM1| <-- interleaved area + * +-----+ +-----+ + */ + chdecmisc = readb(mchbar + CHDECMISC); + + if (chdecmisc & CHDECMISC_FLEXMEMORY) { + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } else { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } + } else if (IS_I9XX(dev)){ + uint32_t dcc; + + /* On 915-945 and GM965, channel interleave by the CPU is + * determined by DCC. The CPU will alternate based on bit 6 + * in interleaved mode, and the GPU will then also alternate + * on bit 6, 9, and 10 for X, but the CPU may also optionally + * alternate based on bit 17 (XOR not disabled and XOR + * bit == 17). + */ + dcc = readl(mchbar + DCC); + switch (dcc & DCC_ADDRESSING_MODE_MASK) { + case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + break; + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: + if (IS_I915G(dev) || IS_I915GM(dev) || + dcc & DCC_CHANNEL_XOR_DISABLE) { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else if (IS_I965GM(dev)) { + /* GM965 only does bit 11-based channel + * randomization + */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; + swizzle_y = I915_BIT_6_SWIZZLE_9_11; + } else { + /* Bit 17 or perhaps other swizzling */ + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } + break; + } + } + + iounmap(mchbar); + + dev_priv->mm.bit_6_swizzle_x = swizzle_x; + dev_priv->mm.bit_6_swizzle_y = swizzle_y; +} + +/** + * Sets the tiling mode of an object, returning the required swizzling of + * bit 6 of addresses in the object. + */ +int +i915_gem_set_tiling(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_set_tiling *args = data; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_gem_object *obj; + struct drm_i915_gem_object *obj_priv; + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return -EINVAL; + obj_priv = obj->driver_private; + + mutex_lock(&dev->struct_mutex); + + if (args->tiling_mode == I915_TILING_NONE) { + obj_priv->tiling_mode = I915_TILING_NONE; + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + } else { + if (args->tiling_mode == I915_TILING_X) + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; + else + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + /* If we can't handle the swizzling, make it untiled. */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { + args->tiling_mode = I915_TILING_NONE; + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + } + } + obj_priv->tiling_mode = args->tiling_mode; + + mutex_unlock(&dev->struct_mutex); + + drm_gem_object_unreference(obj); + + return 0; +} + +/** + * Returns the current tiling mode and required bit 6 swizzling for the object. + */ +int +i915_gem_get_tiling(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_get_tiling *args = data; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_gem_object *obj; + struct drm_i915_gem_object *obj_priv; + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return -EINVAL; + obj_priv = obj->driver_private; + + mutex_lock(&dev->struct_mutex); + + args->tiling_mode = obj_priv->tiling_mode; + switch (obj_priv->tiling_mode) { + case I915_TILING_X: + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; + break; + case I915_TILING_Y: + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + break; + case I915_TILING_NONE: + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + break; + default: + DRM_ERROR("unknown tiling mode\n"); + } + + mutex_unlock(&dev->struct_mutex); + + drm_gem_object_unreference(obj); + + return 0; +} diff --git a/shared-core/i915_dma.c b/shared-core/i915_dma.c index 6ee3d19f..c9a9d800 100644 --- a/shared-core/i915_dma.c +++ b/shared-core/i915_dma.c @@ -1068,13 +1068,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) ret = drm_addmap(dev, base, size, _DRM_REGISTERS, _DRM_KERNEL | _DRM_DRIVER, &dev_priv->mmio_map); - INIT_LIST_HEAD(&dev_priv->mm.active_list); - INIT_LIST_HEAD(&dev_priv->mm.flushing_list); - INIT_LIST_HEAD(&dev_priv->mm.inactive_list); - INIT_LIST_HEAD(&dev_priv->mm.request_list); - INIT_DELAYED_WORK(&dev_priv->mm.retire_work, - i915_gem_retire_work_handler); - dev_priv->mm.next_gem_seqno = 1; + i915_gem_load(dev); #ifdef __linux__ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) @@ -1217,6 +1211,8 @@ struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF(DRM_I915_GEM_MMAP, i915_gem_mmap_ioctl, 0), DRM_IOCTL_DEF(DRM_I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, 0), DRM_IOCTL_DEF(DRM_I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, 0), + DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, 0), + DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0), }; int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); diff --git a/shared-core/i915_drm.h b/shared-core/i915_drm.h index fde474d7..9feffeb5 100644 --- a/shared-core/i915_drm.h +++ b/shared-core/i915_drm.h @@ -190,6 +190,8 @@ typedef struct drm_i915_sarea { #define DRM_I915_GEM_MMAP 0x1e #define DRM_I915_GEM_SET_DOMAIN 0x1f #define DRM_I915_GEM_SW_FINISH 0x20 +#define DRM_I915_GEM_SET_TILING 0x21 +#define DRM_I915_GEM_GET_TILING 0x22 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -223,6 +225,8 @@ typedef struct drm_i915_sarea { #define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) #define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain) #define DRM_IOCTL_I915_GEM_SW_FINISH DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish) +#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling) +#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) /* Asynchronous page flipping: */ @@ -651,4 +655,64 @@ struct drm_i915_gem_busy { uint32_t busy; }; +#define I915_TILING_NONE 0 +#define I915_TILING_X 1 +#define I915_TILING_Y 2 + +#define I915_BIT_6_SWIZZLE_NONE 0 +#define I915_BIT_6_SWIZZLE_9 1 +#define I915_BIT_6_SWIZZLE_9_10 2 +#define I915_BIT_6_SWIZZLE_9_11 3 +#define I915_BIT_6_SWIZZLE_9_10_11 4 +/* Not seen by userland */ +#define I915_BIT_6_SWIZZLE_UNKNOWN 5 + +struct drm_i915_gem_set_tiling { + /** Handle of the buffer to have its tiling state updated */ + uint32_t handle; + + /** + * Tiling mode for the object (I915_TILING_NONE, I915_TILING_X, + * I915_TILING_Y). + * + * This value is to be set on request, and will be updated by the + * kernel on successful return with the actual chosen tiling layout. + * + * The tiling mode may be demoted to I915_TILING_NONE when the system + * has bit 6 swizzling that can't be managed correctly by GEM. + * + * Buffer contents become undefined when changing tiling_mode. + */ + uint32_t tiling_mode; + + /** + * Stride in bytes for the object when in I915_TILING_X or + * I915_TILING_Y. + */ + uint32_t stride; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping. + */ + uint32_t swizzle_mode; +}; + +struct drm_i915_gem_get_tiling { + /** Handle of the buffer to get tiling state for. */ + uint32_t handle; + + /** + * Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X, + * I915_TILING_Y). + */ + uint32_t tiling_mode; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping. + */ + uint32_t swizzle_mode; +}; + #endif /* _I915_DRM_H_ */ diff --git a/shared-core/i915_drv.h b/shared-core/i915_drv.h index 0df5af9c..a9a431c4 100644 --- a/shared-core/i915_drv.h +++ b/shared-core/i915_drv.h @@ -316,6 +316,11 @@ typedef struct drm_i915_private { * every pending request fail */ int wedged; + + /** Bit 6 swizzling required for X tiling */ + uint32_t bit_6_swizzle_x; + /** Bit 6 swizzling required for Y tiling */ + uint32_t bit_6_swizzle_y; } mm; } drm_i915_private_t; @@ -376,6 +381,9 @@ struct drm_i915_gem_object { /** Breadcrumb of last rendering to the buffer. */ uint32_t last_rendering_seqno; + + /** Current tiling mode for the object. */ + uint32_t tiling_mode; }; /** @@ -517,6 +525,11 @@ int i915_gem_entervt_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_gem_set_tiling(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int i915_gem_get_tiling(struct drm_device *dev, void *data, + struct drm_file *file_priv); +void i915_gem_load(struct drm_device *dev); int i915_gem_proc_init(struct drm_minor *minor); void i915_gem_proc_cleanup(struct drm_minor *minor); int i915_gem_init_object(struct drm_gem_object *obj); @@ -529,6 +542,9 @@ void i915_gem_retire_requests(struct drm_device *dev); void i915_gem_retire_work_handler(struct work_struct *work); #endif +/* i915_gem_tiling.c */ +void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); + #ifdef __linux__ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) extern void intel_init_chipset_flush_compat(struct drm_device *dev); @@ -585,6 +601,19 @@ void i915_ring_validate(struct drm_device *dev, const char *func, int line); extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); +/* MCH MMIO space */ +/** 915-945 and GM965 MCH register controlling DRAM channel access */ +#define DCC 0x200 +#define DCC_ADDRESSING_MODE_SINGLE_CHANNEL (0 << 0) +#define DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC (1 << 0) +#define DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED (2 << 0) +#define DCC_ADDRESSING_MODE_MASK (3 << 0) +#define DCC_CHANNEL_XOR_DISABLE (1 << 10) + +/** 965 MCH register controlling DRAM channel configuration */ +#define CHDECMISC 0x111 +#define CHDECMISC_FLEXMEMORY (1 << 1) + /* Extended config space */ #define LBB 0xf4