5605 lines
243 KiB
C
Raw Normal View History

2025-04-23 00:09:35 -04:00
/*==========================================================================;
*
* Copyright (C) Microsoft Corporation. All Rights Reserved.
*
* File: d3d9gpu.h
* Content: Direct3D hardware register definitions include file
*
****************************************************************************/
#ifndef _D3D9GPU_H_
#define _D3D9GPU_H_
#ifdef __cplusplus
extern "C" {
#endif
#pragma warning(push)
// Disable nameless struct/union and zero-sized array warnings for this header.
#pragma warning(disable:4201 4200)
// The PPC back-end of the C compiler by default defines bitfields to be
// ordered from the MSB to the LSB, which is opposite the convention on
// the X86 platform. Use the 'bitfield_order' pragma to switch the
// ordering. Note that this does not affect endianness in any way.
#if defined(_M_PPCBE)
#pragma bitfield_order(push)
#pragma bitfield_order(lsb_to_msb)
#endif
// Size of the EDRAM, in bytes (10 MB):
#define GPU_EDRAM_SIZE                              (10*1024*1024)

// Amount of system memory the GPU can address, in bytes (512 MB):
#define GPU_MEMORY_SIZE                             (512*1024*1024)

// GPU core clock frequency, in Hz (500 MHz):
#define GPU_CLOCK_SPEED                             (500*1000*1000)

// Mask to AND with a virtual memory pointer as part of converting it into a
// physical address usable by the GPU (keeps the bits below GPU_MEMORY_SIZE):
#define GPU_ADDRESS_MASK                            (GPU_MEMORY_SIZE - 1)
// CPU virtual addresses for 'physical memory' allocations fall into three
// sections, distinguished by page size:
//
//   0xA0000000 - 0xBFFFFFFF   64 KB pages
//   0xC0000000 - 0xDFFFFFFF   16 MB pages (range also used for cached-read-only pages, see below)
//   0xE0000000 - 0xFFFFFFFF    4 KB pages
//
// NOTE(review): GPU_CPU_VIRTUAL_ADDRESS_4KB_START is 0xDFFFF000 rather than
// 0xE0000000. This looks deliberate: GPU_CONVERT_CPU_TO_GPU_ADDRESS adds
// 0x1000 for addresses at or above 0xE0000000, and
// GPU_CONVERT_GPU_TO_CPU_ADDRESS_4KB adds this base back -- confirm before
// changing it.
#define GPU_CPU_VIRTUAL_ADDRESS_64KB_START          ((DWORD) 0xA0000000)
#define GPU_CPU_VIRTUAL_ADDRESS_64KB_END            ((DWORD) 0xBFFFFFFF)
#define GPU_CPU_VIRTUAL_ADDRESS_16MB_START          ((DWORD) 0xC0000000)
#define GPU_CPU_VIRTUAL_ADDRESS_16MB_END            ((DWORD) 0xDFFFFFFF)
#define GPU_CPU_VIRTUAL_ADDRESS_4KB_START           ((DWORD) 0xDFFFF000)
#define GPU_CPU_VIRTUAL_ADDRESS_4KB_END             ((DWORD) 0xFFFFFFFF)
#define GPU_CPU_VIRTUAL_ADDRESS                     ((DWORD) 0xA0000000)

// Physical memory in 64 KB and 4 KB pages can also be accessed as cached,
// read-only memory by offsetting the address from its native range into the
// corresponding 0xC0000000 to 0xDFFFFFFF range. 16 MB pages are always
// write-combined and have no cached, read-only representation.
#define GPU_CPU_CACHED_READONLY_VIRTUAL_ADDRESS     ((DWORD) 0xC0000000)
// The CPU has sixteen 32-bit 'tail write-back' registers that the GPU's
// command processor can write to directly. Compared with GPU write-backs to
// normal cacheable memory they reduce latency, which matters particularly
// for XPS: the CPU does not have to go all the way to memory to read a new
// write-back value. There is one register per 128 bytes in the physical
// address range below; the remainder of the range is unused.
#define GPU_TAIL_WRITEBACKS                         16
#define GPU_TAIL_WRITEBACK_DELTA                    128
#define GPU_TAIL_WRITEBACK_RANGE_VIRTUAL_ADDRESS    ((DWORD) 0x7F000000)
#define GPU_TAIL_WRITEBACK_RANGE_PHYSICAL_ADDRESS   ((DWORD) 0xC0000000)
#define GPU_TAIL_WRITEBACK_RANGE_SIZE               0x00100000
// For one particular physical memory range the GPU requests data from the
// CPU's L2 cache instead of from physical memory. Only XPS uses this. The L2
// set has to be 'locked', and each cache line in the set has to be
// pre-initialized with 'dcbz128' to a virtual address that corresponds to a
// consecutive subset of this physical range. (In other words, GPU requests
// from this range are lost unless the cache has been pre-populated.)
#define GPU_XPS_LOCKED_L2_RANGE_VIRTUAL_ADDRESS     ((DWORD) 0x7F100000)
#define GPU_XPS_LOCKED_L2_RANGE_PHYSICAL_ADDRESS    ((DWORD) 0xC0100000)
#define GPU_XPS_LOCKED_L2_RANGE_SIZE                0x07F00000
// Translate a CPU address into an address the GPU can use. This is valid
// only for 'physical memory' (as allocated by XPhysicalAlloc or
// XMemAlloc(XALLOC_MEMTYPE_PHYSICAL)).
static __forceinline DWORD GPU_CONVERT_CPU_TO_GPU_ADDRESS(CONST void* CpuAddress)
{
    const UINT_PTR address = (UINT_PTR) CpuAddress;

    // Keep only the bits that fall inside the GPU-addressable range.
    const DWORD maskedAddress = ((DWORD) address) & GPU_ADDRESS_MASK;

    // For a 32-bit address this term is 0x1000 when the address is at or
    // above 0xE0000000 and 0 otherwise.
    const DWORD pageBias = (DWORD) (((address >> 20) + 0x200) & 0x1000);

    return maskedAddress + pageBias;
}
// Translate a GPU address into an address the CPU can use. The result is
// usable only if the memory was originally allocated as 'physical memory' by
// XPhysicalAlloc or XMemAlloc(XALLOC_MEMTYPE_PHYSICAL). The CPU accesses the
// memory with the memory-protection type (e.g., cached or write-combining)
// of that original allocation.
//
// This variant rebases onto GPU_CPU_VIRTUAL_ADDRESS_64KB_START.
static __forceinline void* GPU_CONVERT_GPU_TO_CPU_ADDRESS_64KB(DWORD GpuAddress)
{
    const DWORD cpuAddress = GpuAddress + GPU_CPU_VIRTUAL_ADDRESS_64KB_START;

    return (void*) (UINT_PTR) cpuAddress;
}
// Translate a GPU address into a CPU address by rebasing it onto
// GPU_CPU_VIRTUAL_ADDRESS_16MB_START.
static __forceinline void* GPU_CONVERT_GPU_TO_CPU_ADDRESS_16MB(DWORD GpuAddress)
{
    const DWORD cpuAddress = GpuAddress + GPU_CPU_VIRTUAL_ADDRESS_16MB_START;

    return (void*) (UINT_PTR) cpuAddress;
}
// Translate a GPU address into a CPU address by rebasing it onto
// GPU_CPU_VIRTUAL_ADDRESS_4KB_START.
static __forceinline void* GPU_CONVERT_GPU_TO_CPU_ADDRESS_4KB(DWORD GpuAddress)
{
    const DWORD cpuAddress = GpuAddress + GPU_CPU_VIRTUAL_ADDRESS_4KB_START;

    return (void*) (UINT_PTR) cpuAddress;
}
// Translate a GPU address into a CPU address through which the memory is
// accessed as cached and read-only. All of memory can be reached this way,
// regardless of how it was originally allocated. The hardware provides no
// automatic coherency between write-combining and cached views, so callers
// must guard against stale data: e.g., if the CPU does a cached read of this
// memory, then modifies it through a write-combining view, then does a
// cached read again, the second read may return stale cached data unless CPU
// cache control instructions are used.
static __forceinline void* GPU_CONVERT_GPU_TO_CPU_CACHED_READONLY_ADDRESS(DWORD GpuAddress)
{
    const DWORD cachedAddress = GpuAddress + GPU_CPU_CACHED_READONLY_VIRTUAL_ADDRESS;

    return (void*) (UINT_PTR) cachedAddress;
}
// Translate a CPU address into the matching CPU cached, read-only address.
// This is valid only for 'physical memory' (as allocated by XPhysicalAlloc
// or XMemAlloc(XALLOC_MEMTYPE_PHYSICAL)).
static __forceinline void* GPU_CONVERT_CPU_TO_CPU_CACHED_READONLY_ADDRESS(CONST void* CpuAddress)
{
    // Go through the GPU address, then rebase into the cached read-only range.
    const DWORD gpuAddress = GPU_CONVERT_CPU_TO_GPU_ADDRESS(CpuAddress);

    return GPU_CONVERT_GPU_TO_CPU_CACHED_READONLY_ADDRESS(gpuAddress);
}
// Translate a CPU address that points into the XPS locked L2 cache range
// into a GPU physical address.
static __forceinline DWORD GPU_CONVERT_XPS_CPU_TO_GPU_ADDRESS(CONST void* CpuAddress)
{
    // Offset of the address from the start of the virtual range.
    const DWORD rangeOffset = ((DWORD) (UINT_PTR) CpuAddress)
                            - GPU_XPS_LOCKED_L2_RANGE_VIRTUAL_ADDRESS;

    return rangeOffset + GPU_XPS_LOCKED_L2_RANGE_PHYSICAL_ADDRESS;
}
// Translate a GPU address that points into the XPS locked L2 cache range
// into a CPU virtual address.
static __forceinline void* GPU_CONVERT_XPS_GPU_TO_CPU_ADDRESS(DWORD GpuAddress)
{
    // Offset of the address from the start of the physical range.
    const DWORD rangeOffset = GpuAddress - GPU_XPS_LOCKED_L2_RANGE_PHYSICAL_ADDRESS;
    const DWORD cpuAddress = rangeOffset + GPU_XPS_LOCKED_L2_RANGE_VIRTUAL_ADDRESS;

    return (void*) (UINT_PTR) cpuAddress;
}
// Capacity of the shader program store, in instructions:
#define GPU_INSTRUCTIONS                                4096

// Number of constants supported by a single context:
#define GPU_ALU_CONSTANTS                               512
#define GPU_BOOLEAN_CONSTANTS                           256
#define GPU_INTEGER_CONSTANTS                           32
#define GPU_FLOW_CONSTANTS                              (GPU_BOOLEAN_CONSTANTS/32 + GPU_INTEGER_CONSTANTS) // Actual register count
#define GPU_FETCH_CONSTANTS                             32
#define GPU_VERTEX_FETCH_CONSTANTS                      (3*GPU_FETCH_CONSTANTS)

// How D3D splits the constants between vertex and pixel shaders.

// Vertex shader share:
#define GPU_D3D_VERTEX_CONSTANTF_BASE                   0
#define GPU_D3D_VERTEX_CONSTANTF_COUNT                  256
#define GPU_D3D_VERTEX_CONSTANTI_BASE                   0
#define GPU_D3D_VERTEX_CONSTANTI_COUNT                  16
#define GPU_D3D_VERTEX_CONSTANTB_BASE                   0
#define GPU_D3D_VERTEX_CONSTANTB_COUNT                  128
#define GPU_D3D_VERTEX_FETCH_CONSTANT_BASE              26
#define GPU_D3D_VERTEX_FETCH_CONSTANT_COUNT             6 // Good for 18 streams
#define GPU_D3D_VERTEX_TEXTURE_FETCH_CONSTANT_BASE      16
#define GPU_D3D_VERTEX_TEXTURE_FETCH_CONSTANT_COUNT     10

// Pixel shader share (each base starts where the vertex share ends):
#define GPU_D3D_PIXEL_CONSTANTF_BASE                    GPU_D3D_VERTEX_CONSTANTF_COUNT
#define GPU_D3D_PIXEL_CONSTANTF_COUNT                   256
#define GPU_D3D_PIXEL_CONSTANTI_BASE                    GPU_D3D_VERTEX_CONSTANTI_COUNT
#define GPU_D3D_PIXEL_CONSTANTI_COUNT                   16
#define GPU_D3D_PIXEL_CONSTANTB_BASE                    GPU_D3D_VERTEX_CONSTANTB_COUNT
#define GPU_D3D_PIXEL_CONSTANTB_COUNT                   128
#define GPU_D3D_PIXEL_TEXTURE_FETCH_CONSTANT_BASE       0
#define GPU_D3D_PIXEL_TEXTURE_FETCH_CONSTANT_COUNT      26

// Texture fetch constants tracked by the D3D runtime, covering both vertex
// and pixel shaders:
#define GPU_D3D_TEXTURE_FETCH_CONSTANT_BASE             0
#define GPU_D3D_TEXTURE_FETCH_CONSTANT_COUNT            26

// Fetch constant allocation strategy:
//   Pixel shader samplers start at texture constant 0 and proceed upwards.
//   Vertex shader samplers start at texture constant 16 and proceed upwards.
//   The constant D3DDMAPSAMPLER is set to 16. Typically vertex shaders will
//   use textures D3DDMAPSAMPLER..D3DDMAPSAMPLER+3, while pixel shaders will
//   use textures 0..15. But it's up to the shader authors to sort things out.
//
//    0..15 : samplers 0..15 for pixel shaders
//   16..19 : samplers 0..3 for vertex shaders
//          : also samplers 16..19 for pixel shaders
//   20..25 : samplers 20..25 for pixel shaders
//          : also samplers 4..9 for vertex shaders
//          : also vertex streams 18..35
//   26..31 : vertex stream fetch constants 0..17 (31.High is stream 0,
//            31.Middle is stream 1, and so on.)
#define GPU_CONVERT_D3D_TO_HARDWARE_TEXTUREFETCHCONSTANT(X) ((X) + GPU_D3D_PIXEL_TEXTURE_FETCH_CONSTANT_BASE)
#define GPU_CONVERT_D3D_TO_HARDWARE_VERTEXFETCHCONSTANT(X)  ((GPU_VERTEX_FETCH_CONSTANTS - 1) - (X))
// Number of GPU temporary registers available to our runtime:
#define GPU_SHADER_TEMPORARY_REGISTER_COUNT             32

// Number of GPU temporary registers the hardware supports:
#define GPU_SHADER_TEMPORARY_REGISTER_COUNT_PHYSICAL    64
// Maximum number of vertices in a vertex buffer:
#define GPU_MAX_VERTEX_BUFFER_DIMENSION             16777216

// Maximum dimension of a 2D texture (8K by 8K):
#define GPU_MAX_TEXTURE_DIMENSION                   8192

// Maximum dimension of a 1D texture (16M):
#define GPU_MAX_1D_TEXTURE_DIMENSION                16777216

// Maximum number of layers in the Z direction of a volume texture:
#define GPU_MAX_TEXTURE_DEPTH                       1024

// Required alignment of textures, for both the base and the mip levels (4K):
#define GPU_TEXTURE_ALIGNMENT                       4096

// Texture pitch must be a multiple of this many texels:
#define GPU_TEXTURE_TEXEL_PITCH_ALIGNMENT           32

// Linear texture pitch must be a multiple of this many bytes:
#define GPU_LINEAR_TEXTURE_PITCH_BYTE_ALIGNMENT     256

// Texture tiles are 32x32x4 texels:
#define GPU_TEXTURE_TILE_DIMENSION                  32
#define GPU_TEXTURE_TILE_DEPTH_DIMENSION            4

// Resolve rectangles must always be aligned to 8x8 pixels:
#define GPU_RESOLVE_ALIGNMENT                       8

// Maximum vertex fetch stride, in dwords:
#define GPU_MAX_VERTEX_STRIDE                       255
// Number of interpolators available to the pixel shader:
#define GPU_INTERPOLATORS                           16

// Size of the pool of GPR sets:
#define GPU_GPRS                                    128

// Number of channels per interpolator:
#define GPU_INTERPOLATOR_CHANNELS                   4

// Size of the post-transform cache. VtxReuseDepth should always be set to
// this value:
#define GPU_VERTEX_REUSE_DEPTH                      14

// Distance from the pixel center to the outermost sample when multisampling.
// MaxSampleDist should always be set to the value matching the sample count:
#define GPU_MAX_SAMPLE_DIST_1X                      0
#define GPU_MAX_SAMPLE_DIST_2X                      4
#define GPU_MAX_SAMPLE_DIST_4X                      6

// Distance (in indices) by which the vertex vector slot assignment leads the
// deallocation. DeallocDist should always be set to this value:
#define GPU_DEALLOC_DIST                            16
// EDRAM tiles are allocated in units of 80x16 pixels at 1X multisampling,
// 80x8 at 2X and 40x8 at 4X:
#define GPU_EDRAM_TILE_WIDTH_1X                     80
#define GPU_EDRAM_TILE_HEIGHT_1X                    16
#define GPU_EDRAM_TILE_WIDTH_2X                     80
#define GPU_EDRAM_TILE_HEIGHT_2X                    8
#define GPU_EDRAM_TILE_WIDTH_4X                     40
#define GPU_EDRAM_TILE_HEIGHT_4X                    8

// EDRAM tile dimensions expressed in fragments rather than pixels:
#define GPU_EDRAM_TILE_WIDTH_IN_FRAGMENTS           GPU_EDRAM_TILE_WIDTH_1X
#define GPU_EDRAM_TILE_HEIGHT_IN_FRAGMENTS          GPU_EDRAM_TILE_HEIGHT_1X

// Size of one EDRAM tile, in bytes. Note that 64-bit surfaces have an
// allocation granularity of twice this (10240 bytes) but can have a
// 5120 byte start alignment:
#define GPU_EDRAM_TILE_SIZE                         5120

// Total number of usable EDRAM tiles:
#define GPU_EDRAM_TILES                             (GPU_EDRAM_SIZE / GPU_EDRAM_TILE_SIZE)
// Hierarchical Z tiles are allocated in units of 32x16 at 1X multisampling,
// 32x8 at 2X and 16x8 at 4X:
#define GPU_HIERARCHICAL_Z_TILE_WIDTH_1X            32
#define GPU_HIERARCHICAL_Z_TILE_HEIGHT_1X           16
#define GPU_HIERARCHICAL_Z_TILE_WIDTH_2X            32
#define GPU_HIERARCHICAL_Z_TILE_HEIGHT_2X           8
#define GPU_HIERARCHICAL_Z_TILE_WIDTH_4X            16
#define GPU_HIERARCHICAL_Z_TILE_HEIGHT_4X           8

// Hierarchical Z tile dimensions expressed in fragments rather than pixels:
#define GPU_HIERARCHICAL_Z_TILE_WIDTH_IN_FRAGMENTS  GPU_HIERARCHICAL_Z_TILE_WIDTH_1X
#define GPU_HIERARCHICAL_Z_TILE_HEIGHT_IN_FRAGMENTS GPU_HIERARCHICAL_Z_TILE_HEIGHT_1X

// Size of one hierarchical Z tile, in fragments:
#define GPU_HIERARCHICAL_Z_TILE_SIZE                512

// Total number of usable hierarchical Z tiles. This is enough for
// 1280x720x2X:
#define GPU_HIERARCHICAL_Z_TILES                    3600
// Shift amounts applied to resource addresses and sizes when they are stored
// in header fields:
#define GPU_VERTEXBUFFER_ADDRESS_SHIFT              2
#define GPU_VERTEXBUFFER_SIZE_SHIFT                 2
#define GPU_TEXTURE_ADDRESS_SHIFT                   12

// Command buffers callable by the GPU must be 32-byte aligned:
#define GPU_COMMAND_BUFFER_ALIGNMENT                32

// Indirect command buffers callable by the GPU have a 1M-DWORD maximum size:
#define GPU_COMMAND_BUFFER_INDIRECT_MAX_SIZE        (1 << 20)
// Shifts and masks for the TypeAndSerialize field of a GPU control flow exec
// instruction:
#define GPUEXEC_TYPE_SHIFT                          0
#define GPUEXEC_TYPE_MASK                           1 // Set if Fetch instruction
#define GPUEXEC_SERIALIZE_SHIFT                     1
#define GPUEXEC_SERIALIZE_MASK                      2 // Set if serialized

// Maximum number of ALU/Fetch instructions in a single Exec:
#define GPUFLOW_MAX_EXEC_COUNT                      6
// Guard band size:
#define GPU_GUARDBAND                               8192

// Computes the guard band factor for a given number of guard band pixels and
// a given scale, as 1 + pixels / scale. (Scale is 1/2 the height or width of
// the screen.):
#define GPU_GUARDBANDFACTOR(GUARDBAND_PIXELS, SCALE) (1.0f + ((float) (GUARDBAND_PIXELS)) / ((float) (SCALE)))

// Largest allowed sprite size:
#define GPU_MAX_POINT_SIZE                          256.0f
//------------------------------------------------------------------------------
// Endian swap selectors (presumably "AinB" means swap A-bit units within
// each B-bit unit -- confirm against the hardware documentation).
typedef enum
{
    GPUENDIAN_NONE   = 0,
    GPUENDIAN_8IN16  = 1,
    GPUENDIAN_8IN32  = 2,
    GPUENDIAN_16IN32 = 3,
} GPUENDIAN;
// Endian swap selectors that add 64- and 128-bit variants. The first four
// values carry the same numbers as their GPUENDIAN counterparts.
typedef enum
{
    GPUENDIAN128_NONE    = 0,
    GPUENDIAN128_8IN16   = 1,
    GPUENDIAN128_8IN32   = 2,
    GPUENDIAN128_16IN32  = 3,
    GPUENDIAN128_8IN64   = 4,
    GPUENDIAN128_8IN128  = 5,
} GPUENDIAN128;
// Color array selectors.
typedef enum
{
    GPUCOLORARRAY_2D_COLOR       = 0,
    GPUCOLORARRAY_3D_SLICE_COLOR = 1,
} GPUCOLORARRAY;
// Depth array selectors.
typedef enum
{
    GPUDEPTHARRAY_2D_ALT_DEPTH = 0,
    GPUDEPTHARRAY_2D_DEPTH     = 1,
} GPUDEPTHARRAY;
// Color format codes. Where a name also exists in GPUTEXTUREFORMAT or
// GPUVERTEXFORMAT, the numeric value is the same; the exception is
// 2_10_10_10_FLOAT (62), whose GPUTEXTUREFORMAT counterpart
// 2_10_10_10_FLOAT_EDRAM is 63.
typedef enum
{
    GPUCOLORFORMAT_8                    = 2,
    GPUCOLORFORMAT_1_5_5_5              = 3,
    GPUCOLORFORMAT_5_6_5                = 4,
    GPUCOLORFORMAT_6_5_5                = 5,
    GPUCOLORFORMAT_8_8_8_8              = 6,
    GPUCOLORFORMAT_2_10_10_10           = 7,
    GPUCOLORFORMAT_8_A                  = 8,
    GPUCOLORFORMAT_8_B                  = 9,
    GPUCOLORFORMAT_8_8                  = 10,
    GPUCOLORFORMAT_8_8_8_8_A            = 14,
    GPUCOLORFORMAT_4_4_4_4              = 15,
    GPUCOLORFORMAT_10_11_11             = 16,
    GPUCOLORFORMAT_11_11_10             = 17,
    GPUCOLORFORMAT_16                   = 24,
    GPUCOLORFORMAT_16_16                = 25,
    GPUCOLORFORMAT_16_16_16_16          = 26,
    GPUCOLORFORMAT_16_FLOAT             = 30,
    GPUCOLORFORMAT_16_16_FLOAT          = 31,
    GPUCOLORFORMAT_16_16_16_16_FLOAT    = 32,
    GPUCOLORFORMAT_32_FLOAT             = 36,
    GPUCOLORFORMAT_32_32_FLOAT          = 37,
    GPUCOLORFORMAT_32_32_32_32_FLOAT    = 38,
    GPUCOLORFORMAT_2_10_10_10_FLOAT     = 62, // EDRAM render target only
} GPUCOLORFORMAT;
// Texture format codes. The values run contiguously from 0 through 63.
typedef enum
{
    GPUTEXTUREFORMAT_1_REVERSE                  = 0,
    GPUTEXTUREFORMAT_1                          = 1,
    GPUTEXTUREFORMAT_8                          = 2,
    GPUTEXTUREFORMAT_1_5_5_5                    = 3,
    GPUTEXTUREFORMAT_5_6_5                      = 4,
    GPUTEXTUREFORMAT_6_5_5                      = 5,
    GPUTEXTUREFORMAT_8_8_8_8                    = 6,
    GPUTEXTUREFORMAT_2_10_10_10                 = 7,
    GPUTEXTUREFORMAT_8_A                        = 8,
    GPUTEXTUREFORMAT_8_B                        = 9,
    GPUTEXTUREFORMAT_8_8                        = 10,
    GPUTEXTUREFORMAT_Cr_Y1_Cb_Y0_REP            = 11,
    GPUTEXTUREFORMAT_Y1_Cr_Y0_Cb_REP            = 12,
    GPUTEXTUREFORMAT_16_16_EDRAM                = 13, // EDRAM render target only
    GPUTEXTUREFORMAT_8_8_8_8_A                  = 14,
    GPUTEXTUREFORMAT_4_4_4_4                    = 15,
    GPUTEXTUREFORMAT_10_11_11                   = 16,
    GPUTEXTUREFORMAT_11_11_10                   = 17,
    GPUTEXTUREFORMAT_DXT1                       = 18,
    GPUTEXTUREFORMAT_DXT2_3                     = 19,
    GPUTEXTUREFORMAT_DXT4_5                     = 20,
    GPUTEXTUREFORMAT_16_16_16_16_EDRAM          = 21, // EDRAM render target only
    GPUTEXTUREFORMAT_24_8                       = 22,
    GPUTEXTUREFORMAT_24_8_FLOAT                 = 23,
    GPUTEXTUREFORMAT_16                         = 24,
    GPUTEXTUREFORMAT_16_16                      = 25,
    GPUTEXTUREFORMAT_16_16_16_16                = 26,
    GPUTEXTUREFORMAT_16_EXPAND                  = 27,
    GPUTEXTUREFORMAT_16_16_EXPAND               = 28,
    GPUTEXTUREFORMAT_16_16_16_16_EXPAND         = 29,
    GPUTEXTUREFORMAT_16_FLOAT                   = 30,
    GPUTEXTUREFORMAT_16_16_FLOAT                = 31,
    GPUTEXTUREFORMAT_16_16_16_16_FLOAT          = 32,
    GPUTEXTUREFORMAT_32                         = 33,
    GPUTEXTUREFORMAT_32_32                      = 34,
    GPUTEXTUREFORMAT_32_32_32_32                = 35,
    GPUTEXTUREFORMAT_32_FLOAT                   = 36,
    GPUTEXTUREFORMAT_32_32_FLOAT                = 37,
    GPUTEXTUREFORMAT_32_32_32_32_FLOAT          = 38,
    GPUTEXTUREFORMAT_32_AS_8                    = 39,
    GPUTEXTUREFORMAT_32_AS_8_8                  = 40,
    GPUTEXTUREFORMAT_16_MPEG                    = 41,
    GPUTEXTUREFORMAT_16_16_MPEG                 = 42,
    GPUTEXTUREFORMAT_8_INTERLACED               = 43,
    GPUTEXTUREFORMAT_32_AS_8_INTERLACED         = 44,
    GPUTEXTUREFORMAT_32_AS_8_8_INTERLACED       = 45,
    GPUTEXTUREFORMAT_16_INTERLACED              = 46,
    GPUTEXTUREFORMAT_16_MPEG_INTERLACED         = 47,
    GPUTEXTUREFORMAT_16_16_MPEG_INTERLACED      = 48,
    GPUTEXTUREFORMAT_DXN                        = 49,
    GPUTEXTUREFORMAT_8_8_8_8_AS_16_16_16_16     = 50,
    GPUTEXTUREFORMAT_DXT1_AS_16_16_16_16        = 51,
    GPUTEXTUREFORMAT_DXT2_3_AS_16_16_16_16      = 52,
    GPUTEXTUREFORMAT_DXT4_5_AS_16_16_16_16      = 53,
    GPUTEXTUREFORMAT_2_10_10_10_AS_16_16_16_16  = 54,
    GPUTEXTUREFORMAT_10_11_11_AS_16_16_16_16    = 55,
    GPUTEXTUREFORMAT_11_11_10_AS_16_16_16_16    = 56,
    GPUTEXTUREFORMAT_32_32_32_FLOAT             = 57,
    GPUTEXTUREFORMAT_DXT3A                      = 58,
    GPUTEXTUREFORMAT_DXT5A                      = 59,
    GPUTEXTUREFORMAT_CTX1                       = 60,
    GPUTEXTUREFORMAT_DXT3A_AS_1_1_1_1           = 61,
    GPUTEXTUREFORMAT_8_8_8_8_GAMMA_EDRAM        = 62, // EDRAM render target only
    GPUTEXTUREFORMAT_2_10_10_10_FLOAT_EDRAM     = 63, // EDRAM render target only
} GPUTEXTUREFORMAT;
// Vertex format codes. Each value matches the identically named
// GPUTEXTUREFORMAT entry.
typedef enum
{
    GPUVERTEXFORMAT_8_8_8_8             = 6,
    GPUVERTEXFORMAT_2_10_10_10          = 7,
    GPUVERTEXFORMAT_10_11_11            = 16,
    GPUVERTEXFORMAT_11_11_10            = 17,
    GPUVERTEXFORMAT_16_16               = 25,
    GPUVERTEXFORMAT_16_16_16_16         = 26,
    GPUVERTEXFORMAT_16_16_FLOAT         = 31,
    GPUVERTEXFORMAT_16_16_16_16_FLOAT   = 32,
    GPUVERTEXFORMAT_32                  = 33,
    GPUVERTEXFORMAT_32_32               = 34,
    GPUVERTEXFORMAT_32_32_32_32         = 35,
    GPUVERTEXFORMAT_32_FLOAT            = 36,
    GPUVERTEXFORMAT_32_32_FLOAT         = 37,
    GPUVERTEXFORMAT_32_32_32_32_FLOAT   = 38,
    GPUVERTEXFORMAT_32_32_32_FLOAT      = 57,
} GPUVERTEXFORMAT;
// EDRAM color format codes.
typedef enum
{
    GPUEDRAMCOLORFORMAT_8_8_8_8                         = 0,
    GPUEDRAMCOLORFORMAT_8_8_8_8_GAMMA                   = 1,
    GPUEDRAMCOLORFORMAT_2_10_10_10                      = 2,
    GPUEDRAMCOLORFORMAT_2_10_10_10_FLOAT                = 3,
    GPUEDRAMCOLORFORMAT_16_16                           = 4,
    GPUEDRAMCOLORFORMAT_16_16_16_16                     = 5,
    GPUEDRAMCOLORFORMAT_16_16_FLOAT                     = 6,
    GPUEDRAMCOLORFORMAT_16_16_16_16_FLOAT               = 7,
    GPUEDRAMCOLORFORMAT_2_10_10_10_AS_10_10_10_10       = 10,
    GPUEDRAMCOLORFORMAT_2_10_10_10_FLOAT_AS_16_16_16_16 = 12,
    GPUEDRAMCOLORFORMAT_32_FLOAT                        = 14,
    GPUEDRAMCOLORFORMAT_32_32_FLOAT                     = 15,
} GPUEDRAMCOLORFORMAT;
// EDRAM depth format codes.
typedef enum
{
    GPUEDRAMDEPTHFORMAT_24_8       = 0,
    GPUEDRAMDEPTHFORMAT_24_8_FLOAT = 1,
} GPUEDRAMDEPTHFORMAT;
// Sign selectors. Each value fits in two bits.
typedef enum
{
    GPUSIGN_UNSIGNED = 0,
    GPUSIGN_SIGNED   = 1,
    GPUSIGN_BIAS     = 2,
    GPUSIGN_GAMMA    = 3,
} GPUSIGN;

// The same GPUSIGN value packed into four consecutive 2-bit fields:
#define GPUSIGN_ALL_UNSIGNED (GPUSIGN_UNSIGNED | (GPUSIGN_UNSIGNED << 2) | (GPUSIGN_UNSIGNED << 4) | (GPUSIGN_UNSIGNED << 6))
#define GPUSIGN_ALL_SIGNED   (GPUSIGN_SIGNED | (GPUSIGN_SIGNED << 2) | (GPUSIGN_SIGNED << 4) | (GPUSIGN_SIGNED << 6))
// Swizzle selectors for a single channel. Each value fits in three bits.
typedef enum
{
    GPUSWIZZLE_X    = 0,
    GPUSWIZZLE_Y    = 1,
    GPUSWIZZLE_Z    = 2,
    GPUSWIZZLE_W    = 3,
    GPUSWIZZLE_0    = 4,
    GPUSWIZZLE_1    = 5,
    GPUSWIZZLE_KEEP = 7, // Fetch instructions only
} GPUSWIZZLE;

// Four GPUSWIZZLE selectors packed into consecutive 3-bit fields (bits 0-2,
// 3-5, 6-8 and 9-11):
#define GPUSWIZZLE_ARGB (GPUSWIZZLE_Z | (GPUSWIZZLE_Y << 3) | (GPUSWIZZLE_X << 6) | (GPUSWIZZLE_W << 9))
#define GPUSWIZZLE_ORGB (GPUSWIZZLE_Z | (GPUSWIZZLE_Y << 3) | (GPUSWIZZLE_X << 6) | (GPUSWIZZLE_1 << 9))
#define GPUSWIZZLE_ABGR (GPUSWIZZLE_X | (GPUSWIZZLE_Y << 3) | (GPUSWIZZLE_Z << 6) | (GPUSWIZZLE_W << 9))
#define GPUSWIZZLE_OBGR (GPUSWIZZLE_X | (GPUSWIZZLE_Y << 3) | (GPUSWIZZLE_Z << 6) | (GPUSWIZZLE_1 << 9))
#define GPUSWIZZLE_OOGR (GPUSWIZZLE_X | (GPUSWIZZLE_Y << 3) | (GPUSWIZZLE_1 << 6) | (GPUSWIZZLE_1 << 9))
#define GPUSWIZZLE_OZGR (GPUSWIZZLE_X | (GPUSWIZZLE_Y << 3) | (GPUSWIZZLE_0 << 6) | (GPUSWIZZLE_1 << 9))
#define GPUSWIZZLE_RZZZ (GPUSWIZZLE_0 | (GPUSWIZZLE_0 << 3) | (GPUSWIZZLE_0 << 6) | (GPUSWIZZLE_X << 9))
#define GPUSWIZZLE_OOOR (GPUSWIZZLE_X | (GPUSWIZZLE_1 << 3) | (GPUSWIZZLE_1 << 6) | (GPUSWIZZLE_1 << 9))
#define GPUSWIZZLE_ORRR (GPUSWIZZLE_X | (GPUSWIZZLE_X << 3) | (GPUSWIZZLE_X << 6) | (GPUSWIZZLE_1 << 9))
#define GPUSWIZZLE_GRRR (GPUSWIZZLE_X | (GPUSWIZZLE_X << 3) | (GPUSWIZZLE_X << 6) | (GPUSWIZZLE_Y << 9))
#define GPUSWIZZLE_RGBA (GPUSWIZZLE_W | (GPUSWIZZLE_Z << 3) | (GPUSWIZZLE_Y << 6) | (GPUSWIZZLE_X << 9))
// Numeric format selectors.
typedef enum
{
    GPUNUMFORMAT_FRACTION = 0,
    GPUNUMFORMAT_INTEGER  = 1,
} GPUNUMFORMAT;
// Fetch constant type codes.
typedef enum
{
    GPUCONSTANTTYPE_INVALID_TEXTURE = 0,
    GPUCONSTANTTYPE_INVALID_VERTEX  = 1,
    GPUCONSTANTTYPE_TEXTURE         = 2,
    GPUCONSTANTTYPE_VERTEX          = 3,
} GPUCONSTANTTYPE;
// Texture address clamp/wrap modes.
typedef enum
{
    GPUCLAMP_WRAP                 = 0,
    GPUCLAMP_MIRROR               = 1,
    GPUCLAMP_CLAMP_TO_LAST        = 2,
    GPUCLAMP_MIRROR_ONCE_TO_LAST  = 3,
    GPUCLAMP_CLAMP_HALFWAY        = 4,
    GPUCLAMP_MIRROR_ONCE_HALFWAY  = 5,
    GPUCLAMP_CLAMP_TO_BORDER      = 6,
    GPUCLAMP_MIRROR_TO_BORDER     = 7,
} GPUCLAMP;
// Texture dimensionality codes.
typedef enum
{
    GPUDIMENSION_1D      = 0,
    GPUDIMENSION_2D      = 1,
    GPUDIMENSION_3D      = 2,
    GPUDIMENSION_CUBEMAP = 3,
} GPUDIMENSION;
// Request size selectors.
typedef enum
{
    GPUREQUESTSIZE_256BIT = 0,
    GPUREQUESTSIZE_512BIT = 1,
} GPUREQUESTSIZE;
// Clamp policy selectors.
typedef enum
{
    GPUCLAMPPOLICY_D3D = 0,
    GPUCLAMPPOLICY_OGL = 1,
} GPUCLAMPPOLICY;
// Minification/magnification filter selectors.
typedef enum
{
    GPUMINMAGFILTER_POINT  = 0,
    GPUMINMAGFILTER_LINEAR = 1,
    GPUMINMAGFILTER_KEEP   = 3, // Texture fetch instructions only
} GPUMINMAGFILTER;
// Mip filter selectors.
typedef enum
{
    GPUMIPFILTER_POINT   = 0,
    GPUMIPFILTER_LINEAR  = 1,
    GPUMIPFILTER_BASEMAP = 2,
    GPUMIPFILTER_KEEP    = 3, // Texture fetch instructions only
} GPUMIPFILTER;
// Anisotropic filter selectors.
typedef enum
{
    GPUANISOFILTER_DISABLED = 0,
    GPUANISOFILTER_MAX1TO1  = 1,
    GPUANISOFILTER_MAX2TO1  = 2,
    GPUANISOFILTER_MAX4TO1  = 3,
    GPUANISOFILTER_MAX8TO1  = 4,
    GPUANISOFILTER_MAX16TO1 = 5,
    GPUANISOFILTER_KEEP     = 7, // Texture fetch instructions only
} GPUANISOFILTER;
// Border color selectors.
typedef enum
{
    GPUBORDERCOLOR_ABGR_BLACK   = 0,
    GPUBORDERCOLOR_ABGR_WHITE   = 1,
    GPUBORDERCOLOR_ACBYCR_BLACK = 2,
    GPUBORDERCOLOR_ACBCRY_BLACK = 3,
} GPUBORDERCOLOR;
// Tri-clamp selectors.
typedef enum
{
    GPUTRICLAMP_NORMAL        = 0,
    GPUTRICLAMP_ONE_SIXTH     = 1,
    GPUTRICLAMP_ONE_FOURTH    = 2,
    GPUTRICLAMP_THREE_EIGHTHS = 3,
} GPUTRICLAMP;
// Address clamp selectors.
typedef enum
{
    GPUADDRESSCLAMP_CLAMP_TO_LAST     = 0,
    GPUADDRESSCLAMP_CLAMP_TO_CONSTANT = 1,
} GPUADDRESSCLAMP;
// Surface number format selectors.
typedef enum
{
    GPUSURFACENUMBER_UREPEAT  = 0,
    GPUSURFACENUMBER_SREPEAT  = 1,
    GPUSURFACENUMBER_UINTEGER = 2,
    GPUSURFACENUMBER_SINTEGER = 3,
    GPUSURFACENUMBER_FLOAT    = 7,
} GPUSURFACENUMBER;
// Surface swap selectors. Note that, unlike the other enumerations in this
// file, the enumerators do not carry the "GPU" prefix; the names are kept
// as-is so existing users keep compiling.
typedef enum
{
    SURFACESWAP_LOW_RED  = 0,
    SURFACESWAP_LOW_BLUE = 1,
} GPUSURFACESWAP;
// Primitive type codes. Values 9-11 are not assigned here.
typedef enum
{
    GPUPRIMTYPE_NONE                    = 0,
    GPUPRIMTYPE_POINTLIST               = 1,
    GPUPRIMTYPE_LINELIST                = 2,
    GPUPRIMTYPE_LINESTRIP               = 3,
    GPUPRIMTYPE_TRILIST                 = 4,
    GPUPRIMTYPE_TRIFAN                  = 5,
    GPUPRIMTYPE_TRISTRIP                = 6,
    GPUPRIMTYPE_TRI_WITH_WFLAGS         = 7,
    GPUPRIMTYPE_RECTLIST                = 8,
    GPUPRIMTYPE_LINELOOP                = 12,
    GPUPRIMTYPE_QUADLIST                = 13,
    GPUPRIMTYPE_QUADSTRIP               = 14,
    GPUPRIMTYPE_POLYGON                 = 15,
    GPUPRIMTYPE_2D_COPY_RECT_LIST_V0    = 16,
    GPUPRIMTYPE_2D_COPY_RECT_LIST_V1    = 17,
    GPUPRIMTYPE_2D_COPY_RECT_LIST_V2    = 18,
    GPUPRIMTYPE_2D_COPY_RECT_LIST_V3    = 19,
    GPUPRIMTYPE_2D_FILL_RECT_LIST       = 20,
    GPUPRIMTYPE_2D_LINE_STRIP           = 21,
    GPUPRIMTYPE_2D_TRI_STRIP            = 22,
} GPUPRIMTYPE;
// Grouper primitive type codes.
typedef enum
{
    GPUGROUPPRIMTYPE_3D_POINT           = 0,
    GPUGROUPPRIMTYPE_3D_LINE            = 1,
    GPUGROUPPRIMTYPE_3D_TRI             = 2,
    GPUGROUPPRIMTYPE_3D_RECT            = 3,
    GPUGROUPPRIMTYPE_3D_QUAD            = 4,
    GPUGROUPPRIMTYPE_2D_COPY_RECT_V0    = 5,
    GPUGROUPPRIMTYPE_2D_COPY_RECT_V1    = 6,
    GPUGROUPPRIMTYPE_2D_COPY_RECT_V2    = 7,
    GPUGROUPPRIMTYPE_2D_COPY_RECT_V3    = 8,
    GPUGROUPPRIMTYPE_2D_FILL_RECT       = 9,
    GPUGROUPPRIMTYPE_2D_LINE            = 10,
    GPUGROUPPRIMTYPE_2D_TRI             = 11,
    GPUGROUPPRIMTYPE_PRIM_INDEX_LINE    = 12,
    GPUGROUPPRIMTYPE_PRIM_INDEX_TRI     = 13,
    GPUGROUPPRIMTYPE_PRIM_INDEX_QUAD    = 14,
} GPUGROUPPRIMTYPE;
// Grouper primitive ordering codes.
typedef enum
{
    GPUGROUPPRIMORDER_LIST    = 0,
    GPUGROUPPRIMORDER_STRIP   = 1,
    GPUGROUPPRIMORDER_FAN     = 2,
    GPUGROUPPRIMORDER_LOOP    = 3,
    GPUGROUPPRIMORDER_POLYGON = 4,
} GPUGROUPPRIMORDER;
// Grouper conversion codes.
typedef enum
{
    GPUGROUPCONV_INDEX_16           = 0,
    GPUGROUPCONV_INDEX_32           = 1,
    GPUGROUPCONV_UINT_16            = 2,
    GPUGROUPCONV_UINT_32            = 3,
    GPUGROUPCONV_SINT_16            = 4,
    GPUGROUPCONV_SINT_32            = 5,
    GPUGROUPCONV_FLOAT_32           = 6,
    GPUGROUPCONV_AUTO_PRIM          = 7,
    GPUGROUPCONV_FIX_1_23_TO_FLOAT  = 8,
} GPUGROUPCONV;
// Comparison function codes. The values form a 3-bit mask in which bit 0 is
// LESS, bit 1 is EQUAL and bit 2 is GREATER.
typedef enum
{
    GPUCMP_NEVER         = 0,
    GPUCMP_LESS          = 1,
    GPUCMP_EQUAL         = 2,
    GPUCMP_LESS_EQUAL    = 3,
    GPUCMP_GREATER       = 4,
    GPUCMP_NOT_EQUAL     = 5,
    GPUCMP_GREATER_EQUAL = 6,
    GPUCMP_ALWAYS        = 7,
} GPUCMPFUNC;
// Stencil operation codes.
typedef enum
{
    GPUSTENCILOP_KEEP    = 0,
    GPUSTENCILOP_ZERO    = 1,
    GPUSTENCILOP_REPLACE = 2,
    GPUSTENCILOP_INCRSAT = 3,
    GPUSTENCILOP_DECRSAT = 4,
    GPUSTENCILOP_INVERT  = 5,
    GPUSTENCILOP_INCR    = 6,
    GPUSTENCILOP_DECR    = 7,
} GPUSTENCILOP;
// Blend factor codes. Values 2 and 3 are not assigned here.
typedef enum
{
    GPUBLEND_ZERO             = 0,
    GPUBLEND_ONE              = 1,
    GPUBLEND_SRCCOLOR         = 4,
    GPUBLEND_INVSRCCOLOR      = 5,
    GPUBLEND_SRCALPHA         = 6,
    GPUBLEND_INVSRCALPHA      = 7,
    GPUBLEND_DESTCOLOR        = 8,
    GPUBLEND_INVDESTCOLOR     = 9,
    GPUBLEND_DESTALPHA        = 10,
    GPUBLEND_INVDESTALPHA     = 11,
    GPUBLEND_BLENDFACTOR      = 12,
    GPUBLEND_INVBLENDFACTOR   = 13,
    GPUBLEND_CONSTANTALPHA    = 14,
    GPUBLEND_INVCONSTANTALPHA = 15,
    GPUBLEND_SRCALPHASAT      = 16,
} GPUBLEND;
// Blend operation codes.
typedef enum
{
    GPUBLENDOP_ADD         = 0,
    GPUBLENDOP_SUBTRACT    = 1,
    GPUBLENDOP_MIN         = 2,
    GPUBLENDOP_MAX         = 3,
    GPUBLENDOP_REVSUBTRACT = 4,
} GPUBLENDOP;
// Cull mode combined with front-face winding. In these values bit 2 (0x4)
// distinguishes FRONTFACE_CW from FRONTFACE_CCW, and the low two bits select
// NONE (0), FRONT (1) or BACK (2).
typedef enum
{
    GPUCULL_NONE_FRONTFACE_CCW  = 0x0,
    GPUCULL_FRONT_FRONTFACE_CCW = 0x1,
    GPUCULL_BACK_FRONTFACE_CCW  = 0x2,
    GPUCULL_NONE_FRONTFACE_CW   = 0x4,
    GPUCULL_FRONT_FRONTFACE_CW  = 0x5,
    GPUCULL_BACK_FRONTFACE_CW   = 0x6,
} GPUCULL;
// Fill mode codes.
typedef enum
{
    GPUFILL_POINT     = 0,
    GPUFILL_WIREFRAME = 1,
    GPUFILL_SOLID     = 2,
} GPUFILLMODE;
// Vertex sign selectors.
typedef enum
{
    GPUVERTEXSIGN_UNSIGNED = 0,
    GPUVERTEXSIGN_SIGNED   = 1
} GPUVERTEXSIGN;
// Vertex fetch opcodes (a single opcode is defined).
typedef enum
{
    GPUVERTEXFETCHOP_FETCH_VERTEX = 0,
} GPUVERTEXFETCHOP;
// Texture fetch opcodes.
typedef enum
{
    GPUTEXTUREFETCHOP_FETCH_TEXTURE_MAP         = 1,
    GPUTEXTUREFETCHOP_GET_BORDER_COLOR_FRACTION = 16,
    GPUTEXTUREFETCHOP_GET_COMPUTED_TEX_LOD      = 17,
    GPUTEXTUREFETCHOP_GET_GRADIENTS             = 18,
    GPUTEXTUREFETCHOP_GET_WEIGHTS               = 19,
    GPUTEXTUREFETCHOP_SET_TEX_LOD               = 24,
    GPUTEXTUREFETCHOP_SET_GRADIENTS_H           = 25,
    GPUTEXTUREFETCHOP_SET_GRADIENTS_V           = 26,
} GPUTEXTUREFETCHOP;
// Control flow opcodes.
typedef enum
{
    GPUFLOWOP_NOP                       = 0,
    GPUFLOWOP_EXEC                      = 1,
    GPUFLOWOP_EXEC_END                  = 2,
    GPUFLOWOP_COND_EXEC                 = 3,
    GPUFLOWOP_COND_EXEC_END             = 4,
    GPUFLOWOP_COND_EXEC_PRED            = 5,
    GPUFLOWOP_COND_EXEC_PRED_END        = 6,
    GPUFLOWOP_LOOP_START                = 7,
    GPUFLOWOP_LOOP_END                  = 8,
    GPUFLOWOP_COND_CALL                 = 9,
    GPUFLOWOP_RETURN                    = 10,
    GPUFLOWOP_COND_JUMP                 = 11,
    GPUFLOWOP_ALLOC                     = 12,
    GPUFLOWOP_COND_EXEC_PRED_CLEAN      = 13,
    GPUFLOWOP_COND_EXEC_PRED_CLEAN_END  = 14,
    GPUFLOWOP_VFETCH_END                = 15,
} GPUFLOWOP;
// Scalar ALU opcodes. Value 41 is not assigned here.
typedef enum
{
    GPUALUSCALAROP_ADD        = 0,
    GPUALUSCALAROP_ADDPREV    = 1,
    GPUALUSCALAROP_MUL        = 2,
    GPUALUSCALAROP_MULPREV    = 3,
    GPUALUSCALAROP_MULPREV2   = 4,
    GPUALUSCALAROP_MAX        = 5,
    GPUALUSCALAROP_MIN        = 6,
    GPUALUSCALAROP_SEQ        = 7,
    GPUALUSCALAROP_SGT        = 8,
    GPUALUSCALAROP_SGE        = 9,
    GPUALUSCALAROP_SNE        = 10,
    GPUALUSCALAROP_FRC        = 11,
    GPUALUSCALAROP_TRUNC      = 12,
    GPUALUSCALAROP_FLOOR      = 13,
    GPUALUSCALAROP_EXP        = 14,
    GPUALUSCALAROP_LOGC       = 15,
    GPUALUSCALAROP_LOG        = 16,
    GPUALUSCALAROP_RCPC       = 17,
    GPUALUSCALAROP_RCPF       = 18,
    GPUALUSCALAROP_RCP        = 19,
    GPUALUSCALAROP_RSQC       = 20,
    GPUALUSCALAROP_RSQF       = 21,
    GPUALUSCALAROP_RSQ        = 22,
    GPUALUSCALAROP_MAXA       = 23,
    GPUALUSCALAROP_MAXAF      = 24,
    GPUALUSCALAROP_SUB        = 25,
    GPUALUSCALAROP_SUBPREV    = 26,
    GPUALUSCALAROP_SETPEQ     = 27,
    GPUALUSCALAROP_SETPNE     = 28,
    GPUALUSCALAROP_SETPGT     = 29,
    GPUALUSCALAROP_SETPGE     = 30,
    GPUALUSCALAROP_SETPINV    = 31,
    GPUALUSCALAROP_SETPPOP    = 32,
    GPUALUSCALAROP_SETPCLR    = 33,
    GPUALUSCALAROP_SETPRSTR   = 34,
    GPUALUSCALAROP_KILLEQ     = 35,
    GPUALUSCALAROP_KILLGT     = 36,
    GPUALUSCALAROP_KILLGE     = 37,
    GPUALUSCALAROP_KILLNE     = 38,
    GPUALUSCALAROP_KILLONE    = 39,
    GPUALUSCALAROP_SQRT       = 40,
    GPUALUSCALAROP_MULC0      = 42,
    GPUALUSCALAROP_MULC1      = 43,
    GPUALUSCALAROP_ADDC0      = 44,
    GPUALUSCALAROP_ADDC1      = 45,
    GPUALUSCALAROP_SUBC0      = 46,
    GPUALUSCALAROP_SUBC1      = 47,
    GPUALUSCALAROP_SIN        = 48,
    GPUALUSCALAROP_COS        = 49,
    GPUALUSCALAROP_RETAINPREV = 50
} GPUALUSCALAROP;
// Vector ALU opcodes.
typedef enum
{
    GPUALUVECTOROP_ADD     = 0,
    GPUALUVECTOROP_MUL     = 1,
    GPUALUVECTOROP_MAX     = 2,
    GPUALUVECTOROP_MIN     = 3,
    GPUALUVECTOROP_SEQ     = 4,
    GPUALUVECTOROP_SGT     = 5,
    GPUALUVECTOROP_SGE     = 6,
    GPUALUVECTOROP_SNE     = 7,
    GPUALUVECTOROP_FRC     = 8,
    GPUALUVECTOROP_TRUNC   = 9,
    GPUALUVECTOROP_FLOOR   = 10,
    GPUALUVECTOROP_MAD     = 11,
    GPUALUVECTOROP_CNDEQ   = 12,
    GPUALUVECTOROP_CNDGE   = 13,
    GPUALUVECTOROP_CNDGT   = 14,
    GPUALUVECTOROP_DP4     = 15,
    GPUALUVECTOROP_DP3     = 16,
    GPUALUVECTOROP_DP2ADD  = 17,
    GPUALUVECTOROP_CUBE    = 18,
    GPUALUVECTOROP_MAX4    = 19,
    GPUALUVECTOROP_SETPEQP = 20,
    GPUALUVECTOROP_SETPNEP = 21,
    GPUALUVECTOROP_SETPGTP = 22,
    GPUALUVECTOROP_SETPGEP = 23,
    GPUALUVECTOROP_KILLEQ  = 24,
    GPUALUVECTOROP_KILLGT  = 25,
    GPUALUVECTOROP_KILLGE  = 26,
    GPUALUVECTOROP_KILLNE  = 27,
    GPUALUVECTOROP_DST     = 28,
    GPUALUVECTOROP_MAXA    = 29
} GPUALUVECTOROP;
// ALU source selectors (presumably C = constant, R = register -- confirm).
typedef enum
{
    GPUALUSRCSELECT_C = 0,
    GPUALUSRCSELECT_R = 1
} GPUALUSRCSELECT;
// Pixel shader export register numbers.
typedef enum
{
    GPUEXPORTREGISTER_PS_COLOR_0        = 0,
    GPUEXPORTREGISTER_PS_COLOR_1        = 1,
    GPUEXPORTREGISTER_PS_COLOR_2        = 2,
    GPUEXPORTREGISTER_PS_COLOR_3        = 3,
    GPUEXPORTREGISTER_PS_EXPORT_ADDRESS = 32,
    GPUEXPORTREGISTER_PS_EXPORT_DATA_0  = 33,
    GPUEXPORTREGISTER_PS_EXPORT_DATA_1  = 34,
    GPUEXPORTREGISTER_PS_EXPORT_DATA_2  = 35,
    GPUEXPORTREGISTER_PS_EXPORT_DATA_3  = 36,
    GPUEXPORTREGISTER_PS_EXPORT_DATA_4  = 37,
    GPUEXPORTREGISTER_PS_DEPTH          = 61,
} GPUEXPORTREGISTER_PS;
// Vertex shader export register numbers. The EXPORT_ADDRESS / EXPORT_DATA_n
// entries use the same numbers as in GPUEXPORTREGISTER_PS.
typedef enum
{
    GPUEXPORTREGISTER_VS_INTERPOLATOR_0   = 0,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_1   = 1,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_2   = 2,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_3   = 3,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_4   = 4,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_5   = 5,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_6   = 6,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_7   = 7,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_8   = 8,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_9   = 9,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_10  = 10,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_11  = 11,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_12  = 12,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_13  = 13,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_14  = 14,
    GPUEXPORTREGISTER_VS_INTERPOLATOR_15  = 15,
    GPUEXPORTREGISTER_VS_EXPORT_ADDRESS   = 32,
    GPUEXPORTREGISTER_VS_EXPORT_DATA_0    = 33,
    GPUEXPORTREGISTER_VS_EXPORT_DATA_1    = 34,
    GPUEXPORTREGISTER_VS_EXPORT_DATA_2    = 35,
    GPUEXPORTREGISTER_VS_EXPORT_DATA_3    = 36,
    GPUEXPORTREGISTER_VS_EXPORT_DATA_4    = 37,
    GPUEXPORTREGISTER_VS_POSITION         = 62,
    GPUEXPORTREGISTER_VS_SPRITE_EDGE_KILL = 63,
} GPUEXPORTREGISTER_VS;
// Command processor opcodes. When XAM_BUILD is defined, the four SET_BIN_*
// opcodes are aliased to GPUCOMMANDOP_NOP; otherwise they are 0x60-0x63.
typedef enum
{
    GPUCOMMANDOP_NOP                        = 0x10,
    GPUCOMMANDOP_REG_RMW                    = 0x21,
    GPUCOMMANDOP_DRAW                       = 0x22,
    GPUCOMMANDOP_VIZ_QUERY                  = 0x23,
    GPUCOMMANDOP_SET_STATE                  = 0x25,
    GPUCOMMANDOP_WAIT_FOR_IDLE              = 0x26,
    GPUCOMMANDOP_LOAD_SHADER                = 0x27,
    GPUCOMMANDOP_LOAD_SHADER_IMMEDIATE      = 0x2b,
    GPUCOMMANDOP_SET_CONSTANT               = 0x2d,
    GPUCOMMANDOP_LOAD_ALU_CONSTANT          = 0x2f,
    GPUCOMMANDOP_DRAW_IMMEDIATE             = 0x36,
    GPUCOMMANDOP_MPEG_INDEX                 = 0x3a,
    GPUCOMMANDOP_INVALIDATE_STATE           = 0x3b,
    GPUCOMMANDOP_WAIT_REG_MEM               = 0x3c,
    GPUCOMMANDOP_MEM_WRITE                  = 0x3d,
    GPUCOMMANDOP_REG_TO_MEM                 = 0x3e,
    GPUCOMMANDOP_INDIRECT_BUFFER            = 0x3f,
    GPUCOMMANDOP_COND_WRITE                 = 0x45,
    GPUCOMMANDOP_EVENT_WRITE                = 0x46,
    GPUCOMMANDOP_ME_INIT                    = 0x48,
    GPUCOMMANDOP_FIX_2_FLT_REG              = 0x4d,
    GPUCOMMANDOP_MEM_WRITE_COUNTER          = 0x4f,
    GPUCOMMANDOP_WAIT_REG_EQ                = 0x52,
    GPUCOMMANDOP_WAIT_REG_GTE               = 0x53,
    GPUCOMMANDOP_CPU_INTERRUPT              = 0x54,
    GPUCOMMANDOP_EVENT_WRITE_SHADER         = 0x58,
    GPUCOMMANDOP_EVENT_WRITE_CACHE_FLUSH    = 0x59,
    GPUCOMMANDOP_EVENT_WRITE_SCREEN_EXTENT  = 0x5a,
    GPUCOMMANDOP_EVENT_WRITE_ZPASS_DONE     = 0x5b,
    GPUCOMMANDOP_CONTEXT_UPDATE             = 0x5e,
#ifdef XAM_BUILD
    GPUCOMMANDOP_SET_BIN_MASK_LO            = GPUCOMMANDOP_NOP,
    GPUCOMMANDOP_SET_BIN_MASK_HI            = GPUCOMMANDOP_NOP,
    GPUCOMMANDOP_SET_BIN_SELECT_LO          = GPUCOMMANDOP_NOP,
    GPUCOMMANDOP_SET_BIN_SELECT_HI          = GPUCOMMANDOP_NOP,
#else
    GPUCOMMANDOP_SET_BIN_MASK_LO            = 0x60,
    GPUCOMMANDOP_SET_BIN_MASK_HI            = 0x61,
    GPUCOMMANDOP_SET_BIN_SELECT_LO          = 0x62,
    GPUCOMMANDOP_SET_BIN_SELECT_HI          = 0x63,
#endif
} GPUCOMMANDOP;
// Load-type selector: vertex, pixel or shared.
// NOTE(review): consumers are outside this section; presumably an argument
// of the GPUCOMMANDOP_LOAD_SHADER* commands - confirm.
typedef enum
{
GPULOADTYPE_VERTEX = 0,
GPULOADTYPE_PIXEL = 1,
GPULOADTYPE_SHARED = 2,
} GPULOADTYPE;
// Encodings for the 1-bit HiZFunc field of GPU_HICONTROL.
typedef enum
{
GPUHIZFUNC_LESS_EQUAL = 0,
GPUHIZFUNC_GREATER_EQUAL = 1,
} GPUHIZFUNC;
// Encodings for the 1-bit HiStencilFunc field of GPU_HICONTROL.
typedef enum
{
GPUHISTENCILFUNC_EQUAL = 0,
GPUHISTENCILFUNC_NOT_EQUAL = 1,
} GPUHISTENCILFUNC;
// Encodings for the 3-bit EdramMode field of GPU_EDRAMMODECONTROL.
// Values 1-3 and 7 have no enumerator here.
typedef enum
{
GPUEDRAMMODE_NOP = 0,
GPUEDRAMMODE_COLOR_DEPTH = 4,
GPUEDRAMMODE_DOUBLE_DEPTH = 5,
GPUEDRAMMODE_COPY = 6,
} GPUEDRAMMODE;
// Encodings for the 2-bit ClipPlaneMode field of GPU_CLIPCONTROL.
typedef enum
{
GPUCLIPPLANEMODE_CULL_CENTER_NO_BIAS = 0,
GPUCLIPPLANEMODE_CULL_CENTER_RADIUS_BIAS = 1,
GPUCLIPPLANEMODE_CULL_CENTER_RADIUS_BIAS_EXPAND = 2,
GPUCLIPPLANEMODE_ALWAYS_EXPAND = 3,
} GPUCLIPPLANEMODE;
// Encodings for the 2-bit SampleControl field of GPU_CONTEXTMISC.
typedef enum
{
GPUSAMPLECONTROL_CENTROIDS_ONLY = 0,
GPUSAMPLECONTROL_CENTERS_ONLY = 1,
GPUSAMPLECONTROL_CENTROIDS_AND_CENTERS = 2,
} GPUSAMPLECONTROL;
// Encodings for the 2-bit PathSelect field of GPU_OUTPUTPATHCONTROL.
typedef enum
{
GPUPATHSELECT_VERTEX_REUSE = 0,
GPUPATHSELECT_TESS_ENABLE = 1,
GPUPATHSELECT_PASSTHRU = 2,
} GPUPATHSELECT;
// Encodings for the 2-bit TessMode field of GPU_HOSCONTROL.
typedef enum
{
GPUTESSMODE_DISCRETE = 0,
GPUTESSMODE_CONTINUOUS = 1,
GPUTESSMODE_ADAPTIVE = 2,
} GPUTESSMODE;
// Encodings for the 1-bit PixCenter field of GPU_VTXCONTROL.
typedef enum
{
GPUPIXCENTER_ZERO = 0,
GPUPIXCENTER_HALF = 1,
} GPUPIXCENTER;
// Encodings for the 2-bit RoundMode field of GPU_VTXCONTROL.
typedef enum
{
GPUROUNDMODE_TRUNCATE = 0,
GPUROUNDMODE_ROUND = 1,
GPUROUNDMODE_ROUND_TO_EVEN = 2,
GPUROUNDMODE_ROUND_TO_ODD = 3,
} GPUROUNDMODE;
// Encodings for the 3-bit QuantMode field of GPU_VTXCONTROL.
// Values 5-7 have no enumerator here.
typedef enum
{
GPUQUANTMODE_16TH = 0,
GPUQUANTMODE_8TH = 1,
GPUQUANTMODE_4TH = 2,
GPUQUANTMODE_HALF = 3,
GPUQUANTMODE_ONE = 4,
} GPUQUANTMODE;
// Encodings for the 3-bit CopySrcSelect field of GPU_COPYCONTROL:
// one of four render targets, or depth/stencil.
typedef enum
{
GPUCOPYSRCSELECT_RENDER_TARGET_0 = 0,
GPUCOPYSRCSELECT_RENDER_TARGET_1 = 1,
GPUCOPYSRCSELECT_RENDER_TARGET_2 = 2,
GPUCOPYSRCSELECT_RENDER_TARGET_3 = 3,
GPUCOPYSRCSELECT_DEPTH_STENCIL = 4,
} GPUCOPYSRCSELECT;
// Encodings for the 3-bit CopySampleSelect field of GPU_COPYCONTROL:
// a single sample (0-3) or a combination of samples (4-6).
typedef enum
{
GPUCOPYSAMPLESELECT_SAMPLE_0 = 0,
GPUCOPYSAMPLESELECT_SAMPLE_1 = 1,
GPUCOPYSAMPLESELECT_SAMPLE_2 = 2,
GPUCOPYSAMPLESELECT_SAMPLE_3 = 3,
GPUCOPYSAMPLESELECT_SAMPLES_0_1 = 4,
GPUCOPYSAMPLESELECT_SAMPLES_2_3 = 5,
GPUCOPYSAMPLESELECT_SAMPLES_0_1_2_3 = 6,
} GPUCOPYSAMPLESELECT;
// Encodings for the 2-bit CopyCommand field of GPU_COPYCONTROL.
typedef enum
{
GPUCOPYCOMMAND_RAW = 0,
GPUCOPYCOMMAND_CONVERT = 1,
GPUCOPYCOMMAND_1_1_1_1 = 2,
GPUCOPYCOMMAND_NULL = 3,
} GPUCOPYCOMMAND;
// Encodings for the 3-bit VsExportMode field of GPU_PROGRAMCONTROL.
// Value 1 has no enumerator here.
typedef enum
{
GPUVSEXPORTMODE_POSITION_ONLY = 0,
GPUVSEXPORTMODE_SPRITE = 2,
GPUVSEXPORTMODE_EDGE = 3,
GPUVSEXPORTMODE_KILL = 4,
GPUVSEXPORTMODE_SPRITE_KILL = 5,
GPUVSEXPORTMODE_EDGE_KILL = 6,
GPUVSEXPORTMODE_MULTIPASS = 7,
} GPUVSEXPORTMODE;
// Encodings for the 2-bit MsaaSamples field of GPU_SURFACEINFO.
typedef enum
{
GPUSAMPLES_1X = 0,
GPUSAMPLES_2X = 1,
GPUSAMPLES_4X = 2,
} GPUSAMPLES;
// Instruction type selector: ALU or fetch.
// NOTE(review): consumers are outside this section; presumably a field of
// the shader "exec" control-flow instruction - confirm.
typedef enum
{
GPUEXECINSTRUCTIONTYPE_ALU = 0,
GPUEXECINSTRUCTIONTYPE_FETCH = 1,
} GPUEXECINSTRUCTIONTYPE;
// Serialization selector: unserialized or serialized.
// NOTE(review): consumers are outside this section; presumably a field of
// the shader "exec" control-flow instruction - confirm.
typedef enum
{
GPUEXECSERIALIZEMODE_UNSERIALIZED = 0,
GPUEXECSERIALIZEMODE_SERIALIZED = 1,
} GPUEXECSERIALIZEMODE;
// Fetch cache selector: texture or vertex.
// NOTE(review): consumers are outside this section; presumably a field of
// the shader "exec" control-flow instruction - confirm.
typedef enum
{
GPUEXECFETCHCACHETYPE_TEXTURE = 0,
GPUEXECFETCHCACHETYPE_VERTEX = 1,
} GPUEXECFETCHCACHETYPE;
// Buffer selector for shader export allocation. Value 2 has two names: it
// means "interpolators" in a vertex shader and "colors" in a pixel shader.
typedef enum
{
GPUALLOCBUFFERSELECT_POSITION = 1,
GPUALLOCBUFFERSELECT_INTERPOLATORS = 2, // vertex shaders
GPUALLOCBUFFERSELECT_COLORS = GPUALLOCBUFFERSELECT_INTERPOLATORS, // pixel shaders (same encoding)
GPUALLOCBUFFERSELECT_EXPORT = 3
} GPUALLOCBUFFERSELECT;
// Encodings for the 2-bit ScreenExtMethod field of GPU_BCCONTROL.
typedef enum
{
GPUSCREENEXTENTS_ALLTILES = 0,
GPUSCREENEXTENTS_NONCULLED = 1,
GPUSCREENEXTENTS_PRIMEXTENTS = 2,
} GPUSCREENEXTENTS;
// Event initiator codes. Values 16-19 have no enumerator here.
// NOTE(review): consumers are outside this section; presumably the event
// identifier carried by the GPUCOMMANDOP_EVENT_WRITE* commands - confirm.
typedef enum
{
GPUINITIATOR_VS_DEALLOC = 0,
GPUINITIATOR_PS_DEALLOC = 1,
GPUINITIATOR_VS_DONE_WRITE_BACK = 2,
GPUINITIATOR_PS_DONE_WRITE_BACK = 3,
GPUINITIATOR_CACHE_FLUSH_WRITE_BACK = 4,
GPUINITIATOR_CONTEXT_DONE = 5,
GPUINITIATOR_CACHE_FLUSH = 6,
GPUINITIATOR_VIZQUERY_START = 7,
GPUINITIATOR_VIZQUERY_END = 8,
GPUINITIATOR_SC_WAIT_WC = 9,
GPUINITIATOR_MPASS_PS_CP_REFETCH = 10,
GPUINITIATOR_MPASS_PS_RST_START = 11,
GPUINITIATOR_MPASS_PS_INCR_START = 12,
GPUINITIATOR_RST_PIX_CNT = 13,
GPUINITIATOR_RST_VTX_CNT = 14,
GPUINITIATOR_TILE_FLUSH = 15,
GPUINITIATOR_CACHE_FLUSH_AND_INV_WRITE_BACK_EVENT = 20,
GPUINITIATOR_ZPASS_DONE = 21,
GPUINITIATOR_CACHE_FLUSH_AND_INV_EVENT = 22,
GPUINITIATOR_PERFCOUNTER_START = 23,
GPUINITIATOR_PERFCOUNTER_STOP = 24,
GPUINITIATOR_SCREEN_EXT_INIT = 25,
GPUINITIATOR_SCREEN_EXT_RPT = 26,
GPUINITIATOR_VS_FETCH_DONE_WRITE_BACK = 27,
} GPUINITIATOR;
// Comparison functions, NEVER (0) through ALWAYS (7).
// NOTE(review): consumers are outside this section; presumably the compare
// function of the wait / conditional-write commands - confirm.
typedef enum
{
GPUSYNCFUNCTION_NEVER = 0,
GPUSYNCFUNCTION_LESS = 1,
GPUSYNCFUNCTION_LEQUAL = 2,
GPUSYNCFUNCTION_EQUAL = 3,
GPUSYNCFUNCTION_NOTEQUAL = 4,
GPUSYNCFUNCTION_GEQUAL = 5,
GPUSYNCFUNCTION_GREATER = 6,
GPUSYNCFUNCTION_ALWAYS = 7,
} GPUSYNCFUNCTION;
// Selects between register space and memory space.
// NOTE(review): consumers are outside this section; presumably tells a
// wait / write command what kind of address it operates on - confirm.
typedef enum
{
GPUSYNCSPACE_REGISTER = 0,
GPUSYNCSPACE_MEMORY = 1,
} GPUSYNCSPACE;
// Identifies a class of constant: ALU, fetch, boolean, integer or register.
// NOTE(review): consumers are outside this section; presumably used with
// GPUCOMMANDOP_SET_CONSTANT - confirm.
typedef enum
{
GPUCONSTANTID_ALU = 0,
GPUCONSTANTID_FETCH = 1,
GPUCONSTANTID_BOOLEAN = 2,
GPUCONSTANTID_INTEGER = 3,
GPUCONSTANTID_REGISTER = 4,
} GPUCONSTANTID;
// Index source selector: DMA, immediate or auto.
// NOTE(review): consumers are outside this section; presumably a field of
// the draw commands - confirm.
typedef enum
{
GPUINDEXSELECT_DMA = 0,
GPUINDEXSELECT_IMMEDIATE = 1,
GPUINDEXSELECT_AUTO = 2,
} GPUINDEXSELECT;
// Index width selector: 16-bit or 32-bit indices.
// NOTE(review): consumers are outside this section; presumably a field of
// the draw commands - confirm.
typedef enum
{
GPUINDEXTYPE_16BIT = 0,
GPUINDEXTYPE_32BIT = 1,
} GPUINDEXTYPE;
// Clip-plane disabled/enabled values (0 and 0x1000).
// NOTE(review): the name suggests these are written to the COHER_DEST_BASE_7
// register; the consumer is outside this section - confirm.
typedef enum
{
GPUDESTBASE7_CLIPPLANE_DISABLED = 0x0000,
GPUDESTBASE7_CLIPPLANE_ENABLED = 0x1000,
} GPUDESTBASE7;
//------------------------------------------------------------------------------
// Sample-count record: four counters (Total, ZFail, ZPass, StencilFail),
// each stored as a pair of DWORDs, eight DWORDs in total.
typedef struct
{
DWORD Total[2]; // Odd/even pair, little-endian
DWORD ZFail[2]; // Odd/even pair, little-endian
DWORD ZPass[2]; // Odd/even pair, little-endian
DWORD StencilFail[2]; // Odd/even pair, little-endian
} GPU_SAMPLECOUNT;
// Screen-extent record: X/Y bounds in tile coordinates plus Z bounds,
// stored as six WORDs.
typedef struct
{
WORD MinX; // In tile coordinates, multiply by 8 for pixel coordinates
WORD MaxX; // In tile coordinates, multiply by 8 for pixel coordinates
WORD MinY; // In tile coordinates, multiply by 8 for pixel coordinates
WORD MaxY; // In tile coordinates, multiply by 8 for pixel coordinates
WORD MinZ; // 11 MSBs of Z
WORD MaxZ; // 11 MSBs of Z
} GPU_SCREENEXTENT;
//------------------------------------------------------------------------------
// Layout of the RB_SURFACE_INFO register (GPUREG_SURFACEINFO).
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD SurfacePitch : 14;
DWORD : 2; // reserved
DWORD MsaaSamples : 2; // GPUSAMPLES
DWORD HiZPitch : 14;
};
DWORD dword;
} GPU_SURFACEINFO;
// Layout shared by the RB_COLOR0_INFO .. RB_COLOR3_INFO registers.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD ColorBase : 12;
DWORD : 4; // reserved
DWORD ColorFormat : 4; // GPUEDRAMCOLORFORMAT
DWORD ColorExpBias : 6;
DWORD : 6; // reserved
};
DWORD dword;
} GPU_COLORINFO;
// Layout of the RB_DEPTH_INFO register (GPUREG_DEPTHINFO).
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD DepthBase : 12;
DWORD : 4; // reserved
DWORD DepthFormat : 1; // GPUEDRAMDEPTHFORMAT
DWORD DisableHZClamp : 1;
DWORD : 14; // reserved
};
DWORD dword;
} GPU_DEPTHINFO;
// Packed X/Y pair used by the screen-scissor, window-offset and
// window-scissor registers. X and Y are signed 15-bit fields, each followed
// by one unused bit; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
int X : 15;
DWORD : 1; // unused
int Y : 15;
DWORD : 1; // unused
};
DWORD dword;
} GPU_POINT;
// Layout of the RB_COLOR_MASK register: four 4-bit write masks (Write0 ..
// Write3) in the low 16 bits. `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD Write0 : 4;
DWORD Write1 : 4;
DWORD Write2 : 4;
DWORD Write3 : 4;
DWORD : 16; // reserved
};
DWORD dword;
} GPU_COLORMASK;
// Layout shared by the RB_STENCILREFMASK and RB_STENCILREFMASK_BF registers:
// three 8-bit fields (reference, mask, write mask).
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD Ref : 8;
DWORD Mask : 8;
DWORD WriteMask : 8;
DWORD : 8; // reserved
};
DWORD dword;
} GPU_STENCILREFMASK;
// Layout of the SQ_PROGRAM_CNTL register (GPUREG_PROGRAMCONTROL). Vertex-
// shader (Vs*) and pixel-shader (Ps*) fields are interleaved in one DWORD.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD VsMaxReg : 6;
DWORD : 2; // reserved
DWORD PsMaxReg : 6;
DWORD : 2; // reserved
DWORD VsResource : 1;
DWORD PsResource : 1;
DWORD ParamGen : 1;
DWORD GenIndexPix : 1;
DWORD VsExportCount : 4;
DWORD VsExportMode : 3; // GPUVSEXPORTMODE
DWORD PsExportZ : 1;
DWORD PsExportColorCount : 3;
DWORD GenIndexVtx : 1;
};
DWORD dword;
} GPU_PROGRAMCONTROL;
// Bit masks over GPU_PROGRAMCONTROL. With fields allocated from the least-
// significant bit, the PS mask selects PsMaxReg (bits 8-13), PsResource,
// ParamGen and GenIndexPix (bits 17-19), and PsExportZ and
// PsExportColorCount (bits 27-30). The VS mask is its bitwise complement.
#define GPU_PROGRAMCONTROL_PS_MASK 0x780E3F00
#define GPU_PROGRAMCONTROL_VS_MASK (~GPU_PROGRAMCONTROL_PS_MASK)
// Layout of the SQ_CONTEXT_MISC register (GPUREG_CONTEXTMISC).
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD InstPredOptimize : 1;
DWORD OutputScreenXY : 1;
DWORD SampleControl : 2; // GPUSAMPLECONTROL
DWORD : 4; // reserved
DWORD ParamGenPos : 8;
DWORD PerfCounterRef : 1;
DWORD YieldOptimize : 1;
DWORD TxCacheSelect : 1;
DWORD : 13; // reserved
};
DWORD dword;
} GPU_CONTEXTMISC;
// Bit masks over GPU_CONTEXTMISC. With fields allocated from the least-
// significant bit, the PS mask selects OutputScreenXY (bit 1), SampleControl
// (bits 2-3), the reserved bits 4-7 and the whole 8-bit ParamGenPos field
// (bits 8-15). The VS mask is its bitwise complement.
// The previous 7-digit literal 0x0000FFE stopped at bit 11 and so split
// ParamGenPos in half; it is widened here to cover the full field.
#define GPU_CONTEXTMISC_PS_MASK 0x0000FFFE
#define GPU_CONTEXTMISC_VS_MASK (~GPU_CONTEXTMISC_PS_MASK)
// Layout of the SQ_INTERPOLATOR_CNTL register: two 16-bit halves.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD ParamShade : 16;
DWORD SamplingPattern : 16;
};
DWORD dword;
} GPU_INTERPOLATORCONTROL;
// Eight 4-bit wrap fields, Wrap0 .. Wrap7; used for the SQ_WRAPPING_1
// register in GPU_PROGRAMPACKET. Note that, despite the "1" suffix, this
// type holds the lower-numbered fields; GPU_WRAPPING0 holds Wrap8 .. Wrap15.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD Wrap0 : 4;
DWORD Wrap1 : 4;
DWORD Wrap2 : 4;
DWORD Wrap3 : 4;
DWORD Wrap4 : 4;
DWORD Wrap5 : 4;
DWORD Wrap6 : 4;
DWORD Wrap7 : 4;
};
DWORD dword;
} GPU_WRAPPING1;
// Eight 4-bit wrap fields, Wrap8 .. Wrap15; used for the SQ_WRAPPING_0
// register in GPU_PROGRAMPACKET. Note that, despite the "0" suffix, this
// type holds the higher-numbered fields; GPU_WRAPPING1 holds Wrap0 .. Wrap7.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD Wrap8 : 4;
DWORD Wrap9 : 4;
DWORD Wrap10 : 4;
DWORD Wrap11 : 4;
DWORD Wrap12 : 4;
DWORD Wrap13 : 4;
DWORD Wrap14 : 4;
DWORD Wrap15 : 4;
};
DWORD dword;
} GPU_WRAPPING0;
// Layout of the RB_DEPTHCONTROL register: depth/stencil enables, the depth
// compare function, and the stencil function and operations, with a second
// "BF" set alongside the BackFaceEnable bit.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD StencilEnable : 1;
DWORD ZEnable : 1;
DWORD ZWriteEnable : 1;
DWORD : 1; // reserved
DWORD ZFunc : 3; // GPUCMPFUNC
DWORD BackFaceEnable : 1;
DWORD StencilFunc : 3; // GPUCMPFUNC
DWORD StencilFail : 3; // GPUSTENCILOP
DWORD StencilZPass : 3; // GPUSTENCILOP
DWORD StencilZFail : 3; // GPUSTENCILOP
DWORD StencilFuncBF : 3; // GPUCMPFUNC
DWORD StencilFailBF : 3; // GPUSTENCILOP
DWORD StencilZPassBF : 3; // GPUSTENCILOP
DWORD StencilZFailBF : 3; // GPUSTENCILOP
};
DWORD dword;
} GPU_DEPTHCONTROL;
// Layout shared by the RB_BLENDCONTROL0 .. RB_BLENDCONTROL3 registers:
// source factor, operation and destination factor for color (low 16 bits)
// and for alpha (high 16 bits).
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD ColorSrcBlend : 5; // GPUBLEND
DWORD ColorBlendOp : 3; // GPUBLENDOP
DWORD ColorDestBlend : 5; // GPUBLEND
DWORD : 3; // reserved
DWORD AlphaSrcBlend : 5; // GPUBLEND
DWORD AlphaBlendOp : 3; // GPUBLENDOP
DWORD AlphaDestBlend : 5; // GPUBLEND
DWORD : 3; // reserved
};
DWORD dword;
} GPU_BLENDCONTROL;
// Layout of the RB_COLORCONTROL register: alpha-test and alpha-to-mask
// controls in the low 5 bits, four 2-bit alpha-to-mask offsets in the top
// 8 bits. `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD AlphaFunc : 3; // GPUCMPFUNC
DWORD AlphaTestEnable : 1;
DWORD AlphaToMaskEnable : 1;
DWORD : 19; // reserved
DWORD AlphaToMaskOffset0 : 2;
DWORD AlphaToMaskOffset1 : 2;
DWORD AlphaToMaskOffset2 : 2;
DWORD AlphaToMaskOffset3 : 2;
};
DWORD dword;
} GPU_COLORCONTROL;
// Layout of the RB_TILECONTROL register (GPUREG_HICONTROL): hierarchical-Z
// and hierarchical-stencil controls.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD HiZWriteEnable : 1;
DWORD HiZEnable : 1;
DWORD HiStencilWriteEnable : 1;
DWORD HiStencilEnable : 1;
DWORD HiZFunc : 1; // GPUHIZFUNC
DWORD HiStencilFunc : 1; // GPUHISTENCILFUNC
DWORD : 2; // reserved
DWORD HiStencilRef : 8;
DWORD : 1; // reserved
DWORD HiBaseAddr : 15;
};
DWORD dword;
} GPU_HICONTROL;
// Layout of the PA_CL_CLIP_CNTL register: one enable bit per clip plane
// (0-5) followed by clipping mode and behavior flags.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD ClipPlaneEnable0 : 1;
DWORD ClipPlaneEnable1 : 1;
DWORD ClipPlaneEnable2 : 1;
DWORD ClipPlaneEnable3 : 1;
DWORD ClipPlaneEnable4 : 1;
DWORD ClipPlaneEnable5 : 1;
DWORD : 8; // reserved
DWORD ClipPlaneMode : 2; // GPUCLIPPLANEMODE
DWORD ClipDisable : 1;
DWORD ClipPlaneCullOnlyEnable : 1;
DWORD BoundaryEdgeFlagEnable : 1;
DWORD DxClipSpaceDef : 1;
DWORD DisableClipErrDetect : 1;
DWORD VtxKillOr : 1;
DWORD XyNanRetain : 1;
DWORD ZNanRetain : 1;
DWORD WNanRetain : 1;
DWORD : 7; // reserved
};
DWORD dword;
} GPU_CLIPCONTROL;
// Layout of the PA_SU_SC_MODE_CNTL register: cull mode, polygon fill modes,
// polygon-offset enables and assorted setup/scan-converter flags.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD CullMode : 3; // GPUCULL
DWORD PolyMode : 2; // BOOL
DWORD PolyModeFrontPType : 3; // GPUFILLMODE
DWORD PolyModeBackPType : 3; // GPUFILLMODE
DWORD PolyOffsetFrontEnable : 1;
DWORD PolyOffsetBackEnable : 1;
DWORD PolyOffsetParaEnable : 1;
DWORD : 1; // reserved
DWORD MsaaEnable : 1;
DWORD VtxWindowOffsetEnable : 1;
DWORD : 2; // reserved
DWORD ProvokingVtxLast : 1;
DWORD PerspCorrDisable : 1;
DWORD MultiPrimIbEnable : 1;
DWORD : 1; // reserved
DWORD QuadOrderEnable : 1;
DWORD ScOneQuadPerClock : 1;
DWORD : 7; // reserved
};
DWORD dword;
} GPU_MODECONTROL;
// Layout of the PA_CL_VTE_CNTL register: per-axis viewport scale/offset
// enables followed by vertex format flags.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD VportXScaleEnable : 1;
DWORD VportXOffsetEnable : 1;
DWORD VportYScaleEnable : 1;
DWORD VportYOffsetEnable : 1;
DWORD VportZScaleEnable : 1;
DWORD VportZOffsetEnable : 1;
DWORD : 2; // reserved
DWORD VtxXyFmt : 1;
DWORD VtxZFmt : 1;
DWORD VtxW0Fmt : 1;
DWORD PerfCounterRef : 1;
DWORD : 20; // reserved
};
DWORD dword;
} GPU_VTECONTROL;
// Layout of the RB_MODECONTROL register (GPUREG_EDRAMMODECONTROL).
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD EdramMode : 3; // GPUEDRAMMODE
DWORD ColorDepthMacro : 1;
DWORD : 28; // reserved
};
DWORD dword;
} GPU_EDRAMMODECONTROL;
// Layout of the PA_SU_POINT_SIZE register: Height in the low 16 bits, Width
// in the high 16 bits. `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD Height : 16;
DWORD Width : 16;
};
DWORD dword;
} GPU_POINTSIZE;
// Layout of the PA_SU_POINT_MINMAX register: MinSize in the low 16 bits,
// MaxSize in the high 16 bits. `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD MinSize : 16;
DWORD MaxSize : 16;
};
DWORD dword;
} GPU_POINTMINMAX;
// Layout of the PA_SU_LINE_CNTL register: a 16-bit Width in the low half.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD Width : 16;
DWORD : 16; // reserved
};
DWORD dword;
} GPU_LINECONTROL;
// Layout of the VGT_OUTPUT_PATH_CNTL register.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD PathSelect : 2; // GPUPATHSELECT
DWORD : 30; // reserved
};
DWORD dword;
} GPU_OUTPUTPATHCONTROL;
// Layout of the VGT_HOS_CNTL register.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD TessMode : 2; // GPUTESSMODE
DWORD : 30; // reserved
};
DWORD dword;
} GPU_HOSCONTROL;
// Layout of the VGT_GROUP_PRIM_TYPE register.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD GroupPrimType : 4; // GPUGROUPPRIMTYPE
DWORD : 10; // reserved
DWORD GroupRetainOrder : 1;
DWORD GroupRetainQuads : 1;
DWORD GroupPrimOrder : 3; // GPUGROUPPRIMORDER
DWORD : 13; // reserved
};
DWORD dword;
} GPU_GROUPPRIMTYPE;
// Layout shared by the VGT_GROUP_VECT_0_CNTL and VGT_GROUP_VECT_1_CNTL
// registers: per-component enables plus 8-bit Stride and Shift fields.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD CompXEnable : 1;
DWORD CompYEnable : 1;
DWORD CompZEnable : 1;
DWORD CompWEnable : 1;
DWORD : 4; // reserved
DWORD Stride : 8;
DWORD Shift : 8;
DWORD : 8; // reserved
};
DWORD dword;
} GPU_GROUPVECTCONTROL;
// Layout shared by the VGT_GROUP_VECT_0_FMT_CNTL and
// VGT_GROUP_VECT_1_FMT_CNTL registers: a 4-bit conversion and a 4-bit offset
// for each of X, Y, Z and W. `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD XConv : 4; // GPUGROUPCONV
DWORD XOffset : 4;
DWORD YConv : 4; // GPUGROUPCONV
DWORD YOffset : 4;
DWORD ZConv : 4; // GPUGROUPCONV
DWORD ZOffset : 4;
DWORD WConv : 4; // GPUGROUPCONV
DWORD WOffset : 4;
};
DWORD dword;
} GPU_GROUPVECTFMTCONTROL;
// Layout of the PA_SC_MPASS_PS_CNTL register: a 20-bit count in the low bits
// and an enable flag in the top bit.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD MPassPixVecPerPass : 20;
DWORD : 11; // reserved
DWORD MPassPsEnable : 1;
};
DWORD dword;
} GPU_MPASSPSCONTROL;
// Layout of the PA_SC_VIZ_QUERY register: enable bit, 6-bit query id and two
// pixel-kill flags. `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD VizQueryEnable : 1;
DWORD VizQueryId : 6;
DWORD KillPixPostHiZ : 1;
DWORD KillPixPostDetailMask : 1;
DWORD : 23; // reserved
};
DWORD dword;
} GPU_VIZQUERY;
// Layout of the VGT_ENHANCE register: a single 16-bit Misc field.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD Misc : 16;
DWORD : 16; // reserved
};
DWORD dword;
} GPU_ENHANCE;
// Layout of the PA_SC_LINE_CNTL register.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD BresControl : 8;
DWORD UseBresControl : 1;
DWORD ExpandLineWidth : 1;
DWORD LastPixel : 1;
DWORD : 21; // reserved
};
DWORD dword;
} GPU_SCLINECONTROL;
// Layout of the PA_SC_AA_CONFIG register.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD MsaaNumSamples : 3;
DWORD : 10; // reserved
DWORD MaxSampleDist : 4;
DWORD : 15; // reserved
};
DWORD dword;
} GPU_AACONFIG;
// Layout of the PA_SU_VTX_CNTL register: pixel center, rounding mode and
// quantization mode. `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD PixCenter : 1; // GPUPIXCENTER
DWORD RoundMode : 2; // GPUROUNDMODE
DWORD QuantMode : 3; // GPUQUANTMODE
DWORD : 26; // reserved
};
DWORD dword;
} GPU_VTXCONTROL;
// Layout shared by the SQ_VS_CONST and SQ_PS_CONST registers: a 9-bit Base
// and a 9-bit Size. `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD Base : 9;
DWORD : 3; // reserved
DWORD Size : 9;
DWORD : 11; // reserved
};
DWORD dword;
} GPU_CONST;
// Layout of the SQ_DEBUG_MISC_0 register.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
// NOTE(review): the field names mix "DbProg" and "DbProb" prefixes; they are
// part of the public interface and are left as declared.
typedef union {
struct {
DWORD DbProgOn : 1;
DWORD : 3; // reserved
DWORD DbProbBreak : 1;
DWORD : 3; // reserved
DWORD DbProbAddr : 11;
DWORD : 5; // reserved
DWORD DbProbCount : 8;
};
DWORD dword;
} GPU_DEBUGMISC0;
// Layout of the SQ_DEBUG_MISC_1 register.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD DbOnPix : 1;
DWORD DbOnVtx : 1;
DWORD : 6; // reserved
DWORD DbInstCount : 8;
DWORD DbBreakAddr : 11;
DWORD : 5; // reserved
};
DWORD dword;
} GPU_DEBUGMISC1;
// Layout of the VGT_VERTEX_REUSE_BLOCK_CNTL register: an 8-bit reuse depth.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD VtxReuseDepth : 8;
DWORD : 24; // reserved
};
DWORD dword;
} GPU_VERTEXREUSEBLOCKCONTROL;
// Layout of the VGT_OUT_DEALLOC_CNTL register: a 7-bit DeallocDist field.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD DeallocDist : 7;
DWORD : 25; // reserved
};
DWORD dword;
} GPU_OUTDEALLOCCONTROL;
// Layout of the RB_COPY_CONTROL register: copy source and sample selection,
// color/depth clear enables and the copy command.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD CopySrcSelect : 3; // GPUCOPYSRCSELECT
DWORD : 1; // reserved
DWORD CopySampleSelect : 3; // GPUCOPYSAMPLESELECT
DWORD : 1; // reserved
DWORD ColorClearEnable : 1;
DWORD DepthClearEnable : 1;
DWORD : 10; // reserved
DWORD CopyCommand : 2; // GPUCOPYCOMMAND
DWORD : 10; // reserved
};
DWORD dword;
} GPU_COPYCONTROL;
// Layout of the RB_COPY_DEST_PITCH register: 14-bit pitch and 14-bit height,
// each padded to 16 bits. `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD CopyDestPitch : 14;
DWORD : 2; // reserved
DWORD CopyDestHeight : 14;
DWORD : 2; // reserved
};
DWORD dword;
} GPU_COPYDESTPITCH;
// Layout of the RB_COPY_DEST_INFO register: endianness, array/slice, format,
// number type, exponent bias and swap of the copy destination.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD CopyDestEndian : 3; // GPUENDIAN128
DWORD CopyDestArray : 1; // GPUCOLORARRAY
DWORD CopyDestSlice : 3;
DWORD CopyDestFormat : 6; // GPUCOLORFORMAT
DWORD CopyDestNumber : 3; // GPUSURFACENUMBER
DWORD CopyDestExpBias : 6;
DWORD : 2; // reserved
DWORD CopyDestSwap : 1; // GPUSURFACESWAP
DWORD : 7; // reserved
};
DWORD dword;
} GPU_COPYDESTINFO;
// Layout of the RB_COPY_FUNC register: one 3-bit function per color channel,
// each on a 4-bit boundary. `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD CopyFuncRed : 3;
DWORD : 1; // reserved
DWORD CopyFuncGreen : 3;
DWORD : 1; // reserved
DWORD CopyFuncBlue : 3;
DWORD : 1; // reserved
DWORD CopyFuncAlpha : 3;
DWORD : 17; // reserved
};
DWORD dword;
} GPU_COPYFUNC;
// Layout of the RB_COPY_REF register: one 8-bit reference value per channel.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD CopyRefRed : 8;
DWORD CopyRefGreen : 8;
DWORD CopyRefBlue : 8;
DWORD CopyRefAlpha : 8;
};
DWORD dword;
} GPU_COPYREF;
// Layout of the RB_COPY_MASK register: one 8-bit mask per channel.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD CopyMaskRed : 8;
DWORD CopyMaskGreen : 8;
DWORD CopyMaskBlue : 8;
DWORD CopyMaskAlpha : 8;
};
DWORD dword;
} GPU_COPYMASK;
// Layout of the RB_SAMPLE_COUNT_CTL register: reset and copy flags.
// `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD ResetSampleCount : 1;
DWORD CopySampleCount : 1;
DWORD : 30; // reserved
};
DWORD dword;
} GPU_SAMPLECOUNTCONTROL;
// Layout of the register at GPUREG_BCCONTROL.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD BankActToActSClk : 6;
DWORD : 2; // reserved
DWORD DisableFragCombine : 1;
DWORD DisableReOrder : 1;
DWORD HzFudgeShift : 2;
DWORD ScreenExtMethod : 2; // GPUSCREENEXTENTS
DWORD : 18; // reserved
};
DWORD dword;
} GPU_BCCONTROL;
// Layout of the register at GPUREG_COHERSTATUSHOST: matching-context mask,
// one enable bit for the copy destination base and each of the eight
// destination bases, three action enables, and a Status bit in bit 31.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD MatchingContexts : 8;
DWORD CopyDestBaseEnable : 1;
DWORD DestBase0Enable : 1;
DWORD DestBase1Enable : 1;
DWORD DestBase2Enable : 1;
DWORD DestBase3Enable : 1;
DWORD DestBase4Enable : 1;
DWORD DestBase5Enable : 1;
DWORD DestBase6Enable : 1;
DWORD DestBase7Enable : 1;
DWORD : 7; // reserved
DWORD VcActionEnable : 1;
DWORD TcActionEnable : 1;
DWORD PglbActionEnable : 1;
DWORD : 4; // reserved
DWORD Status : 1;
};
DWORD dword;
} GPU_COHERSTATUS;
// Layout of the register at GPUREG_WAITUNTIL: a set of single-bit wait
// conditions plus a 4-bit CmdFifoEntries count.
// The bitfields total 32 bits; `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD WaitCrtcPFlip : 1;
DWORD WaitReCrtcVLine : 1;
DWORD WaitFeCrtcVLine : 1;
DWORD WaitCrtcVLine : 1;
DWORD : 4; // reserved
DWORD WaitCpDmaIdle : 1;
DWORD : 1; // reserved
DWORD WaitCmdFifo : 1;
DWORD WaitOvFlip : 1;
DWORD : 3; // reserved
DWORD WaitIdle : 1;
DWORD : 1; // reserved
DWORD WaitIdleClean : 1;
DWORD : 2; // reserved
DWORD CmdFifoEntries : 4;
DWORD : 8; // reserved
};
DWORD dword;
} GPU_WAITUNTIL;
// Layout of the register at GPUREG_CPUINTERRUPTACK: one acknowledge bit for
// each of Cpu0 .. Cpu5. `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD Cpu0Ack : 1;
DWORD Cpu1Ack : 1;
DWORD Cpu2Ack : 1;
DWORD Cpu3Ack : 1;
DWORD Cpu4Ack : 1;
DWORD Cpu5Ack : 1;
DWORD : 26; // reserved
};
DWORD dword;
} GPU_CPUINTERRUPTACK;
// Layout of the register at GPUREG_GPRMANAGEMENT: a Dynamic flag and 7-bit
// pixel and vertex sizes. `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD Dynamic : 1;
DWORD : 3; // reserved
DWORD PixelSize : 7;
DWORD : 1; // reserved
DWORD VertexSize : 7;
DWORD : 13; // reserved
};
DWORD dword;
} GPU_GPRMANAGEMENT;
// Layout of the register at GPUREG_INSTSTOREMANAGEMENT: PixelBase in the low
// 16 bits, VertexBase in the high 16 bits. The same layout is embedded as
// DWORD 11 of GPU_MEINIT. `dword` is the same storage as one raw DWORD.
typedef union {
struct {
DWORD PixelBase : 16;
DWORD VertexBase : 16;
};
DWORD dword;
} GPU_INSTSTOREMANAGEMENT;
// 18-DWORD initialization block; `dword[18]` is the same storage as a raw
// array. The trailing comments give each DWORD's index and the value it is
// required to hold.
// NOTE(review): presumably the payload of GPUCOMMANDOP_ME_INIT - the consumer
// is outside this section; confirm.
typedef union {
struct {
DWORD Flags ; // DWORD 0 must be 0x3FF
DWORD Reserved1 ; // DWORD 1 must be 0
DWORD Reserved2 ; // DWORD 2 must be 0
// DWORDs 3-10 can be accessed by name or through Reserved3To10[].
union {
struct {
DWORD Reserved3 ; // DWORD 3 must be 0
DWORD Reserved4 ; // DWORD 4 must be 0x80
DWORD Reserved5 ; // DWORD 5 must be 0x100
DWORD Reserved6 ; // DWORD 6 must be 0x180
DWORD Reserved7 ; // DWORD 7 must be 0x200
DWORD Reserved8 ; // DWORD 8 must be 0x280
DWORD Reserved9 ; // DWORD 9 must be 0x300
DWORD Reserved10 ; // DWORD 10 must be 0x380
};
DWORD Reserved3To10[8] ; // DWORD 3-10
};
// DWORD 11 can be accessed as two 16-bit fields or as a
// GPU_INSTSTOREMANAGEMENT value.
union {
struct {
DWORD PixelBase : 16; // DWORD 11
DWORD VertexBase : 16; // DWORD 11
};
GPU_INSTSTOREMANAGEMENT InstructionStore ;
};
DWORD MaxContext ; // DWORD 12 must be 7
DWORD Reserved13 ; // DWORD 13 must be 0
DWORD Reserved14 ; // DWORD 14 must be 0
DWORD Reserved15 ; // DWORD 15 must be 0
DWORD Reserved16 ; // DWORD 16 must be 0
DWORD Reserved17 ; // DWORD 17 must be 0
};
DWORD dword[18];
} GPU_MEINIT;
//------------------------------------------------------------------------------
// Packet 0:
// Register block that starts at GPUREG_DESTINATIONPACKET (0x2000). Members
// are in register order, one DWORD each, and must stay in step with the
// "Packet 0" enumerators of GPUREGISTER. The trailing comments give the
// hardware register names.
typedef struct
{
GPU_SURFACEINFO SurfaceInfo; // RB_SURFACE_INFO
GPU_COLORINFO Color0Info; // RB_COLOR0_INFO
GPU_DEPTHINFO DepthInfo; // RB_DEPTH_INFO
GPU_COLORINFO Color1Info; // RB_COLOR1_INFO
GPU_COLORINFO Color2Info; // RB_COLOR2_INFO
GPU_COLORINFO Color3Info; // RB_COLOR3_INFO
DWORD CoherDestBase0; // COHER_DEST_BASE_0
DWORD CoherDestBase1; // COHER_DEST_BASE_1
DWORD CoherDestBase2; // COHER_DEST_BASE_2
DWORD CoherDestBase3; // COHER_DEST_BASE_3
DWORD CoherDestBase4; // COHER_DEST_BASE_4
DWORD CoherDestBase5; // COHER_DEST_BASE_5
DWORD CoherDestBase6; // COHER_DEST_BASE_6
DWORD CoherDestBase7; // COHER_DEST_BASE_7
GPU_POINT ScreenScissorTL; // PA_SC_SCREEN_SCISSOR_TL
GPU_POINT ScreenScissorBR; // PA_SC_SCREEN_SCISSOR_BR
} GPU_DESTINATIONPACKET;
// Packet 1:
// Register block that starts at GPUREG_WINDOWPACKET (0x2080). Members are in
// register order and must stay in step with the "Packet 1" enumerators of
// GPUREGISTER.
typedef struct
{
GPU_POINT WindowOffset; // PA_SC_WINDOW_OFFSET
GPU_POINT WindowScissorTL; // PA_SC_WINDOW_SCISSOR_TL
GPU_POINT WindowScissorBR; // PA_SC_WINDOW_SCISSOR_BR
} GPU_WINDOWPACKET;
// Packet 2:
// Register block that starts at GPUREG_VALUESPACKET (0x2100). Members are in
// register order and must stay in step with the "Packet 2" enumerators of
// GPUREGISTER; Unused[3] occupies the GPUREG_UNUSED0..2 slots.
typedef struct
{
DWORD MaxVtxIndx; // VGT_MAX_VTX_INDX
DWORD MinVtxIndx; // VGT_MIN_VTX_INDX
DWORD IndxOffset; // VGT_INDX_OFFSET
DWORD MultiPrimIbResetIndx; // VGT_MULTI_PRIM_IB_RESET_INDX
GPU_COLORMASK ColorMask; // RB_COLOR_MASK
float BlendRed; // RB_BLEND_RED
float BlendGreen; // RB_BLEND_GREEN
float BlendBlue; // RB_BLEND_BLUE
float BlendAlpha; // RB_BLEND_ALPHA
DWORD Unused[3];
GPU_STENCILREFMASK StencilRefMaskBF; // RB_STENCILREFMASK_BF
GPU_STENCILREFMASK StencilRefMask; // RB_STENCILREFMASK
float AlphaRef; // RB_ALPHA_REF
float VportXScale; // PA_CL_VPORT_XSCALE
float VportXOffset; // PA_CL_VPORT_XOFFSET
float VportYScale; // PA_CL_VPORT_YSCALE
float VportYOffset; // PA_CL_VPORT_YOFFSET
float VportZScale; // PA_CL_VPORT_ZSCALE
float VportZOffset; // PA_CL_VPORT_ZOFFSET
} GPU_VALUESPACKET;
// Packet 3:
// Register block that starts at GPUREG_PROGRAMPACKET (0x2180). Members are in
// register order and must stay in step with the "Packet 3" enumerators of
// GPUREGISTER.
typedef struct
{
GPU_PROGRAMCONTROL ProgramControl; // SQ_PROGRAM_CNTL
GPU_CONTEXTMISC ContextMisc; // SQ_CONTEXT_MISC
GPU_INTERPOLATORCONTROL InterpolatorControl; // SQ_INTERPOLATOR_CNTL
GPU_WRAPPING0 Wrapping0; // SQ_WRAPPING_0
GPU_WRAPPING1 Wrapping1; // SQ_WRAPPING_1
} GPU_PROGRAMPACKET;
// Packet 4:
// Register block that starts at GPUREG_CONTROLPACKET (0x2200). Members are in
// register order and must stay in step with the "Packet 4" enumerators of
// GPUREGISTER; Unused occupies the GPUREG_UNUSED3 slot.
typedef struct
{
GPU_DEPTHCONTROL DepthControl; // RB_DEPTHCONTROL
GPU_BLENDCONTROL BlendControl0; // RB_BLENDCONTROL0
GPU_COLORCONTROL ColorControl; // RB_COLORCONTROL
GPU_HICONTROL HiControl; // RB_TILECONTROL
GPU_CLIPCONTROL ClipControl; // PA_CL_CLIP_CNTL
GPU_MODECONTROL ModeControl; // PA_SU_SC_MODE_CNTL
GPU_VTECONTROL VteControl; // PA_CL_VTE_CNTL
DWORD Unused;
GPU_EDRAMMODECONTROL EdramModeControl; // RB_MODECONTROL
GPU_BLENDCONTROL BlendControl1; // RB_BLENDCONTROL1
GPU_BLENDCONTROL BlendControl2; // RB_BLENDCONTROL2
GPU_BLENDCONTROL BlendControl3; // RB_BLENDCONTROL3
} GPU_CONTROLPACKET;
// Packet 5:
// Register block that starts at GPUREG_TESSELLATORPACKET (0x2280). Members
// are in register order and must stay in step with the "Packet 5"
// enumerators of GPUREGISTER; Unused1 and Unused2[2] occupy the
// GPUREG_UNUSED4 and GPUREG_UNUSED5..6 slots.
typedef struct
{
GPU_POINTSIZE PointSize; // PA_SU_POINT_SIZE
GPU_POINTMINMAX PointMinMax; // PA_SU_POINT_MINMAX
GPU_LINECONTROL LineControl; // PA_SU_LINE_CNTL
DWORD Unused1;
GPU_OUTPUTPATHCONTROL OutputPathControl; // VGT_OUTPUT_PATH_CNTL
GPU_HOSCONTROL HosControl; // VGT_HOS_CNTL
float HosMaxTessLevel; // VGT_HOS_MAX_TESS_LEVEL
float HosMinTessLevel; // VGT_HOS_MIN_TESS_LEVEL
DWORD HosReuseDepth; // VGT_HOS_REUSE_DEPTH
GPU_GROUPPRIMTYPE GroupPrimType; // VGT_GROUP_PRIM_TYPE
DWORD GroupFirstDecr; // VGT_GROUP_FIRST_DECR
DWORD GroupDecr; // VGT_GROUP_DECR
GPU_GROUPVECTCONTROL GroupVect0Control; // VGT_GROUP_VECT_0_CNTL
GPU_GROUPVECTCONTROL GroupVect1Control; // VGT_GROUP_VECT_1_CNTL
GPU_GROUPVECTFMTCONTROL GroupVect0FmtControl; // VGT_GROUP_VECT_0_FMT_CNTL
GPU_GROUPVECTFMTCONTROL GroupVect1FmtControl; // VGT_GROUP_VECT_1_FMT_CNTL
DWORD Unused2[2];
GPU_MPASSPSCONTROL MPassPsControl; // PA_SC_MPASS_PS_CNTL
GPU_VIZQUERY VizQuery; // PA_SC_VIZ_QUERY
GPU_ENHANCE Enhance; // VGT_ENHANCE
} GPU_TESSELLATORPACKET;
// Packet 6:
// Register block that starts at GPUREG_MISCPACKET (0x2300). Members are in
// register order and must stay in step with the "Packet 6" enumerators of
// GPUREGISTER; Unused1[5] + Unused2[2] occupy the GPUREG_UNUSED7..13 slots
// and Unused3[3] the GPUREG_UNUSED14..16 slots.
typedef struct
{
GPU_SCLINECONTROL ScLineControl; // PA_SC_LINE_CNTL
GPU_AACONFIG AaConfig; // PA_SC_AA_CONFIG
GPU_VTXCONTROL VtxControl; // PA_SU_VTX_CNTL
float GbVertClipAdj; // PA_CL_GB_VERT_CLIP_ADJ
float GbVertDiscAdj; // PA_CL_GB_VERT_DISC_ADJ
float GbHorzClipAdj; // PA_CL_GB_HORZ_CLIP_ADJ
float GbHorzDiscAdj; // PA_CL_GB_HORZ_DISC_ADJ
GPU_CONST VsConst; // SQ_VS_CONST
GPU_CONST PsConst; // SQ_PS_CONST
GPU_DEBUGMISC0 DebugMisc0; // SQ_DEBUG_MISC_0
GPU_DEBUGMISC1 DebugMisc1; // SQ_DEBUG_MISC_1
DWORD Unused1[5];
DWORD Unused2[2];
DWORD AaMask; // PA_SC_AA_MASK
DWORD Unused3[3];
GPU_VERTEXREUSEBLOCKCONTROL VertexReuseBlockControl;// VGT_VERTEX_REUSE_BLOCK_CNTL
GPU_OUTDEALLOCCONTROL OutDeallocControl; // VGT_OUT_DEALLOC_CNTL
GPU_COPYCONTROL CopyControl; // RB_COPY_CONTROL
DWORD CopyDestBase; // RB_COPY_DEST_BASE
GPU_COPYDESTPITCH CopyDestPitch; // RB_COPY_DEST_PITCH
GPU_COPYDESTINFO CopyDestInfo; // RB_COPY_DEST_INFO
DWORD HiClear; // RB_TILE_CLEAR
DWORD DepthClear; // RB_DEPTH_CLEAR
DWORD ColorClear; // RB_COLOR_CLEAR
DWORD ColorClearLo; // RB_COLOR_CLEAR_LO
GPU_COPYFUNC CopyFunc; // RB_COPY_FUNC
GPU_COPYREF CopyRef; // RB_COPY_REF
GPU_COPYMASK CopyMask; // RB_COPY_MASK
DWORD CopySurfaceSlice; // RB_COPY_SURFACE_SLICE
GPU_SAMPLECOUNTCONTROL SampleCountControl; // RB_SAMPLE_COUNT_CTL
DWORD SampleCountAddress; // RB_SAMPLE_COUNT_ADDR
} GPU_MISCPACKET;
// Packet 7:
// Register block that starts at GPUREG_POINTPACKET (0x2380): eight floats in
// register order, matching the "Packet 7" enumerators of GPUREGISTER.
typedef struct
{
float PolyOffsetFrontScale; // PA_SU_POLY_OFFSET_FRONT_SCALE
float PolyOffsetFrontOffset; // PA_SU_POLY_OFFSET_FRONT_OFFSET
float PolyOffsetBackScale; // PA_SU_POLY_OFFSET_BACK_SCALE
float PolyOffsetBackOffset; // PA_SU_POLY_OFFSET_BACK_OFFSET
float PointXRad; // PA_CL_POINT_X_RAD
float PointYRad; // PA_CL_POINT_Y_RAD
float PointConstantSize; // PA_CL_POINT_SIZE
float PointCullRad; // PA_CL_POINT_CULL_RAD
} GPU_POINTPACKET;
//------------------------------------------------------------------------------
// Number of packets:
#define GPU_PACKET_COUNT 8
// Maximum size of every packet, in registers. This is also the spacing
// between consecutive packet base registers (0x2000, 0x2080, 0x2100, ...):
#define GPU_PACKET_SIZE 0x80
// Actual size of each packet, in registers (DWORDs), derived from the size
// of the corresponding packet struct:
#define GPU_DESTINATIONPACKET_SIZE (sizeof(GPU_DESTINATIONPACKET) / sizeof(DWORD))
#define GPU_WINDOWPACKET_SIZE (sizeof(GPU_WINDOWPACKET) / sizeof(DWORD))
#define GPU_VALUESPACKET_SIZE (sizeof(GPU_VALUESPACKET) / sizeof(DWORD))
#define GPU_PROGRAMPACKET_SIZE (sizeof(GPU_PROGRAMPACKET) / sizeof(DWORD))
#define GPU_CONTROLPACKET_SIZE (sizeof(GPU_CONTROLPACKET) / sizeof(DWORD))
#define GPU_TESSELLATORPACKET_SIZE (sizeof(GPU_TESSELLATORPACKET) / sizeof(DWORD))
#define GPU_MISCPACKET_SIZE (sizeof(GPU_MISCPACKET) / sizeof(DWORD))
#define GPU_POINTPACKET_SIZE (sizeof(GPU_POINTPACKET) / sizeof(DWORD))
//------------------------------------------------------------------------------
// GPU register addresses. The trailing comment on each enumerator names the
// type of the value held in that register.
// Enumerators without an explicit value take the previous value plus one, so
// within each packet their order must match the member order of the
// corresponding GPU_*PACKET struct. Each packet has three aliases for its
// base address (GPUREG_<NAME>PACKET, GPUREG_PACKETn and its first register),
// and packet bases are 0x80 registers apart starting at 0x2000.
typedef enum
{
GPUREG_WAITUNTIL = 0x05C8, // GPU_WAITUNTIL
GPUREG_COHERSIZEHOST = 0x0A2F, // DWORD
GPUREG_COHERBASEHOST, // DWORD
GPUREG_COHERSTATUSHOST, // GPU_COHERSTATUS
GPUREG_CPUINTERRUPTACK = 0x045E, // GPU_CPUINTERRUPTACK
GPUREG_CALLBACKADDRESS = 0x057C, // DWORD
GPUREG_GPRMANAGEMENT = 0x0D00, // GPU_GPRMANAGEMENT
GPUREG_INSTSTOREMANAGEMENT = 0x0D02, // GPU_INSTSTOREMANAGEMENT
GPUREG_INVALIDATECONSTANTS = 0x0D04, // DWORD
GPUREG_BCCONTROL = 0x0F01, // GPU_BCCONTROL
// Packet 0:
GPUREG_DESTINATIONPACKET = 0x2000, // GPU_DESTINATIONPACKET
GPUREG_PACKET0 = 0x2000,
GPUREG_SURFACEINFO = 0x2000, // GPU_SURFACEINFO
GPUREG_COLOR0INFO, // GPU_COLORINFO
GPUREG_DEPTHINFO, // GPU_DEPTHINFO
GPUREG_COLOR1INFO, // GPU_COLORINFO
GPUREG_COLOR2INFO, // GPU_COLORINFO
GPUREG_COLOR3INFO, // GPU_COLORINFO
GPUREG_COHERDESTBASE0, // DWORD
GPUREG_COHERDESTBASE1, // DWORD
GPUREG_COHERDESTBASE2, // DWORD
GPUREG_COHERDESTBASE3, // DWORD
GPUREG_COHERDESTBASE4, // DWORD
GPUREG_COHERDESTBASE5, // DWORD
GPUREG_COHERDESTBASE6, // DWORD
GPUREG_COHERDESTBASE7, // DWORD
GPUREG_SCREENSCISSORTL, // GPU_POINT
GPUREG_SCREENSCISSORBR, // GPU_POINT
// Packet 1:
GPUREG_WINDOWPACKET = 0x2080, // GPU_WINDOWPACKET
GPUREG_PACKET1 = 0x2080,
GPUREG_WINDOWOFFSET = 0x2080, // GPU_POINT
GPUREG_WINDOWSCISSORTL, // GPU_POINT
GPUREG_WINDOWSCISSORBR, // GPU_POINT
// Packet 2:
GPUREG_VALUESPACKET = 0x2100, // GPU_VALUESPACKET
GPUREG_PACKET2 = 0x2100,
GPUREG_MAXVTXINDX = 0x2100, // DWORD
GPUREG_MINVTXINDX, // DWORD
GPUREG_INDXOFFSET, // DWORD
GPUREG_MULTIPRIMIBRESETINDX, // DWORD
GPUREG_COLORMASK, // GPU_COLORMASK
GPUREG_BLENDRED, // float
GPUREG_BLENDGREEN, // float
GPUREG_BLENDBLUE, // float
GPUREG_BLENDALPHA, // float
GPUREG_UNUSED0,
GPUREG_UNUSED1,
GPUREG_UNUSED2,
GPUREG_STENCILREFMASKBF, // GPU_STENCILREFMASK
GPUREG_STENCILREFMASK, // GPU_STENCILREFMASK
GPUREG_ALPHAREF, // float
GPUREG_VPORTXSCALE, // float
GPUREG_VPORTXOFFSET, // float
GPUREG_VPORTYSCALE, // float
GPUREG_VPORTYOFFSET, // float
GPUREG_VPORTZSCALE, // float
GPUREG_VPORTZOFFSET, // float
// Packet 3:
GPUREG_PROGRAMPACKET = 0x2180, // GPU_PROGRAMPACKET
GPUREG_PACKET3 = 0x2180,
GPUREG_PROGRAMCONTROL = 0x2180, // GPU_PROGRAMCONTROL
GPUREG_CONTEXTMISC, // GPU_CONTEXTMISC
GPUREG_INTERPOLATORCONTROL, // GPU_INTERPOLATORCONTROL
GPUREG_WRAPPING0, // GPU_WRAPPING0
GPUREG_WRAPPING1, // GPU_WRAPPING1
// Packet 4:
GPUREG_CONTROLPACKET = 0x2200, // GPU_CONTROLPACKET
GPUREG_PACKET4 = 0x2200,
GPUREG_DEPTHCONTROL = 0x2200, // GPU_DEPTHCONTROL
GPUREG_BLENDCONTROL0, // GPU_BLENDCONTROL
GPUREG_COLORCONTROL, // GPU_COLORCONTROL
GPUREG_HICONTROL, // GPU_HICONTROL
GPUREG_CLIPCONTROL, // GPU_CLIPCONTROL
GPUREG_MODECONTROL, // GPU_MODECONTROL
GPUREG_VTECONTROL, // GPU_VTECONTROL
GPUREG_UNUSED3,
GPUREG_EDRAMMODECONTROL, // GPU_EDRAMMODECONTROL
GPUREG_BLENDCONTROL1, // GPU_BLENDCONTROL
GPUREG_BLENDCONTROL2, // GPU_BLENDCONTROL
GPUREG_BLENDCONTROL3, // GPU_BLENDCONTROL
// Packet 5:
GPUREG_TESSELLATORPACKET = 0x2280, // GPU_TESSELLATORPACKET
GPUREG_PACKET5 = 0x2280,
GPUREG_POINTSIZE = 0x2280, // GPU_POINTSIZE
GPUREG_POINTMINMAX, // GPU_POINTMINMAX
GPUREG_LINECONTROL, // GPU_LINECONTROL
GPUREG_UNUSED4,
GPUREG_OUTPUTPATHCONTROL, // GPU_OUTPUTPATHCONTROL
GPUREG_HOSCONTROL, // GPU_HOSCONTROL
GPUREG_HOSMAXTESSLEVEL, // float
GPUREG_HOSMINTESSLEVEL, // float
GPUREG_HOSREUSEDEPTH, // DWORD
GPUREG_GROUPPRIMTYPE, // GPU_GROUPPRIMTYPE
GPUREG_GROUPFIRSTDECR, // DWORD
GPUREG_GROUPDECR, // DWORD
GPUREG_GROUPVECT0CONTROL, // GPU_GROUPVECTCONTROL
GPUREG_GROUPVECT1CONTROL, // GPU_GROUPVECTCONTROL
GPUREG_GROUPVECT0FMTCONTROL, // GPU_GROUPVECTFMTCONTROL
GPUREG_GROUPVECT1FMTCONTROL, // GPU_GROUPVECTFMTCONTROL
GPUREG_UNUSED5,
GPUREG_UNUSED6,
GPUREG_MPASSPSCONTROL, // GPU_MPASSPSCONTROL
GPUREG_VIZQUERY, // GPU_VIZQUERY
GPUREG_ENHANCE, // GPU_ENHANCE
// Packet 6:
GPUREG_MISCPACKET = 0x2300, // GPU_MISCPACKET
GPUREG_PACKET6 = 0x2300,
GPUREG_SCLINECONTROL = 0x2300, // GPU_SCLINECONTROL
GPUREG_AACONFIG, // GPU_AACONFIG
GPUREG_VTXCONTROL, // GPU_VTXCONTROL
GPUREG_GBVERTCLIPADJ, // float
GPUREG_GBVERTDISCADJ, // float
GPUREG_GBHORZCLIPADJ, // float
GPUREG_GBHORZDISCADJ, // float
GPUREG_VSCONST, // GPU_CONST
GPUREG_PSCONST, // GPU_CONST
GPUREG_DEBUGMISC0, // GPU_DEBUGMISC0
GPUREG_DEBUGMISC1, // GPU_DEBUGMISC1
GPUREG_UNUSED7,
GPUREG_UNUSED8,
GPUREG_UNUSED9,
GPUREG_UNUSED10,
GPUREG_UNUSED11,
GPUREG_UNUSED12,
GPUREG_UNUSED13,
GPUREG_AAMASK, // DWORD
GPUREG_UNUSED14,
GPUREG_UNUSED15,
GPUREG_UNUSED16,
GPUREG_VERTEXREUSEBLOCKCONTROL, // GPU_VERTEXREUSEBLOCKCONTROL
GPUREG_OUTDEALLOCCONTROL, // GPU_OUTDEALLOCCONTROL
GPUREG_COPYCONTROL, // GPU_COPYCONTROL
GPUREG_COPYDESTBASE, // DWORD
GPUREG_COPYDESTPITCH, // GPU_COPYDESTPITCH
GPUREG_COPYDESTINFO, // GPU_COPYDESTINFO
GPUREG_HICLEAR, // DWORD
GPUREG_DEPTHCLEAR, // DWORD
GPUREG_COLORCLEAR, // DWORD
GPUREG_COLORCLEARLO, // DWORD
GPUREG_COPYFUNC, // GPU_COPYFUNC
GPUREG_COPYREF, // GPU_COPYREF
GPUREG_COPYMASK, // GPU_COPYMASK
GPUREG_COPYSURFACESLICE, // DWORD
GPUREG_SAMPLECOUNTCONTROL, // GPU_SAMPLECOUNTCONTROL
GPUREG_SAMPLECOUNTADDRESS, // DWORD
// Packet 7:
GPUREG_POINTPACKET = 0x2380, // GPU_POINTPACKET
GPUREG_PACKET7 = 0x2380,
GPUREG_POLYOFFSETFRONTSCALE = 0x2380, // float
GPUREG_POLYOFFSETFRONTOFFSET, // float
GPUREG_POLYOFFSETBACKSCALE, // float
GPUREG_POLYOFFSETBACKOFFSET, // float
GPUREG_POINTXRAD, // float
GPUREG_POINTYRAD, // float
GPUREG_POINTCONSTANTSIZE, // float
GPUREG_POINTCULLRAD, // float
// Other:
GPUREG_CLIPPLANE0 = 0x2388, // float[4]
GPUREG_CLIPPLANE1 = 0x238C, // float[4]
GPUREG_CLIPPLANE2 = 0x2390, // float[4]
GPUREG_CLIPPLANE3 = 0x2394, // float[4]
GPUREG_CLIPPLANE4 = 0x2398, // float[4]
GPUREG_CLIPPLANE5 = 0x239C, // float[4]
GPUREG_ALUCONSTANTS = 0x4000, // float[GPU_ALU_CONSTANTS][4]
GPUREG_FETCHCONSTANTS = 0x4800, // GPUFETCH_CONSTANT[GPU_FETCH_CONSTANTS]
// The flow constants start with the boolean constants, so both share 0x4900.
GPUREG_FLOWCONSTANTS = 0x4900, // DWORD[GPU_FLOW_CONSTANTS]
GPUREG_BOOLEANCONSTANTS = 0x4900, // DWORD[GPU_BOOLEAN_CONSTANTS/32]
GPUREG_INTEGERCONSTANTS = 0x4908, // DWORD[GPU_INTEGER_CONSTANTS]
GPUREG_FLUSHFETCHCONSTANTS = 0x5000, // DWORD[3]
} GPUREGISTER;
// When using GPUCOMMANDOP_SET_CONSTANT to set a register, subtract this value
// from the register address. The value equals GPUREG_PACKET0, the first
// packet register:
#define GPU_SET_CONSTANT_REGISTER_OFFSET 0x2000
//------------------------------------------------------------------------------
// Flow-control constants, viewable two ways over the same storage: as one
// flat FlowConstant[] array, or as the boolean bitmap (one bit per boolean
// constant, 32 per DWORD) immediately followed by the integer constants.
typedef struct
{
union {
DWORD FlowConstant[GPU_FLOW_CONSTANTS];
struct {
DWORD BooleanConstant[GPU_BOOLEAN_CONSTANTS/32];
DWORD IntegerConstant[GPU_INTEGER_CONSTANTS];
};
};
} GPUFLOW_CONSTANTS;
// Sets or clears one vertex-shader boolean constant in a flow-constant block.
//
//   pFlowConstants - block whose BooleanConstant[] bitmap is updated in place.
//   Register       - vertex-shader boolean register number. It is offset by
//                    GPU_D3D_VERTEX_CONSTANTB_BASE to find the bit; no range
//                    check is performed.
//   Boolean        - only bit 0 is used as the new value of the bit.
__forceinline
void GPU_SET_VERTEX_SHADER_CONSTANTB(
GPUFLOW_CONSTANTS* pFlowConstants,
DWORD Register,
BOOL Boolean)
{
    // Take the word index and the bit position from the same absolute bit
    // number, so the two stay consistent whatever the base is (the result is
    // unchanged when the base is a multiple of 32).
    const DWORD bit = Register + GPU_D3D_VERTEX_CONSTANTB_BASE;
    const DWORD index = bit / 32;
    const DWORD shift = bit % 32;
    // Shift unsigned values: shifting a signed 1 into bit 31 is undefined
    // behavior in C.
    const DWORD mask = (DWORD) 1 << shift;
    const DWORD value = (DWORD) (Boolean & 1) << shift;
    pFlowConstants->BooleanConstant[index] =
        (pFlowConstants->BooleanConstant[index] & ~mask) | value;
}
// Sets or clears one pixel-shader boolean constant in a flow-constant block.
//
//   pFlowConstants - block whose BooleanConstant[] bitmap is updated in place.
//   Register       - pixel-shader boolean register number. It is offset by
//                    GPU_D3D_PIXEL_CONSTANTB_BASE to find the bit; no range
//                    check is performed.
//   Boolean        - only bit 0 is used as the new value of the bit.
__forceinline
void GPU_SET_PIXEL_SHADER_CONSTANTB(
GPUFLOW_CONSTANTS* pFlowConstants,
DWORD Register,
BOOL Boolean)
{
    // Take the word index and the bit position from the same absolute bit
    // number, so the two stay consistent whatever the base is (the result is
    // unchanged when the base is a multiple of 32).
    const DWORD bit = Register + GPU_D3D_PIXEL_CONSTANTB_BASE;
    const DWORD index = bit / 32;
    const DWORD shift = bit % 32;
    // Shift unsigned values: shifting a signed 1 into bit 31 is undefined
    // behavior in C.
    const DWORD mask = (DWORD) 1 << shift;
    const DWORD value = (DWORD) (Boolean & 1) << shift;
    pFlowConstants->BooleanConstant[index] =
        (pFlowConstants->BooleanConstant[index] & ~mask) | value;
}
// Writes one vertex-shader integer constant into a flow-constant block.
//
//   pFlowConstants - block whose IntegerConstant[] array is updated.
//   Register       - vertex-shader integer register number; offset by
//                    GPU_D3D_VERTEX_CONSTANTI_BASE. No range check is done.
//   pConstantData  - at least three ints are read. The low 8 bits of each are
//                    packed into bits 0-7, 8-15 and 16-23 of the constant;
//                    bits 24-31 are written as zero.
__forceinline
void GPU_SET_VERTEX_SHADER_CONSTANTI(
GPUFLOW_CONSTANTS* pFlowConstants,
DWORD Register,
const int* pConstantData)
{
    const int byte0 = pConstantData[0] & 0xff;
    const int byte1 = (pConstantData[1] & 0xff) << 8;
    const int byte2 = (pConstantData[2] & 0xff) << 16;
    pFlowConstants->IntegerConstant[Register + GPU_D3D_VERTEX_CONSTANTI_BASE] =
        (DWORD) (byte0 | byte1 | byte2);
}
// Writes one pixel-shader integer constant into a flow-constant block.
//
//   pFlowConstants - block whose IntegerConstant[] array is updated.
//   Register       - pixel-shader integer register number; offset by
//                    GPU_D3D_PIXEL_CONSTANTI_BASE. No range check is done.
//   pConstantData  - at least three ints are read. The low 8 bits of each are
//                    packed into bits 0-7, 8-15 and 16-23 of the constant;
//                    bits 24-31 are written as zero.
__forceinline
void GPU_SET_PIXEL_SHADER_CONSTANTI(
GPUFLOW_CONSTANTS* pFlowConstants,
DWORD Register,
const int* pConstantData)
{
    const int byte0 = pConstantData[0] & 0xff;
    const int byte1 = (pConstantData[1] & 0xff) << 8;
    const int byte2 = (pConstantData[2] & 0xff) << 16;
    pFlowConstants->IntegerConstant[Register + GPU_D3D_PIXEL_CONSTANTI_BASE] =
        (DWORD) (byte0 | byte1 | byte2);
}
// Size DWORD of a texture fetch constant, 1D variant: a 24-bit Width.
// One of the alternatives of GPUTEXTURE_FETCH_CONSTANT::Size.
typedef struct
{
DWORD Width : 24; // DWORD
DWORD : 8; // unused
} GPUTEXTURESIZE_1D;
// Size DWORD of a texture fetch constant, 2D variant: 13-bit Width and
// Height. One of the alternatives of GPUTEXTURE_FETCH_CONSTANT::Size.
typedef struct
{
DWORD Width : 13; // DWORD
DWORD Height : 13; // DWORD
DWORD : 6; // unused
} GPUTEXTURESIZE_2D;
// Size DWORD of a texture fetch constant, stack variant: same Width/Height
// layout as GPUTEXTURESIZE_2D, with a 6-bit Depth in the bits that variant
// leaves unused. One of the alternatives of GPUTEXTURE_FETCH_CONSTANT::Size.
typedef struct
{
DWORD Width : 13; // DWORD
DWORD Height : 13; // DWORD
DWORD Depth : 6; // DWORD
} GPUTEXTURESIZE_STACK;
// Size DWORD of a texture fetch constant, 3D variant: 11-bit Width and
// Height and a 10-bit Depth. One of the alternatives of
// GPUTEXTURE_FETCH_CONSTANT::Size.
typedef struct
{
DWORD Width : 11; // DWORD
DWORD Height : 11; // DWORD
DWORD Depth : 10; // DWORD
} GPUTEXTURESIZE_3D;
// Texture fetch constant: six DWORDs of packed bitfields, also accessible as
// the raw array `dword[6]`. The "DWORD n:" comments mark where each 32-bit
// word begins; the bitfields of every word total exactly 32 bits. The
// trailing comment on each field names the type of the value it holds.
// ExpAdjust, LODBias, GradExpAdjustH/V and AnisoBias are declared INT and so
// are signed fields.
typedef union {
struct {
// DWORD 0:
DWORD Type : 2; // GPUCONSTANTTYPE
DWORD SignX : 2; // GPUSIGN
DWORD SignY : 2; // GPUSIGN
DWORD SignZ : 2; // GPUSIGN
DWORD SignW : 2; // GPUSIGN
DWORD ClampX : 3; // GPUCLAMP
DWORD ClampY : 3; // GPUCLAMP
DWORD ClampZ : 3; // GPUCLAMP
DWORD : 2; // unused
DWORD : 1; // unused
DWORD Pitch : 9; // DWORD
DWORD Tiled : 1; // BOOL
// DWORD 1:
DWORD DataFormat : 6; // GPUTEXTUREFORMAT
DWORD Endian : 2; // GPUENDIAN
DWORD RequestSize : 2; // GPUREQUESTSIZE
DWORD Stacked : 1; // BOOL
DWORD ClampPolicy : 1; // GPUCLAMPPOLICY
DWORD BaseAddress : 20; // DWORD
// DWORD 2:
// Four alternative layouts of the same size DWORD.
union
{
GPUTEXTURESIZE_1D OneD;
GPUTEXTURESIZE_2D TwoD;
GPUTEXTURESIZE_3D ThreeD;
GPUTEXTURESIZE_STACK Stack;
} Size;
// DWORD 3:
DWORD NumFormat : 1; // GPUNUMFORMAT
DWORD SwizzleX : 3; // GPUSWIZZLE
DWORD SwizzleY : 3; // GPUSWIZZLE
DWORD SwizzleZ : 3; // GPUSWIZZLE
DWORD SwizzleW : 3; // GPUSWIZZLE
INT ExpAdjust : 6; // int
DWORD MagFilter : 2; // GPUMINMAGFILTER
DWORD MinFilter : 2; // GPUMINMAGFILTER
DWORD MipFilter : 2; // GPUMIPFILTER
DWORD AnisoFilter : 3; // GPUANISOFILTER
DWORD : 3; // unused
DWORD BorderSize : 1; // DWORD
// DWORD 4:
DWORD VolMagFilter : 1; // GPUMINMAGFILTER
DWORD VolMinFilter : 1; // GPUMINMAGFILTER
DWORD MinMipLevel : 4; // DWORD
DWORD MaxMipLevel : 4; // DWORD
DWORD MagAnisoWalk : 1; // BOOL
DWORD MinAnisoWalk : 1; // BOOL
INT LODBias : 10; // int
INT GradExpAdjustH : 5; // int
INT GradExpAdjustV : 5; // int
// DWORD 5:
DWORD BorderColor : 2; // GPUBORDERCOLOR
DWORD ForceBCWToMax : 1; // BOOL
DWORD TriClamp : 2; // GPUTRICLAMP
INT AnisoBias : 4; // int
DWORD Dimension : 2; // GPUDIMENSION
DWORD PackedMips : 1; // BOOL
DWORD MipAddress : 20; // DWORD
};
DWORD dword[6];
} GPUTEXTURE_FETCH_CONSTANT;
// Vertex fetch constant: two DWORDs of bit-fields, also reachable as raw words
// through dword[]. Each DWORD group adds up to 32 bits.
typedef union {
struct {
// DWORD 0:
DWORD Type : 2; // GPUCONSTANTTYPE
DWORD BaseAddress : 30; // DWORD
// DWORD 1:
DWORD Endian : 2; // GPUENDIAN
DWORD Size : 24; // DWORD
DWORD AddressClamp : 1; // GPUADDRESSCLAMP
DWORD : 1;
DWORD RequestSize : 2; // GPUREQUESTSIZE
// NOTE(review): annotated BOOL but declared 2 bits wide; confirm intent.
DWORD ClampDisable : 2; // BOOL
};
// Raw access to the two DWORDs above.
DWORD dword[2];
} GPUVERTEX_FETCH_CONSTANT;
// One fetch-constant slot: either a single six-DWORD texture fetch constant or
// three two-DWORD vertex fetch constants sharing the same storage.
typedef union {
GPUTEXTURE_FETCH_CONSTANT Texture;
GPUVERTEX_FETCH_CONSTANT Vertex[3];
} GPUFETCH_CONSTANT;
// Texture fetch shader instruction: three DWORDs of bit-fields, also reachable
// as raw words through dword[]. Each DWORD group adds up to 32 bits. Fields
// declared INT are signed bit-fields.
typedef union {
struct {
// DWORD 0:
DWORD Op : 5; // GPUTEXTUREFETCHOP
DWORD SrcGPR : 6; // DWORD
DWORD SrcLoopIndexRelative : 1; // BOOL
DWORD DestGPR : 6; // DWORD
DWORD DestLoopIndexRelative : 1; // BOOL
DWORD FetchValidOnly : 1; // BOOL
DWORD ConstIndex : 5; // DWORD
DWORD UnnormalizedTextureCoords : 1; // BOOL
DWORD SrcSelectX : 2; // DWORD
DWORD SrcSelectY : 2; // DWORD
DWORD SrcSelectZ : 2; // DWORD
// DWORD 1:
DWORD SwizzleX : 3; // GPUSWIZZLE
DWORD SwizzleY : 3; // GPUSWIZZLE
DWORD SwizzleZ : 3; // GPUSWIZZLE
DWORD SwizzleW : 3; // GPUSWIZZLE
DWORD MagFilter : 2; // GPUMINMAGFILTER
DWORD MinFilter : 2; // GPUMINMAGFILTER
DWORD MipFilter : 2; // GPUMIPFILTER
DWORD AnisoFilter : 3; // GPUANISOFILTER
DWORD : 3;
// Two bits wide here, versus one bit in GPUTEXTURE_FETCH_CONSTANT.
DWORD VolMagFilter : 2; // GPUMINMAGFILTER
DWORD VolMinFilter : 2; // GPUMINMAGFILTER
DWORD UseComputedLOD : 1; // BOOL
DWORD UseRegisterLOD : 1; // BOOL
DWORD : 1;
DWORD Predicated : 1; // BOOL
// DWORD 2:
DWORD UseRegisterGradients : 1; // BOOL
DWORD : 1;
INT LODBias : 7; // int
DWORD : 5;
DWORD Dimension : 2; // GPUDIMENSION (pseudo register)
INT OffsetX : 5; // int
INT OffsetY : 5; // int
INT OffsetZ : 5; // int
DWORD PredicationCondition : 1; // DWORD
};
// Raw access to the three DWORDs above.
DWORD dword[3];
} GPUTEXTURE_FETCH_INSTRUCTION;
// Vertex fetch shader instruction: three DWORDs of bit-fields, also reachable
// as raw words through dword[]. Each DWORD group adds up to 32 bits. Fields
// declared INT are signed bit-fields. ConstIndex and ConstIndexSelect are
// written by SetVertexStream() and read back by GetVertexStream().
typedef union {
struct {
// DWORD 0:
DWORD Op : 5; // GPUVERTEXFETCHOP
DWORD SrcGPR : 6; // DWORD
DWORD SrcLoopIndexRelative : 1; // BOOL
DWORD DestGPR : 6; // DWORD
DWORD DestLoopIndexRelative : 1; // BOOL
DWORD FetchValidOnly : 1; // Must be 1
DWORD ConstIndex : 5; // DWORD
DWORD ConstIndexSelect : 2; // DWORD
DWORD PrefetchCount : 3; // DWORD
DWORD SrcSelect : 2; // DWORD
// DWORD 1:
DWORD SwizzleX : 3; // GPUSWIZZLE
DWORD SwizzleY : 3; // GPUSWIZZLE
DWORD SwizzleZ : 3; // GPUSWIZZLE
DWORD SwizzleW : 3; // GPUSWIZZLE
DWORD Signed : 1; // BOOL
DWORD NumFormat : 1; // GPUNUMFORMAT
DWORD : 1;
DWORD RoundIndex : 1; // BOOL
DWORD DataFormat : 6; // GPUVERTEXFORMAT
DWORD : 2;
INT ExpAdjust : 6; // int
DWORD MiniFetch : 1; // BOOL
DWORD Predicated : 1; // BOOL
// DWORD 2:
DWORD Stride : 8; // DWORD
INT Offset : 23; // int
DWORD PredicationCondition : 1; // DWORD
};
// Raw access to the three DWORDs above.
DWORD dword[3];
} GPUVERTEX_FETCH_INSTRUCTION;
//------------------------------------------------------------------------------------------------
// Convert logical vertex stream number to hardware format
//
// The hardware slot number is (GPU_VERTEX_FETCH_CONSTANTS - 1) - stream, and
// is split into ConstIndex (slot / 3) and ConstIndexSelect (slot % 3), which
// is the inverse of GetVertexStream(). Only ConstIndex and ConstIndexSelect of
// *pVertexFetch are modified.
__forceinline
void SetVertexStream(GPUVERTEX_FETCH_INSTRUCTION* pVertexFetch, DWORD stream)
{
// Avoid an integer division by using fixed point math
// The constant packs two 8.8 fixed-point multipliers into one DWORD:
// 342 (~4/3 * 256) in the high 16 bits and 86 (~1/3 * 256) in the low 16
// bits, so a single multiply produces both partial results.
static const DWORD kRemainderAndDiv = ((DWORD) ((4.0 / 3.0) * 256 + 1)) << 16
| (0xffff & (DWORD)((1.0 / 3.0) * 256 + 1));
DWORD temp = ((GPU_VERTEX_FETCH_CONSTANTS - 1) - stream) * kRemainderAndDiv;
// Bits 8 and up of the low half hold (slot * 86) >> 8, i.e. slot / 3.
// NOTE(review): the approximation is only exact for small slot values, and
// the low product must stay below 2^16 so it does not carry into the high
// half; confirm against the range of GPU_VERTEX_FETCH_CONSTANTS.
pVertexFetch->ConstIndex = (temp >> 8) & 0xff;
// Bits 24-25 hold ((slot * 342) >> 8) & 3, i.e. slot % 3.
pVertexFetch->ConstIndexSelect = (temp >> 24) & 0x3;
}
//------------------------------------------------------------------------------------------------
// Convert hardware format to logical vertex stream
//
// Recombines ConstIndex (three slots per index) and ConstIndexSelect into a
// hardware slot number, then mirrors it around GPU_VERTEX_FETCH_CONSTANTS - 1
// to obtain the logical stream number.
__forceinline
DWORD GetVertexStream(const GPUVERTEX_FETCH_INSTRUCTION* pVertexFetch)
{
    const DWORD hardwareSlot =
        pVertexFetch->ConstIndex * 3 + pVertexFetch->ConstIndexSelect;

    return (GPU_VERTEX_FETCH_CONSTANTS - 1) - hardwareSlot;
}
// Bit layout of the EXEC flow-control instruction. The fields total 48 bits:
// the first five fields fill 32 bits and the remaining fields fill 16 bits,
// with Op occupying the last 4 bits.
typedef union {
struct {
DWORD Address : 12;
DWORD Count : 3;
DWORD Yield : 1;
DWORD TypeAndSerialize : 12; // GPUEXECSERIALIZEMODE, GPUEXECINSTRUCTIONTYPE
DWORD VertexCacheLow : 4; // GPUEXECFETCHCACHETYPE
DWORD VertexCacheHigh : 2; // GPUEXECFETCHCACHETYPE
DWORD : 7;
DWORD PredicateClean : 1;
DWORD : 1;
DWORD AbsoluteAddressing : 1;
DWORD Op : 4;
};
} GPUFLOW_EXEC;
// Bit layout of the conditional EXEC flow-control instruction. Same first 34
// bits as GPUFLOW_EXEC; the following 9 bits carry an 8-bit BooleanAddress and
// a 1-bit Condition. The fields total 48 bits with Op in the last 4 bits.
typedef union {
struct {
DWORD Address : 12;
DWORD Count : 3;
DWORD Yield : 1;
DWORD TypeAndSerialize : 12;
DWORD VertexCacheLow : 4;
DWORD VertexCacheHigh : 2;
DWORD BooleanAddress : 8;
DWORD Condition : 1;
DWORD AbsoluteAddressing : 1;
DWORD Op : 4;
};
} GPUFLOW_COND_EXEC;
// Bit layout of the predicate-conditional EXEC flow-control instruction. Same
// field positions as GPUFLOW_EXEC, except that the bit before
// AbsoluteAddressing is named Condition instead of being unused. The fields
// total 48 bits with Op in the last 4 bits.
typedef union {
struct {
DWORD Address : 12;
DWORD Count : 3;
DWORD Yield : 1;
DWORD TypeAndSerialize : 12;
DWORD VertexCacheLow : 4;
DWORD VertexCacheHigh : 2;
DWORD : 7;
DWORD PredicateClean : 1;
DWORD Condition : 1;
DWORD AbsoluteAddressing : 1;
DWORD Op : 4;
};
} GPUFLOW_COND_EXEC_PREDS;
// Bit layout of the LOOP_START flow-control instruction. The fields total 48
// bits; the two consecutive 11-bit unnamed fields are split so that the first
// one completes the leading 32 bits. Op occupies the last 4 bits.
typedef union {
struct {
DWORD JumpAddress : 13;
DWORD Repeat : 1;
DWORD : 2;
DWORD LoopID : 5;
DWORD : 11;
DWORD : 11;
DWORD AbsoluteAddressing : 1;
DWORD Op : 4;
};
} GPUFLOW_LOOP_START;
// Bit layout of the LOOP_END flow-control instruction. The fields total 48
// bits; the two consecutive 10-bit unnamed fields are split so that the first
// one completes the leading 32 bits. Op occupies the last 4 bits.
typedef union {
struct {
DWORD JumpAddress : 13;
DWORD : 3;
DWORD LoopID : 5;
DWORD PredicateBreak : 1;
DWORD : 10;
DWORD : 10;
DWORD Condition : 1;
DWORD AbsoluteAddressing : 1;
DWORD Op : 4;
};
} GPUFLOW_LOOP_END;
// Bit layout of the conditional CALL flow-control instruction. The fields
// total 48 bits: the first four fields fill 32 bits, the rest fill 16 bits,
// with Op in the last 4 bits.
typedef union {
struct {
DWORD JumpAddress : 13;
DWORD Force : 1;
DWORD Predicated : 1;
DWORD : 17;
DWORD : 2;
DWORD BooleanAddress : 8;
DWORD Condition : 1;
DWORD AbsoluteAddressing : 1;
DWORD Op : 4;
};
} GPUFLOW_COND_CALL;
// Bit layout of the RETURN flow-control instruction. Only AbsoluteAddressing
// and Op are named; the preceding 43 bits are unused. The fields total 48 bits
// with Op in the last 4 bits.
typedef union {
struct {
DWORD : 32;
DWORD : 11;
DWORD AbsoluteAddressing : 1;
DWORD Op : 4;
};
} GPUFLOW_RETURN;
// Bit layout of the conditional JUMP flow-control instruction. Same field
// positions as GPUFLOW_COND_CALL, with an additional ForwardOnly flag in the
// bit immediately before BooleanAddress. The fields total 48 bits with Op in
// the last 4 bits.
typedef union {
struct {
DWORD JumpAddress : 13;
DWORD Force : 1;
DWORD Predicated : 1;
DWORD : 17;
DWORD : 1;
DWORD ForwardOnly : 1;
DWORD BooleanAddress : 8;
DWORD Condition : 1;
DWORD AbsoluteAddressing : 1;
DWORD Op : 4;
};
} GPUFLOW_COND_JUMP;
// Bit layout of the ALLOC flow-control instruction. The fields total 48 bits:
// Size plus 29 unused bits fill the first 32, and the remaining fields fill 16
// bits with Op in the last 4 bits.
typedef union {
struct {
DWORD Size : 3;
DWORD : 29;
DWORD : 8;
DWORD DoNotSerialize : 1;
DWORD BufferSelect : 2; // GPUALLOCBUFFERSELECT
DWORD : 1;
DWORD Op : 4;
};
} GPUFLOW_ALLOC;
// A single decoded flow-control instruction held in two DWORDs. The anonymous
// struct exposes only Op, which sits after 44 bits, the same position Op
// occupies in every per-opcode view below. The named members overlay the same
// storage with the opcode-specific layouts; dword[] gives raw access.
typedef union {
struct {
DWORD : 32;
DWORD : 12;
DWORD Op : 4; // GPUFLOWOP
};
GPUFLOW_EXEC Exec;
GPUFLOW_COND_EXEC CondExec;
GPUFLOW_COND_EXEC_PREDS CondExecPreds;
GPUFLOW_LOOP_START LoopStart;
GPUFLOW_LOOP_END LoopEnd;
GPUFLOW_COND_CALL CondCall;
GPUFLOW_RETURN Return;
GPUFLOW_COND_JUMP CondJump;
GPUFLOW_ALLOC Alloc;
DWORD dword[2];
} GPUFLOW_INSTRUCTION;
// Use GPU_GET_FLOW_INSTRUCTIONS() to decode GPUFLOW_INSTRUCTION_PAIR:
// Two 48-bit flow instructions packed back to back into three DWORDs (96
// bits); GPU_PUT_FLOW_INSTRUCTIONS() performs the reverse packing.
typedef struct {
DWORD dword[3];
} GPUFLOW_INSTRUCTION_PAIR;
// ALU shader instruction: three DWORDs of bit-fields, also reachable as raw
// words through dword[]. Each DWORD group adds up to 32 bits. For some scalar
// ops, SrcCSwizzle, SrcCSelect and the low bit of ScalarOp are reused to carry
// a register number; see GPU_GET_SCALAROP_SOURCE2_REG().
typedef union {
struct {
// DWORD 0:
DWORD VectorDest : 6;
DWORD VectorDestRelative : 1;
DWORD AbsConstants : 1;
DWORD ScalarDest : 6;
DWORD ScalarDestRelative : 1; // Also used as export mask
DWORD ResultExport : 1;
DWORD VectorMask : 4;
DWORD ScalarMask : 4;
DWORD VectorSaturate : 1;
DWORD ScalarSaturate : 1;
DWORD ScalarOp : 6; // GPUALUSCALAROP
// DWORD 1:
DWORD SrcCSwizzle : 8;
DWORD SrcBSwizzle : 8;
DWORD SrcASwizzle : 8;
DWORD SrcCNegate : 1;
DWORD SrcBNegate : 1;
DWORD SrcANegate : 1;
DWORD PredicationCondition : 1;
DWORD Predicated : 1;
DWORD RelativeAddress : 1;
DWORD Constant1Relative : 1;
DWORD Constant0Relative : 1;
// DWORD 2:
DWORD SrcCRegConst : 8;
DWORD SrcBRegConst : 8;
DWORD SrcARegConst : 8;
DWORD VectorOp : 5; // GPUALUVECTOROP
DWORD SrcCSelect : 1; // GPUALUSRCSELECT
DWORD SrcBSelect : 1; // GPUALUSRCSELECT
DWORD SrcASelect : 1; // GPUALUSRCSELECT
};
// Raw access to the three DWORDs above.
DWORD dword[3];
} GPUALU_INSTRUCTION;
//------------------------------------------------------------------------------
// used for GPUALUSCALAROP_MULC0..GPUALUSCALAROP_SUBC1
//
// Reassembles the second-source register number that is scattered across
// three fields of the ALU instruction: bits 5..2 come from bits 5..2 of
// SrcCSwizzle, bit 1 from SrcCSelect, and bit 0 from the low bit of ScalarOp.
static __forceinline
DWORD GPU_GET_SCALAROP_SOURCE2_REG(
    CONST GPUALU_INSTRUCTION* pALU)
{
    const DWORD upperBits = pALU->SrcCSwizzle & 0x3C;
    const DWORD middleBit = pALU->SrcCSelect << 1;
    const DWORD lowestBit = pALU->ScalarOp & 1;

    return upperBits | middleBit | lowestBit;
}
// Inverse of GPU_GET_SCALAROP_SOURCE2_REG(): scatters a register number into
// the ALU instruction. Bits 5..2 of reg replace bits 5..2 of SrcCSwizzle, bit
// 1 goes to SrcCSelect, and bit 0 replaces the low bit of ScalarOp. All other
// bits of SrcCSwizzle and ScalarOp are preserved.
static __forceinline
VOID GPU_SET_SCALAROP_SOURCE2_REG(
    GPUALU_INSTRUCTION* pALU,
    DWORD reg)
{
    const DWORD swizzleBits = reg & 0x3C;
    const DWORD opBit = reg & 1;

    pALU->SrcCSwizzle = (pALU->SrcCSwizzle & 0xC3) | swizzleBits;

    // SrcCSelect is a 1-bit field, so only bit 1 of reg survives the store.
    pALU->SrcCSelect = reg >> 1;

    pALU->ScalarOp = (pALU->ScalarOp & 0xFE) | opBit;
}
//------------------------------------------------------------------------------
// One three-DWORD shader instruction slot, viewable as any of the instruction
// kinds. Flow0 is a two-DWORD view that overlays the start of FlowPair; use
// GPU_GET_FLOW_INSTRUCTIONS() on FlowPair to decode both packed flow
// instructions.
typedef union {
GPUTEXTURE_FETCH_INSTRUCTION TextureFetch;
GPUVERTEX_FETCH_INSTRUCTION VertexFetch;
GPUFLOW_INSTRUCTION Flow0;
GPUFLOW_INSTRUCTION_PAIR FlowPair;
GPUALU_INSTRUCTION Alu;
} GPUSHADER_INSTRUCTION;
//------------------------------------------------------------------------------
// Unpacks the two 48-bit flow instructions stored in a three-DWORD pair.
//
//   pPair  - packed source (96 bits).
//   pFlow0 - receives bits 0..47; dword[1] holds the upper 16 bits,
//            zero-extended.
//   pFlow1 - receives bits 48..95; dword[1] holds the upper 16 bits,
//            zero-extended.
//
// The outputs are written in order (pFlow0 first), interleaved with the reads
// of *pPair.
static __forceinline
void GPU_GET_FLOW_INSTRUCTIONS(
    const GPUFLOW_INSTRUCTION_PAIR* pPair,
    GPUFLOW_INSTRUCTION* pFlow0,
    GPUFLOW_INSTRUCTION* pFlow1)
{
    const DWORD* pPacked = pPair->dword;

    // First instruction: all of word 0 plus the low half of word 1.
    pFlow0->dword[0] = pPacked[0];
    pFlow0->dword[1] = pPacked[1] & 0xffff;

    // Second instruction: high half of word 1 plus all of word 2.
    pFlow1->dword[0] = (pPacked[1] >> 16) | (pPacked[2] << 16);
    pFlow1->dword[1] = pPacked[2] >> 16;
}
// Packs two 48-bit flow instructions into a three-DWORD pair; the inverse of
// GPU_GET_FLOW_INSTRUCTIONS().
//
//   pPair  - packed destination (96 bits).
//   pFlow0 - first instruction; only the low 16 bits of dword[1] are used.
//   pFlow1 - second instruction; bits of dword[1] above the low 16 are shifted
//            out and discarded.
static __forceinline
void GPU_PUT_FLOW_INSTRUCTIONS(
    GPUFLOW_INSTRUCTION_PAIR* pPair,
    const GPUFLOW_INSTRUCTION* pFlow0,
    const GPUFLOW_INSTRUCTION* pFlow1)
{
    DWORD* pPacked = pPair->dword;

    // Word 0: low 32 bits of the first instruction.
    pPacked[0] = pFlow0->dword[0];

    // Word 1: high 16 bits of the first, low 16 bits of the second.
    pPacked[1] = (pFlow1->dword[0] << 16) | (pFlow0->dword[1] & 0xffff);

    // Word 2: remaining 32 bits of the second instruction.
    pPacked[2] = (pFlow1->dword[1] << 16) | (pFlow1->dword[0] >> 16);
}
//------------------------------------------------------------------------------
// Memory-export stream constant: a four-component float constant whose bits
// are overlaid with the fields below. It can be accessed as bit-fields, as raw
// DWORDs (dword[]) or as floats (c[]). Fields whose names start with an
// underscore carry fixed bit patterns, given in their comments; see
// GPU_SET_MEMEXPORT_STREAM_CONSTANT() for how they are filled in.
typedef union {
struct {
// float x:
DWORD BaseAddress : 30; // Physical address in DWORDs
DWORD _01 : 2; // Must be 01
// float y:
DWORD _4B000000 : 32; // Must be 0x4B000000
// float z:
DWORD EndianSwap : 3; // GPUENDIAN128
DWORD : 5;
DWORD Format : 6; // GPUCOLORFORMAT
DWORD : 2;
DWORD NumericType : 3; // GPUSURFACENUMBER
DWORD ComponentSwap : 1; // GPUSURFACESWAP
DWORD _4B0 : 12; // Must be 0x4B0
// float w:
DWORD StreamMaxIndex : 23; // 0..StreamMaxIndex - 1 is valid
DWORD _96 : 9; // Must be 0x96
};
DWORD dword[4];
float c[4];
} GPU_MEMEXPORT_STREAM_CONSTANT;
//------------------------------------------------------------------------------
// Fills in a GPU_MEMEXPORT_STREAM_CONSTANT from its logical parameters.
//
//   pConstant      - constant to fill in.
//   pBaseAddress   - CPU pointer to the export target; converted with
//                    GPU_CONVERT_CPU_TO_GPU_ADDRESS and stored in DWORD units.
//   StreamMaxIndex - stored in the 23-bit StreamMaxIndex field.
//   ComponentSwap, NumericType, Format, EndianSwap - stored in the fields of
//                    the same name.
//
// Debug builds only declare the function here (the definition lives
// elsewhere); other builds use the inline definition below. The inline version
// writes every named field, including the fixed-pattern ones, but does not
// touch the unnamed reserved bits in the z component.
#ifdef _DEBUG
void GPU_SET_MEMEXPORT_STREAM_CONSTANT(
    GPU_MEMEXPORT_STREAM_CONSTANT* pConstant,
    VOID* pBaseAddress,
    DWORD StreamMaxIndex,
    GPUSURFACESWAP ComponentSwap,
    GPUSURFACENUMBER NumericType,
    GPUCOLORFORMAT Format,
    GPUENDIAN128 EndianSwap);
#else
__forceinline
void GPU_SET_MEMEXPORT_STREAM_CONSTANT(
    GPU_MEMEXPORT_STREAM_CONSTANT* pConstant,
    VOID* pBaseAddress,
    DWORD StreamMaxIndex,
    GPUSURFACESWAP ComponentSwap,
    GPUSURFACENUMBER NumericType,
    GPUCOLORFORMAT Format,
    GPUENDIAN128 EndianSwap)
{
    // float x: base address in DWORDs plus the fixed 01 tag.
    pConstant->BaseAddress = GPU_CONVERT_CPU_TO_GPU_ADDRESS(pBaseAddress) >> 2;
    pConstant->_01 = 0x01;

    // float y: fixed pattern.
    pConstant->_4B000000 = 0x4b000000;

    // float z: format description plus the fixed 0x4B0 tag.
    pConstant->EndianSwap = EndianSwap;
    pConstant->Format = Format;
    pConstant->NumericType = NumericType;
    pConstant->ComponentSwap = ComponentSwap;
    pConstant->_4B0 = 0x4b0;

    // float w: index limit plus the fixed 0x96 tag.
    pConstant->StreamMaxIndex = StreamMaxIndex;
    pConstant->_96 = 0x96;
}
#endif // _DEBUG
//------------------------------------------------------------------------------
// Four-DWORD payload of an indexed draw whose indices are fetched by DMA (see
// the SrcSelect comment). Each DWORD group adds up to 32 bits; dword[] gives
// raw access.
typedef union {
struct {
// DWORD 0:
DWORD VizQueryId : 6;
DWORD : 2;
DWORD UseVizQuery : 1;
DWORD : 23;
// DWORD 1:
DWORD PrimType : 6; // GPUPRIMTYPE
DWORD SrcSelect : 2; // Must be GPUINDEXSELECT_DMA
DWORD MajorMode : 3;
DWORD IndexType : 1; // GPUINDEXTYPE
DWORD NotEndOfPacket : 1;
DWORD : 3;
DWORD NumIndices : 16;
// DWORD 2:
DWORD IndexBase : 32;
// DWORD 3:
DWORD IndexSize : 24;
DWORD : 6;
DWORD Endian : 2; // GPUENDIAN
};
DWORD dword[4];
} GPUCOMMAND_DRAW_INDEX;
// Two-DWORD payload of an auto-indexed draw (see the SrcSelect comment). Each
// DWORD group adds up to 32 bits; dword[] gives raw access.
//
// DWORD 0 uses the same bit positions as DWORD 0 of GPUCOMMAND_DRAW_INDEX and
// as GPUCOMMAND_VIZ_QUERY: a 6-bit id, two reserved bits, then the flag at
// bit 8. The flag was previously declared at bit 6, which disagreed with both
// of those layouts.
typedef union {
    struct {
        // DWORD 0:
        DWORD VizQueryId : 6;
        DWORD : 2;
        DWORD UseVizQuery : 1;
        DWORD : 23;
        // DWORD 1:
        DWORD PrimType : 6; // GPUPRIMTYPE
        DWORD SrcSelect : 2; // Must be GPUINDEXSELECT_AUTO
        DWORD MajorMode : 3;
        DWORD IndexSize : 1;
        DWORD NotEndOfPacket : 1;
        DWORD : 3;
        DWORD NumIndices : 16;
    };
    DWORD dword[2];
} GPUCOMMAND_DRAW_AUTO;
// Payload of a draw whose indices follow immediately in the packet (see the
// SrcSelect comment). DWORD 0 adds up to 32 bits; the indices start at DWORD 1
// as a variable-length array viewed as either 16-bit or 32-bit values.
// dword[] covers only the fixed first DWORD.
typedef union {
struct {
// DWORD 0:
DWORD PrimType : 6; // GPUPRIMTYPE
DWORD SrcSelect : 2; // Must be GPUINDEXSELECT_IMMEDIATE
DWORD MajorMode : 3;
DWORD IndexType : 1; // GPUINDEXTYPE
DWORD NotEndOfPacket : 1;
DWORD : 3;
DWORD NumIndices : 16;
// DWORD 1:
// Zero-sized trailing arrays (compiler extension; warning 4200 is
// disabled at the top of this header).
union {
WORD Index16[];
DWORD Index32[];
};
};
DWORD dword[1];
} GPUCOMMAND_DRAW_IMMEDIATE;
// Three-DWORD payload of a register read-modify-write command. DWORD 1 and
// DWORD 2 each have two overlaid views: a full 32-bit mask or a 13-bit
// register number.
// NOTE(review): presumably AndImmediate/OrImmediate select the mask view over
// the register view for the corresponding DWORD; confirm against the hardware
// documentation.
typedef union {
struct {
// DWORD 0:
DWORD Register : 13;
DWORD : 17;
DWORD OrImmediate : 1;
DWORD AndImmediate : 1;
// DWORD 1:
union {
struct {
DWORD AndMask : 32;
};
struct {
DWORD AndRegister : 13;
DWORD : 19;
};
};
// DWORD 2:
union {
struct {
DWORD OrMask : 32;
};
struct {
DWORD OrRegister : 13;
DWORD : 19;
};
};
};
DWORD dword[3];
} GPUCOMMAND_REG_RMW;
// Twelve-DWORD payload of a set-state command. DWORDs 0-7 each describe one
// state group as a 4-bit size, a disable flag and a 27-bit address. DWORDs
// 8-11 describe the vertex and pixel shaders, each as a disable flag plus
// 27-bit address followed by a separate DWORD holding a 14-bit size.
//
// dword[] gives raw access to all twelve DWORDs. It was previously declared
// with only 10 elements, leaving the PixelShader words outside the array
// bounds. The size of the union is unchanged because the bit-field struct
// already occupied twelve DWORDs.
typedef union {
    struct {
        // DWORD 0:
        DWORD DestinationSize : 4;
        DWORD DestinationDisable : 1;
        DWORD DestinationAddress : 27;
        // DWORD 1:
        DWORD WindowSize : 4;
        DWORD WindowDisable : 1;
        DWORD WindowAddress : 27;
        // DWORD 2:
        DWORD ValuesSize : 4;
        DWORD ValuesDisable : 1;
        DWORD ValuesAddress : 27;
        // DWORD 3:
        DWORD ProgramSize : 4;
        DWORD ProgramDisable : 1;
        DWORD ProgramAddress : 27;
        // DWORD 4:
        DWORD ControlSize : 4;
        DWORD ControlDisable : 1;
        DWORD ControlAddress : 27;
        // DWORD 5:
        DWORD TessellatorSize : 4;
        DWORD TessellatorDisable : 1;
        DWORD TessellatorAddress : 27;
        // DWORD 6:
        DWORD MiscSize : 4;
        DWORD MiscDisable : 1;
        DWORD MiscAddress : 27;
        // DWORD 7:
        DWORD PointSize : 4;
        DWORD PointDisable : 1;
        DWORD PointAddress : 27;
        // DWORD 8:
        DWORD : 4;
        DWORD VertexShaderDisable : 1;
        DWORD VertexShaderAddress : 27;
        // DWORD 9:
        DWORD VertexShaderSize : 14;
        DWORD : 18;
        // DWORD 10:
        DWORD : 4;
        DWORD PixelShaderDisable : 1;
        DWORD PixelShaderAddress : 27;
        // DWORD 11:
        DWORD PixelShaderSize : 14;
        DWORD : 18;
    };
    DWORD dword[12];
} GPUCOMMAND_SET_STATE;
// Payload of a set-constant command: one header DWORD (Offset and constant Id,
// 32 bits in total) followed by a variable-length array of data DWORDs.
// dword[] covers only the header.
typedef union {
struct {
// DWORD 0:
DWORD Offset : 11;
DWORD : 5;
DWORD Id : 8; // GPUCONSTANTID
DWORD : 8;
// DWORD 1:
// Zero-sized trailing array (compiler extension; warning 4200 is disabled
// at the top of this header).
DWORD Data[];
};
DWORD dword[1];
} GPUCOMMAND_SET_CONSTANT;
// Three-DWORD payload of a load-ALU-constant command: a 30-bit Address (the
// low two bits of the DWORD are unused), an 11-bit Offset and a 12-bit Size.
// Each DWORD group adds up to 32 bits.
typedef union {
struct {
// DWORD 0:
DWORD : 2;
DWORD Address : 30;
// DWORD 1:
DWORD Offset : 11;
DWORD : 21;
// DWORD 2:
DWORD Size : 12;
DWORD : 20;
};
DWORD dword[3];
} GPUCOMMAND_LOAD_ALU_CONSTANT;
// Two-DWORD payload of a load-shader command: load type and 27-bit Address in
// DWORD 0, 14-bit Size and 12-bit Start in DWORD 1. Each DWORD group adds up
// to 32 bits.
typedef union {
struct {
// DWORD 0:
DWORD Type : 2; // GPULOADTYPE
DWORD : 3;
DWORD Address : 27;
// DWORD 1:
DWORD Size : 14;
DWORD : 2;
DWORD Start : 12; // Unused unless GPULOADTYPE_SHARED
DWORD : 4;
};
DWORD dword[2];
} GPUCOMMAND_LOAD_SHADER;
// One-DWORD payload of an invalidate-state command: fifteen single-bit flags
// followed by 17 unused bits. The first ten flags carry the same names, in the
// same order, as the state groups described by GPUCOMMAND_SET_STATE.
typedef union {
struct {
DWORD Destination : 1;
DWORD Window : 1;
DWORD Values : 1;
DWORD Program : 1;
DWORD Control : 1;
DWORD Tessellator : 1;
DWORD Misc : 1;
DWORD Point : 1;
DWORD VertexShader : 1;
DWORD PixelShader : 1;
DWORD AluConstantBase : 1;
DWORD FetchConstantBase : 1;
DWORD IncrementalRegisterBase : 1;
DWORD BooleanBase : 1;
DWORD IntegerBase : 1;
DWORD : 17;
};
DWORD dword[1];
} GPUCOMMAND_INVALIDATE_STATE;
// Five-DWORD payload of a wait-on-register-or-memory command. DWORD 1 has two
// overlaid views: Endian plus a 30-bit memory Address, or a 15-bit Register
// number.
// NOTE(review): presumably MemSpace selects which view of DWORD 1 applies;
// confirm against the hardware documentation.
typedef union {
struct {
// DWORD 0:
DWORD Function : 3; // GPUSYNCFUNCTION
DWORD : 1;
DWORD MemSpace : 1; // GPUSYNCSPACE
DWORD : 27;
// DWORD 1:
union {
struct {
DWORD Endian : 2; // GPUENDIAN
DWORD Address : 30;
};
struct {
DWORD Register : 15;
DWORD : 17;
};
};
// DWORD 2:
DWORD Reference : 32;
// DWORD 3:
DWORD Mask : 32;
// DWORD 4:
DWORD WaitInterval : 16;
DWORD : 16;
};
DWORD dword[5];
} GPUCOMMAND_WAIT_REG_MEM;
// Four-DWORD payload of a wait-on-register command: 15-bit Register number,
// 32-bit Reference, 32-bit Mask and 16-bit WaitInterval. The layout is
// identical to GPUCOMMAND_WAIT_REG_GTE.
typedef union {
struct {
// DWORD 0:
DWORD Register : 15;
DWORD : 17;
// DWORD 1:
DWORD Reference : 32;
// DWORD 2:
DWORD Mask : 32;
// DWORD 3:
DWORD WaitInterval : 16;
DWORD : 16;
};
DWORD dword[4];
} GPUCOMMAND_WAIT_REG_EQ;
// Four-DWORD payload of a wait-on-register command: 15-bit Register number,
// 32-bit Reference, 32-bit Mask and 16-bit WaitInterval. The layout is
// identical to GPUCOMMAND_WAIT_REG_EQ.
typedef union {
struct {
// DWORD 0:
DWORD Register : 15;
DWORD : 17;
// DWORD 1:
DWORD Reference : 32;
// DWORD 2:
DWORD Mask : 32;
// DWORD 3:
DWORD WaitInterval : 16;
DWORD : 16;
};
DWORD dword[4];
} GPUCOMMAND_WAIT_REG_GTE;
// Two-DWORD payload of a memory-write command: Endian plus a 30-bit Address in
// DWORD 0, and the 32-bit Data value in DWORD 1.
typedef union {
struct {
// DWORD 0:
DWORD Endian : 2; // GPUENDIAN
DWORD Address : 30;
// DWORD 1:
DWORD Data : 32;
};
DWORD dword[2];
} GPUCOMMAND_MEM_WRITE;
// Six-DWORD payload of a conditional-write command. DWORD 1 (poll location)
// and DWORD 4 (write location) each have two overlaid views: Endian plus a
// 30-bit memory address, or a 15-bit register number.
// NOTE(review): presumably PollSpace and WriteSpace select which view applies
// to DWORD 1 and DWORD 4 respectively; confirm against the hardware
// documentation.
typedef union {
struct {
// DWORD 0:
DWORD Function : 3; // GPUSYNCFUNCTION
DWORD : 1;
DWORD PollSpace : 1; // GPUSYNCSPACE
DWORD : 3;
DWORD WriteSpace : 1; // GPUSYNCSPACE
DWORD : 23;
// DWORD 1:
union {
struct {
DWORD PollEndian : 2; // GPUENDIAN
DWORD PollAddress : 30;
};
struct {
DWORD PollRegister : 15;
DWORD : 17;
};
};
// DWORD 2:
DWORD Reference : 32;
// DWORD 3:
DWORD Mask : 32;
// DWORD 4:
union {
struct {
DWORD WriteEndian : 2; // GPUENDIAN
DWORD WriteAddress : 30;
};
struct {
DWORD WriteRegister : 15;
DWORD : 17;
};
};
// DWORD 5:
DWORD Data : 32;
};
DWORD dword[6];
} GPUCOMMAND_COND_WRITE;
// One-DWORD payload of a memory-write-counter command: Endian plus a 30-bit
// destination Address. Unlike GPUCOMMAND_MEM_WRITE there is no Data DWORD.
typedef union {
struct {
// DWORD 0:
DWORD Endian : 2; // GPUENDIAN
DWORD Address : 30;
};
DWORD dword[1];
} GPUCOMMAND_MEM_WRITE_COUNTER;
// Three-DWORD payload of an event-write command: a 6-bit Initiator with the
// SoftwareManaged and UseCounter flags in the top two bits of DWORD 0, then an
// Endian/Address DWORD and a Data DWORD. The layout is identical to
// GPUCOMMAND_EVENT_WRITE_SHADER.
typedef union {
struct {
// DWORD 0:
DWORD Initiator : 6; // GPUINITIATOR
DWORD : 24;
DWORD SoftwareManaged : 1;
DWORD UseCounter : 1;
// DWORD 1:
DWORD Endian : 2; // GPUENDIAN
DWORD Address : 30;
// DWORD 2:
DWORD Data : 32;
};
DWORD dword[3];
} GPUCOMMAND_EVENT_WRITE;
// Three-DWORD payload of an event-write-shader command. The layout is
// identical to GPUCOMMAND_EVENT_WRITE: Initiator plus SoftwareManaged and
// UseCounter flags, then an Endian/Address DWORD and a Data DWORD.
typedef union {
struct {
// DWORD 0:
DWORD Initiator : 6; // GPUINITIATOR
DWORD : 24;
DWORD SoftwareManaged : 1;
DWORD UseCounter : 1;
// DWORD 1:
DWORD Endian : 2; // GPUENDIAN
DWORD Address : 30;
// DWORD 2:
DWORD Data : 32;
};
DWORD dword[3];
} GPUCOMMAND_EVENT_WRITE_SHADER;
// Three-DWORD payload of an event-write-cache-flush command. Same shape as
// GPUCOMMAND_EVENT_WRITE except that DWORD 0 has no SoftwareManaged flag; that
// bit is part of the unused range here.
typedef union {
struct {
// DWORD 0:
DWORD Initiator : 6; // GPUINITIATOR
DWORD : 25;
DWORD UseCounter : 1;
// DWORD 1:
DWORD Endian : 2; // GPUENDIAN
DWORD Address : 30;
// DWORD 2:
DWORD Data : 32;
};
DWORD dword[3];
} GPUCOMMAND_EVENT_WRITE_CACHE_FLUSH;
// Two-DWORD payload of an event-write-screen-extent command: a 6-bit Initiator
// in DWORD 0 and an Endian/Address pair in DWORD 1. There is no Data DWORD.
typedef union {
struct {
// DWORD 0:
DWORD Initiator : 6; // GPUINITIATOR
DWORD : 26;
// DWORD 1:
DWORD Endian : 2; // GPUENDIAN
DWORD Address : 30;
};
DWORD dword[2];
} GPUCOMMAND_EVENT_WRITE_SCREEN_EXTENT;
// One-DWORD payload of an event-write-zpass-done command: only the 6-bit
// Initiator is defined; the remaining 26 bits are unused.
typedef union {
struct {
DWORD Initiator : 6; // GPUINITIATOR
DWORD : 26;
};
DWORD dword[1];
} GPUCOMMAND_EVENT_WRITE_ZPASS_DONE;
// Two-DWORD payload of a register-to-memory command: a 15-bit Register number
// in DWORD 0 and an Endian/Address pair in DWORD 1.
typedef union {
struct {
// DWORD 0:
DWORD Register : 15;
DWORD : 17;
// DWORD 1:
DWORD Endian : 2; // GPUENDIAN
DWORD Address : 30;
};
DWORD dword[2];
} GPUCOMMAND_REG_TO_MEM;
// One-DWORD payload of a wait-for-idle command. The only field spans all 32
// bits and is named Unused.
typedef union {
struct {
DWORD Unused : 32;
};
DWORD dword[1];
} GPUCOMMAND_WAIT_FOR_IDLE;
// One-DWORD payload of a CPU-interrupt command: six single-bit flags, Cpu0
// through Cpu5, followed by 26 unused bits.
typedef union {
struct {
DWORD Cpu0 : 1;
DWORD Cpu1 : 1;
DWORD Cpu2 : 1;
DWORD Cpu3 : 1;
DWORD Cpu4 : 1;
DWORD Cpu5 : 1;
DWORD : 26;
};
DWORD dword[1];
} GPUCOMMAND_CPU_INTERRUPT;
// One-DWORD payload of a viz-query command: a 6-bit Id, two unused bits, and
// the End flag at bit 8. The Id/flag positions match VizQueryId/UseVizQuery in
// DWORD 0 of GPUCOMMAND_DRAW_INDEX.
typedef union {
struct {
DWORD Id : 6;
DWORD : 2;
DWORD End : 1;
DWORD : 23;
};
DWORD dword[1];
} GPUCOMMAND_VIZ_QUERY;
// Payload of an MPEG index command. DWORD 0 has the same field layout as DWORD
// 0 of GPUCOMMAND_DRAW_IMMEDIATE; DWORD 1 adds a 14-bit NumBaseIndices; the
// 32-bit indices follow from DWORD 2 as a variable-length array. dword[]
// covers only the two fixed DWORDs.
typedef union {
struct {
// DWORD 0:
DWORD PrimType : 6; // Must be GPUPRIMTYPE_RECTLIST
DWORD SrcSelect : 2; // Must be GPUINDEXSELECT_IMMEDIATE
DWORD MajorMode : 3;
DWORD IndexType : 1; // GPUINDEXTYPE
DWORD NotEndOfPacket : 1;
DWORD : 3;
DWORD NumIndices : 16;
// DWORD 1:
DWORD NumBaseIndices : 14;
DWORD : 18;
// DWORD 2:
// Zero-sized trailing array (compiler extension; warning 4200 is disabled
// at the top of this header).
DWORD Index32[];
};
DWORD dword[2];
} GPUCOMMAND_MPEG_INDEX;
// One-DWORD payload of a NOP command. The only field spans all 32 bits and is
// named Unused.
typedef union {
struct {
DWORD Unused : 32;
};
DWORD dword[1];
} GPUCOMMAND_NOP;
// Two-DWORD payload of an indirect-buffer command: a full 32-bit Address in
// DWORD 0, and in DWORD 1 a 20-bit Size with the MultiPass flag in the top
// bit.
typedef union {
struct {
// DWORD 0:
DWORD Address : 32;
// DWORD 1:
DWORD Size : 20;
DWORD : 11;
DWORD MultiPass : 1;
};
DWORD dword[2];
} GPUCOMMAND_INDIRECT_BUFFER;
// Two-DWORD payload of a FIX_2_FLT_REG command: a 15-bit Register number in
// DWORD 0 and an 8-bit Value in DWORD 1.
typedef union {
struct {
// DWORD 0:
DWORD Register : 15;
DWORD : 17;
// DWORD 1:
DWORD Value : 8;
DWORD : 24;
};
DWORD dword[2];
} GPUCOMMAND_FIX_2_FLT_REG;
// One-DWORD payload of a context-update command. The only field spans all 32
// bits and is named Unused.
typedef union {
struct {
DWORD Unused : 32;
};
DWORD dword[1];
} GPUCOMMAND_CONTEXT_UPDATE;
// Bin mask / bin select command payloads; compiled out when XAM_BUILD is
// defined. Each payload is a single DWORD. The _LO and _HI variants expose it
// as a 32-bit field named Low or High respectively.
// NOTE(review): presumably each LO/HI pair forms the two halves of a 64-bit
// value; confirm against the hardware documentation.
#ifndef XAM_BUILD
// Single-DWORD payload holding the Low word of the bin mask.
typedef union {
struct {
// DWORD 0:
DWORD Low : 32;
};
DWORD dword[1];
} GPUCOMMAND_SET_BIN_MASK_LO;
// Single-DWORD payload holding the High word of the bin mask.
typedef union {
struct {
// DWORD 0:
DWORD High : 32;
};
DWORD dword[1];
} GPUCOMMAND_SET_BIN_MASK_HI;
// Single-DWORD payload holding the Low word of the bin select.
typedef union {
struct {
// DWORD 0:
DWORD Low : 32;
};
DWORD dword[1];
} GPUCOMMAND_SET_BIN_SELECT_LO;
// Single-DWORD payload holding the High word of the bin select.
typedef union {
struct {
// DWORD 0:
DWORD High : 32;
};
DWORD dword[1];
} GPUCOMMAND_SET_BIN_SELECT_HI;
#endif
//------------------------------------------------------------------------------
// GPU performance counters
//------------------------------------------------------------------------------
// GPU performance counter event select enums
// Performance counter event-select values for the CP block. Values run
// contiguously from 0 through 63; entries named RESERVED/Reserved are
// unused placeholders that keep the numbering dense.
typedef enum
{
GPUPE_CP_COUNT = 0, // Always Count
GPUPE_CP_RBIU_STALL = 1, // RBIU Transaction FIFO Full
GPUPE_CP_RBIU_TAF = 2, // RBIU Transaction Almost FIFO
GPUPE_CP_PFP_STALL = 3, // PFP Transaction is Waiting for RBBM in RCIU
GPUPE_CP_RESERVED0 = 4, // Unused
GPUPE_CP_RESERVED1 = 5, // Unused
GPUPE_CP_NRT_RCIU_STALL = 6, // Transaction is Waiting for RBBM in RCIU
GPUPE_CP_Reserved2 = 7, // Unused
GPUPE_CP_NRT_MIU_STALL = 8, // CSF Fetcher Waiting on MIU
GPUPE_CP_CSF_PFP_I1_FULL = 9, // CSF PFP I1 Request FIFO is Full
GPUPE_CP_CSF_PFP_I2_FULL = 10, // CSF PFP I2 Request FIFO is Full
GPUPE_CP_CSF_PFP_REQ_FULL = 11, // CSF PFP State Request FIFO is Full
GPUPE_CP_RESERVED3 = 12, // Unused
GPUPE_CP_RING_ROQ_FULL = 13, // Ring Reorder Queue is Full
GPUPE_CP_I1_ROQ_FULL = 14, // I1 Reorder Queue is Full
GPUPE_CP_I2_ROQ_FULL = 15, // I2 Reorder Queue is Full
GPUPE_CP_ST_ROQ_FULL = 16, // State Reorder Queue is Full
GPUPE_CP_RTST_ROQ_FULL = 17, // Vertex Shader Early Fetch Done. NOTE(review): this description does not match the name, which suggests a reorder-queue-full event - confirm against the hardware documentation
GPUPE_CP_MIU_TAG_MEM_FULL = 18, // MIU Tag Memory is Full
GPUPE_CP_MIU_WRITECLEAN = 19, // MIU WriteClean is In-Progress
GPUPE_CP_RESERVED4 = 20, // Unused
GPUPE_CP_RESERVED5 = 21, // Unused
GPUPE_CP_NRT_WRITE_STALL = 22, // Write Request Stalled by MIU Input FIFO
GPUPE_CP_NRT_READ_STALL = 23, // Read Request Stalled by MIU Input FIFO
GPUPE_CP_WC_FIFO_FULL = 24, // Write Confirm FIFO is FULL
GPUPE_CP_VTX_DEALLOC_FIFO_FULL = 25, // Vertex Shader Dealloc FIFO is FULL
GPUPE_CP_PIX_DEALLOC_FIFO_FULL = 26, // Pixel Shader Dealloc FIFO is FULL
GPUPE_CP_VTX_EVENT_FIFO_FULL = 27, // Vertex Shader Event FIFO is FULL
GPUPE_CP_PIX_EVENT_FIFO_FULL = 28, // Pixel Shader Event FIFO is FULL
GPUPE_CP_CF_EVENT_FIFO_FULL = 29, // Cache Flush Event FIFO is FULL
GPUPE_CP_ME_RB_STARVED = 30, // Micro Engine's RB Processing Starved by PFP
GPUPE_CP_ME_I1_STARVED = 31, // Micro Engine's I1 Processing Starved by PFP
GPUPE_CP_ME_I2_STARVED = 32, // Micro Engine's I2 Processing Starved by PFP
GPUPE_CP_ME_ST_STARVED = 33, // Micro Engine's ST Processing Starved by PFP
GPUPE_CP_RESERVED6 = 34, // Unused
GPUPE_CP_RESERVED7 = 35, // Unused
GPUPE_CP_RESERVED8 = 36, // Unused
GPUPE_CP_RESERVED9 = 37, // Unused
GPUPE_CP_RESERVED10 = 38, // Unused
GPUPE_CP_RESERVED11 = 39, // Unused
GPUPE_RCIU_RBBM_DWORD_SENT = 40, // RCIU is sending data to the RBBM
GPUPE_ME_PARSER_BUSY_CLOCKS = 41, // Micro Engine's Parser is Busy
GPUPE_ME_WAIT_CONTEXT_AVAIL = 42, // Micro Engine is waiting for an available context
GPUPE_PFP_TYPE0_PACKET = 43, // PFP processed a Type-0 packet
GPUPE_PFP_TYPE3_PACKET = 44, // PFP processed a Type-3 packet
GPUPE_CSF_RB_WPTR_NEQ_RPTR = 45, // The CSF has more data to fetch from the Ring Command buffer
GPUPE_CSF_I1_SIZE_NEQ_ZERO = 46, // The CSF has more data to fetch from the Indirect1 Command buffer
GPUPE_CSF_I2_SIZE_NEQ_ZERO = 47, // The CSF has more data to fetch from the Indirect2 Command buffer
GPUPE_CSF_RB_I1_I2_FETCHING = 48, // The CSF has more data to fetch from any of the Command buffers (Ring/Indirect1/Indirect2)
GPUPE_CP_RESERVED12 = 49, // Unused
GPUPE_CP_RESERVED13 = 50, // Unused
GPUPE_CP_RESERVED14 = 51, // Unused
GPUPE_CP_RESERVED15 = 52, // Unused
GPUPE_CP_RESERVED16 = 53, // Unused
GPUPE_CP_RESERVED17 = 54, // Unused
GPUPE_CP_RESERVED18 = 55, // Unused
GPUPE_CP_RESERVED19 = 56, // Unused
GPUPE_CP_RESERVED20 = 57, // Unused
GPUPE_CP_RESERVED21 = 58, // Unused
GPUPE_CP_RESERVED22 = 59, // Unused
GPUPE_CP_RESERVED23 = 60, // Unused
GPUPE_CP_RESERVED24 = 61, // Unused
GPUPE_CP_RESERVED25 = 62, // Unused
GPUPE_CP_RESERVED26 = 63, // Unused
} GPUPERFEVENT_CP;
// Performance counter event-select values for the RBBM block. Values run
// contiguously from 0 through 27; GPUPE_RESERVEDn entries are placeholders
// that keep the numbering dense.
typedef enum
{
GPUPE_RBBM_COUNT = 0, // Count Number of Clocks
GPUPE_RBBM_NRT_BUSY = 1, // Non-Real-Time Busy
GPUPE_RBBM_BC_CNTX0_BUSY = 2,
GPUPE_RBBM_BC_CNTX17_BUSY = 3,
GPUPE_RBBM_SQ_CNTX0_BUSY = 4,
GPUPE_RBBM_SQ_CNTX17_BUSY = 5,
GPUPE_RBBM_VGT_BUSY = 6,
GPUPE_RBBM_VGT_NODMA_BUSY = 7,
GPUPE_RBBM_PA_BUSY = 8,
GPUPE_RBBM_SC_CNTX0_BUSY = 9,
GPUPE_RBBM_SC_CNTX17_BUSY = 10,
GPUPE_RBBM_TPC_BUSY = 11,
GPUPE_RBBM_TC_BUSY = 12,
GPUPE_RBBM_SX_BUSY = 13,
GPUPE_RESERVED1 = 14,
GPUPE_RBBM_CP_COHER_BUSY = 15,
GPUPE_RBBM_CP_NRT_BUSY = 16,
GPUPE_RESERVED2 = 17,
GPUPE_RBBM_CP_DMA_BUSY = 18,
GPUPE_RESERVED3 = 19,
GPUPE_RESERVED4 = 20,
GPUPE_RESERVED5 = 21,
GPUPE_RBBM_DMA_IDLE_STALL = 22, // Non-RT Waiting for CP's DMA to go Idle
GPUPE_RESERVED6 = 23,
GPUPE_RESERVED7 = 24,
GPUPE_RBBM_GFX_IDLE_STALL = 25, // Non-RT Waiting for Graphics Pipe to be Idle
GPUPE_RBBM_GFX_IDLEC_STALL = 26, // Non-RT Waiting for Graphics Pipe to be Idle and Clean
GPUPE_RBBM_INTERRUPT = 27, // Combined Interrupt Signal to the BIF
} GPUPERFEVENT_RBBM;
// Performance counter event-select values for the SQ (sequencer) block.
// Values run contiguously from 0 through 105. Per-SIMD events for SIMD0 and
// SIMD1 appear early in the list, while the SIMD2 and SIMD3 equivalents
// start at value 63, so events of one kind are not numerically adjacent
// across all four SIMDs.
typedef enum
{
GPUPE_SQ_PIXEL_VECTORS_SUB = 0, // Number of pixel vectors submitted
GPUPE_SQ_VERTEX_VECTORS_SUB = 1, // Number of vertex vectors submitted
GPUPE_SQ_ALU0_ACTIVE_VTX_SIMD0 = 2, // Number of cycles SIMD0's ALU0 is executing vertex shader instructions.
GPUPE_SQ_ALU1_ACTIVE_VTX_SIMD0 = 3, // Number of cycles ALU 1 is active (vertex) for SIMD0
GPUPE_SQ_ALU0_ACTIVE_PIX_SIMD0 = 4, // Number of cycles ALU 0 is active (pixel) for SIMD0
GPUPE_SQ_ALU1_ACTIVE_PIX_SIMD0 = 5, // Number of cycles ALU 1 is active (pixel) for SIMD0
GPUPE_SQ_ALU0_ACTIVE_VTX_SIMD1 = 6, // Number of cycles ALU 0 is active (vertex) for SIMD1
GPUPE_SQ_ALU1_ACTIVE_VTX_SIMD1 = 7, // Number of cycles ALU 1 is active (vertex) for SIMD1
GPUPE_SQ_ALU0_ACTIVE_PIX_SIMD1 = 8, // Number of cycles ALU 0 is active (pixel) for SIMD1
GPUPE_SQ_ALU1_ACTIVE_PIX_SIMD1 = 9, // Number of cycles ALU 1 is active (pixel) for SIMD1
GPUPE_SQ_EXPORT_CYCLES = 10, // Number of clocks the SQ is exporting data
GPUPE_SQ_ALU_CST_WRITTEN = 11, // Number of ALU constants written from the CP for both pix/vtx
GPUPE_SQ_TEX_CST_WRITTEN = 12, // Number of texture constants written from the CP for both pix/vtx
GPUPE_SQ_ALU_CST_STALL = 13, // Number of clocks the constant memory is stalled because of ALU constant store full
GPUPE_SQ_ALU_TEX_STALL = 14, // Number of clocks the constant memory is stalled because of texture constant store full
GPUPE_SQ_INST_WRITTEN = 15, // Number of instructions written from the CP for both pix/vtx
GPUPE_SQ_BOOLEAN_WRITTEN = 16, // Number of control flow booleans written from the CP for both pix/vtx
GPUPE_SQ_LOOPS_WRITTEN = 17, // Number of control flow loops written from the CP for both pix/vtx
GPUPE_SQ_PIXEL_SWAP_IN = 18, // Number of times a pixel vector is de-activated, this should be (number of clauses) * number of threads
GPUPE_SQ_PIXEL_SWAP_OUT = 19, // Number of times a pixel vector is activated, this should be (number of clauses - 1) * number of threads
GPUPE_SQ_VERTEX_SWAP_IN = 20, // Number of times a vertex vector is de-activated, this should be (number of clauses) * number of threads
GPUPE_SQ_VERTEX_SWAP_OUT = 21, // Number of times a vertex vector is activated, this should be (number of clauses - 1) * number of threads
GPUPE_SQ_ALU_VTX_INST_ISSUED = 22, // Number of ALU instruction issued (vertex) include all SIMDS and ALU 0/1
GPUPE_SQ_TEX_VTX_INST_ISSUED = 23, // Number of Texture instruction issued (vertex)
GPUPE_SQ_VC_VTX_INST_ISSUED = 24, // Number of VC instruction issued (vertex)
GPUPE_SQ_CF_VTX_INST_ISSUED = 25, // Number of control flow instruction issued (vertex) include all resources (TP,VC,all SIMDS)
GPUPE_SQ_ALU_PIX_INST_ISSUED = 26, // Number of ALU instruction issued (pixel) include all SIMDS and ALU 0/1
GPUPE_SQ_TEX_PIX_INST_ISSUED = 27, // Number of Texture instruction issued (pixel)
GPUPE_SQ_VC_PIX_INST_ISSUED = 28, // Number of VC instruction issued (pixel)
GPUPE_SQ_CF_PIX_INST_ISSUED = 29, // Number of control flow instruction issued (pixel) include all resources (TP,VC,all SIMDS)
GPUPE_SQ_ALU0_FIFO_EMPTY_SIMD0 = 30, // aka 'SQ_ALU0_STALL_SIMD0'. Number of clocks SIMD0's ALU0 and ALU1 were both idle, when there was any pixel or vertex threads in the RS.
GPUPE_SQ_ALU1_FIFO_EMPTY_SIMD0 = 31, // Number of clocks ALU 1 FIFO was empty (busy with control flow) for SIMD0, only counts when there is a thread in any of pixel or vertex RS
GPUPE_SQ_ALU0_FIFO_EMPTY_SIMD1 = 32, // Number of clocks ALU 0 FIFO was empty (busy with control flow) for SIMD1, only counts when there is a thread in any of pixel or vertex RS
GPUPE_SQ_ALU1_FIFO_EMPTY_SIMD1 = 33, // Number of clocks ALU 1 FIFO was empty (busy with control flow) for SIMD1, only counts when there is a thread in any of pixel or vertex RS
GPUPE_SQ_ALU_NOPS = 34, // Number of ALU NOPs generated by the SQ. This counts the number of added ALU instructions because of the use of the address register immediately after the address is set
GPUPE_SQ_PRED_SKIP = 35, // Always zero
GPUPE_SQ_SYNC_ALU_STALL_SIMD0_VTX = 36, // Number of cycles all vertex threads are blocked because of synchronization (Alu SIMD0). This is all threads that want to go to Alu SIMD0 but cannot because of the VC,TP or alloc resource.
GPUPE_SQ_SYNC_ALU_STALL_SIMD1_VTX = 37, // Number of cycles all vertex threads are blocked because of synchronization (Alu SIMD1) This is all threads that want to go to Alu SIMD1 but cannot because of the VC,TP or alloc resource.
GPUPE_SQ_SYNC_TEX_STALL_VTX = 38, // Number of cycles all vertex threads are blocked because of synchronization (Texture) This is all threads that want to go to the TP but cannot because of the VC,TP or alloc resource.
GPUPE_SQ_SYNC_VC_STALL_VTX = 39, // Number of cycles all vertex threads are blocked because of synchronization (VC) This is all threads that want to go to the VC but cannot because of the VC,TP or alloc resource.
GPUPE_SQ_CONSTANTS_USED_SIMD0 = 40, // Number of ALU constants used for SIMD0. Decodes the instruction to count how many constants there are (1,2,3)
GPUPE_SQ_CONSTANTS_SENT_SP_SIMD0 = 41, // Number of ALU constants sent to the SP for SIMD0. This is the number of reads to the constant store. If greater than SQ_CONSTANTS_USED, an instruction was recirculated because of waterfalling.
GPUPE_SQ_GPR_STALL_VTX = 42, // Number of stall cycles because of GPR resource (vertex). This prevents vertexes to be sent from the VGT.
GPUPE_SQ_GPR_STALL_PIX = 43, // Number of stall cycles because of GPR resource (pixel). This prevents pixels to be sent from the SC.
GPUPE_SQ_VTX_RS_STALL = 44, // Number of stall cycles because of Reservation Station (vertex). This prevents vertexes to be sent from the VGT.
GPUPE_SQ_PIX_RS_STALL = 45, // Number of stall cycles because of Reservation Station (pixel). This prevents pixels to be sent from the SC.
GPUPE_SQ_SX_PC_FULL = 46, // Number of cycles with parameter cache preventing export of vertex vector
GPUPE_SQ_SX_EXP_BUFF_FULL = 47, // Number of cycles with export buffers preventing export
GPUPE_SQ_SX_POS_BUFF_FULL = 48, // Number of cycles with position buffers preventing export
GPUPE_SQ_INTERP_QUADS = 49, // Number of interpolated quads. This is number of quads * number of parameters.
GPUPE_SQ_INTERP_ACTIVE = 50, // Number of active interpolation cycles. Optimally one cycle should interpolate 4 quads for 1 parameter.
GPUPE_SQ_IN_PIXEL_STALL = 51, // Number of cycles a ready to go pixel vector stalled because of port arbitration to the GPRs
GPUPE_SQ_IN_VTX_STALL = 52, // Number of cycles a ready to go vertex vector stalled because of port arbitration to the GPRs
GPUPE_SQ_VTX_CNT = 53, // Number of individual vertexes sent to the sequencer
GPUPE_SQ_VTX_16_VECTOR = 54, // Number of vertex vectors with less than 16 vertexes
GPUPE_SQ_VTX_32_VECTOR = 55, // Number of vertex vectors with less than 32 vertexes
GPUPE_SQ_VTX_48_VECTOR = 56, // Number of vertex vectors with less than 48 vertexes
GPUPE_SQ_PIXEL_16_VECTOR = 57, // Number of pixel vectors with less than 16 pixels
GPUPE_SQ_PIXEL_32_VECTOR = 58, // Number of pixel vectors with less than 32 pixels
GPUPE_SQ_PIXEL_48_VECTOR = 59, // Number of pixel vectors with less than 48 pixels
GPUPE_SQ_CONSTANTS_USED_SIMD1 = 60, // Number of ALU constants used for SIMD1. Decodes the instruction to count how many constants there are (1,2,3)
GPUPE_SQ_CONSTANTS_SENT_SP_SIMD1 = 61, // Number of ALU constants sent to the SP for SIMD1. This is the number of reads to the constant store. If greater than SQ_CONSTANTS_USED, an instruction was recirculated because of waterfalling.
GPUPE_SQ_SX_MEM_EXP_FULL = 62, // always zero.
GPUPE_SQ_ALU0_ACTIVE_VTX_SIMD2 = 63, // Number of cycles ALU 0 is active (vertex) for SIMD2
GPUPE_SQ_ALU1_ACTIVE_VTX_SIMD2 = 64, // Number of cycles ALU 1 is active (vertex) for SIMD2
GPUPE_SQ_ALU0_ACTIVE_PIX_SIMD2 = 65, // Number of cycles ALU 0 is active (pixel) for SIMD2
GPUPE_SQ_ALU1_ACTIVE_PIX_SIMD2 = 66, // Number of cycles ALU 1 is active (pixel) for SIMD2
GPUPE_SQ_ALU0_ACTIVE_VTX_SIMD3 = 67, // Number of cycles ALU 0 is active (vertex) for SIMD3
GPUPE_SQ_ALU1_ACTIVE_VTX_SIMD3 = 68, // Number of cycles ALU 1 is active (vertex) for SIMD3
GPUPE_SQ_ALU0_ACTIVE_PIX_SIMD3 = 69, // Number of cycles ALU 0 is active (pixel) for SIMD3
GPUPE_SQ_ALU1_ACTIVE_PIX_SIMD3 = 70, // Number of cycles ALU 1 is active (pixel) for SIMD3
GPUPE_SQ_ALU0_FIFO_EMPTY_SIMD2 = 71, // Number of clocks ALU 0 FIFO was empty (busy with control flow) for SIMD2, only counts when there is a thread in any of pixel or vertex RS
GPUPE_SQ_ALU1_FIFO_EMPTY_SIMD2 = 72, // Number of clocks ALU 1 FIFO was empty (busy with control flow) for SIMD2, only counts when there is a thread in any of pixel or vertex RS
GPUPE_SQ_ALU0_FIFO_EMPTY_SIMD3 = 73, // Number of clocks ALU 0 FIFO was empty (busy with control flow) for SIMD3, only counts when there is a thread in any of pixel or vertex RS
GPUPE_SQ_ALU1_FIFO_EMPTY_SIMD3 = 74, // Number of clocks ALU 1 FIFO was empty (busy with control flow) for SIMD3, only counts when there is a thread in any of pixel or vertex RS
GPUPE_SQ_SYNC_ALU_STALL_SIMD2_VTX = 75, // Number of cycles all vertex threads are blocked because of synchronization (Alu SIMD2) This is all threads that want to go to Alu SIMD2 but cannot because of the VC,TP or alloc resource.
GPUPE_SQ_SYNC_ALU_STALL_SIMD3_VTX = 76, // Number of cycles all vertex threads are blocked because of synchronization (Alu SIMD3) This is all threads that want to go to Alu SIMD3 but cannot because of the VC,TP or alloc resource.
GPUPE_SQ_SYNC_ALU_STALL_SIMD0_PIX = 77, // Number of cycles all pixel threads are blocked because of synchronization (Alu SIMD0) This is all threads that want to go to Alu SIMD0 but cannot because of the VC,TP or alloc resource.
GPUPE_SQ_SYNC_ALU_STALL_SIMD1_PIX = 78, // Number of cycles all pixel threads are blocked because of synchronization (Alu SIMD1) This is all threads that want to go to Alu SIMD1 but cannot because of the VC,TP or alloc resource.
GPUPE_SQ_SYNC_ALU_STALL_SIMD2_PIX = 79, // Number of cycles all pixel threads are blocked because of synchronization (Alu SIMD2) This is all threads that want to go to Alu SIMD2 but cannot because of the VC,TP or alloc resource.
GPUPE_SQ_SYNC_ALU_STALL_SIMD3_PIX = 80, // Number of cycles all pixel threads are blocked because of synchronization (Alu SIMD3) This is all threads that want to go to Alu SIMD3 but cannot because of the VC,TP or alloc resource.
GPUPE_SQ_SYNC_TEX_STALL_PIX = 81, // Number of cycles all pixel threads are blocked because of synchronization (Texture) This is all threads that want to go to the TP but cannot because of the VC,TP or alloc resource.
GPUPE_SQ_SYNC_VC_STALL_PIX = 82, // Number of cycles all pixel threads are blocked because of synchronization (VC) This is all threads that want to go to the VC but cannot because of the VC,TP or alloc resource.
GPUPE_SQ_CONSTANTS_USED_SIMD2 = 83, // Number of ALU constants used for SIMD2. Decodes the instruction to count how many constants there are (1,2,3)
GPUPE_SQ_CONSTANTS_SENT_SP_SIMD2 = 84, // Number of ALU constants sent to the SP for SIMD2. This is the number of reads to the constant store. If greater than SQ_CONSTANTS_USED, an instruction was recirculated because of waterfalling.
GPUPE_SQ_CONSTANTS_USED_SIMD3 = 85, // Number of ALU constants used for SIMD3. Decodes the instruction to count how many constants there are (1,2,3)
GPUPE_SQ_CONSTANTS_SENT_SP_SIMD3 = 86, // Number of ALU constants sent to the SP for SIMD3. This is the number of reads to the constant store. If greater than SQ_CONSTANTS_USED, an instruction was recirculated because of waterfalling.
GPUPE_SQ_ALU0_FIFO_FULL_SIMD0 = 87, // Number of cycles ALU 0 FIFO was full for SIMD0
GPUPE_SQ_ALU1_FIFO_FULL_SIMD0 = 88, // Number of cycles ALU 1 FIFO was full for SIMD0
GPUPE_SQ_ALU0_FIFO_FULL_SIMD1 = 89, // Number of cycles ALU 0 FIFO was full for SIMD1
GPUPE_SQ_ALU1_FIFO_FULL_SIMD1 = 90, // Number of cycles ALU 1 FIFO was full for SIMD1
GPUPE_SQ_ALU0_FIFO_FULL_SIMD2 = 91, // Number of cycles ALU 0 FIFO was full for SIMD2
GPUPE_SQ_ALU1_FIFO_FULL_SIMD2 = 92, // Number of cycles ALU 1 FIFO was full for SIMD2
GPUPE_SQ_ALU0_FIFO_FULL_SIMD3 = 93, // Number of cycles ALU 0 FIFO was full for SIMD3
GPUPE_SQ_ALU1_FIFO_FULL_SIMD3 = 94, // Number of cycles ALU 1 FIFO was full for SIMD3
GPUPE_VC_PERF_STATIC = 95, // Number of cycles the VC is not ready to receive anything and the SQ is not ready to send anything
GPUPE_VC_PERF_STALLED = 96, // Number of cycles the VC is not ready to receive anything but the SQ is ready to send something
GPUPE_VC_PERF_STARVED = 97, // Number of cycles the VC is ready to receive something but the SQ is not ready to send anything
GPUPE_VC_PERF_SEND = 98, // Number of cycles the VC is ready to receive something and the SQ is ready to send something
GPUPE_VC_PERF_ACTUAL_STARVED = 99, // Number of cycles the VC is starved
GPUPE_PIXEL_THREAD_0_ACTIVE = 100, // Number of cycles the thread 0 of pixel is active
GPUPE_VERTEX_THREAD_0_ACTIVE = 101, // Number of cycles the thread 0 of vertex is active
GPUPE_PIXEL_THREAD_0_NUMBER = 102, // Number of times the thread 0 of pixel is active
GPUPE_VERTEX_THREAD_0_NUMBER = 103, // Number of times the thread 0 of vertex is active
GPUPE_VERTEX_EVENT_NUMBER = 104, // Number of events sent in the vertex thread buffer
GPUPE_PIXEL_EVENT_NUMBER = 105, // Number of events sent in the pixel thread buffer
} GPUPERFEVENT_SQ;
// Performance counter event-select values for the VGT block. Values run
// contiguously from 0 through 48: VGT->SQ and VGT->PA interface events
// (0-21), RBIU FIFO events (22-28), spare placeholders (29-39) and TE
// events (40-48). Individual event meanings are not documented in this
// header beyond their names.
typedef enum
{
GPUPE_VGT_SQ_EVENT_WINDOW_ACTIVE = 0,
GPUPE_VGT_SQ_SEND = 1,
GPUPE_VGT_SQ_STALLED = 2,
GPUPE_VGT_SQ_STARVED_BUSY = 3,
GPUPE_VGT_SQ_STARVED_IDLE = 4,
GPUPE_VGT_SQ_STATIC = 5,
GPUPE_VGT_PA_EVENT_WINDOW_ACTIVE = 6,
GPUPE_VGT_PA_CLIP_V_SEND = 7,
GPUPE_VGT_PA_CLIP_V_STALLED = 8,
GPUPE_VGT_PA_CLIP_V_STARVED_BUSY = 9,
GPUPE_VGT_PA_CLIP_V_STARVED_IDLE = 10,
GPUPE_VGT_PA_CLIP_V_STATIC = 11,
GPUPE_VGT_PA_CLIP_P_SEND = 12,
GPUPE_VGT_PA_CLIP_P_STALLED = 13,
GPUPE_VGT_PA_CLIP_P_STARVED_BUSY = 14,
GPUPE_VGT_PA_CLIP_P_STARVED_IDLE = 15,
GPUPE_VGT_PA_CLIP_P_STATIC = 16,
GPUPE_VGT_PA_CLIP_S_SEND = 17,
GPUPE_VGT_PA_CLIP_S_STALLED = 18,
GPUPE_VGT_PA_CLIP_S_STARVED_BUSY = 19,
GPUPE_VGT_PA_CLIP_S_STARVED_IDLE = 20,
GPUPE_VGT_PA_CLIP_S_STATIC = 21,
GPUPE_RBIU_FIFOS_EVENT_WINDOW_ACTIVE = 22,
GPUPE_RBIU_IMMED_DATA_FIFO_STARVED = 23,
GPUPE_RBIU_IMMED_DATA_FIFO_STALLED = 24,
GPUPE_RBIU_DMA_REQUEST_FIFO_STARVED = 25,
GPUPE_RBIU_DMA_REQUEST_FIFO_STALLED = 26,
GPUPE_RBIU_DRAW_INITIATOR_FIFO_STARVED = 27,
GPUPE_RBIU_DRAW_INITIATOR_FIFO_STALLED = 28,
GPUPE_SPARE29 = 29,
GPUPE_SPARE30 = 30,
GPUPE_SPARE31 = 31,
GPUPE_SPARE32 = 32,
GPUPE_SPARE33 = 33,
GPUPE_SPARE34 = 34,
GPUPE_SPARE35 = 35,
GPUPE_SPARE36 = 36,
GPUPE_SPARE37 = 37,
GPUPE_SPARE38 = 38,
GPUPE_SPARE39 = 39,
GPUPE_TE_SU_IN_VALID = 40,
GPUPE_TE_SU_IN_READ = 41,
GPUPE_TE_SU_IN_PRIM = 42,
GPUPE_TE_SU_IN_EOP = 43,
GPUPE_TE_SU_IN_NULL_PRIM = 44,
GPUPE_TE_WK_IN_VALID = 45,
GPUPE_TE_WK_IN_READ = 46,
GPUPE_TE_OUT_PRIM_VALID = 47,
GPUPE_TE_OUT_PRIM_READ = 48,
} GPUPERFEVENT_VGT;
// Performance counter event-select values for the VC block. Values run
// contiguously from 0 through 69, grouped in tens by sub-unit prefix
// (RG, CC, MI, RP, DC, VC); SPARE entries are reserved placeholders that
// pad each group.
typedef enum
{
GPUPE_RG_VERTICES = 0, // Number of vertices processed by RG
GPUPE_RG_CLAMPED = 1, // Number of vertices clamped by RG
GPUPE_RG_L2_REQUEST = 2, // Count of L2 Requests created by RG
GPUPE_RG_L1_REQUEST = 3, // Count of L1 Requests created by RG
GPUPE_RG_MEGAFETCH = 4, // Count of megafetches processed by RG
GPUPE_RG_END_OF_GROUP = 5, // Count of end_of_group signals received from SQ
GPUPE_RG_CONFLICT = 6, // Number of vertex pairs that conflict in the L2
GPUPE_RG_DWORDS_REQUESTED = 7, // Number of dword requests created by RG
GPUPE_RG_SPARE0 = 8, // Reserved for RG
GPUPE_RG_SPARE1 = 9, // Reserved for RG
GPUPE_CC_STALLS = 10, // Number of clocks CC stalled due to memory latency
GPUPE_CC_HITS = 11, // L2 Requests that resulted in a cache hit
GPUPE_CC_MISSES = 12, // L2 Requests that resulted in a cache miss
GPUPE_CC_SECTOR_MISSES = 13, // L2 Requests that resulted in a sector miss
GPUPE_CC_L2B_STALLS = 14, // CC stalled due to L2B FIFO not RTR
GPUPE_CC_MI_STALLS = 15, // CC stalled due to MI not RTR
GPUPE_CC_MULTICYCLE_STALLS = 16, // CC stalled due to multicycle
GPUPE_CC_EVEN_ALLOC_STALLS = 17, // CC stalled due to even allocation stall
GPUPE_CC_ODD_ALLOC_STALLS = 18, // CC stalled due to odd allocation stall
GPUPE_CC_EVEN_BUSY_STALLS = 19, // CC stalled due to even counter busy
GPUPE_CC_ODD_BUSY_STALLS = 20, // CC stalled due to odd counter busy
GPUPE_CC_IN_FIFO_EMPTY = 21, // CC input FIFO is empty
GPUPE_CC_IN_FIFO_FULL = 22, // CC input FIFO is full
GPUPE_CC_FREEZE = 23, // CC frozen due to one of the stall conditions
GPUPE_CC_SPARE0 = 24, // Reserved for CC
GPUPE_CC_SPARE1 = 25, // Reserved for CC
GPUPE_CC_SPARE2 = 26, // Reserved for CC
GPUPE_CC_SPARE3 = 27, // Reserved for CC
GPUPE_CC_SPARE4 = 28, // Reserved for CC
GPUPE_CC_SPARE5 = 29, // Reserved for CC
GPUPE_MI_REQUESTS_TO_MH = 30, // Number of memory requests issued to memory hub
GPUPE_MI_AGP_REQUESTS = 31, // Number of AGP memory requests issued
GPUPE_MI_LATENCY_BITS_4_0 = 32, // Bits 4:0 of the latency count
GPUPE_MI_LATENCY_BITS_9_5 = 33, // Bits 9:5 of the latency count
GPUPE_MI_LATENCY_BITS_14_10 = 34, // Bits 14:10 of the latency count
GPUPE_MI_LATENCY_BITS_17_15 = 35, // Bits 17:15 of the latency count
GPUPE_MI_INPUT_FIFOS_FULL = 36, // Number of cycles any of the four Input FIFOs to the MI requestor are full
GPUPE_MI_INPUT_FIFOS_0_FULL = 37, // Number of cycles any of the two Input FIFOs on request path 0 to the MI requestor are full
GPUPE_MI_INPUT_FIFOS_1_FULL = 38, // Number of cycles any of the two Input FIFOs on request path 1 to the MI requestor are full
GPUPE_MI_SPARE1 = 39, // Reserved for MI
GPUPE_RP_SP_DATA_VALID = 40, // Number of clocks of valid data returned to SP
GPUPE_RP_STALLED = 41, // Number of clocks RP stalled due to TC use of phase
GPUPE_RP_SPARE0 = 42, // Reserved for RP
GPUPE_RP_SPARE1 = 43, // Reserved for RP
GPUPE_RP_SPARE2 = 44, // Reserved for RP
GPUPE_RP_SPARE3 = 45, // Reserved for RP
GPUPE_RP_SPARE4 = 46, // Reserved for RP
GPUPE_RP_SPARE5 = 47, // Reserved for RP
GPUPE_RP_SPARE6 = 48, // Reserved for RP
GPUPE_RP_SPARE7 = 49, // Reserved for RP
GPUPE_DC_NUM_VALIDS = 50, // Number of valid vectors returned to the SP
GPUPE_DC_SPARE0 = 51, // Reserved for DC
GPUPE_DC_SPARE1 = 52, // Reserved for DC
GPUPE_DC_SPARE2 = 53, // Reserved for DC
GPUPE_DC_SPARE3 = 54, // Reserved for DC
GPUPE_DC_SPARE4 = 55, // Reserved for DC
GPUPE_DC_SPARE5 = 56, // Reserved for DC
GPUPE_DC_SPARE6 = 57, // Reserved for DC
GPUPE_DC_SPARE7 = 58, // Reserved for DC
GPUPE_DC_SPARE8 = 59, // Reserved for DC
GPUPE_SQ_VC_SEND = 60, // Number of clocks the SQ is passing data to the VC
GPUPE_VC_STARVED_IDLE = 61, // Number of clock cycles the VC is idle and waiting for more data from the SQ
GPUPE_VC_BUSY = 62, // Number of clock cycles the VC is busy processing data
GPUPE_VC_IDLE = 63, // Number of clock cycles the VC is idle
GPUPE_VC_SPARE0 = 64, // Reserved for VC
GPUPE_VC_SPARE1 = 65, // Reserved for VC
GPUPE_VC_SPARE2 = 66, // Reserved for VC
GPUPE_VC_SPARE3 = 67, // Reserved for VC
GPUPE_VC_SPARE4 = 68, // Reserved for VC
GPUPE_VC_SPARE5 = 69, // Reserved for VC
} GPUPERFEVENT_VC;
// Performance counter event-select values for the PA/SU (primitive
// assembly, clipper and setup) block. Values run contiguously from 0
// through 101: primitive/vertex counts first (0-68), then per-unit
// idle/busy/stalled clock counts (69-101).
typedef enum
{
GPUPE_PERF_PAPC_PASX_REQ = 0, // Number of PA->SX requests
GPUPE_PERF_PAPC_PASX_DISABLE_PIPE = 1, // Number of transfers lost due to disabled pipe
GPUPE_PERF_PAPC_PASX_FIRST_VECTOR = 2, // Number of First Vectors from SX to PA
GPUPE_PERF_PAPC_PASX_SECOND_VECTOR = 3, // Number of Second Vectors from SX to PA
GPUPE_PERF_PAPC_PASX_FIRST_DEAD = 4, // Number of Unused First Vectors (due to granularity of 4)
GPUPE_PERF_PAPC_PASX_SECOND_DEAD = 5, // Number of Unused Second Vectors (due to granularity of 4)
GPUPE_PERF_PAPC_PASX_VTX_KILL_DISCARD = 6, // Number of vertices which have VTX KILL Enabled and Set
GPUPE_PERF_PAPC_PASX_VTX_NAN_DISCARD = 7, // Number of vertices which have NaN and corresponding NaN discard
GPUPE_PERF_PAPC_PA_INPUT_PRIM = 8, // Number of Primitives input to PA
GPUPE_PERF_PAPC_PA_INPUT_NULL_PRIM = 9, // Number of Null Primitives input to PA
GPUPE_PERF_PAPC_PA_INPUT_EVENT_FLAG = 10, // Number of Events input to PA
GPUPE_PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT = 11, // Number of First-Prim-Of-Slots input to PA
GPUPE_PERF_PAPC_PA_INPUT_END_OF_PACKET = 12, // Number of End-Of-Packets input to PA
GPUPE_PERF_PAPC_CLPR_CULL_PRIM = 13, // Number of Prims Culled by Clipper for VV, UCP, VTX_KILL, VTX_NAN
GPUPE_PERF_PAPC_CLPR_VVUCP_CULL_PRIM = 14, // Number of Prims Culled by Clipper for VV and UCP
GPUPE_PERF_PAPC_CLPR_VV_CULL_PRIM = 15, // Number of Prims Culled by Clipper for VV
GPUPE_PERF_PAPC_CLPR_UCP_CULL_PRIM = 16, // Number of Prims Culled by Clipper for UCP
GPUPE_PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM = 17, // Number of Prims Culled by Clipper for VTX_KILL
GPUPE_PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM = 18, // Number of Prims Culled by Clipper for VTX_NAN
GPUPE_PERF_PAPC_CLPR_CULL_TO_NULL_PRIM = 19, // Number of Clipper Culled Prims Retained for Pipe Info
GPUPE_PERF_PAPC_CLPR_VVUCP_CLIP_PRIM = 20, // Number of Prims Clipped by Clipper for VV and/or UCP
GPUPE_PERF_PAPC_CLPR_VV_CLIP_PRIM = 21, // Number of Prims Clipped by Clipper for VV
GPUPE_PERF_PAPC_CLPR_UCP_CLIP_PRIM = 22, // Number of Prims Clipped by Clipper for UCP
GPUPE_PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE = 23, // Number of Points which require detailed clip checked
GPUPE_PERF_PAPC_CLPR_CLIP_PLANE_CNT_1 = 24, // Number of Prims with 1 Clip Plane Intersection (includes VV and UCP)
GPUPE_PERF_PAPC_CLPR_CLIP_PLANE_CNT_2 = 25, // Number of Prims with 2 Clip Plane Intersections (includes VV and UCP)
GPUPE_PERF_PAPC_CLPR_CLIP_PLANE_CNT_3 = 26, // Number of Prims with 3 Clip Plane Intersections (includes VV and UCP)
GPUPE_PERF_PAPC_CLPR_CLIP_PLANE_CNT_4 = 27, // Number of Prims with 4 Clip Plane Intersections (includes VV and UCP)
GPUPE_PERF_PAPC_CLPR_CLIP_PLANE_CNT_5_8 = 28, // Number of Prims with 5-8 Clip Plane Intersections (includes VV and UCP)
GPUPE_PERF_PAPC_CLPR_CLIP_PLANE_CNT_9_12 = 29, // Number of Prims with 9-12 Clip Plane Intersections (includes VV and UCP)
GPUPE_PERF_PAPC_CLPR_CLIP_PLANE_NEAR = 30, // Number of Prims which intersect the NEAR VV Plane
GPUPE_PERF_PAPC_CLPR_CLIP_PLANE_FAR = 31, // Number of Prims which intersect the FAR VV Plane
GPUPE_PERF_PAPC_CLPR_CLIP_PLANE_LEFT = 32, // Number of Prims which intersect the LEFT VV Plane
GPUPE_PERF_PAPC_CLPR_CLIP_PLANE_RIGHT = 33, // Number of Prims which intersect the RIGHT VV Plane
GPUPE_PERF_PAPC_CLPR_CLIP_PLANE_TOP = 34, // Number of Prims which intersect the TOP VV Plane
GPUPE_PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM = 35, // Number of Prims which intersect the BOTTOM VV Plane
GPUPE_PERF_PAPC_CLSM_NULL_PRIM = 36, // Number of null primitives at Clip State Machine pipe stage
GPUPE_PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM = 37, // Number of totally visible (no-clipping) prims
GPUPE_PERF_PAPC_CLSM_CLIP_PRIM = 38, // UNUSED
GPUPE_PERF_PAPC_CLSM_CULL_TO_NULL_PRIM = 39, // Number of primitives which are culled during clip process
GPUPE_PERF_PAPC_CLSM_OUT_PRIM_CNT_1 = 40, // Number of primitives which were clipped and result in 1 primitive
GPUPE_PERF_PAPC_CLSM_OUT_PRIM_CNT_2 = 41, // Number of primitives which were clipped and result in 2 primitives
GPUPE_PERF_PAPC_CLSM_OUT_PRIM_CNT_3 = 42, // Number of primitives which were clipped and result in 3 primitives
GPUPE_PERF_PAPC_CLSM_OUT_PRIM_CNT_4 = 43, // Number of primitives which were clipped and result in 4 primitives
GPUPE_PERF_PAPC_CLSM_OUT_PRIM_CNT_5_8 = 44, // Number of primitives which were clipped and result in 5-8 primitives
GPUPE_PERF_PAPC_CLSM_OUT_PRIM_CNT_9_13 = 45, // Number of primitives which were clipped and result in 9-13 primitives
GPUPE_PERF_PAPC_CLSM_NON_TRIVIAL_CULL = 46, // UNUSED
GPUPE_PERF_PAPC_SU_INPUT_PRIM = 47, // Number of primitives input to the Setup block
GPUPE_PERF_PAPC_SU_INPUT_CLIP_PRIM = 48, // Number of clipped primitives input to the Setup block
GPUPE_PERF_PAPC_SU_INPUT_NULL_PRIM = 49, // Number of null primitives input to the Setup block
GPUPE_PERF_PAPC_SU_ZERO_AREA_CULL_PRIM = 50, // Number of primitives culled due to zero area
GPUPE_PERF_PAPC_SU_BACK_FACE_CULL_PRIM = 51, // Number of back-face primitives culled due to facedness
GPUPE_PERF_PAPC_SU_FRONT_FACE_CULL_PRIM = 52, // Number of front-face primitives culled due to facedness
GPUPE_PERF_PAPC_SU_POLYMODE_FACE_CULL = 53, // Number of polymode cull-determination primitives culled
GPUPE_PERF_PAPC_SU_POLYMODE_BACK_CULL = 54, // Number of polymode primitives discarded due to Back-Face Cull
GPUPE_PERF_PAPC_SU_POLYMODE_FRONT_CULL = 55, // Number of polymode primitives discarded due to Front-Face Cull
GPUPE_PERF_PAPC_SU_POLYMODE_INVALID_FILL = 56, // Number of polymode lines and/or points which are culled because they are an internal edge or point
GPUPE_PERF_PAPC_SU_OUTPUT_PRIM = 57, // Number of primitives output from the Setup block
GPUPE_PERF_PAPC_SU_OUTPUT_CLIP_PRIM = 58, // Number of clipped primitives output from the Setup block
GPUPE_PERF_PAPC_SU_OUTPUT_NULL_PRIM = 59, // Number of null primitives output from the Setup block
GPUPE_PERF_PAPC_SU_OUTPUT_EVENT_FLAG = 60, // Number of events output from the Setup block
GPUPE_PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT = 61, // Number of First-Prim-Of-Slots output from the Setup block
GPUPE_PERF_PAPC_SU_OUTPUT_END_OF_PACKET = 62, // Number of End-Of-Packets output from the Setup block
GPUPE_PERF_PAPC_SU_OUTPUT_POLYMODE_FACE = 63, // Number of polymode facing primitives output from the Setup block
GPUPE_PERF_PAPC_SU_OUTPUT_POLYMODE_BACK = 64, // Number of polymode back-face primitives output from the Setup block
GPUPE_PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT = 65, // Number of polymode front-face primitives output from the Setup block
GPUPE_PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE = 66, // Number of clipped polymode facing primitives output from the Setup block
GPUPE_PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK = 67, // Number of clipped polymode back-face primitives output from the Setup block
GPUPE_PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT = 68, // Number of clipped polymode front-face primitives output from the Setup block
GPUPE_PERF_PAPC_PASX_REQ_IDLE = 69, // Number of clocks PASX Requestor is Idle
GPUPE_PERF_PAPC_PASX_REQ_BUSY = 70, // Number of clocks PASX Requestor is Busy
GPUPE_PERF_PAPC_PASX_REQ_STALLED = 71, // Number of clocks PASX Requestor is Stalled
GPUPE_PERF_PAPC_PASX_REC_IDLE = 72, // Number of clocks PASX Receiver is Idle
GPUPE_PERF_PAPC_PASX_REC_BUSY = 73, // Number of clocks PASX Receiver is Busy
GPUPE_PERF_PAPC_PASX_REC_STARVED_SX = 74, // Number of clocks PASX Receiver is Stalled by SX
GPUPE_PERF_PAPC_PASX_REC_STALLED = 75, // Number of clocks PASX Receiver is Stalled by Position Memory or Clip Code Generator
GPUPE_PERF_PAPC_PASX_REC_STALLED_POS_MEM = 76, // Number of clocks PASX Receiver is Stalled by Position Memory
GPUPE_PERF_PAPC_PASX_REC_STALLED_CCGSM_IN = 77, // Number of clocks PASX Receiver is Stalled by Clip Code Generator
GPUPE_PERF_PAPC_CCGSM_IDLE = 78, // Number of clocks Clip Code Gen is Idle
GPUPE_PERF_PAPC_CCGSM_BUSY = 79, // Number of clocks Clip Code Gen is Busy
GPUPE_PERF_PAPC_CCGSM_STALLED = 80, // Number of clocks Clip Code Gen is Stalled
GPUPE_PERF_PAPC_CLPRIM_IDLE = 81, // Number of clocks Clip Primitive Machine is Idle
GPUPE_PERF_PAPC_CLPRIM_BUSY = 82, // Number of clocks Clip Primitive Machine is Busy
GPUPE_PERF_PAPC_CLPRIM_STALLED = 83, // Number of clocks Clip Primitive Machine is stalled by Clip State Machines
GPUPE_PERF_PAPC_CLPRIM_STARVED_CCGSM = 84, // Number of clocks Clip Primitive Machine is starved by Clip Code Generator
GPUPE_PERF_PAPC_CLIPSM_IDLE = 85, // Number of clocks Clip State Machines are Idle
GPUPE_PERF_PAPC_CLIPSM_BUSY = 86, // Number of clocks Clip State Machines are Busy
GPUPE_PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH = 87, // Number of clocks Clip State Machines are waiting for Clip Vert storage resources
GPUPE_PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ = 88, // Number of clocks Clip State Machines are waiting for High Priority Sequencer
GPUPE_PERF_PAPC_CLIPSM_WAIT_CLIPGA = 89, // Number of clocks Clip State Machines are waiting for ClipGA
GPUPE_PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP = 90, // Number of clocks Clip State Machines are waiting for VTE cycles
GPUPE_PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM = 91, // Number of clocks Clip State Machines are waiting for Clip Output State Machine
GPUPE_PERF_PAPC_CLIPGA_IDLE = 92, // Number of clocks Clip Ga is Idle
GPUPE_PERF_PAPC_CLIPGA_BUSY = 93, // Number of clocks Clip Ga is Busy
GPUPE_PERF_PAPC_CLIPGA_STARVED_VTE_CLIP = 94, // Number of clocks Clip Ga is Starved by VTE or Clipper
GPUPE_PERF_PAPC_CLIPGA_STALLED = 95, // Number of clocks Clip Ga is stalled
GPUPE_PERF_PAPC_CLIP_IDLE = 96, // Number of clocks Clip is Idle
GPUPE_PERF_PAPC_CLIP_BUSY = 97, // Number of clocks Clip is Busy
GPUPE_PERF_PAPC_SU_IDLE = 98, // Number of clocks Setup is Idle
GPUPE_PERF_PAPC_SU_BUSY = 99, // Number of clocks Setup is Busy
GPUPE_PERF_PAPC_SU_STARVED_CLIP = 100, // Number of clocks Setup is starved by Clipper
GPUPE_PERF_PAPC_SU_STALLED_SC = 101, // Number of clocks Setup is stalled by SC
} GPUPERFEVENT_PA_SU;
// Event selectors for the GPUPE_SC_* performance counters.
// Every enumerator is given an explicit value.
typedef enum
{
    // Event-window valid counters.
    GPUPE_SC_SR_WINDOW_VALID   = 0,   // Clocks the event-window is valid at stage_reg
    GPUPE_SC_CW_WINDOW_VALID   = 1,   // Clocks the event-window is valid at coarse_walker
    GPUPE_SC_QM_WINDOW_VALID   = 2,   // Clocks the event-window is valid at quadmask
    GPUPE_SC_QPP_WINDOW_VALID  = 3,   // Clocks the event-window is valid at quad pair proc
    GPUPE_SC_ITER_WINDOW_VALID = 4,   // Clocks the event-window is valid at iter

    // Starve / stall conditions.
    GPUPE_SC_STARVED_BY_PA      = 5,  // sc_rtr and not pa_rts and sc_busy
    GPUPE_SC_STARVED_BY_RCC     = 6,  // sc_tile_fifo full, rcc z return fifo empty
    GPUPE_SC_STALLED_BY_PRIM_FF = 7,  // A full sc primitive fifo is causing a stall
    GPUPE_SC_STALLED_BY_RCC     = 8,  // sc_rts and not rcc_rtr
    GPUPE_SC_STALLED_BY_BC      = 9,  // sc_rts and not bc_rtr
    GPUPE_SC_STALLED_BY_SX      = 10, // sc_rts and not (sx0_rtr and sx1_rtr)
    GPUPE_SC_STALLED_BY_SX0     = 11, // sc_rts and not sx0_rtr
    GPUPE_SC_STALLED_BY_SX1     = 12, // sc_rts and not sx1_rtr
    GPUPE_SC_STALLED_BY_SQ      = 13, // sc_sq count >= max
    GPUPE_SC_STALLED_BY_SP      = 14, // sc_sp count >= max
    GPUPE_SC_WAIT_FOR_R1        = 15, // qpp holds a single quad and waits for another tile to group with

    // Primitive discards and prim/tile counts.
    GPUPE_SC_SCISSOR_DISCARD     = 16, // Prim completely discarded by scissor
    GPUPE_SC_BB_DISCARD          = 17, // Prim discarded by bounding-box check (no pixels hit)
    GPUPE_SC_SUPERT_PRIM_DISCARD = 18, // Prim completely discarded by super_tile optimization
    GPUPE_SC_RT_PRIM             = 19, // Real-time prim count
    GPUPE_SC_TILE_VALID          = 20, // Tile count

    // Tiles-per-prim buckets.
    GPUPE_SC_TILE_PER_PRIM_H0  = 21,  // Prims with < 2 tiles
    GPUPE_SC_TILE_PER_PRIM_H1  = 22,  // Prims with < 4 tiles
    GPUPE_SC_TILE_PER_PRIM_H2  = 23,  // Prims with < 8 tiles
    GPUPE_SC_TILE_PER_PRIM_H3  = 24,  // Prims with < 16 tiles
    GPUPE_SC_TILE_PER_PRIM_H4  = 25,  // Prims with < 32 tiles
    GPUPE_SC_TILE_PER_PRIM_H5  = 26,  // Prims with < 64 tiles
    GPUPE_SC_TILE_PER_PRIM_H6  = 27,  // Prims with < 128 tiles
    GPUPE_SC_TILE_PER_PRIM_H7  = 28,  // Prims with < 256 tiles
    GPUPE_SC_TILE_PER_PRIM_H8  = 29,  // Prims with < 512 tiles
    GPUPE_SC_TILE_PER_PRIM_H9  = 30,  // Prims with < 1K tiles
    GPUPE_SC_TILE_PER_PRIM_H10 = 31,  // Prims with < 2K tiles
    GPUPE_SC_TILE_PER_PRIM_H11 = 32,  // Prims with < 4K tiles
    GPUPE_SC_TILE_PER_PRIM_H12 = 33,  // Prims with < 8K tiles
    GPUPE_SC_TILE_PER_PRIM_H13 = 34,  // Prims with < 16K tiles
    GPUPE_SC_TILE_PER_PRIM_H14 = 35,  // Prims with < 32K tiles
    GPUPE_SC_TILE_PER_PRIM_H15 = 36,  // Prims with < 64K tiles
    GPUPE_SC_TILE_PER_PRIM_H16 = 37,  // Prims with < 1M tiles

    GPUPE_SC_SUPERT_TILE_DISCARD = 38, // Tiles discarded by super_tile optimization

    // Quadmask: quads hit per walked tile.
    GPUPE_SC_QM_NUM_QUADS = 39,       // Total quads hit by coarsewalk
    GPUPE_SC_QM_MASK_H0   = 40,       // Tiles walked with 0 quads hit
    GPUPE_SC_QM_MASK_H1   = 41,       // Tiles walked with 1 quads hit
    GPUPE_SC_QM_MASK_H2   = 42,       // Tiles walked with 2 quads hit
    GPUPE_SC_QM_MASK_H3   = 43,       // Tiles walked with 3 quads hit
    GPUPE_SC_QM_MASK_H4   = 44,       // Tiles walked with 4 quads hit
    GPUPE_SC_QM_MASK_H5   = 45,       // Tiles walked with 5 quads hit
    GPUPE_SC_QM_MASK_H6   = 46,       // Tiles walked with 6 quads hit
    GPUPE_SC_QM_MASK_H7   = 47,       // Tiles walked with 7 quads hit
    GPUPE_SC_QM_MASK_H8   = 48,       // Tiles walked with 8 quads hit
    GPUPE_SC_QM_MASK_H9   = 49,       // Tiles walked with 9 quads hit
    GPUPE_SC_QM_MASK_H10  = 50,       // Tiles walked with 10 quads hit
    GPUPE_SC_QM_MASK_H11  = 51,       // Tiles walked with 11 quads hit
    GPUPE_SC_QM_MASK_H12  = 52,       // Tiles walked with 12 quads hit
    GPUPE_SC_QM_MASK_H13  = 53,       // Tiles walked with 13 quads hit
    GPUPE_SC_QM_MASK_H14  = 54,       // Tiles walked with 14 quads hit
    GPUPE_SC_QM_MASK_H15  = 55,       // Tiles walked with 15 quads hit
    GPUPE_SC_QM_MASK_H16  = 56,       // Tiles walked with 16 quads hit

    // Quadmask: quads covered per walked tile.
    GPUPE_SC_QM_COVERED_H0  = 57,     // Tiles walked with 0 quads covered
    GPUPE_SC_QM_COVERED_H1  = 58,     // Tiles walked with 1 quads covered
    GPUPE_SC_QM_COVERED_H2  = 59,     // Tiles walked with 2 quads covered
    GPUPE_SC_QM_COVERED_H3  = 60,     // Tiles walked with 3 quads covered
    GPUPE_SC_QM_COVERED_H4  = 61,     // Tiles walked with 4 quads covered
    GPUPE_SC_QM_COVERED_H5  = 62,     // Tiles walked with 5 quads covered
    GPUPE_SC_QM_COVERED_H6  = 63,     // Tiles walked with 6 quads covered
    GPUPE_SC_QM_COVERED_H7  = 64,     // Tiles walked with 7 quads covered
    GPUPE_SC_QM_COVERED_H8  = 65,     // Tiles walked with 8 quads covered
    GPUPE_SC_QM_COVERED_H9  = 66,     // Tiles walked with 9 quads covered
    GPUPE_SC_QM_COVERED_H10 = 67,     // Tiles walked with 10 quads covered
    GPUPE_SC_QM_COVERED_H11 = 68,     // Tiles walked with 11 quads covered
    GPUPE_SC_QM_COVERED_H12 = 69,     // Tiles walked with 12 quads covered
    GPUPE_SC_QM_COVERED_H13 = 70,     // Tiles walked with 13 quads covered
    GPUPE_SC_QM_COVERED_H14 = 71,     // Tiles walked with 14 quads covered
    GPUPE_SC_QM_COVERED_H15 = 72,     // Tiles walked with 15 quads covered
    GPUPE_SC_QM_COVERED_H16 = 73,     // Tiles walked with 16 quads covered

    // Quads surviving z, per tile.
    GPUPE_SC_HIER_NUM_QUADS = 74,     // Total quads surviving z
    GPUPE_SC_HIER_MASK_H0   = 75,     // Tiles with 0 quads surviving z
    GPUPE_SC_HIER_MASK_H1   = 76,     // Tiles with 1 quads surviving z
    GPUPE_SC_HIER_MASK_H2   = 77,     // Tiles with 2 quads surviving z
    GPUPE_SC_HIER_MASK_H3   = 78,     // Tiles with 3 quads surviving z
    GPUPE_SC_HIER_MASK_H4   = 79,     // Tiles with 4 quads surviving z
    GPUPE_SC_HIER_MASK_H5   = 80,     // Tiles with 5 quads surviving z
    GPUPE_SC_HIER_MASK_H6   = 81,     // Tiles with 6 quads surviving z
    GPUPE_SC_HIER_MASK_H7   = 82,     // Tiles with 7 quads surviving z
    GPUPE_SC_HIER_MASK_H8   = 83,     // Tiles with 8 quads surviving z
    GPUPE_SC_HIER_MASK_H9   = 84,     // Tiles with 9 quads surviving z
    GPUPE_SC_HIER_MASK_H10  = 85,     // Tiles with 10 quads surviving z
    GPUPE_SC_HIER_MASK_H11  = 86,     // Tiles with 11 quads surviving z
    GPUPE_SC_HIER_MASK_H12  = 87,     // Tiles with 12 quads surviving z
    GPUPE_SC_HIER_MASK_H13  = 88,     // Tiles with 13 quads surviving z
    GPUPE_SC_HIER_MASK_H14  = 89,     // Tiles with 14 quads surviving z
    GPUPE_SC_HIER_MASK_H15  = 90,     // Tiles with 15 quads surviving z
    GPUPE_SC_HIER_MASK_H16  = 91,     // Tiles with 16 quads surviving z

    // Detail sampler and packer.
    GPUPE_SC_DET_NUM_QUADS     = 92,  // Total quads surviving detail sampler
    GPUPE_SC_PKR_QD_PER_ROW_H1 = 93,  // Packer row outputs with 1 valid quad
    GPUPE_SC_PKR_QD_PER_ROW_H2 = 94,  // Packer row outputs with 2 valid quad
    GPUPE_SC_PKR_QD_PER_ROW_H3 = 95,  // Packer row outputs with 3 valid quad
    GPUPE_SC_PKR_QD_PER_ROW_H4 = 96,  // Packer row outputs with 4 valid quad
    GPUPE_SC_PKR_END_OF_VECTOR = 97,  // Number of pixel vectors
    GPUPE_SC_PKR_ONE_CLK       = 98,  // Number of one clock commands
    GPUPE_SC_QD_WITH_1_PIX     = 99,  // Quads with 1 pixel surviving detail
    GPUPE_SC_QD_WITH_2_PIX     = 100, // Quads with 2 pixels surviving detail
    GPUPE_SC_QD_WITH_3_PIX     = 101, // Quads with 3 pixels surviving detail
    GPUPE_SC_QD_WITH_4_PIX     = 102, // Quads with 4 pixels surviving detail

    // Event-window valid while sc is busy.
    GPUPE_SC_SR_WINDOW_VALID_BUSY   = 103, // Event-window valid at stage_reg with sc busy
    GPUPE_SC_CW_WINDOW_VALID_BUSY   = 104, // Event-window valid at coarse_walker with sc busy
    GPUPE_SC_QM_WINDOW_VALID_BUSY   = 105, // Event-window valid at quadmask with sc busy
    GPUPE_SC_QPP_WINDOW_VALID_BUSY  = 106, // Event-window valid at quad pair proc with sc busy
    GPUPE_SC_ITER_WINDOW_VALID_BUSY = 107, // Event-window valid at iter with sc busy
} GPUPERFEVENT_PA_SC;
// Event selectors for the HZ performance counters.
// Every enumerator is given an explicit value.
typedef enum
{
    // Write side of hz_mem.
    GPUPE_WRC_1VAL_QUADS    = 0,  // Writes to hz_mem carrying 1 valid quad
    GPUPE_WRC_2VAL_QUADS    = 1,  // Writes to hz_mem carrying 2 valid quads
    GPUPE_WRC_3VAL_QUADS    = 2,  // Writes to hz_mem carrying 3 valid quads
    GPUPE_WRC_4VAL_QUADS    = 3,  // Writes to hz_mem carrying 4 valid quads
    GPUPE_HZ_WR_BUSY        = 4,  // Write-side modules of HZ memory are busy
    GPUPE_HZ_SPARE0         = 5,  // Reserved for HZ

    // Read cache.
    GPUPE_RDC_TILE_HIT      = 6,  // Tile hits in the Read Cache
    GPUPE_RDC_STAGE3_STALL  = 7,  // Conflicts/stalls in Read Stage 3
    GPUPE_HZ_SPARE1         = 8,  // Reserved for HZ
    GPUPE_HZ_SPARE2         = 9,  // Reserved for HZ

    // Quad kept/culled counts.
    GPUPE_QUADS_KEPT        = 10, // Quads kept
    GPUPE_QUADS_ZCULL       = 11, // Quads culled due to Z only
    GPUPE_QUADS_SCULL       = 12, // Quads culled due to Stencil only
    GPUPE_QUADS_SZCULL      = 13, // Quads culled due to both Z and Stencil
    GPUPE_HZ_RE_BUSY        = 14, // Read-side modules of HZ memory are busy

    // BC -> HZ input.
    GPUPE_BC_SCLK_COUNT     = 15, // SCLK count for BC
    GPUPE_BC_HZ_VALID0      = 16, // Valid input data from BC bus 0
    GPUPE_BC_HZ_VALID1      = 17, // Valid input data from BC bus 1
    GPUPE_BC_HZ_VALID2      = 18, // Valid input data from BC bus 2
    GPUPE_BC_HZ_VALID3      = 19, // Valid input data from BC bus 3

    // Send / hold handshakes.
    GPUPE_SC_SCLK_COUNT     = 20, // SCLK count for SC
    GPUPE_SC_HZ_COARSE_SND  = 21, // SC sending coarse Z data to HZ
    GPUPE_HZ_SC_COARSE_HLD  = 22, // HZ holding SC from sending coarse Z data
    GPUPE_HZ_SC_HIER_SND    = 23, // HZ sending mask data to BC
                                  // NOTE(review): the HZ_SC name and the paired _HLD counter
                                  // below suggest the destination is SC, not BC - confirm.
    GPUPE_SC_HZ_HIER_HLD    = 24, // SC holding HZ from sending mask data
    GPUPE_HZ_BC_TILE_SND    = 25, // HZ sending tile data to BC
    GPUPE_BC_HZ_TILE_HLD    = 26, // BC holding HZ from sending tile data

    // HZ stalled / starved.
    GPUPE_SC_STALL_HZ       = 27, // HZ stalled by SC
    GPUPE_BC_STALL_HZ       = 28, // HZ stalled by BC
    GPUPE_EVENT_STALL       = 29, // HZ stalled by an Outstanding Event
    GPUPE_SC_STARVE_HZ_BUSY = 30, // HZ starved by the SC while the HZ is busy
    GPUPE_SC_STARVE_HZ_IDLE = 31, // HZ starved by the SC while the HZ is idle
} GPUPERFEVENT_HZ;
// Event selectors for the Texture Cache Return (TCR) performance counters.
typedef enum
{
    // The IPMUX receives the memory sent by MH; it can stall when the
    // dxt decompressor is not ready.
    GPUPE_DGMMPD_IPMUX0_STALL    = 0, // Clocks ipmux0 is stalled; each tick is 16 bytes of Texture BW lost
    GPUPE_DGMMPD_IPMUX1_STALL    = 1, // Clocks ipmux1 is stalled; each tick is 16 bytes of Texture BW lost
    GPUPE_DGMMPD_IPMUX2_STALL    = 2, // Always 0
    GPUPE_DGMMPD_IPMUX3_STALL    = 3, // Always 0
    GPUPE_DGMMPD_IPMUX_ALL_STALL = 4, // Always 0

    // OPMUX L2 write counts.
    GPUPE_OPMUX0_L2_WRITES       = 5, // opmux0 L2 writes
    GPUPE_OPMUX1_L2_WRITES       = 6, // opmux1 L2 writes
    GPUPE_OPMUX2_L2_WRITES       = 7, // opmux2 L2 writes
    GPUPE_OPMUX3_L2_WRITES       = 8, // opmux3 L2 writes
} GPUPERFEVENT_TCR; // Texture Cache Return
// Event selectors for the Texture Cache Memory (TCM) performance counters.
typedef enum
{
    // TCO Read Latency Fifos (256-deep): fill-level counters, one set per quad.
    GPUPE_QUAD0_RD_LAT_FIFO_EMPTY        = 0,  // Clocks Quad0 read latency fifo is empty
    GPUPE_QUAD0_RD_LAT_FIFO_16TH_FULL    = 1,  // Clocks Quad0 read latency fifo is >= 1/16th full, but < 1/8th
    GPUPE_QUAD0_RD_LAT_FIFO_8TH_FULL     = 2,  // Clocks Quad0 read latency fifo is >= 1/8th full, but < 1/4th
    GPUPE_QUAD0_RD_LAT_FIFO_4TH_FULL     = 3,  // Clocks Quad0 read latency fifo is >= 1/4th full, but < half
    GPUPE_QUAD0_RD_LAT_FIFO_HALF_FULL    = 4,  // Clocks Quad0 read latency fifo is >= half full, but not full
    GPUPE_QUAD0_RD_LAT_FIFO_FULL         = 5,  // Clocks Quad0 read latency fifo is full
    GPUPE_QUAD0_RD_LAT_FIFO_LT_16TH_FULL = 6,  // Clocks Quad0 read latency fifo is < 1/16th full, but not empty

    GPUPE_QUAD1_RD_LAT_FIFO_EMPTY        = 7,  // Clocks Quad1 read latency fifo is empty
    GPUPE_QUAD1_RD_LAT_FIFO_16TH_FULL    = 8,  // Clocks Quad1 read latency fifo is >= 1/16th full, but < 1/8th
    GPUPE_QUAD1_RD_LAT_FIFO_8TH_FULL     = 9,  // Clocks Quad1 read latency fifo is >= 1/8th full, but < 1/4th
    GPUPE_QUAD1_RD_LAT_FIFO_4TH_FULL     = 10, // Clocks Quad1 read latency fifo is >= 1/4th full, but < half
    GPUPE_QUAD1_RD_LAT_FIFO_HALF_FULL    = 11, // Clocks Quad1 read latency fifo is >= half full, but not full
    GPUPE_QUAD1_RD_LAT_FIFO_FULL         = 12, // Clocks Quad1 read latency fifo is full
    GPUPE_QUAD1_RD_LAT_FIFO_LT_16TH_FULL = 13, // Clocks Quad1 read latency fifo is < 1/16th full, but not empty

    GPUPE_QUAD2_RD_LAT_FIFO_EMPTY        = 14, // Clocks Quad2 read latency fifo is empty
    GPUPE_QUAD2_RD_LAT_FIFO_16TH_FULL    = 15, // Clocks Quad2 read latency fifo is >= 1/16th full, but < 1/8th
    GPUPE_QUAD2_RD_LAT_FIFO_8TH_FULL     = 16, // Clocks Quad2 read latency fifo is >= 1/8th full, but < 1/4th
    GPUPE_QUAD2_RD_LAT_FIFO_4TH_FULL     = 17, // Clocks Quad2 read latency fifo is >= 1/4th full, but < half
    GPUPE_QUAD2_RD_LAT_FIFO_HALF_FULL    = 18, // Clocks Quad2 read latency fifo is >= half full, but not full
    GPUPE_QUAD2_RD_LAT_FIFO_FULL         = 19, // Clocks Quad2 read latency fifo is full
    GPUPE_QUAD2_RD_LAT_FIFO_LT_16TH_FULL = 20, // Clocks Quad2 read latency fifo is < 1/16th full, but not empty

    GPUPE_QUAD3_RD_LAT_FIFO_EMPTY        = 21, // Clocks Quad3 read latency fifo is empty
    GPUPE_QUAD3_RD_LAT_FIFO_16TH_FULL    = 22, // Clocks Quad3 read latency fifo is >= 1/16th full, but < 1/8th
    GPUPE_QUAD3_RD_LAT_FIFO_8TH_FULL     = 23, // Clocks Quad3 read latency fifo is >= 1/8th full, but < 1/4th
    GPUPE_QUAD3_RD_LAT_FIFO_4TH_FULL     = 24, // Clocks Quad3 read latency fifo is >= 1/4th full, but < half
    GPUPE_QUAD3_RD_LAT_FIFO_HALF_FULL    = 25, // Clocks Quad3 read latency fifo is >= half full, but not full
    GPUPE_QUAD3_RD_LAT_FIFO_FULL         = 26, // Clocks Quad3 read latency fifo is full
    GPUPE_QUAD3_RD_LAT_FIFO_LT_16TH_FULL = 27, // Clocks Quad3 read latency fifo is < 1/16th full, but not empty

    // Reads starved waiting on MC.
    GPUPE_READ_STARVED_QUAD0 = 28, // Clocks Quad0 is starved waiting for data from MC
    GPUPE_READ_STARVED_QUAD1 = 29, // Clocks Quad1 is starved waiting for data from MC
    GPUPE_READ_STARVED_QUAD2 = 30, // Clocks Quad2 is starved waiting for data from MC
    GPUPE_READ_STARVED_QUAD3 = 31, // Clocks Quad3 is starved waiting for data from MC
    GPUPE_READ_STARVED       = 32, // Clocks any quad is starved waiting for data from MC

    // Reads stalled off waiting for the other quads.
    GPUPE_READ_STALLED_QUAD0 = 33, // Clocks a Quad0 read is stalled off waiting for other quads to sync up
    GPUPE_READ_STALLED_QUAD1 = 34, // Clocks a Quad1 read is stalled off waiting for other quads to sync up
    GPUPE_READ_STALLED_QUAD2 = 35, // Clocks a Quad2 read is stalled off waiting for other quads to sync up
    GPUPE_READ_STALLED_QUAD3 = 36, // Clocks a Quad3 read is stalled off waiting for other quads to sync up
    GPUPE_READ_STALLED       = 37, // Clocks a read is stalled off waiting for other quads to sync up

    // Valid cache-read cycles.
    GPUPE_VALID_READ_QUAD0   = 38, // Valid cycles of cache reads on Quad0
    GPUPE_VALID_READ_QUAD1   = 39, // Valid cycles of cache reads on Quad1
    GPUPE_VALID_READ_QUAD2   = 40, // Valid cycles of cache reads on Quad2
    GPUPE_VALID_READ_QUAD3   = 41, // Valid cycles of cache reads on Quad3

    // TC -> TP hand-off.
    GPUPE_TC_TP_STARVED_QUAD0 = 42, // Quad0 waits for another quad to be valid before sending to TP
    GPUPE_TC_TP_STARVED_QUAD1 = 43, // Quad1 waits for another quad to be valid before sending to TP
    GPUPE_TC_TP_STARVED_QUAD2 = 44, // Quad2 waits for another quad to be valid before sending to TP
    GPUPE_TC_TP_STARVED_QUAD3 = 45, // Quad3 waits for another quad to be valid before sending to TP
    GPUPE_TC_TP_STARVED       = 46, // Some data is ready for the TP, but stalled waiting for the rest
} GPUPERFEVENT_TCM; // Texture Cache Memory
// Event selectors for the Texture Cache Fetch (TCF) performance counters.
// Every enumerator is given an explicit value.
typedef enum
{
    // TPC Walker counters. The Walker makes the TPs loop (over mip levels,
    // aniso, and volume slices) so that every address needed by all of
    // their samples gets generated.
    GPUPE_VALID_CYCLES         = 0,  // Cycles the TPC Walker is active (walker fifo not empty and aligner fifo not full)
    GPUPE_SINGLE_PHASES        = 1,  // Cycles the walker is processing plain point/bilin fetches
    GPUPE_ANISO_PHASES         = 2,  // aniso, mip aniso, vol aniso, mip vol aniso
    GPUPE_MIP_PHASES           = 3,  // mip, mip aniso, mip vol, mip vol aniso
    GPUPE_VOL_PHASES           = 4,  // vol, mip vol, vol aniso, mip vol aniso
    GPUPE_MIP_VOL_PHASES       = 5,  // Just mip vol
    GPUPE_MIP_ANISO_PHASES     = 6,  // Just mip aniso
    GPUPE_VOL_ANISO_PHASES     = 7,  // Just vol aniso
    GPUPE_ANISO_2_1_PHASES     = 8,  // Cycles spent on 2:1 aniso
    GPUPE_ANISO_4_1_PHASES     = 9,  // Cycles spent on 4:1 aniso
    GPUPE_ANISO_6_1_PHASES     = 10, // Cycles spent on 6:1 aniso
    GPUPE_ANISO_8_1_PHASES     = 11, // Cycles spent on 8:1 aniso
    GPUPE_ANISO_10_1_PHASES    = 12, // Cycles spent on 10:1 aniso
    GPUPE_ANISO_12_1_PHASES    = 13, // Cycles spent on 12:1 aniso
    GPUPE_ANISO_14_1_PHASES    = 14, // Cycles spent on 14:1 aniso
    GPUPE_ANISO_16_1_PHASES    = 15, // Cycles spent on 16:1 aniso
    GPUPE_MIP_VOL_ANISO_PHASES = 16, // mip vol aniso

    // TPC Aligner counters.
    GPUPE_ALIGN_2_PHASES = 17,
    GPUPE_ALIGN_4_PHASES = 18,

    // TPC counters. The TPC communicates between the four TPs and sends
    // common data to the TC (mainly TCA). It is responsible for making the
    // TPs talk with the TC at the same time, even when their fetches break
    // down into different numbers of samples due to aniso/mip differences.
    GPUPE_TPC_BUSY                     = 19,
    GPUPE_TPC_STALLED                  = 20,
    GPUPE_TPC_STARVED                  = 21,
    GPUPE_TPC_WORKING                  = 22,
    GPUPE_TPC_WALKER_BUSY              = 23,
    GPUPE_TPC_WALKER_STALLED           = 24,
    GPUPE_TPC_WALKER_WORKING           = 25,
    GPUPE_TPC_ALIGNER_BUSY             = 26,
    GPUPE_TPC_ALIGNER_STALLED          = 27,
    GPUPE_TPC_ALIGNER_STALLED_BY_BLEND = 28,
    GPUPE_TPC_ALIGNER_STALLED_BY_CACHE = 29,
    GPUPE_TPC_ALIGNER_WORKING          = 30,
    GPUPE_TPC_BLEND_BUSY               = 31,
    GPUPE_TPC_BLEND_SYNC               = 32,
    GPUPE_TPC_BLEND_STARVED            = 33,
    GPUPE_TPC_BLEND_WORKING            = 34,

    // TPC Opcode counters: incremented once per instruction per 64-vector.
    GPUPE_OPCODE_0X00  = 35, // Vfetches with UseTextureCache=true
    GPUPE_OPCODE_0X01  = 36, // Tfetches executed
    GPUPE_OPCODE_0X04  = 37, // Unused, always 0
    GPUPE_OPCODE_0X10  = 38, // getBCF ops executed
    GPUPE_OPCODE_0X11  = 39, // getCompTexLOD ops executed
    GPUPE_OPCODE_0X12  = 40, // GetGradients ops executed
    GPUPE_OPCODE_0X13  = 41, // getWeights ops executed
    GPUPE_OPCODE_0X18  = 42, // setTexLOD ops executed
    GPUPE_OPCODE_0X19  = 43, // setGradientsH ops executed
    GPUPE_OPCODE_0X1A  = 44, // setGradientsV ops executed
    GPUPE_OPCODE_OTHER = 45, // Unused, always 0

    // Always zero.
    GPUPE_RESERVED_46 = 46,
    GPUPE_RESERVED_47 = 47,
    GPUPE_RESERVED_48 = 48,
    GPUPE_RESERVED_49 = 49,
    GPUPE_RESERVED_50 = 50,
    GPUPE_RESERVED_51 = 51,
    GPUPE_RESERVED_52 = 52,
    GPUPE_RESERVED_53 = 53,
    GPUPE_RESERVED_54 = 54,
    GPUPE_RESERVED_55 = 55,

    // TP/TPC -> TCA fifos. The TCA takes sample requests from the TPC and
    // the 4 TPs and breaks apart any request that cannot be sent to the
    // Memory Hub in one cycle. The TCA is split into 4 quarters, one per TP.
    // One fifo entry holds a quad's worth of samples.
    GPUPE_IN_FIFO_0_EMPTY          = 56,
    GPUPE_IN_FIFO_0_LT_HALF_FULL   = 57,
    GPUPE_IN_FIFO_0_HALF_FULL      = 58,
    GPUPE_IN_FIFO_0_FULL           = 59,
    GPUPE_IN_FIFO_1_EMPTY          = 60,
    GPUPE_IN_FIFO_1_LT_HALF_FULL   = 61,
    GPUPE_IN_FIFO_1_HALF_FULL      = 62,
    GPUPE_IN_FIFO_1_FULL           = 63,
    GPUPE_IN_FIFO_2_EMPTY          = 64,
    GPUPE_IN_FIFO_2_LT_HALF_FULL   = 65,
    GPUPE_IN_FIFO_2_HALF_FULL      = 66,
    GPUPE_IN_FIFO_2_FULL           = 67,
    GPUPE_IN_FIFO_3_EMPTY          = 68,
    GPUPE_IN_FIFO_3_LT_HALF_FULL   = 69,
    GPUPE_IN_FIFO_3_HALF_FULL      = 70,
    GPUPE_IN_FIFO_3_FULL           = 71,
    GPUPE_IN_FIFO_TPC_EMPTY        = 72,
    GPUPE_IN_FIFO_TPC_LT_HALF_FULL = 73,
    GPUPE_IN_FIFO_TPC_HALF_FULL    = 74,
    GPUPE_IN_FIFO_TPC_FULL         = 75,

    // Other TCA counters.
    GPUPE_TPC_TC_XFC     = 76, // TPC_TC_rts
    GPUPE_TPC_TC_STATE   = 77, // tca_state_rts
    GPUPE_TC_STALL       = 78, // Cycles any of the TP->TCA fifos was full
    GPUPE_QUAD0_TAPS     = 79, // u0TCA_PM_tap_valid_count
    GPUPE_QUAD1_TAPS     = 80, // u1TCA_PM_tap_valid_count
    GPUPE_QUAD2_TAPS     = 81, // u2TCA_PM_tap_valid_count
    GPUPE_QUAD3_TAPS     = 82, // u3TCA_PM_tap_valid_count
    GPUPE_QUADS          = 83, // tca_quad_valid_count
    GPUPE_TCA_SYNC_STALL = 84, // Cycles that 1+ TP->TCA fifos had data ready, but (not all fifos had data ready, or, some but not all of the TCA Probe Filters stalled)
    GPUPE_TAG_STALL      = 85, // Probe-filter tag stalls (sum from all 4 Probe Filter units)
    GPUPE_SLICE_STALL    = 86, // Probe-filter slice stalls (sum from all 4 Probe Filter units)
    GPUPE_SKEW_STALL     = 87, // Stalls inserted so that no probe filter gets > 3 cycles ahead of another (sum from all 4 Probe Filter units)
    GPUPE_TCB_SYNC_STALL = 88, // Cycles that 1+ TCA probe filters had data ready for TCB, but 1+ didn't have data ready

    // TCB (Tag Compare Block) Core counters.
    GPUPE_TCA_VALID    = 89, // Cycles TCA sent data to TCB
    GPUPE_PROBES_VALID = 90, // Probes sent from TCA to TCB

    // MISS_STALL could be called the "cache thrash" counter: cycles the
    // TCB & TCA were stalled because a cache miss occurred while no cache
    // lines were free (i.e. all cache lines are allocated, waiting for MH
    // to return data, or for TCO to finish reading all data out of them).
    GPUPE_MISS_STALL       = 91,
    GPUPE_FETCH_FIFO_STALL = 92, // Cycles the TCB Fetch Fifo was full
    GPUPE_TCO_STALL        = 93, // Cycles TCO wasn't ready for TCB to send it data
    GPUPE_ANY_STALL        = 94, // = GPUPE_MISS_STALL + GPUPE_FETCH_FIFO_STALL + GPUPE_TCO_STALL

    // Tag Compare counters. Each tag is built from several samples entering
    // the TCB at the same time. One tag is generated per cache line.
    GPUPE_TAG_MISSES          = 95,  // Requested cache line was not in the cache
    GPUPE_TAG_HITS            = 96,  // Requested cache line was in the cache
    GPUPE_SUB_TAG_MISSES      = 97,  // (Subset of TAG_HITS): line was in the cache, but at least some of the data wasn't
    GPUPE_SET0_INVALIDATES    = 98,
    GPUPE_SET1_INVALIDATES    = 99,
    GPUPE_SET2_INVALIDATES    = 100,
    GPUPE_SET3_INVALIDATES    = 101,
    GPUPE_SET0_TAG_MISSES     = 102,
    GPUPE_SET1_TAG_MISSES     = 103,
    GPUPE_SET2_TAG_MISSES     = 104,
    GPUPE_SET3_TAG_MISSES     = 105,
    GPUPE_SET0_TAG_HITS       = 106,
    GPUPE_SET1_TAG_HITS       = 107,
    GPUPE_SET2_TAG_HITS       = 108,
    GPUPE_SET3_TAG_HITS       = 109,
    GPUPE_SET0_SUB_TAG_MISSES = 110,
    GPUPE_SET1_SUB_TAG_MISSES = 111,
    GPUPE_SET2_SUB_TAG_MISSES = 112,
    GPUPE_SET3_SUB_TAG_MISSES = 113,

    GPUPE_SET0_EVICT1  = 114,
    GPUPE_SET0_EVICT2  = 115,
    GPUPE_SET0_EVICT3  = 116,
    GPUPE_SET0_EVICT4  = 117,
    GPUPE_SET0_EVICT5  = 118,
    GPUPE_SET0_EVICT6  = 119,
    GPUPE_SET0_EVICT7  = 120,
    GPUPE_SET0_EVICT8  = 121,
    GPUPE_SET0_EVICT9  = 122,
    GPUPE_SET0_EVICT10 = 123,
    GPUPE_SET0_EVICT11 = 124,
    GPUPE_SET0_EVICT12 = 125,
    GPUPE_SET0_EVICT13 = 126,
    GPUPE_SET0_EVICT14 = 127,
    GPUPE_SET0_EVICT15 = 128,
    GPUPE_SET0_EVICT16 = 129,

    GPUPE_SET1_EVICT1  = 130,
    GPUPE_SET1_EVICT2  = 131,
    GPUPE_SET1_EVICT3  = 132,
    GPUPE_SET1_EVICT4  = 133,
    GPUPE_SET1_EVICT5  = 134,
    GPUPE_SET1_EVICT6  = 135,
    GPUPE_SET1_EVICT7  = 136,
    GPUPE_SET1_EVICT8  = 137,
    GPUPE_SET1_EVICT9  = 138,
    GPUPE_SET1_EVICT10 = 139,
    GPUPE_SET1_EVICT11 = 140,
    GPUPE_SET1_EVICT12 = 141,
    GPUPE_SET1_EVICT13 = 142,
    GPUPE_SET1_EVICT14 = 143,
    GPUPE_SET1_EVICT15 = 144,
    GPUPE_SET1_EVICT16 = 145,

    GPUPE_SET2_EVICT1  = 146,
    GPUPE_SET2_EVICT2  = 147,
    GPUPE_SET2_EVICT3  = 148,
    GPUPE_SET2_EVICT4  = 149,
    GPUPE_SET2_EVICT5  = 150,
    GPUPE_SET2_EVICT6  = 151,
    GPUPE_SET2_EVICT7  = 152,
    GPUPE_SET2_EVICT8  = 153,
    GPUPE_SET2_EVICT9  = 154,
    GPUPE_SET2_EVICT10 = 155,
    GPUPE_SET2_EVICT11 = 156,
    GPUPE_SET2_EVICT12 = 157,
    GPUPE_SET2_EVICT13 = 158,
    GPUPE_SET2_EVICT14 = 159,
    GPUPE_SET2_EVICT15 = 160,
    GPUPE_SET2_EVICT16 = 161,

    GPUPE_SET3_EVICT1  = 162,
    GPUPE_SET3_EVICT2  = 163,
    GPUPE_SET3_EVICT3  = 164,
    GPUPE_SET3_EVICT4  = 165,
    GPUPE_SET3_EVICT5  = 166,
    GPUPE_SET3_EVICT6  = 167,
    GPUPE_SET3_EVICT7  = 168,
    GPUPE_SET3_EVICT8  = 169,
    GPUPE_SET3_EVICT9  = 170,
    GPUPE_SET3_EVICT10 = 171,
    GPUPE_SET3_EVICT11 = 172,
    GPUPE_SET3_EVICT12 = 173,
    GPUPE_SET3_EVICT13 = 174,
    GPUPE_SET3_EVICT14 = 175,
    GPUPE_SET3_EVICT15 = 176,
    GPUPE_SET3_EVICT16 = 177,

    // TCB fetch fifo, 8-deep.
    GPUPE_FF_EMPTY        = 178,
    GPUPE_FF_LT_HALF_FULL = 179,
    GPUPE_FF_HALF_FULL    = 180,
    GPUPE_FF_FULL         = 181,
    GPUPE_FF_XFC          = 182,
    GPUPE_FF_STALLED      = 183, // TCB_PM_fetch_fifo_stalled

    // TCB fetch generator.
    GPUPE_FG_MASKS                = 184,
    GPUPE_FG_LEFT_MASKS           = 185,
    GPUPE_FG_LEFT_MASK_STALLED    = 186,
    GPUPE_FG_LEFT_NOT_DONE_STALL  = 187,
    GPUPE_FG_LEFT_FG_STALL        = 188,
    GPUPE_FG_LEFT_SECTORS         = 189,
    GPUPE_FG_RIGHT_MASKS          = 190,
    GPUPE_FG_RIGHT_MASK_STALLED   = 191,
    GPUPE_FG_RIGHT_NOT_DONE_STALL = 192,
    GPUPE_FG_RIGHT_FG_STALL       = 193,
    GPUPE_FG_RIGHT_SECTORS        = 194,
    GPUPE_FG0_REQUESTS            = 195,
    GPUPE_FG0_STALLED             = 196,
    GPUPE_FG1_REQUESTS            = 197,
    GPUPE_FG1_STALLED             = 198,

    // TCB MH Interface.
    GPUPE_MEM_REQ512         = 199,
    GPUPE_MEM_REQ_SENT       = 200,
    GPUPE_MEM_AGP_READ_REQ   = 201,
    GPUPE_MEM_LOCAL_READ_REQ = 202,
    GPUPE_TC0_MH_STALLED     = 203,
    GPUPE_TC1_MH_STALLED     = 204,
} GPUPERFEVENT_TCF; // Texture Cache Fetch
// Event selectors for the TP performance counters.
// Every enumerator is given an explicit value.
typedef enum
{
    // Quads by filter type.
    GPUPE_POINT_QUADS         = 0,  // Any point sampled quads (includes mip, aniso, volume)
    GPUPE_BILIN_QUADS         = 1,  // Any bilinearly filtered quads (includes mip, aniso, volume)
    GPUPE_ANISO_QUADS         = 2,  // Any aniso (>1:1) filtered quads (includes mip and/or vol)
    GPUPE_MIP_QUADS           = 3,  // Any mip filtered quads (includes aniso and/or vol)
    GPUPE_VOL_QUADS           = 4,  // Any volume filtered quads (includes mip and/or aniso)
    GPUPE_MIP_VOL_QUADS       = 5,  // Mip and volume filtered quads (not aniso)
    GPUPE_MIP_ANISO_QUADS     = 6,  // Mip and aniso (>1:1) filtered quads (not vol)
    GPUPE_VOL_ANISO_QUADS     = 7,  // Volume and aniso (>1:1) filtered quads (not mip)
    GPUPE_ANISO_2_1_QUADS     = 8,  // Any quads with 2:1 anisotropic filtering
    GPUPE_ANISO_4_1_QUADS     = 9,  // Any quads with 4:1 anisotropic filtering
    GPUPE_ANISO_6_1_QUADS     = 10, // Any quads with 6:1 anisotropic filtering
    GPUPE_ANISO_8_1_QUADS     = 11, // Any quads with 8:1 anisotropic filtering
    GPUPE_ANISO_10_1_QUADS    = 12, // Any quads with 10:1 anisotropic filtering
    GPUPE_ANISO_12_1_QUADS    = 13, // Any quads with 12:1 anisotropic filtering
    GPUPE_ANISO_14_1_QUADS    = 14, // Any quads with 14:1 anisotropic filtering
    GPUPE_ANISO_16_1_QUADS    = 15, // Any quads with 16:1 anisotropic filtering
    GPUPE_MIP_VOL_ANISO_QUADS = 16, // Mip, volume and aniso (>1:1) filtered quads

    // Misaligned quads.
    GPUPE_ALIGN_2_QUADS       = 17, // 2-cycle misaligned quads
    GPUPE_ALIGN_4_QUADS       = 18, // 4-cycle misaligned quads

    // Valid pixels per quad.
    GPUPE_PIX_0_QUAD          = 19, // No valid pixels in quad
    GPUPE_PIX_1_QUAD          = 20, // 1 valid pixel in quad
    GPUPE_PIX_2_QUAD          = 21, // 2 valid pixels in quad
    GPUPE_PIX_3_QUAD          = 22, // 3 valid pixels in quad
    GPUPE_PIX_4_QUAD          = 23, // 4 valid pixels in quad

    // MipMap LOD counters.
    GPUPE_TP_MIPMAP_LOD0      = 24, // MipMap LOD 0
    GPUPE_TP_MIPMAP_LOD1      = 25, // MipMap LOD 1
    GPUPE_TP_MIPMAP_LOD2      = 26, // MipMap LOD 2
    GPUPE_TP_MIPMAP_LOD3      = 27, // MipMap LOD 3
    GPUPE_TP_MIPMAP_LOD4      = 28, // MipMap LOD 4
    GPUPE_TP_MIPMAP_LOD5      = 29, // MipMap LOD 5
    GPUPE_TP_MIPMAP_LOD6      = 30, // MipMap LOD 6
    GPUPE_TP_MIPMAP_LOD7      = 31, // MipMap LOD 7
    GPUPE_TP_MIPMAP_LOD8      = 32, // MipMap LOD 8
    GPUPE_TP_MIPMAP_LOD9      = 33, // MipMap LOD 9
    GPUPE_TP_MIPMAP_LOD10     = 34, // MipMap LOD 10
    GPUPE_TP_MIPMAP_LOD11     = 35, // MipMap LOD 11
    GPUPE_TP_MIPMAP_LOD12     = 36, // MipMap LOD 12
    GPUPE_TP_MIPMAP_LOD13     = 37, // MipMap LOD 13
    GPUPE_TP_MIPMAP_LOD14     = 38, // MipMap LOD 14
} GPUPERFEVENT_TP;
// Event selectors for the SX performance counters.
typedef enum
{
    GPUPE_SX_SC_QUADS          = 0, // Quads sent by the SC
    GPUPE_SX_SC_QUAD_FIFO_FULL = 1, // Cycles in which the SC quad FIFO is full
    GPUPE_SX_EXPORT_VECTORS    = 2, // Exported vectors
    GPUPE_SX_DUMMY_QUADS       = 3, // Dummy quads
    GPUPE_SX_ALPHA_FAIL        = 4, // Pixels that fail alpha test

    // SX -> RB interfaces.
    GPUPE_SX_RB_QUAD_BUSY      = 5, // SX sending quads to the RBs
    GPUPE_SX_RB_COLOR_BUSY     = 6, // SX sending colors to the RBs
    GPUPE_SX_RB_QUAD_STALL     = 7, // SX idle on the quad interface
    GPUPE_SX_RB_COLOR_STALL    = 8, // SX idle on the color interface
} GPUPERFEVENT_SX;
// Event selectors for the BC performance counters.
// Every enumerator is given an explicit value.
typedef enum
{
    // Busy / stalled / starved cycle counts.
    GPUPE_BC_CNTX0_BUSY       = 0,  // Cycles BC is busy processing data (bc_context0_busy)
    GPUPE_BC_CNTX17_BUSY      = 1,  // Cycles BC is busy processing data (bc_context17_busy)
    GPUPE_BC_RQ_STALLED       = 2,  // Cycles BC is stalled because the reorder queue has no available banks to select
    GPUPE_BC_AZ_STALLED       = 3,  // Cycles BC is stalled by AZ
    GPUPE_BC_MH_CPY_STALLED   = 4,  // Cycles BC is stalled by MH for copy/resolve
    GPUPE_BC_MH_EXP_STALLED   = 5,  // Cycles BC is stalled by MH for memory exports
    GPUPE_BC_SC_STARVED       = 6,  // Cycles SC->BC quad fifo is empty and BC has a tile from HZ to work on
    GPUPE_BC_SX_STARVED       = 7,  // Cycles SX->BC quad fifo is empty and we're in color mode and BC has a tile and quads from HZ and SC

    // Opcode counts.
    GPUPE_BC_ACC_COUNT        = 8,  // Times multiple fragments are combined into a quad (acc opcodes)
    GPUPE_BC_DRAW_COUNT       = 9,  // Quads sent to AZ (draw opcodes)
    GPUPE_BC_ACC2_COUNT       = 10, // Accumulate two opcodes for 64bpp and MRT's
    GPUPE_BC_DRAW2_COUNT      = 11, // Draw two opcodes for 64bpp and MRT's
    GPUPE_BC_SETZ_COUNT       = 12, // Depth exports
    GPUPE_BC_READ_COUNT       = 13, // Read opcodes when resolving
    GPUPE_BC_READ_ACC_COUNT   = 14, // read_acc opcodes when resolving
    GPUPE_BC_STATE_COUNT      = 15, // State opcodes sent to AZ
    GPUPE_BC_STATE2_COUNT     = 16, // state2 opcodes sent to AZ

    // Memory traffic.
    GPUPE_BC_COPY_WRITE_COUNT = 17, // 256-bit system memory writes for EDRAM copy/resolve
    GPUPE_BC_EXPORT_COUNT     = 18, // Memory exports from SX
} GPUPERFEVENT_BC;
typedef enum
{
GPUPE_RANK_BANK0_ACCESSES = 0, // Rank Bank 0 access event
GPUPE_RANK_BANK1_ACCESSES = 1, // Rank Bank 1 access event
GPUPE_RANK_BANK2_ACCESSES = 2, // Rank Bank 2 access event
GPUPE_RANK_BANK3_ACCESSES = 3, // Rank Bank 3 access event
GPUPE_RANK_BANK4_ACCESSES = 4, // Rank Bank 4 access event
GPUPE_RANK_BANK5_ACCESSES = 5, // Rank Bank 5 access event
GPUPE_RANK_BANK6_ACCESSES = 6, // Rank Bank 6 access event
GPUPE_RANK_BANK7_ACCESSES = 7, // Rank Bank 7 access event
GPUPE_RANK_BANK8_ACCESSES = 8, // Rank Bank 8 access event
GPUPE_RANK_BANK9_ACCESSES = 9, // Rank Bank 9 access event
GPUPE_RANK_BANK10_ACCESSES = 10, // Rank Bank 10 access event
GPUPE_RANK_BANK11_ACCESSES = 11, // Rank Bank 11 access event
GPUPE_RANK_BANK12_ACCESSES = 12, // Rank Bank 12 access event
GPUPE_RANK_BANK13_ACCESSES = 13, // Rank Bank 13 access event
GPUPE_RANK_BANK14_ACCESSES = 14, // Rank Bank 14 access event
GPUPE_RANK_BANK15_ACCESSES = 15, // Rank Bank 15 access event
GPUPE_READ_2_WRITE = 16, // Read to Write transition event
GPUPE_WRITE_2_READ = 17, // Write to Read transition event
GPUPE_NEW_PAGE_ACCESSES = 18, // Number of new page accesses out of the ordering engine
GPUPE_TOTAL_ACCESSES = 19, // Total accesses out of the ordering engine
GPUPE_READ_ACCESSES = 20, // Number of reads out of the ordering engine
GPUPE_ACCESS_PRESENT_NO_ISSUE_CLKS = 21, // Number of clocks in which an access is present but ordering engine doesn't issue
GPUPE_CMD_PRESENT_NO_XFER_CLKS = 22, // Number of non-transfer clocks on DRAM data bus when commands are present
GPUPE_URGENT_DC_ACCESSES = 23, // Number of urgent accesses from DC queue
GPUPE_URGENT_SB_ACCESSES = 24, // Number of urgent accesses from SB queue
GPUPE_URGENT_BIU_ACCESSES = 25, // Number of urgent accesses from BIUS (slow) queue
GPUPE_NEW_BIUF_ACCESSES = 26, // Number of new accesses from BIUF (fast) queue
GPUPE_NEW_CP_ACCESSES = 27, // Number of new accesses from CP queue
GPUPE_NEW_TC_ACCESSES = 28, // Number of new accesses from TC queue
GPUPE_NEW_VC_ACCESSES = 29, // Number of new accesses from VC queue
GPUPE_NEW_BC_CP_ACCESSES = 30, // Number of new accesses from BC_CP queue
GPUPE_NEW_BC_EX_ACCESSES = 31, // Number of new accesses from BC_EX queue
GPUPE_NEW_VGT_ACCESSES = 32, // Number of new accesses from VGT queue
GPUPE_NEW_DC_ACCESSES = 33, // Number of new accesses from DC queue
GPUPE_NEW_SB_ACCESSES = 34, // Number of new accesses from SB queue
GPUPE_NEW_BIUS_ACCESSES = 35, // Number of new accesses from BIUS (slow) queue
GPUPE_BIUS_READ_ACCESSES = 36, // Number of Read accesses from BIUS (slow) queue
GPUPE_SB_READ_ACCESSES = 37, // Number of Read accesses from SB queue
GPUPE_CP_READ_ACCESSES = 38, // Number of Read accesses from CP queue
// For the following counters, when writing GPUPERFREG_MC*_PERFCOUNTER0_SELECT,
// "N" is the 2nd lowest byte written to the _SELECT register. For instance,
// to read MC0's GPUPE_NTH_SMPG_ACCESS_IS_TC counter with N=1,
// use (GPUPE_NTH_SMPG_ACCESS_IS_TC | (0x01 << 8)) as GPUPERFREG_MC0_PERFCOUNTER0_SELECT's selection.
GPUPE_NTH_SMPG_ACCESS_IS_BIUF = 39, // Number of times the Nth access in a same page sequence is from BIUF (fast) queue
GPUPE_NTH_SMPG_ACCESS_IS_CP = 40, // Number of times the Nth access in a same page sequence is from CP queue
GPUPE_NTH_SMPG_ACCESS_IS_TC = 41, // Number of times the Nth access in a same page sequence is from TC queue
GPUPE_NTH_SMPG_ACCESS_IS_VC = 42, // Number of times the Nth access in a same page sequence is from VC queue
GPUPE_NTH_SMPG_ACCESS_IS_BC_CP = 43, // Number of times the Nth access in a same page sequence is from BC_CP queue
GPUPE_NTH_SMPG_ACCESS_IS_BC_EX = 44, // Number of times the Nth access in a same page sequence is from BC_EX queue
GPUPE_NTH_SMPG_ACCESS_IS_VGT = 45, // Number of times the Nth access in a same page sequence is from VGT queue
GPUPE_NTH_SMPG_ACCESS_IS_DC = 46, // Number of times the Nth access in a same page sequence is from DC queue
GPUPE_NTH_SMPG_ACCESS_IS_SB = 47, // Number of times the Nth access in a same page sequence is from SB queue
GPUPE_NTH_SMPG_ACCESS_IS_BIUS = 48, // Number of times the Nth access in a same page sequence is from BIUS (slow) queue
GPUPE_N_VALID_ENTRY_IN_TEXTURE_CAM_CLKS = 49, // Number of clocks where there are N valid entries in the texture cam
GPUPE_N_VALID_ENTRY_IN_VC_CAM_CLKS = 50, // Number of clocks where there are N valid entries in the VC cam
GPUPE_N_VALID_ENTRY_IN_BIUS_CAM_CLKS = 51, // Number of clocks where there are N valid entries in the BIUS cam
GPUPE_N_SB_BUF_USED_CLKS = 52, // Number of clocks when q_rdbuf_sb_buffers_used = N
GPUPE_N_TC_BUF_USED_CLKS = 53, // Number of clocks when q_rdbuf_tc_buffers_used = N
GPUPE_N_VC_BUF_USED_CLKS = 54, // Number of clocks when q_rdbuf_vc_buffers_used = N
GPUPE_N_DC_BUF_USED_CLKS = 55, // Number of clocks when q_rdbuf_dc_buffers_used = N
GPUPE_N_VGT_BUF_USED_CLK = 56, // Number of clocks when q_rdbuf_vgt_buffers_used = N
GPUPE_N_CP_BUF_USED_CLKS = 57, // Number of clocks when q_rdbuf_cp_buffers_used = N
} GPUPERFEVENT_MC;
// Performance events that can be selected for the MH performance counters
// (see the GPUPERFREG_MH_PERFCOUNTER*_SELECT registers).
typedef enum
{
    // --- CP requests ---
    GPUPE_CP_READ_MEMORY = 0,  // CP read requests that map to the main memory aperture
    GPUPE_CP_READ_PGLB = 1,  // CP read requests that map to the PG line buffer aperture
    GPUPE_CP_WRITE_MEMORY = 2,  // CP write requests that map to the main memory aperture
    GPUPE_CP_WRITE_SNOOPED = 3,  // CP write requests that are marked as snooped
    GPUPE_CP_WRITE_WRITEBACK = 4,  // CP write requests that map to the writeback aperture
    GPUPE_MH_CP_RTR = 5,  // cycles in which RTR from MH to CP is asserted (MH able to accept requests)

    // --- VGT requests ---
    GPUPE_VGT_READ_MEMORY = 6,  // VGT read requests that map to the main memory aperture
    GPUPE_VGT_READ_PGLB = 7,  // VGT read requests that map to the PG line buffer aperture
    GPUPE_MH_VGT_RTR = 8,  // cycles in which RTR from MH to VGT is asserted (MH able to accept requests)

    // --- IOC requests ---
    GPUPE_IOC_READ = 9,  // IOC read requests
    GPUPE_IOC_WRITE = 10,  // IOC write requests
    GPUPE_IOC_READ_BYTE_COUNT = 11,  // number of bytes in IOC read requests
    GPUPE_IOC_WRITE_BYTE_COUNT = 12,  // number of bytes in IOC write requests
    GPUPE_IOC_URGENT = 13,  // clock cycles in which IOC asserts urgent
    GPUPE_MH_IOC_RTR = 14,  // clock cycles in which the MH is ready to receive requests from the IOC

    // --- TC requests ---
    GPUPE_TC0_READ = 15,  // read requests
    GPUPE_MH_TC0_RTR = 16,  // cycles in which MH is ready
    GPUPE_TC1_READ = 17,  // read requests
    GPUPE_MH_TC1_RTR = 18,  // cycles in which MH is ready

    // --- VC requests ---
    GPUPE_VC0_READ_MEMORY = 19,  // read requests to memory
    GPUPE_VC0_READ_PGLB = 20,  // read requests to PGLB
    GPUPE_MH_VC0_RTR = 21,  // cycles in which MH is ready
    GPUPE_VC1_READ_MEMORY = 22,  // read requests to memory
    GPUPE_VC1_READ_PGLB = 23,  // read requests to PGLB
    GPUPE_MH_VC1_RTR = 24,  // cycles in which MH is ready

    // --- BC requests (the *_RTR entries carry no description in the original header) ---
    GPUPE_BC0_CP_WRITE = 25,  // copy write requests
    GPUPE_BC0_EX_WRITE = 26,  // export write requests
    GPUPE_MH_BC0_RTR = 27,
    GPUPE_BC1_CP_WRITE = 28,  // copy write requests
    GPUPE_BC1_EX_WRITE = 29,  // export write requests
    GPUPE_MH_BC1_RTR = 30,

    // --- DC requests ---
    GPUPE_DC_GRAPHICS_REQ = 31,  // graphics read requests
    GPUPE_DC_OVERLAY_REQ = 32,  // overlay read requests
    GPUPE_DC_URGENT = 33,  // cycles in which urgent is asserted

    // --- PGLB / BIU traffic ---
    GPUPE_PGLB_BIU_REQ = 34,  // requests from the PGLB to the BIU
    GPUPE_BIU_PGL_READ_DATA = 35,  // cycles in which read data is transferred from BIU to PGLB
    GPUPE_PGL_MHS_READ_DATA = 36,  // PGLB to MH switch data transfer cycles

    // --- Requests sent to, and data returned from, the memory controllers ---
    GPUPE_MH_MC0_READ_REQS = 37,  // read requests sent to MC0
    GPUPE_MH_MC0_WRITE_REQS = 38,  // write requests sent to MC0
    GPUPE_MH_MC1_READ_REQS = 39,  // read requests sent to MC1
    GPUPE_MH_MC1_WRITE_REQS = 40,  // write requests sent to MC1
    GPUPE_MC0_MH_READ_DATA = 41,  // bytes returned to MH from MC0, in increments of 32 bytes
    GPUPE_MC1_MH_READ_DATA = 42,  // bytes returned to MH from MC1, in increments of 32 bytes

    // --- Data returned from MH to its clients ---
    GPUPE_MH_CP_SEND = 43,  // bytes returned to CP, in increments of 4 bytes
    GPUPE_MH_VGT_SEND = 44,  // bytes returned to VGT, in increments of 16 bytes
    GPUPE_MH_IOC_SEND = 45,  // bytes returned to IOC, in increments of 4 bytes
    GPUPE_MH_TC0_SEND = 46,  // bytes returned to TC0, in increments of 16 bytes
    GPUPE_MH_TC1_SEND = 47,  // bytes returned to TC1, in increments of 16 bytes
    GPUPE_MH_VC0_SEND = 48,  // bytes returned to VC0, in increments of 16 bytes
    GPUPE_MH_VC1_SEND = 49,  // bytes returned to VC1, in increments of 16 bytes
    GPUPE_MH_DC_SEND = 50,  // bytes returned to DC, in increments of 16 bytes

    // --- Inflight queue full ---
    // Only the first entry is described in the original header; the remaining
    // entries presumably count the same condition for their own queue.
    GPUPE_DC0_INFLIGHT_FULL = 51,  // cycles in which the inflight (outstanding read request) queue was full
    GPUPE_DC1_INFLIGHT_FULL = 52,
    GPUPE_VC0_INFLIGHT_FULL = 53,
    GPUPE_VC1_INFLIGHT_FULL = 54,
    GPUPE_TC0_INFLIGHT_FULL = 55,
    GPUPE_TC1_INFLIGHT_FULL = 56,
    GPUPE_CP0_INFLIGHT_FULL = 57,
    GPUPE_CP1_INFLIGHT_FULL = 58,
    GPUPE_VGT0_INFLIGHT_FULL = 59,
    GPUPE_VGT1_INFLIGHT_FULL = 60,
    GPUPE_SB0_INFLIGHT_FULL = 61,
    GPUPE_SB1_INFLIGHT_FULL = 62,
    GPUPE_VCPGL_INFLIGHT_FULL = 63,
    GPUPE_CPPGL_INFLIGHT_FULL = 64,
    GPUPE_VGTPGL_INFLIGHT_FULL = 65,

    // --- Memory controller request queue full ---
    // Only the first entry is described in the original header; the remaining
    // entries presumably count the same condition for their own queue.
    GPUPE_MC0_DC_Q_FULL = 66,  // cycles in which the request queue in the MC was full
    GPUPE_MC0_VC_Q_FULL = 67,
    GPUPE_MC0_TC_Q_FULL = 68,
    GPUPE_MC0_CP_Q_FULL = 69,
    GPUPE_MC0_SB_Q_FULL = 70,
    GPUPE_MC0_VGT_Q_FULL = 71,
    GPUPE_MC0_BCCP_Q_FULL = 72,
    GPUPE_MC0_BCEX_Q_FULL = 73,
    GPUPE_MC1_DC_Q_FULL = 74,
    GPUPE_MC1_VC_Q_FULL = 75,
    GPUPE_MC1_TC_Q_FULL = 76,
    GPUPE_MC1_CP_Q_FULL = 77,
    GPUPE_MC1_SB_Q_FULL = 78,
    GPUPE_MC1_VGT_Q_FULL = 79,
    GPUPE_MC1_BCCP_Q_FULL = 80,
    GPUPE_MC1_BCEX_Q_FULL = 81,

    // --- PG line buffer ---
    GPUPE_CP_PGL_CACHE_HIT = 82,  // PG line buffer cache hits
    GPUPE_VGT_PGL_CACHE_HIT = 83,
    GPUPE_VC_PGL_CACHE_HIT = 84,
    GPUPE_CP_PGL_FULL = 85,  // analogous to the request queue in the MCs, but for PG reads
    GPUPE_VGT_PGL_FULL = 86,
    GPUPE_VC0_PGL_FULL = 87,
    GPUPE_VC1_PGL_FULL = 88,

    // --- Write data buffer / tag buffer full ---
    GPUPE_MC0_WDB_FULL = 89,  // cycles in which the write data buffer for MC0 was full
    GPUPE_MC0_TAGBUF_FULL = 90,  // cycles in which the tag buffer for MC0 was full
    GPUPE_MC1_WDB_FULL = 91,
    GPUPE_MC1_TAGBUF_FULL = 92,
    GPUPE_PGL_TAGBUF_FULL = 93,  // cycles in which the tag buffer for PGL was full

    // --- Write notifications ---
    GPUPE_CP_WRITENOTIFY = 94,  // write clean indications sent back to CP
    GPUPE_BC_WRITENOTIFY = 95,  // write cleans sent back to BC
    GPUPE_IOC_SYNC = 96,  // write cleans sent back to IOC

    // --- Latency counters ---
    // A latency counter grows each clock by the number of requests of its type
    // that are currently pending (a request starts being counted when it is
    // sent and stops when its data is actually retrieved). Dividing the
    // counter by the number of requests of that type gives the average latency.

    // Only selectable through GPUPERFREG_MH_PERFCOUNTER2_SELECT:
    GPUPE_PGL_BIU_LATENCY = 97,

    // MC0 latency; only selectable through GPUPERFREG_MH_PERFCOUNTER0_SELECT:
    GPUPE_MH_MC0_LATENCY = 98,
    GPUPE_MH_MC0_DC_LATENCY = 99,
    GPUPE_MH_MC0_VC_LATENCY = 100,
    GPUPE_MH_MC0_TC_LATENCY = 101,
    GPUPE_MH_MC0_CP_LATENCY = 102,
    GPUPE_MH_MC0_SB_LATENCY = 103,
    GPUPE_MH_MC0_VGT_LATENCY = 104,

    // MC1 latency; only selectable through GPUPERFREG_MH_PERFCOUNTER1_SELECT:
    GPUPE_MH_MC1_LATENCY = 105,
    GPUPE_MH_MC1_DC_LATENCY = 106,
    GPUPE_MH_MC1_VC_LATENCY = 107,
    GPUPE_MH_MC1_TC_LATENCY = 108,
    GPUPE_MH_MC1_CP_LATENCY = 109,
    GPUPE_MH_MC1_SB_LATENCY = 110,
    GPUPE_MH_MC1_VGT_LATENCY = 111,

    // PGL latency; only selectable through GPUPERFREG_MH_PERFCOUNTER2_SELECT:
    GPUPE_MH_PGL_LATENCY = 112,
    GPUPE_MH_PGL_CP_LATENCY = 113,
    GPUPE_MH_PGL_VC_LATENCY = 114,
    GPUPE_MH_PGL_VGT_LATENCY = 115,

    // --- Return data ready but client has no room ---
    GPUPE_TC0_RDY_AND_NOROOM = 116,  // cycles in which read data was available for TC0 but TC0 had no room to accept it
    GPUPE_TC1_RDY_AND_NOROOM = 117,
    GPUPE_IOC_RDY_AND_NOROOM = 118,
} GPUPERFEVENT_MH;
// Performance events that can be selected for the BIF performance counter.
// The enumerators are numbered rather than named; the meaning of each
// selector is given alongside it.
typedef enum
{
    GPUPE_0 = 0,  // counts always
    GPUPE_1 = 1,  // RBBM_IF FIFO full
    GPUPE_2 = 2,  // MIOC FIFO full
    GPUPE_3 = 3,  // MIOC holds a transaction and is waiting for the RBBM to become ready
    GPUPE_4 = 4,  // SIOC FIFO full
    GPUPE_5 = 5,  // SIOC holds a transaction and is waiting for the IOC to become ready
} GPUPERFEVENT_BIF;
// Performance events that can be selected for the DC performance counters.
// The original header gives no per-event descriptions; entries are grouped
// below by the prefix that appears in their names.
typedef enum
{
    // --- DMIF_PER_* ---
    GPUPE_DMIF_PER_DCREQ_EVENT = 0,
    GPUPE_DMIF_PER_DCGRPH_REQ_EVENT = 1,
    GPUPE_DMIF_PER_DCOVL_REQ_EVENT = 2,
    GPUPE_DMIF_PER_DCREQ_SIZE_EVENT = 3,
    GPUPE_DMIF_PER_DCGRPH_REQ_SIZE_EVENT = 4,
    GPUPE_DMIF_PER_DCOVL_REQ_SIZE_EVENT = 5,
    GPUPE_DMIF_PER_DCSURFACE_UPDATE_EVENT = 6,
    GPUPE_DMIF_PER_DC_MH_REQ_EVENT = 7,
    GPUPE_DMIF_PER_DC_MH_D1GRPH_REQ_EVENT = 8,
    GPUPE_DMIF_PER_DC_MH_D1OVL_REQ_EVENT = 9,
    GPUPE_DMIF_PER_DC_MH_REQ_SIZE_EVENT = 10,
    GPUPE_DMIF_PER_DC_MH_D1GRPH_REQ_SIZE_EVENT = 11,
    GPUPE_DMIF_PER_DC_MH_D1OVL_REQ_SIZE_EVENT = 12,
    GPUPE_DMIF_PER_DC_MH_SURFACE_UPDATE_EVENT = 13,
    GPUPE_DMIF_PER_MH_DC_RTR_EVENT = 14,
    GPUPE_DMIF_PER_CMD_PROC_WAIT_RTR_STATE_EVENT = 15,
    GPUPE_DMIF_PER_CMD_PROC_IDLE_STATE_EVENT = 16,
    GPUPE_DMIF_PER_DC_MH_URGENT_EVENT = 17,
    GPUPE_DMIF_PER_MH_DC_SEND_EVENT = 18,
    GPUPE_DMIF_PER_MH_DC_SEND_D1GRPH_EVENT = 19,
    GPUPE_DMIF_PER_MH_DC_SEND_D1OVL_EVENT = 20,
    GPUPE_DMIF_PER_DC_MH_RTR_EVENT = 21,
    GPUPE_DMIF_PER_DMIF_BUSY_EVENT = 22,
    GPUPE_DMIF_PER_DMIF_BUSY_MH_DC_SEND_EVENT = 23,
    GPUPE_DMIF_PER_DMIF_BUSY_DC_MH_RTR_EVENT = 24,
    GPUPE_DMIF_PER_DMIF_DCSEND_EVENT = 25,
    GPUPE_DMIF_PER_DMIF_DCNOT_RTS_EVENT = 26,
    GPUPE_DMIF_PER_DCDMIF_NOT_RTR_EVENT = 27,

    // --- DCP_PER_* ---
    GPUPE_DCP_PER_LUT_HOST_RW_EVENT = 28,
    GPUPE_DCP_PER_LUT_RW_BY_HOST_EVENT = 29,
    GPUPE_DCP_PER_RTR_LOW_BY_LUT_HOST_RW_EVENT = 30,

    // --- DCCG_PER_* ---
    GPUPE_DCCG_PER_SCLK_R_RBBMIF_CLOCK_ON_EVENT = 31,
    GPUPE_DCCG_PER_SCLK_R_DISCLOCK_ON_EVENT = 32,
    GPUPE_DCCG_PER_SCLK_G_SCL_CLOCK_ON_EVENT = 33,
    GPUPE_DCCG_PER_SCLK_G_DCCLOCK_ON_EVENT = 34,
    GPUPE_DCCG_PER_PCLK_CRTC_CLOCK_ON_EVENT = 35,
    GPUPE_DCCG_PER_DVOACLK_C_CLOCK_ON_EVENT = 36,
    GPUPE_DCCG_PER_DVOACLK_D_CLOCK_ON_EVENT = 37,

    // --- CRTC1_PER_* ---
    GPUPE_CRTC1_PER_START_LINE_EVENT = 38,
    GPUPE_CRTC1_PER_HSYNC_A_EVENT = 39,
    GPUPE_CRTC1_PER_VSYNC_A_EVENT = 40,
    GPUPE_CRTC1_PER_H_DATA_ACTIVE_EVENT = 41,
    GPUPE_CRTC1_PER_V_DATA_ACTIVE_EVENT = 42,
    GPUPE_CRTC1_PER_DATA_ACTIVE_EVENT = 43,
    GPUPE_CRTC1_PER_H_BLANK_EVENT = 44,
    GPUPE_CRTC1_PER_V_BLANK_EVENT = 45,
    GPUPE_CRTC1_PER_BLANK_EVENT = 46,
    GPUPE_CRTC1_PER_INTERLACE_SELECT_EVENT = 47,
    GPUPE_CRTC1_PER_STEREO_SELECT_EVENT = 48,

    // --- SCL1_PER_* ---
    GPUPE_SCL1_PER_HOST_CONFLICT_EVENT = 49,
    GPUPE_SCL1_PER_ADVANCE_FILTER_POS_EVENT = 50,
    GPUPE_SCL1_PER_TAINC_EVENT = 51,
    GPUPE_SCL1_PER_REQUEST_EOL_EVENT = 52,
    GPUPE_SCL1_PER_V_COEF_PRELOAD_EVENT = 53,
    GPUPE_SCL1_PER_EOL_EVENT = 54,
    GPUPE_SCL1_PER_SOF_EVENT = 55,

    // --- LB_PER_* ---
    GPUPE_LB_PER_DISP1_RESET_REQ_EVENT = 56,
    GPUPE_LB_PER_DISP1_REQ_SEND_EVENT = 57,
    GPUPE_LB_PER_DISP1_REQ_UNDERFLOW_EVENT = 58,
    GPUPE_LB_PER_DISP1_DATA_UNDERFLOW_EVENT = 59,
    GPUPE_LB_PER_DISP1_URGENT_EVENT = 60,
    GPUPE_LB_PER_DISP1_VBLANK_STAT_EVENT = 61,
    GPUPE_LB_PER_DISP1_VLINE_STAT_EVENT = 62,

    // --- DOUT_PER_* ---
    GPUPE_DOUT_PER_SCL_DISP1_MODE_CHANGE_INTERRUPT_EVENT = 63,
    GPUPE_DOUT_PER_LB_D1_VLINE_INTERRUPT_EVENT = 64,
    GPUPE_DOUT_PER_LB_D1_VBLANK_INTERRUPT_EVENT = 65,
    GPUPE_DOUT_PER_DISTIMER_INTERRUPT_EVENT = 66,

    // Values 67 through 255 are not assigned in this enumeration.
    GPUPE_LOGIC1_EVENT = 256,
} DCPERFEVENT;
// Performance events that can be selected for the BIU performance counters.
// Most entries are self-describing through their (long) names; the numbering
// has several gaps, which are called out below.
typedef enum
{
    BIUPE_RECV_REQUESTS = 0,
    BIUPE_RECV_RESPONSES = 1,  // PG data only
    BIUPE_IOC_READS = 2,
    BIUPE_IOC_WRITES = 3,
    BIUPE_MEM_READS = 4,
    BIUPE_MEM_WRITES = 5,
    BIUPE_FSB_SYNCS = 6,
    BIUPE_EIEIOS = 7,
    BIUPE_EOIS = 8,
    BIUPE_FLUSH_ACKS = 9,
    BIUPE_REQUEST_READ_DATA_BYTES_8_BYTE_AND_LESS_TRANSFERS_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER = 10,
    BIUPE_REQUEST_READ_DATA_BYTES_16_BYTE_AND_MORE_TRANSFERS_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER = 11,
    BIUPE_REQUEST_WRITE_DATA_BYTES_8_BYTE_AND_LESS_TRANSFERS_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER = 12,
    BIUPE_REQUEST_WRITE_DATA_BYTES_16_BYTE_AND_MORE_TRANSFER_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER = 13,
    BIUPE_RESERVED0 = 14,

    BIUPE_XMIT_REQUESTS = 15,
    BIUPE_XMIT_RESPONSES = 16,
    BIUPE_READ128S = 17,
    BIUPE_FLUSH_REQ_READS = 18,
    BIUPE_FLUSH_REQ_WRITES = 19,
    BIUPE_CPUWB = 20,
    BIUPE_INTERRUPTS = 21,
    BIUPE_RESPONSES_MC0 = 22,
    BIUPE_RESPONSES_MC1 = 23,
    BIUPE_RESPONSES_IOC_MST = 24,
    BIUPE_MC0_RESPONSE_READ_DATA_BYTES_8_BYTE_AND_LESS_TRANSFERS_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER_ = 25,
    BIUPE_MC0_RESPONSE_READ_DATA_BYTES_16_BYTE_AND_MORE_TRANSFERS_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER_ = 26,
    BIUPE_MC1_RESPONSE_READ_DATA_BYTES_8_BYTE_AND_LESS_TRANSFERS_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER_ = 27,
    BIUPE_MC1_RESPONSE_READ_DATA_BYTES_16_BYTE_AND_MORE_TRANSFERS_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER_ = 28,
    BIUPE_IOC_MST_RESPONSE_READ_DATA_BYTES_8_BYTE_AND_LESS_TRANSFERS_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER_ = 29,
    BIUPE_IOC_MST_RESPONSE_READ_DATA_BYTES_16_BYTE_AND_MORE_TRANSFERS_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER_ = 30,
    BIUPE_RESERVED1A = 31,
    BIUPE_RESERVED2A = 32,
    BIUPE_RESERVED3A = 33,
    BIUPE_RESERVED4A = 34,
    BIUPE_RESERVED5A = 35,
    BIUPE_RESERVED6A = 36,
    BIUPE_RESERVED7A = 37,
    BIUPE_RESERVED8A = 38,
    BIUPE_RESERVED9A = 39,

    // --- IOC master ---
    BIUPE_REQUESTS = 40,
    BIUPE_IOC_MAST_READS = 41,
    BIUPE_IOC_MAST_WRITES = 42,
    BIUPE_IOC_MAST_EOIS = 43,
    BIUPE_IOC_MAST_REQUEST_READ_DATA_BYTES_8_BYTE_AND_LESS_TRANSFERS_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER = 44,
    BIUPE_IOC_MAST_REQUEST_READ_DATA_BYTES_16_BYTE_AND_MORE_TRANSFERS_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER = 45,
    BIUPE_IOC_MAST_REQUEST_WRITE_DATA_BYTES_8_BYTE_AND_LESS_TRANSFERS_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER = 46,
    BIUPE_IOC_MAST_REQUEST_WRITE_DATA_BYTES_16_BYTE_AND_MORE_TRANSFER_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER = 47,
    BIUPE_RESERVED10 = 48,
    BIUPE_RESERVED11 = 49,

    // --- IOC slave ---
    BIUPE_IOC_SLV_REQUESTS = 50,
    BIUPE_IOC_SLV_SNOOP_READS = 51,
    BIUPE_IOC_SLV_SNOOP_WRITES = 52,
    BIUPE_IOC_SLV_INTERRUPTS = 53,
    BIUPE_IOC_SLV_SYNCS = 54,
    BIUPE_IOC_SLV_SNOOP_READS_NS = 55,
    BIUPE_IOC_SLV_SNOOP_WRITES_NS = 56,
    BIUPE_IOC_SLV_CPUWBS = 57,
    BIUPE_IOC_SLV_REQUEST_READ_DATA_BYTES_DIVIDED_BY_4_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER_ = 58,
    BIUPE_IOC_SLV_REQUEST_WRITE_DATA_BYTES_DIVIDED_BY_4_DEPENDENT_ON_SECONDARY_CONTROL_REGISTER_ = 59,
    BIUPE_IOC_SLV_RESPONSES_BEATS_ = 60,
    BIUPE_IOC_SLV_RESP_READ_DATA_BYTES_DIVIDED_BY_4_ = 61,
    BIUPE_IOC_SLV_SYNCCLEANS = 62,
    BIUPE_RESERVED12 = 63,
    BIUPE_RESERVED13 = 64,
    BIUPE_RESERVED14 = 65,
    BIUPE_RESERVED15 = 66,
    BIUPE_RESERVED16 = 67,
    BIUPE_RESERVED17 = 68,
    BIUPE_RESERVED18 = 69,

    // --- MC 0 ---
    BIUPE_MC_0_FAST_PATH_CPU_READS = 70,
    BIUPE_MC_0_ORDERED_CPU_PATH_READS = 71,
    BIUPE_MC_0_ORDERED_CPU_PATH_WRITES = 72,
    BIUPE_MC_0_ORDERED_IOC_PATH_READS = 73,
    BIUPE_MC_0_ORDERED_IOC_PATH_WRITES = 74,
    BIUPE_MC_0_SYNC_A = 75,
    BIUPE_MC_0_SYNC_B = 76,
    BIUPE_RESERVED19 = 77,
    BIUPE_RESERVED20 = 78,
    BIUPE_RESERVED21 = 79,

    // --- MC 1 ---
    BIUPE_MC_1_FAST_PATH_CPU_READS = 80,
    BIUPE_MC_1_ORDERED_CPU_PATH_READS = 81,
    BIUPE_MC_1_ORDERED_CPU_PATH_WRITES = 82,
    BIUPE_MC_1_ORDERED_IOC_PATH_READS = 83,
    BIUPE_MC_1_ORDERED_IOC_PATH_WRITES = 84,
    BIUPE_MC_1_SYNC_A = 85,
    BIUPE_MC_1_SYNC_B = 86,

    // Values 87 through 99 are not assigned in this enumeration.

    // --- *_NEARFULL ---
    BIUPE_MCQF_NEARFULL = 100,
    BIUPE_MCQDF_NEARFULL = 101,
    BIUPE_IMQF_NEARFULL = 102,
    BIUPE_PSF_NEARFULL = 103,
    BIUPE_M0SF_NEARFULL = 104,
    BIUPE_M1SF_NEARFULL = 105,
    BIUPE_IMSF_NEARFULL = 106,
    BIUPE_IMSDF_NEARFULL = 107,
    BIUPE_ISQF_NEARFULL = 108,
    BIUPE_ISSF_NEARFULL = 109,
    BIUPE_ISYF_NEARFULL = 110,
    BIUPE_CPYF_NEARFULL = 111,
    BIUPE_PQF_NEARFULL = 112,

    // Values 113 through 139 are not assigned in this enumeration.

    // --- Total latency; per the names, divide by the matching request count for an average ---
    // ("DIVIED" is misspelled in two of the identifiers; they are kept as-is
    // because the names are part of the public interface.)
    BIUPE_TOTAL_MEMORY_LATENCY_FAST_READS_MUST_BE_DIVIDED_BY_MEM_READ_REQUESTS_FOR_AVERAGE_LATENCY = 140,
    BIUPE_TOTAL_MEMORY_LATENCY_ORDERED_CPU_READS_MUST_BE_DIVIDED_BY_MEM_READ_REQUESTS_FOR_AVERAGE_LATENCY = 141,
    BIUPE_TOTAL_MEMORY_LATENCY_ORDERED_IOC_READS_MUST_BE_DIVIDED_BY_MEM_READ_REQUESTS_FOR_AVERAGE_LATENCY = 142,
    BIUPE_TOTAL_FLUSH_LATENCY_FOR_SNOOP_READS_MUST_BE_DIVIED_BY_FLUSH_REQUESTS_FOR_AVERAGE_LATENCY = 143,
    BIUPE_TOTAL_FLUSH_LATENCY_FOR_SNOOP_WRITES_MUST_BE_DIVIED_BY_FLUSH_REQUESTS_FOR_AVERAGE_LATENCY = 144,
    BIUPE_SNOOP_COMPLETION_BUFFER_FULL = 145,
    BIUPE_RESERVED62 = 146,
    BIUPE_RESERVED63 = 147,
    BIUPE_RESERVED64 = 148,
    BIUPE_RESERVED65 = 149,

    // --- Credits ---
    BIUPE_MC0_SLOW_CREDIT_COUNT_IS_ZERO = 150,
    BIUPE_MC1_SLOW_CREDIT_COUNT_IS_ZERO = 151,
    BIUPE_MC0_FAST_CREDIT_COUNT_IS_ZERO = 152,
    BIUPE_MC1_FAST_CREDIT_COUNT_IS_ZERO = 153,
    BIUPE_RESERVED66 = 154,
    BIUPE_RESERVED67 = 155,
    BIUPE_FSB_SLV_REQ_STALL_FOR_COMMAND_CREDITS = 156,
    BIUPE_FSB_SLV_REQ_STALL_FOR_DATA_CREDITS = 157,
    BIUPE_FSB_PG_REQ_STALL_FOR_CMD_CREDITS = 158,

    // Values 159 through 199 are not assigned in this enumeration.

    BIUPE_SCLK_COUNTS_SCLKS_IN_THE_MCLK_DOMAIN_ = 200,
    BIUPE_IOC_MASTER_REQ_STALL_BIU_HAS_DATA_BUT_IOC_NOT_READY_SCLK_DOMAIN_ = 201,
    BIUPE_IOC_SLAVE_RSP_STALL_SCLK_DOMAIN_ = 202,

    // Values 203 through 251 are not assigned in this enumeration.

    // --- Fixed-increment selectors ---
    BIUPE_INCREMENT_BY_0_DISABLE_COUNT = 252,
    BIUPE_INCREMENT_BY_1_USED_TO_COUNT_CLOCKS = 253,
    BIUPE_INCREMENT_BY_63_USED_FOR_SIMULATION_TO_RAPIDLY_INCREMENT_COUNTER = 254,
    BIUPE_OTHERS = 255,  // reserved
} BIUPERFEVENT;
// Performance events that can be selected for the IOC performance counters.
typedef enum
{
    // --- Cycles spent waiting ---
    IOCPE_PERF_CYCLES_WAITING_FOR_REGISTER_BUS = 0,
    IOCPE_PERF_CYCLES_WAITING_FOR_BIU_SLV_REQ = 1,
    IOCPE_PERF_CYCLES_WAITING_FOR_BIU_MST_CPL = 2,
    IOCPE_PERF_CYCLES_WAITING_FOR_BSB_MST_REQ = 3,
    IOCPE_PERF_CYCLES_WAITING_FOR_MH_SLV_REQ = 4,

    // --- BSB slave requests ---
    IOCPE_PERF_BSB_SLV_REQ_TOTAL_REQUESTS = 5,
    IOCPE_PERF_BSB_SLV_REQ_TOTAL_READS = 6,
    IOCPE_PERF_BSB_SLV_REQ_TOTAL_WRITES = 7,  // includes interrupts
    IOCPE_PERF_BSB_SLV_REQ_SNOOPED_READS = 8,
    IOCPE_PERF_BSB_SLV_REQ_SNOOPED_WRITES = 9,  // includes interrupts, if snooped attr is true
    IOCPE_PERF_BSB_SLV_REQ_NON_SNOOPED_READS = 10,
    IOCPE_PERF_BSB_SLV_REQ_NON_SNOOPED_WRITES = 11,  // includes interrupts, if non-snooped attr is true
    IOCPE_PERF_BSB_SLV_REQ_INTERRUPTS = 12,
    IOCPE_PERF_BSB_SLV_REQ_TOTAL_REQUESTS_DWORDS = 13,
    IOCPE_PERF_BSB_SLV_REQ_TOTAL_READS_DWORDS = 14,
    IOCPE_PERF_BSB_SLV_REQ_TOTAL_WRITES_DWORDS = 15,
    IOCPE_PERF_BSB_SLV_REQ_SNOOPED_READS_DWORDS = 16,
    IOCPE_PERF_BSB_SLV_REQ_SNOOPED_WRITES_DWORDS = 17,
    IOCPE_PERF_BSB_SLV_REQ_NON_SNOOPED_READS_DWORDS = 18,
    IOCPE_PERF_BSB_SLV_REQ_NON_SNOOPED_WRITES_DWORDS = 19,
    // Snooped or non-snooped depending on the secondary control register;
    // used together with the number of read requests to compute average latency.
    IOCPE_PERF_BSB_SLV_READ_LATENCY_CYCLES = 20,

    // --- BSB slave completions ---
    IOCPE_PERF_BSB_SLV_CPL_TOTAL_READ_COMPLETIONS = 21,
    IOCPE_PERF_BSB_SLV_CPL_SNOOPED_READ_COMPLETIONS = 22,
    IOCPE_PERF_BSB_SLV_CPL_NON_SNOOPED_READ_COMPLETIONS = 23,
    IOCPE_PERF_BSB_SLV_CPL_TOTAL_READ_COMPLETIONS_DWORDS = 24,
    IOCPE_PERF_BSB_SLV_CPL_SNOOPED_READ_COMPLETIONS_DWORDS = 25,
    IOCPE_PERF_BSB_SLV_CPL_NON_SNOOPED_READ_COMPLETIONS_DWORDS = 26,
    IOCPE_PERF_BSB_SLV_CPL_URS = 27,  // counts UR's and CA's (CA's don't happen in production mode)

    // --- BIF slave requests ---
    IOCPE_PERF_BIF_SLV_REQ_TOTAL_REQUESTS = 28,
    IOCPE_PERF_BIF_SLV_REQ_TOTAL_CP_WRITEBACKS = 29,
    IOCPE_PERF_BIF_SLV_REQ_TOTAL_SNOOPED_WRITES = 30,

    // --- BSB master requests and completions ---
    IOCPE_PERF_BSB_MST_REQ_TOTAL_REQUESTS = 31,
    IOCPE_PERF_BSB_MST_REQ_TOTAL_READS = 32,
    IOCPE_PERF_BSB_MST_REQ_TOTAL_WRITES = 33,
    IOCPE_PERF_BSB_MST_REQ_TOTAL_REQUESTS_DWORDS = 34,
    IOCPE_PERF_BSB_MST_REQ_TOTAL_READS_DWORDS = 35,
    IOCPE_PERF_BSB_MST_REQ_TOTAL_WRITES_DWORDS = 36,
    IOCPE_PERF_BSB_MST_CPL_TOTAL_COMPLETIONS = 37,
    IOCPE_PERF_BSB_MST_CPL_TOTAL_COMPLETIONS_DWORDS = 38,

    // --- Register bus ---
    IOCPE_PERF_REG_BUS_REQ_TOTAL_REQUESTS = 39,
    IOCPE_PERF_REG_BUS_REQ_TOTAL_READS = 40,
    IOCPE_PERF_REG_BUS_REQ_TOTAL_WRITES = 41,
    IOCPE_PERF_REG_BUS_CPL_READ_COMPLETIONS = 42,

    // --- Syncs ---
    IOCPE_PERF_TOTAL_SYNCS_ISSUED = 43,
    IOCPE_PERF_BIU_SYNCS_ISSUED = 44,
    IOCPE_PERF_MH_SYNCS_ISSUED = 45,
    IOCPE_PERF_TOTAL_SYNCS_RECEIVED = 46,
    IOCPE_PERF_BIU_SYNCS_RECEIVED = 47,
    IOCPE_PERF_MH_SYNCS_RECEIVED = 48,

    // --- FIFO occupancy ---
    IOCPE_PERF_FIFO_BSB_MST_REQ_FULL = 49,
    IOCPE_PERF_FIFO_BSB_MST_REQ_EMPTY = 50,
    IOCPE_PERF_FIFO_SYNC_FULL = 51,
    IOCPE_PERF_FIFO_SYNC_EMPTY = 52,
    IOCPE_PERF_FIFO_INTERRUPT_FULL = 53,
    IOCPE_PERF_FIFO_INTERRUPT_EMPTY = 54,
    IOCPE_PERF_FIFO_REG_FULL = 55,
    IOCPE_PERF_FIFO_REG_EMPTY = 56,
    IOCPE_PERF_FIFO_BIU_SLV_CPL_NEARFULL = 57,
    IOCPE_PERF_FIFO_BIU_SLV_CPL_EMPTY = 58,
    IOCPE_PERF_FIFO_MH_SLV_CPL_FULL = 59,
    IOCPE_PERF_FIFO_MH_SLV_CPL_EMPTY = 60,
    IOCPE_PERF_FIFO_BSB_SLV_REQ_NEARFULL = 61,
    IOCPE_PERF_FIFO_BSB_SLV_REQ_EMPTY = 62,
    IOCPE_PERF_FIFO_BSB_MST_CPL_NEARFULL = 63,
    IOCPE_PERF_FIFO_BSB_MST_CPL_EMPTY = 64,
    IOCPE_PERF_FIFO_BIU_MST_REQ_NEARFULL = 65,
    IOCPE_PERF_FIFO_BIU_MST_REQ_EMPTY = 66,
    IOCPE_PERF_FIFO_BIF_SLV_REQ_NEARFULL = 67,
    IOCPE_PERF_FIFO_BIF_SLV_REQ_EMPTY = 68,
    IOCPE_PERF_FIFO_READ_LATENCY_FULL = 69,
    IOCPE_PERF_FIFO_READ_LATENCY_EMPTY = 70,

    // --- Clock / fixed-increment selectors ---
    IOCPE_PERF_NUMBER_OF_SYSTEM_CLOCKS = 71,
    IOCPE_PERF_ZERO_COUNT = 72,  // disable count
    IOCPE_PERF_SIXTY_THREE_COUNT = 73,  // sim: rapidly increment counter
    IOCPE_PERF_BIU_SPARE = 74,  // so DFF inputs don't disappear

    // --- Additional BSB request/completion counts ---
    IOCPE_PERF_BSB_SLV_REQ_MESSAGES = 75,
    IOCPE_PERF_BSB_MST_REQ_CONFIG_READS = 76,  // all 1 dword
    IOCPE_PERF_BSB_MST_REQ_CONFIG_WRITES = 77,  // all 1 dword
    IOCPE_PERF_BSB_MST_REQ_POSTED_WRITES = 78,
    IOCPE_PERF_BSB_MST_REQ_POSTED_WRITES_DWORDS = 79,
    IOCPE_PERF_BSB_MST_CPL_NON_POSTED_WRITES = 80,  // all 1 dword

    // --- Cycles spent waiting on syncs ---
    IOCPE_PERF_CYCLES_WAITING_FOR_INT_SYNC = 81,  // total time waiting for any type of interrupt sync
    IOCPE_PERF_CYCLES_WAITING_FOR_INT_SYNC_SNOOP_PATH = 82,  // time waiting for interrupts that only caused syncs to BIU
    IOCPE_PERF_CYCLES_WAITING_FOR_INT_SYNC_NON_SNOOP_PATH = 83,  // time waiting for interrupts that only caused syncs to MH
    IOCPE_PERF_CYCLES_WAITING_FOR_INT_SYNC_BOTH_PATHS = 84,  // time waiting for interrupts that caused syncs to MH and BIU
    IOCPE_PERF_CYCLES_WAITING_FOR_MST_RD_CPL_SYNC = 85,  // total time waiting for any type of mst cpl sync
    IOCPE_PERF_CYCLES_WAITING_FOR_MST_RD_CPL_SYNC_SNOOP_PATH = 86,  // time waiting for mst cpls that only caused syncs to BIU
    IOCPE_PERF_CYCLES_WAITING_FOR_MST_RD_CPL_SYNC_NON_SNOOP_PATH = 87,  // time waiting for mst cpls that only caused syncs to MH
    IOCPE_PERF_CYCLES_WAITING_FOR_MST_RD_CPL_SYNC_BOTH_PATHS = 88,  // time waiting for mst cpls that caused syncs to MH and BIU
    IOCPE_PERF_CYCLES_WAITING_FOR_NSR_PASS_SW = 89,
    IOCPE_PERF_CYCLES_WAITING_FOR_SR_PASS_NSW = 90,
    IOCPE_PERF_CYCLES_WAITING_FOR_NSW_PASS_SW = 91,
    IOCPE_PERF_CYCLES_WAITING_FOR_SW_PASS_NSW = 92,

    // --- Additional FIFO occupancy ---
    IOCPE_PERF_FIFO_MST_CPL_SYNC_TYPE_FIFO_FULL = 93,
    IOCPE_PERF_FIFO_MST_CPL_SYNC_TYPE_FIFO_EMPTY = 94,
    IOCPE_PERF_FIFO_BSB_SNOOP_SLV_REQ_FULL = 95,
    IOCPE_PERF_FIFO_BSB_SNOOP_SLV_REQ_EMPTY = 96,
    IOCPE_PERF_FIFO_BSB_SNOOP_SLV_REQ_DATA_FULL = 97,
    IOCPE_PERF_FIFO_BSB_SNOOP_SLV_REQ_DATA_EMPTY = 98,
    IOCPE_PERF_FIFO_READ_LATENCY_BAD_REQ_FULL = 99,
    IOCPE_PERF_FIFO_READ_LATENCY_BAD_REQ_EMPTY = 100,
    IOCPE_PERF_FIFO_READ_LATENCY_NON_SNOOP_CAM_ADDRESS_FULL = 101,
    IOCPE_PERF_FIFO_READ_LATENCY_NON_SNOOP_CAM_ADDRESS_EMPTY = 102,
} IOCPERFEVENT;
//------------------------------------------------------------------------------
// GPU performance counter register offsets
// Register offsets for the GPU performance counters. Each counter has a
// _SELECT register plus a _LOW/_HI pair of value registers.
// Note that the relative order of _LOW and _HI is not uniform: for some units
// _LOW is at the lower offset (e.g. PA_SU, SQ, BC) while for others _HI is at
// the lower offset (e.g. TCR, TP*, MC*, MH).
typedef enum
{
    // ---------------- Event-select registers ----------------
    GPUPERFREG_PA_SU_PERFCOUNTER0_SELECT = 0x0C88,
    GPUPERFREG_PA_SU_PERFCOUNTER1_SELECT = 0x0C89,
    GPUPERFREG_PA_SU_PERFCOUNTER2_SELECT = 0x0C8A,
    GPUPERFREG_PA_SU_PERFCOUNTER3_SELECT = 0x0C8B,

    GPUPERFREG_PA_SC_PERFCOUNTER0_SELECT = 0x0C98,
    GPUPERFREG_PA_SC_PERFCOUNTER1_SELECT = 0x0C99,
    GPUPERFREG_PA_SC_PERFCOUNTER2_SELECT = 0x0C9A,
    GPUPERFREG_PA_SC_PERFCOUNTER3_SELECT = 0x0C9B,

    GPUPERFREG_VGT_PERFCOUNTER0_SELECT = 0x0C48,
    GPUPERFREG_VGT_PERFCOUNTER1_SELECT = 0x0C49,
    GPUPERFREG_VGT_PERFCOUNTER2_SELECT = 0x0C4A,
    GPUPERFREG_VGT_PERFCOUNTER3_SELECT = 0x0C4B,

    GPUPERFREG_TCR_PERFCOUNTER0_SELECT = 0x0E05,
    GPUPERFREG_TCR_PERFCOUNTER1_SELECT = 0x0E08,

    GPUPERFREG_TP0_PERFCOUNTER0_SELECT = 0x0E1F,
    GPUPERFREG_TP0_PERFCOUNTER1_SELECT = 0x0E22,
    GPUPERFREG_TP1_PERFCOUNTER0_SELECT = 0x0E28,
    GPUPERFREG_TP1_PERFCOUNTER1_SELECT = 0x0E2B,
    GPUPERFREG_TP2_PERFCOUNTER0_SELECT = 0x0E31,
    GPUPERFREG_TP2_PERFCOUNTER1_SELECT = 0x0E34,
    GPUPERFREG_TP3_PERFCOUNTER0_SELECT = 0x0E3A,
    GPUPERFREG_TP3_PERFCOUNTER1_SELECT = 0x0E3D,

    GPUPERFREG_TCM_PERFCOUNTER0_SELECT = 0x0E54,
    GPUPERFREG_TCM_PERFCOUNTER1_SELECT = 0x0E57,

    GPUPERFREG_TCF_PERFCOUNTER0_SELECT = 0x0E5A,
    GPUPERFREG_TCF_PERFCOUNTER1_SELECT = 0x0E5D,
    GPUPERFREG_TCF_PERFCOUNTER2_SELECT = 0x0E60,
    GPUPERFREG_TCF_PERFCOUNTER3_SELECT = 0x0E63,
    GPUPERFREG_TCF_PERFCOUNTER4_SELECT = 0x0E66,
    GPUPERFREG_TCF_PERFCOUNTER5_SELECT = 0x0E69,
    GPUPERFREG_TCF_PERFCOUNTER6_SELECT = 0x0E6C,
    GPUPERFREG_TCF_PERFCOUNTER7_SELECT = 0x0E6F,
    GPUPERFREG_TCF_PERFCOUNTER8_SELECT = 0x0E72,
    GPUPERFREG_TCF_PERFCOUNTER9_SELECT = 0x0E75,
    GPUPERFREG_TCF_PERFCOUNTER10_SELECT = 0x0E78,
    GPUPERFREG_TCF_PERFCOUNTER11_SELECT = 0x0E7B,

    GPUPERFREG_VC_PERFCOUNTER0_SELECT = 0x0E48,
    GPUPERFREG_VC_PERFCOUNTER1_SELECT = 0x0E4B,
    GPUPERFREG_VC_PERFCOUNTER2_SELECT = 0x0E4E,
    GPUPERFREG_VC_PERFCOUNTER3_SELECT = 0x0E51,

    GPUPERFREG_SQ_PERFCOUNTER0_SELECT = 0x0DC8,
    GPUPERFREG_SQ_PERFCOUNTER1_SELECT = 0x0DC9,
    GPUPERFREG_SQ_PERFCOUNTER2_SELECT = 0x0DCA,
    GPUPERFREG_SQ_PERFCOUNTER3_SELECT = 0x0DCB,

    GPUPERFREG_SX_PERFCOUNTER0_SELECT = 0x0DD4,

    GPUPERFREG_MC0_PERFCOUNTER0_SELECT = 0x0815,
    GPUPERFREG_MC1_PERFCOUNTER0_SELECT = 0x0855,

    GPUPERFREG_MH_PERFCOUNTER0_SELECT = 0x0A18,
    GPUPERFREG_MH_PERFCOUNTER1_SELECT = 0x0A1B,
    GPUPERFREG_MH_PERFCOUNTER2_SELECT = 0x0A1E,

    GPUPERFREG_BIF_PERFCOUNTER0_SELECT = 0x0048,

    GPUPERFREG_HZ_PERFCOUNTER0_SELECT = 0x1004,
    GPUPERFREG_HZ_PERFCOUNTER1_SELECT = 0x1007,

    GPUPERFREG_BC_PERFCOUNTER0_SELECT = 0x0F04,
    GPUPERFREG_BC_PERFCOUNTER1_SELECT = 0x0F05,
    GPUPERFREG_BC_PERFCOUNTER2_SELECT = 0x0F06,
    GPUPERFREG_BC_PERFCOUNTER3_SELECT = 0x0F07,

    GPUPERFREG_RBBM_PERFCOUNTER0_SELECT = 0x0395,
    GPUPERFREG_RBBM_PERFCOUNTER1_SELECT = 0x0396,

    GPUPERFREG_CP_PERFCOUNTER0_SELECT = 0x01E6,

    // ---------------- Counter value registers (LOW / HI) ----------------
    GPUPERFREG_PA_SU_PERFCOUNTER0_LOW = 0x0C8C,
    GPUPERFREG_PA_SU_PERFCOUNTER0_HI = 0x0C8D,
    GPUPERFREG_PA_SU_PERFCOUNTER1_LOW = 0x0C8E,
    GPUPERFREG_PA_SU_PERFCOUNTER1_HI = 0x0C8F,
    GPUPERFREG_PA_SU_PERFCOUNTER2_LOW = 0x0C90,
    GPUPERFREG_PA_SU_PERFCOUNTER2_HI = 0x0C91,
    GPUPERFREG_PA_SU_PERFCOUNTER3_LOW = 0x0C92,
    GPUPERFREG_PA_SU_PERFCOUNTER3_HI = 0x0C93,

    GPUPERFREG_PA_SC_PERFCOUNTER0_LOW = 0x0C9C,
    GPUPERFREG_PA_SC_PERFCOUNTER0_HI = 0x0C9D,
    GPUPERFREG_PA_SC_PERFCOUNTER1_LOW = 0x0C9E,
    GPUPERFREG_PA_SC_PERFCOUNTER1_HI = 0x0C9F,
    GPUPERFREG_PA_SC_PERFCOUNTER2_LOW = 0x0CA0,
    GPUPERFREG_PA_SC_PERFCOUNTER2_HI = 0x0CA1,
    GPUPERFREG_PA_SC_PERFCOUNTER3_LOW = 0x0CA2,
    GPUPERFREG_PA_SC_PERFCOUNTER3_HI = 0x0CA3,

    GPUPERFREG_VGT_PERFCOUNTER0_LOW = 0x0C4C,
    GPUPERFREG_VGT_PERFCOUNTER0_HI = 0x0C4D,
    GPUPERFREG_VGT_PERFCOUNTER1_LOW = 0x0C4E,
    GPUPERFREG_VGT_PERFCOUNTER1_HI = 0x0C4F,
    GPUPERFREG_VGT_PERFCOUNTER2_LOW = 0x0C50,
    GPUPERFREG_VGT_PERFCOUNTER2_HI = 0x0C51,
    GPUPERFREG_VGT_PERFCOUNTER3_LOW = 0x0C52,
    GPUPERFREG_VGT_PERFCOUNTER3_HI = 0x0C53,

    GPUPERFREG_TCR_PERFCOUNTER0_LOW = 0x0E07,
    GPUPERFREG_TCR_PERFCOUNTER0_HI = 0x0E06,
    GPUPERFREG_TCR_PERFCOUNTER1_LOW = 0x0E0A,
    GPUPERFREG_TCR_PERFCOUNTER1_HI = 0x0E09,

    GPUPERFREG_TP0_PERFCOUNTER0_LOW = 0x0E21,
    GPUPERFREG_TP0_PERFCOUNTER0_HI = 0x0E20,
    GPUPERFREG_TP0_PERFCOUNTER1_LOW = 0x0E24,
    GPUPERFREG_TP0_PERFCOUNTER1_HI = 0x0E23,
    GPUPERFREG_TP1_PERFCOUNTER0_LOW = 0x0E2A,
    GPUPERFREG_TP1_PERFCOUNTER0_HI = 0x0E29,
    GPUPERFREG_TP1_PERFCOUNTER1_LOW = 0x0E2D,
    GPUPERFREG_TP1_PERFCOUNTER1_HI = 0x0E2C,
    GPUPERFREG_TP2_PERFCOUNTER0_LOW = 0x0E33,
    GPUPERFREG_TP2_PERFCOUNTER0_HI = 0x0E32,
    GPUPERFREG_TP2_PERFCOUNTER1_LOW = 0x0E36,
    GPUPERFREG_TP2_PERFCOUNTER1_HI = 0x0E35,
    GPUPERFREG_TP3_PERFCOUNTER0_LOW = 0x0E3C,
    GPUPERFREG_TP3_PERFCOUNTER0_HI = 0x0E3B,
    GPUPERFREG_TP3_PERFCOUNTER1_LOW = 0x0E3F,
    GPUPERFREG_TP3_PERFCOUNTER1_HI = 0x0E3E,

    GPUPERFREG_TCM_PERFCOUNTER0_LOW = 0x0E56,
    GPUPERFREG_TCM_PERFCOUNTER0_HI = 0x0E55,
    GPUPERFREG_TCM_PERFCOUNTER1_LOW = 0x0E59,
    GPUPERFREG_TCM_PERFCOUNTER1_HI = 0x0E58,

    GPUPERFREG_TCF_PERFCOUNTER0_LOW = 0x0E5C,
    GPUPERFREG_TCF_PERFCOUNTER0_HI = 0x0E5B,
    GPUPERFREG_TCF_PERFCOUNTER1_LOW = 0x0E5F,
    GPUPERFREG_TCF_PERFCOUNTER1_HI = 0x0E5E,
    GPUPERFREG_TCF_PERFCOUNTER2_LOW = 0x0E62,
    GPUPERFREG_TCF_PERFCOUNTER2_HI = 0x0E61,
    GPUPERFREG_TCF_PERFCOUNTER3_LOW = 0x0E65,
    GPUPERFREG_TCF_PERFCOUNTER3_HI = 0x0E64,
    GPUPERFREG_TCF_PERFCOUNTER4_LOW = 0x0E68,
    GPUPERFREG_TCF_PERFCOUNTER4_HI = 0x0E67,
    GPUPERFREG_TCF_PERFCOUNTER5_LOW = 0x0E6B,
    GPUPERFREG_TCF_PERFCOUNTER5_HI = 0x0E6A,
    GPUPERFREG_TCF_PERFCOUNTER6_LOW = 0x0E6E,
    GPUPERFREG_TCF_PERFCOUNTER6_HI = 0x0E6D,
    GPUPERFREG_TCF_PERFCOUNTER7_LOW = 0x0E71,
    GPUPERFREG_TCF_PERFCOUNTER7_HI = 0x0E70,
    GPUPERFREG_TCF_PERFCOUNTER8_LOW = 0x0E74,
    GPUPERFREG_TCF_PERFCOUNTER8_HI = 0x0E73,
    GPUPERFREG_TCF_PERFCOUNTER9_LOW = 0x0E77,
    GPUPERFREG_TCF_PERFCOUNTER9_HI = 0x0E76,
    GPUPERFREG_TCF_PERFCOUNTER10_LOW = 0x0E7A,
    GPUPERFREG_TCF_PERFCOUNTER10_HI = 0x0E79,
    GPUPERFREG_TCF_PERFCOUNTER11_LOW = 0x0E7D,
    GPUPERFREG_TCF_PERFCOUNTER11_HI = 0x0E7C,

    GPUPERFREG_VC_PERFCOUNTER0_LOW = 0x0E4A,
    GPUPERFREG_VC_PERFCOUNTER0_HI = 0x0E49,
    GPUPERFREG_VC_PERFCOUNTER1_LOW = 0x0E4D,
    GPUPERFREG_VC_PERFCOUNTER1_HI = 0x0E4C,
    GPUPERFREG_VC_PERFCOUNTER2_LOW = 0x0E50,
    GPUPERFREG_VC_PERFCOUNTER2_HI = 0x0E4F,
    GPUPERFREG_VC_PERFCOUNTER3_LOW = 0x0E53,
    GPUPERFREG_VC_PERFCOUNTER3_HI = 0x0E52,

    GPUPERFREG_SQ_PERFCOUNTER0_LOW = 0x0DCC,
    GPUPERFREG_SQ_PERFCOUNTER0_HI = 0x0DCD,
    GPUPERFREG_SQ_PERFCOUNTER1_LOW = 0x0DCE,
    GPUPERFREG_SQ_PERFCOUNTER1_HI = 0x0DCF,
    GPUPERFREG_SQ_PERFCOUNTER2_LOW = 0x0DD0,
    GPUPERFREG_SQ_PERFCOUNTER2_HI = 0x0DD1,
    GPUPERFREG_SQ_PERFCOUNTER3_LOW = 0x0DD2,
    GPUPERFREG_SQ_PERFCOUNTER3_HI = 0x0DD3,

    GPUPERFREG_SX_PERFCOUNTER0_LOW = 0x0DD8,
    GPUPERFREG_SX_PERFCOUNTER0_HI = 0x0DD9,

    GPUPERFREG_MC0_PERFCOUNTER0_LOW = 0x0817,
    GPUPERFREG_MC0_PERFCOUNTER0_HI = 0x0816,
    GPUPERFREG_MC1_PERFCOUNTER0_LOW = 0x0857,
    GPUPERFREG_MC1_PERFCOUNTER0_HI = 0x0856,

    GPUPERFREG_MH_PERFCOUNTER0_LOW = 0x0A1A,
    GPUPERFREG_MH_PERFCOUNTER0_HI = 0x0A19,
    GPUPERFREG_MH_PERFCOUNTER1_LOW = 0x0A1D,
    GPUPERFREG_MH_PERFCOUNTER1_HI = 0x0A1C,
    GPUPERFREG_MH_PERFCOUNTER2_LOW = 0x0A20,
    GPUPERFREG_MH_PERFCOUNTER2_HI = 0x0A1F,

    GPUPERFREG_BIF_PERFCOUNTER0_LOW = 0x004A,
    GPUPERFREG_BIF_PERFCOUNTER0_HI = 0x0049,

    GPUPERFREG_HZ_PERFCOUNTER0_LOW = 0x1006,
    GPUPERFREG_HZ_PERFCOUNTER0_HI = 0x1005,
    GPUPERFREG_HZ_PERFCOUNTER1_LOW = 0x1009,
    GPUPERFREG_HZ_PERFCOUNTER1_HI = 0x1008,

    GPUPERFREG_BC_PERFCOUNTER0_LOW = 0x0F08,
    GPUPERFREG_BC_PERFCOUNTER0_HI = 0x0F09,
    GPUPERFREG_BC_PERFCOUNTER1_LOW = 0x0F0A,
    GPUPERFREG_BC_PERFCOUNTER1_HI = 0x0F0B,
    GPUPERFREG_BC_PERFCOUNTER2_LOW = 0x0F0C,
    GPUPERFREG_BC_PERFCOUNTER2_HI = 0x0F0D,
    GPUPERFREG_BC_PERFCOUNTER3_LOW = 0x0F0E,
    GPUPERFREG_BC_PERFCOUNTER3_HI = 0x0F0F,

    GPUPERFREG_RBBM_PERFCOUNTER0_LOW = 0x0397,
    GPUPERFREG_RBBM_PERFCOUNTER0_HI = 0x0398,
    GPUPERFREG_RBBM_PERFCOUNTER1_LOW = 0x0399,
    GPUPERFREG_RBBM_PERFCOUNTER1_HI = 0x039A,

    GPUPERFREG_CP_PERFCOUNTER0_LOW = 0x01E7,
    GPUPERFREG_CP_PERFCOUNTER0_HI = 0x01E8,

    // ---------------- Other registers ----------------
    GPUPERFREG_CP_PERFMON_CNTL = 0x01F5,
    GPUPERFREG_VGT_EVENT_INITIATOR = 0x21F9,
} GPUPERFREGISTER;
// Register offsets for the four BIU performance counters. Each counter
// occupies three consecutive offsets in the order SELECT, HI, LOW; the
// monitor control register sits immediately before counter 0.
typedef enum
{
    // Event-select registers
    BIUPERFREG_BIU_PERFCOUNTER0_SELECT = 0x408041,
    BIUPERFREG_BIU_PERFCOUNTER1_SELECT = 0x408044,
    BIUPERFREG_BIU_PERFCOUNTER2_SELECT = 0x408047,
    BIUPERFREG_BIU_PERFCOUNTER3_SELECT = 0x40804A,

    // Counter value registers
    BIUPERFREG_BIU_PERFCOUNTER0_LOW = 0x408043,
    BIUPERFREG_BIU_PERFCOUNTER0_HI = 0x408042,
    BIUPERFREG_BIU_PERFCOUNTER1_LOW = 0x408046,
    BIUPERFREG_BIU_PERFCOUNTER1_HI = 0x408045,
    BIUPERFREG_BIU_PERFCOUNTER2_LOW = 0x408049,
    BIUPERFREG_BIU_PERFCOUNTER2_HI = 0x408048,
    BIUPERFREG_BIU_PERFCOUNTER3_LOW = 0x40804C,
    BIUPERFREG_BIU_PERFCOUNTER3_HI = 0x40804B,

    // Monitor control register
    BIUPERFREG_BIU_PERFMON_CNTL = 0x408040,
} BIUPERFREGISTER;
// Register offsets for the two DC performance counters. Each counter
// occupies three consecutive offsets in the order SELECT, HI, LOW.
typedef enum
{
    // Event-select registers
    DCPERFREG_DC_PERFCOUNTER0_SELECT = 0x1FC8,
    DCPERFREG_DC_PERFCOUNTER1_SELECT = 0x1FCB,

    // Counter value registers
    DCPERFREG_DC_PERFCOUNTER0_LOW = 0x1FCA,
    DCPERFREG_DC_PERFCOUNTER0_HI = 0x1FC9,
    DCPERFREG_DC_PERFCOUNTER1_LOW = 0x1FCD,
    DCPERFREG_DC_PERFCOUNTER1_HI = 0x1FCC,
} DCPERFREGISTER;
// Register offsets for the four IOC performance counters. Each counter
// occupies three consecutive offsets in the order SELECT, HI, LOW; the
// monitor control register sits immediately before counter 0.
typedef enum
{
    // Event-select registers
    IOCPERFREG_IOC_PERFCOUNTER0_SELECT = 0x410081,
    IOCPERFREG_IOC_PERFCOUNTER1_SELECT = 0x410084,
    IOCPERFREG_IOC_PERFCOUNTER2_SELECT = 0x410087,
    IOCPERFREG_IOC_PERFCOUNTER3_SELECT = 0x41008A,

    // Counter value registers
    IOCPERFREG_IOC_PERFCOUNTER0_LOW = 0x410083,
    IOCPERFREG_IOC_PERFCOUNTER0_HI = 0x410082,
    IOCPERFREG_IOC_PERFCOUNTER1_LOW = 0x410086,
    IOCPERFREG_IOC_PERFCOUNTER1_HI = 0x410085,
    IOCPERFREG_IOC_PERFCOUNTER2_LOW = 0x410089,
    IOCPERFREG_IOC_PERFCOUNTER2_HI = 0x410088,
    IOCPERFREG_IOC_PERFCOUNTER3_LOW = 0x41008C,
    IOCPERFREG_IOC_PERFCOUNTER3_HI = 0x41008B,

    // Monitor control register
    IOCPERFREG_IOC_PERFMON_CNTL = 0x410080,
} IOCPERFREGISTER;
//------------------------------------------------------------------------------
// GPU performance counter register defines
// Layout of a performance counter _SELECT register value. The same 32 bits
// can be accessed either field-by-field or as a whole through 'dword'.
// Bit-fields in this header are laid out starting from the LSB.
typedef union
{
    struct
    {
        DWORD Select : 8;   // lowest byte: event selector
        DWORD N      : 8;   // 2nd lowest byte: the "N" parameter used by the MC "N"-style events
        DWORD        : 16;  // unnamed padding
    };
    DWORD dword;            // the whole register value
} GPUPERFCOUNTER_SELECT;
// Value of a GPU performance counter: 48 significant bits split into a
// 32-bit low part and a 16-bit high part, overlaid on a 64-bit quantity.
typedef union
{
    struct
    {
        DWORD Low  : 32;    // low 32 bits of the counter
        DWORD High : 16;    // high 16 bits of the counter
        DWORD      : 16;    // unnamed padding
    };
    ULARGE_INTEGER qword;   // the same storage viewed as one 64-bit value
} GPUPERFCOUNTER_VALUE;
// Value of a DC performance counter: 48 significant bits split into a
// 32-bit low part and a 16-bit high part, overlaid on a 64-bit quantity.
typedef union
{
    struct
    {
        DWORD Low  : 32;    // low 32 bits of the counter
        DWORD High : 16;    // high 16 bits of the counter
        DWORD      : 16;    // unnamed padding
    };
    ULARGE_INTEGER qword;   // the same storage viewed as one 64-bit value
} DCPERFCOUNTER_VALUE;
// Value of a BIU performance counter: 48 significant bits split into a
// 32-bit low part and a 16-bit high part, overlaid on a 64-bit quantity.
typedef union
{
    struct
    {
        DWORD Low  : 32;    // low 32 bits of the counter
        DWORD High : 16;    // high 16 bits of the counter
        DWORD      : 16;    // unnamed padding
    };
    ULARGE_INTEGER qword;   // the same storage viewed as one 64-bit value
} BIUPERFCOUNTER_VALUE;
// Value of an IOC performance counter: 48 significant bits split into a
// 32-bit low part and a 16-bit high part, overlaid on a 64-bit quantity.
typedef union
{
    struct
    {
        DWORD Low  : 32;    // low 32 bits of the counter
        DWORD High : 16;    // high 16 bits of the counter
        DWORD      : 16;    // unnamed padding
    };
    ULARGE_INTEGER qword;   // the same storage viewed as one 64-bit value
} IOCPERFCOUNTER_VALUE;
// Layout of the GPU performance monitor control register value: a 4-bit
// State field in the low bits and a 2-bit EnableMode field starting at bit 8.
typedef union
{
    struct
    {
        DWORD State      : 4;   // bits 0-3
        DWORD            : 4;   // unnamed padding
        DWORD EnableMode : 2;   // bits 8-9
        DWORD            : 22;  // unnamed padding
    };
    DWORD dword;                // the whole register value
} GPUPERFCOUNTER_CNTL;
// Layout of the DC performance monitor control value: a 4-bit State field
// in the low bits and a 2-bit EnableMode field starting at bit 8.
typedef union
{
    struct
    {
        DWORD State      : 4;   // bits 0-3
        DWORD            : 4;   // unnamed padding
        DWORD EnableMode : 2;   // bits 8-9
        DWORD            : 22;  // unnamed padding
    };
    DWORD dword;                // the whole register value
} DCPERFCOUNTER_CNTL;
// Layout of the BIU performance monitor control register value: only a
// 3-bit State field in the low bits is defined.
typedef union
{
    struct
    {
        DWORD State : 3;    // bits 0-2
        DWORD       : 29;   // unnamed padding
    };
    DWORD dword;            // the whole register value
} BIUPERFCOUNTER_CNTL;
// Layout of the IOC performance monitor control register value: only a
// 4-bit State field in the low bits is defined.
typedef union
{
    struct
    {
        DWORD State : 4;    // bits 0-3
        DWORD       : 28;   // unnamed padding
    };
    DWORD dword;            // the whole register value
} IOCPERFCOUNTER_CNTL;
// Header epilogue: undo, in reverse order, the state changes made at the top
// of this file.

// Restore the bit-field ordering that was in effect before this header
// switched it to lsb_to_msb for the PPC compiler.
#if defined(_M_PPCBE)
#pragma bitfield_order(pop)
#endif
// Restore the warning state saved by the matching '#pragma warning(push)'
// (warnings 4201 and 4200 were disabled for this header only).
#pragma warning(pop)
// Close the extern "C" block that was opened for C++ translation units.
#ifdef __cplusplus
};
#endif
#endif /* _D3D9GPU_H_ */