diff --git a/etnaviv/etnaviv_drm.h b/etnaviv/etnaviv_drm.h index 110cc73b..0d5c49dc 100644 --- a/etnaviv/etnaviv_drm.h +++ b/etnaviv/etnaviv_drm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * Copyright (C) 2015 Etnaviv Project * @@ -54,6 +55,12 @@ struct drm_etnaviv_timespec { #define ETNAVIV_PARAM_GPU_FEATURES_4 0x07 #define ETNAVIV_PARAM_GPU_FEATURES_5 0x08 #define ETNAVIV_PARAM_GPU_FEATURES_6 0x09 +#define ETNAVIV_PARAM_GPU_FEATURES_7 0x0a +#define ETNAVIV_PARAM_GPU_FEATURES_8 0x0b +#define ETNAVIV_PARAM_GPU_FEATURES_9 0x0c +#define ETNAVIV_PARAM_GPU_FEATURES_10 0x0d +#define ETNAVIV_PARAM_GPU_FEATURES_11 0x0e +#define ETNAVIV_PARAM_GPU_FEATURES_12 0x0f #define ETNAVIV_PARAM_GPU_STREAM_COUNT 0x10 #define ETNAVIV_PARAM_GPU_REGISTER_MAX 0x11 diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index f784f248..c363b67f 100644 --- a/include/drm/amdgpu_drm.h +++ b/include/drm/amdgpu_drm.h @@ -160,6 +160,7 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_ALLOC_CTX 1 #define AMDGPU_CTX_OP_FREE_CTX 2 #define AMDGPU_CTX_OP_QUERY_STATE 3 +#define AMDGPU_CTX_OP_QUERY_STATE2 4 /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 @@ -170,6 +171,13 @@ union drm_amdgpu_bo_list { /* unknown cause */ #define AMDGPU_CTX_UNKNOWN_RESET 3 +/* indicate gpu reset occured after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET (1<<0) +/* indicate vram lost occured after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1) +/* indicate some job from this context once cause gpu hang */ +#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2) + /* Context priority level */ #define AMDGPU_CTX_PRIORITY_UNSET -2048 #define AMDGPU_CTX_PRIORITY_VERY_LOW -1023 @@ -610,6 +618,8 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_SOS 0x0c /* Subquery id: Query PSP ASD firmware version */ #define AMDGPU_INFO_FW_ASD 0x0d + /* Subquery id: Query VCN firmware version */ + #define AMDGPU_INFO_FW_VCN 0x0e /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f /* the used VRAM size */ @@ -798,6 +808,7 @@ struct drm_amdgpu_info_firmware { #define AMDGPU_VRAM_TYPE_GDDR5 5 #define AMDGPU_VRAM_TYPE_HBM 6 #define AMDGPU_VRAM_TYPE_DDR3 7 +#define AMDGPU_VRAM_TYPE_DDR4 8 struct drm_amdgpu_info_device { /** PCI Device ID */ diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index 5597a871..5f9fadbd 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -38,11 +38,11 @@ extern "C" { #define DRM_DISPLAY_MODE_LEN 32 #define DRM_PROP_NAME_LEN 32 -#define DRM_MODE_TYPE_BUILTIN (1<<0) -#define DRM_MODE_TYPE_CLOCK_C ((1<<1) | DRM_MODE_TYPE_BUILTIN) -#define DRM_MODE_TYPE_CRTC_C ((1<<2) | DRM_MODE_TYPE_BUILTIN) +#define DRM_MODE_TYPE_BUILTIN (1<<0) /* deprecated */ +#define DRM_MODE_TYPE_CLOCK_C ((1<<1) | DRM_MODE_TYPE_BUILTIN) /* deprecated */ +#define DRM_MODE_TYPE_CRTC_C ((1<<2) | DRM_MODE_TYPE_BUILTIN) /* deprecated */ #define DRM_MODE_TYPE_PREFERRED (1<<3) -#define DRM_MODE_TYPE_DEFAULT (1<<4) +#define DRM_MODE_TYPE_DEFAULT (1<<4) /* deprecated */ #define DRM_MODE_TYPE_USERDEF (1<<5) #define DRM_MODE_TYPE_DRIVER (1<<6) @@ -66,8 +66,8 @@ extern "C" { #define DRM_MODE_FLAG_PCSYNC (1<<7) #define DRM_MODE_FLAG_NCSYNC (1<<8) #define DRM_MODE_FLAG_HSKEW (1<<9) /* hskew provided */ -#define DRM_MODE_FLAG_BCAST (1<<10) -#define DRM_MODE_FLAG_PIXMUX (1<<11) +#define DRM_MODE_FLAG_BCAST (1<<10) /* deprecated */ +#define DRM_MODE_FLAG_PIXMUX (1<<11) /* deprecated */ #define DRM_MODE_FLAG_DBLCLK (1<<12) #define DRM_MODE_FLAG_CLKDIV2 (1<<13) /* @@ -173,6 +173,10 @@ extern "C" { DRM_MODE_REFLECT_X | \ DRM_MODE_REFLECT_Y) +/* Content Protection Flags */ +#define DRM_MODE_CONTENT_PROTECTION_UNDESIRED 0 +#define DRM_MODE_CONTENT_PROTECTION_DESIRED 1 +#define DRM_MODE_CONTENT_PROTECTION_ENABLED 2 struct drm_mode_modeinfo { __u32 clock; @@ -341,7 +345,7 @@ struct drm_mode_get_connector { __u32 pad; }; -#define DRM_MODE_PROP_PENDING (1<<0) +#define DRM_MODE_PROP_PENDING (1<<0) /* deprecated, do not use */ #define DRM_MODE_PROP_RANGE (1<<1) #define DRM_MODE_PROP_IMMUTABLE (1<<2) #define DRM_MODE_PROP_ENUM (1<<3) /* enumerated type with text strings */ @@ -576,8 +580,11 @@ struct drm_mode_crtc_lut { }; struct drm_color_ctm { - /* Conversion matrix in S31.32 format. */ - __s64 matrix[9]; + /* + * Conversion matrix in S31.32 sign-magnitude + * (not two's complement!) format. + */ + __u64 matrix[9]; }; struct drm_color_lut { diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 5ebe0462..16e452aa 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -86,6 +86,62 @@ enum i915_mocs_table_index { I915_MOCS_CACHED, }; +/* + * Different engines serve different roles, and there may be more than one + * engine serving each role. enum drm_i915_gem_engine_class provides a + * classification of the role of the engine, which may be used when requesting + * operations to be performed on a certain subset of engines, or for providing + * information about that group. + */ +enum drm_i915_gem_engine_class { + I915_ENGINE_CLASS_RENDER = 0, + I915_ENGINE_CLASS_COPY = 1, + I915_ENGINE_CLASS_VIDEO = 2, + I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, + + I915_ENGINE_CLASS_INVALID = -1 +}; + +/** + * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915 + * + */ + +enum drm_i915_pmu_engine_sample { + I915_SAMPLE_BUSY = 0, + I915_SAMPLE_WAIT = 1, + I915_SAMPLE_SEMA = 2 +}; + +#define I915_PMU_SAMPLE_BITS (4) +#define I915_PMU_SAMPLE_MASK (0xf) +#define I915_PMU_SAMPLE_INSTANCE_BITS (8) +#define I915_PMU_CLASS_SHIFT \ + (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS) + +#define __I915_PMU_ENGINE(class, instance, sample) \ + ((class) << I915_PMU_CLASS_SHIFT | \ + (instance) << I915_PMU_SAMPLE_BITS | \ + (sample)) + +#define I915_PMU_ENGINE_BUSY(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY) + +#define I915_PMU_ENGINE_WAIT(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT) + +#define I915_PMU_ENGINE_SEMA(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) + +#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) + +#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) +#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) +#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) +#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) + +#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use @@ -260,6 +316,9 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 #define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 #define DRM_I915_PERF_OPEN 0x36 +#define DRM_I915_PERF_ADD_CONFIG 0x37 +#define DRM_I915_PERF_REMOVE_CONFIG 0x38 +#define DRM_I915_QUERY 0x39 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -315,6 +374,9 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) #define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) +#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) +#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) +#define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -393,10 +455,20 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_MIN_EU_IN_POOL 39 #define I915_PARAM_MMAP_GTT_VERSION 40 -/* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution * priorities and the driver will attempt to execute batches in priority order. + * The param returns a capability bitmask, nonzero implies that the scheduler + * is enabled, with different features present according to the mask. + * + * The initial priority for each batch is supplied by the context and is + * controlled via I915_CONTEXT_PARAM_PRIORITY. */ #define I915_PARAM_HAS_SCHEDULER 41 +#define I915_SCHEDULER_CAP_ENABLED (1ul << 0) +#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) +#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) + #define I915_PARAM_HUC_STATUS 42 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of @@ -412,6 +484,51 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_FENCE 44 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture + * user specified bufffers for post-mortem debugging of GPU hangs. See + * EXEC_OBJECT_CAPTURE. + */ +#define I915_PARAM_HAS_EXEC_CAPTURE 45 + +#define I915_PARAM_SLICE_MASK 46 + +/* Assuming it's uniform for each slice, this queries the mask of subslices + * per-slice for this system. + */ +#define I915_PARAM_SUBSLICE_MASK 47 + +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying the batch buffer + * as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST. + */ +#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY. + */ +#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 + +/* + * Query whether every context (both per-file default and user created) is + * isolated (insofar as HW supports). If this parameter is not true, then + * freshly created contexts may inherit values from an existing context, + * rather than default HW values. If true, it also ensures (insofar as HW + * supports) that all state set by this context will not leak to any other + * context. + * + * As not every engine across every gen support contexts, the returned + * value reports the support of context isolation for individual engines by + * returning a bitmask of each engine class set to true if that class supports + * isolation. + */ +#define I915_PARAM_HAS_CONTEXT_ISOLATION 50 + +/* Frequency of the command streamer timestamps given by the *_TIMESTAMP + * registers. This used to be fixed per platform but from CNL onwards, this + * might vary depending on the parts. + */ +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 + typedef struct drm_i915_getparam { __s32 param; /* @@ -666,6 +783,8 @@ struct drm_i915_gem_relocation_entry { #define I915_GEM_DOMAIN_VERTEX 0x00000020 /** GTT domain - aperture and scanout */ #define I915_GEM_DOMAIN_GTT 0x00000040 +/** WC domain - uncached access */ +#define I915_GEM_DOMAIN_WC 0x00000080 /** @} */ struct drm_i915_gem_exec_object { @@ -773,8 +892,15 @@ struct drm_i915_gem_exec_object2 { * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously. */ #define EXEC_OBJECT_ASYNC (1<<6) +/* Request that the contents of this execobject be copied into the error + * state upon a GPU hang involving this batch for post-mortem debugging. + * These buffers are recorded in no particular order as "user" in + * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see + * if the kernel supports this flag. + */ +#define EXEC_OBJECT_CAPTURE (1<<7) /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1) __u64 flags; union { @@ -784,6 +910,18 @@ struct drm_i915_gem_exec_object2 { __u64 rsvd2; }; +struct drm_i915_gem_exec_fence { + /** + * User's handle for a drm_syncobj to wait on or signal. + */ + __u32 handle; + +#define I915_EXEC_FENCE_WAIT (1<<0) +#define I915_EXEC_FENCE_SIGNAL (1<<1) +#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1)) + __u32 flags; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs @@ -798,7 +936,11 @@ struct drm_i915_gem_execbuffer2 { __u32 DR1; __u32 DR4; __u32 num_cliprects; - /** This is a struct drm_clip_rect *cliprects */ + /** + * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY + * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a + * struct drm_i915_gem_exec_fence *fences. + */ __u64 cliprects_ptr; #define I915_EXEC_RING_MASK (7<<0) #define I915_EXEC_DEFAULT (0<<0) @@ -889,7 +1031,24 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_OUT (1<<17) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1)) +/* + * Traditionally the execbuf ioctl has only considered the final element in + * the execobject[] to be the executable batch. Often though, the client + * will known the batch object prior to construction and being able to place + * it into the execobject[] array first can simplify the relocation tracking. + * Setting I915_EXEC_BATCH_FIRST tells execbuf to use element 0 of the + * execobject[] as the * batch instead (the default is to use the last + * element). + */ +#define I915_EXEC_BATCH_FIRST (1<<18) + +/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr + * define an array of i915_gem_exec_fence structures which specify a set of + * dma fences to wait upon or signal. + */ +#define I915_EXEC_FENCE_ARRAY (1<<19) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ @@ -1201,7 +1360,9 @@ struct drm_intel_overlay_attrs { * active on a given plane. */ -#define I915_SET_COLORKEY_NONE (1<<0) /* disable color key matching */ +#define I915_SET_COLORKEY_NONE (1<<0) /* Deprecated. Instead set + * flags==0 to disable colorkeying. + */ #define I915_SET_COLORKEY_DESTINATION (1<<1) #define I915_SET_COLORKEY_SOURCE (1<<2) struct drm_intel_sprite_colorkey { @@ -1239,14 +1400,16 @@ struct drm_i915_reg_read { * be specified */ __u64 offset; +#define I915_REG_READ_8B_WA (1ul << 0) + __u64 val; /* Return value */ }; /* Known registers: * * Render engine timestamp - 0x2358 + 64bit - gen7+ * - Note this register returns an invalid value if using the default - * single instruction 8byte read, in order to workaround that use - * offset (0x2538 | 1) instead. + * single instruction 8byte read, in order to workaround that pass + * flag I915_REG_READ_8B_WA in offset field. * */ @@ -1289,17 +1452,26 @@ struct drm_i915_gem_context_param { #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 #define I915_CONTEXT_PARAM_BANNABLE 0x5 +#define I915_CONTEXT_PARAM_PRIORITY 0x6 +#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ +#define I915_CONTEXT_DEFAULT_PRIORITY 0 +#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ __u64 value; }; enum drm_i915_oa_format { - I915_OA_FORMAT_A13 = 1, - I915_OA_FORMAT_A29, - I915_OA_FORMAT_A13_B8_C8, - I915_OA_FORMAT_B4_C8, - I915_OA_FORMAT_A45_B8_C8, - I915_OA_FORMAT_B4_C8_A16, - I915_OA_FORMAT_C4_B8, + I915_OA_FORMAT_A13 = 1, /* HSW only */ + I915_OA_FORMAT_A29, /* HSW only */ + I915_OA_FORMAT_A13_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8, /* HSW only */ + I915_OA_FORMAT_A45_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8_A16, /* HSW only */ + I915_OA_FORMAT_C4_B8, /* HSW+ */ + + /* Gen8+ */ + I915_OA_FORMAT_A12, + I915_OA_FORMAT_A12_B8_C8, + I915_OA_FORMAT_A32u40_A4u32_B8_C8, I915_OA_FORMAT_MAX /* non-ABI */ }; @@ -1424,6 +1596,127 @@ enum drm_i915_perf_record_type { DRM_I915_PERF_RECORD_MAX /* non-ABI */ }; +/** + * Structure to upload perf dynamic configuration into the kernel. + */ +struct drm_i915_perf_oa_config { + /** String formatted like "%08x-%04x-%04x-%04x-%012x" */ + char uuid[36]; + + __u32 n_mux_regs; + __u32 n_boolean_regs; + __u32 n_flex_regs; + + /* + * These fields are pointers to tuples of u32 values (register address, + * value). For example the expected length of the buffer pointed by + * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs). + */ + __u64 mux_regs_ptr; + __u64 boolean_regs_ptr; + __u64 flex_regs_ptr; +}; + +struct drm_i915_query_item { + __u64 query_id; +#define DRM_I915_QUERY_TOPOLOGY_INFO 1 + + /* + * When set to zero by userspace, this is filled with the size of the + * data to be written at the data_ptr pointer. The kernel sets this + * value to a negative value to signal an error on a particular query + * item. + */ + __s32 length; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * Data will be written at the location pointed by data_ptr when the + * value of length matches the length of the data to be written by the + * kernel. + */ + __u64 data_ptr; +}; + +struct drm_i915_query { + __u32 num_items; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * This points to an array of num_items drm_i915_query_item structures. + */ + __u64 items_ptr; +}; + +/* + * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO : + * + * data: contains the 3 pieces of information : + * + * - the slice mask with one bit per slice telling whether a slice is + * available. The availability of slice X can be queried with the following + * formula : + * + * (data[X / 8] >> (X % 8)) & 1 + * + * - the subslice mask for each slice with one bit per subslice telling + * whether a subslice is available. The availability of subslice Y in slice + * X can be queried with the following formula : + * + * (data[subslice_offset + + * X * subslice_stride + + * Y / 8] >> (Y % 8)) & 1 + * + * - the EU mask for each subslice in each slice with one bit per EU telling + * whether an EU is available. The availability of EU Z in subslice Y in + * slice X can be queried with the following formula : + * + * (data[eu_offset + + * (X * max_subslices + Y) * eu_stride + + * Z / 8] >> (Z % 8)) & 1 + */ +struct drm_i915_query_topology_info { + /* + * Unused for now. Must be cleared to zero. + */ + __u16 flags; + + __u16 max_slices; + __u16 max_subslices; + __u16 max_eus_per_subslice; + + /* + * Offset in data[] at which the subslice masks are stored. + */ + __u16 subslice_offset; + + /* + * Stride at which each of the subslice masks for each slice are + * stored. + */ + __u16 subslice_stride; + + /* + * Offset in data[] at which the EU masks are stored. + */ + __u16 eu_offset; + + /* + * Stride at which each of the EU masks for each subslice are stored. + */ + __u16 eu_stride; + + __u8 data[]; +}; + #if defined(__cplusplus) } #endif diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h index cb077821..d42105c8 100644 --- a/include/drm/nouveau_drm.h +++ b/include/drm/nouveau_drm.h @@ -104,6 +104,7 @@ struct drm_nouveau_setparam { #define NOUVEAU_GEM_DOMAIN_MAPPABLE (1 << 3) #define NOUVEAU_GEM_DOMAIN_COHERENT (1 << 4) +#define NOUVEAU_GEM_TILE_COMP 0x00030000 /* nv50-only */ #define NOUVEAU_GEM_TILE_LAYOUT_MASK 0x0000ff00 #define NOUVEAU_GEM_TILE_16BPP 0x00000001 #define NOUVEAU_GEM_TILE_32BPP 0x00000002 diff --git a/include/drm/vc4_drm.h b/include/drm/vc4_drm.h index 3415a4b7..4117117b 100644 --- a/include/drm/vc4_drm.h +++ b/include/drm/vc4_drm.h @@ -42,6 +42,9 @@ extern "C" { #define DRM_VC4_GET_TILING 0x09 #define DRM_VC4_LABEL_BO 0x0a #define DRM_VC4_GEM_MADVISE 0x0b +#define DRM_VC4_PERFMON_CREATE 0x0c +#define DRM_VC4_PERFMON_DESTROY 0x0d +#define DRM_VC4_PERFMON_GET_VALUES 0x0e #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) @@ -55,6 +58,9 @@ extern "C" { #define DRM_IOCTL_VC4_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling) #define DRM_IOCTL_VC4_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo) #define DRM_IOCTL_VC4_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GEM_MADVISE, struct drm_vc4_gem_madvise) +#define DRM_IOCTL_VC4_PERFMON_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_CREATE, struct drm_vc4_perfmon_create) +#define DRM_IOCTL_VC4_PERFMON_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_DESTROY, struct drm_vc4_perfmon_destroy) +#define DRM_IOCTL_VC4_PERFMON_GET_VALUES DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_GET_VALUES, struct drm_vc4_perfmon_get_values) struct drm_vc4_submit_rcl_surface { __u32 hindex; /* Handle index, or ~0 if not present. */ @@ -173,6 +179,15 @@ struct drm_vc4_submit_cl { * wait ioctl). */ __u64 seqno; + + /* ID of the perfmon to attach to this job. 0 means no perfmon. */ + __u32 perfmonid; + + /* Unused field to align this struct on 64 bits. Must be set to 0. + * If one ever needs to add an u32 field to this struct, this field + * can be used. + */ + __u32 pad2; }; /** @@ -308,6 +323,7 @@ struct drm_vc4_get_hang_state { #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5 #define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER 6 #define DRM_VC4_PARAM_SUPPORTS_MADVISE 7 +#define DRM_VC4_PARAM_SUPPORTS_PERFMON 8 struct drm_vc4_get_param { __u32 param; @@ -352,6 +368,66 @@ struct drm_vc4_gem_madvise { __u32 pad; }; +enum { + VC4_PERFCNT_FEP_VALID_PRIMS_NO_RENDER, + VC4_PERFCNT_FEP_VALID_PRIMS_RENDER, + VC4_PERFCNT_FEP_CLIPPED_QUADS, + VC4_PERFCNT_FEP_VALID_QUADS, + VC4_PERFCNT_TLB_QUADS_NOT_PASSING_STENCIL, + VC4_PERFCNT_TLB_QUADS_NOT_PASSING_Z_AND_STENCIL, + VC4_PERFCNT_TLB_QUADS_PASSING_Z_AND_STENCIL, + VC4_PERFCNT_TLB_QUADS_ZERO_COVERAGE, + VC4_PERFCNT_TLB_QUADS_NON_ZERO_COVERAGE, + VC4_PERFCNT_TLB_QUADS_WRITTEN_TO_COLOR_BUF, + VC4_PERFCNT_PLB_PRIMS_OUTSIDE_VIEWPORT, + VC4_PERFCNT_PLB_PRIMS_NEED_CLIPPING, + VC4_PERFCNT_PSE_PRIMS_REVERSED, + VC4_PERFCNT_QPU_TOTAL_IDLE_CYCLES, + VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_VERTEX_COORD_SHADING, + VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_FRAGMENT_SHADING, + VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_EXEC_VALID_INST, + VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_TMUS, + VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_SCOREBOARD, + VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_VARYINGS, + VC4_PERFCNT_QPU_TOTAL_INST_CACHE_HIT, + VC4_PERFCNT_QPU_TOTAL_INST_CACHE_MISS, + VC4_PERFCNT_QPU_TOTAL_UNIFORM_CACHE_HIT, + VC4_PERFCNT_QPU_TOTAL_UNIFORM_CACHE_MISS, + VC4_PERFCNT_TMU_TOTAL_TEXT_QUADS_PROCESSED, + VC4_PERFCNT_TMU_TOTAL_TEXT_CACHE_MISS, + VC4_PERFCNT_VPM_TOTAL_CLK_CYCLES_VDW_STALLED, + VC4_PERFCNT_VPM_TOTAL_CLK_CYCLES_VCD_STALLED, + VC4_PERFCNT_L2C_TOTAL_L2_CACHE_HIT, + VC4_PERFCNT_L2C_TOTAL_L2_CACHE_MISS, + VC4_PERFCNT_NUM_EVENTS, +}; + +#define DRM_VC4_MAX_PERF_COUNTERS 16 + +struct drm_vc4_perfmon_create { + __u32 id; + __u32 ncounters; + __u8 events[DRM_VC4_MAX_PERF_COUNTERS]; +}; + +struct drm_vc4_perfmon_destroy { + __u32 id; +}; + +/* + * Returns the values of the performance counters tracked by this + * perfmon (as an array of ncounters u64 values). + * + * No implicit synchronization is performed, so the user has to + * guarantee that any jobs using this perfmon have already been + * completed (probably by blocking on the seqno returned by the + * last exec that used the perfmon). + */ +struct drm_vc4_perfmon_get_values { + __u32 id; + __u64 values_ptr; +}; + #if defined(__cplusplus) } #endif diff --git a/include/drm/virtgpu_drm.h b/include/drm/virtgpu_drm.h index 91a31ffe..9a781f06 100644 --- a/include/drm/virtgpu_drm.h +++ b/include/drm/virtgpu_drm.h @@ -63,6 +63,7 @@ struct drm_virtgpu_execbuffer { }; #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ +#define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */ struct drm_virtgpu_getparam { __u64 param; diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h index d325a410..0bc784f5 100644 --- a/include/drm/vmwgfx_drm.h +++ b/include/drm/vmwgfx_drm.h @@ -41,6 +41,7 @@ extern "C" { #define DRM_VMW_GET_PARAM 0 #define DRM_VMW_ALLOC_DMABUF 1 #define DRM_VMW_UNREF_DMABUF 2 +#define DRM_VMW_HANDLE_CLOSE 2 #define DRM_VMW_CURSOR_BYPASS 3 /* guarded by DRM_VMW_PARAM_NUM_STREAMS != 0*/ #define DRM_VMW_CONTROL_STREAM 4 @@ -296,13 +297,17 @@ union drm_vmw_surface_reference_arg { * @version: Allows expanding the execbuf ioctl parameters without breaking * backwards compatibility, since user-space will always tell the kernel * which version it uses. - * @flags: Execbuf flags. None currently. + * @flags: Execbuf flags. + * @imported_fence_fd: FD for a fence imported from another device * * Argument to the DRM_VMW_EXECBUF Ioctl. */ #define DRM_VMW_EXECBUF_VERSION 2 +#define DRM_VMW_EXECBUF_FLAG_IMPORT_FENCE_FD (1 << 0) +#define DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD (1 << 1) + struct drm_vmw_execbuf_arg { __u64 commands; __u32 command_size; @@ -311,7 +316,7 @@ struct drm_vmw_execbuf_arg { __u32 version; __u32 flags; __u32 context_handle; - __u32 pad64; + __s32 imported_fence_fd; }; /** @@ -327,6 +332,7 @@ struct drm_vmw_execbuf_arg { * @passed_seqno: The highest seqno number processed by the hardware * so far. This can be used to mark user-space fence objects as signaled, and * to determine whether a fence seqno might be stale. + * @fd: FD associated with the fence, -1 if not exported * @error: This member should've been set to -EFAULT on submission. * The following actions should be take on completion: * error == -EFAULT: Fence communication failed. The host is synchronized. @@ -344,7 +350,7 @@ struct drm_vmw_fence_rep { __u32 mask; __u32 seqno; __u32 passed_seqno; - __u32 pad64; + __s32 fd; __s32 error; }; @@ -1092,6 +1098,29 @@ union drm_vmw_extended_context_arg { struct drm_vmw_context_arg rep; }; +/*************************************************************************/ +/* + * DRM_VMW_HANDLE_CLOSE - Close a user-space handle and release its + * underlying resource. + * + * Note that this ioctl is overlaid on the DRM_VMW_UNREF_DMABUF Ioctl. + * The ioctl arguments therefore need to be identical in layout. + * + */ + +/** + * struct drm_vmw_handle_close_arg + * + * @handle: Handle to close. + * + * Argument to the DRM_VMW_HANDLE_CLOSE Ioctl. + */ +struct drm_vmw_handle_close_arg { + __u32 handle; + __u32 pad64; +}; + + #if defined(__cplusplus) } #endif