1 /* 2 * Copyright © 2014-2018 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #ifndef _V3D_DRM_H_ 25 #define _V3D_DRM_H_ 26 27 #include "drm.h" 28 29 #if defined(__cplusplus) 30 extern "C" { 31 #endif 32 33 #define DRM_V3D_SUBMIT_CL 0x00 34 #define DRM_V3D_WAIT_BO 0x01 35 #define DRM_V3D_CREATE_BO 0x02 36 #define DRM_V3D_MMAP_BO 0x03 37 #define DRM_V3D_GET_PARAM 0x04 38 #define DRM_V3D_GET_BO_OFFSET 0x05 39 #define DRM_V3D_SUBMIT_TFU 0x06 40 #define DRM_V3D_SUBMIT_CSD 0x07 41 #define DRM_V3D_PERFMON_CREATE 0x08 42 #define DRM_V3D_PERFMON_DESTROY 0x09 43 #define DRM_V3D_PERFMON_GET_VALUES 0x0a 44 45 #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) 46 #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) 47 #define DRM_IOCTL_V3D_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_CREATE_BO, struct drm_v3d_create_bo) 48 #define DRM_IOCTL_V3D_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo) 49 #define DRM_IOCTL_V3D_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param) 50 #define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset) 51 #define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu) 52 #define DRM_IOCTL_V3D_SUBMIT_CSD DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd) 53 #define DRM_IOCTL_V3D_PERFMON_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_CREATE, \ 54 struct drm_v3d_perfmon_create) 55 #define DRM_IOCTL_V3D_PERFMON_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_DESTROY, \ 56 struct drm_v3d_perfmon_destroy) 57 #define DRM_IOCTL_V3D_PERFMON_GET_VALUES DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \ 58 struct drm_v3d_perfmon_get_values) 59 60 #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 61 62 /** 63 * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D 64 * engine. 65 * 66 * This asks the kernel to have the GPU execute an optional binner 67 * command list, and a render command list. 68 * 69 * The L1T, slice, L2C, L2T, and GCA caches will be flushed before 70 * each CL executes. The VCD cache should be flushed (if necessary) 71 * by the submitted CLs. The TLB writes are guaranteed to have been 72 * flushed by the time the render done IRQ happens, which is the 73 * trigger for out_sync. Any dirtying of cachelines by the job (only 74 * possible using TMU writes) must be flushed by the caller using the 75 * DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG flag. 76 */ 77 struct drm_v3d_submit_cl { 78 /* Pointer to the binner command list. 79 * 80 * This is the first set of commands executed, which runs the 81 * coordinate shader to determine where primitives land on the screen, 82 * then writes out the state updates and draw calls necessary per tile 83 * to the tile allocation BO. 84 * 85 * This BCL will block on any previous BCL submitted on the 86 * same FD, but not on any RCL or BCLs submitted by other 87 * clients -- that is left up to the submitter to control 88 * using in_sync_bcl if necessary. 89 */ 90 __u32 bcl_start; 91 92 /** End address of the BCL (first byte after the BCL) */ 93 __u32 bcl_end; 94 95 /* Offset of the render command list. 96 * 97 * This is the second set of commands executed, which will either 98 * execute the tiles that have been set up by the BCL, or a fixed set 99 * of tiles (in the case of RCL-only blits). 100 * 101 * This RCL will block on this submit's BCL, and any previous 102 * RCL submitted on the same FD, but not on any RCL or BCLs 103 * submitted by other clients -- that is left up to the 104 * submitter to control using in_sync_rcl if necessary. 105 */ 106 __u32 rcl_start; 107 108 /** End address of the RCL (first byte after the RCL) */ 109 __u32 rcl_end; 110 111 /** An optional sync object to wait on before starting the BCL. */ 112 __u32 in_sync_bcl; 113 /** An optional sync object to wait on before starting the RCL. */ 114 __u32 in_sync_rcl; 115 /** An optional sync object to place the completion fence in. */ 116 __u32 out_sync; 117 118 /* Offset of the tile alloc memory 119 * 120 * This is optional on V3D 3.3 (where the CL can set the value) but 121 * required on V3D 4.1. 122 */ 123 __u32 qma; 124 125 /** Size of the tile alloc memory. */ 126 __u32 qms; 127 128 /** Offset of the tile state data array. */ 129 __u32 qts; 130 131 /* Pointer to a u32 array of the BOs that are referenced by the job. 132 */ 133 __u64 bo_handles; 134 135 /* Number of BO handles passed in (size is that times 4). */ 136 __u32 bo_handle_count; 137 138 __u32 flags; 139 140 /* ID of the perfmon to attach to this job. 0 means no perfmon. */ 141 __u32 perfmon_id; 142 143 __u32 pad; 144 }; 145 146 /** 147 * struct drm_v3d_wait_bo - ioctl argument for waiting for 148 * completion of the last DRM_V3D_SUBMIT_CL on a BO. 149 * 150 * This is useful for cases where multiple processes might be 151 * rendering to a BO and you want to wait for all rendering to be 152 * completed. 153 */ 154 struct drm_v3d_wait_bo { 155 __u32 handle; 156 __u32 pad; 157 __u64 timeout_ns; 158 }; 159 160 /** 161 * struct drm_v3d_create_bo - ioctl argument for creating V3D BOs. 162 * 163 * There are currently no values for the flags argument, but it may be 164 * used in a future extension. 165 */ 166 struct drm_v3d_create_bo { 167 __u32 size; 168 __u32 flags; 169 /** Returned GEM handle for the BO. */ 170 __u32 handle; 171 /** 172 * Returned offset for the BO in the V3D address space. This offset 173 * is private to the DRM fd and is valid for the lifetime of the GEM 174 * handle. 175 * 176 * This offset value will always be nonzero, since various HW 177 * units treat 0 specially. 178 */ 179 __u32 offset; 180 }; 181 182 /** 183 * struct drm_v3d_mmap_bo - ioctl argument for mapping V3D BOs. 184 * 185 * This doesn't actually perform an mmap. Instead, it returns the 186 * offset you need to use in an mmap on the DRM device node. This 187 * means that tools like valgrind end up knowing about the mapped 188 * memory. 189 * 190 * There are currently no values for the flags argument, but it may be 191 * used in a future extension. 192 */ 193 struct drm_v3d_mmap_bo { 194 /** Handle for the object being mapped. */ 195 __u32 handle; 196 __u32 flags; 197 /** offset into the drm node to use for subsequent mmap call. */ 198 __u64 offset; 199 }; 200 201 enum drm_v3d_param { 202 DRM_V3D_PARAM_V3D_UIFCFG, 203 DRM_V3D_PARAM_V3D_HUB_IDENT1, 204 DRM_V3D_PARAM_V3D_HUB_IDENT2, 205 DRM_V3D_PARAM_V3D_HUB_IDENT3, 206 DRM_V3D_PARAM_V3D_CORE0_IDENT0, 207 DRM_V3D_PARAM_V3D_CORE0_IDENT1, 208 DRM_V3D_PARAM_V3D_CORE0_IDENT2, 209 DRM_V3D_PARAM_SUPPORTS_TFU, 210 DRM_V3D_PARAM_SUPPORTS_CSD, 211 DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH, 212 DRM_V3D_PARAM_SUPPORTS_PERFMON, 213 }; 214 215 struct drm_v3d_get_param { 216 __u32 param; 217 __u32 pad; 218 __u64 value; 219 }; 220 221 /** 222 * Returns the offset for the BO in the V3D address space for this DRM fd. 223 * This is the same value returned by drm_v3d_create_bo, if that was called 224 * from this DRM fd. 225 */ 226 struct drm_v3d_get_bo_offset { 227 __u32 handle; 228 __u32 offset; 229 }; 230 231 struct drm_v3d_submit_tfu { 232 __u32 icfg; 233 __u32 iia; 234 __u32 iis; 235 __u32 ica; 236 __u32 iua; 237 __u32 ioa; 238 __u32 ios; 239 __u32 coef[4]; 240 /* First handle is the output BO, following are other inputs. 241 * 0 for unused. 242 */ 243 __u32 bo_handles[4]; 244 /* sync object to block on before running the TFU job. Each TFU 245 * job will execute in the order submitted to its FD. Synchronization 246 * against rendering jobs requires using sync objects. 247 */ 248 __u32 in_sync; 249 /* Sync object to signal when the TFU job is done. */ 250 __u32 out_sync; 251 }; 252 253 /* Submits a compute shader for dispatch. This job will block on any 254 * previous compute shaders submitted on this fd, and any other 255 * synchronization must be performed with in_sync/out_sync. 256 */ 257 struct drm_v3d_submit_csd { 258 __u32 cfg[7]; 259 __u32 coef[4]; 260 261 /* Pointer to a u32 array of the BOs that are referenced by the job. 262 */ 263 __u64 bo_handles; 264 265 /* Number of BO handles passed in (size is that times 4). */ 266 __u32 bo_handle_count; 267 268 /* sync object to block on before running the CSD job. Each 269 * CSD job will execute in the order submitted to its FD. 270 * Synchronization against rendering/TFU jobs or CSD from 271 * other fds requires using sync objects. 272 */ 273 __u32 in_sync; 274 /* Sync object to signal when the CSD job is done. */ 275 __u32 out_sync; 276 277 /* ID of the perfmon to attach to this job. 0 means no perfmon. */ 278 __u32 perfmon_id; 279 }; 280 281 enum { 282 V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS, 283 V3D_PERFCNT_FEP_VALID_PRIMS, 284 V3D_PERFCNT_FEP_EZ_NFCLIP_QUADS, 285 V3D_PERFCNT_FEP_VALID_QUADS, 286 V3D_PERFCNT_TLB_QUADS_STENCIL_FAIL, 287 V3D_PERFCNT_TLB_QUADS_STENCILZ_FAIL, 288 V3D_PERFCNT_TLB_QUADS_STENCILZ_PASS, 289 V3D_PERFCNT_TLB_QUADS_ZERO_COV, 290 V3D_PERFCNT_TLB_QUADS_NONZERO_COV, 291 V3D_PERFCNT_TLB_QUADS_WRITTEN, 292 V3D_PERFCNT_PTB_PRIM_VIEWPOINT_DISCARD, 293 V3D_PERFCNT_PTB_PRIM_CLIP, 294 V3D_PERFCNT_PTB_PRIM_REV, 295 V3D_PERFCNT_QPU_IDLE_CYCLES, 296 V3D_PERFCNT_QPU_ACTIVE_CYCLES_VERTEX_COORD_USER, 297 V3D_PERFCNT_QPU_ACTIVE_CYCLES_FRAG, 298 V3D_PERFCNT_QPU_CYCLES_VALID_INSTR, 299 V3D_PERFCNT_QPU_CYCLES_TMU_STALL, 300 V3D_PERFCNT_QPU_CYCLES_SCOREBOARD_STALL, 301 V3D_PERFCNT_QPU_CYCLES_VARYINGS_STALL, 302 V3D_PERFCNT_QPU_IC_HIT, 303 V3D_PERFCNT_QPU_IC_MISS, 304 V3D_PERFCNT_QPU_UC_HIT, 305 V3D_PERFCNT_QPU_UC_MISS, 306 V3D_PERFCNT_TMU_TCACHE_ACCESS, 307 V3D_PERFCNT_TMU_TCACHE_MISS, 308 V3D_PERFCNT_VPM_VDW_STALL, 309 V3D_PERFCNT_VPM_VCD_STALL, 310 V3D_PERFCNT_BIN_ACTIVE, 311 V3D_PERFCNT_RDR_ACTIVE, 312 V3D_PERFCNT_L2T_HITS, 313 V3D_PERFCNT_L2T_MISSES, 314 V3D_PERFCNT_CYCLE_COUNT, 315 V3D_PERFCNT_QPU_CYCLES_STALLED_VERTEX_COORD_USER, 316 V3D_PERFCNT_QPU_CYCLES_STALLED_FRAGMENT, 317 V3D_PERFCNT_PTB_PRIMS_BINNED, 318 V3D_PERFCNT_AXI_WRITES_WATCH_0, 319 V3D_PERFCNT_AXI_READS_WATCH_0, 320 V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_0, 321 V3D_PERFCNT_AXI_READ_STALLS_WATCH_0, 322 V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_0, 323 V3D_PERFCNT_AXI_READ_BYTES_WATCH_0, 324 V3D_PERFCNT_AXI_WRITES_WATCH_1, 325 V3D_PERFCNT_AXI_READS_WATCH_1, 326 V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_1, 327 V3D_PERFCNT_AXI_READ_STALLS_WATCH_1, 328 V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_1, 329 V3D_PERFCNT_AXI_READ_BYTES_WATCH_1, 330 V3D_PERFCNT_TLB_PARTIAL_QUADS, 331 V3D_PERFCNT_TMU_CONFIG_ACCESSES, 332 V3D_PERFCNT_L2T_NO_ID_STALL, 333 V3D_PERFCNT_L2T_COM_QUE_STALL, 334 V3D_PERFCNT_L2T_TMU_WRITES, 335 V3D_PERFCNT_TMU_ACTIVE_CYCLES, 336 V3D_PERFCNT_TMU_STALLED_CYCLES, 337 V3D_PERFCNT_CLE_ACTIVE, 338 V3D_PERFCNT_L2T_TMU_READS, 339 V3D_PERFCNT_L2T_CLE_READS, 340 V3D_PERFCNT_L2T_VCD_READS, 341 V3D_PERFCNT_L2T_TMUCFG_READS, 342 V3D_PERFCNT_L2T_SLC0_READS, 343 V3D_PERFCNT_L2T_SLC1_READS, 344 V3D_PERFCNT_L2T_SLC2_READS, 345 V3D_PERFCNT_L2T_TMU_W_MISSES, 346 V3D_PERFCNT_L2T_TMU_R_MISSES, 347 V3D_PERFCNT_L2T_CLE_MISSES, 348 V3D_PERFCNT_L2T_VCD_MISSES, 349 V3D_PERFCNT_L2T_TMUCFG_MISSES, 350 V3D_PERFCNT_L2T_SLC0_MISSES, 351 V3D_PERFCNT_L2T_SLC1_MISSES, 352 V3D_PERFCNT_L2T_SLC2_MISSES, 353 V3D_PERFCNT_CORE_MEM_WRITES, 354 V3D_PERFCNT_L2T_MEM_WRITES, 355 V3D_PERFCNT_PTB_MEM_WRITES, 356 V3D_PERFCNT_TLB_MEM_WRITES, 357 V3D_PERFCNT_CORE_MEM_READS, 358 V3D_PERFCNT_L2T_MEM_READS, 359 V3D_PERFCNT_PTB_MEM_READS, 360 V3D_PERFCNT_PSE_MEM_READS, 361 V3D_PERFCNT_TLB_MEM_READS, 362 V3D_PERFCNT_GMP_MEM_READS, 363 V3D_PERFCNT_PTB_W_MEM_WORDS, 364 V3D_PERFCNT_TLB_W_MEM_WORDS, 365 V3D_PERFCNT_PSE_R_MEM_WORDS, 366 V3D_PERFCNT_TLB_R_MEM_WORDS, 367 V3D_PERFCNT_TMU_MRU_HITS, 368 V3D_PERFCNT_COMPUTE_ACTIVE, 369 V3D_PERFCNT_NUM, 370 }; 371 372 #define DRM_V3D_MAX_PERF_COUNTERS 32 373 374 struct drm_v3d_perfmon_create { 375 __u32 id; 376 __u32 ncounters; 377 __u8 counters[DRM_V3D_MAX_PERF_COUNTERS]; 378 }; 379 380 struct drm_v3d_perfmon_destroy { 381 __u32 id; 382 }; 383 384 /* 385 * Returns the values of the performance counters tracked by this 386 * perfmon (as an array of ncounters u64 values). 387 * 388 * No implicit synchronization is performed, so the user has to 389 * guarantee that any jobs using this perfmon have already been 390 * completed (probably by blocking on the seqno returned by the 391 * last exec that used the perfmon). 392 */ 393 struct drm_v3d_perfmon_get_values { 394 __u32 id; 395 __u32 pad; 396 __u64 values_ptr; 397 }; 398 399 #if defined(__cplusplus) 400 } 401 #endif 402 403 #endif /* _V3D_DRM_H_ */ 404