/*
 * Copyright © 2014-2018 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef _V3D_DRM_H_
#define _V3D_DRM_H_

#include "drm.h"

#if defined(__cplusplus)
extern "C" {
#endif

#define DRM_V3D_SUBMIT_CL                  0x00
#define DRM_V3D_WAIT_BO                    0x01
#define DRM_V3D_CREATE_BO                  0x02
#define DRM_V3D_MMAP_BO                    0x03
#define DRM_V3D_GET_PARAM                  0x04
#define DRM_V3D_GET_BO_OFFSET              0x05
#define DRM_V3D_SUBMIT_TFU                 0x06
#define DRM_V3D_SUBMIT_CSD                 0x07
#define DRM_V3D_PERFMON_CREATE             0x08
#define DRM_V3D_PERFMON_DESTROY            0x09
#define DRM_V3D_PERFMON_GET_VALUES         0x0a

#define DRM_IOCTL_V3D_SUBMIT_CL            DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
#define DRM_IOCTL_V3D_WAIT_BO              DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
#define DRM_IOCTL_V3D_CREATE_BO            DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_CREATE_BO, struct drm_v3d_create_bo)
#define DRM_IOCTL_V3D_MMAP_BO              DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo)
#define DRM_IOCTL_V3D_GET_PARAM            DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
#define DRM_IOCTL_V3D_GET_BO_OFFSET        DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
#define DRM_IOCTL_V3D_SUBMIT_TFU           DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
#define DRM_IOCTL_V3D_SUBMIT_CSD           DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
#define DRM_IOCTL_V3D_PERFMON_CREATE       DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_CREATE, \
						    struct drm_v3d_perfmon_create)
#define DRM_IOCTL_V3D_PERFMON_DESTROY      DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_DESTROY, \
						    struct drm_v3d_perfmon_destroy)
#define DRM_IOCTL_V3D_PERFMON_GET_VALUES   DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \
						    struct drm_v3d_perfmon_get_values)

#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE      0x01
#define DRM_V3D_SUBMIT_EXTENSION           0x02

/* struct drm_v3d_extension - ioctl extensions
 *
 * Linked list of generic extensions, where id identifies the type of the
 * extension data that follows. DRM_V3D_EXT_ID_* values are therefore used
 * in id to identify the extension type.
 */
struct drm_v3d_extension {
	__u64 next;
	__u32 id;
#define DRM_V3D_EXT_ID_MULTI_SYNC		0x01
	__u32 flags; /* mbz */
};

/* struct drm_v3d_sem - wait/signal semaphore
 *
 * For a binary semaphore, only the syncobj handle is used and the flags and
 * point fields are ignored. point is defined for the timeline syncobj
 * feature.
 */
struct drm_v3d_sem {
	__u32 handle; /* syncobj */
	/* rsv below, for future uses */
	__u32 flags;
	__u64 point; /* for timeline sem support */
	__u64 mbz[2]; /* must be zero, rsv */
};

/* Enum for each of the V3D queues. */
enum v3d_queue {
	V3D_BIN,
	V3D_RENDER,
	V3D_TFU,
	V3D_CSD,
	V3D_CACHE_CLEAN,
};

/**
 * struct drm_v3d_multi_sync - ioctl extension to add support for multiple
 * syncobjs on command submission.
 *
 * When an extension with id DRM_V3D_EXT_ID_MULTI_SYNC is provided, it points
 * to this extension type to define wait and signal dependencies, instead of
 * the single in/out sync entries on the submission structs. The wait_stage
 * field determines the stage at which the wait dependencies are applied.
 */
struct drm_v3d_multi_sync {
	struct drm_v3d_extension base;
	/* Array of wait and signal semaphores */
	__u64 in_syncs;
	__u64 out_syncs;

	/* Number of entries */
	__u32 in_sync_count;
	__u32 out_sync_count;

	/* set the stage (v3d_queue) to sync */
	__u32 wait_stage;

	__u32 pad; /* mbz */
};
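
/*
 * Example (illustrative sketch, not part of the UAPI definitions): building a
 * multi-sync extension with one wait and one signal semaphore and chaining it
 * onto a submission via its extensions pointer. The syncobj handles and the
 * submit variable (a struct drm_v3d_submit_cl, declared below) are
 * assumptions of the example; uintptr_t comes from <stdint.h>.
 *
 *	struct drm_v3d_sem wait_sem = { .handle = in_syncobj_handle };
 *	struct drm_v3d_sem signal_sem = { .handle = out_syncobj_handle };
 *	struct drm_v3d_multi_sync multi = {
 *		.base = {
 *			.id = DRM_V3D_EXT_ID_MULTI_SYNC,
 *			.next = 0,	// end of the extension list
 *		},
 *		.in_syncs = (__u64)(uintptr_t)&wait_sem,
 *		.out_syncs = (__u64)(uintptr_t)&signal_sem,
 *		.in_sync_count = 1,
 *		.out_sync_count = 1,
 *		.wait_stage = V3D_BIN,	// wait before the bin stage starts
 *	};
 *
 *	submit.flags |= DRM_V3D_SUBMIT_EXTENSION;
 *	submit.extensions = (__u64)(uintptr_t)&multi;
 */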

/**
 * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
 * engine.
 *
 * This asks the kernel to have the GPU execute an optional binner
 * command list, and a render command list.
 *
 * The L1T, slice, L2C, L2T, and GCA caches will be flushed before
 * each CL executes. The VCD cache should be flushed (if necessary)
 * by the submitted CLs. The TLB writes are guaranteed to have been
 * flushed by the time the render done IRQ happens, which is the
 * trigger for out_sync. Any dirtying of cachelines by the job (only
 * possible using TMU writes) must be flushed by the caller using the
 * DRM_V3D_SUBMIT_CL_FLUSH_CACHE flag.
 */
struct drm_v3d_submit_cl {
	/* Pointer to the binner command list.
	 *
	 * This is the first set of commands executed, which runs the
	 * coordinate shader to determine where primitives land on the screen,
	 * then writes out the state updates and draw calls necessary per tile
	 * to the tile allocation BO.
	 *
	 * This BCL will block on any previous BCL submitted on the
	 * same FD, but not on any RCL or BCLs submitted by other
	 * clients -- that is left up to the submitter to control
	 * using in_sync_bcl if necessary.
	 */
	__u32 bcl_start;

	/** End address of the BCL (first byte after the BCL) */
	__u32 bcl_end;

	/* Offset of the render command list.
	 *
	 * This is the second set of commands executed, which will either
	 * execute the tiles that have been set up by the BCL, or a fixed set
	 * of tiles (in the case of RCL-only blits).
	 *
	 * This RCL will block on this submit's BCL, and any previous
	 * RCL submitted on the same FD, but not on any RCL or BCLs
	 * submitted by other clients -- that is left up to the
	 * submitter to control using in_sync_rcl if necessary.
	 */
	__u32 rcl_start;

	/** End address of the RCL (first byte after the RCL) */
	__u32 rcl_end;

	/** An optional sync object to wait on before starting the BCL. */
	__u32 in_sync_bcl;
	/** An optional sync object to wait on before starting the RCL. */
	__u32 in_sync_rcl;
	/** An optional sync object to place the completion fence in. */
	__u32 out_sync;

	/* Offset of the tile alloc memory
	 *
	 * This is optional on V3D 3.3 (where the CL can set the value) but
	 * required on V3D 4.1.
	 */
	__u32 qma;

	/** Size of the tile alloc memory. */
	__u32 qms;

	/** Offset of the tile state data array. */
	__u32 qts;

	/* Pointer to a u32 array of the BOs that are referenced by the job.
	 */
	__u64 bo_handles;

	/* Number of BO handles passed in (size is that times 4). */
	__u32 bo_handle_count;

	/* DRM_V3D_SUBMIT_* properties */
	__u32 flags;

	/* ID of the perfmon to attach to this job. 0 means no perfmon. */
	__u32 perfmon_id;

	__u32 pad;

	/* Pointer to an array of ioctl extensions */
	__u64 extensions;
};
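
/*
 * Example (illustrative sketch, not part of the UAPI definitions): submitting
 * a binner + render job. The command-list addresses, tile buffer offsets, BO
 * handles and syncobj handle are assumed to have been set up by the caller;
 * drmIoctl() comes from libdrm and uintptr_t from <stdint.h>.
 *
 *	__u32 bos[] = { bcl_bo, rcl_bo, tile_alloc_bo, tile_state_bo, fb_bo };
 *	struct drm_v3d_submit_cl submit = {
 *		.bcl_start = bcl_start,
 *		.bcl_end = bcl_end,
 *		.rcl_start = rcl_start,
 *		.rcl_end = rcl_end,
 *		.qma = tile_alloc_offset,
 *		.qms = tile_alloc_size,
 *		.qts = tile_state_offset,
 *		.bo_handles = (__u64)(uintptr_t)bos,
 *		.bo_handle_count = 5,
 *		.out_sync = out_syncobj_handle,	// signaled on render done
 *	};
 *	int ret = drmIoctl(fd, DRM_IOCTL_V3D_SUBMIT_CL, &submit);
 */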

/**
 * struct drm_v3d_wait_bo - ioctl argument for waiting for
 * completion of the last DRM_V3D_SUBMIT_CL on a BO.
 *
 * This is useful for cases where multiple processes might be
 * rendering to a BO and you want to wait for all rendering to be
 * completed.
 */
struct drm_v3d_wait_bo {
	__u32 handle;
	__u32 pad;
	__u64 timeout_ns;
};

/**
 * struct drm_v3d_create_bo - ioctl argument for creating V3D BOs.
 *
 * There are currently no values for the flags argument, but it may be
 * used in a future extension.
 */
struct drm_v3d_create_bo {
	__u32 size;
	__u32 flags;
	/** Returned GEM handle for the BO. */
	__u32 handle;
	/**
	 * Returned offset for the BO in the V3D address space. This offset
	 * is private to the DRM fd and is valid for the lifetime of the GEM
	 * handle.
	 *
	 * This offset value will always be nonzero, since various HW
	 * units treat 0 specially.
	 */
	__u32 offset;
};

/**
 * struct drm_v3d_mmap_bo - ioctl argument for mapping V3D BOs.
 *
 * This doesn't actually perform an mmap. Instead, it returns the
 * offset you need to use in an mmap on the DRM device node. This
 * means that tools like valgrind end up knowing about the mapped
 * memory.
 *
 * There are currently no values for the flags argument, but it may be
 * used in a future extension.
 */
struct drm_v3d_mmap_bo {
	/** Handle for the object being mapped. */
	__u32 handle;
	__u32 flags;
	/** Offset into the DRM node to use for a subsequent mmap call. */
	__u64 offset;
};

enum drm_v3d_param {
	DRM_V3D_PARAM_V3D_UIFCFG,
	DRM_V3D_PARAM_V3D_HUB_IDENT1,
	DRM_V3D_PARAM_V3D_HUB_IDENT2,
	DRM_V3D_PARAM_V3D_HUB_IDENT3,
	DRM_V3D_PARAM_V3D_CORE0_IDENT0,
	DRM_V3D_PARAM_V3D_CORE0_IDENT1,
	DRM_V3D_PARAM_V3D_CORE0_IDENT2,
	DRM_V3D_PARAM_SUPPORTS_TFU,
	DRM_V3D_PARAM_SUPPORTS_CSD,
	DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH,
	DRM_V3D_PARAM_SUPPORTS_PERFMON,
	DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT,
};

struct drm_v3d_get_param {
	__u32 param;
	__u32 pad;
	__u64 value;
};
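
/*
 * Example (illustrative sketch, not part of the UAPI definitions): creating a
 * BO, mapping it into the CPU address space, and waiting for previously
 * submitted rendering to it. fd is assumed to be an open V3D DRM node,
 * drmIoctl() comes from libdrm and mmap() from <sys/mman.h>.
 *
 *	struct drm_v3d_create_bo create = { .size = 64 * 1024 };
 *	drmIoctl(fd, DRM_IOCTL_V3D_CREATE_BO, &create);
 *	// create.handle is the GEM handle, create.offset the V3D address
 *
 *	struct drm_v3d_mmap_bo map = { .handle = create.handle };
 *	drmIoctl(fd, DRM_IOCTL_V3D_MMAP_BO, &map);
 *	void *cpu = mmap(NULL, 64 * 1024, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, fd, map.offset);
 *
 *	struct drm_v3d_wait_bo wait = {
 *		.handle = create.handle,
 *		.timeout_ns = 1000000000ull,	// 1 second
 *	};
 *	drmIoctl(fd, DRM_IOCTL_V3D_WAIT_BO, &wait);
 */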

/**
 * Returns the offset for the BO in the V3D address space for this DRM fd.
 * This is the same value returned by drm_v3d_create_bo, if that was called
 * from this DRM fd.
 */
struct drm_v3d_get_bo_offset {
	__u32 handle;
	__u32 offset;
};

struct drm_v3d_submit_tfu {
	__u32 icfg;
	__u32 iia;
	__u32 iis;
	__u32 ica;
	__u32 iua;
	__u32 ioa;
	__u32 ios;
	__u32 coef[4];
	/* First handle is the output BO, following are other inputs.
	 * 0 for unused.
	 */
	__u32 bo_handles[4];
	/* Sync object to block on before running the TFU job. Each TFU
	 * job will execute in the order submitted to its FD. Synchronization
	 * against rendering jobs requires using sync objects.
	 */
	__u32 in_sync;
	/* Sync object to signal when the TFU job is done. */
	__u32 out_sync;

	__u32 flags;

	/* Pointer to an array of ioctl extensions */
	__u64 extensions;
};

/* Submits a compute shader for dispatch. This job will block on any
 * previous compute shaders submitted on this fd, and any other
 * synchronization must be performed with in_sync/out_sync.
 */
struct drm_v3d_submit_csd {
	__u32 cfg[7];
	__u32 coef[4];

	/* Pointer to a u32 array of the BOs that are referenced by the job.
	 */
	__u64 bo_handles;

	/* Number of BO handles passed in (size is that times 4). */
	__u32 bo_handle_count;

	/* Sync object to block on before running the CSD job. Each
	 * CSD job will execute in the order submitted to its FD.
	 * Synchronization against rendering/TFU jobs or CSD from
	 * other fds requires using sync objects.
	 */
	__u32 in_sync;
	/* Sync object to signal when the CSD job is done. */
	__u32 out_sync;

	/* ID of the perfmon to attach to this job. 0 means no perfmon. */
	__u32 perfmon_id;

	/* Pointer to an array of ioctl extensions */
	__u64 extensions;

	__u32 flags;

	__u32 pad;
};
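
/*
 * Example (illustrative sketch, not part of the UAPI definitions): dispatching
 * a compute job. The cfg[] and coef[] packing is hardware-specific and assumed
 * to have been done by the user-space driver, so it is omitted here; the BO
 * handles and syncobj handles are likewise assumptions of the example.
 *
 *	__u32 bos[] = { shader_bo, uniforms_bo, ssbo_bo };
 *	struct drm_v3d_submit_csd csd = {
 *		// cfg[0..6] and coef[0..3] packing omitted
 *		.bo_handles = (__u64)(uintptr_t)bos,
 *		.bo_handle_count = 3,
 *		.in_sync = in_syncobj_handle,	// wait before dispatch
 *		.out_sync = out_syncobj_handle,	// signaled on completion
 *	};
 *	int ret = drmIoctl(fd, DRM_IOCTL_V3D_SUBMIT_CSD, &csd);
 */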

enum {
	V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS,
	V3D_PERFCNT_FEP_VALID_PRIMS,
	V3D_PERFCNT_FEP_EZ_NFCLIP_QUADS,
	V3D_PERFCNT_FEP_VALID_QUADS,
	V3D_PERFCNT_TLB_QUADS_STENCIL_FAIL,
	V3D_PERFCNT_TLB_QUADS_STENCILZ_FAIL,
	V3D_PERFCNT_TLB_QUADS_STENCILZ_PASS,
	V3D_PERFCNT_TLB_QUADS_ZERO_COV,
	V3D_PERFCNT_TLB_QUADS_NONZERO_COV,
	V3D_PERFCNT_TLB_QUADS_WRITTEN,
	V3D_PERFCNT_PTB_PRIM_VIEWPOINT_DISCARD,
	V3D_PERFCNT_PTB_PRIM_CLIP,
	V3D_PERFCNT_PTB_PRIM_REV,
	V3D_PERFCNT_QPU_IDLE_CYCLES,
	V3D_PERFCNT_QPU_ACTIVE_CYCLES_VERTEX_COORD_USER,
	V3D_PERFCNT_QPU_ACTIVE_CYCLES_FRAG,
	V3D_PERFCNT_QPU_CYCLES_VALID_INSTR,
	V3D_PERFCNT_QPU_CYCLES_TMU_STALL,
	V3D_PERFCNT_QPU_CYCLES_SCOREBOARD_STALL,
	V3D_PERFCNT_QPU_CYCLES_VARYINGS_STALL,
	V3D_PERFCNT_QPU_IC_HIT,
	V3D_PERFCNT_QPU_IC_MISS,
	V3D_PERFCNT_QPU_UC_HIT,
	V3D_PERFCNT_QPU_UC_MISS,
	V3D_PERFCNT_TMU_TCACHE_ACCESS,
	V3D_PERFCNT_TMU_TCACHE_MISS,
	V3D_PERFCNT_VPM_VDW_STALL,
	V3D_PERFCNT_VPM_VCD_STALL,
	V3D_PERFCNT_BIN_ACTIVE,
	V3D_PERFCNT_RDR_ACTIVE,
	V3D_PERFCNT_L2T_HITS,
	V3D_PERFCNT_L2T_MISSES,
	V3D_PERFCNT_CYCLE_COUNT,
	V3D_PERFCNT_QPU_CYCLES_STALLED_VERTEX_COORD_USER,
	V3D_PERFCNT_QPU_CYCLES_STALLED_FRAGMENT,
	V3D_PERFCNT_PTB_PRIMS_BINNED,
	V3D_PERFCNT_AXI_WRITES_WATCH_0,
	V3D_PERFCNT_AXI_READS_WATCH_0,
	V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_0,
	V3D_PERFCNT_AXI_READ_STALLS_WATCH_0,
	V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_0,
	V3D_PERFCNT_AXI_READ_BYTES_WATCH_0,
	V3D_PERFCNT_AXI_WRITES_WATCH_1,
	V3D_PERFCNT_AXI_READS_WATCH_1,
	V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_1,
	V3D_PERFCNT_AXI_READ_STALLS_WATCH_1,
	V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_1,
	V3D_PERFCNT_AXI_READ_BYTES_WATCH_1,
	V3D_PERFCNT_TLB_PARTIAL_QUADS,
	V3D_PERFCNT_TMU_CONFIG_ACCESSES,
	V3D_PERFCNT_L2T_NO_ID_STALL,
	V3D_PERFCNT_L2T_COM_QUE_STALL,
	V3D_PERFCNT_L2T_TMU_WRITES,
	V3D_PERFCNT_TMU_ACTIVE_CYCLES,
	V3D_PERFCNT_TMU_STALLED_CYCLES,
	V3D_PERFCNT_CLE_ACTIVE,
	V3D_PERFCNT_L2T_TMU_READS,
	V3D_PERFCNT_L2T_CLE_READS,
	V3D_PERFCNT_L2T_VCD_READS,
	V3D_PERFCNT_L2T_TMUCFG_READS,
	V3D_PERFCNT_L2T_SLC0_READS,
	V3D_PERFCNT_L2T_SLC1_READS,
	V3D_PERFCNT_L2T_SLC2_READS,
	V3D_PERFCNT_L2T_TMU_W_MISSES,
	V3D_PERFCNT_L2T_TMU_R_MISSES,
	V3D_PERFCNT_L2T_CLE_MISSES,
	V3D_PERFCNT_L2T_VCD_MISSES,
	V3D_PERFCNT_L2T_TMUCFG_MISSES,
	V3D_PERFCNT_L2T_SLC0_MISSES,
	V3D_PERFCNT_L2T_SLC1_MISSES,
	V3D_PERFCNT_L2T_SLC2_MISSES,
	V3D_PERFCNT_CORE_MEM_WRITES,
	V3D_PERFCNT_L2T_MEM_WRITES,
	V3D_PERFCNT_PTB_MEM_WRITES,
	V3D_PERFCNT_TLB_MEM_WRITES,
	V3D_PERFCNT_CORE_MEM_READS,
	V3D_PERFCNT_L2T_MEM_READS,
	V3D_PERFCNT_PTB_MEM_READS,
	V3D_PERFCNT_PSE_MEM_READS,
	V3D_PERFCNT_TLB_MEM_READS,
	V3D_PERFCNT_GMP_MEM_READS,
	V3D_PERFCNT_PTB_W_MEM_WORDS,
	V3D_PERFCNT_TLB_W_MEM_WORDS,
	V3D_PERFCNT_PSE_R_MEM_WORDS,
	V3D_PERFCNT_TLB_R_MEM_WORDS,
	V3D_PERFCNT_TMU_MRU_HITS,
	V3D_PERFCNT_COMPUTE_ACTIVE,
	V3D_PERFCNT_NUM,
};

#define DRM_V3D_MAX_PERF_COUNTERS          32

struct drm_v3d_perfmon_create {
	__u32 id;
	__u32 ncounters;
	__u8 counters[DRM_V3D_MAX_PERF_COUNTERS];
};

struct drm_v3d_perfmon_destroy {
	__u32 id;
};

/*
 * Returns the values of the performance counters tracked by this
 * perfmon (as an array of ncounters u64 values).
 *
 * No implicit synchronization is performed, so the user has to
 * guarantee that any jobs using this perfmon have already been
 * completed (probably by blocking on the seqno returned by the
 * last exec that used the perfmon).
 */
struct drm_v3d_perfmon_get_values {
	__u32 id;
	__u32 pad;
	__u64 values_ptr;
};

#if defined(__cplusplus)
}
#endif

#endif /* _V3D_DRM_H_ */