xref: /openbmc/linux/include/uapi/drm/v3d_drm.h (revision 03ab8e6297acd1bc0eedaa050e2a1635c576fd11)
1  /*
2   * Copyright © 2014-2018 Broadcom
3   *
4   * Permission is hereby granted, free of charge, to any person obtaining a
5   * copy of this software and associated documentation files (the "Software"),
6   * to deal in the Software without restriction, including without limitation
7   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8   * and/or sell copies of the Software, and to permit persons to whom the
9   * Software is furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice (including the next
12   * paragraph) shall be included in all copies or substantial portions of the
13   * Software.
14   *
15   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21   * IN THE SOFTWARE.
22   */
23  
24  #ifndef _V3D_DRM_H_
25  #define _V3D_DRM_H_
26  
27  #include "drm.h"
28  
29  #if defined(__cplusplus)
30  extern "C" {
31  #endif
32  /* ioctl indices; the DRM_IOCTL_V3D_* macros add DRM_COMMAND_BASE to these. */
33  #define DRM_V3D_SUBMIT_CL                         0x00
34  #define DRM_V3D_WAIT_BO                           0x01
35  #define DRM_V3D_CREATE_BO                         0x02
36  #define DRM_V3D_MMAP_BO                           0x03
37  #define DRM_V3D_GET_PARAM                         0x04
38  #define DRM_V3D_GET_BO_OFFSET                     0x05
39  #define DRM_V3D_SUBMIT_TFU                        0x06
40  #define DRM_V3D_SUBMIT_CSD                        0x07
41  #define DRM_V3D_PERFMON_CREATE                    0x08
42  #define DRM_V3D_PERFMON_DESTROY                   0x09
43  #define DRM_V3D_PERFMON_GET_VALUES                0x0a
44  /* ioctl request codes: each pairs a DRM_V3D_* index with its argument struct. */
45  #define DRM_IOCTL_V3D_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
46  #define DRM_IOCTL_V3D_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
47  #define DRM_IOCTL_V3D_CREATE_BO           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_CREATE_BO, struct drm_v3d_create_bo)
48  #define DRM_IOCTL_V3D_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo)
49  #define DRM_IOCTL_V3D_GET_PARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
50  #define DRM_IOCTL_V3D_GET_BO_OFFSET       DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
51  #define DRM_IOCTL_V3D_SUBMIT_TFU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
52  #define DRM_IOCTL_V3D_SUBMIT_CSD          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
53  #define DRM_IOCTL_V3D_PERFMON_CREATE      DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_CREATE, \
54  						   struct drm_v3d_perfmon_create)
55  #define DRM_IOCTL_V3D_PERFMON_DESTROY     DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_DESTROY, \
56  						   struct drm_v3d_perfmon_destroy)
57  #define DRM_IOCTL_V3D_PERFMON_GET_VALUES  DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \
58  						   struct drm_v3d_perfmon_get_values)
59  /* Bits for the flags field of struct drm_v3d_submit_cl (DRM_V3D_SUBMIT_*). */
60  #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE             0x01
61  #define DRM_V3D_SUBMIT_EXTENSION		  0x02
62  
63  /* struct drm_v3d_extension - ioctl extensions
64   *
65   * Linked list of generic extensions, where id identifies which struct
66   * embeds this header (e.g. struct drm_v3d_multi_sync). Therefore,
67   * DRM_V3D_EXT_ID_* is used in id to identify the extension type.
68   */
69  struct drm_v3d_extension {
70  	__u64 next; /* link to the next extension in the list */
71  	__u32 id;
72  #define DRM_V3D_EXT_ID_MULTI_SYNC		0x01
73  	__u32 flags; /* mbz (must be zero) */
74  };
75  
76  /* struct drm_v3d_sem - wait/signal semaphore
77   *
78   * A binary semaphore only takes the syncobj handle and ignores the flags
79   * and point fields. Point is defined for the timeline syncobj feature.
80   */
81  struct drm_v3d_sem {
82  	__u32 handle; /* syncobj */
83  	/* reserved below, for future use */
84  	__u32 flags;
85  	__u64 point;  /* for timeline sem support */
86  	__u64 mbz[2]; /* must be zero, reserved */
87  };
88  
89  /* Enum for each of the V3D queues (see drm_v3d_multi_sync.wait_stage). */
90  enum v3d_queue {
91  	V3D_BIN,
92  	V3D_RENDER,
93  	V3D_TFU,
94  	V3D_CSD,
95  	V3D_CACHE_CLEAN,
96  };
97  
98  /**
99   * struct drm_v3d_multi_sync - ioctl extension to add support for multiple
100   * syncobjs on command submission.
101   *
102   * When an extension of DRM_V3D_EXT_ID_MULTI_SYNC id is defined, it points to
103   * this extension to define wait and signal dependencies, instead of single
104   * in/out sync entries on submitting commands. The field wait_stage is used to
105   * determine the stage to set wait dependencies.
106   */
107  struct drm_v3d_multi_sync {
108  	struct drm_v3d_extension base;
109  	/* Arrays of wait (in_syncs) and signal (out_syncs) semaphores */
110  	__u64 in_syncs;
111  	__u64 out_syncs;
112  
113  	/* Number of entries in each array */
114  	__u32 in_sync_count;
115  	__u32 out_sync_count;
116  
117  	/* set the stage (v3d_queue) to sync */
118  	__u32 wait_stage;
119  
120  	__u32 pad; /* mbz (must be zero) */
121  };
122  
123  /**
124   * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
125   * engine.
126   *
127   * This asks the kernel to have the GPU execute an optional binner
128   * command list, and a render command list.
129   *
130   * The L1T, slice, L2C, L2T, and GCA caches will be flushed before
131   * each CL executes.  The VCD cache should be flushed (if necessary)
132   * by the submitted CLs.  The TLB writes are guaranteed to have been
133   * flushed by the time the render done IRQ happens, which is the
134   * trigger for out_sync.  Any dirtying of cachelines by the job (only
135   * possible using TMU writes) must be flushed by the caller using the
136   * DRM_V3D_SUBMIT_CL_FLUSH_CACHE flag.
137   */
138  struct drm_v3d_submit_cl {
139  	/* Pointer to the binner command list.
140  	 *
141  	 * This is the first set of commands executed, which runs the
142  	 * coordinate shader to determine where primitives land on the screen,
143  	 * then writes out the state updates and draw calls necessary per tile
144  	 * to the tile allocation BO.
145  	 *
146  	 * This BCL will block on any previous BCL submitted on the
147  	 * same FD, but not on any RCL or BCLs submitted by other
148  	 * clients -- that is left up to the submitter to control
149  	 * using in_sync_bcl if necessary.
150  	 */
151  	__u32 bcl_start;
152  
153  	/** End address of the BCL (first byte after the BCL) */
154  	__u32 bcl_end;
155  
156  	/* Offset of the render command list.
157  	 *
158  	 * This is the second set of commands executed, which will either
159  	 * execute the tiles that have been set up by the BCL, or a fixed set
160  	 * of tiles (in the case of RCL-only blits).
161  	 *
162  	 * This RCL will block on this submit's BCL, and any previous
163  	 * RCL submitted on the same FD, but not on any RCL or BCLs
164  	 * submitted by other clients -- that is left up to the
165  	 * submitter to control using in_sync_rcl if necessary.
166  	 */
167  	__u32 rcl_start;
168  
169  	/** End address of the RCL (first byte after the RCL) */
170  	__u32 rcl_end;
171  
172  	/** An optional sync object to wait on before starting the BCL. */
173  	__u32 in_sync_bcl;
174  	/** An optional sync object to wait on before starting the RCL. */
175  	__u32 in_sync_rcl;
176  	/** An optional sync object to place the completion fence in. */
177  	__u32 out_sync;
178  
179  	/* Offset of the tile alloc memory
180  	 *
181  	 * This is optional on V3D 3.3 (where the CL can set the value) but
182  	 * required on V3D 4.1.
183  	 */
184  	__u32 qma;
185  
186  	/** Size of the tile alloc memory. */
187  	__u32 qms;
188  
189  	/** Offset of the tile state data array. */
190  	__u32 qts;
191  
192  	/* Pointer to a u32 array of the BOs that are referenced by the job.
193  	 */
194  	__u64 bo_handles;
195  
196  	/* Number of BO handles passed in (size is that times 4). */
197  	__u32 bo_handle_count;
198  
199  	/* DRM_V3D_SUBMIT_* properties */
200  	__u32 flags;
201  
202  	/* ID of the perfmon to attach to this job. 0 means no perfmon. */
203  	__u32 perfmon_id;
204  
205  	__u32 pad; /* explicit padding; puts extensions at an 8-byte offset */
206  
207  	/* Pointer to a list of ioctl extensions (struct drm_v3d_extension) */
208  	__u64 extensions;
209  };
210  
211  /**
212   * struct drm_v3d_wait_bo - ioctl argument for waiting for
213   * completion of the last DRM_V3D_SUBMIT_CL on a BO.
214   *
215   * This is useful for cases where multiple processes might be
216   * rendering to a BO and you want to wait for all rendering to be
217   * completed.
218   */
219  struct drm_v3d_wait_bo {
220  	__u32 handle; /* the BO to wait on */
221  	__u32 pad; /* explicit padding; puts timeout_ns at an 8-byte offset */
222  	__u64 timeout_ns;
223  };
224  
225  /**
226   * struct drm_v3d_create_bo - ioctl argument for creating V3D BOs.
227   *
228   * There are currently no values for the flags argument, but it may be
229   * used in a future extension.
230   */
231  struct drm_v3d_create_bo {
232  	__u32 size; /* requested BO size -- presumably in bytes; TODO confirm */
233  	__u32 flags; /* no values defined yet */
234  	/** Returned GEM handle for the BO. */
235  	__u32 handle;
236  	/**
237  	 * Returned offset for the BO in the V3D address space.  This offset
238  	 * is private to the DRM fd and is valid for the lifetime of the GEM
239  	 * handle.
240  	 *
241  	 * This offset value will always be nonzero, since various HW
242  	 * units treat 0 specially.
243  	 */
244  	__u32 offset;
245  };
246  
247  /**
248   * struct drm_v3d_mmap_bo - ioctl argument for mapping V3D BOs.
249   *
250   * This doesn't actually perform an mmap.  Instead, it returns the
251   * offset you need to use in an mmap on the DRM device node.  This
252   * means that tools like valgrind end up knowing about the mapped
253   * memory.
254   *
255   * There are currently no values for the flags argument, but it may be
256   * used in a future extension.
257   */
258  struct drm_v3d_mmap_bo {
259  	/** Handle for the object being mapped. */
260  	__u32 handle;
261  	__u32 flags; /* no values defined yet */
262  	/** Returned offset into the DRM node to use for the subsequent mmap call. */
263  	__u64 offset;
264  };
265  /* Parameter ids -- presumably what drm_v3d_get_param.param selects; TODO confirm. */
266  enum drm_v3d_param {
267  	DRM_V3D_PARAM_V3D_UIFCFG,
268  	DRM_V3D_PARAM_V3D_HUB_IDENT1,
269  	DRM_V3D_PARAM_V3D_HUB_IDENT2,
270  	DRM_V3D_PARAM_V3D_HUB_IDENT3,
271  	DRM_V3D_PARAM_V3D_CORE0_IDENT0,
272  	DRM_V3D_PARAM_V3D_CORE0_IDENT1,
273  	DRM_V3D_PARAM_V3D_CORE0_IDENT2,
274  	DRM_V3D_PARAM_SUPPORTS_TFU,
275  	DRM_V3D_PARAM_SUPPORTS_CSD,
276  	DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH,
277  	DRM_V3D_PARAM_SUPPORTS_PERFMON,
278  	DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT,
279  };
280  /* ioctl argument for DRM_IOCTL_V3D_GET_PARAM. */
281  struct drm_v3d_get_param {
282  	__u32 param;
283  	__u32 pad; /* explicit padding; puts value at an 8-byte offset */
284  	__u64 value;
285  };
286  
287  /**
288   * Returns the offset for the BO in the V3D address space for this DRM fd.
289   * This is the same value returned by drm_v3d_create_bo, if that was called
290   * from this DRM fd.
291   */
292  struct drm_v3d_get_bo_offset {
293  	__u32 handle; /* the BO to look up */
294  	__u32 offset; /* returned offset */
295  };
296  /* ioctl argument for DRM_IOCTL_V3D_SUBMIT_TFU. */
297  struct drm_v3d_submit_tfu {
298  	__u32 icfg;
299  	__u32 iia;
300  	__u32 iis;
301  	__u32 ica;
302  	__u32 iua;
303  	__u32 ioa;
304  	__u32 ios;
305  	__u32 coef[4];
306  	/* First handle is the output BO, following are other inputs.
307  	 * 0 for unused.
308  	 */
309  	__u32 bo_handles[4];
310  	/* Sync object to block on before running the TFU job.  Each TFU
311  	 * job will execute in the order submitted to its FD.  Synchronization
312  	 * against rendering jobs requires using sync objects.
313  	 */
314  	__u32 in_sync;
315  	/* Sync object to signal when the TFU job is done. */
316  	__u32 out_sync;
317  
318  	__u32 flags;
319  
320  	/* Pointer to a list of ioctl extensions (struct drm_v3d_extension) */
321  	__u64 extensions;
322  };
323  
324  /* Submits a compute shader for dispatch.  This job will block on any
325   * previous compute shaders submitted on this fd, and any other
326   * synchronization must be performed with in_sync/out_sync.
327   */
328  struct drm_v3d_submit_csd {
329  	__u32 cfg[7];
330  	__u32 coef[4];
331  	/* NOTE(review): 44 bytes precede bo_handles; an 8-byte-aligned __u64 implies a 4-byte hole here. */
332  	/* Pointer to a u32 array of the BOs that are referenced by the job.
333  	 */
334  	__u64 bo_handles;
335  
336  	/* Number of BO handles passed in (size is that times 4). */
337  	__u32 bo_handle_count;
338  
339  	/* Sync object to block on before running the CSD job.  Each
340  	 * CSD job will execute in the order submitted to its FD.
341  	 * Synchronization against rendering/TFU jobs or CSD from
342  	 * other fds requires using sync objects.
343  	 */
344  	__u32 in_sync;
345  	/* Sync object to signal when the CSD job is done. */
346  	__u32 out_sync;
347  
348  	/* ID of the perfmon to attach to this job. 0 means no perfmon. */
349  	__u32 perfmon_id;
350  
351  	/* Pointer to a list of ioctl extensions (struct drm_v3d_extension) */
352  	__u64 extensions;
353  
354  	__u32 flags;
355  
356  	__u32 pad;
357  };
358  /* Performance counter ids, numbered from 0; V3D_PERFCNT_NUM is their count (87). */
359  enum {
360  	V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS,
361  	V3D_PERFCNT_FEP_VALID_PRIMS,
362  	V3D_PERFCNT_FEP_EZ_NFCLIP_QUADS,
363  	V3D_PERFCNT_FEP_VALID_QUADS,
364  	V3D_PERFCNT_TLB_QUADS_STENCIL_FAIL,
365  	V3D_PERFCNT_TLB_QUADS_STENCILZ_FAIL,
366  	V3D_PERFCNT_TLB_QUADS_STENCILZ_PASS,
367  	V3D_PERFCNT_TLB_QUADS_ZERO_COV,
368  	V3D_PERFCNT_TLB_QUADS_NONZERO_COV,
369  	V3D_PERFCNT_TLB_QUADS_WRITTEN,
370  	V3D_PERFCNT_PTB_PRIM_VIEWPOINT_DISCARD,
371  	V3D_PERFCNT_PTB_PRIM_CLIP,
372  	V3D_PERFCNT_PTB_PRIM_REV,
373  	V3D_PERFCNT_QPU_IDLE_CYCLES,
374  	V3D_PERFCNT_QPU_ACTIVE_CYCLES_VERTEX_COORD_USER,
375  	V3D_PERFCNT_QPU_ACTIVE_CYCLES_FRAG,
376  	V3D_PERFCNT_QPU_CYCLES_VALID_INSTR,
377  	V3D_PERFCNT_QPU_CYCLES_TMU_STALL,
378  	V3D_PERFCNT_QPU_CYCLES_SCOREBOARD_STALL,
379  	V3D_PERFCNT_QPU_CYCLES_VARYINGS_STALL,
380  	V3D_PERFCNT_QPU_IC_HIT,
381  	V3D_PERFCNT_QPU_IC_MISS,
382  	V3D_PERFCNT_QPU_UC_HIT,
383  	V3D_PERFCNT_QPU_UC_MISS,
384  	V3D_PERFCNT_TMU_TCACHE_ACCESS,
385  	V3D_PERFCNT_TMU_TCACHE_MISS,
386  	V3D_PERFCNT_VPM_VDW_STALL,
387  	V3D_PERFCNT_VPM_VCD_STALL,
388  	V3D_PERFCNT_BIN_ACTIVE,
389  	V3D_PERFCNT_RDR_ACTIVE,
390  	V3D_PERFCNT_L2T_HITS,
391  	V3D_PERFCNT_L2T_MISSES,
392  	V3D_PERFCNT_CYCLE_COUNT,
393  	V3D_PERFCNT_QPU_CYCLES_STALLED_VERTEX_COORD_USER,
394  	V3D_PERFCNT_QPU_CYCLES_STALLED_FRAGMENT,
395  	V3D_PERFCNT_PTB_PRIMS_BINNED,
396  	V3D_PERFCNT_AXI_WRITES_WATCH_0,
397  	V3D_PERFCNT_AXI_READS_WATCH_0,
398  	V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_0,
399  	V3D_PERFCNT_AXI_READ_STALLS_WATCH_0,
400  	V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_0,
401  	V3D_PERFCNT_AXI_READ_BYTES_WATCH_0,
402  	V3D_PERFCNT_AXI_WRITES_WATCH_1,
403  	V3D_PERFCNT_AXI_READS_WATCH_1,
404  	V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_1,
405  	V3D_PERFCNT_AXI_READ_STALLS_WATCH_1,
406  	V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_1,
407  	V3D_PERFCNT_AXI_READ_BYTES_WATCH_1,
408  	V3D_PERFCNT_TLB_PARTIAL_QUADS,
409  	V3D_PERFCNT_TMU_CONFIG_ACCESSES,
410  	V3D_PERFCNT_L2T_NO_ID_STALL,
411  	V3D_PERFCNT_L2T_COM_QUE_STALL,
412  	V3D_PERFCNT_L2T_TMU_WRITES,
413  	V3D_PERFCNT_TMU_ACTIVE_CYCLES,
414  	V3D_PERFCNT_TMU_STALLED_CYCLES,
415  	V3D_PERFCNT_CLE_ACTIVE,
416  	V3D_PERFCNT_L2T_TMU_READS,
417  	V3D_PERFCNT_L2T_CLE_READS,
418  	V3D_PERFCNT_L2T_VCD_READS,
419  	V3D_PERFCNT_L2T_TMUCFG_READS,
420  	V3D_PERFCNT_L2T_SLC0_READS,
421  	V3D_PERFCNT_L2T_SLC1_READS,
422  	V3D_PERFCNT_L2T_SLC2_READS,
423  	V3D_PERFCNT_L2T_TMU_W_MISSES,
424  	V3D_PERFCNT_L2T_TMU_R_MISSES,
425  	V3D_PERFCNT_L2T_CLE_MISSES,
426  	V3D_PERFCNT_L2T_VCD_MISSES,
427  	V3D_PERFCNT_L2T_TMUCFG_MISSES,
428  	V3D_PERFCNT_L2T_SLC0_MISSES,
429  	V3D_PERFCNT_L2T_SLC1_MISSES,
430  	V3D_PERFCNT_L2T_SLC2_MISSES,
431  	V3D_PERFCNT_CORE_MEM_WRITES,
432  	V3D_PERFCNT_L2T_MEM_WRITES,
433  	V3D_PERFCNT_PTB_MEM_WRITES,
434  	V3D_PERFCNT_TLB_MEM_WRITES,
435  	V3D_PERFCNT_CORE_MEM_READS,
436  	V3D_PERFCNT_L2T_MEM_READS,
437  	V3D_PERFCNT_PTB_MEM_READS,
438  	V3D_PERFCNT_PSE_MEM_READS,
439  	V3D_PERFCNT_TLB_MEM_READS,
440  	V3D_PERFCNT_GMP_MEM_READS,
441  	V3D_PERFCNT_PTB_W_MEM_WORDS,
442  	V3D_PERFCNT_TLB_W_MEM_WORDS,
443  	V3D_PERFCNT_PSE_R_MEM_WORDS,
444  	V3D_PERFCNT_TLB_R_MEM_WORDS,
445  	V3D_PERFCNT_TMU_MRU_HITS,
446  	V3D_PERFCNT_COMPUTE_ACTIVE,
447  	V3D_PERFCNT_NUM,
448  };
449  /* Size of the counters[] array in struct drm_v3d_perfmon_create. */
450  #define DRM_V3D_MAX_PERF_COUNTERS                 32
451  /* ioctl argument for DRM_IOCTL_V3D_PERFMON_CREATE. */
452  struct drm_v3d_perfmon_create {
453  	__u32 id;
454  	__u32 ncounters; /* presumably the number of counters[] entries in use -- TODO confirm */
455  	__u8 counters[DRM_V3D_MAX_PERF_COUNTERS]; /* presumably V3D_PERFCNT_* ids -- TODO confirm */
456  };
457  /* ioctl argument for DRM_IOCTL_V3D_PERFMON_DESTROY. */
458  struct drm_v3d_perfmon_destroy {
459  	__u32 id;
460  };
461  
462  /*
463   * Returns the values of the performance counters tracked by this
464   * perfmon (as an array of ncounters u64 values).
465   *
466   * No implicit synchronization is performed, so the user has to
467   * guarantee that any jobs using this perfmon have already been
468   * completed (probably by waiting on the out_sync of the last
469   * submit that used the perfmon).
470   */
471  struct drm_v3d_perfmon_get_values {
472  	__u32 id;
473  	__u32 pad; /* explicit padding; puts values_ptr at an 8-byte offset */
474  	__u64 values_ptr;
475  };
476  
477  #if defined(__cplusplus)
478  }
479  #endif
480  
481  #endif /* _V3D_DRM_H_ */
482