// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2015-2018 Broadcom */

#include <linux/mm_types.h>
#include <drm/drmP.h>
#include <drm/drm_encoder.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/gpu_scheduler.h>
#include "uapi/drm/v3d_drm.h"

#define GMP_GRANULARITY (128 * 1024)

/* Enum for each of the V3D queues. */
enum v3d_queue {
	V3D_BIN,
	V3D_RENDER,
	V3D_TFU,
};

#define V3D_MAX_QUEUES (V3D_TFU + 1)

struct v3d_queue_state {
	struct drm_gpu_scheduler sched;

	u64 fence_context;
	u64 emit_seqno;
};

struct v3d_dev {
	struct drm_device drm;

	/* Short representation (e.g. 33, 41) of the V3D tech version
	 * and revision.
	 */
	int ver;
	bool single_irq_line;

	struct device *dev;
	struct platform_device *pdev;
	void __iomem *hub_regs;
	void __iomem *core_regs[3];
	void __iomem *bridge_regs;
	void __iomem *gca_regs;
	struct clk *clk;
	struct reset_control *reset;

	/* Virtual and DMA addresses of the single shared page table. */
	volatile u32 *pt;
	dma_addr_t pt_paddr;

	/* Virtual and DMA addresses of the MMU's scratch page. When
	 * a read or write is invalid in the MMU, it will be
	 * redirected here.
	 */
	void *mmu_scratch;
	dma_addr_t mmu_scratch_paddr;

	/* Number of V3D cores. */
	u32 cores;

	/* Allocator managing the address space. All units are in
	 * number of pages.
	 */
	struct drm_mm mm;
	spinlock_t mm_lock;

	struct work_struct overflow_mem_work;

	struct v3d_exec_info *bin_job;
	struct v3d_exec_info *render_job;
	struct v3d_tfu_job *tfu_job;

	struct v3d_queue_state queue[V3D_MAX_QUEUES];

	/* Spinlock used to synchronize the overflow memory
	 * management against bin job submission.
	 */
	spinlock_t job_lock;

	/* Protects bo_stats */
	struct mutex bo_lock;

	/* Lock taken when resetting the GPU, to keep multiple
	 * processes from trying to park the scheduler threads and
	 * reset at once.
	 */
	struct mutex reset_lock;

	/* Lock taken when creating and pushing the GPU scheduler
	 * jobs, to keep the sched-fence seqnos in order.
	 */
	struct mutex sched_lock;

	struct {
		u32 num_allocated;
		u32 pages_allocated;
	} bo_stats;
};

static inline struct v3d_dev *
to_v3d_dev(struct drm_device *dev)
{
	return (struct v3d_dev *)dev->dev_private;
}

/* The per-fd struct, which tracks the MMU mappings. */
struct v3d_file_priv {
	struct v3d_dev *v3d;

	struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];
};

struct v3d_bo {
	struct drm_gem_shmem_object base;

	struct drm_mm_node node;

	/* List entry for the BO's position in
	 * v3d_exec_info->unref_list
	 */
	struct list_head unref_head;
};

static inline struct v3d_bo *
to_v3d_bo(struct drm_gem_object *bo)
{
	return (struct v3d_bo *)bo;
}

struct v3d_fence {
	struct dma_fence base;
	struct drm_device *dev;
	/* v3d seqno for signaled() test */
	u64 seqno;
	enum v3d_queue queue;
};

static inline struct v3d_fence *
to_v3d_fence(struct dma_fence *fence)
{
	return (struct v3d_fence *)fence;
}

#define V3D_READ(offset) readl(v3d->hub_regs + offset)
#define V3D_WRITE(offset, val) writel(val, v3d->hub_regs + offset)

#define V3D_BRIDGE_READ(offset) readl(v3d->bridge_regs + offset)
#define V3D_BRIDGE_WRITE(offset, val) writel(val, v3d->bridge_regs + offset)

#define V3D_GCA_READ(offset) readl(v3d->gca_regs + offset)
#define V3D_GCA_WRITE(offset, val) writel(val, v3d->gca_regs + offset)

#define V3D_CORE_READ(core, offset) readl(v3d->core_regs[core] + offset)
#define V3D_CORE_WRITE(core, offset, val) writel(val, v3d->core_regs[core] + offset)
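
/* Illustrative sketch only: the register accessors above rely on a local
 * "struct v3d_dev *v3d" being in scope at the call site, and the per-core
 * variants additionally take the core index.  A hypothetical caller might
 * look like this (register names as defined in v3d_regs.h; the function
 * itself is not part of the driver):
 *
 *	static u32 v3d_example_read_idents(struct v3d_dev *v3d, int core)
 *	{
 *		u32 hub_ident = V3D_READ(V3D_HUB_IDENT1);
 *		u32 core_ident = V3D_CORE_READ(core, V3D_CTL_IDENT0);
 *
 *		return hub_ident ? core_ident : 0;
 *	}
 */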

struct v3d_job {
	struct drm_sched_job base;

	struct v3d_exec_info *exec;

	/* An optional fence userspace can pass in for the job to depend on. */
	struct dma_fence *in_fence;

	/* v3d fence to be signaled by IRQ handler when the job is complete. */
	struct dma_fence *irq_fence;

	/* GPU virtual addresses of the start/end of the CL job. */
	u32 start, end;

	u32 timedout_ctca, timedout_ctra;
};

struct v3d_exec_info {
	struct v3d_dev *v3d;

	struct v3d_job bin, render;

	/* Fence for when the scheduler considers the binner to be
	 * done, for render to depend on.
	 */
	struct dma_fence *bin_done_fence;

	/* Fence for when the scheduler considers the render to be
	 * done, for when the BOs' reservations should be complete.
	 */
	struct dma_fence *render_done_fence;

	struct kref refcount;

	/* This is the array of BOs that were looked up at the start of exec. */
	struct v3d_bo **bo;
	u32 bo_count;

	/* List of overflow BOs used in the job that need to be
	 * released once the job is complete.
	 */
	struct list_head unref_list;

	/* Submitted tile memory allocation start/size, tile state. */
	u32 qma, qms, qts;
};

struct v3d_tfu_job {
	struct drm_sched_job base;

	struct drm_v3d_submit_tfu args;

	/* An optional fence userspace can pass in for the job to depend on. */
	struct dma_fence *in_fence;

	/* v3d fence to be signaled by IRQ handler when the job is complete. */
	struct dma_fence *irq_fence;

	struct v3d_dev *v3d;

	struct kref refcount;

	/* This is the array of BOs that were looked up at the start of exec. */
	struct v3d_bo *bo[4];
};

/**
 * wait_for - magic (register) wait macro
 *
 * Does the right thing for modeset paths when run under kgdb or similar atomic
 * contexts. Note that it's important that we check the condition again after
 * having timed out, since the timeout could be due to preemption or similar and
 * we've never had a chance to check the condition before the timeout.
 */
#define wait_for(COND, MS) ({ \
	unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1; \
	int ret__ = 0; \
	while (!(COND)) { \
		if (time_after(jiffies, timeout__)) { \
			if (!(COND)) \
				ret__ = -ETIMEDOUT; \
			break; \
		} \
		msleep(1); \
	} \
	ret__; \
})

static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
{
	/* nsecs_to_jiffies64() does not guard against overflow */
	if (NSEC_PER_SEC % HZ &&
	    div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
		return MAX_JIFFY_OFFSET;

	return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
}
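
/* Illustrative usage sketch of the two helpers above (the register and bit
 * names are examples from v3d_regs.h; the surrounding code is hypothetical):
 *
 *	ret = wait_for(V3D_READ(V3D_MMU_CTL) & V3D_MMU_CTL_ENABLE, 100);
 *	if (ret)
 *		dev_err(v3d->dev, "timed out waiting for the MMU\n");
 *
 * wait_for() re-checks COND once more after the deadline and evaluates to 0
 * on success or -ETIMEDOUT on timeout, while nsecs_to_jiffies_timeout()
 * clamps a nanosecond timeout (e.g. one passed in by a wait-BO ioctl) to a
 * jiffies value that cannot overflow.
 */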

/* v3d_bo.c */
struct drm_gem_object *v3d_create_object(struct drm_device *dev, size_t size);
void v3d_free_object(struct drm_gem_object *gem_obj);
struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
			     size_t size);
int v3d_create_bo_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv);
int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv);
int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv);
struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev,
						 struct dma_buf_attachment *attach,
						 struct sg_table *sgt);

/* v3d_debugfs.c */
int v3d_debugfs_init(struct drm_minor *minor);

/* v3d_fence.c */
extern const struct dma_fence_ops v3d_fence_ops;
struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue);

/* v3d_gem.c */
int v3d_gem_init(struct drm_device *dev);
void v3d_gem_destroy(struct drm_device *dev);
int v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv);
int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv);
void v3d_exec_put(struct v3d_exec_info *exec);
void v3d_tfu_job_put(struct v3d_tfu_job *exec);
void v3d_reset(struct v3d_dev *v3d);
void v3d_invalidate_caches(struct v3d_dev *v3d);

/* v3d_irq.c */
int v3d_irq_init(struct v3d_dev *v3d);
void v3d_irq_enable(struct v3d_dev *v3d);
void v3d_irq_disable(struct v3d_dev *v3d);
void v3d_irq_reset(struct v3d_dev *v3d);

/* v3d_mmu.c */
int v3d_mmu_get_offset(struct drm_file *file_priv, struct v3d_bo *bo,
		       u32 *offset);
int v3d_mmu_set_page_table(struct v3d_dev *v3d);
void v3d_mmu_insert_ptes(struct v3d_bo *bo);
void v3d_mmu_remove_ptes(struct v3d_bo *bo);

/* v3d_sched.c */
int v3d_sched_init(struct v3d_dev *v3d);
void v3d_sched_fini(struct v3d_dev *v3d);
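
/* Sketch of how the *_ioctl entry points declared above are expected to be
 * wired up in v3d_drv.c; the flags shown are illustrative, see the real
 * ioctl table in v3d_drv.c for the authoritative set:
 *
 *	static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
 *		DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl,
 *				  DRM_RENDER_ALLOW | DRM_AUTH),
 *		DRM_IOCTL_DEF_DRV(V3D_WAIT_BO, v3d_wait_bo_ioctl,
 *				  DRM_RENDER_ALLOW),
 *		DRM_IOCTL_DEF_DRV(V3D_CREATE_BO, v3d_create_bo_ioctl,
 *				  DRM_RENDER_ALLOW),
 *	};
 */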