1 /* 2 * Copyright (C) 2015 Broadcom 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License version 2 as 6 * published by the Free Software Foundation. 7 */ 8 9 #include "drmP.h" 10 #include "drm_gem_cma_helper.h" 11 12 struct vc4_dev { 13 struct drm_device *dev; 14 15 struct vc4_hdmi *hdmi; 16 struct vc4_hvs *hvs; 17 struct vc4_crtc *crtc[3]; 18 struct vc4_v3d *v3d; 19 struct vc4_dpi *dpi; 20 struct vc4_vec *vec; 21 22 struct drm_fbdev_cma *fbdev; 23 24 struct vc4_hang_state *hang_state; 25 26 /* The kernel-space BO cache. Tracks buffers that have been 27 * unreferenced by all other users (refcounts of 0!) but not 28 * yet freed, so we can do cheap allocations. 29 */ 30 struct vc4_bo_cache { 31 /* Array of list heads for entries in the BO cache, 32 * based on number of pages, so we can do O(1) lookups 33 * in the cache when allocating. 34 */ 35 struct list_head *size_list; 36 uint32_t size_list_size; 37 38 /* List of all BOs in the cache, ordered by age, so we 39 * can do O(1) lookups when trying to free old 40 * buffers. 41 */ 42 struct list_head time_list; 43 struct work_struct time_work; 44 struct timer_list time_timer; 45 } bo_cache; 46 47 struct vc4_bo_stats { 48 u32 num_allocated; 49 u32 size_allocated; 50 u32 num_cached; 51 u32 size_cached; 52 } bo_stats; 53 54 /* Protects bo_cache and the BO stats. */ 55 struct mutex bo_lock; 56 57 /* Sequence number for the last job queued in bin_job_list. 58 * Starts at 0 (no jobs emitted). 59 */ 60 uint64_t emit_seqno; 61 62 /* Sequence number for the last completed job on the GPU. 63 * Starts at 0 (no jobs completed). 64 */ 65 uint64_t finished_seqno; 66 67 /* List of all struct vc4_exec_info for jobs to be executed in 68 * the binner. The first job in the list is the one currently 69 * programmed into ct0ca for execution. 70 */ 71 struct list_head bin_job_list; 72 73 /* List of all struct vc4_exec_info for jobs that have 74 * completed binning and are ready for rendering. The first 75 * job in the list is the one currently programmed into ct1ca 76 * for execution. 77 */ 78 struct list_head render_job_list; 79 80 /* List of the finished vc4_exec_infos waiting to be freed by 81 * job_done_work. 82 */ 83 struct list_head job_done_list; 84 /* Spinlock used to synchronize the job_list and seqno 85 * accesses between the IRQ handler and GEM ioctls. 86 */ 87 spinlock_t job_lock; 88 wait_queue_head_t job_wait_queue; 89 struct work_struct job_done_work; 90 91 /* List of struct vc4_seqno_cb for callbacks to be made from a 92 * workqueue when the given seqno is passed. 93 */ 94 struct list_head seqno_cb_list; 95 96 /* The binner overflow memory that's currently set up in 97 * BPOA/BPOS registers. When overflow occurs and a new one is 98 * allocated, the previous one will be moved to 99 * vc4->current_exec's free list. 100 */ 101 struct vc4_bo *overflow_mem; 102 struct work_struct overflow_mem_work; 103 104 int power_refcount; 105 106 /* Mutex controlling the power refcount. */ 107 struct mutex power_lock; 108 109 struct { 110 struct timer_list timer; 111 struct work_struct reset_work; 112 } hangcheck; 113 114 struct semaphore async_modeset; 115 }; 116 117 static inline struct vc4_dev * 118 to_vc4_dev(struct drm_device *dev) 119 { 120 return (struct vc4_dev *)dev->dev_private; 121 } 122 123 struct vc4_bo { 124 struct drm_gem_cma_object base; 125 126 /* seqno of the last job to render using this BO. */ 127 uint64_t seqno; 128 129 /* seqno of the last job to use the RCL to write to this BO. 130 * 131 * Note that this doesn't include binner overflow memory 132 * writes. 133 */ 134 uint64_t write_seqno; 135 136 /* List entry for the BO's position in either 137 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list 138 */ 139 struct list_head unref_head; 140 141 /* Time in jiffies when the BO was put in vc4->bo_cache. */ 142 unsigned long free_time; 143 144 /* List entry for the BO's position in vc4_dev->bo_cache.size_list */ 145 struct list_head size_head; 146 147 /* Struct for shader validation state, if created by 148 * DRM_IOCTL_VC4_CREATE_SHADER_BO. 149 */ 150 struct vc4_validated_shader_info *validated_shader; 151 }; 152 153 static inline struct vc4_bo * 154 to_vc4_bo(struct drm_gem_object *bo) 155 { 156 return (struct vc4_bo *)bo; 157 } 158 159 struct vc4_seqno_cb { 160 struct work_struct work; 161 uint64_t seqno; 162 void (*func)(struct vc4_seqno_cb *cb); 163 }; 164 165 struct vc4_v3d { 166 struct vc4_dev *vc4; 167 struct platform_device *pdev; 168 void __iomem *regs; 169 }; 170 171 struct vc4_hvs { 172 struct platform_device *pdev; 173 void __iomem *regs; 174 u32 __iomem *dlist; 175 176 /* Memory manager for CRTCs to allocate space in the display 177 * list. Units are dwords. 178 */ 179 struct drm_mm dlist_mm; 180 /* Memory manager for the LBM memory used by HVS scaling. */ 181 struct drm_mm lbm_mm; 182 spinlock_t mm_lock; 183 184 struct drm_mm_node mitchell_netravali_filter; 185 }; 186 187 struct vc4_plane { 188 struct drm_plane base; 189 }; 190 191 static inline struct vc4_plane * 192 to_vc4_plane(struct drm_plane *plane) 193 { 194 return (struct vc4_plane *)plane; 195 } 196 197 enum vc4_encoder_type { 198 VC4_ENCODER_TYPE_NONE, 199 VC4_ENCODER_TYPE_HDMI, 200 VC4_ENCODER_TYPE_VEC, 201 VC4_ENCODER_TYPE_DSI0, 202 VC4_ENCODER_TYPE_DSI1, 203 VC4_ENCODER_TYPE_SMI, 204 VC4_ENCODER_TYPE_DPI, 205 }; 206 207 struct vc4_encoder { 208 struct drm_encoder base; 209 enum vc4_encoder_type type; 210 u32 clock_select; 211 }; 212 213 static inline struct vc4_encoder * 214 to_vc4_encoder(struct drm_encoder *encoder) 215 { 216 return container_of(encoder, struct vc4_encoder, base); 217 } 218 219 #define V3D_READ(offset) readl(vc4->v3d->regs + offset) 220 #define V3D_WRITE(offset, val) writel(val, vc4->v3d->regs + offset) 221 #define HVS_READ(offset) readl(vc4->hvs->regs + offset) 222 #define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset) 223 224 struct vc4_exec_info { 225 /* Sequence number for this bin/render job. */ 226 uint64_t seqno; 227 228 /* Latest write_seqno of any BO that binning depends on. */ 229 uint64_t bin_dep_seqno; 230 231 /* Last current addresses the hardware was processing when the 232 * hangcheck timer checked on us. 233 */ 234 uint32_t last_ct0ca, last_ct1ca; 235 236 /* Kernel-space copy of the ioctl arguments */ 237 struct drm_vc4_submit_cl *args; 238 239 /* This is the array of BOs that were looked up at the start of exec. 240 * Command validation will use indices into this array. 241 */ 242 struct drm_gem_cma_object **bo; 243 uint32_t bo_count; 244 245 /* List of BOs that are being written by the RCL. Other than 246 * the binner temporary storage, this is all the BOs written 247 * by the job. 248 */ 249 struct drm_gem_cma_object *rcl_write_bo[4]; 250 uint32_t rcl_write_bo_count; 251 252 /* Pointers for our position in vc4->job_list */ 253 struct list_head head; 254 255 /* List of other BOs used in the job that need to be released 256 * once the job is complete. 257 */ 258 struct list_head unref_list; 259 260 /* Current unvalidated indices into @bo loaded by the non-hardware 261 * VC4_PACKET_GEM_HANDLES. 262 */ 263 uint32_t bo_index[2]; 264 265 /* This is the BO where we store the validated command lists, shader 266 * records, and uniforms. 267 */ 268 struct drm_gem_cma_object *exec_bo; 269 270 /** 271 * This tracks the per-shader-record state (packet 64) that 272 * determines the length of the shader record and the offset 273 * it's expected to be found at. It gets read in from the 274 * command lists. 275 */ 276 struct vc4_shader_state { 277 uint32_t addr; 278 /* Maximum vertex index referenced by any primitive using this 279 * shader state. 280 */ 281 uint32_t max_index; 282 } *shader_state; 283 284 /** How many shader states the user declared they were using. */ 285 uint32_t shader_state_size; 286 /** How many shader state records the validator has seen. */ 287 uint32_t shader_state_count; 288 289 bool found_tile_binning_mode_config_packet; 290 bool found_start_tile_binning_packet; 291 bool found_increment_semaphore_packet; 292 bool found_flush; 293 uint8_t bin_tiles_x, bin_tiles_y; 294 struct drm_gem_cma_object *tile_bo; 295 uint32_t tile_alloc_offset; 296 297 /** 298 * Computed addresses pointing into exec_bo where we start the 299 * bin thread (ct0) and render thread (ct1). 300 */ 301 uint32_t ct0ca, ct0ea; 302 uint32_t ct1ca, ct1ea; 303 304 /* Pointer to the unvalidated bin CL (if present). */ 305 void *bin_u; 306 307 /* Pointers to the shader recs. These paddr gets incremented as CL 308 * packets are relocated in validate_gl_shader_state, and the vaddrs 309 * (u and v) get incremented and size decremented as the shader recs 310 * themselves are validated. 311 */ 312 void *shader_rec_u; 313 void *shader_rec_v; 314 uint32_t shader_rec_p; 315 uint32_t shader_rec_size; 316 317 /* Pointers to the uniform data. These pointers are incremented, and 318 * size decremented, as each batch of uniforms is uploaded. 319 */ 320 void *uniforms_u; 321 void *uniforms_v; 322 uint32_t uniforms_p; 323 uint32_t uniforms_size; 324 }; 325 326 static inline struct vc4_exec_info * 327 vc4_first_bin_job(struct vc4_dev *vc4) 328 { 329 return list_first_entry_or_null(&vc4->bin_job_list, 330 struct vc4_exec_info, head); 331 } 332 333 static inline struct vc4_exec_info * 334 vc4_first_render_job(struct vc4_dev *vc4) 335 { 336 return list_first_entry_or_null(&vc4->render_job_list, 337 struct vc4_exec_info, head); 338 } 339 340 static inline struct vc4_exec_info * 341 vc4_last_render_job(struct vc4_dev *vc4) 342 { 343 if (list_empty(&vc4->render_job_list)) 344 return NULL; 345 return list_last_entry(&vc4->render_job_list, 346 struct vc4_exec_info, head); 347 } 348 349 /** 350 * struct vc4_texture_sample_info - saves the offsets into the UBO for texture 351 * setup parameters. 352 * 353 * This will be used at draw time to relocate the reference to the texture 354 * contents in p0, and validate that the offset combined with 355 * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO. 356 * Note that the hardware treats unprovided config parameters as 0, so not all 357 * of them need to be set up for every texure sample, and we'll store ~0 as 358 * the offset to mark the unused ones. 359 * 360 * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit 361 * Setup") for definitions of the texture parameters. 362 */ 363 struct vc4_texture_sample_info { 364 bool is_direct; 365 uint32_t p_offset[4]; 366 }; 367 368 /** 369 * struct vc4_validated_shader_info - information about validated shaders that 370 * needs to be used from command list validation. 371 * 372 * For a given shader, each time a shader state record references it, we need 373 * to verify that the shader doesn't read more uniforms than the shader state 374 * record's uniform BO pointer can provide, and we need to apply relocations 375 * and validate the shader state record's uniforms that define the texture 376 * samples. 377 */ 378 struct vc4_validated_shader_info { 379 uint32_t uniforms_size; 380 uint32_t uniforms_src_size; 381 uint32_t num_texture_samples; 382 struct vc4_texture_sample_info *texture_samples; 383 384 uint32_t num_uniform_addr_offsets; 385 uint32_t *uniform_addr_offsets; 386 387 bool is_threaded; 388 }; 389 390 /** 391 * _wait_for - magic (register) wait macro 392 * 393 * Does the right thing for modeset paths when run under kdgb or similar atomic 394 * contexts. Note that it's important that we check the condition again after 395 * having timed out, since the timeout could be due to preemption or similar and 396 * we've never had a chance to check the condition before the timeout. 397 */ 398 #define _wait_for(COND, MS, W) ({ \ 399 unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1; \ 400 int ret__ = 0; \ 401 while (!(COND)) { \ 402 if (time_after(jiffies, timeout__)) { \ 403 if (!(COND)) \ 404 ret__ = -ETIMEDOUT; \ 405 break; \ 406 } \ 407 if (W && drm_can_sleep()) { \ 408 msleep(W); \ 409 } else { \ 410 cpu_relax(); \ 411 } \ 412 } \ 413 ret__; \ 414 }) 415 416 #define wait_for(COND, MS) _wait_for(COND, MS, 1) 417 418 /* vc4_bo.c */ 419 struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size); 420 void vc4_free_object(struct drm_gem_object *gem_obj); 421 struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size, 422 bool from_cache); 423 int vc4_dumb_create(struct drm_file *file_priv, 424 struct drm_device *dev, 425 struct drm_mode_create_dumb *args); 426 struct dma_buf *vc4_prime_export(struct drm_device *dev, 427 struct drm_gem_object *obj, int flags); 428 int vc4_create_bo_ioctl(struct drm_device *dev, void *data, 429 struct drm_file *file_priv); 430 int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, 431 struct drm_file *file_priv); 432 int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, 433 struct drm_file *file_priv); 434 int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, 435 struct drm_file *file_priv); 436 int vc4_mmap(struct file *filp, struct vm_area_struct *vma); 437 int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); 438 void *vc4_prime_vmap(struct drm_gem_object *obj); 439 void vc4_bo_cache_init(struct drm_device *dev); 440 void vc4_bo_cache_destroy(struct drm_device *dev); 441 int vc4_bo_stats_debugfs(struct seq_file *m, void *arg); 442 443 /* vc4_crtc.c */ 444 extern struct platform_driver vc4_crtc_driver; 445 int vc4_enable_vblank(struct drm_device *dev, unsigned int crtc_id); 446 void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id); 447 bool vc4_event_pending(struct drm_crtc *crtc); 448 int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg); 449 int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, 450 unsigned int flags, int *vpos, int *hpos, 451 ktime_t *stime, ktime_t *etime, 452 const struct drm_display_mode *mode); 453 int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id, 454 int *max_error, struct timeval *vblank_time, 455 unsigned flags); 456 457 /* vc4_debugfs.c */ 458 int vc4_debugfs_init(struct drm_minor *minor); 459 void vc4_debugfs_cleanup(struct drm_minor *minor); 460 461 /* vc4_drv.c */ 462 void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index); 463 464 /* vc4_dpi.c */ 465 extern struct platform_driver vc4_dpi_driver; 466 int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused); 467 468 /* vc4_gem.c */ 469 void vc4_gem_init(struct drm_device *dev); 470 void vc4_gem_destroy(struct drm_device *dev); 471 int vc4_submit_cl_ioctl(struct drm_device *dev, void *data, 472 struct drm_file *file_priv); 473 int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data, 474 struct drm_file *file_priv); 475 int vc4_wait_bo_ioctl(struct drm_device *dev, void *data, 476 struct drm_file *file_priv); 477 void vc4_submit_next_bin_job(struct drm_device *dev); 478 void vc4_submit_next_render_job(struct drm_device *dev); 479 void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec); 480 int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, 481 uint64_t timeout_ns, bool interruptible); 482 void vc4_job_handle_completed(struct vc4_dev *vc4); 483 int vc4_queue_seqno_cb(struct drm_device *dev, 484 struct vc4_seqno_cb *cb, uint64_t seqno, 485 void (*func)(struct vc4_seqno_cb *cb)); 486 487 /* vc4_hdmi.c */ 488 extern struct platform_driver vc4_hdmi_driver; 489 int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused); 490 491 /* vc4_hdmi.c */ 492 extern struct platform_driver vc4_vec_driver; 493 int vc4_vec_debugfs_regs(struct seq_file *m, void *unused); 494 495 /* vc4_irq.c */ 496 irqreturn_t vc4_irq(int irq, void *arg); 497 void vc4_irq_preinstall(struct drm_device *dev); 498 int vc4_irq_postinstall(struct drm_device *dev); 499 void vc4_irq_uninstall(struct drm_device *dev); 500 void vc4_irq_reset(struct drm_device *dev); 501 502 /* vc4_hvs.c */ 503 extern struct platform_driver vc4_hvs_driver; 504 void vc4_hvs_dump_state(struct drm_device *dev); 505 int vc4_hvs_debugfs_regs(struct seq_file *m, void *unused); 506 507 /* vc4_kms.c */ 508 int vc4_kms_load(struct drm_device *dev); 509 510 /* vc4_plane.c */ 511 struct drm_plane *vc4_plane_init(struct drm_device *dev, 512 enum drm_plane_type type); 513 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist); 514 u32 vc4_plane_dlist_size(const struct drm_plane_state *state); 515 void vc4_plane_async_set_fb(struct drm_plane *plane, 516 struct drm_framebuffer *fb); 517 518 /* vc4_v3d.c */ 519 extern struct platform_driver vc4_v3d_driver; 520 int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused); 521 int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused); 522 523 /* vc4_validate.c */ 524 int 525 vc4_validate_bin_cl(struct drm_device *dev, 526 void *validated, 527 void *unvalidated, 528 struct vc4_exec_info *exec); 529 530 int 531 vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec); 532 533 struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec, 534 uint32_t hindex); 535 536 int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec); 537 538 bool vc4_check_tex_size(struct vc4_exec_info *exec, 539 struct drm_gem_cma_object *fbo, 540 uint32_t offset, uint8_t tiling_format, 541 uint32_t width, uint32_t height, uint8_t cpp); 542 543 /* vc4_validate_shader.c */ 544 struct vc4_validated_shader_info * 545 vc4_validate_shader(struct drm_gem_cma_object *shader_obj); 546