1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2015 Broadcom 4 */ 5 6 /** 7 * DOC: VC4 plane module 8 * 9 * Each DRM plane is a layer of pixels being scanned out by the HVS. 10 * 11 * At atomic modeset check time, we compute the HVS display element 12 * state that would be necessary for displaying the plane (giving us a 13 * chance to figure out if a plane configuration is invalid), then at 14 * atomic flush time the CRTC will ask us to write our element state 15 * into the region of the HVS that it has allocated for us. 16 */ 17 18 #include <drm/drm_atomic.h> 19 #include <drm/drm_atomic_helper.h> 20 #include <drm/drm_atomic_uapi.h> 21 #include <drm/drm_fb_cma_helper.h> 22 #include <drm/drm_fourcc.h> 23 #include <drm/drm_gem_framebuffer_helper.h> 24 #include <drm/drm_plane_helper.h> 25 26 #include "uapi/drm/vc4_drm.h" 27 28 #include "vc4_drv.h" 29 #include "vc4_regs.h" 30 31 static const struct hvs_format { 32 u32 drm; /* DRM_FORMAT_* */ 33 u32 hvs; /* HVS_FORMAT_* */ 34 u32 pixel_order; 35 u32 pixel_order_hvs5; 36 } hvs_formats[] = { 37 { 38 .drm = DRM_FORMAT_XRGB8888, 39 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 40 .pixel_order = HVS_PIXEL_ORDER_ABGR, 41 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 42 }, 43 { 44 .drm = DRM_FORMAT_ARGB8888, 45 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 46 .pixel_order = HVS_PIXEL_ORDER_ABGR, 47 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 48 }, 49 { 50 .drm = DRM_FORMAT_ABGR8888, 51 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 52 .pixel_order = HVS_PIXEL_ORDER_ARGB, 53 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 54 }, 55 { 56 .drm = DRM_FORMAT_XBGR8888, 57 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 58 .pixel_order = HVS_PIXEL_ORDER_ARGB, 59 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 60 }, 61 { 62 .drm = DRM_FORMAT_RGB565, 63 .hvs = HVS_PIXEL_FORMAT_RGB565, 64 .pixel_order = HVS_PIXEL_ORDER_XRGB, 65 }, 66 { 67 .drm = DRM_FORMAT_BGR565, 68 .hvs = HVS_PIXEL_FORMAT_RGB565, 69 .pixel_order = HVS_PIXEL_ORDER_XBGR, 70 }, 71 { 72 .drm = DRM_FORMAT_ARGB1555, 73 .hvs = HVS_PIXEL_FORMAT_RGBA5551, 74 .pixel_order = HVS_PIXEL_ORDER_ABGR, 75 }, 76 { 77 .drm = DRM_FORMAT_XRGB1555, 78 .hvs = HVS_PIXEL_FORMAT_RGBA5551, 79 .pixel_order = HVS_PIXEL_ORDER_ABGR, 80 }, 81 { 82 .drm = DRM_FORMAT_RGB888, 83 .hvs = HVS_PIXEL_FORMAT_RGB888, 84 .pixel_order = HVS_PIXEL_ORDER_XRGB, 85 }, 86 { 87 .drm = DRM_FORMAT_BGR888, 88 .hvs = HVS_PIXEL_FORMAT_RGB888, 89 .pixel_order = HVS_PIXEL_ORDER_XBGR, 90 }, 91 { 92 .drm = DRM_FORMAT_YUV422, 93 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 94 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 95 }, 96 { 97 .drm = DRM_FORMAT_YVU422, 98 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 99 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 100 }, 101 { 102 .drm = DRM_FORMAT_YUV420, 103 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 104 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 105 }, 106 { 107 .drm = DRM_FORMAT_YVU420, 108 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 109 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 110 }, 111 { 112 .drm = DRM_FORMAT_NV12, 113 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 114 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 115 }, 116 { 117 .drm = DRM_FORMAT_NV21, 118 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 119 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 120 }, 121 { 122 .drm = DRM_FORMAT_NV16, 123 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 124 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 125 }, 126 { 127 .drm = DRM_FORMAT_NV61, 128 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 129 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 130 }, 131 }; 132 133 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) 134 { 135 unsigned i; 136 137 for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { 138 if (hvs_formats[i].drm == drm_format) 139 return &hvs_formats[i]; 140 } 141 142 return NULL; 143 } 144 145 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst) 146 { 147 if (dst == src) 148 return VC4_SCALING_NONE; 149 if (3 * dst >= 2 * src) 150 return VC4_SCALING_PPF; 151 else 152 return VC4_SCALING_TPZ; 153 } 154 155 static bool plane_enabled(struct drm_plane_state *state) 156 { 157 return state->fb && !WARN_ON(!state->crtc); 158 } 159 160 static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane) 161 { 162 struct vc4_plane_state *vc4_state; 163 164 if (WARN_ON(!plane->state)) 165 return NULL; 166 167 vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL); 168 if (!vc4_state) 169 return NULL; 170 171 memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm)); 172 vc4_state->dlist_initialized = 0; 173 174 __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base); 175 176 if (vc4_state->dlist) { 177 vc4_state->dlist = kmemdup(vc4_state->dlist, 178 vc4_state->dlist_count * 4, 179 GFP_KERNEL); 180 if (!vc4_state->dlist) { 181 kfree(vc4_state); 182 return NULL; 183 } 184 vc4_state->dlist_size = vc4_state->dlist_count; 185 } 186 187 return &vc4_state->base; 188 } 189 190 static void vc4_plane_destroy_state(struct drm_plane *plane, 191 struct drm_plane_state *state) 192 { 193 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 194 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 195 196 if (drm_mm_node_allocated(&vc4_state->lbm)) { 197 unsigned long irqflags; 198 199 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); 200 drm_mm_remove_node(&vc4_state->lbm); 201 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); 202 } 203 204 kfree(vc4_state->dlist); 205 __drm_atomic_helper_plane_destroy_state(&vc4_state->base); 206 kfree(state); 207 } 208 209 /* Called during init to allocate the plane's atomic state. */ 210 static void vc4_plane_reset(struct drm_plane *plane) 211 { 212 struct vc4_plane_state *vc4_state; 213 214 WARN_ON(plane->state); 215 216 vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL); 217 if (!vc4_state) 218 return; 219 220 __drm_atomic_helper_plane_reset(plane, &vc4_state->base); 221 } 222 223 static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state) 224 { 225 if (vc4_state->dlist_count == vc4_state->dlist_size) { 226 u32 new_size = max(4u, vc4_state->dlist_count * 2); 227 u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL); 228 229 if (!new_dlist) 230 return; 231 memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4); 232 233 kfree(vc4_state->dlist); 234 vc4_state->dlist = new_dlist; 235 vc4_state->dlist_size = new_size; 236 } 237 238 vc4_state->dlist_count++; 239 } 240 241 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) 242 { 243 unsigned int idx = vc4_state->dlist_count; 244 245 vc4_dlist_counter_increment(vc4_state); 246 vc4_state->dlist[idx] = val; 247 } 248 249 /* Returns the scl0/scl1 field based on whether the dimensions need to 250 * be up/down/non-scaled. 251 * 252 * This is a replication of a table from the spec. 253 */ 254 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane) 255 { 256 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 257 258 switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) { 259 case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF: 260 return SCALER_CTL0_SCL_H_PPF_V_PPF; 261 case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF: 262 return SCALER_CTL0_SCL_H_TPZ_V_PPF; 263 case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ: 264 return SCALER_CTL0_SCL_H_PPF_V_TPZ; 265 case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ: 266 return SCALER_CTL0_SCL_H_TPZ_V_TPZ; 267 case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE: 268 return SCALER_CTL0_SCL_H_PPF_V_NONE; 269 case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF: 270 return SCALER_CTL0_SCL_H_NONE_V_PPF; 271 case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ: 272 return SCALER_CTL0_SCL_H_NONE_V_TPZ; 273 case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE: 274 return SCALER_CTL0_SCL_H_TPZ_V_NONE; 275 default: 276 case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE: 277 /* The unity case is independently handled by 278 * SCALER_CTL0_UNITY. 279 */ 280 return 0; 281 } 282 } 283 284 static int vc4_plane_margins_adj(struct drm_plane_state *pstate) 285 { 286 struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate); 287 unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay; 288 struct drm_crtc_state *crtc_state; 289 290 crtc_state = drm_atomic_get_new_crtc_state(pstate->state, 291 pstate->crtc); 292 293 vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom); 294 if (!left && !right && !top && !bottom) 295 return 0; 296 297 if (left + right >= crtc_state->mode.hdisplay || 298 top + bottom >= crtc_state->mode.vdisplay) 299 return -EINVAL; 300 301 adjhdisplay = crtc_state->mode.hdisplay - (left + right); 302 vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x * 303 adjhdisplay, 304 crtc_state->mode.hdisplay); 305 vc4_pstate->crtc_x += left; 306 if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - left) 307 vc4_pstate->crtc_x = crtc_state->mode.hdisplay - left; 308 309 adjvdisplay = crtc_state->mode.vdisplay - (top + bottom); 310 vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y * 311 adjvdisplay, 312 crtc_state->mode.vdisplay); 313 vc4_pstate->crtc_y += top; 314 if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - top) 315 vc4_pstate->crtc_y = crtc_state->mode.vdisplay - top; 316 317 vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w * 318 adjhdisplay, 319 crtc_state->mode.hdisplay); 320 vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h * 321 adjvdisplay, 322 crtc_state->mode.vdisplay); 323 324 if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h) 325 return -EINVAL; 326 327 return 0; 328 } 329 330 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) 331 { 332 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 333 struct drm_framebuffer *fb = state->fb; 334 struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); 335 u32 subpixel_src_mask = (1 << 16) - 1; 336 int num_planes = fb->format->num_planes; 337 struct drm_crtc_state *crtc_state; 338 u32 h_subsample = fb->format->hsub; 339 u32 v_subsample = fb->format->vsub; 340 int i, ret; 341 342 crtc_state = drm_atomic_get_existing_crtc_state(state->state, 343 state->crtc); 344 if (!crtc_state) { 345 DRM_DEBUG_KMS("Invalid crtc state\n"); 346 return -EINVAL; 347 } 348 349 ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1, 350 INT_MAX, true, true); 351 if (ret) 352 return ret; 353 354 for (i = 0; i < num_planes; i++) 355 vc4_state->offsets[i] = bo->paddr + fb->offsets[i]; 356 357 /* We don't support subpixel source positioning for scaling. */ 358 if ((state->src.x1 & subpixel_src_mask) || 359 (state->src.x2 & subpixel_src_mask) || 360 (state->src.y1 & subpixel_src_mask) || 361 (state->src.y2 & subpixel_src_mask)) { 362 return -EINVAL; 363 } 364 365 vc4_state->src_x = state->src.x1 >> 16; 366 vc4_state->src_y = state->src.y1 >> 16; 367 vc4_state->src_w[0] = (state->src.x2 - state->src.x1) >> 16; 368 vc4_state->src_h[0] = (state->src.y2 - state->src.y1) >> 16; 369 370 vc4_state->crtc_x = state->dst.x1; 371 vc4_state->crtc_y = state->dst.y1; 372 vc4_state->crtc_w = state->dst.x2 - state->dst.x1; 373 vc4_state->crtc_h = state->dst.y2 - state->dst.y1; 374 375 ret = vc4_plane_margins_adj(state); 376 if (ret) 377 return ret; 378 379 vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0], 380 vc4_state->crtc_w); 381 vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0], 382 vc4_state->crtc_h); 383 384 vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && 385 vc4_state->y_scaling[0] == VC4_SCALING_NONE); 386 387 if (num_planes > 1) { 388 vc4_state->is_yuv = true; 389 390 vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample; 391 vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample; 392 393 vc4_state->x_scaling[1] = 394 vc4_get_scaling_mode(vc4_state->src_w[1], 395 vc4_state->crtc_w); 396 vc4_state->y_scaling[1] = 397 vc4_get_scaling_mode(vc4_state->src_h[1], 398 vc4_state->crtc_h); 399 400 /* YUV conversion requires that horizontal scaling be enabled 401 * on the UV plane even if vc4_get_scaling_mode() returned 402 * VC4_SCALING_NONE (which can happen when the down-scaling 403 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this 404 * case. 405 */ 406 if (vc4_state->x_scaling[1] == VC4_SCALING_NONE) 407 vc4_state->x_scaling[1] = VC4_SCALING_PPF; 408 } else { 409 vc4_state->is_yuv = false; 410 vc4_state->x_scaling[1] = VC4_SCALING_NONE; 411 vc4_state->y_scaling[1] = VC4_SCALING_NONE; 412 } 413 414 return 0; 415 } 416 417 static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) 418 { 419 u32 scale, recip; 420 421 scale = (1 << 16) * src / dst; 422 423 /* The specs note that while the reciprocal would be defined 424 * as (1<<32)/scale, ~0 is close enough. 425 */ 426 recip = ~0 / scale; 427 428 vc4_dlist_write(vc4_state, 429 VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) | 430 VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE)); 431 vc4_dlist_write(vc4_state, 432 VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP)); 433 } 434 435 static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst) 436 { 437 u32 scale = (1 << 16) * src / dst; 438 439 vc4_dlist_write(vc4_state, 440 SCALER_PPF_AGC | 441 VC4_SET_FIELD(scale, SCALER_PPF_SCALE) | 442 VC4_SET_FIELD(0, SCALER_PPF_IPHASE)); 443 } 444 445 static u32 vc4_lbm_size(struct drm_plane_state *state) 446 { 447 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 448 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 449 u32 pix_per_line; 450 u32 lbm; 451 452 /* LBM is not needed when there's no vertical scaling. */ 453 if (vc4_state->y_scaling[0] == VC4_SCALING_NONE && 454 vc4_state->y_scaling[1] == VC4_SCALING_NONE) 455 return 0; 456 457 /* 458 * This can be further optimized in the RGB/YUV444 case if the PPF 459 * decimation factor is between 0.5 and 1.0 by using crtc_w. 460 * 461 * It's not an issue though, since in that case since src_w[0] is going 462 * to be greater than or equal to crtc_w. 463 */ 464 if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ) 465 pix_per_line = vc4_state->crtc_w; 466 else 467 pix_per_line = vc4_state->src_w[0]; 468 469 if (!vc4_state->is_yuv) { 470 if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ) 471 lbm = pix_per_line * 8; 472 else { 473 /* In special cases, this multiplier might be 12. */ 474 lbm = pix_per_line * 16; 475 } 476 } else { 477 /* There are cases for this going down to a multiplier 478 * of 2, but according to the firmware source, the 479 * table in the docs is somewhat wrong. 480 */ 481 lbm = pix_per_line * 16; 482 } 483 484 /* Align it to 64 or 128 (hvs5) bytes */ 485 lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64); 486 487 /* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */ 488 lbm /= vc4->hvs->hvs5 ? 4 : 2; 489 490 return lbm; 491 } 492 493 static void vc4_write_scaling_parameters(struct drm_plane_state *state, 494 int channel) 495 { 496 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 497 498 /* Ch0 H-PPF Word 0: Scaling Parameters */ 499 if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) { 500 vc4_write_ppf(vc4_state, 501 vc4_state->src_w[channel], vc4_state->crtc_w); 502 } 503 504 /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */ 505 if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) { 506 vc4_write_ppf(vc4_state, 507 vc4_state->src_h[channel], vc4_state->crtc_h); 508 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 509 } 510 511 /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */ 512 if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) { 513 vc4_write_tpz(vc4_state, 514 vc4_state->src_w[channel], vc4_state->crtc_w); 515 } 516 517 /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */ 518 if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) { 519 vc4_write_tpz(vc4_state, 520 vc4_state->src_h[channel], vc4_state->crtc_h); 521 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 522 } 523 } 524 525 static void vc4_plane_calc_load(struct drm_plane_state *state) 526 { 527 unsigned int hvs_load_shift, vrefresh, i; 528 struct drm_framebuffer *fb = state->fb; 529 struct vc4_plane_state *vc4_state; 530 struct drm_crtc_state *crtc_state; 531 unsigned int vscale_factor; 532 struct vc4_dev *vc4; 533 534 vc4 = to_vc4_dev(state->plane->dev); 535 if (!vc4->load_tracker_available) 536 return; 537 538 vc4_state = to_vc4_plane_state(state); 539 crtc_state = drm_atomic_get_existing_crtc_state(state->state, 540 state->crtc); 541 vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode); 542 543 /* The HVS is able to process 2 pixels/cycle when scaling the source, 544 * 4 pixels/cycle otherwise. 545 * Alpha blending step seems to be pipelined and it's always operating 546 * at 4 pixels/cycle, so the limiting aspect here seems to be the 547 * scaler block. 548 * HVS load is expressed in clk-cycles/sec (AKA Hz). 549 */ 550 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || 551 vc4_state->x_scaling[1] != VC4_SCALING_NONE || 552 vc4_state->y_scaling[0] != VC4_SCALING_NONE || 553 vc4_state->y_scaling[1] != VC4_SCALING_NONE) 554 hvs_load_shift = 1; 555 else 556 hvs_load_shift = 2; 557 558 vc4_state->membus_load = 0; 559 vc4_state->hvs_load = 0; 560 for (i = 0; i < fb->format->num_planes; i++) { 561 /* Even if the bandwidth/plane required for a single frame is 562 * 563 * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh 564 * 565 * when downscaling, we have to read more pixels per line in 566 * the time frame reserved for a single line, so the bandwidth 567 * demand can be punctually higher. To account for that, we 568 * calculate the down-scaling factor and multiply the plane 569 * load by this number. We're likely over-estimating the read 570 * demand, but that's better than under-estimating it. 571 */ 572 vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i], 573 vc4_state->crtc_h); 574 vc4_state->membus_load += vc4_state->src_w[i] * 575 vc4_state->src_h[i] * vscale_factor * 576 fb->format->cpp[i]; 577 vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w; 578 } 579 580 vc4_state->hvs_load *= vrefresh; 581 vc4_state->hvs_load >>= hvs_load_shift; 582 vc4_state->membus_load *= vrefresh; 583 } 584 585 static int vc4_plane_allocate_lbm(struct drm_plane_state *state) 586 { 587 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 588 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 589 unsigned long irqflags; 590 u32 lbm_size; 591 592 lbm_size = vc4_lbm_size(state); 593 if (!lbm_size) 594 return 0; 595 596 if (WARN_ON(!vc4_state->lbm_offset)) 597 return -EINVAL; 598 599 /* Allocate the LBM memory that the HVS will use for temporary 600 * storage due to our scaling/format conversion. 601 */ 602 if (!drm_mm_node_allocated(&vc4_state->lbm)) { 603 int ret; 604 605 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); 606 ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm, 607 &vc4_state->lbm, 608 lbm_size, 609 vc4->hvs->hvs5 ? 64 : 32, 610 0, 0); 611 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); 612 613 if (ret) 614 return ret; 615 } else { 616 WARN_ON_ONCE(lbm_size != vc4_state->lbm.size); 617 } 618 619 vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start; 620 621 return 0; 622 } 623 624 /* Writes out a full display list for an active plane to the plane's 625 * private dlist state. 626 */ 627 static int vc4_plane_mode_set(struct drm_plane *plane, 628 struct drm_plane_state *state) 629 { 630 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 631 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 632 struct drm_framebuffer *fb = state->fb; 633 u32 ctl0_offset = vc4_state->dlist_count; 634 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); 635 u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier); 636 int num_planes = fb->format->num_planes; 637 u32 h_subsample = fb->format->hsub; 638 u32 v_subsample = fb->format->vsub; 639 bool mix_plane_alpha; 640 bool covers_screen; 641 u32 scl0, scl1, pitch0; 642 u32 tiling, src_y; 643 u32 hvs_format = format->hvs; 644 unsigned int rotation; 645 int ret, i; 646 647 if (vc4_state->dlist_initialized) 648 return 0; 649 650 ret = vc4_plane_setup_clipping_and_scaling(state); 651 if (ret) 652 return ret; 653 654 /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB 655 * and 4:4:4, scl1 should be set to scl0 so both channels of 656 * the scaler do the same thing. For YUV, the Y plane needs 657 * to be put in channel 1 and Cb/Cr in channel 0, so we swap 658 * the scl fields here. 659 */ 660 if (num_planes == 1) { 661 scl0 = vc4_get_scl_field(state, 0); 662 scl1 = scl0; 663 } else { 664 scl0 = vc4_get_scl_field(state, 1); 665 scl1 = vc4_get_scl_field(state, 0); 666 } 667 668 rotation = drm_rotation_simplify(state->rotation, 669 DRM_MODE_ROTATE_0 | 670 DRM_MODE_REFLECT_X | 671 DRM_MODE_REFLECT_Y); 672 673 /* We must point to the last line when Y reflection is enabled. */ 674 src_y = vc4_state->src_y; 675 if (rotation & DRM_MODE_REFLECT_Y) 676 src_y += vc4_state->src_h[0] - 1; 677 678 switch (base_format_mod) { 679 case DRM_FORMAT_MOD_LINEAR: 680 tiling = SCALER_CTL0_TILING_LINEAR; 681 pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH); 682 683 /* Adjust the base pointer to the first pixel to be scanned 684 * out. 685 */ 686 for (i = 0; i < num_planes; i++) { 687 vc4_state->offsets[i] += src_y / 688 (i ? v_subsample : 1) * 689 fb->pitches[i]; 690 691 vc4_state->offsets[i] += vc4_state->src_x / 692 (i ? h_subsample : 1) * 693 fb->format->cpp[i]; 694 } 695 696 break; 697 698 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: { 699 u32 tile_size_shift = 12; /* T tiles are 4kb */ 700 /* Whole-tile offsets, mostly for setting the pitch. */ 701 u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5; 702 u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */ 703 u32 tile_w_mask = (1 << tile_w_shift) - 1; 704 /* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice 705 * the height (in pixels) of a 4k tile. 706 */ 707 u32 tile_h_mask = (2 << tile_h_shift) - 1; 708 /* For T-tiled, the FB pitch is "how many bytes from one row to 709 * the next, such that 710 * 711 * pitch * tile_h == tile_size * tiles_per_row 712 */ 713 u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift); 714 u32 tiles_l = vc4_state->src_x >> tile_w_shift; 715 u32 tiles_r = tiles_w - tiles_l; 716 u32 tiles_t = src_y >> tile_h_shift; 717 /* Intra-tile offsets, which modify the base address (the 718 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that 719 * base address). 720 */ 721 u32 tile_y = (src_y >> 4) & 1; 722 u32 subtile_y = (src_y >> 2) & 3; 723 u32 utile_y = src_y & 3; 724 u32 x_off = vc4_state->src_x & tile_w_mask; 725 u32 y_off = src_y & tile_h_mask; 726 727 /* When Y reflection is requested we must set the 728 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines 729 * after the initial one should be fetched in descending order, 730 * which makes sense since we start from the last line and go 731 * backward. 732 * Don't know why we need y_off = max_y_off - y_off, but it's 733 * definitely required (I guess it's also related to the "going 734 * backward" situation). 735 */ 736 if (rotation & DRM_MODE_REFLECT_Y) { 737 y_off = tile_h_mask - y_off; 738 pitch0 = SCALER_PITCH0_TILE_LINE_DIR; 739 } else { 740 pitch0 = 0; 741 } 742 743 tiling = SCALER_CTL0_TILING_256B_OR_T; 744 pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) | 745 VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) | 746 VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) | 747 VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R)); 748 vc4_state->offsets[0] += tiles_t * (tiles_w << tile_size_shift); 749 vc4_state->offsets[0] += subtile_y << 8; 750 vc4_state->offsets[0] += utile_y << 4; 751 752 /* Rows of tiles alternate left-to-right and right-to-left. */ 753 if (tiles_t & 1) { 754 pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR; 755 vc4_state->offsets[0] += (tiles_w - tiles_l) << 756 tile_size_shift; 757 vc4_state->offsets[0] -= (1 + !tile_y) << 10; 758 } else { 759 vc4_state->offsets[0] += tiles_l << tile_size_shift; 760 vc4_state->offsets[0] += tile_y << 10; 761 } 762 763 break; 764 } 765 766 case DRM_FORMAT_MOD_BROADCOM_SAND64: 767 case DRM_FORMAT_MOD_BROADCOM_SAND128: 768 case DRM_FORMAT_MOD_BROADCOM_SAND256: { 769 uint32_t param = fourcc_mod_broadcom_param(fb->modifier); 770 u32 tile_w, tile, x_off, pix_per_tile; 771 772 hvs_format = HVS_PIXEL_FORMAT_H264; 773 774 switch (base_format_mod) { 775 case DRM_FORMAT_MOD_BROADCOM_SAND64: 776 tiling = SCALER_CTL0_TILING_64B; 777 tile_w = 64; 778 break; 779 case DRM_FORMAT_MOD_BROADCOM_SAND128: 780 tiling = SCALER_CTL0_TILING_128B; 781 tile_w = 128; 782 break; 783 case DRM_FORMAT_MOD_BROADCOM_SAND256: 784 tiling = SCALER_CTL0_TILING_256B_OR_T; 785 tile_w = 256; 786 break; 787 default: 788 break; 789 } 790 791 if (param > SCALER_TILE_HEIGHT_MASK) { 792 DRM_DEBUG_KMS("SAND height too large (%d)\n", param); 793 return -EINVAL; 794 } 795 796 pix_per_tile = tile_w / fb->format->cpp[0]; 797 tile = vc4_state->src_x / pix_per_tile; 798 x_off = vc4_state->src_x % pix_per_tile; 799 800 /* Adjust the base pointer to the first pixel to be scanned 801 * out. 802 */ 803 for (i = 0; i < num_planes; i++) { 804 vc4_state->offsets[i] += param * tile_w * tile; 805 vc4_state->offsets[i] += src_y / 806 (i ? v_subsample : 1) * 807 tile_w; 808 vc4_state->offsets[i] += x_off / 809 (i ? h_subsample : 1) * 810 fb->format->cpp[i]; 811 } 812 813 pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT); 814 break; 815 } 816 817 default: 818 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", 819 (long long)fb->modifier); 820 return -EINVAL; 821 } 822 823 /* Don't waste cycles mixing with plane alpha if the set alpha 824 * is opaque or there is no per-pixel alpha information. 825 * In any case we use the alpha property value as the fixed alpha. 826 */ 827 mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && 828 fb->format->has_alpha; 829 830 if (!vc4->hvs->hvs5) { 831 /* Control word */ 832 vc4_dlist_write(vc4_state, 833 SCALER_CTL0_VALID | 834 (rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) | 835 (rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) | 836 VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) | 837 (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | 838 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 839 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | 840 (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) | 841 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 842 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1)); 843 844 /* Position Word 0: Image Positions and Alpha Value */ 845 vc4_state->pos0_offset = vc4_state->dlist_count; 846 vc4_dlist_write(vc4_state, 847 VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) | 848 VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | 849 VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); 850 851 /* Position Word 1: Scaled Image Dimensions. */ 852 if (!vc4_state->is_unity) { 853 vc4_dlist_write(vc4_state, 854 VC4_SET_FIELD(vc4_state->crtc_w, 855 SCALER_POS1_SCL_WIDTH) | 856 VC4_SET_FIELD(vc4_state->crtc_h, 857 SCALER_POS1_SCL_HEIGHT)); 858 } 859 860 /* Position Word 2: Source Image Size, Alpha */ 861 vc4_state->pos2_offset = vc4_state->dlist_count; 862 vc4_dlist_write(vc4_state, 863 VC4_SET_FIELD(fb->format->has_alpha ? 864 SCALER_POS2_ALPHA_MODE_PIPELINE : 865 SCALER_POS2_ALPHA_MODE_FIXED, 866 SCALER_POS2_ALPHA_MODE) | 867 (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) | 868 (fb->format->has_alpha ? 869 SCALER_POS2_ALPHA_PREMULT : 0) | 870 VC4_SET_FIELD(vc4_state->src_w[0], 871 SCALER_POS2_WIDTH) | 872 VC4_SET_FIELD(vc4_state->src_h[0], 873 SCALER_POS2_HEIGHT)); 874 875 /* Position Word 3: Context. Written by the HVS. */ 876 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 877 878 } else { 879 u32 hvs_pixel_order = format->pixel_order; 880 881 if (format->pixel_order_hvs5) 882 hvs_pixel_order = format->pixel_order_hvs5; 883 884 /* Control word */ 885 vc4_dlist_write(vc4_state, 886 SCALER_CTL0_VALID | 887 (hvs_pixel_order << SCALER_CTL0_ORDER_SHIFT) | 888 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 889 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | 890 (vc4_state->is_unity ? 891 SCALER5_CTL0_UNITY : 0) | 892 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 893 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) | 894 SCALER5_CTL0_ALPHA_EXPAND | 895 SCALER5_CTL0_RGB_EXPAND); 896 897 /* Position Word 0: Image Positions and Alpha Value */ 898 vc4_state->pos0_offset = vc4_state->dlist_count; 899 vc4_dlist_write(vc4_state, 900 (rotation & DRM_MODE_REFLECT_Y ? 901 SCALER5_POS0_VFLIP : 0) | 902 VC4_SET_FIELD(vc4_state->crtc_x, 903 SCALER_POS0_START_X) | 904 (rotation & DRM_MODE_REFLECT_X ? 905 SCALER5_POS0_HFLIP : 0) | 906 VC4_SET_FIELD(vc4_state->crtc_y, 907 SCALER5_POS0_START_Y) 908 ); 909 910 /* Control Word 2 */ 911 vc4_dlist_write(vc4_state, 912 VC4_SET_FIELD(state->alpha >> 4, 913 SCALER5_CTL2_ALPHA) | 914 (fb->format->has_alpha ? 915 SCALER5_CTL2_ALPHA_PREMULT : 0) | 916 (mix_plane_alpha ? 917 SCALER5_CTL2_ALPHA_MIX : 0) | 918 VC4_SET_FIELD(fb->format->has_alpha ? 919 SCALER5_CTL2_ALPHA_MODE_PIPELINE : 920 SCALER5_CTL2_ALPHA_MODE_FIXED, 921 SCALER5_CTL2_ALPHA_MODE) 922 ); 923 924 /* Position Word 1: Scaled Image Dimensions. */ 925 if (!vc4_state->is_unity) { 926 vc4_dlist_write(vc4_state, 927 VC4_SET_FIELD(vc4_state->crtc_w, 928 SCALER5_POS1_SCL_WIDTH) | 929 VC4_SET_FIELD(vc4_state->crtc_h, 930 SCALER5_POS1_SCL_HEIGHT)); 931 } 932 933 /* Position Word 2: Source Image Size */ 934 vc4_state->pos2_offset = vc4_state->dlist_count; 935 vc4_dlist_write(vc4_state, 936 VC4_SET_FIELD(vc4_state->src_w[0], 937 SCALER5_POS2_WIDTH) | 938 VC4_SET_FIELD(vc4_state->src_h[0], 939 SCALER5_POS2_HEIGHT)); 940 941 /* Position Word 3: Context. Written by the HVS. */ 942 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 943 } 944 945 946 /* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers 947 * 948 * The pointers may be any byte address. 949 */ 950 vc4_state->ptr0_offset = vc4_state->dlist_count; 951 for (i = 0; i < num_planes; i++) 952 vc4_dlist_write(vc4_state, vc4_state->offsets[i]); 953 954 /* Pointer Context Word 0/1/2: Written by the HVS */ 955 for (i = 0; i < num_planes; i++) 956 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 957 958 /* Pitch word 0 */ 959 vc4_dlist_write(vc4_state, pitch0); 960 961 /* Pitch word 1/2 */ 962 for (i = 1; i < num_planes; i++) { 963 if (hvs_format != HVS_PIXEL_FORMAT_H264) { 964 vc4_dlist_write(vc4_state, 965 VC4_SET_FIELD(fb->pitches[i], 966 SCALER_SRC_PITCH)); 967 } else { 968 vc4_dlist_write(vc4_state, pitch0); 969 } 970 } 971 972 /* Colorspace conversion words */ 973 if (vc4_state->is_yuv) { 974 vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5); 975 vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5); 976 vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5); 977 } 978 979 vc4_state->lbm_offset = 0; 980 981 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || 982 vc4_state->x_scaling[1] != VC4_SCALING_NONE || 983 vc4_state->y_scaling[0] != VC4_SCALING_NONE || 984 vc4_state->y_scaling[1] != VC4_SCALING_NONE) { 985 /* Reserve a slot for the LBM Base Address. The real value will 986 * be set when calling vc4_plane_allocate_lbm(). 987 */ 988 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || 989 vc4_state->y_scaling[1] != VC4_SCALING_NONE) { 990 vc4_state->lbm_offset = vc4_state->dlist_count; 991 vc4_dlist_counter_increment(vc4_state); 992 } 993 994 if (num_planes > 1) { 995 /* Emit Cb/Cr as channel 0 and Y as channel 996 * 1. This matches how we set up scl0/scl1 997 * above. 998 */ 999 vc4_write_scaling_parameters(state, 1); 1000 } 1001 vc4_write_scaling_parameters(state, 0); 1002 1003 /* If any PPF setup was done, then all the kernel 1004 * pointers get uploaded. 1005 */ 1006 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF || 1007 vc4_state->y_scaling[0] == VC4_SCALING_PPF || 1008 vc4_state->x_scaling[1] == VC4_SCALING_PPF || 1009 vc4_state->y_scaling[1] == VC4_SCALING_PPF) { 1010 u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start, 1011 SCALER_PPF_KERNEL_OFFSET); 1012 1013 /* HPPF plane 0 */ 1014 vc4_dlist_write(vc4_state, kernel); 1015 /* VPPF plane 0 */ 1016 vc4_dlist_write(vc4_state, kernel); 1017 /* HPPF plane 1 */ 1018 vc4_dlist_write(vc4_state, kernel); 1019 /* VPPF plane 1 */ 1020 vc4_dlist_write(vc4_state, kernel); 1021 } 1022 } 1023 1024 vc4_state->dlist[ctl0_offset] |= 1025 VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE); 1026 1027 /* crtc_* are already clipped coordinates. */ 1028 covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 && 1029 vc4_state->crtc_w == state->crtc->mode.hdisplay && 1030 vc4_state->crtc_h == state->crtc->mode.vdisplay; 1031 /* Background fill might be necessary when the plane has per-pixel 1032 * alpha content or a non-opaque plane alpha and could blend from the 1033 * background or does not cover the entire screen. 1034 */ 1035 vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen || 1036 state->alpha != DRM_BLEND_ALPHA_OPAQUE; 1037 1038 /* Flag the dlist as initialized to avoid checking it twice in case 1039 * the async update check already called vc4_plane_mode_set() and 1040 * decided to fallback to sync update because async update was not 1041 * possible. 1042 */ 1043 vc4_state->dlist_initialized = 1; 1044 1045 vc4_plane_calc_load(state); 1046 1047 return 0; 1048 } 1049 1050 /* If a modeset involves changing the setup of a plane, the atomic 1051 * infrastructure will call this to validate a proposed plane setup. 1052 * However, if a plane isn't getting updated, this (and the 1053 * corresponding vc4_plane_atomic_update) won't get called. Thus, we 1054 * compute the dlist here and have all active plane dlists get updated 1055 * in the CRTC's flush. 1056 */ 1057 static int vc4_plane_atomic_check(struct drm_plane *plane, 1058 struct drm_plane_state *state) 1059 { 1060 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 1061 int ret; 1062 1063 vc4_state->dlist_count = 0; 1064 1065 if (!plane_enabled(state)) 1066 return 0; 1067 1068 ret = vc4_plane_mode_set(plane, state); 1069 if (ret) 1070 return ret; 1071 1072 return vc4_plane_allocate_lbm(state); 1073 } 1074 1075 static void vc4_plane_atomic_update(struct drm_plane *plane, 1076 struct drm_plane_state *old_state) 1077 { 1078 /* No contents here. Since we don't know where in the CRTC's 1079 * dlist we should be stored, our dlist is uploaded to the 1080 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush 1081 * time. 1082 */ 1083 } 1084 1085 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist) 1086 { 1087 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); 1088 int i; 1089 1090 vc4_state->hw_dlist = dlist; 1091 1092 /* Can't memcpy_toio() because it needs to be 32-bit writes. */ 1093 for (i = 0; i < vc4_state->dlist_count; i++) 1094 writel(vc4_state->dlist[i], &dlist[i]); 1095 1096 return vc4_state->dlist_count; 1097 } 1098 1099 u32 vc4_plane_dlist_size(const struct drm_plane_state *state) 1100 { 1101 const struct vc4_plane_state *vc4_state = 1102 container_of(state, typeof(*vc4_state), base); 1103 1104 return vc4_state->dlist_count; 1105 } 1106 1107 /* Updates the plane to immediately (well, once the FIFO needs 1108 * refilling) scan out from at a new framebuffer. 1109 */ 1110 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) 1111 { 1112 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); 1113 struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); 1114 uint32_t addr; 1115 1116 /* We're skipping the address adjustment for negative origin, 1117 * because this is only called on the primary plane. 1118 */ 1119 WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0); 1120 addr = bo->paddr + fb->offsets[0]; 1121 1122 /* Write the new address into the hardware immediately. The 1123 * scanout will start from this address as soon as the FIFO 1124 * needs to refill with pixels. 1125 */ 1126 writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]); 1127 1128 /* Also update the CPU-side dlist copy, so that any later 1129 * atomic updates that don't do a new modeset on our plane 1130 * also use our updated address. 1131 */ 1132 vc4_state->dlist[vc4_state->ptr0_offset] = addr; 1133 } 1134 1135 static void vc4_plane_atomic_async_update(struct drm_plane *plane, 1136 struct drm_plane_state *state) 1137 { 1138 struct vc4_plane_state *vc4_state, *new_vc4_state; 1139 1140 swap(plane->state->fb, state->fb); 1141 plane->state->crtc_x = state->crtc_x; 1142 plane->state->crtc_y = state->crtc_y; 1143 plane->state->crtc_w = state->crtc_w; 1144 plane->state->crtc_h = state->crtc_h; 1145 plane->state->src_x = state->src_x; 1146 plane->state->src_y = state->src_y; 1147 plane->state->src_w = state->src_w; 1148 plane->state->src_h = state->src_h; 1149 plane->state->src_h = state->src_h; 1150 plane->state->alpha = state->alpha; 1151 plane->state->pixel_blend_mode = state->pixel_blend_mode; 1152 plane->state->rotation = state->rotation; 1153 plane->state->zpos = state->zpos; 1154 plane->state->normalized_zpos = state->normalized_zpos; 1155 plane->state->color_encoding = state->color_encoding; 1156 plane->state->color_range = state->color_range; 1157 plane->state->src = state->src; 1158 plane->state->dst = state->dst; 1159 plane->state->visible = state->visible; 1160 1161 new_vc4_state = to_vc4_plane_state(state); 1162 vc4_state = to_vc4_plane_state(plane->state); 1163 1164 vc4_state->crtc_x = new_vc4_state->crtc_x; 1165 vc4_state->crtc_y = new_vc4_state->crtc_y; 1166 vc4_state->crtc_h = new_vc4_state->crtc_h; 1167 vc4_state->crtc_w = new_vc4_state->crtc_w; 1168 vc4_state->src_x = new_vc4_state->src_x; 1169 vc4_state->src_y = new_vc4_state->src_y; 1170 memcpy(vc4_state->src_w, new_vc4_state->src_w, 1171 sizeof(vc4_state->src_w)); 1172 memcpy(vc4_state->src_h, new_vc4_state->src_h, 1173 sizeof(vc4_state->src_h)); 1174 memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling, 1175 sizeof(vc4_state->x_scaling)); 1176 memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling, 1177 sizeof(vc4_state->y_scaling)); 1178 vc4_state->is_unity = new_vc4_state->is_unity; 1179 vc4_state->is_yuv = new_vc4_state->is_yuv; 1180 memcpy(vc4_state->offsets, new_vc4_state->offsets, 1181 sizeof(vc4_state->offsets)); 1182 vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill; 1183 1184 /* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */ 1185 vc4_state->dlist[vc4_state->pos0_offset] = 1186 new_vc4_state->dlist[vc4_state->pos0_offset]; 1187 vc4_state->dlist[vc4_state->pos2_offset] = 1188 new_vc4_state->dlist[vc4_state->pos2_offset]; 1189 vc4_state->dlist[vc4_state->ptr0_offset] = 1190 new_vc4_state->dlist[vc4_state->ptr0_offset]; 1191 1192 /* Note that we can't just call vc4_plane_write_dlist() 1193 * because that would smash the context data that the HVS is 1194 * currently using. 1195 */ 1196 writel(vc4_state->dlist[vc4_state->pos0_offset], 1197 &vc4_state->hw_dlist[vc4_state->pos0_offset]); 1198 writel(vc4_state->dlist[vc4_state->pos2_offset], 1199 &vc4_state->hw_dlist[vc4_state->pos2_offset]); 1200 writel(vc4_state->dlist[vc4_state->ptr0_offset], 1201 &vc4_state->hw_dlist[vc4_state->ptr0_offset]); 1202 } 1203 1204 static int vc4_plane_atomic_async_check(struct drm_plane *plane, 1205 struct drm_plane_state *state) 1206 { 1207 struct vc4_plane_state *old_vc4_state, *new_vc4_state; 1208 int ret; 1209 u32 i; 1210 1211 ret = vc4_plane_mode_set(plane, state); 1212 if (ret) 1213 return ret; 1214 1215 old_vc4_state = to_vc4_plane_state(plane->state); 1216 new_vc4_state = to_vc4_plane_state(state); 1217 if (old_vc4_state->dlist_count != new_vc4_state->dlist_count || 1218 old_vc4_state->pos0_offset != new_vc4_state->pos0_offset || 1219 old_vc4_state->pos2_offset != new_vc4_state->pos2_offset || 1220 old_vc4_state->ptr0_offset != new_vc4_state->ptr0_offset || 1221 vc4_lbm_size(plane->state) != vc4_lbm_size(state)) 1222 return -EINVAL; 1223 1224 /* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update 1225 * if anything else has changed, fallback to a sync update. 1226 */ 1227 for (i = 0; i < new_vc4_state->dlist_count; i++) { 1228 if (i == new_vc4_state->pos0_offset || 1229 i == new_vc4_state->pos2_offset || 1230 i == new_vc4_state->ptr0_offset || 1231 (new_vc4_state->lbm_offset && 1232 i == new_vc4_state->lbm_offset)) 1233 continue; 1234 1235 if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i]) 1236 return -EINVAL; 1237 } 1238 1239 return 0; 1240 } 1241 1242 static int vc4_prepare_fb(struct drm_plane *plane, 1243 struct drm_plane_state *state) 1244 { 1245 struct vc4_bo *bo; 1246 int ret; 1247 1248 if (!state->fb) 1249 return 0; 1250 1251 bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); 1252 1253 drm_gem_fb_prepare_fb(plane, state); 1254 1255 if (plane->state->fb == state->fb) 1256 return 0; 1257 1258 ret = vc4_bo_inc_usecnt(bo); 1259 if (ret) 1260 return ret; 1261 1262 return 0; 1263 } 1264 1265 static void vc4_cleanup_fb(struct drm_plane *plane, 1266 struct drm_plane_state *state) 1267 { 1268 struct vc4_bo *bo; 1269 1270 if (plane->state->fb == state->fb || !state->fb) 1271 return; 1272 1273 bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); 1274 vc4_bo_dec_usecnt(bo); 1275 } 1276 1277 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { 1278 .atomic_check = vc4_plane_atomic_check, 1279 .atomic_update = vc4_plane_atomic_update, 1280 .prepare_fb = vc4_prepare_fb, 1281 .cleanup_fb = vc4_cleanup_fb, 1282 .atomic_async_check = vc4_plane_atomic_async_check, 1283 .atomic_async_update = vc4_plane_atomic_async_update, 1284 }; 1285 1286 static bool vc4_format_mod_supported(struct drm_plane *plane, 1287 uint32_t format, 1288 uint64_t modifier) 1289 { 1290 /* Support T_TILING for RGB formats only. */ 1291 switch (format) { 1292 case DRM_FORMAT_XRGB8888: 1293 case DRM_FORMAT_ARGB8888: 1294 case DRM_FORMAT_ABGR8888: 1295 case DRM_FORMAT_XBGR8888: 1296 case DRM_FORMAT_RGB565: 1297 case DRM_FORMAT_BGR565: 1298 case DRM_FORMAT_ARGB1555: 1299 case DRM_FORMAT_XRGB1555: 1300 switch (fourcc_mod_broadcom_mod(modifier)) { 1301 case DRM_FORMAT_MOD_LINEAR: 1302 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: 1303 return true; 1304 default: 1305 return false; 1306 } 1307 case DRM_FORMAT_NV12: 1308 case DRM_FORMAT_NV21: 1309 switch (fourcc_mod_broadcom_mod(modifier)) { 1310 case DRM_FORMAT_MOD_LINEAR: 1311 case DRM_FORMAT_MOD_BROADCOM_SAND64: 1312 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1313 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1314 return true; 1315 default: 1316 return false; 1317 } 1318 case DRM_FORMAT_RGBX1010102: 1319 case DRM_FORMAT_BGRX1010102: 1320 case DRM_FORMAT_RGBA1010102: 1321 case DRM_FORMAT_BGRA1010102: 1322 case DRM_FORMAT_YUV422: 1323 case DRM_FORMAT_YVU422: 1324 case DRM_FORMAT_YUV420: 1325 case DRM_FORMAT_YVU420: 1326 case DRM_FORMAT_NV16: 1327 case DRM_FORMAT_NV61: 1328 default: 1329 return (modifier == DRM_FORMAT_MOD_LINEAR); 1330 } 1331 } 1332 1333 static const struct drm_plane_funcs vc4_plane_funcs = { 1334 .update_plane = drm_atomic_helper_update_plane, 1335 .disable_plane = drm_atomic_helper_disable_plane, 1336 .destroy = drm_plane_cleanup, 1337 .set_property = NULL, 1338 .reset = vc4_plane_reset, 1339 .atomic_duplicate_state = vc4_plane_duplicate_state, 1340 .atomic_destroy_state = vc4_plane_destroy_state, 1341 .format_mod_supported = vc4_format_mod_supported, 1342 }; 1343 1344 struct drm_plane *vc4_plane_init(struct drm_device *dev, 1345 enum drm_plane_type type) 1346 { 1347 struct drm_plane *plane = NULL; 1348 struct vc4_plane *vc4_plane; 1349 u32 formats[ARRAY_SIZE(hvs_formats)]; 1350 int ret = 0; 1351 unsigned i; 1352 static const uint64_t modifiers[] = { 1353 DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, 1354 DRM_FORMAT_MOD_BROADCOM_SAND128, 1355 DRM_FORMAT_MOD_BROADCOM_SAND64, 1356 DRM_FORMAT_MOD_BROADCOM_SAND256, 1357 DRM_FORMAT_MOD_LINEAR, 1358 DRM_FORMAT_MOD_INVALID 1359 }; 1360 1361 vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane), 1362 GFP_KERNEL); 1363 if (!vc4_plane) 1364 return ERR_PTR(-ENOMEM); 1365 1366 for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) 1367 formats[i] = hvs_formats[i].drm; 1368 1369 plane = &vc4_plane->base; 1370 ret = drm_universal_plane_init(dev, plane, 0, 1371 &vc4_plane_funcs, 1372 formats, ARRAY_SIZE(formats), 1373 modifiers, type, NULL); 1374 if (ret) 1375 return ERR_PTR(ret); 1376 1377 drm_plane_helper_add(plane, &vc4_plane_helper_funcs); 1378 1379 drm_plane_create_alpha_property(plane); 1380 drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, 1381 DRM_MODE_ROTATE_0 | 1382 DRM_MODE_ROTATE_180 | 1383 DRM_MODE_REFLECT_X | 1384 DRM_MODE_REFLECT_Y); 1385 1386 return plane; 1387 } 1388 1389 int vc4_plane_create_additional_planes(struct drm_device *drm) 1390 { 1391 struct drm_plane *cursor_plane; 1392 struct drm_crtc *crtc; 1393 unsigned int i; 1394 1395 /* Set up some arbitrary number of planes. We're not limited 1396 * by a set number of physical registers, just the space in 1397 * the HVS (16k) and how small an plane can be (28 bytes). 1398 * However, each plane we set up takes up some memory, and 1399 * increases the cost of looping over planes, which atomic 1400 * modesetting does quite a bit. As a result, we pick a 1401 * modest number of planes to expose, that should hopefully 1402 * still cover any sane usecase. 1403 */ 1404 for (i = 0; i < 16; i++) { 1405 struct drm_plane *plane = 1406 vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY); 1407 1408 if (IS_ERR(plane)) 1409 continue; 1410 1411 plane->possible_crtcs = 1412 GENMASK(drm->mode_config.num_crtc - 1, 0); 1413 } 1414 1415 drm_for_each_crtc(crtc, drm) { 1416 /* Set up the legacy cursor after overlay initialization, 1417 * since we overlay planes on the CRTC in the order they were 1418 * initialized. 1419 */ 1420 cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR); 1421 if (!IS_ERR(cursor_plane)) { 1422 cursor_plane->possible_crtcs = drm_crtc_mask(crtc); 1423 crtc->cursor = cursor_plane; 1424 } 1425 } 1426 1427 return 0; 1428 } 1429