1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2015 Broadcom 4 */ 5 6 /** 7 * DOC: VC4 plane module 8 * 9 * Each DRM plane is a layer of pixels being scanned out by the HVS. 10 * 11 * At atomic modeset check time, we compute the HVS display element 12 * state that would be necessary for displaying the plane (giving us a 13 * chance to figure out if a plane configuration is invalid), then at 14 * atomic flush time the CRTC will ask us to write our element state 15 * into the region of the HVS that it has allocated for us. 16 */ 17 18 #include <drm/drm_atomic.h> 19 #include <drm/drm_atomic_helper.h> 20 #include <drm/drm_atomic_uapi.h> 21 #include <drm/drm_fb_cma_helper.h> 22 #include <drm/drm_fourcc.h> 23 #include <drm/drm_gem_atomic_helper.h> 24 #include <drm/drm_plane_helper.h> 25 26 #include "uapi/drm/vc4_drm.h" 27 28 #include "vc4_drv.h" 29 #include "vc4_regs.h" 30 31 static const struct hvs_format { 32 u32 drm; /* DRM_FORMAT_* */ 33 u32 hvs; /* HVS_FORMAT_* */ 34 u32 pixel_order; 35 u32 pixel_order_hvs5; 36 bool hvs5_only; 37 } hvs_formats[] = { 38 { 39 .drm = DRM_FORMAT_XRGB8888, 40 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 41 .pixel_order = HVS_PIXEL_ORDER_ABGR, 42 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 43 }, 44 { 45 .drm = DRM_FORMAT_ARGB8888, 46 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 47 .pixel_order = HVS_PIXEL_ORDER_ABGR, 48 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 49 }, 50 { 51 .drm = DRM_FORMAT_ABGR8888, 52 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 53 .pixel_order = HVS_PIXEL_ORDER_ARGB, 54 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 55 }, 56 { 57 .drm = DRM_FORMAT_XBGR8888, 58 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 59 .pixel_order = HVS_PIXEL_ORDER_ARGB, 60 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 61 }, 62 { 63 .drm = DRM_FORMAT_RGB565, 64 .hvs = HVS_PIXEL_FORMAT_RGB565, 65 .pixel_order = HVS_PIXEL_ORDER_XRGB, 66 }, 67 { 68 .drm = DRM_FORMAT_BGR565, 69 .hvs = HVS_PIXEL_FORMAT_RGB565, 70 .pixel_order = HVS_PIXEL_ORDER_XBGR, 71 }, 72 { 73 .drm = DRM_FORMAT_ARGB1555, 74 .hvs = HVS_PIXEL_FORMAT_RGBA5551, 75 .pixel_order = HVS_PIXEL_ORDER_ABGR, 76 }, 77 { 78 .drm = DRM_FORMAT_XRGB1555, 79 .hvs = HVS_PIXEL_FORMAT_RGBA5551, 80 .pixel_order = HVS_PIXEL_ORDER_ABGR, 81 }, 82 { 83 .drm = DRM_FORMAT_RGB888, 84 .hvs = HVS_PIXEL_FORMAT_RGB888, 85 .pixel_order = HVS_PIXEL_ORDER_XRGB, 86 }, 87 { 88 .drm = DRM_FORMAT_BGR888, 89 .hvs = HVS_PIXEL_FORMAT_RGB888, 90 .pixel_order = HVS_PIXEL_ORDER_XBGR, 91 }, 92 { 93 .drm = DRM_FORMAT_YUV422, 94 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 95 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 96 }, 97 { 98 .drm = DRM_FORMAT_YVU422, 99 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 100 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 101 }, 102 { 103 .drm = DRM_FORMAT_YUV420, 104 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 105 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 106 }, 107 { 108 .drm = DRM_FORMAT_YVU420, 109 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 110 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 111 }, 112 { 113 .drm = DRM_FORMAT_NV12, 114 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 115 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 116 }, 117 { 118 .drm = DRM_FORMAT_NV21, 119 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 120 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 121 }, 122 { 123 .drm = DRM_FORMAT_NV16, 124 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 125 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 126 }, 127 { 128 .drm = DRM_FORMAT_NV61, 129 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 130 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 131 }, 132 { 133 .drm = DRM_FORMAT_P030, 134 .hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT, 135 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 136 .hvs5_only = true, 137 }, 138 }; 139 140 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) 141 { 142 unsigned i; 143 144 for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { 145 if (hvs_formats[i].drm == drm_format) 146 return &hvs_formats[i]; 147 } 148 149 return NULL; 150 } 151 152 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst) 153 { 154 if (dst == src) 155 return VC4_SCALING_NONE; 156 if (3 * dst >= 2 * src) 157 return VC4_SCALING_PPF; 158 else 159 return VC4_SCALING_TPZ; 160 } 161 162 static bool plane_enabled(struct drm_plane_state *state) 163 { 164 return state->fb && !WARN_ON(!state->crtc); 165 } 166 167 static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane) 168 { 169 struct vc4_plane_state *vc4_state; 170 171 if (WARN_ON(!plane->state)) 172 return NULL; 173 174 vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL); 175 if (!vc4_state) 176 return NULL; 177 178 memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm)); 179 vc4_state->dlist_initialized = 0; 180 181 __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base); 182 183 if (vc4_state->dlist) { 184 vc4_state->dlist = kmemdup(vc4_state->dlist, 185 vc4_state->dlist_count * 4, 186 GFP_KERNEL); 187 if (!vc4_state->dlist) { 188 kfree(vc4_state); 189 return NULL; 190 } 191 vc4_state->dlist_size = vc4_state->dlist_count; 192 } 193 194 return &vc4_state->base; 195 } 196 197 static void vc4_plane_destroy_state(struct drm_plane *plane, 198 struct drm_plane_state *state) 199 { 200 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 201 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 202 203 if (drm_mm_node_allocated(&vc4_state->lbm)) { 204 unsigned long irqflags; 205 206 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); 207 drm_mm_remove_node(&vc4_state->lbm); 208 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); 209 } 210 211 kfree(vc4_state->dlist); 212 __drm_atomic_helper_plane_destroy_state(&vc4_state->base); 213 kfree(state); 214 } 215 216 /* Called during init to allocate the plane's atomic state. */ 217 static void vc4_plane_reset(struct drm_plane *plane) 218 { 219 struct vc4_plane_state *vc4_state; 220 221 WARN_ON(plane->state); 222 223 vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL); 224 if (!vc4_state) 225 return; 226 227 __drm_atomic_helper_plane_reset(plane, &vc4_state->base); 228 } 229 230 static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state) 231 { 232 if (vc4_state->dlist_count == vc4_state->dlist_size) { 233 u32 new_size = max(4u, vc4_state->dlist_count * 2); 234 u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL); 235 236 if (!new_dlist) 237 return; 238 memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4); 239 240 kfree(vc4_state->dlist); 241 vc4_state->dlist = new_dlist; 242 vc4_state->dlist_size = new_size; 243 } 244 245 vc4_state->dlist_count++; 246 } 247 248 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) 249 { 250 unsigned int idx = vc4_state->dlist_count; 251 252 vc4_dlist_counter_increment(vc4_state); 253 vc4_state->dlist[idx] = val; 254 } 255 256 /* Returns the scl0/scl1 field based on whether the dimensions need to 257 * be up/down/non-scaled. 258 * 259 * This is a replication of a table from the spec. 260 */ 261 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane) 262 { 263 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 264 265 switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) { 266 case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF: 267 return SCALER_CTL0_SCL_H_PPF_V_PPF; 268 case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF: 269 return SCALER_CTL0_SCL_H_TPZ_V_PPF; 270 case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ: 271 return SCALER_CTL0_SCL_H_PPF_V_TPZ; 272 case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ: 273 return SCALER_CTL0_SCL_H_TPZ_V_TPZ; 274 case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE: 275 return SCALER_CTL0_SCL_H_PPF_V_NONE; 276 case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF: 277 return SCALER_CTL0_SCL_H_NONE_V_PPF; 278 case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ: 279 return SCALER_CTL0_SCL_H_NONE_V_TPZ; 280 case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE: 281 return SCALER_CTL0_SCL_H_TPZ_V_NONE; 282 default: 283 case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE: 284 /* The unity case is independently handled by 285 * SCALER_CTL0_UNITY. 286 */ 287 return 0; 288 } 289 } 290 291 static int vc4_plane_margins_adj(struct drm_plane_state *pstate) 292 { 293 struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate); 294 unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay; 295 struct drm_crtc_state *crtc_state; 296 297 crtc_state = drm_atomic_get_new_crtc_state(pstate->state, 298 pstate->crtc); 299 300 vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom); 301 if (!left && !right && !top && !bottom) 302 return 0; 303 304 if (left + right >= crtc_state->mode.hdisplay || 305 top + bottom >= crtc_state->mode.vdisplay) 306 return -EINVAL; 307 308 adjhdisplay = crtc_state->mode.hdisplay - (left + right); 309 vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x * 310 adjhdisplay, 311 crtc_state->mode.hdisplay); 312 vc4_pstate->crtc_x += left; 313 if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - left) 314 vc4_pstate->crtc_x = crtc_state->mode.hdisplay - left; 315 316 adjvdisplay = crtc_state->mode.vdisplay - (top + bottom); 317 vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y * 318 adjvdisplay, 319 crtc_state->mode.vdisplay); 320 vc4_pstate->crtc_y += top; 321 if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - top) 322 vc4_pstate->crtc_y = crtc_state->mode.vdisplay - top; 323 324 vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w * 325 adjhdisplay, 326 crtc_state->mode.hdisplay); 327 vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h * 328 adjvdisplay, 329 crtc_state->mode.vdisplay); 330 331 if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h) 332 return -EINVAL; 333 334 return 0; 335 } 336 337 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) 338 { 339 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 340 struct drm_framebuffer *fb = state->fb; 341 struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); 342 u32 subpixel_src_mask = (1 << 16) - 1; 343 int num_planes = fb->format->num_planes; 344 struct drm_crtc_state *crtc_state; 345 u32 h_subsample = fb->format->hsub; 346 u32 v_subsample = fb->format->vsub; 347 int i, ret; 348 349 crtc_state = drm_atomic_get_existing_crtc_state(state->state, 350 state->crtc); 351 if (!crtc_state) { 352 DRM_DEBUG_KMS("Invalid crtc state\n"); 353 return -EINVAL; 354 } 355 356 ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1, 357 INT_MAX, true, true); 358 if (ret) 359 return ret; 360 361 for (i = 0; i < num_planes; i++) 362 vc4_state->offsets[i] = bo->paddr + fb->offsets[i]; 363 364 /* We don't support subpixel source positioning for scaling. */ 365 if ((state->src.x1 & subpixel_src_mask) || 366 (state->src.x2 & subpixel_src_mask) || 367 (state->src.y1 & subpixel_src_mask) || 368 (state->src.y2 & subpixel_src_mask)) { 369 return -EINVAL; 370 } 371 372 vc4_state->src_x = state->src.x1 >> 16; 373 vc4_state->src_y = state->src.y1 >> 16; 374 vc4_state->src_w[0] = (state->src.x2 - state->src.x1) >> 16; 375 vc4_state->src_h[0] = (state->src.y2 - state->src.y1) >> 16; 376 377 vc4_state->crtc_x = state->dst.x1; 378 vc4_state->crtc_y = state->dst.y1; 379 vc4_state->crtc_w = state->dst.x2 - state->dst.x1; 380 vc4_state->crtc_h = state->dst.y2 - state->dst.y1; 381 382 ret = vc4_plane_margins_adj(state); 383 if (ret) 384 return ret; 385 386 vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0], 387 vc4_state->crtc_w); 388 vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0], 389 vc4_state->crtc_h); 390 391 vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && 392 vc4_state->y_scaling[0] == VC4_SCALING_NONE); 393 394 if (num_planes > 1) { 395 vc4_state->is_yuv = true; 396 397 vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample; 398 vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample; 399 400 vc4_state->x_scaling[1] = 401 vc4_get_scaling_mode(vc4_state->src_w[1], 402 vc4_state->crtc_w); 403 vc4_state->y_scaling[1] = 404 vc4_get_scaling_mode(vc4_state->src_h[1], 405 vc4_state->crtc_h); 406 407 /* YUV conversion requires that horizontal scaling be enabled 408 * on the UV plane even if vc4_get_scaling_mode() returned 409 * VC4_SCALING_NONE (which can happen when the down-scaling 410 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this 411 * case. 412 */ 413 if (vc4_state->x_scaling[1] == VC4_SCALING_NONE) 414 vc4_state->x_scaling[1] = VC4_SCALING_PPF; 415 } else { 416 vc4_state->is_yuv = false; 417 vc4_state->x_scaling[1] = VC4_SCALING_NONE; 418 vc4_state->y_scaling[1] = VC4_SCALING_NONE; 419 } 420 421 return 0; 422 } 423 424 static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) 425 { 426 u32 scale, recip; 427 428 scale = (1 << 16) * src / dst; 429 430 /* The specs note that while the reciprocal would be defined 431 * as (1<<32)/scale, ~0 is close enough. 432 */ 433 recip = ~0 / scale; 434 435 vc4_dlist_write(vc4_state, 436 VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) | 437 VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE)); 438 vc4_dlist_write(vc4_state, 439 VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP)); 440 } 441 442 static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst) 443 { 444 u32 scale = (1 << 16) * src / dst; 445 446 vc4_dlist_write(vc4_state, 447 SCALER_PPF_AGC | 448 VC4_SET_FIELD(scale, SCALER_PPF_SCALE) | 449 VC4_SET_FIELD(0, SCALER_PPF_IPHASE)); 450 } 451 452 static u32 vc4_lbm_size(struct drm_plane_state *state) 453 { 454 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 455 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 456 u32 pix_per_line; 457 u32 lbm; 458 459 /* LBM is not needed when there's no vertical scaling. */ 460 if (vc4_state->y_scaling[0] == VC4_SCALING_NONE && 461 vc4_state->y_scaling[1] == VC4_SCALING_NONE) 462 return 0; 463 464 /* 465 * This can be further optimized in the RGB/YUV444 case if the PPF 466 * decimation factor is between 0.5 and 1.0 by using crtc_w. 467 * 468 * It's not an issue though, since in that case since src_w[0] is going 469 * to be greater than or equal to crtc_w. 470 */ 471 if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ) 472 pix_per_line = vc4_state->crtc_w; 473 else 474 pix_per_line = vc4_state->src_w[0]; 475 476 if (!vc4_state->is_yuv) { 477 if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ) 478 lbm = pix_per_line * 8; 479 else { 480 /* In special cases, this multiplier might be 12. */ 481 lbm = pix_per_line * 16; 482 } 483 } else { 484 /* There are cases for this going down to a multiplier 485 * of 2, but according to the firmware source, the 486 * table in the docs is somewhat wrong. 487 */ 488 lbm = pix_per_line * 16; 489 } 490 491 /* Align it to 64 or 128 (hvs5) bytes */ 492 lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64); 493 494 /* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */ 495 lbm /= vc4->hvs->hvs5 ? 4 : 2; 496 497 return lbm; 498 } 499 500 static void vc4_write_scaling_parameters(struct drm_plane_state *state, 501 int channel) 502 { 503 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 504 505 /* Ch0 H-PPF Word 0: Scaling Parameters */ 506 if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) { 507 vc4_write_ppf(vc4_state, 508 vc4_state->src_w[channel], vc4_state->crtc_w); 509 } 510 511 /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */ 512 if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) { 513 vc4_write_ppf(vc4_state, 514 vc4_state->src_h[channel], vc4_state->crtc_h); 515 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 516 } 517 518 /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */ 519 if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) { 520 vc4_write_tpz(vc4_state, 521 vc4_state->src_w[channel], vc4_state->crtc_w); 522 } 523 524 /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */ 525 if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) { 526 vc4_write_tpz(vc4_state, 527 vc4_state->src_h[channel], vc4_state->crtc_h); 528 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 529 } 530 } 531 532 static void vc4_plane_calc_load(struct drm_plane_state *state) 533 { 534 unsigned int hvs_load_shift, vrefresh, i; 535 struct drm_framebuffer *fb = state->fb; 536 struct vc4_plane_state *vc4_state; 537 struct drm_crtc_state *crtc_state; 538 unsigned int vscale_factor; 539 540 vc4_state = to_vc4_plane_state(state); 541 crtc_state = drm_atomic_get_existing_crtc_state(state->state, 542 state->crtc); 543 vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode); 544 545 /* The HVS is able to process 2 pixels/cycle when scaling the source, 546 * 4 pixels/cycle otherwise. 547 * Alpha blending step seems to be pipelined and it's always operating 548 * at 4 pixels/cycle, so the limiting aspect here seems to be the 549 * scaler block. 550 * HVS load is expressed in clk-cycles/sec (AKA Hz). 551 */ 552 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || 553 vc4_state->x_scaling[1] != VC4_SCALING_NONE || 554 vc4_state->y_scaling[0] != VC4_SCALING_NONE || 555 vc4_state->y_scaling[1] != VC4_SCALING_NONE) 556 hvs_load_shift = 1; 557 else 558 hvs_load_shift = 2; 559 560 vc4_state->membus_load = 0; 561 vc4_state->hvs_load = 0; 562 for (i = 0; i < fb->format->num_planes; i++) { 563 /* Even if the bandwidth/plane required for a single frame is 564 * 565 * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh 566 * 567 * when downscaling, we have to read more pixels per line in 568 * the time frame reserved for a single line, so the bandwidth 569 * demand can be punctually higher. To account for that, we 570 * calculate the down-scaling factor and multiply the plane 571 * load by this number. We're likely over-estimating the read 572 * demand, but that's better than under-estimating it. 573 */ 574 vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i], 575 vc4_state->crtc_h); 576 vc4_state->membus_load += vc4_state->src_w[i] * 577 vc4_state->src_h[i] * vscale_factor * 578 fb->format->cpp[i]; 579 vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w; 580 } 581 582 vc4_state->hvs_load *= vrefresh; 583 vc4_state->hvs_load >>= hvs_load_shift; 584 vc4_state->membus_load *= vrefresh; 585 } 586 587 static int vc4_plane_allocate_lbm(struct drm_plane_state *state) 588 { 589 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 590 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 591 unsigned long irqflags; 592 u32 lbm_size; 593 594 lbm_size = vc4_lbm_size(state); 595 if (!lbm_size) 596 return 0; 597 598 if (WARN_ON(!vc4_state->lbm_offset)) 599 return -EINVAL; 600 601 /* Allocate the LBM memory that the HVS will use for temporary 602 * storage due to our scaling/format conversion. 603 */ 604 if (!drm_mm_node_allocated(&vc4_state->lbm)) { 605 int ret; 606 607 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); 608 ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm, 609 &vc4_state->lbm, 610 lbm_size, 611 vc4->hvs->hvs5 ? 64 : 32, 612 0, 0); 613 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); 614 615 if (ret) 616 return ret; 617 } else { 618 WARN_ON_ONCE(lbm_size != vc4_state->lbm.size); 619 } 620 621 vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start; 622 623 return 0; 624 } 625 626 /* 627 * The colorspace conversion matrices are held in 3 entries in the dlist. 628 * Create an array of them, with entries for each full and limited mode, and 629 * each supported colorspace. 630 */ 631 static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = { 632 { 633 /* Limited range */ 634 { 635 /* BT601 */ 636 SCALER_CSC0_ITR_R_601_5, 637 SCALER_CSC1_ITR_R_601_5, 638 SCALER_CSC2_ITR_R_601_5, 639 }, { 640 /* BT709 */ 641 SCALER_CSC0_ITR_R_709_3, 642 SCALER_CSC1_ITR_R_709_3, 643 SCALER_CSC2_ITR_R_709_3, 644 }, { 645 /* BT2020 */ 646 SCALER_CSC0_ITR_R_2020, 647 SCALER_CSC1_ITR_R_2020, 648 SCALER_CSC2_ITR_R_2020, 649 } 650 }, { 651 /* Full range */ 652 { 653 /* JFIF */ 654 SCALER_CSC0_JPEG_JFIF, 655 SCALER_CSC1_JPEG_JFIF, 656 SCALER_CSC2_JPEG_JFIF, 657 }, { 658 /* BT709 */ 659 SCALER_CSC0_ITR_R_709_3_FR, 660 SCALER_CSC1_ITR_R_709_3_FR, 661 SCALER_CSC2_ITR_R_709_3_FR, 662 }, { 663 /* BT2020 */ 664 SCALER_CSC0_ITR_R_2020_FR, 665 SCALER_CSC1_ITR_R_2020_FR, 666 SCALER_CSC2_ITR_R_2020_FR, 667 } 668 } 669 }; 670 671 /* Writes out a full display list for an active plane to the plane's 672 * private dlist state. 673 */ 674 static int vc4_plane_mode_set(struct drm_plane *plane, 675 struct drm_plane_state *state) 676 { 677 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 678 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 679 struct drm_framebuffer *fb = state->fb; 680 u32 ctl0_offset = vc4_state->dlist_count; 681 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); 682 u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier); 683 int num_planes = fb->format->num_planes; 684 u32 h_subsample = fb->format->hsub; 685 u32 v_subsample = fb->format->vsub; 686 bool mix_plane_alpha; 687 bool covers_screen; 688 u32 scl0, scl1, pitch0; 689 u32 tiling, src_y; 690 u32 hvs_format = format->hvs; 691 unsigned int rotation; 692 int ret, i; 693 694 if (vc4_state->dlist_initialized) 695 return 0; 696 697 ret = vc4_plane_setup_clipping_and_scaling(state); 698 if (ret) 699 return ret; 700 701 /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB 702 * and 4:4:4, scl1 should be set to scl0 so both channels of 703 * the scaler do the same thing. For YUV, the Y plane needs 704 * to be put in channel 1 and Cb/Cr in channel 0, so we swap 705 * the scl fields here. 706 */ 707 if (num_planes == 1) { 708 scl0 = vc4_get_scl_field(state, 0); 709 scl1 = scl0; 710 } else { 711 scl0 = vc4_get_scl_field(state, 1); 712 scl1 = vc4_get_scl_field(state, 0); 713 } 714 715 rotation = drm_rotation_simplify(state->rotation, 716 DRM_MODE_ROTATE_0 | 717 DRM_MODE_REFLECT_X | 718 DRM_MODE_REFLECT_Y); 719 720 /* We must point to the last line when Y reflection is enabled. */ 721 src_y = vc4_state->src_y; 722 if (rotation & DRM_MODE_REFLECT_Y) 723 src_y += vc4_state->src_h[0] - 1; 724 725 switch (base_format_mod) { 726 case DRM_FORMAT_MOD_LINEAR: 727 tiling = SCALER_CTL0_TILING_LINEAR; 728 pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH); 729 730 /* Adjust the base pointer to the first pixel to be scanned 731 * out. 732 */ 733 for (i = 0; i < num_planes; i++) { 734 vc4_state->offsets[i] += src_y / 735 (i ? v_subsample : 1) * 736 fb->pitches[i]; 737 738 vc4_state->offsets[i] += vc4_state->src_x / 739 (i ? h_subsample : 1) * 740 fb->format->cpp[i]; 741 } 742 743 break; 744 745 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: { 746 u32 tile_size_shift = 12; /* T tiles are 4kb */ 747 /* Whole-tile offsets, mostly for setting the pitch. */ 748 u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5; 749 u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */ 750 u32 tile_w_mask = (1 << tile_w_shift) - 1; 751 /* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice 752 * the height (in pixels) of a 4k tile. 753 */ 754 u32 tile_h_mask = (2 << tile_h_shift) - 1; 755 /* For T-tiled, the FB pitch is "how many bytes from one row to 756 * the next, such that 757 * 758 * pitch * tile_h == tile_size * tiles_per_row 759 */ 760 u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift); 761 u32 tiles_l = vc4_state->src_x >> tile_w_shift; 762 u32 tiles_r = tiles_w - tiles_l; 763 u32 tiles_t = src_y >> tile_h_shift; 764 /* Intra-tile offsets, which modify the base address (the 765 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that 766 * base address). 767 */ 768 u32 tile_y = (src_y >> 4) & 1; 769 u32 subtile_y = (src_y >> 2) & 3; 770 u32 utile_y = src_y & 3; 771 u32 x_off = vc4_state->src_x & tile_w_mask; 772 u32 y_off = src_y & tile_h_mask; 773 774 /* When Y reflection is requested we must set the 775 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines 776 * after the initial one should be fetched in descending order, 777 * which makes sense since we start from the last line and go 778 * backward. 779 * Don't know why we need y_off = max_y_off - y_off, but it's 780 * definitely required (I guess it's also related to the "going 781 * backward" situation). 782 */ 783 if (rotation & DRM_MODE_REFLECT_Y) { 784 y_off = tile_h_mask - y_off; 785 pitch0 = SCALER_PITCH0_TILE_LINE_DIR; 786 } else { 787 pitch0 = 0; 788 } 789 790 tiling = SCALER_CTL0_TILING_256B_OR_T; 791 pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) | 792 VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) | 793 VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) | 794 VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R)); 795 vc4_state->offsets[0] += tiles_t * (tiles_w << tile_size_shift); 796 vc4_state->offsets[0] += subtile_y << 8; 797 vc4_state->offsets[0] += utile_y << 4; 798 799 /* Rows of tiles alternate left-to-right and right-to-left. */ 800 if (tiles_t & 1) { 801 pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR; 802 vc4_state->offsets[0] += (tiles_w - tiles_l) << 803 tile_size_shift; 804 vc4_state->offsets[0] -= (1 + !tile_y) << 10; 805 } else { 806 vc4_state->offsets[0] += tiles_l << tile_size_shift; 807 vc4_state->offsets[0] += tile_y << 10; 808 } 809 810 break; 811 } 812 813 case DRM_FORMAT_MOD_BROADCOM_SAND64: 814 case DRM_FORMAT_MOD_BROADCOM_SAND128: 815 case DRM_FORMAT_MOD_BROADCOM_SAND256: { 816 uint32_t param = fourcc_mod_broadcom_param(fb->modifier); 817 818 if (param > SCALER_TILE_HEIGHT_MASK) { 819 DRM_DEBUG_KMS("SAND height too large (%d)\n", 820 param); 821 return -EINVAL; 822 } 823 824 if (fb->format->format == DRM_FORMAT_P030) { 825 hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT; 826 tiling = SCALER_CTL0_TILING_128B; 827 } else { 828 hvs_format = HVS_PIXEL_FORMAT_H264; 829 830 switch (base_format_mod) { 831 case DRM_FORMAT_MOD_BROADCOM_SAND64: 832 tiling = SCALER_CTL0_TILING_64B; 833 break; 834 case DRM_FORMAT_MOD_BROADCOM_SAND128: 835 tiling = SCALER_CTL0_TILING_128B; 836 break; 837 case DRM_FORMAT_MOD_BROADCOM_SAND256: 838 tiling = SCALER_CTL0_TILING_256B_OR_T; 839 break; 840 default: 841 return -EINVAL; 842 } 843 } 844 845 /* Adjust the base pointer to the first pixel to be scanned 846 * out. 847 * 848 * For P030, y_ptr [31:4] is the 128bit word for the start pixel 849 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit 850 * word that should be taken as the first pixel. 851 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the 852 * element within the 128bit word, eg for pixel 3 the value 853 * should be 6. 854 */ 855 for (i = 0; i < num_planes; i++) { 856 u32 tile_w, tile, x_off, pix_per_tile; 857 858 if (fb->format->format == DRM_FORMAT_P030) { 859 /* 860 * Spec says: bits [31:4] of the given address 861 * should point to the 128-bit word containing 862 * the desired starting pixel, and bits[3:0] 863 * should be between 0 and 11, indicating which 864 * of the 12-pixels in that 128-bit word is the 865 * first pixel to be used 866 */ 867 u32 remaining_pixels = vc4_state->src_x % 96; 868 u32 aligned = remaining_pixels / 12; 869 u32 last_bits = remaining_pixels % 12; 870 871 x_off = aligned * 16 + last_bits; 872 tile_w = 128; 873 pix_per_tile = 96; 874 } else { 875 switch (base_format_mod) { 876 case DRM_FORMAT_MOD_BROADCOM_SAND64: 877 tile_w = 64; 878 break; 879 case DRM_FORMAT_MOD_BROADCOM_SAND128: 880 tile_w = 128; 881 break; 882 case DRM_FORMAT_MOD_BROADCOM_SAND256: 883 tile_w = 256; 884 break; 885 default: 886 return -EINVAL; 887 } 888 pix_per_tile = tile_w / fb->format->cpp[0]; 889 x_off = (vc4_state->src_x % pix_per_tile) / 890 (i ? h_subsample : 1) * 891 fb->format->cpp[i]; 892 } 893 894 tile = vc4_state->src_x / pix_per_tile; 895 896 vc4_state->offsets[i] += param * tile_w * tile; 897 vc4_state->offsets[i] += src_y / 898 (i ? v_subsample : 1) * 899 tile_w; 900 vc4_state->offsets[i] += x_off & ~(i ? 1 : 0); 901 } 902 903 pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT); 904 break; 905 } 906 907 default: 908 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", 909 (long long)fb->modifier); 910 return -EINVAL; 911 } 912 913 /* Don't waste cycles mixing with plane alpha if the set alpha 914 * is opaque or there is no per-pixel alpha information. 915 * In any case we use the alpha property value as the fixed alpha. 916 */ 917 mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && 918 fb->format->has_alpha; 919 920 if (!vc4->hvs->hvs5) { 921 /* Control word */ 922 vc4_dlist_write(vc4_state, 923 SCALER_CTL0_VALID | 924 (rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) | 925 (rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) | 926 VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) | 927 (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | 928 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 929 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | 930 (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) | 931 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 932 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1)); 933 934 /* Position Word 0: Image Positions and Alpha Value */ 935 vc4_state->pos0_offset = vc4_state->dlist_count; 936 vc4_dlist_write(vc4_state, 937 VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) | 938 VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | 939 VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); 940 941 /* Position Word 1: Scaled Image Dimensions. */ 942 if (!vc4_state->is_unity) { 943 vc4_dlist_write(vc4_state, 944 VC4_SET_FIELD(vc4_state->crtc_w, 945 SCALER_POS1_SCL_WIDTH) | 946 VC4_SET_FIELD(vc4_state->crtc_h, 947 SCALER_POS1_SCL_HEIGHT)); 948 } 949 950 /* Position Word 2: Source Image Size, Alpha */ 951 vc4_state->pos2_offset = vc4_state->dlist_count; 952 vc4_dlist_write(vc4_state, 953 VC4_SET_FIELD(fb->format->has_alpha ? 954 SCALER_POS2_ALPHA_MODE_PIPELINE : 955 SCALER_POS2_ALPHA_MODE_FIXED, 956 SCALER_POS2_ALPHA_MODE) | 957 (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) | 958 (fb->format->has_alpha ? 959 SCALER_POS2_ALPHA_PREMULT : 0) | 960 VC4_SET_FIELD(vc4_state->src_w[0], 961 SCALER_POS2_WIDTH) | 962 VC4_SET_FIELD(vc4_state->src_h[0], 963 SCALER_POS2_HEIGHT)); 964 965 /* Position Word 3: Context. Written by the HVS. */ 966 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 967 968 } else { 969 u32 hvs_pixel_order = format->pixel_order; 970 971 if (format->pixel_order_hvs5) 972 hvs_pixel_order = format->pixel_order_hvs5; 973 974 /* Control word */ 975 vc4_dlist_write(vc4_state, 976 SCALER_CTL0_VALID | 977 (hvs_pixel_order << SCALER_CTL0_ORDER_SHIFT) | 978 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 979 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | 980 (vc4_state->is_unity ? 981 SCALER5_CTL0_UNITY : 0) | 982 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 983 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) | 984 SCALER5_CTL0_ALPHA_EXPAND | 985 SCALER5_CTL0_RGB_EXPAND); 986 987 /* Position Word 0: Image Positions and Alpha Value */ 988 vc4_state->pos0_offset = vc4_state->dlist_count; 989 vc4_dlist_write(vc4_state, 990 (rotation & DRM_MODE_REFLECT_Y ? 991 SCALER5_POS0_VFLIP : 0) | 992 VC4_SET_FIELD(vc4_state->crtc_x, 993 SCALER_POS0_START_X) | 994 (rotation & DRM_MODE_REFLECT_X ? 995 SCALER5_POS0_HFLIP : 0) | 996 VC4_SET_FIELD(vc4_state->crtc_y, 997 SCALER5_POS0_START_Y) 998 ); 999 1000 /* Control Word 2 */ 1001 vc4_dlist_write(vc4_state, 1002 VC4_SET_FIELD(state->alpha >> 4, 1003 SCALER5_CTL2_ALPHA) | 1004 (fb->format->has_alpha ? 1005 SCALER5_CTL2_ALPHA_PREMULT : 0) | 1006 (mix_plane_alpha ? 1007 SCALER5_CTL2_ALPHA_MIX : 0) | 1008 VC4_SET_FIELD(fb->format->has_alpha ? 1009 SCALER5_CTL2_ALPHA_MODE_PIPELINE : 1010 SCALER5_CTL2_ALPHA_MODE_FIXED, 1011 SCALER5_CTL2_ALPHA_MODE) 1012 ); 1013 1014 /* Position Word 1: Scaled Image Dimensions. */ 1015 if (!vc4_state->is_unity) { 1016 vc4_dlist_write(vc4_state, 1017 VC4_SET_FIELD(vc4_state->crtc_w, 1018 SCALER5_POS1_SCL_WIDTH) | 1019 VC4_SET_FIELD(vc4_state->crtc_h, 1020 SCALER5_POS1_SCL_HEIGHT)); 1021 } 1022 1023 /* Position Word 2: Source Image Size */ 1024 vc4_state->pos2_offset = vc4_state->dlist_count; 1025 vc4_dlist_write(vc4_state, 1026 VC4_SET_FIELD(vc4_state->src_w[0], 1027 SCALER5_POS2_WIDTH) | 1028 VC4_SET_FIELD(vc4_state->src_h[0], 1029 SCALER5_POS2_HEIGHT)); 1030 1031 /* Position Word 3: Context. Written by the HVS. */ 1032 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 1033 } 1034 1035 1036 /* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers 1037 * 1038 * The pointers may be any byte address. 1039 */ 1040 vc4_state->ptr0_offset = vc4_state->dlist_count; 1041 for (i = 0; i < num_planes; i++) 1042 vc4_dlist_write(vc4_state, vc4_state->offsets[i]); 1043 1044 /* Pointer Context Word 0/1/2: Written by the HVS */ 1045 for (i = 0; i < num_planes; i++) 1046 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 1047 1048 /* Pitch word 0 */ 1049 vc4_dlist_write(vc4_state, pitch0); 1050 1051 /* Pitch word 1/2 */ 1052 for (i = 1; i < num_planes; i++) { 1053 if (hvs_format != HVS_PIXEL_FORMAT_H264 && 1054 hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) { 1055 vc4_dlist_write(vc4_state, 1056 VC4_SET_FIELD(fb->pitches[i], 1057 SCALER_SRC_PITCH)); 1058 } else { 1059 vc4_dlist_write(vc4_state, pitch0); 1060 } 1061 } 1062 1063 /* Colorspace conversion words */ 1064 if (vc4_state->is_yuv) { 1065 enum drm_color_encoding color_encoding = state->color_encoding; 1066 enum drm_color_range color_range = state->color_range; 1067 const u32 *ccm; 1068 1069 if (color_encoding >= DRM_COLOR_ENCODING_MAX) 1070 color_encoding = DRM_COLOR_YCBCR_BT601; 1071 if (color_range >= DRM_COLOR_RANGE_MAX) 1072 color_range = DRM_COLOR_YCBCR_LIMITED_RANGE; 1073 1074 ccm = colorspace_coeffs[color_range][color_encoding]; 1075 1076 vc4_dlist_write(vc4_state, ccm[0]); 1077 vc4_dlist_write(vc4_state, ccm[1]); 1078 vc4_dlist_write(vc4_state, ccm[2]); 1079 } 1080 1081 vc4_state->lbm_offset = 0; 1082 1083 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || 1084 vc4_state->x_scaling[1] != VC4_SCALING_NONE || 1085 vc4_state->y_scaling[0] != VC4_SCALING_NONE || 1086 vc4_state->y_scaling[1] != VC4_SCALING_NONE) { 1087 /* Reserve a slot for the LBM Base Address. The real value will 1088 * be set when calling vc4_plane_allocate_lbm(). 1089 */ 1090 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || 1091 vc4_state->y_scaling[1] != VC4_SCALING_NONE) { 1092 vc4_state->lbm_offset = vc4_state->dlist_count; 1093 vc4_dlist_counter_increment(vc4_state); 1094 } 1095 1096 if (num_planes > 1) { 1097 /* Emit Cb/Cr as channel 0 and Y as channel 1098 * 1. This matches how we set up scl0/scl1 1099 * above. 1100 */ 1101 vc4_write_scaling_parameters(state, 1); 1102 } 1103 vc4_write_scaling_parameters(state, 0); 1104 1105 /* If any PPF setup was done, then all the kernel 1106 * pointers get uploaded. 1107 */ 1108 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF || 1109 vc4_state->y_scaling[0] == VC4_SCALING_PPF || 1110 vc4_state->x_scaling[1] == VC4_SCALING_PPF || 1111 vc4_state->y_scaling[1] == VC4_SCALING_PPF) { 1112 u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start, 1113 SCALER_PPF_KERNEL_OFFSET); 1114 1115 /* HPPF plane 0 */ 1116 vc4_dlist_write(vc4_state, kernel); 1117 /* VPPF plane 0 */ 1118 vc4_dlist_write(vc4_state, kernel); 1119 /* HPPF plane 1 */ 1120 vc4_dlist_write(vc4_state, kernel); 1121 /* VPPF plane 1 */ 1122 vc4_dlist_write(vc4_state, kernel); 1123 } 1124 } 1125 1126 vc4_state->dlist[ctl0_offset] |= 1127 VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE); 1128 1129 /* crtc_* are already clipped coordinates. */ 1130 covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 && 1131 vc4_state->crtc_w == state->crtc->mode.hdisplay && 1132 vc4_state->crtc_h == state->crtc->mode.vdisplay; 1133 /* Background fill might be necessary when the plane has per-pixel 1134 * alpha content or a non-opaque plane alpha and could blend from the 1135 * background or does not cover the entire screen. 1136 */ 1137 vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen || 1138 state->alpha != DRM_BLEND_ALPHA_OPAQUE; 1139 1140 /* Flag the dlist as initialized to avoid checking it twice in case 1141 * the async update check already called vc4_plane_mode_set() and 1142 * decided to fallback to sync update because async update was not 1143 * possible. 1144 */ 1145 vc4_state->dlist_initialized = 1; 1146 1147 vc4_plane_calc_load(state); 1148 1149 return 0; 1150 } 1151 1152 /* If a modeset involves changing the setup of a plane, the atomic 1153 * infrastructure will call this to validate a proposed plane setup. 1154 * However, if a plane isn't getting updated, this (and the 1155 * corresponding vc4_plane_atomic_update) won't get called. Thus, we 1156 * compute the dlist here and have all active plane dlists get updated 1157 * in the CRTC's flush. 1158 */ 1159 static int vc4_plane_atomic_check(struct drm_plane *plane, 1160 struct drm_atomic_state *state) 1161 { 1162 struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, 1163 plane); 1164 struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state); 1165 int ret; 1166 1167 vc4_state->dlist_count = 0; 1168 1169 if (!plane_enabled(new_plane_state)) 1170 return 0; 1171 1172 ret = vc4_plane_mode_set(plane, new_plane_state); 1173 if (ret) 1174 return ret; 1175 1176 return vc4_plane_allocate_lbm(new_plane_state); 1177 } 1178 1179 static void vc4_plane_atomic_update(struct drm_plane *plane, 1180 struct drm_atomic_state *state) 1181 { 1182 /* No contents here. Since we don't know where in the CRTC's 1183 * dlist we should be stored, our dlist is uploaded to the 1184 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush 1185 * time. 1186 */ 1187 } 1188 1189 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist) 1190 { 1191 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); 1192 int i; 1193 1194 vc4_state->hw_dlist = dlist; 1195 1196 /* Can't memcpy_toio() because it needs to be 32-bit writes. */ 1197 for (i = 0; i < vc4_state->dlist_count; i++) 1198 writel(vc4_state->dlist[i], &dlist[i]); 1199 1200 return vc4_state->dlist_count; 1201 } 1202 1203 u32 vc4_plane_dlist_size(const struct drm_plane_state *state) 1204 { 1205 const struct vc4_plane_state *vc4_state = 1206 container_of(state, typeof(*vc4_state), base); 1207 1208 return vc4_state->dlist_count; 1209 } 1210 1211 /* Updates the plane to immediately (well, once the FIFO needs 1212 * refilling) scan out from at a new framebuffer. 1213 */ 1214 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) 1215 { 1216 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); 1217 struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); 1218 uint32_t addr; 1219 1220 /* We're skipping the address adjustment for negative origin, 1221 * because this is only called on the primary plane. 1222 */ 1223 WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0); 1224 addr = bo->paddr + fb->offsets[0]; 1225 1226 /* Write the new address into the hardware immediately. The 1227 * scanout will start from this address as soon as the FIFO 1228 * needs to refill with pixels. 1229 */ 1230 writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]); 1231 1232 /* Also update the CPU-side dlist copy, so that any later 1233 * atomic updates that don't do a new modeset on our plane 1234 * also use our updated address. 1235 */ 1236 vc4_state->dlist[vc4_state->ptr0_offset] = addr; 1237 } 1238 1239 static void vc4_plane_atomic_async_update(struct drm_plane *plane, 1240 struct drm_atomic_state *state) 1241 { 1242 struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, 1243 plane); 1244 struct vc4_plane_state *vc4_state, *new_vc4_state; 1245 1246 swap(plane->state->fb, new_plane_state->fb); 1247 plane->state->crtc_x = new_plane_state->crtc_x; 1248 plane->state->crtc_y = new_plane_state->crtc_y; 1249 plane->state->crtc_w = new_plane_state->crtc_w; 1250 plane->state->crtc_h = new_plane_state->crtc_h; 1251 plane->state->src_x = new_plane_state->src_x; 1252 plane->state->src_y = new_plane_state->src_y; 1253 plane->state->src_w = new_plane_state->src_w; 1254 plane->state->src_h = new_plane_state->src_h; 1255 plane->state->alpha = new_plane_state->alpha; 1256 plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode; 1257 plane->state->rotation = new_plane_state->rotation; 1258 plane->state->zpos = new_plane_state->zpos; 1259 plane->state->normalized_zpos = new_plane_state->normalized_zpos; 1260 plane->state->color_encoding = new_plane_state->color_encoding; 1261 plane->state->color_range = new_plane_state->color_range; 1262 plane->state->src = new_plane_state->src; 1263 plane->state->dst = new_plane_state->dst; 1264 plane->state->visible = new_plane_state->visible; 1265 1266 new_vc4_state = to_vc4_plane_state(new_plane_state); 1267 vc4_state = to_vc4_plane_state(plane->state); 1268 1269 vc4_state->crtc_x = new_vc4_state->crtc_x; 1270 vc4_state->crtc_y = new_vc4_state->crtc_y; 1271 vc4_state->crtc_h = new_vc4_state->crtc_h; 1272 vc4_state->crtc_w = new_vc4_state->crtc_w; 1273 vc4_state->src_x = new_vc4_state->src_x; 1274 vc4_state->src_y = new_vc4_state->src_y; 1275 memcpy(vc4_state->src_w, new_vc4_state->src_w, 1276 sizeof(vc4_state->src_w)); 1277 memcpy(vc4_state->src_h, new_vc4_state->src_h, 1278 sizeof(vc4_state->src_h)); 1279 memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling, 1280 sizeof(vc4_state->x_scaling)); 1281 memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling, 1282 sizeof(vc4_state->y_scaling)); 1283 vc4_state->is_unity = new_vc4_state->is_unity; 1284 vc4_state->is_yuv = new_vc4_state->is_yuv; 1285 memcpy(vc4_state->offsets, new_vc4_state->offsets, 1286 sizeof(vc4_state->offsets)); 1287 vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill; 1288 1289 /* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */ 1290 vc4_state->dlist[vc4_state->pos0_offset] = 1291 new_vc4_state->dlist[vc4_state->pos0_offset]; 1292 vc4_state->dlist[vc4_state->pos2_offset] = 1293 new_vc4_state->dlist[vc4_state->pos2_offset]; 1294 vc4_state->dlist[vc4_state->ptr0_offset] = 1295 new_vc4_state->dlist[vc4_state->ptr0_offset]; 1296 1297 /* Note that we can't just call vc4_plane_write_dlist() 1298 * because that would smash the context data that the HVS is 1299 * currently using. 1300 */ 1301 writel(vc4_state->dlist[vc4_state->pos0_offset], 1302 &vc4_state->hw_dlist[vc4_state->pos0_offset]); 1303 writel(vc4_state->dlist[vc4_state->pos2_offset], 1304 &vc4_state->hw_dlist[vc4_state->pos2_offset]); 1305 writel(vc4_state->dlist[vc4_state->ptr0_offset], 1306 &vc4_state->hw_dlist[vc4_state->ptr0_offset]); 1307 } 1308 1309 static int vc4_plane_atomic_async_check(struct drm_plane *plane, 1310 struct drm_atomic_state *state) 1311 { 1312 struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, 1313 plane); 1314 struct vc4_plane_state *old_vc4_state, *new_vc4_state; 1315 int ret; 1316 u32 i; 1317 1318 ret = vc4_plane_mode_set(plane, new_plane_state); 1319 if (ret) 1320 return ret; 1321 1322 old_vc4_state = to_vc4_plane_state(plane->state); 1323 new_vc4_state = to_vc4_plane_state(new_plane_state); 1324 if (old_vc4_state->dlist_count != new_vc4_state->dlist_count || 1325 old_vc4_state->pos0_offset != new_vc4_state->pos0_offset || 1326 old_vc4_state->pos2_offset != new_vc4_state->pos2_offset || 1327 old_vc4_state->ptr0_offset != new_vc4_state->ptr0_offset || 1328 vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state)) 1329 return -EINVAL; 1330 1331 /* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update 1332 * if anything else has changed, fallback to a sync update. 1333 */ 1334 for (i = 0; i < new_vc4_state->dlist_count; i++) { 1335 if (i == new_vc4_state->pos0_offset || 1336 i == new_vc4_state->pos2_offset || 1337 i == new_vc4_state->ptr0_offset || 1338 (new_vc4_state->lbm_offset && 1339 i == new_vc4_state->lbm_offset)) 1340 continue; 1341 1342 if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i]) 1343 return -EINVAL; 1344 } 1345 1346 return 0; 1347 } 1348 1349 static int vc4_prepare_fb(struct drm_plane *plane, 1350 struct drm_plane_state *state) 1351 { 1352 struct vc4_bo *bo; 1353 int ret; 1354 1355 if (!state->fb) 1356 return 0; 1357 1358 bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); 1359 1360 drm_gem_plane_helper_prepare_fb(plane, state); 1361 1362 if (plane->state->fb == state->fb) 1363 return 0; 1364 1365 ret = vc4_bo_inc_usecnt(bo); 1366 if (ret) 1367 return ret; 1368 1369 return 0; 1370 } 1371 1372 static void vc4_cleanup_fb(struct drm_plane *plane, 1373 struct drm_plane_state *state) 1374 { 1375 struct vc4_bo *bo; 1376 1377 if (plane->state->fb == state->fb || !state->fb) 1378 return; 1379 1380 bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); 1381 vc4_bo_dec_usecnt(bo); 1382 } 1383 1384 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { 1385 .atomic_check = vc4_plane_atomic_check, 1386 .atomic_update = vc4_plane_atomic_update, 1387 .prepare_fb = vc4_prepare_fb, 1388 .cleanup_fb = vc4_cleanup_fb, 1389 .atomic_async_check = vc4_plane_atomic_async_check, 1390 .atomic_async_update = vc4_plane_atomic_async_update, 1391 }; 1392 1393 static bool vc4_format_mod_supported(struct drm_plane *plane, 1394 uint32_t format, 1395 uint64_t modifier) 1396 { 1397 /* Support T_TILING for RGB formats only. */ 1398 switch (format) { 1399 case DRM_FORMAT_XRGB8888: 1400 case DRM_FORMAT_ARGB8888: 1401 case DRM_FORMAT_ABGR8888: 1402 case DRM_FORMAT_XBGR8888: 1403 case DRM_FORMAT_RGB565: 1404 case DRM_FORMAT_BGR565: 1405 case DRM_FORMAT_ARGB1555: 1406 case DRM_FORMAT_XRGB1555: 1407 switch (fourcc_mod_broadcom_mod(modifier)) { 1408 case DRM_FORMAT_MOD_LINEAR: 1409 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: 1410 return true; 1411 default: 1412 return false; 1413 } 1414 case DRM_FORMAT_NV12: 1415 case DRM_FORMAT_NV21: 1416 switch (fourcc_mod_broadcom_mod(modifier)) { 1417 case DRM_FORMAT_MOD_LINEAR: 1418 case DRM_FORMAT_MOD_BROADCOM_SAND64: 1419 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1420 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1421 return true; 1422 default: 1423 return false; 1424 } 1425 case DRM_FORMAT_P030: 1426 switch (fourcc_mod_broadcom_mod(modifier)) { 1427 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1428 return true; 1429 default: 1430 return false; 1431 } 1432 case DRM_FORMAT_RGBX1010102: 1433 case DRM_FORMAT_BGRX1010102: 1434 case DRM_FORMAT_RGBA1010102: 1435 case DRM_FORMAT_BGRA1010102: 1436 case DRM_FORMAT_YUV422: 1437 case DRM_FORMAT_YVU422: 1438 case DRM_FORMAT_YUV420: 1439 case DRM_FORMAT_YVU420: 1440 case DRM_FORMAT_NV16: 1441 case DRM_FORMAT_NV61: 1442 default: 1443 return (modifier == DRM_FORMAT_MOD_LINEAR); 1444 } 1445 } 1446 1447 static const struct drm_plane_funcs vc4_plane_funcs = { 1448 .update_plane = drm_atomic_helper_update_plane, 1449 .disable_plane = drm_atomic_helper_disable_plane, 1450 .destroy = drm_plane_cleanup, 1451 .set_property = NULL, 1452 .reset = vc4_plane_reset, 1453 .atomic_duplicate_state = vc4_plane_duplicate_state, 1454 .atomic_destroy_state = vc4_plane_destroy_state, 1455 .format_mod_supported = vc4_format_mod_supported, 1456 }; 1457 1458 struct drm_plane *vc4_plane_init(struct drm_device *dev, 1459 enum drm_plane_type type) 1460 { 1461 struct drm_plane *plane = NULL; 1462 struct vc4_plane *vc4_plane; 1463 u32 formats[ARRAY_SIZE(hvs_formats)]; 1464 int num_formats = 0; 1465 int ret = 0; 1466 unsigned i; 1467 bool hvs5 = of_device_is_compatible(dev->dev->of_node, 1468 "brcm,bcm2711-vc5"); 1469 static const uint64_t modifiers[] = { 1470 DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, 1471 DRM_FORMAT_MOD_BROADCOM_SAND128, 1472 DRM_FORMAT_MOD_BROADCOM_SAND64, 1473 DRM_FORMAT_MOD_BROADCOM_SAND256, 1474 DRM_FORMAT_MOD_LINEAR, 1475 DRM_FORMAT_MOD_INVALID 1476 }; 1477 1478 vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane), 1479 GFP_KERNEL); 1480 if (!vc4_plane) 1481 return ERR_PTR(-ENOMEM); 1482 1483 for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { 1484 if (!hvs_formats[i].hvs5_only || hvs5) { 1485 formats[num_formats] = hvs_formats[i].drm; 1486 num_formats++; 1487 } 1488 } 1489 1490 plane = &vc4_plane->base; 1491 ret = drm_universal_plane_init(dev, plane, 0, 1492 &vc4_plane_funcs, 1493 formats, num_formats, 1494 modifiers, type, NULL); 1495 if (ret) 1496 return ERR_PTR(ret); 1497 1498 drm_plane_helper_add(plane, &vc4_plane_helper_funcs); 1499 1500 drm_plane_create_alpha_property(plane); 1501 drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, 1502 DRM_MODE_ROTATE_0 | 1503 DRM_MODE_ROTATE_180 | 1504 DRM_MODE_REFLECT_X | 1505 DRM_MODE_REFLECT_Y); 1506 1507 drm_plane_create_color_properties(plane, 1508 BIT(DRM_COLOR_YCBCR_BT601) | 1509 BIT(DRM_COLOR_YCBCR_BT709) | 1510 BIT(DRM_COLOR_YCBCR_BT2020), 1511 BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | 1512 BIT(DRM_COLOR_YCBCR_FULL_RANGE), 1513 DRM_COLOR_YCBCR_BT709, 1514 DRM_COLOR_YCBCR_LIMITED_RANGE); 1515 1516 return plane; 1517 } 1518 1519 int vc4_plane_create_additional_planes(struct drm_device *drm) 1520 { 1521 struct drm_plane *cursor_plane; 1522 struct drm_crtc *crtc; 1523 unsigned int i; 1524 1525 /* Set up some arbitrary number of planes. We're not limited 1526 * by a set number of physical registers, just the space in 1527 * the HVS (16k) and how small an plane can be (28 bytes). 1528 * However, each plane we set up takes up some memory, and 1529 * increases the cost of looping over planes, which atomic 1530 * modesetting does quite a bit. As a result, we pick a 1531 * modest number of planes to expose, that should hopefully 1532 * still cover any sane usecase. 1533 */ 1534 for (i = 0; i < 16; i++) { 1535 struct drm_plane *plane = 1536 vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY); 1537 1538 if (IS_ERR(plane)) 1539 continue; 1540 1541 plane->possible_crtcs = 1542 GENMASK(drm->mode_config.num_crtc - 1, 0); 1543 } 1544 1545 drm_for_each_crtc(crtc, drm) { 1546 /* Set up the legacy cursor after overlay initialization, 1547 * since we overlay planes on the CRTC in the order they were 1548 * initialized. 1549 */ 1550 cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR); 1551 if (!IS_ERR(cursor_plane)) { 1552 cursor_plane->possible_crtcs = drm_crtc_mask(crtc); 1553 crtc->cursor = cursor_plane; 1554 } 1555 } 1556 1557 return 0; 1558 } 1559