// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Broadcom
 */

/**
 * DOC: VC4 plane module
 *
 * Each DRM plane is a layer of pixels being scanned out by the HVS.
 *
 * At atomic modeset check time, we compute the HVS display element
 * state that would be necessary for displaying the plane (giving us a
 * chance to figure out if a plane configuration is invalid), then at
 * atomic flush time the CRTC will ask us to write our element state
 * into the region of the HVS that it has allocated for us.
 */

#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_atomic_uapi.h>
#include <drm/drm_fb_cma_helper.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_gem_atomic_helper.h>
#include <drm/drm_plane_helper.h>

#include "uapi/drm/vc4_drm.h"

#include "vc4_drv.h"
#include "vc4_regs.h"

/* Mapping from a DRM fourcc to the HVS pixel format and component
 * order used when programming the scaler.  pixel_order_hvs5 is used
 * instead of pixel_order on the HVS5 variant when it is non-zero.
 */
static const struct hvs_format {
	u32 drm; /* DRM_FORMAT_* */
	u32 hvs; /* HVS_FORMAT_* */
	u32 pixel_order;
	u32 pixel_order_hvs5;
} hvs_formats[] = {
	{
		.drm = DRM_FORMAT_XRGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ARGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ABGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_XBGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_RGB565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
	},
	{
		.drm = DRM_FORMAT_ARGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_XRGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_RGB888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
	},
	{
		.drm = DRM_FORMAT_YUV422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_YUV420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV12,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV21,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV16,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV61,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
};

/* Returns the hvs_format entry matching a DRM fourcc, or NULL when the
 * format is not supported by this driver.
 */
static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
{
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
		if (hvs_formats[i].drm == drm_format)
			return &hvs_formats[i];
	}

	return NULL;
}

/* Picks the scaler mode for one axis: no scaling when sizes match, the
 * PPF (polyphase) filter down to a 2/3 decimation ratio, and the TPZ
 * (trapezoidal) filter for stronger downscaling.
 */
static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
{
	if (dst == src)
		return VC4_SCALING_NONE;
	if (3 * dst >= 2 * src)
		return VC4_SCALING_PPF;
	else
		return VC4_SCALING_TPZ;
}

static bool
plane_enabled(struct drm_plane_state *state)
{
	/* A plane is being scanned out only when it has an fb; an fb
	 * without a CRTC would indicate a driver/core bug.
	 */
	return state->fb && !WARN_ON(!state->crtc);
}

/* Duplicates the plane state for a new atomic transaction.  The dlist
 * is deep-copied so the new state can be rewritten independently; the
 * LBM node is deliberately not carried over (it is re-allocated at
 * atomic check time).
 */
static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
{
	struct vc4_plane_state *vc4_state;

	if (WARN_ON(!plane->state))
		return NULL;

	vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
	if (!vc4_state)
		return NULL;

	/* The duplicate must not share the old state's LBM allocation. */
	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
	vc4_state->dlist_initialized = 0;

	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);

	if (vc4_state->dlist) {
		/* Deep-copy the dlist; entries are 4 bytes each. */
		vc4_state->dlist = kmemdup(vc4_state->dlist,
					   vc4_state->dlist_count * 4,
					   GFP_KERNEL);
		if (!vc4_state->dlist) {
			kfree(vc4_state);
			return NULL;
		}
		vc4_state->dlist_size = vc4_state->dlist_count;
	}

	return &vc4_state->base;
}

/* Frees a plane state, returning its LBM region (if any) to the HVS
 * allocator and releasing the CPU-side dlist copy.
 */
static void vc4_plane_destroy_state(struct drm_plane *plane,
				    struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	if (drm_mm_node_allocated(&vc4_state->lbm)) {
		unsigned long irqflags;

		/* mm_lock is taken with IRQs disabled everywhere in
		 * this file, hence the irqsave variant here too.
		 */
		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
		drm_mm_remove_node(&vc4_state->lbm);
		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
	}

	kfree(vc4_state->dlist);
	__drm_atomic_helper_plane_destroy_state(&vc4_state->base);
	kfree(state);
}

/* Called during init to allocate the plane's atomic state.
*/ 210 static void vc4_plane_reset(struct drm_plane *plane) 211 { 212 struct vc4_plane_state *vc4_state; 213 214 WARN_ON(plane->state); 215 216 vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL); 217 if (!vc4_state) 218 return; 219 220 __drm_atomic_helper_plane_reset(plane, &vc4_state->base); 221 } 222 223 static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state) 224 { 225 if (vc4_state->dlist_count == vc4_state->dlist_size) { 226 u32 new_size = max(4u, vc4_state->dlist_count * 2); 227 u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL); 228 229 if (!new_dlist) 230 return; 231 memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4); 232 233 kfree(vc4_state->dlist); 234 vc4_state->dlist = new_dlist; 235 vc4_state->dlist_size = new_size; 236 } 237 238 vc4_state->dlist_count++; 239 } 240 241 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) 242 { 243 unsigned int idx = vc4_state->dlist_count; 244 245 vc4_dlist_counter_increment(vc4_state); 246 vc4_state->dlist[idx] = val; 247 } 248 249 /* Returns the scl0/scl1 field based on whether the dimensions need to 250 * be up/down/non-scaled. 251 * 252 * This is a replication of a table from the spec. 
 */
static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	/* Pack the (x, y) scaling modes into a single lookup key. */
	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_PPF_V_PPF;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_PPF_V_NONE;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_NONE_V_PPF;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
	default:
	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
		/* The unity case is independently handled by
		 * SCALER_CTL0_UNITY.
279 */ 280 return 0; 281 } 282 } 283 284 static int vc4_plane_margins_adj(struct drm_plane_state *pstate) 285 { 286 struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate); 287 unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay; 288 struct drm_crtc_state *crtc_state; 289 290 crtc_state = drm_atomic_get_new_crtc_state(pstate->state, 291 pstate->crtc); 292 293 vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom); 294 if (!left && !right && !top && !bottom) 295 return 0; 296 297 if (left + right >= crtc_state->mode.hdisplay || 298 top + bottom >= crtc_state->mode.vdisplay) 299 return -EINVAL; 300 301 adjhdisplay = crtc_state->mode.hdisplay - (left + right); 302 vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x * 303 adjhdisplay, 304 crtc_state->mode.hdisplay); 305 vc4_pstate->crtc_x += left; 306 if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - left) 307 vc4_pstate->crtc_x = crtc_state->mode.hdisplay - left; 308 309 adjvdisplay = crtc_state->mode.vdisplay - (top + bottom); 310 vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y * 311 adjvdisplay, 312 crtc_state->mode.vdisplay); 313 vc4_pstate->crtc_y += top; 314 if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - top) 315 vc4_pstate->crtc_y = crtc_state->mode.vdisplay - top; 316 317 vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w * 318 adjhdisplay, 319 crtc_state->mode.hdisplay); 320 vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h * 321 adjvdisplay, 322 crtc_state->mode.vdisplay); 323 324 if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h) 325 return -EINVAL; 326 327 return 0; 328 } 329 330 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) 331 { 332 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 333 struct drm_framebuffer *fb = state->fb; 334 struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); 335 u32 subpixel_src_mask = (1 << 16) - 1; 336 int num_planes = fb->format->num_planes; 337 struct 
drm_crtc_state *crtc_state;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	int i, ret;

	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	if (!crtc_state) {
		DRM_DEBUG_KMS("Invalid crtc state\n");
		return -EINVAL;
	}

	/* Clip against the CRTC, scale factors 1..INT_MAX, positioning
	 * and update-on-disabled-CRTC allowed.
	 */
	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
						  INT_MAX, true, true);
	if (ret)
		return ret;

	for (i = 0; i < num_planes; i++)
		vc4_state->offsets[i] = bo->paddr + fb->offsets[i];

	/* We don't support subpixel source positioning for scaling. */
	if ((state->src.x1 & subpixel_src_mask) ||
	    (state->src.x2 & subpixel_src_mask) ||
	    (state->src.y1 & subpixel_src_mask) ||
	    (state->src.y2 & subpixel_src_mask)) {
		return -EINVAL;
	}

	/* src.* are 16.16 fixed-point; keep the integer part only. */
	vc4_state->src_x = state->src.x1 >> 16;
	vc4_state->src_y = state->src.y1 >> 16;
	vc4_state->src_w[0] = (state->src.x2 - state->src.x1) >> 16;
	vc4_state->src_h[0] = (state->src.y2 - state->src.y1) >> 16;

	vc4_state->crtc_x = state->dst.x1;
	vc4_state->crtc_y = state->dst.y1;
	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;

	ret = vc4_plane_margins_adj(state);
	if (ret)
		return ret;

	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
						       vc4_state->crtc_w);
	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
						       vc4_state->crtc_h);

	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);

	if (num_planes > 1) {
		vc4_state->is_yuv = true;

		/* Chroma planes are subsampled relative to luma. */
		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;

		vc4_state->x_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_w[1],
					     vc4_state->crtc_w);
		vc4_state->y_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_h[1],
vc4_state->crtc_h);

		/* YUV conversion requires that horizontal scaling be enabled
		 * on the UV plane even if vc4_get_scaling_mode() returned
		 * VC4_SCALING_NONE (which can happen when the down-scaling
		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
		 * case.
		 */
		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
			vc4_state->x_scaling[1] = VC4_SCALING_PPF;
	} else {
		vc4_state->is_yuv = false;
		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
	}

	return 0;
}

/* Emits the two TPZ (trapezoidal filter) dlist words for one axis:
 * the 16.16 scale factor and its reciprocal.
 */
static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	u32 scale, recip;

	scale = (1 << 16) * src / dst;

	/* The specs note that while the reciprocal would be defined
	 * as (1<<32)/scale, ~0 is close enough.
	 */
	recip = ~0 / scale;

	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
}

/* Emits the single PPF (polyphase filter) dlist word for one axis. */
static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	u32 scale = (1 << 16) * src / dst;

	vc4_dlist_write(vc4_state,
			SCALER_PPF_AGC |
			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
			VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
}

/* Computes the size of the LBM (line buffer memory) region needed by
 * this plane's scaling setup, in HVS LBM allocator units.
 */
static u32 vc4_lbm_size(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	u32 pix_per_line;
	u32 lbm;

	/* LBM is not needed when there's no vertical scaling. */
	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
		return 0;

	/*
	 * This can be further optimized in the RGB/YUV444 case if the PPF
	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
	 *
	 * It's not an issue though, since in that case since src_w[0] is going
	 * to be greater than or equal to crtc_w.
	 */
	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
		pix_per_line = vc4_state->crtc_w;
	else
		pix_per_line = vc4_state->src_w[0];

	if (!vc4_state->is_yuv) {
		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
			lbm = pix_per_line * 8;
		else {
			/* In special cases, this multiplier might be 12. */
			lbm = pix_per_line * 16;
		}
	} else {
		/* There are cases for this going down to a multiplier
		 * of 2, but according to the firmware source, the
		 * table in the docs is somewhat wrong.
		 */
		lbm = pix_per_line * 16;
	}

	/* Align it to 64 or 128 (hvs5) bytes */
	lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64);

	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
	lbm /= vc4->hvs->hvs5 ? 4 : 2;

	return lbm;
}

/* Emits the scaling-parameter dlist words for one scaler channel
 * (channel 0 = RGB or Cb/Cr, channel 1 = Y; see vc4_plane_mode_set),
 * in the fixed order the HVS expects: H-PPF, V-PPF (+context), H-TPZ,
 * V-TPZ (+context).
 */
static void vc4_write_scaling_parameters(struct drm_plane_state *state,
					 int channel)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	/* Ch0 H-PPF Word 0: Scaling Parameters */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state,
			      vc4_state->src_w[channel], vc4_state->crtc_w);
	}

	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state,
			      vc4_state->src_h[channel], vc4_state->crtc_h);
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}

	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state,
			      vc4_state->src_w[channel], vc4_state->crtc_w);
	}

	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state,
			      vc4_state->src_h[channel], vc4_state->crtc_h);
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}
}

/* Estimates the HVS clock load (clk-cycles/sec) and memory-bus
 * bandwidth this plane will generate, storing them in the plane state
 * for the load-tracking code.
 */
static void vc4_plane_calc_load(struct drm_plane_state *state)
{
	unsigned int hvs_load_shift, vrefresh, i;
	struct drm_framebuffer *fb = state->fb;
	struct vc4_plane_state *vc4_state;
	struct drm_crtc_state *crtc_state;
	unsigned int vscale_factor;

	vc4_state = to_vc4_plane_state(state);
	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);

	/* The HVS is able to process 2 pixels/cycle when scaling the source,
	 * 4 pixels/cycle otherwise.
	 * Alpha blending step seems to be pipelined and it's always operating
	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
	 * scaler block.
	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
	 */
	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
		hvs_load_shift = 1;
	else
		hvs_load_shift = 2;

	vc4_state->membus_load = 0;
	vc4_state->hvs_load = 0;
	for (i = 0; i < fb->format->num_planes; i++) {
		/* Even if the bandwidth/plane required for a single frame is
		 *
		 * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
		 *
		 * when downscaling, we have to read more pixels per line in
		 * the time frame reserved for a single line, so the bandwidth
		 * demand can be punctually higher. To account for that, we
		 * calculate the down-scaling factor and multiply the plane
		 * load by this number. We're likely over-estimating the read
		 * demand, but that's better than under-estimating it.
		 */
		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
					     vc4_state->crtc_h);
		vc4_state->membus_load += vc4_state->src_w[i] *
					  vc4_state->src_h[i] * vscale_factor *
					  fb->format->cpp[i];
		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
	}

	vc4_state->hvs_load *= vrefresh;
	vc4_state->hvs_load >>= hvs_load_shift;
	vc4_state->membus_load *= vrefresh;
}

/* Allocates (or re-uses) the plane's LBM region and patches its start
 * address into the dlist slot that vc4_plane_mode_set() reserved.
 */
static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned long irqflags;
	u32 lbm_size;

	lbm_size = vc4_lbm_size(state);
	if (!lbm_size)
		return 0;

	/* lbm_offset == 0 means no dlist slot was reserved for the LBM
	 * address even though scaling needs one — a driver bug.
	 */
	if (WARN_ON(!vc4_state->lbm_offset))
		return -EINVAL;

	/* Allocate the LBM memory that the HVS will use for temporary
	 * storage due to our scaling/format conversion.
	 */
	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
		int ret;

		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
						 &vc4_state->lbm,
						 lbm_size,
						 vc4->hvs->hvs5 ? 64 : 32,
						 0, 0);
		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);

		if (ret)
			return ret;
	} else {
		/* A duplicated state should never carry an LBM node of
		 * the wrong size (duplicate_state clears it).
		 */
		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
	}

	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;

	return 0;
}

/* Writes out a full display list for an active plane to the plane's
 * private dlist state.
 */
static int vc4_plane_mode_set(struct drm_plane *plane,
			      struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	u32 ctl0_offset = vc4_state->dlist_count;
	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
	int num_planes = fb->format->num_planes;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	bool mix_plane_alpha;
	bool covers_screen;
	u32 scl0, scl1, pitch0;
	u32 tiling, src_y;
	u32 hvs_format = format->hvs;
	unsigned int rotation;
	int ret, i;

	/* The dlist may already have been built by the async-update path. */
	if (vc4_state->dlist_initialized)
		return 0;

	ret = vc4_plane_setup_clipping_and_scaling(state);
	if (ret)
		return ret;

	/* SCL1 is used for Cb/Cr scaling of planar formats. For RGB
	 * and 4:4:4, scl1 should be set to scl0 so both channels of
	 * the scaler do the same thing. For YUV, the Y plane needs
	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
	 * the scl fields here.
	 */
	if (num_planes == 1) {
		scl0 = vc4_get_scl_field(state, 0);
		scl1 = scl0;
	} else {
		scl0 = vc4_get_scl_field(state, 1);
		scl1 = vc4_get_scl_field(state, 0);
	}

	rotation = drm_rotation_simplify(state->rotation,
					 DRM_MODE_ROTATE_0 |
					 DRM_MODE_REFLECT_X |
					 DRM_MODE_REFLECT_Y);

	/* We must point to the last line when Y reflection is enabled. */
	src_y = vc4_state->src_y;
	if (rotation & DRM_MODE_REFLECT_Y)
		src_y += vc4_state->src_h[0] - 1;

	switch (base_format_mod) {
	case DRM_FORMAT_MOD_LINEAR:
		tiling = SCALER_CTL0_TILING_LINEAR;
		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 */
		for (i = 0; i < num_planes; i++) {
			vc4_state->offsets[i] += src_y /
						 (i ? v_subsample : 1) *
						 fb->pitches[i];

			vc4_state->offsets[i] += vc4_state->src_x /
						 (i ? h_subsample : 1) *
						 fb->format->cpp[i];
		}

		break;

	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
		u32 tile_size_shift = 12; /* T tiles are 4kb */
		/* Whole-tile offsets, mostly for setting the pitch. */
		u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
		u32 tile_w_mask = (1 << tile_w_shift) - 1;
		/* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
		 * the height (in pixels) of a 4k tile.
		 */
		u32 tile_h_mask = (2 << tile_h_shift) - 1;
		/* For T-tiled, the FB pitch is "how many bytes from one row to
		 * the next, such that
		 *
		 *	pitch * tile_h == tile_size * tiles_per_row
		 */
		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
		u32 tiles_l = vc4_state->src_x >> tile_w_shift;
		u32 tiles_r = tiles_w - tiles_l;
		u32 tiles_t = src_y >> tile_h_shift;
		/* Intra-tile offsets, which modify the base address (the
		 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
		 * base address).
		 */
		u32 tile_y = (src_y >> 4) & 1;
		u32 subtile_y = (src_y >> 2) & 3;
		u32 utile_y = src_y & 3;
		u32 x_off = vc4_state->src_x & tile_w_mask;
		u32 y_off = src_y & tile_h_mask;

		/* When Y reflection is requested we must set the
		 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
		 * after the initial one should be fetched in descending order,
		 * which makes sense since we start from the last line and go
		 * backward.
		 * Don't know why we need y_off = max_y_off - y_off, but it's
		 * definitely required (I guess it's also related to the "going
		 * backward" situation).
		 */
		if (rotation & DRM_MODE_REFLECT_Y) {
			y_off = tile_h_mask - y_off;
			pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
		} else {
			pitch0 = 0;
		}

		tiling = SCALER_CTL0_TILING_256B_OR_T;
		pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
			   VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
			   VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
			   VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
		vc4_state->offsets[0] += tiles_t * (tiles_w << tile_size_shift);
		vc4_state->offsets[0] += subtile_y << 8;
		vc4_state->offsets[0] += utile_y << 4;

		/* Rows of tiles alternate left-to-right and right-to-left. */
		if (tiles_t & 1) {
			pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
			vc4_state->offsets[0] += (tiles_w - tiles_l) <<
						 tile_size_shift;
			vc4_state->offsets[0] -= (1 + !tile_y) << 10;
		} else {
			vc4_state->offsets[0] += tiles_l << tile_size_shift;
			vc4_state->offsets[0] += tile_y << 10;
		}

		break;
	}

	case DRM_FORMAT_MOD_BROADCOM_SAND64:
	case DRM_FORMAT_MOD_BROADCOM_SAND128:
	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
		u32 tile_w, tile, x_off, pix_per_tile;

		hvs_format = HVS_PIXEL_FORMAT_H264;

		switch (base_format_mod) {
		case DRM_FORMAT_MOD_BROADCOM_SAND64:
			tiling = SCALER_CTL0_TILING_64B;
			tile_w = 64;
			break;
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
			tiling = SCALER_CTL0_TILING_128B;
			tile_w = 128;
			break;
		case DRM_FORMAT_MOD_BROADCOM_SAND256:
			tiling = SCALER_CTL0_TILING_256B_OR_T;
			tile_w = 256;
			break;
		default:
			/* NOTE(review): unreachable given the outer case
			 * labels, but tile_w/tiling would be used
			 * uninitialized if this were ever hit — consider
			 * returning -EINVAL here.
			 */
			break;
		}

		if (param > SCALER_TILE_HEIGHT_MASK) {
			DRM_DEBUG_KMS("SAND height too large (%d)\n", param);
			return -EINVAL;
		}

		pix_per_tile = tile_w / fb->format->cpp[0];
		tile = vc4_state->src_x / pix_per_tile;
		x_off = vc4_state->src_x % pix_per_tile;

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 */
		for (i = 0; i < num_planes; i++) {
			vc4_state->offsets[i] += param * tile_w * tile;
			vc4_state->offsets[i] += src_y /
						 (i ? v_subsample : 1) *
						 tile_w;
			vc4_state->offsets[i] += x_off /
						 (i ? h_subsample : 1) *
						 fb->format->cpp[i];
		}

		pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
		break;
	}

	default:
		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
			      (long long)fb->modifier);
		return -EINVAL;
	}

	/* Don't waste cycles mixing with plane alpha if the set alpha
	 * is opaque or there is no per-pixel alpha information.
	 * In any case we use the alpha property value as the fixed alpha.
	 */
	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
			  fb->format->has_alpha;

	if (!vc4->hvs->hvs5) {
		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
				(rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
				VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
				VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));

		/* Position Word 1: Scaled Image Dimensions.
		 * Skipped in the unity case.
		 */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size, Alpha */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(fb->format->has_alpha ?
					      SCALER_POS2_ALPHA_MODE_PIPELINE :
					      SCALER_POS2_ALPHA_MODE_FIXED,
					      SCALER_POS2_ALPHA_MODE) |
				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
				(fb->format->has_alpha ?
						SCALER_POS2_ALPHA_PREMULT : 0) |
				VC4_SET_FIELD(vc4_state->src_w[0],
					      SCALER_POS2_WIDTH) |
				VC4_SET_FIELD(vc4_state->src_h[0],
					      SCALER_POS2_HEIGHT));

		/* Position Word 3: Context. Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	} else {
		u32 hvs_pixel_order = format->pixel_order;

		/* HVS5 uses its own component order when one is given. */
		if (format->pixel_order_hvs5)
			hvs_pixel_order = format->pixel_order_hvs5;

		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(hvs_pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ?
						SCALER5_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
				SCALER5_CTL0_ALPHA_EXPAND |
				SCALER5_CTL0_RGB_EXPAND);

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(rotation & DRM_MODE_REFLECT_Y ?
						SCALER5_POS0_VFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_x,
					      SCALER_POS0_START_X) |
				(rotation & DRM_MODE_REFLECT_X ?
						SCALER5_POS0_HFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_y,
					      SCALER5_POS0_START_Y)
			       );

		/* Control Word 2 */
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 4,
					      SCALER5_CTL2_ALPHA) |
				(fb->format->has_alpha ?
						SCALER5_CTL2_ALPHA_PREMULT : 0) |
				(mix_plane_alpha ?
						SCALER5_CTL2_ALPHA_MIX : 0) |
				VC4_SET_FIELD(fb->format->has_alpha ?
					      SCALER5_CTL2_ALPHA_MODE_PIPELINE :
					      SCALER5_CTL2_ALPHA_MODE_FIXED,
					      SCALER5_CTL2_ALPHA_MODE)
			       );

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER5_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER5_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(vc4_state->src_w[0],
					      SCALER5_POS2_WIDTH) |
				VC4_SET_FIELD(vc4_state->src_h[0],
					      SCALER5_POS2_HEIGHT));

		/* Position Word 3: Context. Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}


	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
	 *
	 * The pointers may be any byte address.
	 */
	vc4_state->ptr0_offset = vc4_state->dlist_count;
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, vc4_state->offsets[i]);

	/* Pointer Context Word 0/1/2: Written by the HVS */
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	/* Pitch word 0 */
	vc4_dlist_write(vc4_state, pitch0);

	/* Pitch word 1/2 */
	for (i = 1; i < num_planes; i++) {
		if (hvs_format != HVS_PIXEL_FORMAT_H264) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(fb->pitches[i],
						      SCALER_SRC_PITCH));
		} else {
			vc4_dlist_write(vc4_state, pitch0);
		}
	}

	/* Colorspace conversion words */
	if (vc4_state->is_yuv) {
		vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5);
		vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5);
		vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
	}

	vc4_state->lbm_offset = 0;

	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
		/* Reserve a slot for the LBM Base Address. The real value will
		 * be set when calling vc4_plane_allocate_lbm().
		 */
		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			vc4_state->lbm_offset = vc4_state->dlist_count;
			vc4_dlist_counter_increment(vc4_state);
		}

		if (num_planes > 1) {
			/* Emit Cb/Cr as channel 0 and Y as channel
			 * 1. This matches how we set up scl0/scl1
			 * above.
			 */
			vc4_write_scaling_parameters(state, 1);
		}
		vc4_write_scaling_parameters(state, 0);

		/* If any PPF setup was done, then all the kernel
		 * pointers get uploaded.
		 */
		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
						   SCALER_PPF_KERNEL_OFFSET);

			/* HPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* HPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
		}
	}

	/* The dlist is complete; back-patch its final size into the
	 * control word written first.
	 */
	vc4_state->dlist[ctl0_offset] |=
		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);

	/* crtc_* are already clipped coordinates. */
	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
			vc4_state->crtc_h == state->crtc->mode.vdisplay;
	/* Background fill might be necessary when the plane has per-pixel
	 * alpha content or a non-opaque plane alpha and could blend from the
	 * background or does not cover the entire screen.
1029 */ 1030 vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen || 1031 state->alpha != DRM_BLEND_ALPHA_OPAQUE; 1032 1033 /* Flag the dlist as initialized to avoid checking it twice in case 1034 * the async update check already called vc4_plane_mode_set() and 1035 * decided to fallback to sync update because async update was not 1036 * possible. 1037 */ 1038 vc4_state->dlist_initialized = 1; 1039 1040 vc4_plane_calc_load(state); 1041 1042 return 0; 1043 } 1044 1045 /* If a modeset involves changing the setup of a plane, the atomic 1046 * infrastructure will call this to validate a proposed plane setup. 1047 * However, if a plane isn't getting updated, this (and the 1048 * corresponding vc4_plane_atomic_update) won't get called. Thus, we 1049 * compute the dlist here and have all active plane dlists get updated 1050 * in the CRTC's flush. 1051 */ 1052 static int vc4_plane_atomic_check(struct drm_plane *plane, 1053 struct drm_atomic_state *state) 1054 { 1055 struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, 1056 plane); 1057 struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state); 1058 int ret; 1059 1060 vc4_state->dlist_count = 0; 1061 1062 if (!plane_enabled(new_plane_state)) 1063 return 0; 1064 1065 ret = vc4_plane_mode_set(plane, new_plane_state); 1066 if (ret) 1067 return ret; 1068 1069 return vc4_plane_allocate_lbm(new_plane_state); 1070 } 1071 1072 static void vc4_plane_atomic_update(struct drm_plane *plane, 1073 struct drm_atomic_state *state) 1074 { 1075 /* No contents here. Since we don't know where in the CRTC's 1076 * dlist we should be stored, our dlist is uploaded to the 1077 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush 1078 * time. 
1079 */ 1080 } 1081 1082 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist) 1083 { 1084 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); 1085 int i; 1086 1087 vc4_state->hw_dlist = dlist; 1088 1089 /* Can't memcpy_toio() because it needs to be 32-bit writes. */ 1090 for (i = 0; i < vc4_state->dlist_count; i++) 1091 writel(vc4_state->dlist[i], &dlist[i]); 1092 1093 return vc4_state->dlist_count; 1094 } 1095 1096 u32 vc4_plane_dlist_size(const struct drm_plane_state *state) 1097 { 1098 const struct vc4_plane_state *vc4_state = 1099 container_of(state, typeof(*vc4_state), base); 1100 1101 return vc4_state->dlist_count; 1102 } 1103 1104 /* Updates the plane to immediately (well, once the FIFO needs 1105 * refilling) scan out from at a new framebuffer. 1106 */ 1107 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) 1108 { 1109 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); 1110 struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); 1111 uint32_t addr; 1112 1113 /* We're skipping the address adjustment for negative origin, 1114 * because this is only called on the primary plane. 1115 */ 1116 WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0); 1117 addr = bo->paddr + fb->offsets[0]; 1118 1119 /* Write the new address into the hardware immediately. The 1120 * scanout will start from this address as soon as the FIFO 1121 * needs to refill with pixels. 1122 */ 1123 writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]); 1124 1125 /* Also update the CPU-side dlist copy, so that any later 1126 * atomic updates that don't do a new modeset on our plane 1127 * also use our updated address. 
1128 */ 1129 vc4_state->dlist[vc4_state->ptr0_offset] = addr; 1130 } 1131 1132 static void vc4_plane_atomic_async_update(struct drm_plane *plane, 1133 struct drm_atomic_state *state) 1134 { 1135 struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, 1136 plane); 1137 struct vc4_plane_state *vc4_state, *new_vc4_state; 1138 1139 swap(plane->state->fb, new_plane_state->fb); 1140 plane->state->crtc_x = new_plane_state->crtc_x; 1141 plane->state->crtc_y = new_plane_state->crtc_y; 1142 plane->state->crtc_w = new_plane_state->crtc_w; 1143 plane->state->crtc_h = new_plane_state->crtc_h; 1144 plane->state->src_x = new_plane_state->src_x; 1145 plane->state->src_y = new_plane_state->src_y; 1146 plane->state->src_w = new_plane_state->src_w; 1147 plane->state->src_h = new_plane_state->src_h; 1148 plane->state->alpha = new_plane_state->alpha; 1149 plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode; 1150 plane->state->rotation = new_plane_state->rotation; 1151 plane->state->zpos = new_plane_state->zpos; 1152 plane->state->normalized_zpos = new_plane_state->normalized_zpos; 1153 plane->state->color_encoding = new_plane_state->color_encoding; 1154 plane->state->color_range = new_plane_state->color_range; 1155 plane->state->src = new_plane_state->src; 1156 plane->state->dst = new_plane_state->dst; 1157 plane->state->visible = new_plane_state->visible; 1158 1159 new_vc4_state = to_vc4_plane_state(new_plane_state); 1160 vc4_state = to_vc4_plane_state(plane->state); 1161 1162 vc4_state->crtc_x = new_vc4_state->crtc_x; 1163 vc4_state->crtc_y = new_vc4_state->crtc_y; 1164 vc4_state->crtc_h = new_vc4_state->crtc_h; 1165 vc4_state->crtc_w = new_vc4_state->crtc_w; 1166 vc4_state->src_x = new_vc4_state->src_x; 1167 vc4_state->src_y = new_vc4_state->src_y; 1168 memcpy(vc4_state->src_w, new_vc4_state->src_w, 1169 sizeof(vc4_state->src_w)); 1170 memcpy(vc4_state->src_h, new_vc4_state->src_h, 1171 sizeof(vc4_state->src_h)); 1172 
memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling, 1173 sizeof(vc4_state->x_scaling)); 1174 memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling, 1175 sizeof(vc4_state->y_scaling)); 1176 vc4_state->is_unity = new_vc4_state->is_unity; 1177 vc4_state->is_yuv = new_vc4_state->is_yuv; 1178 memcpy(vc4_state->offsets, new_vc4_state->offsets, 1179 sizeof(vc4_state->offsets)); 1180 vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill; 1181 1182 /* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */ 1183 vc4_state->dlist[vc4_state->pos0_offset] = 1184 new_vc4_state->dlist[vc4_state->pos0_offset]; 1185 vc4_state->dlist[vc4_state->pos2_offset] = 1186 new_vc4_state->dlist[vc4_state->pos2_offset]; 1187 vc4_state->dlist[vc4_state->ptr0_offset] = 1188 new_vc4_state->dlist[vc4_state->ptr0_offset]; 1189 1190 /* Note that we can't just call vc4_plane_write_dlist() 1191 * because that would smash the context data that the HVS is 1192 * currently using. 1193 */ 1194 writel(vc4_state->dlist[vc4_state->pos0_offset], 1195 &vc4_state->hw_dlist[vc4_state->pos0_offset]); 1196 writel(vc4_state->dlist[vc4_state->pos2_offset], 1197 &vc4_state->hw_dlist[vc4_state->pos2_offset]); 1198 writel(vc4_state->dlist[vc4_state->ptr0_offset], 1199 &vc4_state->hw_dlist[vc4_state->ptr0_offset]); 1200 } 1201 1202 static int vc4_plane_atomic_async_check(struct drm_plane *plane, 1203 struct drm_atomic_state *state) 1204 { 1205 struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, 1206 plane); 1207 struct vc4_plane_state *old_vc4_state, *new_vc4_state; 1208 int ret; 1209 u32 i; 1210 1211 ret = vc4_plane_mode_set(plane, new_plane_state); 1212 if (ret) 1213 return ret; 1214 1215 old_vc4_state = to_vc4_plane_state(plane->state); 1216 new_vc4_state = to_vc4_plane_state(new_plane_state); 1217 if (old_vc4_state->dlist_count != new_vc4_state->dlist_count || 1218 old_vc4_state->pos0_offset != new_vc4_state->pos0_offset || 1219 old_vc4_state->pos2_offset != 
new_vc4_state->pos2_offset || 1220 old_vc4_state->ptr0_offset != new_vc4_state->ptr0_offset || 1221 vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state)) 1222 return -EINVAL; 1223 1224 /* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update 1225 * if anything else has changed, fallback to a sync update. 1226 */ 1227 for (i = 0; i < new_vc4_state->dlist_count; i++) { 1228 if (i == new_vc4_state->pos0_offset || 1229 i == new_vc4_state->pos2_offset || 1230 i == new_vc4_state->ptr0_offset || 1231 (new_vc4_state->lbm_offset && 1232 i == new_vc4_state->lbm_offset)) 1233 continue; 1234 1235 if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i]) 1236 return -EINVAL; 1237 } 1238 1239 return 0; 1240 } 1241 1242 static int vc4_prepare_fb(struct drm_plane *plane, 1243 struct drm_plane_state *state) 1244 { 1245 struct vc4_bo *bo; 1246 int ret; 1247 1248 if (!state->fb) 1249 return 0; 1250 1251 bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); 1252 1253 drm_gem_plane_helper_prepare_fb(plane, state); 1254 1255 if (plane->state->fb == state->fb) 1256 return 0; 1257 1258 ret = vc4_bo_inc_usecnt(bo); 1259 if (ret) 1260 return ret; 1261 1262 return 0; 1263 } 1264 1265 static void vc4_cleanup_fb(struct drm_plane *plane, 1266 struct drm_plane_state *state) 1267 { 1268 struct vc4_bo *bo; 1269 1270 if (plane->state->fb == state->fb || !state->fb) 1271 return; 1272 1273 bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); 1274 vc4_bo_dec_usecnt(bo); 1275 } 1276 1277 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { 1278 .atomic_check = vc4_plane_atomic_check, 1279 .atomic_update = vc4_plane_atomic_update, 1280 .prepare_fb = vc4_prepare_fb, 1281 .cleanup_fb = vc4_cleanup_fb, 1282 .atomic_async_check = vc4_plane_atomic_async_check, 1283 .atomic_async_update = vc4_plane_atomic_async_update, 1284 }; 1285 1286 static bool vc4_format_mod_supported(struct drm_plane *plane, 1287 uint32_t format, 1288 uint64_t modifier) 1289 { 1290 /* 
Support T_TILING for RGB formats only. */ 1291 switch (format) { 1292 case DRM_FORMAT_XRGB8888: 1293 case DRM_FORMAT_ARGB8888: 1294 case DRM_FORMAT_ABGR8888: 1295 case DRM_FORMAT_XBGR8888: 1296 case DRM_FORMAT_RGB565: 1297 case DRM_FORMAT_BGR565: 1298 case DRM_FORMAT_ARGB1555: 1299 case DRM_FORMAT_XRGB1555: 1300 switch (fourcc_mod_broadcom_mod(modifier)) { 1301 case DRM_FORMAT_MOD_LINEAR: 1302 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: 1303 return true; 1304 default: 1305 return false; 1306 } 1307 case DRM_FORMAT_NV12: 1308 case DRM_FORMAT_NV21: 1309 switch (fourcc_mod_broadcom_mod(modifier)) { 1310 case DRM_FORMAT_MOD_LINEAR: 1311 case DRM_FORMAT_MOD_BROADCOM_SAND64: 1312 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1313 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1314 return true; 1315 default: 1316 return false; 1317 } 1318 case DRM_FORMAT_RGBX1010102: 1319 case DRM_FORMAT_BGRX1010102: 1320 case DRM_FORMAT_RGBA1010102: 1321 case DRM_FORMAT_BGRA1010102: 1322 case DRM_FORMAT_YUV422: 1323 case DRM_FORMAT_YVU422: 1324 case DRM_FORMAT_YUV420: 1325 case DRM_FORMAT_YVU420: 1326 case DRM_FORMAT_NV16: 1327 case DRM_FORMAT_NV61: 1328 default: 1329 return (modifier == DRM_FORMAT_MOD_LINEAR); 1330 } 1331 } 1332 1333 static const struct drm_plane_funcs vc4_plane_funcs = { 1334 .update_plane = drm_atomic_helper_update_plane, 1335 .disable_plane = drm_atomic_helper_disable_plane, 1336 .destroy = drm_plane_cleanup, 1337 .set_property = NULL, 1338 .reset = vc4_plane_reset, 1339 .atomic_duplicate_state = vc4_plane_duplicate_state, 1340 .atomic_destroy_state = vc4_plane_destroy_state, 1341 .format_mod_supported = vc4_format_mod_supported, 1342 }; 1343 1344 struct drm_plane *vc4_plane_init(struct drm_device *dev, 1345 enum drm_plane_type type) 1346 { 1347 struct drm_plane *plane = NULL; 1348 struct vc4_plane *vc4_plane; 1349 u32 formats[ARRAY_SIZE(hvs_formats)]; 1350 int ret = 0; 1351 unsigned i; 1352 static const uint64_t modifiers[] = { 1353 DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, 1354 
		DRM_FORMAT_MOD_BROADCOM_SAND128,
		DRM_FORMAT_MOD_BROADCOM_SAND64,
		DRM_FORMAT_MOD_BROADCOM_SAND256,
		DRM_FORMAT_MOD_LINEAR,
		DRM_FORMAT_MOD_INVALID
	};

	vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane),
				 GFP_KERNEL);
	if (!vc4_plane)
		return ERR_PTR(-ENOMEM);

	/* Advertise every DRM format the HVS knows how to scan out. */
	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++)
		formats[i] = hvs_formats[i].drm;

	plane = &vc4_plane->base;
	ret = drm_universal_plane_init(dev, plane, 0,
				       &vc4_plane_funcs,
				       formats, ARRAY_SIZE(formats),
				       modifiers, type, NULL);
	if (ret)
		return ERR_PTR(ret);

	drm_plane_helper_add(plane, &vc4_plane_helper_funcs);

	drm_plane_create_alpha_property(plane);
	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
					   DRM_MODE_ROTATE_0 |
					   DRM_MODE_ROTATE_180 |
					   DRM_MODE_REFLECT_X |
					   DRM_MODE_REFLECT_Y);

	return plane;
}

int vc4_plane_create_additional_planes(struct drm_device *drm)
{
	struct drm_plane *cursor_plane;
	struct drm_crtc *crtc;
	unsigned int i;

	/* Set up some arbitrary number of planes.  We're not limited
	 * by a set number of physical registers, just the space in
	 * the HVS (16k) and how small a plane can be (28 bytes).
	 * However, each plane we set up takes up some memory, and
	 * increases the cost of looping over planes, which atomic
	 * modesetting does quite a bit.  As a result, we pick a
	 * modest number of planes to expose, that should hopefully
	 * still cover any sane usecase.
	 */
	for (i = 0; i < 16; i++) {
		struct drm_plane *plane =
			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY);

		/* Overlay creation is best-effort: a failed plane is
		 * simply skipped rather than failing the whole setup.
		 */
		if (IS_ERR(plane))
			continue;

		plane->possible_crtcs =
			GENMASK(drm->mode_config.num_crtc - 1, 0);
	}

	drm_for_each_crtc(crtc, drm) {
		/* Set up the legacy cursor after overlay initialization,
		 * since we overlay planes on the CRTC in the order they were
		 * initialized.
		 */
		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
		if (!IS_ERR(cursor_plane)) {
			cursor_plane->possible_crtcs = drm_crtc_mask(crtc);
			crtc->cursor = cursor_plane;
		}
	}

	return 0;
}