1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2015 Broadcom 4 */ 5 6 /** 7 * DOC: VC4 plane module 8 * 9 * Each DRM plane is a layer of pixels being scanned out by the HVS. 10 * 11 * At atomic modeset check time, we compute the HVS display element 12 * state that would be necessary for displaying the plane (giving us a 13 * chance to figure out if a plane configuration is invalid), then at 14 * atomic flush time the CRTC will ask us to write our element state 15 * into the region of the HVS that it has allocated for us. 16 */ 17 18 #include <drm/drm_atomic.h> 19 #include <drm/drm_atomic_helper.h> 20 #include <drm/drm_atomic_uapi.h> 21 #include <drm/drm_fb_cma_helper.h> 22 #include <drm/drm_fourcc.h> 23 #include <drm/drm_gem_framebuffer_helper.h> 24 #include <drm/drm_plane_helper.h> 25 26 #include "uapi/drm/vc4_drm.h" 27 28 #include "vc4_drv.h" 29 #include "vc4_regs.h" 30 31 static const struct hvs_format { 32 u32 drm; /* DRM_FORMAT_* */ 33 u32 hvs; /* HVS_FORMAT_* */ 34 u32 pixel_order; 35 u32 pixel_order_hvs5; 36 } hvs_formats[] = { 37 { 38 .drm = DRM_FORMAT_XRGB8888, 39 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 40 .pixel_order = HVS_PIXEL_ORDER_ABGR, 41 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 42 }, 43 { 44 .drm = DRM_FORMAT_ARGB8888, 45 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 46 .pixel_order = HVS_PIXEL_ORDER_ABGR, 47 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 48 }, 49 { 50 .drm = DRM_FORMAT_ABGR8888, 51 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 52 .pixel_order = HVS_PIXEL_ORDER_ARGB, 53 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 54 }, 55 { 56 .drm = DRM_FORMAT_XBGR8888, 57 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 58 .pixel_order = HVS_PIXEL_ORDER_ARGB, 59 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 60 }, 61 { 62 .drm = DRM_FORMAT_RGB565, 63 .hvs = HVS_PIXEL_FORMAT_RGB565, 64 .pixel_order = HVS_PIXEL_ORDER_XRGB, 65 }, 66 { 67 .drm = DRM_FORMAT_BGR565, 68 .hvs = HVS_PIXEL_FORMAT_RGB565, 69 .pixel_order = HVS_PIXEL_ORDER_XBGR, 70 }, 71 { 72 .drm = DRM_FORMAT_ARGB1555, 73 .hvs = HVS_PIXEL_FORMAT_RGBA5551, 74 .pixel_order = HVS_PIXEL_ORDER_ABGR, 75 }, 76 { 77 .drm = DRM_FORMAT_XRGB1555, 78 .hvs = HVS_PIXEL_FORMAT_RGBA5551, 79 .pixel_order = HVS_PIXEL_ORDER_ABGR, 80 }, 81 { 82 .drm = DRM_FORMAT_RGB888, 83 .hvs = HVS_PIXEL_FORMAT_RGB888, 84 .pixel_order = HVS_PIXEL_ORDER_XRGB, 85 }, 86 { 87 .drm = DRM_FORMAT_BGR888, 88 .hvs = HVS_PIXEL_FORMAT_RGB888, 89 .pixel_order = HVS_PIXEL_ORDER_XBGR, 90 }, 91 { 92 .drm = DRM_FORMAT_YUV422, 93 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 94 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 95 }, 96 { 97 .drm = DRM_FORMAT_YVU422, 98 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 99 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 100 }, 101 { 102 .drm = DRM_FORMAT_YUV420, 103 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 104 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 105 }, 106 { 107 .drm = DRM_FORMAT_YVU420, 108 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 109 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 110 }, 111 { 112 .drm = DRM_FORMAT_NV12, 113 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 114 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 115 }, 116 { 117 .drm = DRM_FORMAT_NV21, 118 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 119 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 120 }, 121 { 122 .drm = DRM_FORMAT_NV16, 123 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 124 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 125 }, 126 { 127 .drm = DRM_FORMAT_NV61, 128 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 129 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 130 }, 131 }; 132 133 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) 134 { 135 unsigned i; 136 137 for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { 138 if (hvs_formats[i].drm == drm_format) 139 return &hvs_formats[i]; 140 } 141 142 return NULL; 143 } 144 145 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst) 146 { 147 if (dst == src) 148 return VC4_SCALING_NONE; 149 if (3 * dst >= 2 * src) 150 return VC4_SCALING_PPF; 151 else 152 return VC4_SCALING_TPZ; 153 } 154 155 static bool plane_enabled(struct drm_plane_state *state) 156 { 157 return state->fb && !WARN_ON(!state->crtc); 158 } 159 160 static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane) 161 { 162 struct vc4_plane_state *vc4_state; 163 164 if (WARN_ON(!plane->state)) 165 return NULL; 166 167 vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL); 168 if (!vc4_state) 169 return NULL; 170 171 memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm)); 172 vc4_state->dlist_initialized = 0; 173 174 __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base); 175 176 if (vc4_state->dlist) { 177 vc4_state->dlist = kmemdup(vc4_state->dlist, 178 vc4_state->dlist_count * 4, 179 GFP_KERNEL); 180 if (!vc4_state->dlist) { 181 kfree(vc4_state); 182 return NULL; 183 } 184 vc4_state->dlist_size = vc4_state->dlist_count; 185 } 186 187 return &vc4_state->base; 188 } 189 190 static void vc4_plane_destroy_state(struct drm_plane *plane, 191 struct drm_plane_state *state) 192 { 193 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 194 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 195 196 if (drm_mm_node_allocated(&vc4_state->lbm)) { 197 unsigned long irqflags; 198 199 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); 200 drm_mm_remove_node(&vc4_state->lbm); 201 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); 202 } 203 204 kfree(vc4_state->dlist); 205 __drm_atomic_helper_plane_destroy_state(&vc4_state->base); 206 kfree(state); 207 } 208 209 /* Called during init to allocate the plane's atomic state. */ 210 static void vc4_plane_reset(struct drm_plane *plane) 211 { 212 struct vc4_plane_state *vc4_state; 213 214 WARN_ON(plane->state); 215 216 vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL); 217 if (!vc4_state) 218 return; 219 220 __drm_atomic_helper_plane_reset(plane, &vc4_state->base); 221 } 222 223 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) 224 { 225 if (vc4_state->dlist_count == vc4_state->dlist_size) { 226 u32 new_size = max(4u, vc4_state->dlist_count * 2); 227 u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL); 228 229 if (!new_dlist) 230 return; 231 memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4); 232 233 kfree(vc4_state->dlist); 234 vc4_state->dlist = new_dlist; 235 vc4_state->dlist_size = new_size; 236 } 237 238 vc4_state->dlist[vc4_state->dlist_count++] = val; 239 } 240 241 /* Returns the scl0/scl1 field based on whether the dimensions need to 242 * be up/down/non-scaled. 243 * 244 * This is a replication of a table from the spec. 245 */ 246 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane) 247 { 248 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 249 250 switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) { 251 case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF: 252 return SCALER_CTL0_SCL_H_PPF_V_PPF; 253 case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF: 254 return SCALER_CTL0_SCL_H_TPZ_V_PPF; 255 case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ: 256 return SCALER_CTL0_SCL_H_PPF_V_TPZ; 257 case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ: 258 return SCALER_CTL0_SCL_H_TPZ_V_TPZ; 259 case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE: 260 return SCALER_CTL0_SCL_H_PPF_V_NONE; 261 case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF: 262 return SCALER_CTL0_SCL_H_NONE_V_PPF; 263 case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ: 264 return SCALER_CTL0_SCL_H_NONE_V_TPZ; 265 case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE: 266 return SCALER_CTL0_SCL_H_TPZ_V_NONE; 267 default: 268 case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE: 269 /* The unity case is independently handled by 270 * SCALER_CTL0_UNITY. 271 */ 272 return 0; 273 } 274 } 275 276 static int vc4_plane_margins_adj(struct drm_plane_state *pstate) 277 { 278 struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate); 279 unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay; 280 struct drm_crtc_state *crtc_state; 281 282 crtc_state = drm_atomic_get_new_crtc_state(pstate->state, 283 pstate->crtc); 284 285 vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom); 286 if (!left && !right && !top && !bottom) 287 return 0; 288 289 if (left + right >= crtc_state->mode.hdisplay || 290 top + bottom >= crtc_state->mode.vdisplay) 291 return -EINVAL; 292 293 adjhdisplay = crtc_state->mode.hdisplay - (left + right); 294 vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x * 295 adjhdisplay, 296 crtc_state->mode.hdisplay); 297 vc4_pstate->crtc_x += left; 298 if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - left) 299 vc4_pstate->crtc_x = crtc_state->mode.hdisplay - left; 300 301 adjvdisplay = crtc_state->mode.vdisplay - (top + bottom); 302 vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y * 303 adjvdisplay, 304 crtc_state->mode.vdisplay); 305 vc4_pstate->crtc_y += top; 306 if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - top) 307 vc4_pstate->crtc_y = crtc_state->mode.vdisplay - top; 308 309 vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w * 310 adjhdisplay, 311 crtc_state->mode.hdisplay); 312 vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h * 313 adjvdisplay, 314 crtc_state->mode.vdisplay); 315 316 if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h) 317 return -EINVAL; 318 319 return 0; 320 } 321 322 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) 323 { 324 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 325 struct drm_framebuffer *fb = state->fb; 326 struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); 327 u32 subpixel_src_mask = (1 << 16) - 1; 328 int num_planes = fb->format->num_planes; 329 struct drm_crtc_state *crtc_state; 330 u32 h_subsample = fb->format->hsub; 331 u32 v_subsample = fb->format->vsub; 332 int i, ret; 333 334 crtc_state = drm_atomic_get_existing_crtc_state(state->state, 335 state->crtc); 336 if (!crtc_state) { 337 DRM_DEBUG_KMS("Invalid crtc state\n"); 338 return -EINVAL; 339 } 340 341 ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1, 342 INT_MAX, true, true); 343 if (ret) 344 return ret; 345 346 for (i = 0; i < num_planes; i++) 347 vc4_state->offsets[i] = bo->paddr + fb->offsets[i]; 348 349 /* We don't support subpixel source positioning for scaling. */ 350 if ((state->src.x1 & subpixel_src_mask) || 351 (state->src.x2 & subpixel_src_mask) || 352 (state->src.y1 & subpixel_src_mask) || 353 (state->src.y2 & subpixel_src_mask)) { 354 return -EINVAL; 355 } 356 357 vc4_state->src_x = state->src.x1 >> 16; 358 vc4_state->src_y = state->src.y1 >> 16; 359 vc4_state->src_w[0] = (state->src.x2 - state->src.x1) >> 16; 360 vc4_state->src_h[0] = (state->src.y2 - state->src.y1) >> 16; 361 362 vc4_state->crtc_x = state->dst.x1; 363 vc4_state->crtc_y = state->dst.y1; 364 vc4_state->crtc_w = state->dst.x2 - state->dst.x1; 365 vc4_state->crtc_h = state->dst.y2 - state->dst.y1; 366 367 ret = vc4_plane_margins_adj(state); 368 if (ret) 369 return ret; 370 371 vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0], 372 vc4_state->crtc_w); 373 vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0], 374 vc4_state->crtc_h); 375 376 vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && 377 vc4_state->y_scaling[0] == VC4_SCALING_NONE); 378 379 if (num_planes > 1) { 380 vc4_state->is_yuv = true; 381 382 vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample; 383 vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample; 384 385 vc4_state->x_scaling[1] = 386 vc4_get_scaling_mode(vc4_state->src_w[1], 387 vc4_state->crtc_w); 388 vc4_state->y_scaling[1] = 389 vc4_get_scaling_mode(vc4_state->src_h[1], 390 vc4_state->crtc_h); 391 392 /* YUV conversion requires that horizontal scaling be enabled 393 * on the UV plane even if vc4_get_scaling_mode() returned 394 * VC4_SCALING_NONE (which can happen when the down-scaling 395 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this 396 * case. 397 */ 398 if (vc4_state->x_scaling[1] == VC4_SCALING_NONE) 399 vc4_state->x_scaling[1] = VC4_SCALING_PPF; 400 } else { 401 vc4_state->is_yuv = false; 402 vc4_state->x_scaling[1] = VC4_SCALING_NONE; 403 vc4_state->y_scaling[1] = VC4_SCALING_NONE; 404 } 405 406 return 0; 407 } 408 409 static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) 410 { 411 u32 scale, recip; 412 413 scale = (1 << 16) * src / dst; 414 415 /* The specs note that while the reciprocal would be defined 416 * as (1<<32)/scale, ~0 is close enough. 417 */ 418 recip = ~0 / scale; 419 420 vc4_dlist_write(vc4_state, 421 VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) | 422 VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE)); 423 vc4_dlist_write(vc4_state, 424 VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP)); 425 } 426 427 static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst) 428 { 429 u32 scale = (1 << 16) * src / dst; 430 431 vc4_dlist_write(vc4_state, 432 SCALER_PPF_AGC | 433 VC4_SET_FIELD(scale, SCALER_PPF_SCALE) | 434 VC4_SET_FIELD(0, SCALER_PPF_IPHASE)); 435 } 436 437 static u32 vc4_lbm_size(struct drm_plane_state *state) 438 { 439 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 440 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 441 u32 pix_per_line; 442 u32 lbm; 443 444 /* LBM is not needed when there's no vertical scaling. */ 445 if (vc4_state->y_scaling[0] == VC4_SCALING_NONE && 446 vc4_state->y_scaling[1] == VC4_SCALING_NONE) 447 return 0; 448 449 /* 450 * This can be further optimized in the RGB/YUV444 case if the PPF 451 * decimation factor is between 0.5 and 1.0 by using crtc_w. 452 * 453 * It's not an issue though, since in that case since src_w[0] is going 454 * to be greater than or equal to crtc_w. 455 */ 456 if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ) 457 pix_per_line = vc4_state->crtc_w; 458 else 459 pix_per_line = vc4_state->src_w[0]; 460 461 if (!vc4_state->is_yuv) { 462 if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ) 463 lbm = pix_per_line * 8; 464 else { 465 /* In special cases, this multiplier might be 12. */ 466 lbm = pix_per_line * 16; 467 } 468 } else { 469 /* There are cases for this going down to a multiplier 470 * of 2, but according to the firmware source, the 471 * table in the docs is somewhat wrong. 472 */ 473 lbm = pix_per_line * 16; 474 } 475 476 /* Align it to 64 or 128 (hvs5) bytes */ 477 lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64); 478 479 /* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */ 480 lbm /= vc4->hvs->hvs5 ? 4 : 2; 481 482 return lbm; 483 } 484 485 static void vc4_write_scaling_parameters(struct drm_plane_state *state, 486 int channel) 487 { 488 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 489 490 /* Ch0 H-PPF Word 0: Scaling Parameters */ 491 if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) { 492 vc4_write_ppf(vc4_state, 493 vc4_state->src_w[channel], vc4_state->crtc_w); 494 } 495 496 /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */ 497 if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) { 498 vc4_write_ppf(vc4_state, 499 vc4_state->src_h[channel], vc4_state->crtc_h); 500 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 501 } 502 503 /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */ 504 if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) { 505 vc4_write_tpz(vc4_state, 506 vc4_state->src_w[channel], vc4_state->crtc_w); 507 } 508 509 /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */ 510 if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) { 511 vc4_write_tpz(vc4_state, 512 vc4_state->src_h[channel], vc4_state->crtc_h); 513 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 514 } 515 } 516 517 static void vc4_plane_calc_load(struct drm_plane_state *state) 518 { 519 unsigned int hvs_load_shift, vrefresh, i; 520 struct drm_framebuffer *fb = state->fb; 521 struct vc4_plane_state *vc4_state; 522 struct drm_crtc_state *crtc_state; 523 unsigned int vscale_factor; 524 struct vc4_dev *vc4; 525 526 vc4 = to_vc4_dev(state->plane->dev); 527 if (!vc4->load_tracker_available) 528 return; 529 530 vc4_state = to_vc4_plane_state(state); 531 crtc_state = drm_atomic_get_existing_crtc_state(state->state, 532 state->crtc); 533 vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode); 534 535 /* The HVS is able to process 2 pixels/cycle when scaling the source, 536 * 4 pixels/cycle otherwise. 537 * Alpha blending step seems to be pipelined and it's always operating 538 * at 4 pixels/cycle, so the limiting aspect here seems to be the 539 * scaler block. 540 * HVS load is expressed in clk-cycles/sec (AKA Hz). 541 */ 542 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || 543 vc4_state->x_scaling[1] != VC4_SCALING_NONE || 544 vc4_state->y_scaling[0] != VC4_SCALING_NONE || 545 vc4_state->y_scaling[1] != VC4_SCALING_NONE) 546 hvs_load_shift = 1; 547 else 548 hvs_load_shift = 2; 549 550 vc4_state->membus_load = 0; 551 vc4_state->hvs_load = 0; 552 for (i = 0; i < fb->format->num_planes; i++) { 553 /* Even if the bandwidth/plane required for a single frame is 554 * 555 * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh 556 * 557 * when downscaling, we have to read more pixels per line in 558 * the time frame reserved for a single line, so the bandwidth 559 * demand can be punctually higher. To account for that, we 560 * calculate the down-scaling factor and multiply the plane 561 * load by this number. We're likely over-estimating the read 562 * demand, but that's better than under-estimating it. 563 */ 564 vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i], 565 vc4_state->crtc_h); 566 vc4_state->membus_load += vc4_state->src_w[i] * 567 vc4_state->src_h[i] * vscale_factor * 568 fb->format->cpp[i]; 569 vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w; 570 } 571 572 vc4_state->hvs_load *= vrefresh; 573 vc4_state->hvs_load >>= hvs_load_shift; 574 vc4_state->membus_load *= vrefresh; 575 } 576 577 static int vc4_plane_allocate_lbm(struct drm_plane_state *state) 578 { 579 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 580 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 581 unsigned long irqflags; 582 u32 lbm_size; 583 584 lbm_size = vc4_lbm_size(state); 585 if (!lbm_size) 586 return 0; 587 588 if (WARN_ON(!vc4_state->lbm_offset)) 589 return -EINVAL; 590 591 /* Allocate the LBM memory that the HVS will use for temporary 592 * storage due to our scaling/format conversion. 593 */ 594 if (!drm_mm_node_allocated(&vc4_state->lbm)) { 595 int ret; 596 597 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); 598 ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm, 599 &vc4_state->lbm, 600 lbm_size, 601 vc4->hvs->hvs5 ? 64 : 32, 602 0, 0); 603 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); 604 605 if (ret) 606 return ret; 607 } else { 608 WARN_ON_ONCE(lbm_size != vc4_state->lbm.size); 609 } 610 611 vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start; 612 613 return 0; 614 } 615 616 /* Writes out a full display list for an active plane to the plane's 617 * private dlist state. 618 */ 619 static int vc4_plane_mode_set(struct drm_plane *plane, 620 struct drm_plane_state *state) 621 { 622 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 623 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 624 struct drm_framebuffer *fb = state->fb; 625 u32 ctl0_offset = vc4_state->dlist_count; 626 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); 627 u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier); 628 int num_planes = fb->format->num_planes; 629 u32 h_subsample = fb->format->hsub; 630 u32 v_subsample = fb->format->vsub; 631 bool mix_plane_alpha; 632 bool covers_screen; 633 u32 scl0, scl1, pitch0; 634 u32 tiling, src_y; 635 u32 hvs_format = format->hvs; 636 unsigned int rotation; 637 int ret, i; 638 639 if (vc4_state->dlist_initialized) 640 return 0; 641 642 ret = vc4_plane_setup_clipping_and_scaling(state); 643 if (ret) 644 return ret; 645 646 /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB 647 * and 4:4:4, scl1 should be set to scl0 so both channels of 648 * the scaler do the same thing. For YUV, the Y plane needs 649 * to be put in channel 1 and Cb/Cr in channel 0, so we swap 650 * the scl fields here. 651 */ 652 if (num_planes == 1) { 653 scl0 = vc4_get_scl_field(state, 0); 654 scl1 = scl0; 655 } else { 656 scl0 = vc4_get_scl_field(state, 1); 657 scl1 = vc4_get_scl_field(state, 0); 658 } 659 660 rotation = drm_rotation_simplify(state->rotation, 661 DRM_MODE_ROTATE_0 | 662 DRM_MODE_REFLECT_X | 663 DRM_MODE_REFLECT_Y); 664 665 /* We must point to the last line when Y reflection is enabled. */ 666 src_y = vc4_state->src_y; 667 if (rotation & DRM_MODE_REFLECT_Y) 668 src_y += vc4_state->src_h[0] - 1; 669 670 switch (base_format_mod) { 671 case DRM_FORMAT_MOD_LINEAR: 672 tiling = SCALER_CTL0_TILING_LINEAR; 673 pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH); 674 675 /* Adjust the base pointer to the first pixel to be scanned 676 * out. 677 */ 678 for (i = 0; i < num_planes; i++) { 679 vc4_state->offsets[i] += src_y / 680 (i ? v_subsample : 1) * 681 fb->pitches[i]; 682 683 vc4_state->offsets[i] += vc4_state->src_x / 684 (i ? h_subsample : 1) * 685 fb->format->cpp[i]; 686 } 687 688 break; 689 690 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: { 691 u32 tile_size_shift = 12; /* T tiles are 4kb */ 692 /* Whole-tile offsets, mostly for setting the pitch. */ 693 u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5; 694 u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */ 695 u32 tile_w_mask = (1 << tile_w_shift) - 1; 696 /* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice 697 * the height (in pixels) of a 4k tile. 698 */ 699 u32 tile_h_mask = (2 << tile_h_shift) - 1; 700 /* For T-tiled, the FB pitch is "how many bytes from one row to 701 * the next, such that 702 * 703 * pitch * tile_h == tile_size * tiles_per_row 704 */ 705 u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift); 706 u32 tiles_l = vc4_state->src_x >> tile_w_shift; 707 u32 tiles_r = tiles_w - tiles_l; 708 u32 tiles_t = src_y >> tile_h_shift; 709 /* Intra-tile offsets, which modify the base address (the 710 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that 711 * base address). 712 */ 713 u32 tile_y = (src_y >> 4) & 1; 714 u32 subtile_y = (src_y >> 2) & 3; 715 u32 utile_y = src_y & 3; 716 u32 x_off = vc4_state->src_x & tile_w_mask; 717 u32 y_off = src_y & tile_h_mask; 718 719 /* When Y reflection is requested we must set the 720 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines 721 * after the initial one should be fetched in descending order, 722 * which makes sense since we start from the last line and go 723 * backward. 724 * Don't know why we need y_off = max_y_off - y_off, but it's 725 * definitely required (I guess it's also related to the "going 726 * backward" situation). 727 */ 728 if (rotation & DRM_MODE_REFLECT_Y) { 729 y_off = tile_h_mask - y_off; 730 pitch0 = SCALER_PITCH0_TILE_LINE_DIR; 731 } else { 732 pitch0 = 0; 733 } 734 735 tiling = SCALER_CTL0_TILING_256B_OR_T; 736 pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) | 737 VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) | 738 VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) | 739 VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R)); 740 vc4_state->offsets[0] += tiles_t * (tiles_w << tile_size_shift); 741 vc4_state->offsets[0] += subtile_y << 8; 742 vc4_state->offsets[0] += utile_y << 4; 743 744 /* Rows of tiles alternate left-to-right and right-to-left. */ 745 if (tiles_t & 1) { 746 pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR; 747 vc4_state->offsets[0] += (tiles_w - tiles_l) << 748 tile_size_shift; 749 vc4_state->offsets[0] -= (1 + !tile_y) << 10; 750 } else { 751 vc4_state->offsets[0] += tiles_l << tile_size_shift; 752 vc4_state->offsets[0] += tile_y << 10; 753 } 754 755 break; 756 } 757 758 case DRM_FORMAT_MOD_BROADCOM_SAND64: 759 case DRM_FORMAT_MOD_BROADCOM_SAND128: 760 case DRM_FORMAT_MOD_BROADCOM_SAND256: { 761 uint32_t param = fourcc_mod_broadcom_param(fb->modifier); 762 u32 tile_w, tile, x_off, pix_per_tile; 763 764 hvs_format = HVS_PIXEL_FORMAT_H264; 765 766 switch (base_format_mod) { 767 case DRM_FORMAT_MOD_BROADCOM_SAND64: 768 tiling = SCALER_CTL0_TILING_64B; 769 tile_w = 64; 770 break; 771 case DRM_FORMAT_MOD_BROADCOM_SAND128: 772 tiling = SCALER_CTL0_TILING_128B; 773 tile_w = 128; 774 break; 775 case DRM_FORMAT_MOD_BROADCOM_SAND256: 776 tiling = SCALER_CTL0_TILING_256B_OR_T; 777 tile_w = 256; 778 break; 779 default: 780 break; 781 } 782 783 if (param > SCALER_TILE_HEIGHT_MASK) { 784 DRM_DEBUG_KMS("SAND height too large (%d)\n", param); 785 return -EINVAL; 786 } 787 788 pix_per_tile = tile_w / fb->format->cpp[0]; 789 tile = vc4_state->src_x / pix_per_tile; 790 x_off = vc4_state->src_x % pix_per_tile; 791 792 /* Adjust the base pointer to the first pixel to be scanned 793 * out. 794 */ 795 for (i = 0; i < num_planes; i++) { 796 vc4_state->offsets[i] += param * tile_w * tile; 797 vc4_state->offsets[i] += src_y / 798 (i ? v_subsample : 1) * 799 tile_w; 800 vc4_state->offsets[i] += x_off / 801 (i ? h_subsample : 1) * 802 fb->format->cpp[i]; 803 } 804 805 pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT); 806 break; 807 } 808 809 default: 810 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", 811 (long long)fb->modifier); 812 return -EINVAL; 813 } 814 815 /* Don't waste cycles mixing with plane alpha if the set alpha 816 * is opaque or there is no per-pixel alpha information. 817 * In any case we use the alpha property value as the fixed alpha. 818 */ 819 mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && 820 fb->format->has_alpha; 821 822 if (!vc4->hvs->hvs5) { 823 /* Control word */ 824 vc4_dlist_write(vc4_state, 825 SCALER_CTL0_VALID | 826 (rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) | 827 (rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) | 828 VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) | 829 (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | 830 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 831 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | 832 (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) | 833 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 834 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1)); 835 836 /* Position Word 0: Image Positions and Alpha Value */ 837 vc4_state->pos0_offset = vc4_state->dlist_count; 838 vc4_dlist_write(vc4_state, 839 VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) | 840 VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | 841 VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); 842 843 /* Position Word 1: Scaled Image Dimensions. */ 844 if (!vc4_state->is_unity) { 845 vc4_dlist_write(vc4_state, 846 VC4_SET_FIELD(vc4_state->crtc_w, 847 SCALER_POS1_SCL_WIDTH) | 848 VC4_SET_FIELD(vc4_state->crtc_h, 849 SCALER_POS1_SCL_HEIGHT)); 850 } 851 852 /* Position Word 2: Source Image Size, Alpha */ 853 vc4_state->pos2_offset = vc4_state->dlist_count; 854 vc4_dlist_write(vc4_state, 855 VC4_SET_FIELD(fb->format->has_alpha ? 856 SCALER_POS2_ALPHA_MODE_PIPELINE : 857 SCALER_POS2_ALPHA_MODE_FIXED, 858 SCALER_POS2_ALPHA_MODE) | 859 (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) | 860 (fb->format->has_alpha ? 861 SCALER_POS2_ALPHA_PREMULT : 0) | 862 VC4_SET_FIELD(vc4_state->src_w[0], 863 SCALER_POS2_WIDTH) | 864 VC4_SET_FIELD(vc4_state->src_h[0], 865 SCALER_POS2_HEIGHT)); 866 867 /* Position Word 3: Context. Written by the HVS. */ 868 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 869 870 } else { 871 u32 hvs_pixel_order = format->pixel_order; 872 873 if (format->pixel_order_hvs5) 874 hvs_pixel_order = format->pixel_order_hvs5; 875 876 /* Control word */ 877 vc4_dlist_write(vc4_state, 878 SCALER_CTL0_VALID | 879 (hvs_pixel_order << SCALER_CTL0_ORDER_SHIFT) | 880 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 881 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | 882 (vc4_state->is_unity ? 883 SCALER5_CTL0_UNITY : 0) | 884 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 885 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) | 886 SCALER5_CTL0_ALPHA_EXPAND | 887 SCALER5_CTL0_RGB_EXPAND); 888 889 /* Position Word 0: Image Positions and Alpha Value */ 890 vc4_state->pos0_offset = vc4_state->dlist_count; 891 vc4_dlist_write(vc4_state, 892 (rotation & DRM_MODE_REFLECT_Y ? 893 SCALER5_POS0_VFLIP : 0) | 894 VC4_SET_FIELD(vc4_state->crtc_x, 895 SCALER_POS0_START_X) | 896 (rotation & DRM_MODE_REFLECT_X ? 897 SCALER5_POS0_HFLIP : 0) | 898 VC4_SET_FIELD(vc4_state->crtc_y, 899 SCALER5_POS0_START_Y) 900 ); 901 902 /* Control Word 2 */ 903 vc4_dlist_write(vc4_state, 904 VC4_SET_FIELD(state->alpha >> 4, 905 SCALER5_CTL2_ALPHA) | 906 (fb->format->has_alpha ? 907 SCALER5_CTL2_ALPHA_PREMULT : 0) | 908 (mix_plane_alpha ? 909 SCALER5_CTL2_ALPHA_MIX : 0) | 910 VC4_SET_FIELD(fb->format->has_alpha ? 911 SCALER5_CTL2_ALPHA_MODE_PIPELINE : 912 SCALER5_CTL2_ALPHA_MODE_FIXED, 913 SCALER5_CTL2_ALPHA_MODE) 914 ); 915 916 /* Position Word 1: Scaled Image Dimensions. */ 917 if (!vc4_state->is_unity) { 918 vc4_dlist_write(vc4_state, 919 VC4_SET_FIELD(vc4_state->crtc_w, 920 SCALER5_POS1_SCL_WIDTH) | 921 VC4_SET_FIELD(vc4_state->crtc_h, 922 SCALER5_POS1_SCL_HEIGHT)); 923 } 924 925 /* Position Word 2: Source Image Size */ 926 vc4_state->pos2_offset = vc4_state->dlist_count; 927 vc4_dlist_write(vc4_state, 928 VC4_SET_FIELD(vc4_state->src_w[0], 929 SCALER5_POS2_WIDTH) | 930 VC4_SET_FIELD(vc4_state->src_h[0], 931 SCALER5_POS2_HEIGHT)); 932 933 /* Position Word 3: Context. Written by the HVS. */ 934 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 935 } 936 937 938 /* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers 939 * 940 * The pointers may be any byte address. 941 */ 942 vc4_state->ptr0_offset = vc4_state->dlist_count; 943 for (i = 0; i < num_planes; i++) 944 vc4_dlist_write(vc4_state, vc4_state->offsets[i]); 945 946 /* Pointer Context Word 0/1/2: Written by the HVS */ 947 for (i = 0; i < num_planes; i++) 948 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 949 950 /* Pitch word 0 */ 951 vc4_dlist_write(vc4_state, pitch0); 952 953 /* Pitch word 1/2 */ 954 for (i = 1; i < num_planes; i++) { 955 if (hvs_format != HVS_PIXEL_FORMAT_H264) { 956 vc4_dlist_write(vc4_state, 957 VC4_SET_FIELD(fb->pitches[i], 958 SCALER_SRC_PITCH)); 959 } else { 960 vc4_dlist_write(vc4_state, pitch0); 961 } 962 } 963 964 /* Colorspace conversion words */ 965 if (vc4_state->is_yuv) { 966 vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5); 967 vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5); 968 vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5); 969 } 970 971 vc4_state->lbm_offset = 0; 972 973 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || 974 vc4_state->x_scaling[1] != VC4_SCALING_NONE || 975 vc4_state->y_scaling[0] != VC4_SCALING_NONE || 976 vc4_state->y_scaling[1] != VC4_SCALING_NONE) { 977 /* Reserve a slot for the LBM Base Address. The real value will 978 * be set when calling vc4_plane_allocate_lbm(). 979 */ 980 if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || 981 vc4_state->y_scaling[1] != VC4_SCALING_NONE) 982 vc4_state->lbm_offset = vc4_state->dlist_count++; 983 984 if (num_planes > 1) { 985 /* Emit Cb/Cr as channel 0 and Y as channel 986 * 1. This matches how we set up scl0/scl1 987 * above. 988 */ 989 vc4_write_scaling_parameters(state, 1); 990 } 991 vc4_write_scaling_parameters(state, 0); 992 993 /* If any PPF setup was done, then all the kernel 994 * pointers get uploaded. 995 */ 996 if (vc4_state->x_scaling[0] == VC4_SCALING_PPF || 997 vc4_state->y_scaling[0] == VC4_SCALING_PPF || 998 vc4_state->x_scaling[1] == VC4_SCALING_PPF || 999 vc4_state->y_scaling[1] == VC4_SCALING_PPF) { 1000 u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start, 1001 SCALER_PPF_KERNEL_OFFSET); 1002 1003 /* HPPF plane 0 */ 1004 vc4_dlist_write(vc4_state, kernel); 1005 /* VPPF plane 0 */ 1006 vc4_dlist_write(vc4_state, kernel); 1007 /* HPPF plane 1 */ 1008 vc4_dlist_write(vc4_state, kernel); 1009 /* VPPF plane 1 */ 1010 vc4_dlist_write(vc4_state, kernel); 1011 } 1012 } 1013 1014 vc4_state->dlist[ctl0_offset] |= 1015 VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE); 1016 1017 /* crtc_* are already clipped coordinates. */ 1018 covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 && 1019 vc4_state->crtc_w == state->crtc->mode.hdisplay && 1020 vc4_state->crtc_h == state->crtc->mode.vdisplay; 1021 /* Background fill might be necessary when the plane has per-pixel 1022 * alpha content or a non-opaque plane alpha and could blend from the 1023 * background or does not cover the entire screen. 1024 */ 1025 vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen || 1026 state->alpha != DRM_BLEND_ALPHA_OPAQUE; 1027 1028 /* Flag the dlist as initialized to avoid checking it twice in case 1029 * the async update check already called vc4_plane_mode_set() and 1030 * decided to fallback to sync update because async update was not 1031 * possible. 1032 */ 1033 vc4_state->dlist_initialized = 1; 1034 1035 vc4_plane_calc_load(state); 1036 1037 return 0; 1038 } 1039 1040 /* If a modeset involves changing the setup of a plane, the atomic 1041 * infrastructure will call this to validate a proposed plane setup. 1042 * However, if a plane isn't getting updated, this (and the 1043 * corresponding vc4_plane_atomic_update) won't get called. Thus, we 1044 * compute the dlist here and have all active plane dlists get updated 1045 * in the CRTC's flush. 1046 */ 1047 static int vc4_plane_atomic_check(struct drm_plane *plane, 1048 struct drm_plane_state *state) 1049 { 1050 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 1051 int ret; 1052 1053 vc4_state->dlist_count = 0; 1054 1055 if (!plane_enabled(state)) 1056 return 0; 1057 1058 ret = vc4_plane_mode_set(plane, state); 1059 if (ret) 1060 return ret; 1061 1062 return vc4_plane_allocate_lbm(state); 1063 } 1064 1065 static void vc4_plane_atomic_update(struct drm_plane *plane, 1066 struct drm_plane_state *old_state) 1067 { 1068 /* No contents here. Since we don't know where in the CRTC's 1069 * dlist we should be stored, our dlist is uploaded to the 1070 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush 1071 * time. 1072 */ 1073 } 1074 1075 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist) 1076 { 1077 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); 1078 int i; 1079 1080 vc4_state->hw_dlist = dlist; 1081 1082 /* Can't memcpy_toio() because it needs to be 32-bit writes. */ 1083 for (i = 0; i < vc4_state->dlist_count; i++) 1084 writel(vc4_state->dlist[i], &dlist[i]); 1085 1086 return vc4_state->dlist_count; 1087 } 1088 1089 u32 vc4_plane_dlist_size(const struct drm_plane_state *state) 1090 { 1091 const struct vc4_plane_state *vc4_state = 1092 container_of(state, typeof(*vc4_state), base); 1093 1094 return vc4_state->dlist_count; 1095 } 1096 1097 /* Updates the plane to immediately (well, once the FIFO needs 1098 * refilling) scan out from at a new framebuffer. 1099 */ 1100 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) 1101 { 1102 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); 1103 struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); 1104 uint32_t addr; 1105 1106 /* We're skipping the address adjustment for negative origin, 1107 * because this is only called on the primary plane. 1108 */ 1109 WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0); 1110 addr = bo->paddr + fb->offsets[0]; 1111 1112 /* Write the new address into the hardware immediately. The 1113 * scanout will start from this address as soon as the FIFO 1114 * needs to refill with pixels. 1115 */ 1116 writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]); 1117 1118 /* Also update the CPU-side dlist copy, so that any later 1119 * atomic updates that don't do a new modeset on our plane 1120 * also use our updated address. 1121 */ 1122 vc4_state->dlist[vc4_state->ptr0_offset] = addr; 1123 } 1124 1125 static void vc4_plane_atomic_async_update(struct drm_plane *plane, 1126 struct drm_plane_state *state) 1127 { 1128 struct vc4_plane_state *vc4_state, *new_vc4_state; 1129 1130 swap(plane->state->fb, state->fb); 1131 plane->state->crtc_x = state->crtc_x; 1132 plane->state->crtc_y = state->crtc_y; 1133 plane->state->crtc_w = state->crtc_w; 1134 plane->state->crtc_h = state->crtc_h; 1135 plane->state->src_x = state->src_x; 1136 plane->state->src_y = state->src_y; 1137 plane->state->src_w = state->src_w; 1138 plane->state->src_h = state->src_h; 1139 plane->state->src_h = state->src_h; 1140 plane->state->alpha = state->alpha; 1141 plane->state->pixel_blend_mode = state->pixel_blend_mode; 1142 plane->state->rotation = state->rotation; 1143 plane->state->zpos = state->zpos; 1144 plane->state->normalized_zpos = state->normalized_zpos; 1145 plane->state->color_encoding = state->color_encoding; 1146 plane->state->color_range = state->color_range; 1147 plane->state->src = state->src; 1148 plane->state->dst = state->dst; 1149 plane->state->visible = state->visible; 1150 1151 new_vc4_state = to_vc4_plane_state(state); 1152 vc4_state = to_vc4_plane_state(plane->state); 1153 1154 vc4_state->crtc_x = new_vc4_state->crtc_x; 1155 vc4_state->crtc_y = new_vc4_state->crtc_y; 1156 vc4_state->crtc_h = new_vc4_state->crtc_h; 1157 vc4_state->crtc_w = new_vc4_state->crtc_w; 1158 vc4_state->src_x = new_vc4_state->src_x; 1159 vc4_state->src_y = new_vc4_state->src_y; 1160 memcpy(vc4_state->src_w, new_vc4_state->src_w, 1161 sizeof(vc4_state->src_w)); 1162 memcpy(vc4_state->src_h, new_vc4_state->src_h, 1163 sizeof(vc4_state->src_h)); 1164 memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling, 1165 sizeof(vc4_state->x_scaling)); 1166 memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling, 1167 sizeof(vc4_state->y_scaling)); 1168 vc4_state->is_unity = new_vc4_state->is_unity; 1169 vc4_state->is_yuv = new_vc4_state->is_yuv; 1170 memcpy(vc4_state->offsets, new_vc4_state->offsets, 1171 sizeof(vc4_state->offsets)); 1172 vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill; 1173 1174 /* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */ 1175 vc4_state->dlist[vc4_state->pos0_offset] = 1176 new_vc4_state->dlist[vc4_state->pos0_offset]; 1177 vc4_state->dlist[vc4_state->pos2_offset] = 1178 new_vc4_state->dlist[vc4_state->pos2_offset]; 1179 vc4_state->dlist[vc4_state->ptr0_offset] = 1180 new_vc4_state->dlist[vc4_state->ptr0_offset]; 1181 1182 /* Note that we can't just call vc4_plane_write_dlist() 1183 * because that would smash the context data that the HVS is 1184 * currently using. 1185 */ 1186 writel(vc4_state->dlist[vc4_state->pos0_offset], 1187 &vc4_state->hw_dlist[vc4_state->pos0_offset]); 1188 writel(vc4_state->dlist[vc4_state->pos2_offset], 1189 &vc4_state->hw_dlist[vc4_state->pos2_offset]); 1190 writel(vc4_state->dlist[vc4_state->ptr0_offset], 1191 &vc4_state->hw_dlist[vc4_state->ptr0_offset]); 1192 } 1193 1194 static int vc4_plane_atomic_async_check(struct drm_plane *plane, 1195 struct drm_plane_state *state) 1196 { 1197 struct vc4_plane_state *old_vc4_state, *new_vc4_state; 1198 int ret; 1199 u32 i; 1200 1201 ret = vc4_plane_mode_set(plane, state); 1202 if (ret) 1203 return ret; 1204 1205 old_vc4_state = to_vc4_plane_state(plane->state); 1206 new_vc4_state = to_vc4_plane_state(state); 1207 if (old_vc4_state->dlist_count != new_vc4_state->dlist_count || 1208 old_vc4_state->pos0_offset != new_vc4_state->pos0_offset || 1209 old_vc4_state->pos2_offset != new_vc4_state->pos2_offset || 1210 old_vc4_state->ptr0_offset != new_vc4_state->ptr0_offset || 1211 vc4_lbm_size(plane->state) != vc4_lbm_size(state)) 1212 return -EINVAL; 1213 1214 /* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update 1215 * if anything else has changed, fallback to a sync update. 1216 */ 1217 for (i = 0; i < new_vc4_state->dlist_count; i++) { 1218 if (i == new_vc4_state->pos0_offset || 1219 i == new_vc4_state->pos2_offset || 1220 i == new_vc4_state->ptr0_offset || 1221 (new_vc4_state->lbm_offset && 1222 i == new_vc4_state->lbm_offset)) 1223 continue; 1224 1225 if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i]) 1226 return -EINVAL; 1227 } 1228 1229 return 0; 1230 } 1231 1232 static int vc4_prepare_fb(struct drm_plane *plane, 1233 struct drm_plane_state *state) 1234 { 1235 struct vc4_bo *bo; 1236 int ret; 1237 1238 if (!state->fb) 1239 return 0; 1240 1241 bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); 1242 1243 drm_gem_fb_prepare_fb(plane, state); 1244 1245 if (plane->state->fb == state->fb) 1246 return 0; 1247 1248 ret = vc4_bo_inc_usecnt(bo); 1249 if (ret) 1250 return ret; 1251 1252 return 0; 1253 } 1254 1255 static void vc4_cleanup_fb(struct drm_plane *plane, 1256 struct drm_plane_state *state) 1257 { 1258 struct vc4_bo *bo; 1259 1260 if (plane->state->fb == state->fb || !state->fb) 1261 return; 1262 1263 bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); 1264 vc4_bo_dec_usecnt(bo); 1265 } 1266 1267 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { 1268 .atomic_check = vc4_plane_atomic_check, 1269 .atomic_update = vc4_plane_atomic_update, 1270 .prepare_fb = vc4_prepare_fb, 1271 .cleanup_fb = vc4_cleanup_fb, 1272 .atomic_async_check = vc4_plane_atomic_async_check, 1273 .atomic_async_update = vc4_plane_atomic_async_update, 1274 }; 1275 1276 static void vc4_plane_destroy(struct drm_plane *plane) 1277 { 1278 drm_plane_cleanup(plane); 1279 } 1280 1281 static bool vc4_format_mod_supported(struct drm_plane *plane, 1282 uint32_t format, 1283 uint64_t modifier) 1284 { 1285 /* Support T_TILING for RGB formats only. */ 1286 switch (format) { 1287 case DRM_FORMAT_XRGB8888: 1288 case DRM_FORMAT_ARGB8888: 1289 case DRM_FORMAT_ABGR8888: 1290 case DRM_FORMAT_XBGR8888: 1291 case DRM_FORMAT_RGB565: 1292 case DRM_FORMAT_BGR565: 1293 case DRM_FORMAT_ARGB1555: 1294 case DRM_FORMAT_XRGB1555: 1295 switch (fourcc_mod_broadcom_mod(modifier)) { 1296 case DRM_FORMAT_MOD_LINEAR: 1297 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: 1298 return true; 1299 default: 1300 return false; 1301 } 1302 case DRM_FORMAT_NV12: 1303 case DRM_FORMAT_NV21: 1304 switch (fourcc_mod_broadcom_mod(modifier)) { 1305 case DRM_FORMAT_MOD_LINEAR: 1306 case DRM_FORMAT_MOD_BROADCOM_SAND64: 1307 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1308 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1309 return true; 1310 default: 1311 return false; 1312 } 1313 case DRM_FORMAT_RGBX1010102: 1314 case DRM_FORMAT_BGRX1010102: 1315 case DRM_FORMAT_RGBA1010102: 1316 case DRM_FORMAT_BGRA1010102: 1317 case DRM_FORMAT_YUV422: 1318 case DRM_FORMAT_YVU422: 1319 case DRM_FORMAT_YUV420: 1320 case DRM_FORMAT_YVU420: 1321 case DRM_FORMAT_NV16: 1322 case DRM_FORMAT_NV61: 1323 default: 1324 return (modifier == DRM_FORMAT_MOD_LINEAR); 1325 } 1326 } 1327 1328 static const struct drm_plane_funcs vc4_plane_funcs = { 1329 .update_plane = drm_atomic_helper_update_plane, 1330 .disable_plane = drm_atomic_helper_disable_plane, 1331 .destroy = vc4_plane_destroy, 1332 .set_property = NULL, 1333 .reset = vc4_plane_reset, 1334 .atomic_duplicate_state = vc4_plane_duplicate_state, 1335 .atomic_destroy_state = vc4_plane_destroy_state, 1336 .format_mod_supported = vc4_format_mod_supported, 1337 }; 1338 1339 struct drm_plane *vc4_plane_init(struct drm_device *dev, 1340 enum drm_plane_type type) 1341 { 1342 struct drm_plane *plane = NULL; 1343 struct vc4_plane *vc4_plane; 1344 u32 formats[ARRAY_SIZE(hvs_formats)]; 1345 int ret = 0; 1346 unsigned i; 1347 static const uint64_t modifiers[] = { 1348 DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, 1349 DRM_FORMAT_MOD_BROADCOM_SAND128, 1350 DRM_FORMAT_MOD_BROADCOM_SAND64, 1351 DRM_FORMAT_MOD_BROADCOM_SAND256, 1352 DRM_FORMAT_MOD_LINEAR, 1353 DRM_FORMAT_MOD_INVALID 1354 }; 1355 1356 vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane), 1357 GFP_KERNEL); 1358 if (!vc4_plane) 1359 return ERR_PTR(-ENOMEM); 1360 1361 for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) 1362 formats[i] = hvs_formats[i].drm; 1363 1364 plane = &vc4_plane->base; 1365 ret = drm_universal_plane_init(dev, plane, 0, 1366 &vc4_plane_funcs, 1367 formats, ARRAY_SIZE(formats), 1368 modifiers, type, NULL); 1369 if (ret) 1370 return ERR_PTR(ret); 1371 1372 drm_plane_helper_add(plane, &vc4_plane_helper_funcs); 1373 1374 drm_plane_create_alpha_property(plane); 1375 drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, 1376 DRM_MODE_ROTATE_0 | 1377 DRM_MODE_ROTATE_180 | 1378 DRM_MODE_REFLECT_X | 1379 DRM_MODE_REFLECT_Y); 1380 1381 return plane; 1382 } 1383 1384 int vc4_plane_create_additional_planes(struct drm_device *drm) 1385 { 1386 struct drm_plane *cursor_plane; 1387 struct drm_crtc *crtc; 1388 unsigned int i; 1389 1390 /* Set up some arbitrary number of planes. We're not limited 1391 * by a set number of physical registers, just the space in 1392 * the HVS (16k) and how small an plane can be (28 bytes). 1393 * However, each plane we set up takes up some memory, and 1394 * increases the cost of looping over planes, which atomic 1395 * modesetting does quite a bit. As a result, we pick a 1396 * modest number of planes to expose, that should hopefully 1397 * still cover any sane usecase. 1398 */ 1399 for (i = 0; i < 16; i++) { 1400 struct drm_plane *plane = 1401 vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY); 1402 1403 if (IS_ERR(plane)) 1404 continue; 1405 1406 plane->possible_crtcs = 1407 GENMASK(drm->mode_config.num_crtc - 1, 0); 1408 } 1409 1410 drm_for_each_crtc(crtc, drm) { 1411 /* Set up the legacy cursor after overlay initialization, 1412 * since we overlay planes on the CRTC in the order they were 1413 * initialized. 1414 */ 1415 cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR); 1416 if (!IS_ERR(cursor_plane)) { 1417 cursor_plane->possible_crtcs = drm_crtc_mask(crtc); 1418 crtc->cursor = cursor_plane; 1419 } 1420 } 1421 1422 return 0; 1423 } 1424