// SPDX-License-Identifier: GPL-2.0+
/*
 * NVIDIA Tegra Video decoder driver
 *
 * Copyright (C) 2016-2022 Dmitry Osipenko <digetx@gmail.com>
 *
 */

#include <linux/iopoll.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
#include <linux/slab.h>

#include <media/v4l2-h264.h>

#include "trace.h"
#include "vde.h"

/* Bits stored in the per-frame "flags" field (see tegra_vde_h264_setup_frame()). */
#define FLAG_B_FRAME		0x1
#define FLAG_REFERENCE		0x2

struct tegra_vde_h264_frame {
	unsigned int frame_num;
	unsigned int flags;
};

/*
 * Per-frame decoding parameters.
 *
 * The structure is zeroed and filled by tegra_vde_h264_setup_context() from
 * the V4L2 SPS/PPS/decode-params controls, the two dpb_* counters are
 * accumulated by tegra_vde_h264_setup_frames(), and the result is range
 * checked by tegra_vde_validate_h264_ctx() before it is written to the
 * hardware by tegra_vde_setup_hw_context().
 */
struct tegra_vde_h264_decoder_ctx {
	/* Number of entries used in the frames array, the current frame included */
	unsigned int dpb_frames_nb;
	/* Number of references whose POC is lower than the current picture's */
	unsigned int dpb_ref_frames_with_earlier_poc_nb;
	unsigned int baseline_profile;
	/* Hardware level encoding, see to_tegra_vde_h264_level_idc() */
	unsigned int level_idc;
	unsigned int log2_max_pic_order_cnt_lsb;
	unsigned int log2_max_frame_num;
	unsigned int pic_order_cnt_type;
	unsigned int direct_8x8_inference_flag;
	unsigned int pic_width_in_mbs;
	unsigned int pic_height_in_mbs;
	unsigned int pic_init_qp;
	unsigned int deblocking_filter_control_present_flag;
	unsigned int constrained_intra_pred_flag;
	unsigned int chroma_qp_index_offset;
	unsigned int pic_order_present_flag;
	unsigned int num_ref_idx_l0_active_minus1;
	unsigned int num_ref_idx_l1_active_minus1;
};

/*
 * Storage for the reference lists produced by the V4L2 H.264 reflist
 * builder. Each entry is used as an index into the DPB array of the decode
 * parameters (see tegra_vde_h264_setup_frames()).
 */
struct h264_reflists {
	u8 p[V4L2_H264_NUM_DPB_ENTRIES];
	u8 b0[V4L2_H264_NUM_DPB_ENTRIES];
	u8 b1[V4L2_H264_NUM_DPB_ENTRIES];
};

/*
 * Poll the MBE register at offset 0x8C until it reads a value >= 0x10.
 *
 * The register is polled with a 1 us delay between reads and a 100 us
 * timeout. Returns whatever readl_relaxed_poll_timeout() returns, i.e. 0 on
 * success or a negative error code on timeout.
 */
static int tegra_vde_wait_mbe(struct tegra_vde *vde)
{
	u32 tmp;

	return readl_relaxed_poll_timeout(vde->mbe + 0x8C, tmp,
					  tmp >= 0x10, 1, 100);
}

/*
 * Push the frame-index commands to the MBE command register (offset 0x80).
 *
 * @refs_nb:    number of reference frames (frame indices 1..refs_nb)
 * @setup_refs: when false, only frame index 0 is programmed
 *
 * Frame index 0 is always programmed first and waited for. When @setup_refs
 * is true, every reference gets a 0xD0000000/0xD0200000 command pair with
 * its frame index at bit 23, and the indices are additionally packed 6 bits
 * apiece into a mask that is flushed with a 0xC0000000 command after every
 * fourth reference and after the last one. The group number (idx / 4) goes
 * to bit 24 of that command.
 *
 * Returns 0 on success or the error from tegra_vde_wait_mbe().
 */
static int tegra_vde_setup_mbe_frame_idx(struct tegra_vde *vde,
					 unsigned int refs_nb,
					 bool setup_refs)
{
	u32 value, frame_idx_enb_mask = 0;
	unsigned int frame_idx;
	unsigned int idx;
	int err;

	tegra_vde_writel(vde, 0xD0000000 | (0 << 23), vde->mbe, 0x80);
	tegra_vde_writel(vde, 0xD0200000 | (0 << 23), vde->mbe, 0x80);

	err = tegra_vde_wait_mbe(vde);
	if (err)
		return err;

	if (!setup_refs)
		return 0;

	/* idx counts references from 0, frame_idx is the 1-based frame slot */
	for (idx = 0, frame_idx = 1; idx < refs_nb; idx++, frame_idx++) {
		tegra_vde_writel(vde, 0xD0000000 | (frame_idx << 23),
				 vde->mbe, 0x80);
		tegra_vde_writel(vde, 0xD0200000 | (frame_idx << 23),
				 vde->mbe, 0x80);

		frame_idx_enb_mask |= frame_idx << (6 * (idx % 4));

		/* Flush the mask once a group of four is full or the list ends */
		if (idx % 4 == 3 || idx == refs_nb - 1) {
			value = 0xC0000000;
			value |= (idx >> 2) << 24;
			value |= frame_idx_enb_mask;

			tegra_vde_writel(vde, value, vde->mbe, 0x80);

			err = tegra_vde_wait_mbe(vde);
			if (err)
				return err;

			frame_idx_enb_mask = 0;
		}
	}

	return 0;
}

/*
 * Write a 32-bit value through two 0xA0000000 MBE commands: the low 16 bits
 * of @val go to register number @reg and the high 16 bits to @reg + 1. The
 * register number is placed at bit 24 of the command word.
 */
static void tegra_vde_mbe_set_0xa_reg(struct tegra_vde *vde, int reg, u32 val)
{
	tegra_vde_writel(vde, 0xA0000000 | (reg << 24) | (val & 0xFFFF),
			 vde->mbe, 0x80);
	tegra_vde_writel(vde, 0xA0000000 | ((reg + 1) << 24) | (val >> 16),
			 vde->mbe, 0x80);
}

/*
 * Wait for the BSEV block to settle by polling its INTR_STATUS register.
 *
 * Three conditions are checked in order: bit 2 cleared (meaning unknown, as
 * the error message says), BSE_ICMDQUE_EMPTY set and, only when @wait_dma is
 * true, BSE_DMA_BUSY cleared. The first two polls time out after 100 us, the
 * DMA poll after 1000 us.
 *
 * Returns 0 on success; on timeout the failure is logged and the error from
 * readl_relaxed_poll_timeout() is returned.
 */
static int tegra_vde_wait_bsev(struct tegra_vde *vde, bool wait_dma)
{
	struct device *dev = vde->dev;
	u32 value;
	int err;

	err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
					 !(value & BIT(2)), 1, 100);
	if (err) {
		dev_err(dev, "BSEV unknown bit timeout\n");
		return err;
	}

	err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
					 (value & BSE_ICMDQUE_EMPTY), 1, 100);
	if (err) {
		dev_err(dev, "BSEV ICMDQUE flush timeout\n");
		return err;
	}

	if (!wait_dma)
		return 0;

	err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
					 !(value & BSE_DMA_BUSY), 1, 1000);
	if (err) {
		dev_err(dev, "BSEV DMA timeout\n");
		return err;
	}

	return 0;
}

/*
 * Write @value to the BSEV ICMDQUE_WR register and wait for the block with
 * tegra_vde_wait_bsev(). Returns the result of that wait.
 */
static int tegra_vde_push_to_bsev_icmdqueue(struct tegra_vde *vde,
					    u32 value, bool wait_dma)
{
	tegra_vde_writel(vde, value, vde->bsev, ICMDQUE_WR);

	return tegra_vde_wait_bsev(vde, wait_dma);
}

/*
 * Program one slot of the FRAMEID register block.
 *
 * @frame:      frame to describe, or NULL to fill the slot with placeholders
 * @frameid:    slot number; each register bank is indexed by @frameid * 4
 * @mbs_width:  not used by this function
 * @mbs_height: picture height in macroblocks, stored next to the luma pitch
 *
 * With a NULL @frame the three plane addresses are set to 0x6CDEAD00 and the
 * two pitch words to 0. Plane addresses are written shifted right by 8 bits.
 */
static void tegra_vde_setup_frameid(struct tegra_vde *vde,
				    struct tegra_video_frame *frame,
				    unsigned int frameid,
				    u32 mbs_width, u32 mbs_height)
{
	u32 y_addr = frame ? frame->y_addr : 0x6CDEAD00;
	u32 cb_addr = frame ? frame->cb_addr : 0x6CDEAD00;
	u32 cr_addr = frame ? frame->cr_addr : 0x6CDEAD00;
	u32 value1 = frame ? ((frame->luma_atoms_pitch << 16) | mbs_height) : 0;
	u32 value2 = frame ? ((frame->chroma_atoms_pitch << 6) | 1) : 0;

	tegra_vde_writel(vde, y_addr >> 8, vde->frameid, 0x000 + frameid * 4);
	tegra_vde_writel(vde, cb_addr >> 8, vde->frameid, 0x100 + frameid * 4);
	tegra_vde_writel(vde, cr_addr >> 8, vde->frameid, 0x180 + frameid * 4);
	tegra_vde_writel(vde, value1, vde->frameid, 0x080 + frameid * 4);
	tegra_vde_writel(vde, value2, vde->frameid, 0x280 + frameid * 4);
}

/*
 * Program all 17 FRAMEID slots: the first @frames_nb slots describe
 * @frames[0..frames_nb - 1], the remaining ones are filled with placeholders.
 */
static void tegra_setup_frameidx(struct tegra_vde *vde,
				 struct tegra_video_frame *frames,
				 unsigned int frames_nb,
				 u32 mbs_width, u32 mbs_height)
{
	unsigned int idx;

	for (idx = 0; idx < frames_nb; idx++)
		tegra_vde_setup_frameid(vde, &frames[idx], idx,
					mbs_width, mbs_height);

	/* idx carries over: clear every slot that has no frame */
	for (; idx < 17; idx++)
		tegra_vde_setup_frameid(vde, NULL, idx, 0, 0);
}

/*
 * Store one two-word entry in the IRAM tables and emit a trace event.
 *
 * The IRAM is addressed as an array of 32-bit words: every table occupies
 * 0x20 words and every row two consecutive words (@value1, then @value2).
 */
static void tegra_vde_setup_iram_entry(struct tegra_vde *vde,
				       unsigned int table,
				       unsigned int row,
				       u32 value1, u32 value2)
{
	u32 *iram_tables = vde->iram;

	trace_vde_setup_iram_entry(table, row, value1, value2);

	iram_tables[0x20 * table + row * 2 + 0] = value1;
	iram_tables[0x20 * table + row * 2 + 1] = value2;
}

/*
 * Fill IRAM tables 0-3 with the reference frame descriptors.
 *
 * @dpb_frames:          frame array; entry 0 is the frame being decoded and
 *                       entries 1..@ref_frames_nb are its references
 * @ref_frames_nb:       number of reference frames
 * @with_earlier_poc_nb: number of leading references that have an earlier
 *                       POC than the current frame
 *
 * Rows 0..15 of all four tables first receive identical content: row i
 * describes dpb_frames[i + 1] if i < @ref_frames_nb, otherwise a placeholder
 * (value 0x3f, aux address 0x6ADEAD00). A descriptor word holds the 1-based
 * frame slot at bit 26, a "not a B-frame" flag at bit 25, bit 24 always set
 * and the frame number in the low bits; the second word is the frame's aux
 * buffer address.
 *
 * If the current frame is a B-frame and not all references have an earlier
 * POC, table 2 is then rewritten in rotated order: the references starting
 * at index @with_earlier_poc_nb come first, followed by the references
 * 0..@with_earlier_poc_nb - 1.
 */
static void tegra_vde_setup_iram_tables(struct tegra_vde *vde,
					struct tegra_video_frame *dpb_frames,
					unsigned int ref_frames_nb,
					unsigned int with_earlier_poc_nb)
{
	struct tegra_video_frame *frame;
	int with_later_poc_nb;
	u32 value, aux_addr;
	unsigned int i, k;

	trace_vde_ref_l0(dpb_frames[0].frame_num);

	for (i = 0; i < 16; i++) {
		if (i < ref_frames_nb) {
			/* dpb_frames[0] is the current frame, references follow */
			frame = &dpb_frames[i + 1];

			aux_addr = frame->aux_addr;

			value = (i + 1) << 26;
			value |= !(frame->flags & FLAG_B_FRAME) << 25;
			value |= 1 << 24;
			value |= frame->frame_num;
		} else {
			aux_addr = 0x6ADEAD00;
			value = 0x3f;
		}

		tegra_vde_setup_iram_entry(vde, 0, i, value, aux_addr);
		tegra_vde_setup_iram_entry(vde, 1, i, value, aux_addr);
		tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
		tegra_vde_setup_iram_entry(vde, 3, i, value, aux_addr);
	}

	/* Only B-frames get the reordered table 2 */
	if (!(dpb_frames[0].flags & FLAG_B_FRAME))
		return;

	/* Nothing to rotate when no reference has a later POC */
	if (with_earlier_poc_nb >= ref_frames_nb)
		return;

	with_later_poc_nb = ref_frames_nb - with_earlier_poc_nb;

	trace_vde_ref_l1(with_later_poc_nb, with_earlier_poc_nb);

	/* Rows 0..with_later_poc_nb - 1: references past the earlier-POC ones */
	for (i = 0, k = with_earlier_poc_nb; i < with_later_poc_nb; i++, k++) {
		frame = &dpb_frames[k + 1];

		aux_addr = frame->aux_addr;

		value = (k + 1) << 26;
		value |= !(frame->flags & FLAG_B_FRAME) << 25;
		value |= 1 << 24;
		value |= frame->frame_num;

		tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
	}

	/* Remaining rows (i carries over): the earlier-POC references */
	for (k = 0; i < ref_frames_nb; i++, k++) {
		frame = &dpb_frames[k + 1];

		aux_addr = frame->aux_addr;

		value = (k + 1) << 26;
		value |= !(frame->flags & FLAG_B_FRAME) << 25;
		value |= 1 << 24;
		value |= frame->frame_num;

		tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
	}
}

/*
 * Program the VDE hardware blocks for decoding one H.264 frame.
 *
 * @ctx:                 validated decoding parameters
 * @dpb_frames:          frame array, entry 0 being the frame to decode
 * @bitstream_data_addr: DMA address of the bitstream
 * @bitstream_data_size: size of the bitstream in bytes
 * @macroblocks_nb:      number of macroblocks in the picture
 *
 * The function writes to the SXE, BSEV, MBE, PPE, MCE, TFE, VDMA and FRAMEID
 * register blocks, fills the IRAM tables and pushes commands to the BSEV
 * command queue and to the MBE command register. Most register offsets and
 * constants are unnamed magic values whose meaning is not visible in this
 * file.
 *
 * NOTE(review): the write sequence presumably has to stay in this exact
 * order for the hardware - confirm on hardware before reordering anything.
 *
 * Returns 0 on success or a negative error code if one of the BSEV/MBE
 * waits times out.
 */
static int tegra_vde_setup_hw_context(struct tegra_vde *vde,
				      struct tegra_vde_h264_decoder_ctx *ctx,
				      struct tegra_video_frame *dpb_frames,
				      dma_addr_t bitstream_data_addr,
				      size_t bitstream_data_size,
				      unsigned int macroblocks_nb)
{
	struct device *dev = vde->dev;
	u32 value;
	int err;

	tegra_vde_set_bits(vde, 0x000A, vde->sxe, 0xF0);
	tegra_vde_set_bits(vde, 0x000B, vde->bsev, CMDQUE_CONTROL);
	tegra_vde_set_bits(vde, 0x8002, vde->mbe, 0x50);
	tegra_vde_set_bits(vde, 0x000A, vde->mbe, 0xA0);
	tegra_vde_set_bits(vde, 0x000A, vde->ppe, 0x14);
	tegra_vde_set_bits(vde, 0x000A, vde->ppe, 0x28);
	tegra_vde_set_bits(vde, 0x0A00, vde->mce, 0x08);
	tegra_vde_set_bits(vde, 0x000A, vde->tfe, 0x00);
	tegra_vde_set_bits(vde, 0x0005, vde->vdma, 0x04);

	tegra_vde_writel(vde, 0x00000000, vde->vdma, 0x1C);
	tegra_vde_writel(vde, 0x00000000, vde->vdma, 0x00);
	tegra_vde_writel(vde, 0x00000007, vde->vdma, 0x04);
	tegra_vde_writel(vde, 0x00000007, vde->frameid, 0x200);
	tegra_vde_writel(vde, 0x00000005, vde->tfe, 0x04);
	tegra_vde_writel(vde, 0x00000000, vde->mbe, 0x84);
	tegra_vde_writel(vde, 0x00000010, vde->sxe, 0x08);
	tegra_vde_writel(vde, 0x00000150, vde->sxe, 0x54);
	tegra_vde_writel(vde, 0x0000054C, vde->sxe, 0x58);
	tegra_vde_writel(vde, 0x00000E34, vde->sxe, 0x5C);
	tegra_vde_writel(vde, 0x063C063C, vde->mce, 0x10);
	tegra_vde_writel(vde, 0x0003FC00, vde->bsev, INTR_STATUS);
	tegra_vde_writel(vde, 0x0000150D, vde->bsev, BSE_CONFIG);
	tegra_vde_writel(vde, 0x00000100, vde->bsev, BSE_INT_ENB);
	tegra_vde_writel(vde, 0x00000000, vde->bsev, 0x98);
	tegra_vde_writel(vde, 0x00000060, vde->bsev, 0x9C);

	/* Clear an IRAM area sized by the macroblock count (purpose not shown here) */
	memset(vde->iram + 128, 0, macroblocks_nb / 2);

	tegra_setup_frameidx(vde, dpb_frames, ctx->dpb_frames_nb,
			     ctx->pic_width_in_mbs, ctx->pic_height_in_mbs);

	/* dpb_frames_nb counts the current frame too, hence the "- 1" */
	tegra_vde_setup_iram_tables(vde, dpb_frames,
				    ctx->dpb_frames_nb - 1,
				    ctx->dpb_ref_frames_with_earlier_poc_nb);

	/*
	 * The IRAM mapping is write-combine, ensure that CPU buffers have
	 * been flushed at this point.
	 */
	wmb();

	tegra_vde_writel(vde, 0x00000000, vde->bsev, 0x8C);
	tegra_vde_writel(vde, bitstream_data_addr + bitstream_data_size,
			 vde->bsev, 0x54);

	/* Remembered for the error report in tegra_vde_decode_end() */
	vde->bitstream_data_addr = bitstream_data_addr;

	value = ctx->pic_width_in_mbs << 11 | ctx->pic_height_in_mbs << 3;

	tegra_vde_writel(vde, value, vde->bsev, 0x88);

	err = tegra_vde_wait_bsev(vde, false);
	if (err)
		return err;

	err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x800003FC, false);
	if (err)
		return err;

	/* Command carrying the word address of the IRAM lists + 512 bytes */
	value = 0x01500000;
	value |= ((vde->iram_lists_addr + 512) >> 2) & 0xFFFF;

	err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
	if (err)
		return err;

	err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x840F054C, false);
	if (err)
		return err;

	err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x80000080, false);
	if (err)
		return err;

	/* Command carrying the word address of the IRAM lists themselves */
	value = 0x0E340000 | ((vde->iram_lists_addr >> 2) & 0xFFFF);

	err = tegra_vde_push_to_bsev_icmdqueue(vde, value, true);
	if (err)
		return err;

	/* SXE: picture dimensions in macroblocks */
	value = 0x00800005;
	value |= ctx->pic_width_in_mbs << 11;
	value |= ctx->pic_height_in_mbs << 3;

	tegra_vde_writel(vde, value, vde->sxe, 0x10);

	/* SXE: sequence-level parameters */
	value = !ctx->baseline_profile << 17;
	value |= ctx->level_idc << 13;
	value |= ctx->log2_max_pic_order_cnt_lsb << 7;
	value |= ctx->pic_order_cnt_type << 5;
	value |= ctx->log2_max_frame_num;

	tegra_vde_writel(vde, value, vde->sxe, 0x40);

	/* SXE: picture-level parameters */
	value = ctx->pic_init_qp << 25;
	value |= !!(ctx->deblocking_filter_control_present_flag) << 2;
	value |= !!ctx->pic_order_present_flag;

	tegra_vde_writel(vde, value, vde->sxe, 0x44);

	value = ctx->chroma_qp_index_offset;
	value |= ctx->num_ref_idx_l0_active_minus1 << 5;
	value |= ctx->num_ref_idx_l1_active_minus1 << 10;
	value |= !!ctx->constrained_intra_pred_flag << 15;

	tegra_vde_writel(vde, value, vde->sxe, 0x48);

	/* SXE: bit 24 tells whether the current frame is a B-frame */
	value = 0x0C000000;
	value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 24;

	tegra_vde_writel(vde, value, vde->sxe, 0x4C);

	/* Only bits 19:15 of the bitstream size are passed on here */
	value = 0x03800000;
	value |= bitstream_data_size & GENMASK(19, 15);

	tegra_vde_writel(vde, value, vde->sxe, 0x68);

	tegra_vde_writel(vde, bitstream_data_addr, vde->sxe, 0x6C);

	if (vde->soc->supports_ref_pic_marking)
		tegra_vde_writel(vde, vde->secure_bo->dma_addr, vde->sxe, 0x7c);

	/* MBE: picture dimensions in macroblocks */
	value = 0x10000005;
	value |= ctx->pic_width_in_mbs << 11;
	value |= ctx->pic_height_in_mbs << 3;

	tegra_vde_writel(vde, value, vde->mbe, 0x80);

	/* MBE: level, profile and direct_8x8_inference */
	value = 0x26800000;
	value |= ctx->level_idc << 4;
	value |= !ctx->baseline_profile << 1;
	value |= !!ctx->direct_8x8_inference_flag;

	tegra_vde_writel(vde, value, vde->mbe, 0x80);

	tegra_vde_writel(vde, 0xF4000001, vde->mbe, 0x80);
	tegra_vde_writel(vde, 0x20000000, vde->mbe, 0x80);
	tegra_vde_writel(vde, 0xF4000101, vde->mbe, 0x80);

	value = 0x20000000;
	value |= ctx->chroma_qp_index_offset << 8;

	tegra_vde_writel(vde, value, vde->mbe, 0x80);

	/* Reference frame indices are only set up for pic_order_cnt_type 0 */
	err = tegra_vde_setup_mbe_frame_idx(vde,
					    ctx->dpb_frames_nb - 1,
					    ctx->pic_order_cnt_type == 0);
	if (err) {
		dev_err(dev, "MBE frames setup failed %d\n", err);
		return err;
	}

	tegra_vde_mbe_set_0xa_reg(vde, 0, 0x000009FC);
	tegra_vde_mbe_set_0xa_reg(vde, 2, 0x61DEAD00);
	tegra_vde_mbe_set_0xa_reg(vde, 4, 0x62DEAD00);
	tegra_vde_mbe_set_0xa_reg(vde, 6, 0x63DEAD00);
	tegra_vde_mbe_set_0xa_reg(vde, 8, dpb_frames[0].aux_addr);

	value = 0xFC000000;
	value |= !!(dpb_frames[0].flags & FLAG_B_FRAME) << 2;

	/* The reference flag is passed on for non-baseline streams only */
	if (!ctx->baseline_profile)
		value |= !!(dpb_frames[0].flags & FLAG_REFERENCE) << 1;

	tegra_vde_writel(vde, value, vde->mbe, 0x80);

	err = tegra_vde_wait_mbe(vde);
	if (err) {
		dev_err(dev, "MBE programming failed %d\n", err);
		return err;
	}

	return 0;
}

/*
 * Re-arm the decode completion and write the final two registers: BSEV 0x8C
 * and SXE 0x00, the latter carrying @macroblocks_nb - 1.
 *
 * NOTE(review): these writes presumably start the decoding, with the
 * completion signalled from the interrupt handler - confirm against the rest
 * of the driver.
 */
static void tegra_vde_decode_frame(struct tegra_vde *vde,
				   unsigned int macroblocks_nb)
{
	reinit_completion(&vde->decode_completion);

	tegra_vde_writel(vde, 0x00000001, vde->bsev, 0x8C);
	tegra_vde_writel(vde, 0x20000000 | (macroblocks_nb - 1),
			 vde->sxe, 0x00);
}

/*
 * Range-check the decoding parameters before they are packed into hardware
 * register fields.
 *
 * Returns 0 if every field is within its accepted range, otherwise logs the
 * offending value and returns -EINVAL.
 */
static int tegra_vde_validate_h264_ctx(struct device *dev,
				       struct tegra_vde_h264_decoder_ctx *ctx)
{
	/* One slot for the current frame plus up to 16 references */
	if (ctx->dpb_frames_nb == 0 || ctx->dpb_frames_nb > 17) {
		dev_err(dev, "Bad DPB size %u\n", ctx->dpb_frames_nb);
		return -EINVAL;
	}

	if (ctx->level_idc > 15) {
		dev_err(dev, "Bad level value %u\n", ctx->level_idc);
		return -EINVAL;
	}

	if (ctx->pic_init_qp > 52) {
		dev_err(dev, "Bad pic_init_qp value %u\n", ctx->pic_init_qp);
		return -EINVAL;
	}

	if (ctx->log2_max_pic_order_cnt_lsb > 16) {
		dev_err(dev, "Bad log2_max_pic_order_cnt_lsb value %u\n",
			ctx->log2_max_pic_order_cnt_lsb);
		return -EINVAL;
	}

	if (ctx->log2_max_frame_num > 16) {
		dev_err(dev, "Bad log2_max_frame_num value %u\n",
			ctx->log2_max_frame_num);
		return -EINVAL;
	}

	if (ctx->chroma_qp_index_offset > 31) {
		dev_err(dev, "Bad chroma_qp_index_offset value %u\n",
			ctx->chroma_qp_index_offset);
		return -EINVAL;
	}

	if (ctx->pic_order_cnt_type > 2) {
		dev_err(dev, "Bad pic_order_cnt_type value %u\n",
			ctx->pic_order_cnt_type);
		return -EINVAL;
	}

	if (ctx->num_ref_idx_l0_active_minus1 > 15) {
		dev_err(dev, "Bad num_ref_idx_l0_active_minus1 value %u\n",
			ctx->num_ref_idx_l0_active_minus1);
		return -EINVAL;
	}

	if (ctx->num_ref_idx_l1_active_minus1 > 15) {
		dev_err(dev, "Bad num_ref_idx_l1_active_minus1 value %u\n",
			ctx->num_ref_idx_l1_active_minus1);
		return -EINVAL;
	}

	if (!ctx->pic_width_in_mbs || ctx->pic_width_in_mbs > 127) {
		dev_err(dev, "Bad pic_width_in_mbs value %u\n",
			ctx->pic_width_in_mbs);
		return -EINVAL;
	}

	if (!ctx->pic_height_in_mbs || ctx->pic_height_in_mbs > 127) {
		dev_err(dev, "Bad pic_height_in_mbs value %u\n",
			ctx->pic_height_in_mbs);
		return -EINVAL;
	}

	return 0;
}

/*
 * Take the hardware, reset it, program it and start decoding a frame.
 *
 * On success (return value 0) the function returns with vde->lock held and
 * a runtime PM reference taken; both are released by
 * tegra_vde_decode_abort(), which tegra_vde_decode_end() calls. On failure
 * everything acquired here is released again and a negative error code is
 * returned.
 */
static int tegra_vde_decode_begin(struct tegra_vde *vde,
				  struct tegra_vde_h264_decoder_ctx *ctx,
				  struct tegra_video_frame *dpb_frames,
				  dma_addr_t bitstream_data_addr,
				  size_t bitstream_data_size)
{
	struct device *dev = vde->dev;
	unsigned int macroblocks_nb;
	int err;

	err = mutex_lock_interruptible(&vde->lock);
	if (err)
		return err;

	err = pm_runtime_resume_and_get(dev);
	if (err < 0)
		goto unlock;

	/*
	 * We rely on the VDE registers reset value, otherwise VDE
	 * causes bus lockup.
	 */
	err = reset_control_assert(vde->rst_mc);
	if (err) {
		dev_err(dev, "DEC start: Failed to assert MC reset: %d\n",
			err);
		goto put_runtime_pm;
	}

	err = reset_control_reset(vde->rst);
	if (err) {
		dev_err(dev, "DEC start: Failed to reset HW: %d\n", err);
		goto put_runtime_pm;
	}

	err = reset_control_deassert(vde->rst_mc);
	if (err) {
		dev_err(dev, "DEC start: Failed to deassert MC reset: %d\n",
			err);
		goto put_runtime_pm;
	}

	macroblocks_nb = ctx->pic_width_in_mbs * ctx->pic_height_in_mbs;

	err = tegra_vde_setup_hw_context(vde, ctx, dpb_frames,
					 bitstream_data_addr,
					 bitstream_data_size,
					 macroblocks_nb);
	if (err)
		goto put_runtime_pm;

	tegra_vde_decode_frame(vde, macroblocks_nb);

	/* Lock and runtime PM reference stay held until decode_abort() */
	return 0;

put_runtime_pm:
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);

unlock:
	mutex_unlock(&vde->lock);

	return err;
}

/*
 * Put the hardware into reset and release what tegra_vde_decode_begin()
 * acquired: the runtime PM reference and vde->lock. Reset failures are
 * logged but do not stop the cleanup.
 */
static void tegra_vde_decode_abort(struct tegra_vde *vde)
{
	struct device *dev = vde->dev;
	int err;

	/*
	 * At first reset memory client to avoid resetting VDE HW in the
	 * middle of DMA which could result into memory corruption or hang
	 * the whole system.
	 */
	err = reset_control_assert(vde->rst_mc);
	if (err)
		dev_err(dev, "DEC end: Failed to assert MC reset: %d\n", err);

	err = reset_control_assert(vde->rst);
	if (err)
		dev_err(dev, "DEC end: Failed to assert HW reset: %d\n", err);

	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);

	mutex_unlock(&vde->lock);
}

/*
 * Wait up to one second for the decode completion and then shut the
 * hardware down with tegra_vde_decode_abort(), whatever the outcome.
 *
 * Returns 0 if the completion was signalled in time, -EIO on timeout (after
 * logging how far the hardware got) or the negative value returned by
 * wait_for_completion_interruptible_timeout() if the wait was interrupted.
 */
static int tegra_vde_decode_end(struct tegra_vde *vde)
{
	unsigned int read_bytes, macroblocks_nb;
	struct device *dev = vde->dev;
	dma_addr_t bsev_ptr;
	long timeout;
	int ret;

	timeout = wait_for_completion_interruptible_timeout(
			&vde->decode_completion, msecs_to_jiffies(1000));
	if (timeout == 0) {
		/* Timed out: report the bitstream position and macroblock count */
		bsev_ptr = tegra_vde_readl(vde, vde->bsev, 0x10);
		macroblocks_nb = tegra_vde_readl(vde, vde->sxe, 0xC8) & 0x1FFF;
		read_bytes = bsev_ptr ? bsev_ptr - vde->bitstream_data_addr : 0;

		dev_err(dev, "Decoding failed: read 0x%X bytes, %u macroblocks parsed\n",
			read_bytes, macroblocks_nb);

		ret = -EIO;
	} else if (timeout < 0) {
		ret = timeout;
	} else {
		ret = 0;
	}

	tegra_vde_decode_abort(vde);

	return ret;
}

/*
 * Look up the capture buffer that backs DPB entry @dpb_idx.
 *
 * The buffer is searched by the entry's reference timestamp, but only if the
 * entry is flagged active. Falls back to the current destination buffer
 * @dst when the entry is inactive or no buffer with that timestamp exists.
 */
static struct vb2_buffer *get_ref_buf(struct tegra_ctx *ctx,
				      struct vb2_v4l2_buffer *dst,
				      unsigned int dpb_idx)
{
	const struct v4l2_h264_dpb_entry *dpb = ctx->h264.decode_params->dpb;
	struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
	int buf_idx = -1;

	if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
		buf_idx = vb2_find_timestamp(cap_q,
					     dpb[dpb_idx].reference_ts, 0);

	/*
	 * If a DPB entry is unused or invalid, address of current destination
	 * buffer is returned.
	 */
	if (buf_idx < 0)
		return &dst->vb2_buf;

	return vb2_get_buffer(cap_q, buf_idx);
}

/*
 * Check that plane @plane_id of @vb can hold @min_size bytes starting at the
 * plane's data offset. Returns 0 if it can, otherwise logs the sizes and
 * returns -EINVAL.
 */
static int tegra_vde_validate_vb_size(struct tegra_ctx *ctx,
				      struct vb2_buffer *vb,
				      unsigned int plane_id,
				      size_t min_size)
{
	u64 offset = vb->planes[plane_id].data_offset;
	struct device *dev = ctx->vde->dev;

	if (offset + min_size > vb2_plane_size(vb, plane_id)) {
		dev_err(dev, "Too small plane[%u] size %lu @0x%llX, should be at least %zu\n",
			plane_id, vb2_plane_size(vb, plane_id), offset, min_size);
		return -EINVAL;
	}

	return 0;
}

/*
 * Validate one video buffer and record it in vde->frames[@id].
 *
 * @h264:   decoding parameters; the picture size in macroblocks is read
 * @b:      reflist builder supplying the frame number of a reference; may be
 *          NULL when @id is 0 because it is not dereferenced in that case
 * @vb:     buffer holding the frame
 * @ref_id: index into @b->refs, used only when @id is not 0
 * @id:     slot in vde->frames; slot 0 is the frame being decoded
 *
 * The three planes and the aux buffer are checked against the sizes derived
 * from the picture dimensions. For slot 0 the frame number and the reference
 * flag come from the decode parameters, for the other slots the frame number
 * comes from the reflist builder.
 *
 * Returns 0 on success or -EINVAL if a buffer is too small.
 */
static int tegra_vde_h264_setup_frame(struct tegra_ctx *ctx,
				      struct tegra_vde_h264_decoder_ctx *h264,
				      struct v4l2_h264_reflist_builder *b,
				      struct vb2_buffer *vb,
				      unsigned int ref_id,
				      unsigned int id)
{
	struct v4l2_pix_format_mplane *pixfmt = &ctx->decoded_fmt.fmt.pix_mp;
	struct tegra_m2m_buffer *tb = vb_to_tegra_buf(vb);
	struct tegra_ctx_h264 *h = &ctx->h264;
	struct tegra_vde *vde = ctx->vde;
	struct device *dev = vde->dev;
	unsigned int cstride, lstride;
	unsigned int flags = 0;
	size_t lsize, csize;
	int err, frame_num;

	/* A macroblock covers 16x16 luma and 8x8 chroma samples */
	lsize = h264->pic_width_in_mbs * 16 * h264->pic_height_in_mbs * 16;
	csize = h264->pic_width_in_mbs * 8 * h264->pic_height_in_mbs * 8;
	lstride = pixfmt->plane_fmt[0].bytesperline;
	cstride = pixfmt->plane_fmt[1].bytesperline;

	err = tegra_vde_validate_vb_size(ctx, vb, 0, lsize);
	if (err)
		return err;

	err = tegra_vde_validate_vb_size(ctx, vb, 1, csize);
	if (err)
		return err;

	err = tegra_vde_validate_vb_size(ctx, vb, 2, csize);
	if (err)
		return err;

	if (!tb->aux || tb->aux->size < csize) {
		dev_err(dev, "Too small aux size %zd, should be at least %zu\n",
			tb->aux ? tb->aux->size : -1, csize);
		return -EINVAL;
	}

	if (id == 0) {
		/* Current frame: take its properties from the decode params */
		frame_num = h->decode_params->frame_num;

		if (h->decode_params->nal_ref_idc)
			flags |= FLAG_REFERENCE;
	} else {
		frame_num = b->refs[ref_id].frame_num;
	}

	if (tb->b_frame)
		flags |= FLAG_B_FRAME;

	vde->frames[id].flags = flags;
	vde->frames[id].y_addr = tb->dma_addr[0];
	vde->frames[id].cb_addr = tb->dma_addr[1];
	vde->frames[id].cr_addr = tb->dma_addr[2];
	vde->frames[id].aux_addr = tb->aux->dma_addr;
	/* Keep the low 23 bits of the frame number */
	vde->frames[id].frame_num = frame_num & 0x7fffff;
	vde->frames[id].luma_atoms_pitch = lstride / VDE_ATOM;
	vde->frames[id].chroma_atoms_pitch = cstride / VDE_ATOM;

	return 0;
}

/*
 * Fill vde->frames with the frame to decode followed by its references.
 *
 * Slot 0 always receives the destination buffer. For P- and B-frames the
 * V4L2 reflist builder provides the reference order (the P list, or the B0
 * list for B-frames) and every valid reference is appended in that order.
 * @h264->dpb_frames_nb is incremented for each slot that is set up,
 * @h264->dpb_ref_frames_with_earlier_poc_nb for each reference whose picture
 * order count is lower than the current picture's.
 *
 * Returns 0 on success or the error from tegra_vde_h264_setup_frame().
 */
static int tegra_vde_h264_setup_frames(struct tegra_ctx *ctx,
				       struct tegra_vde_h264_decoder_ctx *h264)
{
	struct vb2_v4l2_buffer *src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
	struct vb2_v4l2_buffer *dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
	const struct v4l2_h264_dpb_entry *dpb = ctx->h264.decode_params->dpb;
	struct tegra_m2m_buffer *tb = vb_to_tegra_buf(&dst->vb2_buf);
	struct tegra_ctx_h264 *h = &ctx->h264;
	struct v4l2_h264_reflist_builder b;
	struct h264_reflists reflists;
	struct vb2_buffer *ref;
	unsigned int i;
	u8 *dpb_id;
	int err;

	/*
	 * Tegra hardware requires information about frame's type, assuming
	 * that frame consists of the same type slices. Userspace must tag
	 * frame's type appropriately.
	 *
	 * Decoding of non-uniform frames isn't supported by hardware and
	 * requires software preprocessing that we don't implement. Decoding
	 * is expected to fail in this case. Such video streams are rare in
	 * practice, so not a big deal.
	 *
	 * If userspace doesn't tell us frame's type, then we will try to
	 * decode as-is.
	 */
	v4l2_m2m_buf_copy_metadata(src, dst, true);

	if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BFRAME)
		tb->b_frame = true;
	else
		tb->b_frame = false;

	/* Slot 0: the frame being decoded; no reflist builder is needed */
	err = tegra_vde_h264_setup_frame(ctx, h264, NULL, &dst->vb2_buf, 0,
					 h264->dpb_frames_nb++);
	if (err)
		return err;

	/* Frames tagged neither P nor B are set up without references */
	if (!(h->decode_params->flags & (V4L2_H264_DECODE_PARAM_FLAG_PFRAME |
					 V4L2_H264_DECODE_PARAM_FLAG_BFRAME)))
		return 0;

	v4l2_h264_init_reflist_builder(&b, h->decode_params, h->sps, dpb);

	if (h->decode_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BFRAME) {
		/* Both B lists are built, only the B0 order is used below */
		v4l2_h264_build_b_ref_lists(&b, reflists.b0, reflists.b1);
		dpb_id = reflists.b0;
	} else {
		v4l2_h264_build_p_ref_list(&b, reflists.p);
		dpb_id = reflists.p;
	}

	for (i = 0; i < b.num_valid; i++) {
		ref = get_ref_buf(ctx, dst, dpb_id[i]);

		err = tegra_vde_h264_setup_frame(ctx, h264, &b, ref, dpb_id[i],
						 h264->dpb_frames_nb++);
		if (err)
			return err;

		if (b.refs[dpb_id[i]].pic_order_count < b.cur_pic_order_count)
			h264->dpb_ref_frames_with_earlier_poc_nb++;
	}

	return 0;
}

/*
 * Translate the SPS level_idc value into the encoding that is written to the
 * hardware. Levels 1.1 (11) through 5.0 (50) map to 2..14; every other value
 * maps to 15.
 */
static unsigned int to_tegra_vde_h264_level_idc(unsigned int level_idc)
{
	switch (level_idc) {
	case 11:
		return 2;
	case 12:
		return 3;
	case 13:
		return 4;
	case 20:
		return 5;
	case 21:
		return 6;
	case 22:
		return 7;
	case 30:
		return 8;
	case 31:
		return 9;
	case 32:
		return 10;
	case 40:
		return 11;
	case 41:
		return 12;
	case 42:
		return 13;
	case 50:
		return 14;
	default:
		break;
	}

	return 15;
}

/*
 * Build the decoding parameters for the next frame from the V4L2 controls.
 *
 * @h264 and vde->frames are zeroed first, then the SPS, PPS and decode
 * parameter controls are read, the frame table is filled by
 * tegra_vde_h264_setup_frames() and the result is validated.
 *
 * Returns 0 on success, -EOPNOTSUPP if the PPS has the entropy coding mode
 * flag set (CABAC), or a negative error code from frame setup/validation.
 */
static int tegra_vde_h264_setup_context(struct tegra_ctx *ctx,
					struct tegra_vde_h264_decoder_ctx *h264)
{
	struct tegra_ctx_h264 *h = &ctx->h264;
	struct tegra_vde *vde = ctx->vde;
	struct device *dev = vde->dev;
	int err;

	memset(h264, 0, sizeof(*h264));
	memset(vde->frames, 0, sizeof(vde->frames));

	tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
	tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_SPS);
	tegra_vde_prepare_control_data(ctx, V4L2_CID_STATELESS_H264_PPS);

	/* CABAC unsupported by hardware, requires software preprocessing */
	if (h->pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
		return -EOPNOTSUPP;

	/* profile_idc 66 is the H.264 Baseline profile */
	if (h->sps->profile_idc == 66)
		h264->baseline_profile = 1;

	if (h->sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
		h264->direct_8x8_inference_flag = 1;

	if (h->pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
		h264->constrained_intra_pred_flag = 1;

	if (h->pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT)
		h264->deblocking_filter_control_present_flag = 1;

	if (h->pps->flags & V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT)
		h264->pic_order_present_flag = 1;

	/* Undo the "minusN" bias of the syntax elements */
	h264->level_idc = to_tegra_vde_h264_level_idc(h->sps->level_idc);
	h264->log2_max_pic_order_cnt_lsb = h->sps->log2_max_pic_order_cnt_lsb_minus4 + 4;
	h264->log2_max_frame_num = h->sps->log2_max_frame_num_minus4 + 4;
	h264->pic_order_cnt_type = h->sps->pic_order_cnt_type;
	h264->pic_width_in_mbs = h->sps->pic_width_in_mbs_minus1 + 1;
	h264->pic_height_in_mbs = h->sps->pic_height_in_map_units_minus1 + 1;

	h264->num_ref_idx_l0_active_minus1 = h->pps->num_ref_idx_l0_default_active_minus1;
	h264->num_ref_idx_l1_active_minus1 = h->pps->num_ref_idx_l1_default_active_minus1;
	/* Keep the low 5 bits of the offset */
	h264->chroma_qp_index_offset = h->pps->chroma_qp_index_offset & 0x1f;
	h264->pic_init_qp = h->pps->pic_init_qp_minus26 + 26;

	err = tegra_vde_h264_setup_frames(ctx, h264);
	if (err)
		return err;

	/* Validation comes last because setup_frames() sets dpb_frames_nb */
	err = tegra_vde_validate_h264_ctx(dev, h264);
	if (err)
		return err;

	return 0;
}

/*
 * Start decoding the next source buffer of @ctx.
 *
 * Returns 0 once the hardware has been started; the caller is then expected
 * to call tegra_vde_h264_decode_wait(), which also releases the hardware.
 * Returns a negative error code if the parameters are unsupported or invalid
 * or if the hardware could not be set up.
 */
int tegra_vde_h264_decode_run(struct tegra_ctx *ctx)
{
	struct vb2_v4l2_buffer *src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
	struct tegra_m2m_buffer *bitstream = vb_to_tegra_buf(&src->vb2_buf);
	size_t bitstream_size = vb2_get_plane_payload(&src->vb2_buf, 0);
	struct tegra_vde_h264_decoder_ctx h264;
	struct tegra_vde *vde = ctx->vde;
	int err;

	err = tegra_vde_h264_setup_context(ctx, &h264);
	if (err)
		return err;

	err = tegra_vde_decode_begin(vde, &h264, vde->frames,
				     bitstream->dma_addr[0],
				     bitstream_size);
	if (err)
		return err;

	return 0;
}

/*
 * Wait for the decoding started by tegra_vde_h264_decode_run() to finish.
 * See tegra_vde_decode_end() for the return values.
 */
int tegra_vde_h264_decode_wait(struct tegra_ctx *ctx)
{
	return tegra_vde_decode_end(ctx->vde);
}