// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Cedrus VPU driver
 *
 * Copyright (C) 2013 Jens Kuske <jenskuske@gmail.com>
 * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
 * Copyright (C) 2018 Bootlin
 */

#include <linux/delay.h>
#include <linux/types.h>

#include <media/videobuf2-dma-contig.h>

#include "cedrus.h"
#include "cedrus_hw.h"
#include "cedrus_regs.h"

/*
 * These are the sizes for side buffers required by the hardware for storing
 * internal decoding metadata. They match the values used by the early BSP
 * implementations, that were initially exposed in libvdpau-sunxi.
 * Subsequent BSP implementations seem to double the neighbor info buffer size
 * for the H6 SoC, which may be related to 10 bit H265 support.
 */
#define CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE	(794 * SZ_1K)
#define CEDRUS_H265_ENTRY_POINTS_BUF_SIZE	(4 * SZ_1K)
#define CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE	160

/*
 * Per-picture frame information, in the exact little-endian layout the
 * hardware expects it to be written to SRAM (one unit per DPB slot plus
 * one for the output picture).
 */
struct cedrus_h265_sram_frame_info {
	__le32 top_pic_order_cnt;
	__le32 bottom_pic_order_cnt;
	__le32 top_mv_col_buf_addr;
	__le32 bottom_mv_col_buf_addr;
	__le32 luma_addr;
	__le32 chroma_addr;
} __packed;

/* One (weight, offset) prediction-weight pair, as laid out in SRAM. */
struct cedrus_h265_sram_pred_weight {
	__s8 delta_weight;
	__s8 offset;
} __packed;

/*
 * Read the decode status register and translate it into the driver's
 * generic IRQ status: error when the error bit is set or the success
 * bit is missing, OK otherwise.
 */
static enum cedrus_irq_status cedrus_h265_irq_status(struct cedrus_ctx *ctx)
{
	struct cedrus_dev *dev = ctx->dev;
	u32 reg;

	reg = cedrus_read(dev, VE_DEC_H265_STATUS);
	reg &= VE_DEC_H265_STATUS_CHECK_MASK;

	if (reg & VE_DEC_H265_STATUS_CHECK_ERROR ||
	    !(reg & VE_DEC_H265_STATUS_SUCCESS))
		return CEDRUS_IRQ_ERROR;

	return CEDRUS_IRQ_OK;
}

/* Acknowledge all pending H265 interrupt conditions (write-1-to-clear). */
static void cedrus_h265_irq_clear(struct cedrus_ctx *ctx)
{
	struct cedrus_dev *dev = ctx->dev;

	cedrus_write(dev, VE_DEC_H265_STATUS, VE_DEC_H265_STATUS_CHECK_MASK);
}

/* Mask all H265 interrupt sources in the control register. */
static void cedrus_h265_irq_disable(struct cedrus_ctx *ctx)
{
	struct cedrus_dev *dev = ctx->dev;
	u32 reg = cedrus_read(dev, VE_DEC_H265_CTRL);

	reg &= ~VE_DEC_H265_CTRL_IRQ_MASK;

	cedrus_write(dev, VE_DEC_H265_CTRL, reg);
}

/* Select the SRAM offset that subsequent SRAM data writes will target. */
static void cedrus_h265_sram_write_offset(struct cedrus_dev *dev, u32 offset)
{
	cedrus_write(dev, VE_DEC_H265_SRAM_OFFSET, offset);
}

/*
 * Copy a buffer into decoder SRAM through the auto-incrementing data
 * register. Data is pushed one 32-bit word at a time; any trailing bytes
 * that do not fill a whole word are not written.
 */
static void cedrus_h265_sram_write_data(struct cedrus_dev *dev, void *data,
					unsigned int size)
{
	u32 *word = data;

	while (size >= sizeof(u32)) {
		cedrus_write(dev, VE_DEC_H265_SRAM_DATA, *word++);
		size -= sizeof(u32);
	}
}

/*
 * Compute the DMA address of the motion-vector column buffer slice for a
 * given capture buffer index and field (0 = top, 1 = bottom). Each
 * buffer's unit is split in two halves, one per field.
 */
static inline dma_addr_t
cedrus_h265_frame_info_mv_col_buf_addr(struct cedrus_ctx *ctx,
				       unsigned int index, unsigned int field)
{
	return ctx->codec.h265.mv_col_buf_addr + index *
	       ctx->codec.h265.mv_col_buf_unit_size +
	       field * ctx->codec.h265.mv_col_buf_unit_size / 2;
}

/*
 * Fill one frame-info unit in SRAM (POCs, MV column buffer and
 * luma/chroma plane addresses) for the picture stored in the capture
 * buffer at @buffer_index. For frame (non-field) pictures the bottom
 * entries simply repeat the top ones.
 */
static void cedrus_h265_frame_info_write_single(struct cedrus_ctx *ctx,
						unsigned int index,
						bool field_pic,
						u32 pic_order_cnt[],
						int buffer_index)
{
	struct cedrus_dev *dev = ctx->dev;
	dma_addr_t dst_luma_addr = cedrus_dst_buf_addr(ctx, buffer_index, 0);
	dma_addr_t dst_chroma_addr = cedrus_dst_buf_addr(ctx, buffer_index, 1);
	dma_addr_t mv_col_buf_addr[2] = {
		cedrus_h265_frame_info_mv_col_buf_addr(ctx, buffer_index, 0),
		cedrus_h265_frame_info_mv_col_buf_addr(ctx, buffer_index,
						       field_pic ? 1 : 0)
	};
	u32 offset = VE_DEC_H265_SRAM_OFFSET_FRAME_INFO +
		     VE_DEC_H265_SRAM_OFFSET_FRAME_INFO_UNIT * index;
	struct cedrus_h265_sram_frame_info frame_info = {
		.top_pic_order_cnt = cpu_to_le32(pic_order_cnt[0]),
		.bottom_pic_order_cnt = cpu_to_le32(field_pic ?
						    pic_order_cnt[1] :
						    pic_order_cnt[0]),
		.top_mv_col_buf_addr =
			cpu_to_le32(VE_DEC_H265_SRAM_DATA_ADDR_BASE(mv_col_buf_addr[0])),
		.bottom_mv_col_buf_addr = cpu_to_le32(field_pic ?
			VE_DEC_H265_SRAM_DATA_ADDR_BASE(mv_col_buf_addr[1]) :
			VE_DEC_H265_SRAM_DATA_ADDR_BASE(mv_col_buf_addr[0])),
		.luma_addr = cpu_to_le32(VE_DEC_H265_SRAM_DATA_ADDR_BASE(dst_luma_addr)),
		.chroma_addr = cpu_to_le32(VE_DEC_H265_SRAM_DATA_ADDR_BASE(dst_chroma_addr)),
	};

	cedrus_h265_sram_write_offset(dev, offset);
	cedrus_h265_sram_write_data(dev, &frame_info, sizeof(frame_info));
}

/*
 * Write frame info for every active DPB entry. Entries whose timestamp
 * does not match any capture buffer are silently skipped.
 */
static void cedrus_h265_frame_info_write_dpb(struct cedrus_ctx *ctx,
					     const struct v4l2_hevc_dpb_entry *dpb,
					     u8 num_active_dpb_entries)
{
	struct vb2_queue *vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
					       V4L2_BUF_TYPE_VIDEO_CAPTURE);
	unsigned int i;

	for (i = 0; i < num_active_dpb_entries; i++) {
		int buffer_index = vb2_find_timestamp(vq, dpb[i].timestamp, 0);
		u32 pic_order_cnt[2] = {
			dpb[i].pic_order_cnt_val,
			dpb[i].pic_order_cnt_val
		};

		if (buffer_index < 0)
			continue;

		cedrus_h265_frame_info_write_single(ctx, i, dpb[i].field_pic,
						    pic_order_cnt,
						    buffer_index);
	}
}

/*
 * Write a reference picture list (L0 or L1) to SRAM at @sram_offset,
 * packing four 8-bit reference entries per 32-bit SRAM word. Long-term
 * references are tagged with the LT_REF bit.
 */
static void cedrus_h265_ref_pic_list_write(struct cedrus_dev *dev,
					   const struct v4l2_hevc_dpb_entry *dpb,
					   const u8 list[],
					   u8 num_ref_idx_active,
					   u32 sram_offset)
{
	unsigned int i;
	u32 word = 0;

	cedrus_h265_sram_write_offset(dev, sram_offset);

	for (i = 0; i < num_ref_idx_active; i++) {
		unsigned int shift = (i % 4) * 8;
		unsigned int index = list[i];
		u8 value = list[i];

		if (dpb[index].flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE)
			value |= VE_DEC_H265_SRAM_REF_PIC_LIST_LT_REF;

		/* Each SRAM word gathers up to 4 references. */
		word |= value << shift;

		/* Write the word to SRAM and clear it for the next batch. */
		if ((i % 4) == 3 || i == (num_ref_idx_active - 1)) {
			cedrus_h265_sram_write_data(dev, &word, sizeof(word));
			word = 0;
		}
	}
}

/*
 * Write luma and chroma prediction-weight tables to SRAM. Luma entries
 * are flushed in pairs (one SRAM word holds two weight/offset pairs);
 * chroma entries provide both Cb and Cr pairs per reference, so one
 * word is written per reference.
 */
static void cedrus_h265_pred_weight_write(struct cedrus_dev *dev,
					  const s8 delta_luma_weight[],
					  const s8 luma_offset[],
					  const s8 delta_chroma_weight[][2],
					  const s8 chroma_offset[][2],
					  u8 num_ref_idx_active,
					  u32 sram_luma_offset,
					  u32 sram_chroma_offset)
{
	struct cedrus_h265_sram_pred_weight pred_weight[2] = { { 0 } };
	unsigned int i, j;

	cedrus_h265_sram_write_offset(dev, sram_luma_offset);

	for (i = 0; i < num_ref_idx_active; i++) {
		unsigned int index = i % 2;

		pred_weight[index].delta_weight = delta_luma_weight[i];
		pred_weight[index].offset = luma_offset[i];

		/* Flush on every full pair, or on the last (possibly odd) entry. */
		if (index == 1 || i == (num_ref_idx_active - 1))
			cedrus_h265_sram_write_data(dev, (u32 *)&pred_weight,
						    sizeof(pred_weight));
	}

	cedrus_h265_sram_write_offset(dev, sram_chroma_offset);

	for (i = 0; i < num_ref_idx_active; i++) {
		for (j = 0; j < 2; j++) {
			pred_weight[j].delta_weight = delta_chroma_weight[i][j];
			pred_weight[j].offset = chroma_offset[i][j];
		}

		cedrus_h265_sram_write_data(dev, &pred_weight,
					    sizeof(pred_weight));
	}
}

/*
 * Advance the hardware bitstream parser by @num bits, flushing at most
 * 32 bits per trigger and busy-waiting until the VLD is idle again
 * after each flush.
 */
static void cedrus_h265_skip_bits(struct cedrus_dev *dev, int num)
{
	int count = 0;

	while (count < num) {
		int tmp = min(num - count, 32);

		cedrus_write(dev, VE_DEC_H265_TRIGGER,
			     VE_DEC_H265_TRIGGER_FLUSH_BITS |
			     VE_DEC_H265_TRIGGER_TYPE_N_BITS(tmp));
		while (cedrus_read(dev, VE_DEC_H265_STATUS) & VE_DEC_H265_STATUS_VLD_BUSY)
			udelay(1);

		count += tmp;
	}
}

/*
 * Program the scaling matrices: DC coefficients for the 16x16 and 32x32
 * lists go to dedicated registers, while the 8x8/32x32/16x16/4x4 lists
 * are packed four 8-bit coefficients per word and streamed into SRAM
 * (column-major within each 8x8 list, matching the hardware layout).
 */
static void cedrus_h265_write_scaling_list(struct cedrus_ctx *ctx,
					   struct cedrus_run *run)
{
	const struct v4l2_ctrl_hevc_scaling_matrix *scaling;
	struct cedrus_dev *dev = ctx->dev;
	u32 i, j, k, val;

	scaling = run->h265.scaling_matrix;

	cedrus_write(dev, VE_DEC_H265_SCALING_LIST_DC_COEF0,
		     (scaling->scaling_list_dc_coef_32x32[1] << 24) |
		     (scaling->scaling_list_dc_coef_32x32[0] << 16) |
		     (scaling->scaling_list_dc_coef_16x16[1] << 8) |
		     (scaling->scaling_list_dc_coef_16x16[0] << 0));

	cedrus_write(dev, VE_DEC_H265_SCALING_LIST_DC_COEF1,
		     (scaling->scaling_list_dc_coef_16x16[5] << 24) |
		     (scaling->scaling_list_dc_coef_16x16[4] << 16) |
		     (scaling->scaling_list_dc_coef_16x16[3] << 8) |
		     (scaling->scaling_list_dc_coef_16x16[2] << 0));

	cedrus_h265_sram_write_offset(dev, VE_DEC_H265_SRAM_OFFSET_SCALING_LISTS);

	for (i = 0; i < 6; i++)
		for (j = 0; j < 8; j++)
			for (k = 0; k < 8; k += 4) {
				val = ((u32)scaling->scaling_list_8x8[i][j + (k + 3) * 8] << 24) |
				      ((u32)scaling->scaling_list_8x8[i][j + (k + 2) * 8] << 16) |
				      ((u32)scaling->scaling_list_8x8[i][j + (k + 1) * 8] << 8) |
				      scaling->scaling_list_8x8[i][j + k * 8];
				cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val);
			}

	for (i = 0; i < 2; i++)
		for (j = 0; j < 8; j++)
			for (k = 0; k < 8; k += 4) {
				val = ((u32)scaling->scaling_list_32x32[i][j + (k + 3) * 8] << 24) |
				      ((u32)scaling->scaling_list_32x32[i][j + (k + 2) * 8] << 16) |
				      ((u32)scaling->scaling_list_32x32[i][j + (k + 1) * 8] << 8) |
				      scaling->scaling_list_32x32[i][j + k * 8];
				cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val);
			}

	for (i = 0; i < 6; i++)
		for (j = 0; j < 8; j++)
			for (k = 0; k < 8; k += 4) {
				val = ((u32)scaling->scaling_list_16x16[i][j + (k + 3) * 8] << 24) |
				      ((u32)scaling->scaling_list_16x16[i][j + (k + 2) * 8] << 16) |
				      ((u32)scaling->scaling_list_16x16[i][j + (k + 1) * 8] << 8) |
				      scaling->scaling_list_16x16[i][j + k * 8];
				cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val);
			}

	for (i = 0; i < 6; i++)
		for (j = 0; j < 4; j++) {
			val = ((u32)scaling->scaling_list_4x4[i][j + 12] << 24) |
			      ((u32)scaling->scaling_list_4x4[i][j + 8] << 16) |
			      ((u32)scaling->scaling_list_4x4[i][j + 4] << 8) |
			      scaling->scaling_list_4x4[i][j];
			cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val);
		}
}

/*
 * Return 1 when any active reference in list L0 (or L1 for B slices)
 * has a picture order count greater than the current picture's, i.e.
 * the slice is *not* low-delay; return 0 otherwise.
 */
static int cedrus_h265_is_low_delay(struct cedrus_run *run)
{
	const struct v4l2_ctrl_hevc_slice_params *slice_params;
	const struct v4l2_hevc_dpb_entry *dpb;
	s32 poc;
	int i;

	slice_params = run->h265.slice_params;
	poc = run->h265.decode_params->pic_order_cnt_val;
	dpb = run->h265.decode_params->dpb;

	for (i = 0; i < slice_params->num_ref_idx_l0_active_minus1 + 1; i++)
		if (dpb[slice_params->ref_idx_l0[i]].pic_order_cnt_val > poc)
			return 1;

	if (slice_params->slice_type != V4L2_HEVC_SLICE_TYPE_B)
		return 0;

	for (i = 0; i < slice_params->num_ref_idx_l1_active_minus1 + 1; i++)
		if (dpb[slice_params->ref_idx_l1[i]].pic_order_cnt_val > poc)
			return 1;

	return 0;
}

/*
 * Program tile geometry for the tile containing the slice's first CTB
 * and fill the entry-points side buffer. With entropy coding sync
 * (WPP), entry points are copied verbatim; with tiles, each entry point
 * is stored together with the start/end CTB coordinates of its tile.
 */
static void cedrus_h265_write_tiles(struct cedrus_ctx *ctx,
				    struct cedrus_run *run,
				    unsigned int ctb_addr_x,
				    unsigned int ctb_addr_y)
{
	const struct v4l2_ctrl_hevc_slice_params *slice_params;
	const struct v4l2_ctrl_hevc_pps *pps;
	struct cedrus_dev *dev = ctx->dev;
	const u32 *entry_points;
	u32 *entry_points_buf;
	int i, x, tx, y, ty;

	pps = run->h265.pps;
	slice_params = run->h265.slice_params;
	entry_points = run->h265.entry_points;
	entry_points_buf = ctx->codec.h265.entry_points_buf;

	/* Find the tile column (tx) containing ctb_addr_x; x is its first CTB column. */
	for (x = 0, tx = 0; tx < pps->num_tile_columns_minus1 + 1; tx++) {
		if (x + pps->column_width_minus1[tx] + 1 > ctb_addr_x)
			break;

		x += pps->column_width_minus1[tx] + 1;
	}

	/* Find the tile row (ty) containing ctb_addr_y; y is its first CTB row. */
	for (y = 0, ty = 0; ty < pps->num_tile_rows_minus1 + 1; ty++) {
		if (y + pps->row_height_minus1[ty] + 1 > ctb_addr_y)
			break;

		y += pps->row_height_minus1[ty] + 1;
	}

	cedrus_write(dev, VE_DEC_H265_TILE_START_CTB, (y << 16) | (x << 0));
	cedrus_write(dev, VE_DEC_H265_TILE_END_CTB,
		     ((y + pps->row_height_minus1[ty]) << 16) |
		     ((x + pps->column_width_minus1[tx]) << 0));

	if (pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED) {
		for (i = 0; i < slice_params->num_entry_point_offsets; i++)
			entry_points_buf[i] = entry_points[i];
	} else {
		for (i = 0; i < slice_params->num_entry_point_offsets; i++) {
			/* Advance to the next tile, wrapping to the next tile row. */
			if (tx + 1 >= pps->num_tile_columns_minus1 + 1) {
				x = 0;
				tx = 0;
				y += pps->row_height_minus1[ty++] + 1;
			} else {
				x += pps->column_width_minus1[tx++] + 1;
			}

			entry_points_buf[i * 4 + 0] = entry_points[i];
			entry_points_buf[i * 4 + 1] = 0x0;
			entry_points_buf[i * 4 + 2] = (y << 16) | (x << 0);
			entry_points_buf[i * 4 + 3] =
				((y + pps->row_height_minus1[ty]) << 16) |
				((x + pps->column_width_minus1[tx]) << 0);
		}
	}
}

/*
 * Program the whole decoder state for one slice: bitstream location,
 * SPS/PPS/slice header registers, scaling lists, reference lists,
 * prediction weights and frame info, then enable interrupts. Returns 0
 * on success, -ERANGE on an entry-point count mismatch, -EOPNOTSUPP or
 * -EINVAL on unusable slice header padding, -ENOMEM on allocation
 * failure.
 */
static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
{
	struct cedrus_dev *dev = ctx->dev;
	const struct v4l2_ctrl_hevc_sps *sps;
	const struct v4l2_ctrl_hevc_pps *pps;
	const struct v4l2_ctrl_hevc_slice_params *slice_params;
	const struct v4l2_ctrl_hevc_decode_params *decode_params;
	const struct v4l2_hevc_pred_weight_table *pred_weight_table;
	unsigned int width_in_ctb_luma, ctb_size_luma;
	unsigned int log2_max_luma_coding_block_size;
	unsigned int ctb_addr_x, ctb_addr_y;
	dma_addr_t src_buf_addr;
	dma_addr_t src_buf_end_addr;
	u32 chroma_log2_weight_denom;
	u32 num_entry_point_offsets;
	u32 output_pic_list_index;
	u32 pic_order_cnt[2];
	u8 *padding;
	int count;
	u32 reg;

	sps = run->h265.sps;
	pps = run->h265.pps;
	slice_params = run->h265.slice_params;
	decode_params = run->h265.decode_params;
	pred_weight_table = &slice_params->pred_weight_table;
	num_entry_point_offsets = slice_params->num_entry_point_offsets;

	/*
	 * If entry points offsets are present, we should get them
	 * exactly the right amount.
	 */
	if (num_entry_point_offsets &&
	    num_entry_point_offsets != run->h265.entry_points_count)
		return -ERANGE;

	log2_max_luma_coding_block_size =
		sps->log2_min_luma_coding_block_size_minus3 + 3 +
		sps->log2_diff_max_min_luma_coding_block_size;
	ctb_size_luma = 1UL << log2_max_luma_coding_block_size;
	width_in_ctb_luma =
		DIV_ROUND_UP(sps->pic_width_in_luma_samples, ctb_size_luma);

	/* MV column buffer size and allocation (lazily done on first slice). */
	if (!ctx->codec.h265.mv_col_buf_size) {
		unsigned int num_buffers =
			run->dst->vb2_buf.vb2_queue->num_buffers;

		/*
		 * Each CTB requires a MV col buffer with a specific unit size.
		 * Since the address is given with missing lsb bits, 1 KiB is
		 * added to each buffer to ensure proper alignment.
		 */
		ctx->codec.h265.mv_col_buf_unit_size =
			DIV_ROUND_UP(ctx->src_fmt.width, ctb_size_luma) *
			DIV_ROUND_UP(ctx->src_fmt.height, ctb_size_luma) *
			CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE + SZ_1K;

		ctx->codec.h265.mv_col_buf_size = num_buffers *
			ctx->codec.h265.mv_col_buf_unit_size;

		/* Buffer is never accessed by CPU, so we can skip kernel mapping. */
		ctx->codec.h265.mv_col_buf =
			dma_alloc_attrs(dev->dev,
					ctx->codec.h265.mv_col_buf_size,
					&ctx->codec.h265.mv_col_buf_addr,
					GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
		if (!ctx->codec.h265.mv_col_buf) {
			/* Reset the size so the allocation is retried next run. */
			ctx->codec.h265.mv_col_buf_size = 0;
			return -ENOMEM;
		}
	}

	/* Activate H265 engine. */
	cedrus_engine_enable(ctx, CEDRUS_CODEC_H265);

	/* Source offset and length in bits. */

	cedrus_write(dev, VE_DEC_H265_BITS_OFFSET, 0);

	reg = slice_params->bit_size;
	cedrus_write(dev, VE_DEC_H265_BITS_LEN, reg);

	/* Source beginning and end addresses. */

	src_buf_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0);

	reg = VE_DEC_H265_BITS_ADDR_BASE(src_buf_addr);
	reg |= VE_DEC_H265_BITS_ADDR_VALID_SLICE_DATA;
	reg |= VE_DEC_H265_BITS_ADDR_LAST_SLICE_DATA;
	reg |= VE_DEC_H265_BITS_ADDR_FIRST_SLICE_DATA;

	cedrus_write(dev, VE_DEC_H265_BITS_ADDR, reg);

	src_buf_end_addr = src_buf_addr +
			   DIV_ROUND_UP(slice_params->bit_size, 8);

	reg = VE_DEC_H265_BITS_END_ADDR_BASE(src_buf_end_addr);
	cedrus_write(dev, VE_DEC_H265_BITS_END_ADDR, reg);

	/* Coding tree block address */
	ctb_addr_x = slice_params->slice_segment_addr % width_in_ctb_luma;
	ctb_addr_y = slice_params->slice_segment_addr / width_in_ctb_luma;
	reg = VE_DEC_H265_DEC_CTB_ADDR_X(ctb_addr_x);
	reg |= VE_DEC_H265_DEC_CTB_ADDR_Y(ctb_addr_y);
	cedrus_write(dev, VE_DEC_H265_DEC_CTB_ADDR, reg);

	if ((pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED) ||
	    (pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED)) {
		cedrus_h265_write_tiles(ctx, run, ctb_addr_x, ctb_addr_y);
	} else {
		cedrus_write(dev, VE_DEC_H265_TILE_START_CTB, 0);
		cedrus_write(dev, VE_DEC_H265_TILE_END_CTB, 0);
	}

	/* Clear the number of correctly-decoded coding tree blocks. */
	if (ctx->fh.m2m_ctx->new_frame)
		cedrus_write(dev, VE_DEC_H265_DEC_CTB_NUM, 0);

	/* Initialize bitstream access. */
	cedrus_write(dev, VE_DEC_H265_TRIGGER, VE_DEC_H265_TRIGGER_INIT_SWDEC);

	/*
	 * Cedrus expects that bitstream pointer is actually at the end of the slice header
	 * instead of start of slice data. Padding is 8 bits at most (one bit set to 1 and
	 * at most seven bits set to 0), so we have to inspect only one byte before slice data.
	 */

	if (slice_params->data_byte_offset == 0)
		return -EOPNOTSUPP;

	padding = (u8 *)vb2_plane_vaddr(&run->src->vb2_buf, 0) +
		  slice_params->data_byte_offset - 1;

	/* at least one bit must be set in that byte */
	if (*padding == 0)
		return -EINVAL;

	/* Find the position of the lowest set bit (the rbsp_stop_one_bit). */
	for (count = 0; count < 8; count++)
		if (*padding & (1 << count))
			break;

	/* Include the one bit. */
	count++;

	cedrus_h265_skip_bits(dev, slice_params->data_byte_offset * 8 - count);

	/* Bitstream parameters. */

	reg = VE_DEC_H265_DEC_NAL_HDR_NAL_UNIT_TYPE(slice_params->nal_unit_type) |
	      VE_DEC_H265_DEC_NAL_HDR_NUH_TEMPORAL_ID_PLUS1(slice_params->nuh_temporal_id_plus1);

	cedrus_write(dev, VE_DEC_H265_DEC_NAL_HDR, reg);

	/* SPS. */

	reg = VE_DEC_H265_DEC_SPS_HDR_MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA(sps->max_transform_hierarchy_depth_intra) |
	      VE_DEC_H265_DEC_SPS_HDR_MAX_TRANSFORM_HIERARCHY_DEPTH_INTER(sps->max_transform_hierarchy_depth_inter) |
	      VE_DEC_H265_DEC_SPS_HDR_LOG2_DIFF_MAX_MIN_TRANSFORM_BLOCK_SIZE(sps->log2_diff_max_min_luma_transform_block_size) |
	      VE_DEC_H265_DEC_SPS_HDR_LOG2_MIN_TRANSFORM_BLOCK_SIZE_MINUS2(sps->log2_min_luma_transform_block_size_minus2) |
	      VE_DEC_H265_DEC_SPS_HDR_LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE(sps->log2_diff_max_min_luma_coding_block_size) |
	      VE_DEC_H265_DEC_SPS_HDR_LOG2_MIN_LUMA_CODING_BLOCK_SIZE_MINUS3(sps->log2_min_luma_coding_block_size_minus3) |
	      VE_DEC_H265_DEC_SPS_HDR_BIT_DEPTH_CHROMA_MINUS8(sps->bit_depth_chroma_minus8) |
	      VE_DEC_H265_DEC_SPS_HDR_BIT_DEPTH_LUMA_MINUS8(sps->bit_depth_luma_minus8) |
	      VE_DEC_H265_DEC_SPS_HDR_CHROMA_FORMAT_IDC(sps->chroma_format_idc);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_STRONG_INTRA_SMOOTHING_ENABLE,
				V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED,
				sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_SPS_TEMPORAL_MVP_ENABLED,
				V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED,
				sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_SAMPLE_ADAPTIVE_OFFSET_ENABLED,
				V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET,
				sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_AMP_ENABLED,
				V4L2_HEVC_SPS_FLAG_AMP_ENABLED, sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_SEPARATE_COLOUR_PLANE,
				V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE,
				sps->flags);

	cedrus_write(dev, VE_DEC_H265_DEC_SPS_HDR, reg);

	reg = VE_DEC_H265_DEC_PCM_CTRL_LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE(sps->log2_diff_max_min_pcm_luma_coding_block_size) |
	      VE_DEC_H265_DEC_PCM_CTRL_LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE_MINUS3(sps->log2_min_pcm_luma_coding_block_size_minus3) |
	      VE_DEC_H265_DEC_PCM_CTRL_PCM_SAMPLE_BIT_DEPTH_CHROMA_MINUS1(sps->pcm_sample_bit_depth_chroma_minus1) |
	      VE_DEC_H265_DEC_PCM_CTRL_PCM_SAMPLE_BIT_DEPTH_LUMA_MINUS1(sps->pcm_sample_bit_depth_luma_minus1);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PCM_CTRL_FLAG_PCM_ENABLED,
				V4L2_HEVC_SPS_FLAG_PCM_ENABLED, sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PCM_CTRL_FLAG_PCM_LOOP_FILTER_DISABLED,
				V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED,
				sps->flags);

	cedrus_write(dev, VE_DEC_H265_DEC_PCM_CTRL, reg);

	/* PPS. */

	reg = VE_DEC_H265_DEC_PPS_CTRL0_PPS_CR_QP_OFFSET(pps->pps_cr_qp_offset) |
	      VE_DEC_H265_DEC_PPS_CTRL0_PPS_CB_QP_OFFSET(pps->pps_cb_qp_offset) |
	      VE_DEC_H265_DEC_PPS_CTRL0_INIT_QP_MINUS26(pps->init_qp_minus26) |
	      VE_DEC_H265_DEC_PPS_CTRL0_DIFF_CU_QP_DELTA_DEPTH(pps->diff_cu_qp_delta_depth);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_CU_QP_DELTA_ENABLED,
				V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_TRANSFORM_SKIP_ENABLED,
				V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_CONSTRAINED_INTRA_PRED,
				V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_SIGN_DATA_HIDING_ENABLED,
				V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED,
				pps->flags);

	cedrus_write(dev, VE_DEC_H265_DEC_PPS_CTRL0, reg);

	reg = VE_DEC_H265_DEC_PPS_CTRL1_LOG2_PARALLEL_MERGE_LEVEL_MINUS2(pps->log2_parallel_merge_level_minus2);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED,
				V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED,
				V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_ENTROPY_CODING_SYNC_ENABLED,
				V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_TILES_ENABLED,
				V4L2_HEVC_PPS_FLAG_TILES_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_TRANSQUANT_BYPASS_ENABLED,
				V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_WEIGHTED_BIPRED,
				V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED, pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_WEIGHTED_PRED,
				V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED, pps->flags);

	cedrus_write(dev, VE_DEC_H265_DEC_PPS_CTRL1, reg);

	/* Slice Parameters. */

	reg = VE_DEC_H265_DEC_SLICE_HDR_INFO0_PICTURE_TYPE(slice_params->pic_struct) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_FIVE_MINUS_MAX_NUM_MERGE_CAND(slice_params->five_minus_max_num_merge_cand) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_NUM_REF_IDX_L1_ACTIVE_MINUS1(slice_params->num_ref_idx_l1_active_minus1) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_NUM_REF_IDX_L0_ACTIVE_MINUS1(slice_params->num_ref_idx_l0_active_minus1) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_COLLOCATED_REF_IDX(slice_params->collocated_ref_idx) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_COLOUR_PLANE_ID(slice_params->colour_plane_id) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_SLICE_TYPE(slice_params->slice_type);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_COLLOCATED_FROM_L0,
				V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_CABAC_INIT,
				V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_MVD_L1_ZERO,
				V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_SAO_CHROMA,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_SAO_LUMA,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_TEMPORAL_MVP_ENABLE,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_DEPENDENT_SLICE_SEGMENT,
				V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT,
				slice_params->flags);

	if (ctx->fh.m2m_ctx->new_frame)
		reg |= VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_FIRST_SLICE_SEGMENT_IN_PIC;

	cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO0, reg);

	reg = VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_TC_OFFSET_DIV2(slice_params->slice_tc_offset_div2) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_BETA_OFFSET_DIV2(slice_params->slice_beta_offset_div2) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CR_QP_OFFSET(slice_params->slice_cr_qp_offset) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CB_QP_OFFSET(slice_params->slice_cb_qp_offset) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_QP_DELTA(slice_params->slice_qp_delta);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED,
				slice_params->flags);

	if (slice_params->slice_type != V4L2_HEVC_SLICE_TYPE_I && !cedrus_h265_is_low_delay(run))
		reg |= VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_NOT_LOW_DELAY;

	cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO1, reg);

	chroma_log2_weight_denom = pred_weight_table->luma_log2_weight_denom +
				   pred_weight_table->delta_chroma_log2_weight_denom;
	reg = VE_DEC_H265_DEC_SLICE_HDR_INFO2_NUM_ENTRY_POINT_OFFSETS(num_entry_point_offsets) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO2_CHROMA_LOG2_WEIGHT_DENOM(chroma_log2_weight_denom) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO2_LUMA_LOG2_WEIGHT_DENOM(pred_weight_table->luma_log2_weight_denom);

	cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO2, reg);

	cedrus_write(dev, VE_DEC_H265_ENTRY_POINT_OFFSET_ADDR,
		     ctx->codec.h265.entry_points_buf_addr >> 8);

	/* Decoded picture size. */

	reg = VE_DEC_H265_DEC_PIC_SIZE_WIDTH(ctx->src_fmt.width) |
	      VE_DEC_H265_DEC_PIC_SIZE_HEIGHT(ctx->src_fmt.height);

	cedrus_write(dev, VE_DEC_H265_DEC_PIC_SIZE, reg);

	/* Scaling list. */

	if (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) {
		cedrus_h265_write_scaling_list(ctx, run);
		reg = VE_DEC_H265_SCALING_LIST_CTRL0_FLAG_ENABLED;
	} else {
		reg = VE_DEC_H265_SCALING_LIST_CTRL0_DEFAULT;
	}
	cedrus_write(dev, VE_DEC_H265_SCALING_LIST_CTRL0, reg);

	/* Neighbor information address. */
	reg = VE_DEC_H265_NEIGHBOR_INFO_ADDR_BASE(ctx->codec.h265.neighbor_info_buf_addr);
	cedrus_write(dev, VE_DEC_H265_NEIGHBOR_INFO_ADDR, reg);

	/* Write decoded picture buffer in pic list. */
	cedrus_h265_frame_info_write_dpb(ctx, decode_params->dpb,
					 decode_params->num_active_dpb_entries);

	/* Output frame: uses the frame-info slot right after the DPB entries. */

	output_pic_list_index = V4L2_HEVC_DPB_ENTRIES_NUM_MAX;
	pic_order_cnt[0] = slice_params->slice_pic_order_cnt;
	pic_order_cnt[1] = slice_params->slice_pic_order_cnt;

	cedrus_h265_frame_info_write_single(ctx, output_pic_list_index,
					    slice_params->pic_struct != 0,
					    pic_order_cnt,
					    run->dst->vb2_buf.index);

	cedrus_write(dev, VE_DEC_H265_OUTPUT_FRAME_IDX, output_pic_list_index);

	/* Reference picture list 0 (for P/B frames). */
	if (slice_params->slice_type != V4L2_HEVC_SLICE_TYPE_I) {
		cedrus_h265_ref_pic_list_write(dev, decode_params->dpb,
					       slice_params->ref_idx_l0,
					       slice_params->num_ref_idx_l0_active_minus1 + 1,
					       VE_DEC_H265_SRAM_OFFSET_REF_PIC_LIST0);

		if ((pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED) ||
		    (pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED))
			cedrus_h265_pred_weight_write(dev,
						      pred_weight_table->delta_luma_weight_l0,
						      pred_weight_table->luma_offset_l0,
						      pred_weight_table->delta_chroma_weight_l0,
						      pred_weight_table->chroma_offset_l0,
						      slice_params->num_ref_idx_l0_active_minus1 + 1,
						      VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_LUMA_L0,
						      VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_CHROMA_L0);
	}

	/* Reference picture list 1 (for B frames). */
	if (slice_params->slice_type == V4L2_HEVC_SLICE_TYPE_B) {
		cedrus_h265_ref_pic_list_write(dev, decode_params->dpb,
					       slice_params->ref_idx_l1,
					       slice_params->num_ref_idx_l1_active_minus1 + 1,
					       VE_DEC_H265_SRAM_OFFSET_REF_PIC_LIST1);

		if (pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED)
			cedrus_h265_pred_weight_write(dev,
						      pred_weight_table->delta_luma_weight_l1,
						      pred_weight_table->luma_offset_l1,
						      pred_weight_table->delta_chroma_weight_l1,
						      pred_weight_table->chroma_offset_l1,
						      slice_params->num_ref_idx_l1_active_minus1 + 1,
						      VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_LUMA_L1,
						      VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_CHROMA_L1);
	}

	/* Enable appropriate interrupts. */
	cedrus_write(dev, VE_DEC_H265_CTRL, VE_DEC_H265_CTRL_IRQ_MASK);

	return 0;
}

/*
 * Per-context initialization: allocate the neighbor-info and entry-points
 * side buffers. The MV column buffer is allocated lazily in setup() once
 * the CTB size is known. Returns 0 or -ENOMEM.
 */
static int cedrus_h265_start(struct cedrus_ctx *ctx)
{
	struct cedrus_dev *dev = ctx->dev;

	/* The buffer size is calculated at setup time. */
	ctx->codec.h265.mv_col_buf_size = 0;

	/* Buffer is never accessed by CPU, so we can skip kernel mapping. */
	ctx->codec.h265.neighbor_info_buf =
		dma_alloc_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE,
				&ctx->codec.h265.neighbor_info_buf_addr,
				GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
	if (!ctx->codec.h265.neighbor_info_buf)
		return -ENOMEM;

	/* Entry points are filled by the CPU, so this buffer needs a mapping. */
	ctx->codec.h265.entry_points_buf =
		dma_alloc_coherent(dev->dev, CEDRUS_H265_ENTRY_POINTS_BUF_SIZE,
				   &ctx->codec.h265.entry_points_buf_addr,
				   GFP_KERNEL);
	if (!ctx->codec.h265.entry_points_buf) {
		dma_free_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE,
			       ctx->codec.h265.neighbor_info_buf,
			       ctx->codec.h265.neighbor_info_buf_addr,
			       DMA_ATTR_NO_KERNEL_MAPPING);
		return -ENOMEM;
	}

	return 0;
}

/* Per-context teardown: release all side buffers allocated by start()/setup(). */
static void cedrus_h265_stop(struct cedrus_ctx *ctx)
{
	struct cedrus_dev *dev = ctx->dev;

	/* The MV column buffer only exists once setup() has run at least once. */
	if (ctx->codec.h265.mv_col_buf_size > 0) {
		dma_free_attrs(dev->dev, ctx->codec.h265.mv_col_buf_size,
			       ctx->codec.h265.mv_col_buf,
			       ctx->codec.h265.mv_col_buf_addr,
			       DMA_ATTR_NO_KERNEL_MAPPING);

		ctx->codec.h265.mv_col_buf_size = 0;
	}

	dma_free_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE,
		       ctx->codec.h265.neighbor_info_buf,
		       ctx->codec.h265.neighbor_info_buf_addr,
		       DMA_ATTR_NO_KERNEL_MAPPING);
	dma_free_coherent(dev->dev, CEDRUS_H265_ENTRY_POINTS_BUF_SIZE,
			  ctx->codec.h265.entry_points_buf,
			  ctx->codec.h265.entry_points_buf_addr);
}

/* Kick off decoding of the slice programmed by setup(). */
static void cedrus_h265_trigger(struct cedrus_ctx *ctx)
{
	struct cedrus_dev *dev = ctx->dev;

	cedrus_write(dev, VE_DEC_H265_TRIGGER, VE_DEC_H265_TRIGGER_DEC_SLICE);
}

/* H265 decode operations, plugged into the generic cedrus core. */
struct cedrus_dec_ops cedrus_dec_ops_h265 = {
	.irq_clear	= cedrus_h265_irq_clear,
	.irq_disable	= cedrus_h265_irq_disable,
	.irq_status	= cedrus_h265_irq_status,
	.setup		= cedrus_h265_setup,
	.start		= cedrus_h265_start,
	.stop		= cedrus_h265_stop,
	.trigger	= cedrus_h265_trigger,
};