// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Cedrus VPU driver
 *
 * Copyright (C) 2013 Jens Kuske <jenskuske@gmail.com>
 * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
 * Copyright (C) 2018 Bootlin
 */

#include <linux/delay.h>
#include <linux/types.h>

#include <media/videobuf2-dma-contig.h>

#include "cedrus.h"
#include "cedrus_hw.h"
#include "cedrus_regs.h"

/*
 * These are the sizes for side buffers required by the hardware for storing
 * internal decoding metadata. They match the values used by the early BSP
 * implementations, that were initially exposed in libvdpau-sunxi.
 * Subsequent BSP implementations seem to double the neighbor info buffer size
 * for the H6 SoC, which may be related to 10 bit H265 support.
 */
#define CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE	(794 * SZ_1K)
#define CEDRUS_H265_ENTRY_POINTS_BUF_SIZE	(4 * SZ_1K)
#define CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE	160

/*
 * Layout of one entry of the frame info area in the VPU's SRAM, as
 * consumed by the hardware. All fields are little-endian 32-bit words.
 */
struct cedrus_h265_sram_frame_info {
	__le32	top_pic_order_cnt;
	__le32	bottom_pic_order_cnt;
	__le32	top_mv_col_buf_addr;
	__le32	bottom_mv_col_buf_addr;
	__le32	luma_addr;
	__le32	chroma_addr;
} __packed;

/* One (delta weight, offset) pair of a prediction weight table in SRAM. */
struct cedrus_h265_sram_pred_weight {
	__s8	delta_weight;
	__s8	offset;
} __packed;

/*
 * Translate the hardware decode status into the driver's IRQ status:
 * any error bit set, or the success bit missing, is reported as an error.
 */
static enum cedrus_irq_status cedrus_h265_irq_status(struct cedrus_ctx *ctx)
{
	struct cedrus_dev *dev = ctx->dev;
	u32 reg;

	reg = cedrus_read(dev, VE_DEC_H265_STATUS);
	reg &= VE_DEC_H265_STATUS_CHECK_MASK;

	if (reg & VE_DEC_H265_STATUS_CHECK_ERROR ||
	    !(reg & VE_DEC_H265_STATUS_SUCCESS))
		return CEDRUS_IRQ_ERROR;

	return CEDRUS_IRQ_OK;
}

/* Acknowledge (clear) all monitored H265 status bits. */
static void cedrus_h265_irq_clear(struct cedrus_ctx *ctx)
{
	struct cedrus_dev *dev = ctx->dev;

	cedrus_write(dev, VE_DEC_H265_STATUS, VE_DEC_H265_STATUS_CHECK_MASK);
}

/* Mask all H265 interrupt sources in the engine control register. */
static void cedrus_h265_irq_disable(struct cedrus_ctx *ctx)
{
	struct cedrus_dev *dev = ctx->dev;
	u32 reg = cedrus_read(dev, VE_DEC_H265_CTRL);

	reg &= ~VE_DEC_H265_CTRL_IRQ_MASK;

	cedrus_write(dev, VE_DEC_H265_CTRL, reg);
}

/* Select the SRAM offset for subsequent VE_DEC_H265_SRAM_DATA accesses. */
static void cedrus_h265_sram_write_offset(struct cedrus_dev *dev, u32 offset)
{
	cedrus_write(dev, VE_DEC_H265_SRAM_OFFSET, offset);
}

/*
 * Stream data into SRAM at the previously-selected offset, one 32-bit word
 * at a time. Note that only whole words are written: a trailing remainder
 * of less than sizeof(u32) bytes is silently dropped, so callers must pass
 * word-aligned sizes.
 */
static void cedrus_h265_sram_write_data(struct cedrus_dev *dev, void *data,
					unsigned int size)
{
	u32 *word = data;

	while (size >= sizeof(u32)) {
		cedrus_write(dev, VE_DEC_H265_SRAM_DATA, *word++);
		size -= sizeof(u32);
	}
}

/*
 * DMA address of the motion vector collocated info slice for the given
 * capture buffer index. Each buffer gets one unit of the shared MV col
 * buffer; the unit is split in two halves, with field 1 (bottom field)
 * using the second half.
 */
static inline dma_addr_t
cedrus_h265_frame_info_mv_col_buf_addr(struct cedrus_ctx *ctx,
				       unsigned int index, unsigned int field)
{
	return ctx->codec.h265.mv_col_buf_addr + index *
	       ctx->codec.h265.mv_col_buf_unit_size +
	       field * ctx->codec.h265.mv_col_buf_unit_size / 2;
}

/*
 * Fill one SRAM frame info entry (POCs, MV col buffer addresses and
 * luma/chroma plane addresses) for the frame stored in @buf. For
 * progressive content (!field_pic) the top-field values are reused for
 * the bottom field.
 */
static void cedrus_h265_frame_info_write_single(struct cedrus_ctx *ctx,
						unsigned int index,
						bool field_pic,
						u32 pic_order_cnt[],
						struct vb2_buffer *buf)
{
	struct cedrus_dev *dev = ctx->dev;
	dma_addr_t dst_luma_addr = cedrus_dst_buf_addr(ctx, buf, 0);
	dma_addr_t dst_chroma_addr = cedrus_dst_buf_addr(ctx, buf, 1);
	dma_addr_t mv_col_buf_addr[2] = {
		cedrus_h265_frame_info_mv_col_buf_addr(ctx, buf->index, 0),
		cedrus_h265_frame_info_mv_col_buf_addr(ctx, buf->index,
						       field_pic ? 1 : 0)
	};
	u32 offset = VE_DEC_H265_SRAM_OFFSET_FRAME_INFO +
		     VE_DEC_H265_SRAM_OFFSET_FRAME_INFO_UNIT * index;
	struct cedrus_h265_sram_frame_info frame_info = {
		.top_pic_order_cnt = cpu_to_le32(pic_order_cnt[0]),
		.bottom_pic_order_cnt = cpu_to_le32(field_pic ?
						    pic_order_cnt[1] :
						    pic_order_cnt[0]),
		.top_mv_col_buf_addr =
			cpu_to_le32(VE_DEC_H265_SRAM_DATA_ADDR_BASE(mv_col_buf_addr[0])),
		.bottom_mv_col_buf_addr = cpu_to_le32(field_pic ?
			VE_DEC_H265_SRAM_DATA_ADDR_BASE(mv_col_buf_addr[1]) :
			VE_DEC_H265_SRAM_DATA_ADDR_BASE(mv_col_buf_addr[0])),
		.luma_addr = cpu_to_le32(VE_DEC_H265_SRAM_DATA_ADDR_BASE(dst_luma_addr)),
		.chroma_addr = cpu_to_le32(VE_DEC_H265_SRAM_DATA_ADDR_BASE(dst_chroma_addr)),
	};

	cedrus_h265_sram_write_offset(dev, offset);
	cedrus_h265_sram_write_data(dev, &frame_info, sizeof(frame_info));
}

/*
 * Write a frame info entry for every active DPB entry. Entries whose
 * timestamp does not match any capture buffer are silently skipped.
 */
static void cedrus_h265_frame_info_write_dpb(struct cedrus_ctx *ctx,
					     const struct v4l2_hevc_dpb_entry *dpb,
					     u8 num_active_dpb_entries)
{
	struct vb2_queue *vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
					       V4L2_BUF_TYPE_VIDEO_CAPTURE);
	unsigned int i;

	for (i = 0; i < num_active_dpb_entries; i++) {
		struct vb2_buffer *buf = vb2_find_buffer(vq, dpb[i].timestamp);
		u32 pic_order_cnt[2] = {
			dpb[i].pic_order_cnt_val,
			dpb[i].pic_order_cnt_val
		};

		if (!buf)
			continue;

		cedrus_h265_frame_info_write_single(ctx, i, dpb[i].field_pic,
						    pic_order_cnt,
						    buf);
	}
}

/*
 * Write a reference picture list to SRAM at @sram_offset. Each 32-bit SRAM
 * word packs up to four 8-bit reference entries; long-term references are
 * tagged with the LT_REF flag bit.
 */
static void cedrus_h265_ref_pic_list_write(struct cedrus_dev *dev,
					   const struct v4l2_hevc_dpb_entry *dpb,
					   const u8 list[],
					   u8 num_ref_idx_active,
					   u32 sram_offset)
{
	unsigned int i;
	u32 word = 0;

	cedrus_h265_sram_write_offset(dev, sram_offset);

	for (i = 0; i < num_ref_idx_active; i++) {
		unsigned int shift = (i % 4) * 8;
		unsigned int index = list[i];
		u8 value = list[i];

		if (dpb[index].flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE)
			value |= VE_DEC_H265_SRAM_REF_PIC_LIST_LT_REF;

		/* Each SRAM word gathers up to 4 references. */
		word |= value << shift;

		/* Write the word to SRAM and clear it for the next batch. */
		if ((i % 4) == 3 || i == (num_ref_idx_active - 1)) {
			cedrus_h265_sram_write_data(dev, &word, sizeof(word));
			word = 0;
		}
	}
}

/*
 * Write the luma and chroma prediction weight tables to SRAM. Entries are
 * packed two (delta weight, offset) byte pairs per 32-bit SRAM word: luma
 * entries are buffered pairwise before writing, while chroma entries
 * (two components per reference) fill exactly one word per reference.
 */
static void cedrus_h265_pred_weight_write(struct cedrus_dev *dev,
					  const s8 delta_luma_weight[],
					  const s8 luma_offset[],
					  const s8 delta_chroma_weight[][2],
					  const s8 chroma_offset[][2],
					  u8 num_ref_idx_active,
					  u32 sram_luma_offset,
					  u32 sram_chroma_offset)
{
	struct cedrus_h265_sram_pred_weight pred_weight[2] = { { 0 } };
	unsigned int i, j;

	cedrus_h265_sram_write_offset(dev, sram_luma_offset);

	for (i = 0; i < num_ref_idx_active; i++) {
		unsigned int index = i % 2;

		pred_weight[index].delta_weight = delta_luma_weight[i];
		pred_weight[index].offset = luma_offset[i];

		/* Flush on every full pair, and on the final (odd) entry. */
		if (index == 1 || i == (num_ref_idx_active - 1))
			cedrus_h265_sram_write_data(dev, (u32 *)&pred_weight,
						    sizeof(pred_weight));
	}

	cedrus_h265_sram_write_offset(dev, sram_chroma_offset);

	for (i = 0; i < num_ref_idx_active; i++) {
		for (j = 0; j < 2; j++) {
			pred_weight[j].delta_weight = delta_chroma_weight[i][j];
			pred_weight[j].offset = chroma_offset[i][j];
		}

		cedrus_h265_sram_write_data(dev, &pred_weight,
					    sizeof(pred_weight));
	}
}

/*
 * Advance the hardware bitstream parser by @num bits, flushing at most
 * 32 bits per trigger operation and waiting for the VLD to go idle after
 * each flush.
 */
static void cedrus_h265_skip_bits(struct cedrus_dev *dev, int num)
{
	int count = 0;

	while (count < num) {
		int tmp = min(num - count, 32);

		cedrus_write(dev, VE_DEC_H265_TRIGGER,
			     VE_DEC_H265_TRIGGER_FLUSH_BITS |
			     VE_DEC_H265_TRIGGER_TYPE_N_BITS(tmp));

		if (cedrus_wait_for(dev, VE_DEC_H265_STATUS, VE_DEC_H265_STATUS_VLD_BUSY))
			dev_err_ratelimited(dev->dev, "timed out waiting to skip bits\n");

		count += tmp;
	}
}

/*
 * Load the scaling matrices from the V4L2 control into the hardware:
 * DC coefficients go to dedicated registers, while the 8x8, 32x32, 16x16
 * and 4x4 lists are streamed into SRAM, four coefficients per 32-bit word.
 * NOTE(review): the "j + k * 8" indexing walks the lists in a transposed
 * order relative to the flat control layout — presumably the order the
 * hardware expects; confirm against the BSP/libvdpau-sunxi reference.
 */
static void cedrus_h265_write_scaling_list(struct cedrus_ctx *ctx,
					   struct cedrus_run *run)
{
	const struct v4l2_ctrl_hevc_scaling_matrix *scaling;
	struct cedrus_dev *dev = ctx->dev;
	u32 i, j, k, val;

	scaling = run->h265.scaling_matrix;

	cedrus_write(dev, VE_DEC_H265_SCALING_LIST_DC_COEF0,
		     (scaling->scaling_list_dc_coef_32x32[1] << 24) |
		     (scaling->scaling_list_dc_coef_32x32[0] << 16) |
		     (scaling->scaling_list_dc_coef_16x16[1] << 8) |
		     (scaling->scaling_list_dc_coef_16x16[0] << 0));

	cedrus_write(dev, VE_DEC_H265_SCALING_LIST_DC_COEF1,
		     (scaling->scaling_list_dc_coef_16x16[5] << 24) |
		     (scaling->scaling_list_dc_coef_16x16[4] << 16) |
		     (scaling->scaling_list_dc_coef_16x16[3] << 8) |
		     (scaling->scaling_list_dc_coef_16x16[2] << 0));

	cedrus_h265_sram_write_offset(dev, VE_DEC_H265_SRAM_OFFSET_SCALING_LISTS);

	for (i = 0; i < 6; i++)
		for (j = 0; j < 8; j++)
			for (k = 0; k < 8; k += 4) {
				val = ((u32)scaling->scaling_list_8x8[i][j + (k + 3) * 8] << 24) |
				      ((u32)scaling->scaling_list_8x8[i][j + (k + 2) * 8] << 16) |
				      ((u32)scaling->scaling_list_8x8[i][j + (k + 1) * 8] << 8) |
				      scaling->scaling_list_8x8[i][j + k * 8];
				cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val);
			}

	for (i = 0; i < 2; i++)
		for (j = 0; j < 8; j++)
			for (k = 0; k < 8; k += 4) {
				val = ((u32)scaling->scaling_list_32x32[i][j + (k + 3) * 8] << 24) |
				      ((u32)scaling->scaling_list_32x32[i][j + (k + 2) * 8] << 16) |
				      ((u32)scaling->scaling_list_32x32[i][j + (k + 1) * 8] << 8) |
				      scaling->scaling_list_32x32[i][j + k * 8];
				cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val);
			}

	for (i = 0; i < 6; i++)
		for (j = 0; j < 8; j++)
			for (k = 0; k < 8; k += 4) {
				val = ((u32)scaling->scaling_list_16x16[i][j + (k + 3) * 8] << 24) |
				      ((u32)scaling->scaling_list_16x16[i][j + (k + 2) * 8] << 16) |
				      ((u32)scaling->scaling_list_16x16[i][j + (k + 1) * 8] << 8) |
				      scaling->scaling_list_16x16[i][j + k * 8];
				cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val);
			}

	for (i = 0; i < 6; i++)
		for (j = 0; j < 4; j++) {
			val = ((u32)scaling->scaling_list_4x4[i][j + 12] << 24) |
			      ((u32)scaling->scaling_list_4x4[i][j + 8] << 16) |
			      ((u32)scaling->scaling_list_4x4[i][j + 4] << 8) |
			      scaling->scaling_list_4x4[i][j];
			cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val);
		}
}

/*
 * Return 1 when the current slice is NOT low-delay, i.e. when any active
 * reference (list 0 for P/B, also list 1 for B) has a picture order count
 * greater than the current picture's POC. Returns 0 otherwise.
 */
static int cedrus_h265_is_low_delay(struct cedrus_run *run)
{
	const struct v4l2_ctrl_hevc_slice_params *slice_params;
	const struct v4l2_hevc_dpb_entry *dpb;
	s32 poc;
	int i;

	slice_params = run->h265.slice_params;
	poc = run->h265.decode_params->pic_order_cnt_val;
	dpb = run->h265.decode_params->dpb;

	for (i = 0; i < slice_params->num_ref_idx_l0_active_minus1 + 1; i++)
		if (dpb[slice_params->ref_idx_l0[i]].pic_order_cnt_val > poc)
			return 1;

	if (slice_params->slice_type != V4L2_HEVC_SLICE_TYPE_B)
		return 0;

	for (i = 0; i < slice_params->num_ref_idx_l1_active_minus1 + 1; i++)
		if (dpb[slice_params->ref_idx_l1[i]].pic_order_cnt_val > poc)
			return 1;

	return 0;
}

/*
 * Program the tile geometry for the current slice: locate the tile that
 * contains the slice's first CTB, write its start/end CTB coordinates to
 * the hardware, and fill the entry point buffer. With entropy coding sync
 * (WPP) only the raw entry point offsets are needed; with tiles each entry
 * point also carries the start/end CTB coordinates of its tile.
 */
static void cedrus_h265_write_tiles(struct cedrus_ctx *ctx,
				    struct cedrus_run *run,
				    unsigned int ctb_addr_x,
				    unsigned int ctb_addr_y)
{
	const struct v4l2_ctrl_hevc_slice_params *slice_params;
	const struct v4l2_ctrl_hevc_pps *pps;
	struct cedrus_dev *dev = ctx->dev;
	const u32 *entry_points;
	u32 *entry_points_buf;
	int i, x, tx, y, ty;

	pps = run->h265.pps;
	slice_params = run->h265.slice_params;
	entry_points = run->h265.entry_points;
	entry_points_buf = ctx->codec.h265.entry_points_buf;

	/* Find the tile column (tx) containing ctb_addr_x; x is its origin. */
	for (x = 0, tx = 0; tx < pps->num_tile_columns_minus1 + 1; tx++) {
		if (x + pps->column_width_minus1[tx] + 1 > ctb_addr_x)
			break;

		x += pps->column_width_minus1[tx] + 1;
	}

	/* Find the tile row (ty) containing ctb_addr_y; y is its origin. */
	for (y = 0, ty = 0; ty < pps->num_tile_rows_minus1 + 1; ty++) {
		if (y + pps->row_height_minus1[ty] + 1 > ctb_addr_y)
			break;

		y += pps->row_height_minus1[ty] + 1;
	}

	cedrus_write(dev, VE_DEC_H265_TILE_START_CTB, (y << 16) | (x << 0));
	cedrus_write(dev, VE_DEC_H265_TILE_END_CTB,
		     ((y + pps->row_height_minus1[ty]) << 16) |
		     ((x + pps->column_width_minus1[tx]) << 0));

	if (pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED) {
		for (i = 0; i < slice_params->num_entry_point_offsets; i++)
			entry_points_buf[i] = entry_points[i];
	} else {
		for (i = 0; i < slice_params->num_entry_point_offsets; i++) {
			/* Advance to the next tile, wrapping to the next row. */
			if (tx + 1 >= pps->num_tile_columns_minus1 + 1) {
				x = 0;
				tx = 0;
				y += pps->row_height_minus1[ty++] + 1;
			} else {
				x += pps->column_width_minus1[tx++] + 1;
			}

			entry_points_buf[i * 4 + 0] = entry_points[i];
			entry_points_buf[i * 4 + 1] = 0x0;
			entry_points_buf[i * 4 + 2] = (y << 16) | (x << 0);
			entry_points_buf[i * 4 + 3] =
				((y + pps->row_height_minus1[ty]) << 16) |
				((x + pps->column_width_minus1[tx]) << 0);
		}
	}
}

/*
 * Program the whole H265 engine for decoding one slice: bitstream source,
 * SPS/PPS/slice header registers, scaling lists, DPB frame info, reference
 * lists and prediction weights. Returns 0 on success, -ERANGE on an entry
 * point count mismatch, -ENOMEM on MV col buffer allocation failure,
 * -EOPNOTSUPP/-EINVAL on unusable slice header padding.
 */
static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
{
	struct cedrus_dev *dev = ctx->dev;
	const struct v4l2_ctrl_hevc_sps *sps;
	const struct v4l2_ctrl_hevc_pps *pps;
	const struct v4l2_ctrl_hevc_slice_params *slice_params;
	const struct v4l2_ctrl_hevc_decode_params *decode_params;
	const struct v4l2_hevc_pred_weight_table *pred_weight_table;
	unsigned int width_in_ctb_luma, ctb_size_luma;
	unsigned int log2_max_luma_coding_block_size;
	unsigned int ctb_addr_x, ctb_addr_y;
	dma_addr_t src_buf_addr;
	dma_addr_t src_buf_end_addr;
	u32 chroma_log2_weight_denom;
	u32 num_entry_point_offsets;
	u32 output_pic_list_index;
	u32 pic_order_cnt[2];
	u8 *padding;
	int count;
	u32 reg;

	sps = run->h265.sps;
	pps = run->h265.pps;
	slice_params = run->h265.slice_params;
	decode_params = run->h265.decode_params;
	pred_weight_table = &slice_params->pred_weight_table;
	num_entry_point_offsets = slice_params->num_entry_point_offsets;

	/*
	 * If entry point offsets are present, the control must carry exactly
	 * the number announced in the slice parameters.
	 */
	if (num_entry_point_offsets &&
	    num_entry_point_offsets != run->h265.entry_points_count)
		return -ERANGE;

	log2_max_luma_coding_block_size =
		sps->log2_min_luma_coding_block_size_minus3 + 3 +
		sps->log2_diff_max_min_luma_coding_block_size;
	ctb_size_luma = 1UL << log2_max_luma_coding_block_size;
	width_in_ctb_luma =
		DIV_ROUND_UP(sps->pic_width_in_luma_samples, ctb_size_luma);

	/* MV column buffer size and allocation. */
	if (!ctx->codec.h265.mv_col_buf_size) {
		unsigned int num_buffers =
			run->dst->vb2_buf.vb2_queue->num_buffers;

		/*
		 * Each CTB requires a MV col buffer with a specific unit size.
		 * Since the address is given with missing lsb bits, 1 KiB is
		 * added to each buffer to ensure proper alignment.
		 */
		ctx->codec.h265.mv_col_buf_unit_size =
			DIV_ROUND_UP(ctx->src_fmt.width, ctb_size_luma) *
			DIV_ROUND_UP(ctx->src_fmt.height, ctb_size_luma) *
			CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE + SZ_1K;

		ctx->codec.h265.mv_col_buf_size = num_buffers *
			ctx->codec.h265.mv_col_buf_unit_size;

		/* Buffer is never accessed by CPU, so we can skip kernel mapping. */
		ctx->codec.h265.mv_col_buf =
			dma_alloc_attrs(dev->dev,
					ctx->codec.h265.mv_col_buf_size,
					&ctx->codec.h265.mv_col_buf_addr,
					GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
		if (!ctx->codec.h265.mv_col_buf) {
			/* Reset the size so a later run retries the allocation. */
			ctx->codec.h265.mv_col_buf_size = 0;
			return -ENOMEM;
		}
	}

	/* Activate H265 engine. */
	cedrus_engine_enable(ctx, CEDRUS_CODEC_H265);

	/* Source offset and length in bits. */

	cedrus_write(dev, VE_DEC_H265_BITS_OFFSET, 0);

	reg = slice_params->bit_size;
	cedrus_write(dev, VE_DEC_H265_BITS_LEN, reg);

	/* Source beginning and end addresses. */

	src_buf_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0);

	reg = VE_DEC_H265_BITS_ADDR_BASE(src_buf_addr);
	reg |= VE_DEC_H265_BITS_ADDR_VALID_SLICE_DATA;
	reg |= VE_DEC_H265_BITS_ADDR_LAST_SLICE_DATA;
	reg |= VE_DEC_H265_BITS_ADDR_FIRST_SLICE_DATA;

	cedrus_write(dev, VE_DEC_H265_BITS_ADDR, reg);

	src_buf_end_addr = src_buf_addr +
			   DIV_ROUND_UP(slice_params->bit_size, 8);

	reg = VE_DEC_H265_BITS_END_ADDR_BASE(src_buf_end_addr);
	cedrus_write(dev, VE_DEC_H265_BITS_END_ADDR, reg);

	/* Coding tree block address */
	ctb_addr_x = slice_params->slice_segment_addr % width_in_ctb_luma;
	ctb_addr_y = slice_params->slice_segment_addr / width_in_ctb_luma;
	reg = VE_DEC_H265_DEC_CTB_ADDR_X(ctb_addr_x);
	reg |= VE_DEC_H265_DEC_CTB_ADDR_Y(ctb_addr_y);
	cedrus_write(dev, VE_DEC_H265_DEC_CTB_ADDR, reg);

	if ((pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED) ||
	    (pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED)) {
		cedrus_h265_write_tiles(ctx, run, ctb_addr_x, ctb_addr_y);
	} else {
		cedrus_write(dev, VE_DEC_H265_TILE_START_CTB, 0);
		cedrus_write(dev, VE_DEC_H265_TILE_END_CTB, 0);
	}

	/* Clear the number of correctly-decoded coding tree blocks. */
	if (ctx->fh.m2m_ctx->new_frame)
		cedrus_write(dev, VE_DEC_H265_DEC_CTB_NUM, 0);

	/* Initialize bitstream access. */
	cedrus_write(dev, VE_DEC_H265_TRIGGER, VE_DEC_H265_TRIGGER_INIT_SWDEC);

	/*
	 * Cedrus expects that bitstream pointer is actually at the end of the slice header
	 * instead of start of slice data. Padding is 8 bits at most (one bit set to 1 and
	 * at most seven bits set to 0), so we have to inspect only one byte before slice data.
	 */

	if (slice_params->data_byte_offset == 0)
		return -EOPNOTSUPP;

	padding = (u8 *)vb2_plane_vaddr(&run->src->vb2_buf, 0) +
		slice_params->data_byte_offset - 1;

	/* at least one bit must be set in that byte */
	if (*padding == 0)
		return -EINVAL;

	/* Locate the lowest set bit: it marks the start of the padding. */
	for (count = 0; count < 8; count++)
		if (*padding & (1 << count))
			break;

	/* Include the one bit. */
	count++;

	cedrus_h265_skip_bits(dev, slice_params->data_byte_offset * 8 - count);

	/* Bitstream parameters. */

	reg = VE_DEC_H265_DEC_NAL_HDR_NAL_UNIT_TYPE(slice_params->nal_unit_type) |
	      VE_DEC_H265_DEC_NAL_HDR_NUH_TEMPORAL_ID_PLUS1(slice_params->nuh_temporal_id_plus1);

	cedrus_write(dev, VE_DEC_H265_DEC_NAL_HDR, reg);

	/* SPS. */

	reg = VE_DEC_H265_DEC_SPS_HDR_MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA(sps->max_transform_hierarchy_depth_intra) |
	      VE_DEC_H265_DEC_SPS_HDR_MAX_TRANSFORM_HIERARCHY_DEPTH_INTER(sps->max_transform_hierarchy_depth_inter) |
	      VE_DEC_H265_DEC_SPS_HDR_LOG2_DIFF_MAX_MIN_TRANSFORM_BLOCK_SIZE(sps->log2_diff_max_min_luma_transform_block_size) |
	      VE_DEC_H265_DEC_SPS_HDR_LOG2_MIN_TRANSFORM_BLOCK_SIZE_MINUS2(sps->log2_min_luma_transform_block_size_minus2) |
	      VE_DEC_H265_DEC_SPS_HDR_LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE(sps->log2_diff_max_min_luma_coding_block_size) |
	      VE_DEC_H265_DEC_SPS_HDR_LOG2_MIN_LUMA_CODING_BLOCK_SIZE_MINUS3(sps->log2_min_luma_coding_block_size_minus3) |
	      VE_DEC_H265_DEC_SPS_HDR_BIT_DEPTH_CHROMA_MINUS8(sps->bit_depth_chroma_minus8) |
	      VE_DEC_H265_DEC_SPS_HDR_BIT_DEPTH_LUMA_MINUS8(sps->bit_depth_luma_minus8) |
	      VE_DEC_H265_DEC_SPS_HDR_CHROMA_FORMAT_IDC(sps->chroma_format_idc);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_STRONG_INTRA_SMOOTHING_ENABLE,
				V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED,
				sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_SPS_TEMPORAL_MVP_ENABLED,
				V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED,
				sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_SAMPLE_ADAPTIVE_OFFSET_ENABLED,
				V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET,
				sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_AMP_ENABLED,
				V4L2_HEVC_SPS_FLAG_AMP_ENABLED, sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_SEPARATE_COLOUR_PLANE,
				V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE,
				sps->flags);

	cedrus_write(dev, VE_DEC_H265_DEC_SPS_HDR, reg);

	reg = VE_DEC_H265_DEC_PCM_CTRL_LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE(sps->log2_diff_max_min_pcm_luma_coding_block_size) |
	      VE_DEC_H265_DEC_PCM_CTRL_LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE_MINUS3(sps->log2_min_pcm_luma_coding_block_size_minus3) |
	      VE_DEC_H265_DEC_PCM_CTRL_PCM_SAMPLE_BIT_DEPTH_CHROMA_MINUS1(sps->pcm_sample_bit_depth_chroma_minus1) |
	      VE_DEC_H265_DEC_PCM_CTRL_PCM_SAMPLE_BIT_DEPTH_LUMA_MINUS1(sps->pcm_sample_bit_depth_luma_minus1);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PCM_CTRL_FLAG_PCM_ENABLED,
				V4L2_HEVC_SPS_FLAG_PCM_ENABLED, sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PCM_CTRL_FLAG_PCM_LOOP_FILTER_DISABLED,
				V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED,
				sps->flags);

	cedrus_write(dev, VE_DEC_H265_DEC_PCM_CTRL, reg);

	/* PPS. */

	reg = VE_DEC_H265_DEC_PPS_CTRL0_PPS_CR_QP_OFFSET(pps->pps_cr_qp_offset) |
	      VE_DEC_H265_DEC_PPS_CTRL0_PPS_CB_QP_OFFSET(pps->pps_cb_qp_offset) |
	      VE_DEC_H265_DEC_PPS_CTRL0_INIT_QP_MINUS26(pps->init_qp_minus26) |
	      VE_DEC_H265_DEC_PPS_CTRL0_DIFF_CU_QP_DELTA_DEPTH(pps->diff_cu_qp_delta_depth);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_CU_QP_DELTA_ENABLED,
				V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_TRANSFORM_SKIP_ENABLED,
				V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_CONSTRAINED_INTRA_PRED,
				V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_SIGN_DATA_HIDING_ENABLED,
				V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED,
				pps->flags);

	cedrus_write(dev, VE_DEC_H265_DEC_PPS_CTRL0, reg);

	reg = VE_DEC_H265_DEC_PPS_CTRL1_LOG2_PARALLEL_MERGE_LEVEL_MINUS2(pps->log2_parallel_merge_level_minus2);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED,
				V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED,
				V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_ENTROPY_CODING_SYNC_ENABLED,
				V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_TILES_ENABLED,
				V4L2_HEVC_PPS_FLAG_TILES_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_TRANSQUANT_BYPASS_ENABLED,
				V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_WEIGHTED_BIPRED,
				V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED, pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_WEIGHTED_PRED,
				V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED, pps->flags);

	cedrus_write(dev, VE_DEC_H265_DEC_PPS_CTRL1, reg);

	/* Slice Parameters. */

	reg = VE_DEC_H265_DEC_SLICE_HDR_INFO0_PICTURE_TYPE(slice_params->pic_struct) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_FIVE_MINUS_MAX_NUM_MERGE_CAND(slice_params->five_minus_max_num_merge_cand) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_NUM_REF_IDX_L1_ACTIVE_MINUS1(slice_params->num_ref_idx_l1_active_minus1) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_NUM_REF_IDX_L0_ACTIVE_MINUS1(slice_params->num_ref_idx_l0_active_minus1) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_COLLOCATED_REF_IDX(slice_params->collocated_ref_idx) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_COLOUR_PLANE_ID(slice_params->colour_plane_id) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_SLICE_TYPE(slice_params->slice_type);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_COLLOCATED_FROM_L0,
				V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_CABAC_INIT,
				V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_MVD_L1_ZERO,
				V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_SAO_CHROMA,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_SAO_LUMA,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_TEMPORAL_MVP_ENABLE,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_DEPENDENT_SLICE_SEGMENT,
				V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT,
				slice_params->flags);

	if (ctx->fh.m2m_ctx->new_frame)
		reg |= VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_FIRST_SLICE_SEGMENT_IN_PIC;

	cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO0, reg);

	reg = VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_TC_OFFSET_DIV2(slice_params->slice_tc_offset_div2) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_BETA_OFFSET_DIV2(slice_params->slice_beta_offset_div2) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CR_QP_OFFSET(slice_params->slice_cr_qp_offset) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CB_QP_OFFSET(slice_params->slice_cb_qp_offset) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_QP_DELTA(slice_params->slice_qp_delta);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED,
				slice_params->flags);

	if (slice_params->slice_type != V4L2_HEVC_SLICE_TYPE_I && !cedrus_h265_is_low_delay(run))
		reg |= VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_NOT_LOW_DELAY;

	cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO1, reg);

	chroma_log2_weight_denom = pred_weight_table->luma_log2_weight_denom +
				   pred_weight_table->delta_chroma_log2_weight_denom;
	reg = VE_DEC_H265_DEC_SLICE_HDR_INFO2_NUM_ENTRY_POINT_OFFSETS(num_entry_point_offsets) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO2_CHROMA_LOG2_WEIGHT_DENOM(chroma_log2_weight_denom) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO2_LUMA_LOG2_WEIGHT_DENOM(pred_weight_table->luma_log2_weight_denom);

	cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO2, reg);

	/* The hardware takes the entry point buffer address shifted by 8. */
	cedrus_write(dev, VE_DEC_H265_ENTRY_POINT_OFFSET_ADDR,
		     ctx->codec.h265.entry_points_buf_addr >> 8);

	/* Decoded picture size. */

	reg = VE_DEC_H265_DEC_PIC_SIZE_WIDTH(ctx->src_fmt.width) |
	      VE_DEC_H265_DEC_PIC_SIZE_HEIGHT(ctx->src_fmt.height);

	cedrus_write(dev, VE_DEC_H265_DEC_PIC_SIZE, reg);

	/* Scaling list. */

	if (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) {
		cedrus_h265_write_scaling_list(ctx, run);
		reg = VE_DEC_H265_SCALING_LIST_CTRL0_FLAG_ENABLED;
	} else {
		reg = VE_DEC_H265_SCALING_LIST_CTRL0_DEFAULT;
	}
	cedrus_write(dev, VE_DEC_H265_SCALING_LIST_CTRL0, reg);

	/* Neighbor information address. */
	reg = VE_DEC_H265_NEIGHBOR_INFO_ADDR_BASE(ctx->codec.h265.neighbor_info_buf_addr);
	cedrus_write(dev, VE_DEC_H265_NEIGHBOR_INFO_ADDR, reg);

	/* Write decoded picture buffer in pic list. */
	cedrus_h265_frame_info_write_dpb(ctx, decode_params->dpb,
					 decode_params->num_active_dpb_entries);

	/* Output frame. */

	/* The output frame uses the entry just past the DPB entries. */
	output_pic_list_index = V4L2_HEVC_DPB_ENTRIES_NUM_MAX;
	pic_order_cnt[0] = slice_params->slice_pic_order_cnt;
	pic_order_cnt[1] = slice_params->slice_pic_order_cnt;

	cedrus_h265_frame_info_write_single(ctx, output_pic_list_index,
					    slice_params->pic_struct != 0,
					    pic_order_cnt,
					    &run->dst->vb2_buf);

	cedrus_write(dev, VE_DEC_H265_OUTPUT_FRAME_IDX, output_pic_list_index);

	/* Reference picture list 0 (for P/B frames). */
	if (slice_params->slice_type != V4L2_HEVC_SLICE_TYPE_I) {
		cedrus_h265_ref_pic_list_write(dev, decode_params->dpb,
					       slice_params->ref_idx_l0,
					       slice_params->num_ref_idx_l0_active_minus1 + 1,
					       VE_DEC_H265_SRAM_OFFSET_REF_PIC_LIST0);

		if ((pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED) ||
		    (pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED))
			cedrus_h265_pred_weight_write(dev,
						      pred_weight_table->delta_luma_weight_l0,
						      pred_weight_table->luma_offset_l0,
						      pred_weight_table->delta_chroma_weight_l0,
						      pred_weight_table->chroma_offset_l0,
						      slice_params->num_ref_idx_l0_active_minus1 + 1,
						      VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_LUMA_L0,
						      VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_CHROMA_L0);
	}

	/* Reference picture list 1 (for B frames). */
	if (slice_params->slice_type == V4L2_HEVC_SLICE_TYPE_B) {
		cedrus_h265_ref_pic_list_write(dev, decode_params->dpb,
					       slice_params->ref_idx_l1,
					       slice_params->num_ref_idx_l1_active_minus1 + 1,
					       VE_DEC_H265_SRAM_OFFSET_REF_PIC_LIST1);

		if (pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED)
			cedrus_h265_pred_weight_write(dev,
						      pred_weight_table->delta_luma_weight_l1,
						      pred_weight_table->luma_offset_l1,
						      pred_weight_table->delta_chroma_weight_l1,
						      pred_weight_table->chroma_offset_l1,
						      slice_params->num_ref_idx_l1_active_minus1 + 1,
						      VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_LUMA_L1,
						      VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_CHROMA_L1);
	}

	/* Enable appropriate interrupts. */
	cedrus_write(dev, VE_DEC_H265_CTRL, VE_DEC_H265_CTRL_IRQ_MASK);

	return 0;
}

/*
 * Per-context start: allocate the neighbor info and entry point side
 * buffers. The MV col buffer is sized lazily in cedrus_h265_setup() once
 * the CTB size is known. Returns 0 or -ENOMEM.
 */
static int cedrus_h265_start(struct cedrus_ctx *ctx)
{
	struct cedrus_dev *dev = ctx->dev;

	/* The buffer size is calculated at setup time. */
	ctx->codec.h265.mv_col_buf_size = 0;

	/* Buffer is never accessed by CPU, so we can skip kernel mapping. */
	ctx->codec.h265.neighbor_info_buf =
		dma_alloc_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE,
				&ctx->codec.h265.neighbor_info_buf_addr,
				GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
	if (!ctx->codec.h265.neighbor_info_buf)
		return -ENOMEM;

	/* The entry point buffer is filled by the CPU, so keep it mapped. */
	ctx->codec.h265.entry_points_buf =
		dma_alloc_coherent(dev->dev, CEDRUS_H265_ENTRY_POINTS_BUF_SIZE,
				   &ctx->codec.h265.entry_points_buf_addr,
				   GFP_KERNEL);
	if (!ctx->codec.h265.entry_points_buf) {
		dma_free_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE,
			       ctx->codec.h265.neighbor_info_buf,
			       ctx->codec.h265.neighbor_info_buf_addr,
			       DMA_ATTR_NO_KERNEL_MAPPING);
		return -ENOMEM;
	}

	return 0;
}

/*
 * Per-context stop: release the MV col buffer (if it was allocated during
 * setup) and the side buffers allocated in cedrus_h265_start().
 */
static void cedrus_h265_stop(struct cedrus_ctx *ctx)
{
	struct cedrus_dev *dev = ctx->dev;

	if (ctx->codec.h265.mv_col_buf_size > 0) {
		dma_free_attrs(dev->dev, ctx->codec.h265.mv_col_buf_size,
			       ctx->codec.h265.mv_col_buf,
			       ctx->codec.h265.mv_col_buf_addr,
			       DMA_ATTR_NO_KERNEL_MAPPING);

		ctx->codec.h265.mv_col_buf_size = 0;
	}

	dma_free_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE,
		       ctx->codec.h265.neighbor_info_buf,
		       ctx->codec.h265.neighbor_info_buf_addr,
		       DMA_ATTR_NO_KERNEL_MAPPING);
	dma_free_coherent(dev->dev, CEDRUS_H265_ENTRY_POINTS_BUF_SIZE,
			  ctx->codec.h265.entry_points_buf,
			  ctx->codec.h265.entry_points_buf_addr);
}

/* Kick off decoding of the slice configured by cedrus_h265_setup(). */
static void cedrus_h265_trigger(struct cedrus_ctx *ctx)
{
	struct cedrus_dev *dev = ctx->dev;

	cedrus_write(dev, VE_DEC_H265_TRIGGER, VE_DEC_H265_TRIGGER_DEC_SLICE);
}

/* H265 backend operations, hooked into the common Cedrus decode path. */
struct cedrus_dec_ops cedrus_dec_ops_h265 = {
	.irq_clear	= cedrus_h265_irq_clear,
	.irq_disable	= cedrus_h265_irq_disable,
	.irq_status	= cedrus_h265_irq_status,
	.setup		= cedrus_h265_setup,
	.start		= cedrus_h265_start,
	.stop		= cedrus_h265_stop,
	.trigger	= cedrus_h265_trigger,
};