1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Cedrus VPU driver 4 * 5 * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com> 6 * Copyright (c) 2018 Bootlin 7 */ 8 9 #include <linux/delay.h> 10 #include <linux/types.h> 11 12 #include <media/videobuf2-dma-contig.h> 13 14 #include "cedrus.h" 15 #include "cedrus_hw.h" 16 #include "cedrus_regs.h" 17 18 enum cedrus_h264_sram_off { 19 CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE = 0x000, 20 CEDRUS_SRAM_H264_FRAMEBUFFER_LIST = 0x100, 21 CEDRUS_SRAM_H264_REF_LIST_0 = 0x190, 22 CEDRUS_SRAM_H264_REF_LIST_1 = 0x199, 23 CEDRUS_SRAM_H264_SCALING_LIST_8x8_0 = 0x200, 24 CEDRUS_SRAM_H264_SCALING_LIST_8x8_1 = 0x210, 25 CEDRUS_SRAM_H264_SCALING_LIST_4x4 = 0x220, 26 }; 27 28 struct cedrus_h264_sram_ref_pic { 29 __le32 top_field_order_cnt; 30 __le32 bottom_field_order_cnt; 31 __le32 frame_info; 32 __le32 luma_ptr; 33 __le32 chroma_ptr; 34 __le32 mv_col_top_ptr; 35 __le32 mv_col_bot_ptr; 36 __le32 reserved; 37 } __packed; 38 39 #define CEDRUS_H264_FRAME_NUM 18 40 41 #define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (32 * SZ_1K) 42 #define CEDRUS_MIN_PIC_INFO_BUF_SIZE (130 * SZ_1K) 43 44 static void cedrus_h264_write_sram(struct cedrus_dev *dev, 45 enum cedrus_h264_sram_off off, 46 const void *data, size_t len) 47 { 48 const u32 *buffer = data; 49 size_t count = DIV_ROUND_UP(len, 4); 50 51 cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2); 52 53 while (count--) 54 cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++); 55 } 56 57 static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_ctx *ctx, 58 unsigned int position, 59 unsigned int field) 60 { 61 dma_addr_t addr = ctx->codec.h264.mv_col_buf_dma; 62 63 /* Adjust for the position */ 64 addr += position * ctx->codec.h264.mv_col_buf_field_size * 2; 65 66 /* Adjust for the field */ 67 addr += field * ctx->codec.h264.mv_col_buf_field_size; 68 69 return addr; 70 } 71 72 static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx, 73 struct cedrus_buffer *buf, 74 unsigned int top_field_order_cnt, 75 unsigned int bottom_field_order_cnt, 76 struct cedrus_h264_sram_ref_pic *pic) 77 { 78 struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf; 79 unsigned int position = buf->codec.h264.position; 80 81 pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt); 82 pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt); 83 pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8); 84 85 pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0)); 86 pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1)); 87 pic->mv_col_top_ptr = 88 cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 0)); 89 pic->mv_col_bot_ptr = 90 cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 1)); 91 } 92 93 static void cedrus_write_frame_list(struct cedrus_ctx *ctx, 94 struct cedrus_run *run) 95 { 96 struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM]; 97 const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; 98 const struct v4l2_ctrl_h264_sps *sps = run->h264.sps; 99 struct vb2_queue *cap_q; 100 struct cedrus_buffer *output_buf; 101 struct cedrus_dev *dev = ctx->dev; 102 unsigned long used_dpbs = 0; 103 unsigned int position; 104 int output = -1; 105 unsigned int i; 106 107 cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 108 109 memset(pic_list, 0, sizeof(pic_list)); 110 111 for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) { 112 const struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i]; 113 struct cedrus_buffer *cedrus_buf; 114 int buf_idx; 115 116 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID)) 117 continue; 118 119 buf_idx = vb2_find_timestamp(cap_q, dpb->reference_ts, 0); 120 if (buf_idx < 0) 121 continue; 122 123 cedrus_buf = vb2_to_cedrus_buffer(cap_q->bufs[buf_idx]); 124 position = cedrus_buf->codec.h264.position; 125 used_dpbs |= BIT(position); 126 127 if (run->dst->vb2_buf.timestamp == dpb->reference_ts) { 128 output = position; 129 continue; 130 } 131 132 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) 133 continue; 134 135 cedrus_fill_ref_pic(ctx, cedrus_buf, 136 dpb->top_field_order_cnt, 137 dpb->bottom_field_order_cnt, 138 &pic_list[position]); 139 } 140 141 if (output >= 0) 142 position = output; 143 else 144 position = find_first_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM); 145 146 output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf); 147 output_buf->codec.h264.position = position; 148 149 if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) 150 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD; 151 else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) 152 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_MBAFF; 153 else 154 output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FRAME; 155 156 cedrus_fill_ref_pic(ctx, output_buf, 157 decode->top_field_order_cnt, 158 decode->bottom_field_order_cnt, 159 &pic_list[position]); 160 161 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST, 162 pic_list, sizeof(pic_list)); 163 164 cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position); 165 } 166 167 #define CEDRUS_MAX_REF_IDX 32 168 169 static void _cedrus_write_ref_list(struct cedrus_ctx *ctx, 170 struct cedrus_run *run, 171 const struct v4l2_h264_reference *ref_list, 172 u8 num_ref, enum cedrus_h264_sram_off sram) 173 { 174 const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; 175 struct vb2_queue *cap_q; 176 struct cedrus_dev *dev = ctx->dev; 177 u8 sram_array[CEDRUS_MAX_REF_IDX]; 178 unsigned int i; 179 size_t size; 180 181 cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 182 183 memset(sram_array, 0, sizeof(sram_array)); 184 185 for (i = 0; i < num_ref; i++) { 186 const struct v4l2_h264_dpb_entry *dpb; 187 const struct cedrus_buffer *cedrus_buf; 188 unsigned int position; 189 int buf_idx; 190 u8 dpb_idx; 191 192 dpb_idx = ref_list[i].index; 193 dpb = &decode->dpb[dpb_idx]; 194 195 if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) 196 continue; 197 198 buf_idx = vb2_find_timestamp(cap_q, dpb->reference_ts, 0); 199 if (buf_idx < 0) 200 continue; 201 202 cedrus_buf = vb2_to_cedrus_buffer(cap_q->bufs[buf_idx]); 203 position = cedrus_buf->codec.h264.position; 204 205 sram_array[i] |= position << 1; 206 if (ref_list[i].fields == V4L2_H264_BOTTOM_FIELD_REF) 207 sram_array[i] |= BIT(0); 208 } 209 210 size = min_t(size_t, ALIGN(num_ref, 4), sizeof(sram_array)); 211 cedrus_h264_write_sram(dev, sram, &sram_array, size); 212 } 213 214 static void cedrus_write_ref_list0(struct cedrus_ctx *ctx, 215 struct cedrus_run *run) 216 { 217 const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params; 218 219 _cedrus_write_ref_list(ctx, run, 220 slice->ref_pic_list0, 221 slice->num_ref_idx_l0_active_minus1 + 1, 222 CEDRUS_SRAM_H264_REF_LIST_0); 223 } 224 225 static void cedrus_write_ref_list1(struct cedrus_ctx *ctx, 226 struct cedrus_run *run) 227 { 228 const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params; 229 230 _cedrus_write_ref_list(ctx, run, 231 slice->ref_pic_list1, 232 slice->num_ref_idx_l1_active_minus1 + 1, 233 CEDRUS_SRAM_H264_REF_LIST_1); 234 } 235 236 static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx, 237 struct cedrus_run *run) 238 { 239 const struct v4l2_ctrl_h264_scaling_matrix *scaling = 240 run->h264.scaling_matrix; 241 const struct v4l2_ctrl_h264_pps *pps = run->h264.pps; 242 struct cedrus_dev *dev = ctx->dev; 243 244 if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT)) 245 return; 246 247 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0, 248 scaling->scaling_list_8x8[0], 249 sizeof(scaling->scaling_list_8x8[0])); 250 251 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1, 252 scaling->scaling_list_8x8[1], 253 sizeof(scaling->scaling_list_8x8[1])); 254 255 cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4, 256 scaling->scaling_list_4x4, 257 sizeof(scaling->scaling_list_4x4)); 258 } 259 260 static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx, 261 struct cedrus_run *run) 262 { 263 const struct v4l2_ctrl_h264_pred_weights *pred_weight = 264 run->h264.pred_weights; 265 struct cedrus_dev *dev = ctx->dev; 266 int i, j, k; 267 268 cedrus_write(dev, VE_H264_SHS_WP, 269 ((pred_weight->chroma_log2_weight_denom & 0x7) << 4) | 270 ((pred_weight->luma_log2_weight_denom & 0x7) << 0)); 271 272 cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, 273 CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2); 274 275 for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) { 276 const struct v4l2_h264_weight_factors *factors = 277 &pred_weight->weight_factors[i]; 278 279 for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) { 280 u32 val; 281 282 val = (((u32)factors->luma_offset[j] & 0x1ff) << 16) | 283 (factors->luma_weight[j] & 0x1ff); 284 cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val); 285 } 286 287 for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j++) { 288 for (k = 0; k < ARRAY_SIZE(factors->chroma_weight[0]); k++) { 289 u32 val; 290 291 val = (((u32)factors->chroma_offset[j][k] & 0x1ff) << 16) | 292 (factors->chroma_weight[j][k] & 0x1ff); 293 cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val); 294 } 295 } 296 } 297 } 298 299 /* 300 * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In 301 * rare cases frame is not decoded correctly. However, setting offset to 0 and 302 * skipping appropriate amount of bits with flush bits trigger always works. 303 */ 304 static void cedrus_skip_bits(struct cedrus_dev *dev, int num) 305 { 306 int count = 0; 307 308 while (count < num) { 309 int tmp = min(num - count, 32); 310 311 cedrus_write(dev, VE_H264_TRIGGER_TYPE, 312 VE_H264_TRIGGER_TYPE_FLUSH_BITS | 313 VE_H264_TRIGGER_TYPE_N_BITS(tmp)); 314 while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY) 315 udelay(1); 316 317 count += tmp; 318 } 319 } 320 321 static void cedrus_set_params(struct cedrus_ctx *ctx, 322 struct cedrus_run *run) 323 { 324 const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; 325 const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params; 326 const struct v4l2_ctrl_h264_pps *pps = run->h264.pps; 327 const struct v4l2_ctrl_h264_sps *sps = run->h264.sps; 328 struct vb2_buffer *src_buf = &run->src->vb2_buf; 329 struct cedrus_dev *dev = ctx->dev; 330 dma_addr_t src_buf_addr; 331 size_t slice_bytes = vb2_get_plane_payload(src_buf, 0); 332 unsigned int pic_width_in_mbs; 333 bool mbaff_pic; 334 u32 reg; 335 336 cedrus_write(dev, VE_H264_VLD_LEN, slice_bytes * 8); 337 cedrus_write(dev, VE_H264_VLD_OFFSET, 0); 338 339 src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0); 340 cedrus_write(dev, VE_H264_VLD_END, src_buf_addr + slice_bytes); 341 cedrus_write(dev, VE_H264_VLD_ADDR, 342 VE_H264_VLD_ADDR_VAL(src_buf_addr) | 343 VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID | 344 VE_H264_VLD_ADDR_LAST); 345 346 if (ctx->src_fmt.width > 2048) { 347 cedrus_write(dev, VE_BUF_CTRL, 348 VE_BUF_CTRL_INTRAPRED_MIXED_RAM | 349 VE_BUF_CTRL_DBLK_MIXED_RAM); 350 cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR, 351 ctx->codec.h264.deblk_buf_dma); 352 cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR, 353 ctx->codec.h264.intra_pred_buf_dma); 354 } else { 355 cedrus_write(dev, VE_BUF_CTRL, 356 VE_BUF_CTRL_INTRAPRED_INT_SRAM | 357 VE_BUF_CTRL_DBLK_INT_SRAM); 358 } 359 360 /* 361 * FIXME: Since the bitstream parsing is done in software, and 362 * in userspace, this shouldn't be needed anymore. But it 363 * turns out that removing it breaks the decoding process, 364 * without any clear indication why. 365 */ 366 cedrus_write(dev, VE_H264_TRIGGER_TYPE, 367 VE_H264_TRIGGER_TYPE_INIT_SWDEC); 368 369 cedrus_skip_bits(dev, slice->header_bit_size); 370 371 if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(pps, slice)) 372 cedrus_write_pred_weight_table(ctx, run); 373 374 if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) || 375 (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) || 376 (slice->slice_type == V4L2_H264_SLICE_TYPE_B)) 377 cedrus_write_ref_list0(ctx, run); 378 379 if (slice->slice_type == V4L2_H264_SLICE_TYPE_B) 380 cedrus_write_ref_list1(ctx, run); 381 382 // picture parameters 383 reg = 0; 384 /* 385 * FIXME: the kernel headers are allowing the default value to 386 * be passed, but the libva doesn't give us that. 387 */ 388 reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10; 389 reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5; 390 reg |= (pps->weighted_bipred_idc & 0x3) << 2; 391 if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE) 392 reg |= VE_H264_PPS_ENTROPY_CODING_MODE; 393 if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) 394 reg |= VE_H264_PPS_WEIGHTED_PRED; 395 if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED) 396 reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED; 397 if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE) 398 reg |= VE_H264_PPS_TRANSFORM_8X8_MODE; 399 cedrus_write(dev, VE_H264_PPS, reg); 400 401 // sequence parameters 402 reg = 0; 403 reg |= (sps->chroma_format_idc & 0x7) << 19; 404 reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8; 405 reg |= sps->pic_height_in_map_units_minus1 & 0xff; 406 if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) 407 reg |= VE_H264_SPS_MBS_ONLY; 408 if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) 409 reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD; 410 if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE) 411 reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE; 412 cedrus_write(dev, VE_H264_SPS, reg); 413 414 mbaff_pic = !(decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) && 415 (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD); 416 pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1; 417 418 // slice parameters 419 reg = 0; 420 reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24; 421 reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) * 422 (mbaff_pic + 1)) & 0xff) << 16; 423 reg |= decode->nal_ref_idc ? BIT(12) : 0; 424 reg |= (slice->slice_type & 0xf) << 8; 425 reg |= slice->cabac_init_idc & 0x3; 426 if (ctx->fh.m2m_ctx->new_frame) 427 reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC; 428 if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) 429 reg |= VE_H264_SHS_FIELD_PIC; 430 if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) 431 reg |= VE_H264_SHS_BOTTOM_FIELD; 432 if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED) 433 reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED; 434 cedrus_write(dev, VE_H264_SHS, reg); 435 436 reg = 0; 437 reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD; 438 reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24; 439 reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16; 440 reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8; 441 reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4; 442 reg |= slice->slice_beta_offset_div2 & 0xf; 443 cedrus_write(dev, VE_H264_SHS2, reg); 444 445 reg = 0; 446 reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16; 447 reg |= (pps->chroma_qp_index_offset & 0x3f) << 8; 448 reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f; 449 if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT)) 450 reg |= VE_H264_SHS_QP_SCALING_MATRIX_DEFAULT; 451 cedrus_write(dev, VE_H264_SHS_QP, reg); 452 453 // clear status flags 454 cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev, VE_H264_STATUS)); 455 456 // enable int 457 cedrus_write(dev, VE_H264_CTRL, 458 VE_H264_CTRL_SLICE_DECODE_INT | 459 VE_H264_CTRL_DECODE_ERR_INT | 460 VE_H264_CTRL_VLD_DATA_REQ_INT); 461 } 462 463 static enum cedrus_irq_status 464 cedrus_h264_irq_status(struct cedrus_ctx *ctx) 465 { 466 struct cedrus_dev *dev = ctx->dev; 467 u32 reg = cedrus_read(dev, VE_H264_STATUS); 468 469 if (reg & (VE_H264_STATUS_DECODE_ERR_INT | 470 VE_H264_STATUS_VLD_DATA_REQ_INT)) 471 return CEDRUS_IRQ_ERROR; 472 473 if (reg & VE_H264_CTRL_SLICE_DECODE_INT) 474 return CEDRUS_IRQ_OK; 475 476 return CEDRUS_IRQ_NONE; 477 } 478 479 static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx) 480 { 481 struct cedrus_dev *dev = ctx->dev; 482 483 cedrus_write(dev, VE_H264_STATUS, 484 VE_H264_STATUS_INT_MASK); 485 } 486 487 static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx) 488 { 489 struct cedrus_dev *dev = ctx->dev; 490 u32 reg = cedrus_read(dev, VE_H264_CTRL); 491 492 cedrus_write(dev, VE_H264_CTRL, 493 reg & ~VE_H264_CTRL_INT_MASK); 494 } 495 496 static int cedrus_h264_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) 497 { 498 struct cedrus_dev *dev = ctx->dev; 499 500 cedrus_engine_enable(ctx, CEDRUS_CODEC_H264); 501 502 cedrus_write(dev, VE_H264_SDROT_CTRL, 0); 503 cedrus_write(dev, VE_H264_EXTRA_BUFFER1, 504 ctx->codec.h264.pic_info_buf_dma); 505 cedrus_write(dev, VE_H264_EXTRA_BUFFER2, 506 ctx->codec.h264.neighbor_info_buf_dma); 507 508 cedrus_write_scaling_lists(ctx, run); 509 cedrus_write_frame_list(ctx, run); 510 511 cedrus_set_params(ctx, run); 512 513 return 0; 514 } 515 516 static int cedrus_h264_start(struct cedrus_ctx *ctx) 517 { 518 struct cedrus_dev *dev = ctx->dev; 519 unsigned int pic_info_size; 520 unsigned int field_size; 521 unsigned int mv_col_size; 522 int ret; 523 524 /* 525 * NOTE: All buffers allocated here are only used by HW, so we 526 * can add DMA_ATTR_NO_KERNEL_MAPPING flag when allocating them. 527 */ 528 529 /* Formula for picture buffer size is taken from CedarX source. */ 530 531 if (ctx->src_fmt.width > 2048) 532 pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000; 533 else 534 pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000; 535 536 /* 537 * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set, 538 * there is no need to multiply by 2. 539 */ 540 pic_info_size += ctx->src_fmt.height * 2 * 64; 541 542 if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE) 543 pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE; 544 545 ctx->codec.h264.pic_info_buf_size = pic_info_size; 546 ctx->codec.h264.pic_info_buf = 547 dma_alloc_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, 548 &ctx->codec.h264.pic_info_buf_dma, 549 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 550 if (!ctx->codec.h264.pic_info_buf) 551 return -ENOMEM; 552 553 /* 554 * That buffer is supposed to be 16kiB in size, and be aligned 555 * on 16kiB as well. However, dma_alloc_attrs provides the 556 * guarantee that we'll have a DMA address aligned on the 557 * smallest page order that is greater to the requested size, 558 * so we don't have to overallocate. 559 */ 560 ctx->codec.h264.neighbor_info_buf = 561 dma_alloc_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, 562 &ctx->codec.h264.neighbor_info_buf_dma, 563 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 564 if (!ctx->codec.h264.neighbor_info_buf) { 565 ret = -ENOMEM; 566 goto err_pic_buf; 567 } 568 569 field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) * 570 DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16; 571 572 /* 573 * FIXME: This is actually conditional to 574 * V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE not being set, we 575 * might have to rework this if memory efficiency ever is 576 * something we need to work on. 577 */ 578 field_size = field_size * 2; 579 580 /* 581 * FIXME: This is actually conditional to 582 * V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY not being set, we might 583 * have to rework this if memory efficiency ever is something 584 * we need to work on. 585 */ 586 field_size = field_size * 2; 587 ctx->codec.h264.mv_col_buf_field_size = field_size; 588 589 mv_col_size = field_size * 2 * CEDRUS_H264_FRAME_NUM; 590 ctx->codec.h264.mv_col_buf_size = mv_col_size; 591 ctx->codec.h264.mv_col_buf = 592 dma_alloc_attrs(dev->dev, 593 ctx->codec.h264.mv_col_buf_size, 594 &ctx->codec.h264.mv_col_buf_dma, 595 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 596 if (!ctx->codec.h264.mv_col_buf) { 597 ret = -ENOMEM; 598 goto err_neighbor_buf; 599 } 600 601 if (ctx->src_fmt.width > 2048) { 602 /* 603 * Formulas for deblock and intra prediction buffer sizes 604 * are taken from CedarX source. 605 */ 606 607 ctx->codec.h264.deblk_buf_size = 608 ALIGN(ctx->src_fmt.width, 32) * 12; 609 ctx->codec.h264.deblk_buf = 610 dma_alloc_attrs(dev->dev, 611 ctx->codec.h264.deblk_buf_size, 612 &ctx->codec.h264.deblk_buf_dma, 613 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 614 if (!ctx->codec.h264.deblk_buf) { 615 ret = -ENOMEM; 616 goto err_mv_col_buf; 617 } 618 619 /* 620 * NOTE: Multiplying by two deviates from CedarX logic, but it 621 * is for some unknown reason needed for H264 4K decoding on H6. 622 */ 623 ctx->codec.h264.intra_pred_buf_size = 624 ALIGN(ctx->src_fmt.width, 64) * 5 * 2; 625 ctx->codec.h264.intra_pred_buf = 626 dma_alloc_attrs(dev->dev, 627 ctx->codec.h264.intra_pred_buf_size, 628 &ctx->codec.h264.intra_pred_buf_dma, 629 GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); 630 if (!ctx->codec.h264.intra_pred_buf) { 631 ret = -ENOMEM; 632 goto err_deblk_buf; 633 } 634 } 635 636 return 0; 637 638 err_deblk_buf: 639 dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size, 640 ctx->codec.h264.deblk_buf, 641 ctx->codec.h264.deblk_buf_dma, 642 DMA_ATTR_NO_KERNEL_MAPPING); 643 644 err_mv_col_buf: 645 dma_free_attrs(dev->dev, ctx->codec.h264.mv_col_buf_size, 646 ctx->codec.h264.mv_col_buf, 647 ctx->codec.h264.mv_col_buf_dma, 648 DMA_ATTR_NO_KERNEL_MAPPING); 649 650 err_neighbor_buf: 651 dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, 652 ctx->codec.h264.neighbor_info_buf, 653 ctx->codec.h264.neighbor_info_buf_dma, 654 DMA_ATTR_NO_KERNEL_MAPPING); 655 656 err_pic_buf: 657 dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, 658 ctx->codec.h264.pic_info_buf, 659 ctx->codec.h264.pic_info_buf_dma, 660 DMA_ATTR_NO_KERNEL_MAPPING); 661 return ret; 662 } 663 664 static void cedrus_h264_stop(struct cedrus_ctx *ctx) 665 { 666 struct cedrus_dev *dev = ctx->dev; 667 668 dma_free_attrs(dev->dev, ctx->codec.h264.mv_col_buf_size, 669 ctx->codec.h264.mv_col_buf, 670 ctx->codec.h264.mv_col_buf_dma, 671 DMA_ATTR_NO_KERNEL_MAPPING); 672 dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, 673 ctx->codec.h264.neighbor_info_buf, 674 ctx->codec.h264.neighbor_info_buf_dma, 675 DMA_ATTR_NO_KERNEL_MAPPING); 676 dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, 677 ctx->codec.h264.pic_info_buf, 678 ctx->codec.h264.pic_info_buf_dma, 679 DMA_ATTR_NO_KERNEL_MAPPING); 680 if (ctx->codec.h264.deblk_buf_size) 681 dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size, 682 ctx->codec.h264.deblk_buf, 683 ctx->codec.h264.deblk_buf_dma, 684 DMA_ATTR_NO_KERNEL_MAPPING); 685 if (ctx->codec.h264.intra_pred_buf_size) 686 dma_free_attrs(dev->dev, ctx->codec.h264.intra_pred_buf_size, 687 ctx->codec.h264.intra_pred_buf, 688 ctx->codec.h264.intra_pred_buf_dma, 689 DMA_ATTR_NO_KERNEL_MAPPING); 690 } 691 692 static void cedrus_h264_trigger(struct cedrus_ctx *ctx) 693 { 694 struct cedrus_dev *dev = ctx->dev; 695 696 cedrus_write(dev, VE_H264_TRIGGER_TYPE, 697 VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE); 698 } 699 700 struct cedrus_dec_ops cedrus_dec_ops_h264 = { 701 .irq_clear = cedrus_h264_irq_clear, 702 .irq_disable = cedrus_h264_irq_disable, 703 .irq_status = cedrus_h264_irq_status, 704 .setup = cedrus_h264_setup, 705 .start = cedrus_h264_start, 706 .stop = cedrus_h264_stop, 707 .trigger = cedrus_h264_trigger, 708 }; 709