1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2020, The Linux Foundation. All rights reserved. 4 */ 5 #include <linux/kernel.h> 6 #include <linux/sizes.h> 7 #include <linux/videodev2.h> 8 9 #include "hfi.h" 10 #include "hfi_plat_bufs.h" 11 #include "helpers.h" 12 13 #define MIN_INPUT_BUFFERS 4 14 #define MIN_ENC_OUTPUT_BUFFERS 4 15 16 #define NV12_UBWC_Y_TILE_WIDTH 32 17 #define NV12_UBWC_Y_TILE_HEIGHT 8 18 #define NV12_UBWC_UV_TILE_WIDTH 16 19 #define NV12_UBWC_UV_TILE_HEIGHT 8 20 #define TP10_UBWC_Y_TILE_WIDTH 48 21 #define TP10_UBWC_Y_TILE_HEIGHT 4 22 #define METADATA_STRIDE_MULTIPLE 64 23 #define METADATA_HEIGHT_MULTIPLE 16 24 #define HFI_DMA_ALIGNMENT 256 25 26 #define MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE 64 27 #define MAX_FE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE 64 28 #define MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE 64 29 #define MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE 640 30 #define MAX_FE_NBR_DATA_CB_LINE_BUFFER_SIZE 320 31 #define MAX_FE_NBR_DATA_CR_LINE_BUFFER_SIZE 320 32 33 #define MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE (128 / 8) 34 #define MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE (128 / 8) 35 #define MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE (128 / 8) 36 37 #define MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE (64 * 2 * 3) 38 #define MAX_PE_NBR_DATA_LCU32_LINE_BUFFER_SIZE (32 * 2 * 3) 39 #define MAX_PE_NBR_DATA_LCU16_LINE_BUFFER_SIZE (16 * 2 * 3) 40 41 #define MAX_TILE_COLUMNS 32 /* 8K/256 */ 42 43 #define NUM_HW_PIC_BUF 10 44 #define BIN_BUFFER_THRESHOLD (1280 * 736) 45 #define H264D_MAX_SLICE 1800 46 /* sizeof(h264d_buftab_t) aligned to 256 */ 47 #define SIZE_H264D_BUFTAB_T 256 48 /* sizeof(h264d_hw_pic_t) aligned to 32 */ 49 #define SIZE_H264D_HW_PIC_T BIT(11) 50 #define SIZE_H264D_BSE_CMD_PER_BUF (32 * 4) 51 #define SIZE_H264D_VPP_CMD_PER_BUF 512 52 53 /* Line Buffer definitions, One for Luma and 1/2 for each Chroma */ 54 #define SIZE_H264D_LB_FE_TOP_DATA(width, height) \ 55 (MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE * ALIGN((width), 16) * 3) 56 57 #define SIZE_H264D_LB_FE_TOP_CTRL(width, height) \ 58 (MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4)) 59 60 #define SIZE_H264D_LB_FE_LEFT_CTRL(width, height) \ 61 (MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((height) + 15) >> 4)) 62 63 #define SIZE_H264D_LB_SE_TOP_CTRL(width, height) \ 64 (MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4)) 65 66 #define SIZE_H264D_LB_SE_LEFT_CTRL(width, height) \ 67 (MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((height) + 15) >> 4)) 68 69 #define SIZE_H264D_LB_PE_TOP_DATA(width, height) \ 70 (MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4)) 71 72 #define SIZE_H264D_LB_VSP_TOP(width, height) (((((width) + 15) >> 4) << 7)) 73 74 #define SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height) \ 75 (ALIGN((height), 16) * 32) 76 77 #define SIZE_H264D_QP(width, height) \ 78 ((((width) + 63) >> 6) * (((height) + 63) >> 6) * 128) 79 80 #define SIZE_HW_PIC(size_per_buf) (NUM_HW_PIC_BUF * (size_per_buf)) 81 82 #define H264_CABAC_HDR_RATIO_HD_TOT 1 83 #define H264_CABAC_RES_RATIO_HD_TOT 3 84 85 /* 86 * Some content need more bin buffer, but limit buffer 87 * size for high resolution 88 */ 89 #define NUM_SLIST_BUF_H264 (256 + 32) 90 #define SIZE_SLIST_BUF_H264 512 91 #define LCU_MAX_SIZE_PELS 64 92 #define LCU_MIN_SIZE_PELS 16 93 94 #define H265D_MAX_SLICE 600 95 #define SIZE_H265D_HW_PIC_T SIZE_H264D_HW_PIC_T 96 #define SIZE_H265D_BSE_CMD_PER_BUF (16 * sizeof(u32)) 97 #define SIZE_H265D_VPP_CMD_PER_BUF 256 98 99 #define SIZE_H265D_LB_FE_TOP_DATA(width, height) \ 100 (MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE * (ALIGN(width, 64) + 8) * 2) 101 102 #define SIZE_H265D_LB_FE_TOP_CTRL(width, height) \ 103 (MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * \ 104 (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS)) 105 106 #define SIZE_H265D_LB_FE_LEFT_CTRL(width, height) \ 107 (MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * \ 108 (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS)) 109 110 #define SIZE_H265D_LB_SE_TOP_CTRL(width, height) \ 111 ((LCU_MAX_SIZE_PELS / 8 * (128 / 8)) * (((width) + 15) >> 4)) 112 113 static inline u32 size_h265d_lb_se_left_ctrl(u32 width, u32 height) 114 { 115 u32 x, y, z; 116 117 x = ((height + 16 - 1) / 8) * MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE; 118 y = ((height + 32 - 1) / 8) * MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE; 119 z = ((height + 64 - 1) / 8) * MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE; 120 121 return max3(x, y, z); 122 } 123 124 #define SIZE_H265D_LB_PE_TOP_DATA(width, height) \ 125 (MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE * \ 126 (ALIGN(width, LCU_MIN_SIZE_PELS) / LCU_MIN_SIZE_PELS)) 127 128 #define SIZE_H265D_LB_VSP_TOP(width, height) ((((width) + 63) >> 6) * 128) 129 130 #define SIZE_H265D_LB_VSP_LEFT(width, height) ((((height) + 63) >> 6) * 128) 131 132 #define SIZE_H265D_LB_RECON_DMA_METADATA_WR(width, height) \ 133 SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height) 134 135 #define SIZE_H265D_QP(width, height) SIZE_H264D_QP(width, height) 136 137 #define H265_CABAC_HDR_RATIO_HD_TOT 2 138 #define H265_CABAC_RES_RATIO_HD_TOT 2 139 140 /* 141 * Some content need more bin buffer, but limit buffer size 142 * for high resolution 143 */ 144 #define SIZE_SLIST_BUF_H265 BIT(10) 145 #define NUM_SLIST_BUF_H265 (80 + 20) 146 #define H265_NUM_TILE_COL 32 147 #define H265_NUM_TILE_ROW 128 148 #define H265_NUM_TILE (H265_NUM_TILE_ROW * H265_NUM_TILE_COL + 1) 149 150 static inline u32 size_vpxd_lb_fe_left_ctrl(u32 width, u32 height) 151 { 152 u32 x, y, z; 153 154 x = ((height + 15) >> 4) * MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE; 155 y = ((height + 31) >> 5) * MAX_FE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE; 156 z = ((height + 63) >> 6) * MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE; 157 158 return max3(x, y, z); 159 } 160 161 #define SIZE_VPXD_LB_FE_TOP_CTRL(width, height) \ 162 (((ALIGN(width, 64) + 8) * 10 * 2)) /* small line */ 163 #define SIZE_VPXD_LB_SE_TOP_CTRL(width, height) \ 164 ((((width) + 15) >> 4) * MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE) 165 166 static inline u32 size_vpxd_lb_se_left_ctrl(u32 width, u32 height) 167 { 168 u32 x, y, z; 169 170 x = ((height + 15) >> 4) * MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE; 171 y = ((height + 31) >> 5) * MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE; 172 z = ((height + 63) >> 6) * MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE; 173 174 return max3(x, y, z); 175 } 176 177 #define SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height) \ 178 ALIGN((ALIGN(height, 16) / (4 / 2)) * 64, 32) 179 #define SIZE_VP8D_LB_FE_TOP_DATA(width, height) \ 180 ((ALIGN(width, 16) + 8) * 10 * 2) 181 #define SIZE_VP9D_LB_FE_TOP_DATA(width, height) \ 182 ((ALIGN(ALIGN(width, 16), 64) + 8) * 10 * 2) 183 #define SIZE_VP8D_LB_PE_TOP_DATA(width, height) \ 184 ((ALIGN(width, 16) >> 4) * 64) 185 #define SIZE_VP9D_LB_PE_TOP_DATA(width, height) \ 186 ((ALIGN(ALIGN(width, 16), 64) >> 6) * 176) 187 #define SIZE_VP8D_LB_VSP_TOP(width, height) \ 188 (((ALIGN(width, 16) >> 4) * 64 / 2) + 256) 189 #define SIZE_VP9D_LB_VSP_TOP(width, height) \ 190 (((ALIGN(ALIGN(width, 16), 64) >> 6) * 64 * 8) + 256) 191 192 #define HFI_IRIS2_VP9D_COMV_SIZE \ 193 ((((8192 + 63) >> 6) * ((4320 + 63) >> 6) * 8 * 8 * 2 * 8)) 194 195 #define VPX_DECODER_FRAME_CONCURENCY_LVL 2 196 #define VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_NUM 1 197 #define VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_DEN 2 198 #define VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_NUM 3 199 #define VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_DEN 2 200 201 #define VP8_NUM_FRAME_INFO_BUF (5 + 1) 202 #define VP9_NUM_FRAME_INFO_BUF (8 + 2 + 1 + 8) 203 #define VP8_NUM_PROBABILITY_TABLE_BUF VP8_NUM_FRAME_INFO_BUF 204 #define VP9_NUM_PROBABILITY_TABLE_BUF (VP9_NUM_FRAME_INFO_BUF + 4) 205 #define VP8_PROB_TABLE_SIZE 3840 206 #define VP9_PROB_TABLE_SIZE 3840 207 208 #define VP9_UDC_HEADER_BUF_SIZE (3 * 128) 209 #define MAX_SUPERFRAME_HEADER_LEN 34 210 #define CCE_TILE_OFFSET_SIZE ALIGN(32 * 4 * 4, 32) 211 212 #define QMATRIX_SIZE (sizeof(u32) * 128 + 256) 213 #define MP2D_QPDUMP_SIZE 115200 214 #define HFI_IRIS2_ENC_PERSIST_SIZE 102400 215 #define HFI_MAX_COL_FRAME 6 216 #define HFI_VENUS_VENC_TRE_WB_BUFF_SIZE (65 << 4) /* in Bytes */ 217 #define HFI_VENUS_VENC_DB_LINE_BUFF_PER_MB 512 218 #define HFI_VENUS_VPPSG_MAX_REGISTERS 2048 219 #define HFI_VENUS_WIDTH_ALIGNMENT 128 220 #define HFI_VENUS_WIDTH_TEN_BIT_ALIGNMENT 192 221 #define HFI_VENUS_HEIGHT_ALIGNMENT 32 222 223 #define SYSTEM_LAL_TILE10 192 224 #define NUM_MBS_720P (((1280 + 15) >> 4) * ((720 + 15) >> 4)) 225 #define NUM_MBS_4K (((4096 + 15) >> 4) * ((2304 + 15) >> 4)) 226 #define MB_SIZE_IN_PIXEL (16 * 16) 227 #define HDR10PLUS_PAYLOAD_SIZE 1024 228 #define HDR10_HIST_EXTRADATA_SIZE 4096 229 230 static u32 size_vpss_lb(u32 width, u32 height, u32 num_vpp_pipes) 231 { 232 u32 vpss_4tap_top_buffer_size, vpss_div2_top_buffer_size; 233 u32 vpss_4tap_left_buffer_size, vpss_div2_left_buffer_size; 234 u32 opb_wr_top_line_luma_buf_size, opb_wr_top_line_chroma_buf_size; 235 u32 opb_lb_wr_llb_y_buffer_size, opb_lb_wr_llb_uv_buffer_size; 236 u32 macrotiling_size; 237 u32 size = 0; 238 239 vpss_4tap_top_buffer_size = 0; 240 vpss_div2_top_buffer_size = 0; 241 vpss_4tap_left_buffer_size = 0; 242 vpss_div2_left_buffer_size = 0; 243 244 macrotiling_size = 32; 245 opb_wr_top_line_luma_buf_size = 246 ALIGN(width, macrotiling_size) / macrotiling_size * 256; 247 opb_wr_top_line_luma_buf_size = 248 ALIGN(opb_wr_top_line_luma_buf_size, HFI_DMA_ALIGNMENT) + 249 (MAX_TILE_COLUMNS - 1) * 256; 250 opb_wr_top_line_luma_buf_size = 251 max(opb_wr_top_line_luma_buf_size, (32 * ALIGN(height, 16))); 252 opb_wr_top_line_chroma_buf_size = opb_wr_top_line_luma_buf_size; 253 opb_lb_wr_llb_y_buffer_size = ALIGN((ALIGN(height, 16) / 2) * 64, 32); 254 opb_lb_wr_llb_uv_buffer_size = opb_lb_wr_llb_y_buffer_size; 255 size = num_vpp_pipes * 256 2 * (vpss_4tap_top_buffer_size + vpss_div2_top_buffer_size) + 257 2 * (vpss_4tap_left_buffer_size + vpss_div2_left_buffer_size) + 258 opb_wr_top_line_luma_buf_size + 259 opb_wr_top_line_chroma_buf_size + 260 opb_lb_wr_llb_uv_buffer_size + 261 opb_lb_wr_llb_y_buffer_size; 262 263 return size; 264 } 265 266 static u32 size_h264d_hw_bin_buffer(u32 width, u32 height) 267 { 268 u32 size_yuv, size_bin_hdr, size_bin_res; 269 u32 size = 0; 270 u32 product; 271 272 product = width * height; 273 size_yuv = (product <= BIN_BUFFER_THRESHOLD) ? 274 ((BIN_BUFFER_THRESHOLD * 3) >> 1) : ((product * 3) >> 1); 275 276 size_bin_hdr = size_yuv * H264_CABAC_HDR_RATIO_HD_TOT; 277 size_bin_res = size_yuv * H264_CABAC_RES_RATIO_HD_TOT; 278 size_bin_hdr = ALIGN(size_bin_hdr, HFI_DMA_ALIGNMENT); 279 size_bin_res = ALIGN(size_bin_res, HFI_DMA_ALIGNMENT); 280 size = size_bin_hdr + size_bin_res; 281 282 return size; 283 } 284 285 static u32 h264d_scratch_size(u32 width, u32 height, bool is_interlaced) 286 { 287 u32 aligned_width = ALIGN(width, 16); 288 u32 aligned_height = ALIGN(height, 16); 289 u32 size = 0; 290 291 if (!is_interlaced) 292 size = size_h264d_hw_bin_buffer(aligned_width, aligned_height); 293 294 return size; 295 } 296 297 static u32 size_h265d_hw_bin_buffer(u32 width, u32 height) 298 { 299 u32 size_yuv, size_bin_hdr, size_bin_res; 300 u32 size = 0; 301 u32 product; 302 303 product = width * height; 304 size_yuv = (product <= BIN_BUFFER_THRESHOLD) ? 305 ((BIN_BUFFER_THRESHOLD * 3) >> 1) : ((product * 3) >> 1); 306 size_bin_hdr = size_yuv * H265_CABAC_HDR_RATIO_HD_TOT; 307 size_bin_res = size_yuv * H265_CABAC_RES_RATIO_HD_TOT; 308 size_bin_hdr = ALIGN(size_bin_hdr, HFI_DMA_ALIGNMENT); 309 size_bin_res = ALIGN(size_bin_res, HFI_DMA_ALIGNMENT); 310 size = size_bin_hdr + size_bin_res; 311 312 return size; 313 } 314 315 static u32 h265d_scratch_size(u32 width, u32 height, bool is_interlaced) 316 { 317 u32 aligned_width = ALIGN(width, 16); 318 u32 aligned_height = ALIGN(height, 16); 319 u32 size = 0; 320 321 if (!is_interlaced) 322 size = size_h265d_hw_bin_buffer(aligned_width, aligned_height); 323 324 return size; 325 } 326 327 static u32 vpxd_scratch_size(u32 width, u32 height, bool is_interlaced) 328 { 329 u32 aligned_width = ALIGN(width, 16); 330 u32 aligned_height = ALIGN(height, 16); 331 u32 size_yuv = aligned_width * aligned_height * 3 / 2; 332 u32 size = 0; 333 334 if (!is_interlaced) { 335 u32 binbuffer1_size, binbufer2_size; 336 337 binbuffer1_size = max_t(u32, size_yuv, 338 ((BIN_BUFFER_THRESHOLD * 3) >> 1)); 339 binbuffer1_size *= VPX_DECODER_FRAME_CONCURENCY_LVL * 340 VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_NUM / 341 VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_DEN; 342 binbufer2_size = max_t(u32, size_yuv, 343 ((BIN_BUFFER_THRESHOLD * 3) >> 1)); 344 binbufer2_size *= VPX_DECODER_FRAME_CONCURENCY_LVL * 345 VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_NUM / 346 VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_DEN; 347 size = ALIGN(binbuffer1_size + binbufer2_size, 348 HFI_DMA_ALIGNMENT); 349 } 350 351 return size; 352 } 353 354 static u32 mpeg2d_scratch_size(u32 width, u32 height, bool is_interlaced) 355 { 356 return 0; 357 } 358 359 static u32 calculate_enc_output_frame_size(u32 width, u32 height, u32 rc_type) 360 { 361 u32 aligned_width, aligned_height; 362 u32 mbs_per_frame; 363 u32 frame_size; 364 365 /* 366 * Encoder output size calculation: 32 Align width/height 367 * For resolution < 720p : YUVsize * 4 368 * For resolution > 720p & <= 4K : YUVsize / 2 369 * For resolution > 4k : YUVsize / 4 370 * Initially frame_size = YUVsize * 2; 371 */ 372 aligned_width = ALIGN(width, 32); 373 aligned_height = ALIGN(height, 32); 374 mbs_per_frame = (ALIGN(aligned_height, 16) * 375 ALIGN(aligned_width, 16)) / 256; 376 frame_size = width * height * 3; 377 378 if (mbs_per_frame < NUM_MBS_720P) 379 frame_size = frame_size << 1; 380 else if (mbs_per_frame <= NUM_MBS_4K) 381 frame_size = frame_size >> 2; 382 else 383 frame_size = frame_size >> 3; 384 385 if (rc_type == HFI_RATE_CONTROL_OFF || rc_type == HFI_RATE_CONTROL_CQ) 386 frame_size = frame_size << 1; 387 388 /* 389 * In case of opaque color format bitdepth will be known 390 * with first ETB, buffers allocated already with 8 bit 391 * won't be sufficient for 10 bit 392 * calculate size considering 10-bit by default 393 * For 10-bit cases size = size * 1.25 394 */ 395 frame_size *= 5; 396 frame_size /= 4; 397 398 return ALIGN(frame_size, SZ_4K); 399 } 400 401 static u32 calculate_enc_scratch_size(u32 width, u32 height, u32 work_mode, 402 u32 lcu_size, u32 num_vpp_pipes, 403 u32 rc_type) 404 { 405 u32 aligned_width, aligned_height, bitstream_size; 406 u32 total_bitbin_buffers, size_single_pipe, bitbin_size; 407 u32 sao_bin_buffer_size, padded_bin_size, size; 408 409 aligned_width = ALIGN(width, lcu_size); 410 aligned_height = ALIGN(height, lcu_size); 411 bitstream_size = 412 calculate_enc_output_frame_size(width, height, rc_type); 413 414 bitstream_size = ALIGN(bitstream_size, HFI_DMA_ALIGNMENT); 415 416 if (work_mode == VIDC_WORK_MODE_2) { 417 total_bitbin_buffers = 3; 418 bitbin_size = bitstream_size * 17 / 10; 419 bitbin_size = ALIGN(bitbin_size, HFI_DMA_ALIGNMENT); 420 } else { 421 total_bitbin_buffers = 1; 422 bitstream_size = aligned_width * aligned_height * 3; 423 bitbin_size = ALIGN(bitstream_size, HFI_DMA_ALIGNMENT); 424 } 425 426 if (num_vpp_pipes > 2) 427 size_single_pipe = bitbin_size / 2; 428 else 429 size_single_pipe = bitbin_size; 430 431 size_single_pipe = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT); 432 sao_bin_buffer_size = 433 (64 * (((width + 32) * (height + 32)) >> 10)) + 384; 434 padded_bin_size = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT); 435 size_single_pipe = sao_bin_buffer_size + padded_bin_size; 436 size_single_pipe = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT); 437 bitbin_size = size_single_pipe * num_vpp_pipes; 438 size = ALIGN(bitbin_size, HFI_DMA_ALIGNMENT) * 439 total_bitbin_buffers + 512; 440 441 return size; 442 } 443 444 static u32 h264e_scratch_size(u32 width, u32 height, u32 work_mode, 445 u32 num_vpp_pipes, u32 rc_type) 446 { 447 return calculate_enc_scratch_size(width, height, work_mode, 16, 448 num_vpp_pipes, rc_type); 449 } 450 451 static u32 h265e_scratch_size(u32 width, u32 height, u32 work_mode, 452 u32 num_vpp_pipes, u32 rc_type) 453 { 454 return calculate_enc_scratch_size(width, height, work_mode, 32, 455 num_vpp_pipes, rc_type); 456 } 457 458 static u32 vp8e_scratch_size(u32 width, u32 height, u32 work_mode, 459 u32 num_vpp_pipes, u32 rc_type) 460 { 461 return calculate_enc_scratch_size(width, height, work_mode, 16, 462 num_vpp_pipes, rc_type); 463 } 464 465 static u32 hfi_iris2_h264d_comv_size(u32 width, u32 height, 466 u32 yuv_buf_min_count) 467 { 468 u32 frame_width_in_mbs = ((width + 15) >> 4); 469 u32 frame_height_in_mbs = ((height + 15) >> 4); 470 u32 col_mv_aligned_width = (frame_width_in_mbs << 6); 471 u32 col_zero_aligned_width = (frame_width_in_mbs << 2); 472 u32 col_zero_size = 0, size_colloc = 0, comv_size = 0; 473 474 col_mv_aligned_width = ALIGN(col_mv_aligned_width, 16); 475 col_zero_aligned_width = ALIGN(col_zero_aligned_width, 16); 476 col_zero_size = 477 col_zero_aligned_width * ((frame_height_in_mbs + 1) >> 1); 478 col_zero_size = ALIGN(col_zero_size, 64); 479 col_zero_size <<= 1; 480 col_zero_size = ALIGN(col_zero_size, 512); 481 size_colloc = col_mv_aligned_width * ((frame_height_in_mbs + 1) >> 1); 482 size_colloc = ALIGN(size_colloc, 64); 483 size_colloc <<= 1; 484 size_colloc = ALIGN(size_colloc, 512); 485 size_colloc += (col_zero_size + SIZE_H264D_BUFTAB_T * 2); 486 comv_size = size_colloc * yuv_buf_min_count; 487 comv_size += 512; 488 489 return comv_size; 490 } 491 492 static u32 size_h264d_bse_cmd_buf(u32 height) 493 { 494 u32 aligned_height = ALIGN(height, 32); 495 496 return min_t(u32, (((aligned_height + 15) >> 4) * 3 * 4), 497 H264D_MAX_SLICE) * SIZE_H264D_BSE_CMD_PER_BUF; 498 } 499 500 static u32 size_h264d_vpp_cmd_buf(u32 height) 501 { 502 u32 aligned_height = ALIGN(height, 32); 503 504 return min_t(u32, (((aligned_height + 15) >> 4) * 3 * 4), 505 H264D_MAX_SLICE) * SIZE_H264D_VPP_CMD_PER_BUF; 506 } 507 508 static u32 hfi_iris2_h264d_non_comv_size(u32 width, u32 height, 509 u32 num_vpp_pipes) 510 { 511 u32 size_bse, size_vpp, size; 512 513 size_bse = size_h264d_bse_cmd_buf(height); 514 size_vpp = size_h264d_vpp_cmd_buf(height); 515 size = 516 ALIGN(size_bse, HFI_DMA_ALIGNMENT) + 517 ALIGN(size_vpp, HFI_DMA_ALIGNMENT) + 518 ALIGN(SIZE_HW_PIC(SIZE_H264D_HW_PIC_T), HFI_DMA_ALIGNMENT) + 519 ALIGN(SIZE_H264D_LB_FE_TOP_DATA(width, height), 520 HFI_DMA_ALIGNMENT) + 521 ALIGN(SIZE_H264D_LB_FE_TOP_CTRL(width, height), 522 HFI_DMA_ALIGNMENT) + 523 ALIGN(SIZE_H264D_LB_FE_LEFT_CTRL(width, height), 524 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 525 ALIGN(SIZE_H264D_LB_SE_TOP_CTRL(width, height), 526 HFI_DMA_ALIGNMENT) + 527 ALIGN(SIZE_H264D_LB_SE_LEFT_CTRL(width, height), 528 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 529 ALIGN(SIZE_H264D_LB_PE_TOP_DATA(width, height), 530 HFI_DMA_ALIGNMENT) + 531 ALIGN(SIZE_H264D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) + 532 ALIGN(SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height), 533 HFI_DMA_ALIGNMENT) * 2 + 534 ALIGN(SIZE_H264D_QP(width, height), HFI_DMA_ALIGNMENT); 535 536 return ALIGN(size, HFI_DMA_ALIGNMENT); 537 } 538 539 static u32 size_h265d_bse_cmd_buf(u32 width, u32 height) 540 { 541 u32 size; 542 543 size = (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) * 544 (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) * 545 NUM_HW_PIC_BUF; 546 size = min_t(u32, size, H265D_MAX_SLICE + 1); 547 size = 2 * size * SIZE_H265D_BSE_CMD_PER_BUF; 548 549 return ALIGN(size, HFI_DMA_ALIGNMENT); 550 } 551 552 static u32 size_h265d_vpp_cmd_buf(u32 width, u32 height) 553 { 554 u32 size; 555 556 size = (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) * 557 (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) * 558 NUM_HW_PIC_BUF; 559 size = min_t(u32, size, H265D_MAX_SLICE + 1); 560 size = ALIGN(size, 4); 561 size = 2 * size * SIZE_H265D_VPP_CMD_PER_BUF; 562 563 return ALIGN(size, HFI_DMA_ALIGNMENT); 564 } 565 566 static u32 hfi_iris2_h265d_comv_size(u32 width, u32 height, 567 u32 yuv_buf_count_min) 568 { 569 u32 size; 570 571 size = ALIGN(((((width + 15) >> 4) * ((height + 15) >> 4)) << 8), 512); 572 size *= yuv_buf_count_min; 573 size += 512; 574 575 return size; 576 } 577 578 static u32 hfi_iris2_h265d_non_comv_size(u32 width, u32 height, 579 u32 num_vpp_pipes) 580 { 581 u32 size_bse, size_vpp, size; 582 583 size_bse = size_h265d_bse_cmd_buf(width, height); 584 size_vpp = size_h265d_vpp_cmd_buf(width, height); 585 size = 586 ALIGN(size_bse, HFI_DMA_ALIGNMENT) + 587 ALIGN(size_vpp, HFI_DMA_ALIGNMENT) + 588 ALIGN(NUM_HW_PIC_BUF * 20 * 22 * 4, HFI_DMA_ALIGNMENT) + 589 ALIGN(2 * sizeof(u16) * 590 (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) * 591 (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS), 592 HFI_DMA_ALIGNMENT) + 593 ALIGN(SIZE_HW_PIC(SIZE_H265D_HW_PIC_T), HFI_DMA_ALIGNMENT) + 594 ALIGN(SIZE_H265D_LB_FE_TOP_DATA(width, height), 595 HFI_DMA_ALIGNMENT) + 596 ALIGN(SIZE_H265D_LB_FE_TOP_CTRL(width, height), 597 HFI_DMA_ALIGNMENT) + 598 ALIGN(SIZE_H265D_LB_FE_LEFT_CTRL(width, height), 599 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 600 ALIGN(size_h265d_lb_se_left_ctrl(width, height), 601 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 602 ALIGN(SIZE_H265D_LB_SE_TOP_CTRL(width, height), 603 HFI_DMA_ALIGNMENT) + 604 ALIGN(SIZE_H265D_LB_PE_TOP_DATA(width, height), 605 HFI_DMA_ALIGNMENT) + 606 ALIGN(SIZE_H265D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) + 607 ALIGN(SIZE_H265D_LB_VSP_LEFT(width, height), 608 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 609 ALIGN(SIZE_H265D_LB_RECON_DMA_METADATA_WR(width, height), 610 HFI_DMA_ALIGNMENT) 611 * 4 + 612 ALIGN(SIZE_H265D_QP(width, height), HFI_DMA_ALIGNMENT); 613 614 return ALIGN(size, HFI_DMA_ALIGNMENT); 615 } 616 617 static u32 hfi_iris2_vp8d_comv_size(u32 width, u32 height, 618 u32 yuv_min_buf_count) 619 { 620 return (((width + 15) >> 4) * ((height + 15) >> 4) * 8 * 2); 621 } 622 623 static u32 h264d_scratch1_size(u32 width, u32 height, u32 min_buf_count, 624 bool split_mode_enabled, u32 num_vpp_pipes) 625 { 626 u32 co_mv_size, nonco_mv_size, vpss_lb_size = 0; 627 628 co_mv_size = hfi_iris2_h264d_comv_size(width, height, min_buf_count); 629 nonco_mv_size = hfi_iris2_h264d_non_comv_size(width, height, 630 num_vpp_pipes); 631 if (split_mode_enabled) 632 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes); 633 634 return co_mv_size + nonco_mv_size + vpss_lb_size; 635 } 636 637 static u32 h265d_scratch1_size(u32 width, u32 height, u32 min_buf_count, 638 bool split_mode_enabled, u32 num_vpp_pipes) 639 { 640 u32 co_mv_size, nonco_mv_size, vpss_lb_size = 0; 641 642 co_mv_size = hfi_iris2_h265d_comv_size(width, height, min_buf_count); 643 nonco_mv_size = hfi_iris2_h265d_non_comv_size(width, height, 644 num_vpp_pipes); 645 if (split_mode_enabled) 646 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes); 647 648 return co_mv_size + nonco_mv_size + vpss_lb_size + 649 HDR10_HIST_EXTRADATA_SIZE; 650 } 651 652 static u32 vp8d_scratch1_size(u32 width, u32 height, u32 min_buf_count, 653 bool split_mode_enabled, u32 num_vpp_pipes) 654 { 655 u32 vpss_lb_size = 0, size; 656 657 size = hfi_iris2_vp8d_comv_size(width, height, 0); 658 size += ALIGN(size_vpxd_lb_fe_left_ctrl(width, height), 659 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 660 ALIGN(size_vpxd_lb_se_left_ctrl(width, height), 661 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 662 ALIGN(SIZE_VP8D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) + 663 ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height), 664 HFI_DMA_ALIGNMENT) + 665 2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height), 666 HFI_DMA_ALIGNMENT) + 667 ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height), 668 HFI_DMA_ALIGNMENT) + 669 ALIGN(SIZE_VP8D_LB_PE_TOP_DATA(width, height), 670 HFI_DMA_ALIGNMENT) + 671 ALIGN(SIZE_VP8D_LB_FE_TOP_DATA(width, height), 672 HFI_DMA_ALIGNMENT); 673 if (split_mode_enabled) 674 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes); 675 676 size += vpss_lb_size; 677 678 return size; 679 } 680 681 static u32 vp9d_scratch1_size(u32 width, u32 height, u32 min_buf_count, 682 bool split_mode_enabled, u32 num_vpp_pipes) 683 { 684 u32 vpss_lb_size = 0; 685 u32 size; 686 687 size = 688 ALIGN(size_vpxd_lb_fe_left_ctrl(width, height), 689 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 690 ALIGN(size_vpxd_lb_se_left_ctrl(width, height), 691 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 692 ALIGN(SIZE_VP9D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) + 693 ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height), 694 HFI_DMA_ALIGNMENT) + 695 2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height), 696 HFI_DMA_ALIGNMENT) + 697 ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height), 698 HFI_DMA_ALIGNMENT) + 699 ALIGN(SIZE_VP9D_LB_PE_TOP_DATA(width, height), 700 HFI_DMA_ALIGNMENT) + 701 ALIGN(SIZE_VP9D_LB_FE_TOP_DATA(width, height), 702 HFI_DMA_ALIGNMENT); 703 704 if (split_mode_enabled) 705 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes); 706 707 size += vpss_lb_size + HDR10_HIST_EXTRADATA_SIZE; 708 709 return size; 710 } 711 712 static u32 mpeg2d_scratch1_size(u32 width, u32 height, u32 min_buf_count, 713 bool split_mode_enabled, u32 num_vpp_pipes) 714 { 715 u32 vpss_lb_size = 0; 716 u32 size; 717 718 size = 719 ALIGN(size_vpxd_lb_fe_left_ctrl(width, height), 720 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 721 ALIGN(size_vpxd_lb_se_left_ctrl(width, height), 722 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 723 ALIGN(SIZE_VP8D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) + 724 ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height), 725 HFI_DMA_ALIGNMENT) + 726 2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height), 727 HFI_DMA_ALIGNMENT) + 728 ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height), 729 HFI_DMA_ALIGNMENT) + 730 ALIGN(SIZE_VP8D_LB_PE_TOP_DATA(width, height), 731 HFI_DMA_ALIGNMENT) + 732 ALIGN(SIZE_VP8D_LB_FE_TOP_DATA(width, height), 733 HFI_DMA_ALIGNMENT); 734 735 if (split_mode_enabled) 736 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes); 737 738 size += vpss_lb_size; 739 740 return size; 741 } 742 743 static u32 744 calculate_enc_scratch1_size(u32 width, u32 height, u32 lcu_size, u32 num_ref, 745 bool ten_bit, u32 num_vpp_pipes, bool is_h265) 746 { 747 u32 line_buf_ctrl_size, line_buf_data_size, leftline_buf_ctrl_size; 748 u32 line_buf_sde_size, sps_pps_slice_hdr, topline_buf_ctrl_size_FE; 749 u32 leftline_buf_ctrl_size_FE, line_buf_recon_pix_size; 750 u32 leftline_buf_recon_pix_size, lambda_lut_size, override_buffer_size; 751 u32 col_mv_buf_size, vpp_reg_buffer_size, ir_buffer_size; 752 u32 vpss_line_buf, leftline_buf_meta_recony, h265e_colrcbuf_size; 753 u32 h265e_framerc_bufsize, h265e_lcubitcnt_bufsize; 754 u32 h265e_lcubitmap_bufsize, se_stats_bufsize; 755 u32 bse_reg_buffer_size, bse_slice_cmd_buffer_size, slice_info_bufsize; 756 u32 line_buf_ctrl_size_buffid2, slice_cmd_buffer_size; 757 u32 width_lcu_num, height_lcu_num, width_coded, height_coded; 758 u32 frame_num_lcu, linebuf_meta_recon_uv, topline_bufsize_fe_1stg_sao; 759 u32 size, bit_depth, num_lcu_mb; 760 u32 vpss_line_buffer_size_1; 761 762 width_lcu_num = (width + lcu_size - 1) / lcu_size; 763 height_lcu_num = (height + lcu_size - 1) / lcu_size; 764 frame_num_lcu = width_lcu_num * height_lcu_num; 765 width_coded = width_lcu_num * lcu_size; 766 height_coded = height_lcu_num * lcu_size; 767 num_lcu_mb = (height_coded / lcu_size) * 768 ((width_coded + lcu_size * 8) / lcu_size); 769 slice_info_bufsize = 256 + (frame_num_lcu << 4); 770 slice_info_bufsize = ALIGN(slice_info_bufsize, HFI_DMA_ALIGNMENT); 771 line_buf_ctrl_size = ALIGN(width_coded, HFI_DMA_ALIGNMENT); 772 line_buf_ctrl_size_buffid2 = ALIGN(width_coded, HFI_DMA_ALIGNMENT); 773 774 bit_depth = ten_bit ? 10 : 8; 775 line_buf_data_size = 776 (((((bit_depth * width_coded + 1024) + 777 (HFI_DMA_ALIGNMENT - 1)) & (~(HFI_DMA_ALIGNMENT - 1))) * 1) + 778 (((((bit_depth * width_coded + 1024) >> 1) + 779 (HFI_DMA_ALIGNMENT - 1)) & (~(HFI_DMA_ALIGNMENT - 1))) * 2)); 780 781 leftline_buf_ctrl_size = is_h265 ? 782 ((height_coded + 32) / 32 * 4 * 16) : 783 ((height_coded + 15) / 16 * 5 * 16); 784 785 if (num_vpp_pipes > 1) { 786 leftline_buf_ctrl_size += 512; 787 leftline_buf_ctrl_size = 788 ALIGN(leftline_buf_ctrl_size, 512) * num_vpp_pipes; 789 } 790 791 leftline_buf_ctrl_size = 792 ALIGN(leftline_buf_ctrl_size, HFI_DMA_ALIGNMENT); 793 leftline_buf_recon_pix_size = (((ten_bit + 1) * 2 * 794 (height_coded) + HFI_DMA_ALIGNMENT) + 795 (HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1) & 796 (~((HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1)) * 1; 797 798 topline_buf_ctrl_size_FE = is_h265 ? (64 * (width_coded >> 5)) : 799 (HFI_DMA_ALIGNMENT + 16 * (width_coded >> 4)); 800 topline_buf_ctrl_size_FE = 801 ALIGN(topline_buf_ctrl_size_FE, HFI_DMA_ALIGNMENT); 802 leftline_buf_ctrl_size_FE = 803 (((HFI_DMA_ALIGNMENT + 64 * (height_coded >> 4)) + 804 (HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1) & 805 (~((HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1)) * 1) * 806 num_vpp_pipes; 807 leftline_buf_meta_recony = (HFI_DMA_ALIGNMENT + 64 * 808 ((height_coded) / (8 * (ten_bit ? 4 : 8)))); 809 leftline_buf_meta_recony = 810 ALIGN(leftline_buf_meta_recony, HFI_DMA_ALIGNMENT); 811 leftline_buf_meta_recony = leftline_buf_meta_recony * num_vpp_pipes; 812 linebuf_meta_recon_uv = (HFI_DMA_ALIGNMENT + 64 * 813 ((height_coded) / (4 * (ten_bit ? 4 : 8)))); 814 linebuf_meta_recon_uv = ALIGN(linebuf_meta_recon_uv, HFI_DMA_ALIGNMENT); 815 linebuf_meta_recon_uv = linebuf_meta_recon_uv * num_vpp_pipes; 816 line_buf_recon_pix_size = ((ten_bit ? 3 : 2) * width_coded); 817 line_buf_recon_pix_size = 818 ALIGN(line_buf_recon_pix_size, HFI_DMA_ALIGNMENT); 819 slice_cmd_buffer_size = ALIGN(20480, HFI_DMA_ALIGNMENT); 820 sps_pps_slice_hdr = 2048 + 4096; 821 col_mv_buf_size = is_h265 ? (16 * ((frame_num_lcu << 2) + 32)) : 822 (3 * 16 * (width_lcu_num * height_lcu_num + 32)); 823 col_mv_buf_size = 824 ALIGN(col_mv_buf_size, HFI_DMA_ALIGNMENT) * (num_ref + 1); 825 h265e_colrcbuf_size = 826 (((width_lcu_num + 7) >> 3) * 16 * 2 * height_lcu_num); 827 if (num_vpp_pipes > 1) 828 h265e_colrcbuf_size = 829 ALIGN(h265e_colrcbuf_size, HFI_DMA_ALIGNMENT) * 830 num_vpp_pipes; 831 832 h265e_colrcbuf_size = ALIGN(h265e_colrcbuf_size, HFI_DMA_ALIGNMENT) * 833 HFI_MAX_COL_FRAME; 834 h265e_framerc_bufsize = (is_h265) ? (256 + 16 * 835 (14 + (((height_coded >> 5) + 7) >> 3))) : 836 (256 + 16 * (14 + (((height_coded >> 4) + 7) >> 3))); 837 h265e_framerc_bufsize *= 6; /* multiply by max numtilescol */ 838 if (num_vpp_pipes > 1) 839 h265e_framerc_bufsize = 840 ALIGN(h265e_framerc_bufsize, HFI_DMA_ALIGNMENT) * 841 num_vpp_pipes; 842 843 h265e_framerc_bufsize = ALIGN(h265e_framerc_bufsize, 512) * 844 HFI_MAX_COL_FRAME; 845 h265e_lcubitcnt_bufsize = 256 + 4 * frame_num_lcu; 846 h265e_lcubitcnt_bufsize = 847 ALIGN(h265e_lcubitcnt_bufsize, HFI_DMA_ALIGNMENT); 848 h265e_lcubitmap_bufsize = 256 + (frame_num_lcu >> 3); 849 h265e_lcubitmap_bufsize = 850 ALIGN(h265e_lcubitmap_bufsize, HFI_DMA_ALIGNMENT); 851 line_buf_sde_size = 256 + 16 * (width_coded >> 4); 852 line_buf_sde_size = ALIGN(line_buf_sde_size, HFI_DMA_ALIGNMENT); 853 if ((width_coded * height_coded) > (4096 * 2160)) 854 se_stats_bufsize = 0; 855 else if ((width_coded * height_coded) > (1920 * 1088)) 856 se_stats_bufsize = (40 * 4 * frame_num_lcu + 256 + 256); 857 else 858 se_stats_bufsize = (1024 * frame_num_lcu + 256 + 256); 859 860 se_stats_bufsize = ALIGN(se_stats_bufsize, HFI_DMA_ALIGNMENT) * 2; 861 bse_slice_cmd_buffer_size = (((8192 << 2) + 7) & (~7)) * 6; 862 bse_reg_buffer_size = (((512 << 3) + 7) & (~7)) * 4; 863 vpp_reg_buffer_size = 864 (((HFI_VENUS_VPPSG_MAX_REGISTERS << 3) + 31) & (~31)) * 10; 865 lambda_lut_size = 256 * 11; 866 override_buffer_size = 16 * ((num_lcu_mb + 7) >> 3); 867 override_buffer_size = 868 ALIGN(override_buffer_size, HFI_DMA_ALIGNMENT) * 2; 869 ir_buffer_size = (((frame_num_lcu << 1) + 7) & (~7)) * 3; 870 vpss_line_buffer_size_1 = (((8192 >> 2) << 5) * num_vpp_pipes) + 64; 871 vpss_line_buf = 872 (((((max(width_coded, height_coded) + 3) >> 2) << 5) + 256) * 873 16) + vpss_line_buffer_size_1; 874 topline_bufsize_fe_1stg_sao = 16 * (width_coded >> 5); 875 topline_bufsize_fe_1stg_sao = 876 ALIGN(topline_bufsize_fe_1stg_sao, HFI_DMA_ALIGNMENT); 877 878 size = 879 line_buf_ctrl_size + line_buf_data_size + 880 line_buf_ctrl_size_buffid2 + leftline_buf_ctrl_size + 881 vpss_line_buf + col_mv_buf_size + topline_buf_ctrl_size_FE + 882 leftline_buf_ctrl_size_FE + line_buf_recon_pix_size + 883 leftline_buf_recon_pix_size + 884 leftline_buf_meta_recony + linebuf_meta_recon_uv + 885 h265e_colrcbuf_size + h265e_framerc_bufsize + 886 h265e_lcubitcnt_bufsize + h265e_lcubitmap_bufsize + 887 line_buf_sde_size + 888 topline_bufsize_fe_1stg_sao + override_buffer_size + 889 bse_reg_buffer_size + vpp_reg_buffer_size + sps_pps_slice_hdr + 890 slice_cmd_buffer_size + bse_slice_cmd_buffer_size + 891 ir_buffer_size + slice_info_bufsize + lambda_lut_size + 892 se_stats_bufsize + 1024; 893 894 return size; 895 } 896 897 static u32 h264e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit, 898 u32 num_vpp_pipes) 899 { 900 return calculate_enc_scratch1_size(width, height, 16, num_ref, ten_bit, 901 num_vpp_pipes, false); 902 } 903 904 static u32 h265e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit, 905 u32 num_vpp_pipes) 906 { 907 return calculate_enc_scratch1_size(width, height, 32, num_ref, ten_bit, 908 num_vpp_pipes, true); 909 } 910 911 static u32 vp8e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit, 912 u32 num_vpp_pipes) 913 { 914 return calculate_enc_scratch1_size(width, height, 16, num_ref, ten_bit, 915 1, false); 916 } 917 918 static u32 ubwc_metadata_plane_stride(u32 width, u32 metadata_stride_multi, 919 u32 tile_width_pels) 920 { 921 return ALIGN(((width + (tile_width_pels - 1)) / tile_width_pels), 922 metadata_stride_multi); 923 } 924 925 static u32 ubwc_metadata_plane_bufheight(u32 height, u32 metadata_height_multi, 926 u32 tile_height_pels) 927 { 928 return ALIGN(((height + (tile_height_pels - 1)) / tile_height_pels), 929 metadata_height_multi); 930 } 931 932 static u32 ubwc_metadata_plane_buffer_size(u32 metadata_stride, 933 u32 metadata_buf_height) 934 { 935 return ALIGN(metadata_stride * metadata_buf_height, SZ_4K); 936 } 937 938 static u32 enc_scratch2_size(u32 width, u32 height, u32 num_ref, bool ten_bit) 939 { 940 u32 aligned_width, aligned_height, chroma_height, ref_buf_height; 941 u32 luma_size, chroma_size; 942 u32 metadata_stride, meta_buf_height, meta_size_y, meta_size_c; 943 u32 ref_luma_stride_bytes, ref_chroma_height_bytes; 944 u32 ref_buf_size, ref_stride; 945 u32 size; 946 947 if (!ten_bit) { 948 aligned_height = ALIGN(height, HFI_VENUS_HEIGHT_ALIGNMENT); 949 chroma_height = height >> 1; 950 chroma_height = ALIGN(chroma_height, 951 HFI_VENUS_HEIGHT_ALIGNMENT); 952 aligned_width = ALIGN(width, HFI_VENUS_WIDTH_ALIGNMENT); 953 metadata_stride = 954 ubwc_metadata_plane_stride(width, 64, 955 NV12_UBWC_Y_TILE_WIDTH); 956 meta_buf_height = 957 ubwc_metadata_plane_bufheight(height, 16, 958 NV12_UBWC_Y_TILE_HEIGHT); 959 meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride, 960 meta_buf_height); 961 meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride, 962 meta_buf_height); 963 size = (aligned_height + chroma_height) * aligned_width + 964 meta_size_y + meta_size_c; 965 size = (size * (num_ref + 3)) + 4096; 966 } else { 967 ref_buf_height = (height + (HFI_VENUS_HEIGHT_ALIGNMENT - 1)) 968 & (~(HFI_VENUS_HEIGHT_ALIGNMENT - 1)); 969 ref_luma_stride_bytes = 970 ((width + SYSTEM_LAL_TILE10 - 1) / SYSTEM_LAL_TILE10) * 971 SYSTEM_LAL_TILE10; 972 ref_stride = 4 * (ref_luma_stride_bytes / 3); 973 ref_stride = (ref_stride + (128 - 1)) & (~(128 - 1)); 974 luma_size = ref_buf_height * ref_stride; 975 ref_chroma_height_bytes = (((height + 1) >> 1) + 976 (32 - 1)) & (~(32 - 1)); 977 chroma_size = ref_stride * ref_chroma_height_bytes; 978 luma_size = (luma_size + (SZ_4K - 1)) & (~(SZ_4K - 1)); 979 chroma_size = (chroma_size + (SZ_4K - 1)) & (~(SZ_4K - 1)); 980 ref_buf_size = luma_size + chroma_size; 981 metadata_stride = 982 ubwc_metadata_plane_stride(width, 983 METADATA_STRIDE_MULTIPLE, 984 TP10_UBWC_Y_TILE_WIDTH); 985 meta_buf_height = 986 ubwc_metadata_plane_bufheight(height, 987 METADATA_HEIGHT_MULTIPLE, 988 TP10_UBWC_Y_TILE_HEIGHT); 989 meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride, 990 meta_buf_height); 991 meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride, 992 meta_buf_height); 993 size = ref_buf_size + meta_size_y + meta_size_c; 994 size = (size * (num_ref + 3)) + 4096; 995 } 996 997 return size; 998 } 999 1000 static u32 enc_persist_size(void) 1001 { 1002 return HFI_IRIS2_ENC_PERSIST_SIZE; 1003 } 1004 1005 static u32 h264d_persist1_size(void) 1006 { 1007 return ALIGN((SIZE_SLIST_BUF_H264 * NUM_SLIST_BUF_H264), 1008 HFI_DMA_ALIGNMENT); 1009 } 1010 1011 static u32 h265d_persist1_size(void) 1012 { 1013 return ALIGN((SIZE_SLIST_BUF_H265 * NUM_SLIST_BUF_H265 + H265_NUM_TILE 1014 * sizeof(u32)), HFI_DMA_ALIGNMENT); 1015 } 1016 1017 static u32 vp8d_persist1_size(void) 1018 { 1019 return ALIGN(VP8_NUM_PROBABILITY_TABLE_BUF * VP8_PROB_TABLE_SIZE, 1020 HFI_DMA_ALIGNMENT); 1021 } 1022 1023 static u32 vp9d_persist1_size(void) 1024 { 1025 return 1026 ALIGN(VP9_NUM_PROBABILITY_TABLE_BUF * VP9_PROB_TABLE_SIZE, 1027 HFI_DMA_ALIGNMENT) + 1028 ALIGN(HFI_IRIS2_VP9D_COMV_SIZE, HFI_DMA_ALIGNMENT) + 1029 ALIGN(MAX_SUPERFRAME_HEADER_LEN, HFI_DMA_ALIGNMENT) + 1030 ALIGN(VP9_UDC_HEADER_BUF_SIZE, HFI_DMA_ALIGNMENT) + 1031 ALIGN(VP9_NUM_FRAME_INFO_BUF * CCE_TILE_OFFSET_SIZE, 1032 HFI_DMA_ALIGNMENT); 1033 } 1034 1035 static u32 mpeg2d_persist1_size(void) 1036 { 1037 return QMATRIX_SIZE + MP2D_QPDUMP_SIZE; 1038 } 1039 1040 struct dec_bufsize_ops { 1041 u32 (*scratch)(u32 width, u32 height, bool is_interlaced); 1042 u32 (*scratch1)(u32 width, u32 height, u32 min_buf_count, 1043 bool split_mode_enabled, u32 num_vpp_pipes); 1044 u32 (*persist1)(void); 1045 }; 1046 1047 struct enc_bufsize_ops { 1048 u32 (*scratch)(u32 width, u32 height, u32 work_mode, u32 num_vpp_pipes, 1049 u32 rc_type); 1050 u32 (*scratch1)(u32 width, u32 height, u32 num_ref, bool ten_bit, 1051 u32 num_vpp_pipes); 1052 u32 (*scratch2)(u32 width, u32 height, u32 num_ref, bool ten_bit); 1053 u32 (*persist)(void); 1054 }; 1055 1056 static struct dec_bufsize_ops dec_h264_ops = { 1057 .scratch = h264d_scratch_size, 1058 .scratch1 = h264d_scratch1_size, 1059 .persist1 = h264d_persist1_size, 1060 }; 1061 1062 static struct dec_bufsize_ops dec_h265_ops = { 1063 .scratch = h265d_scratch_size, 1064 .scratch1 = h265d_scratch1_size, 1065 .persist1 = h265d_persist1_size, 1066 }; 1067 1068 static struct dec_bufsize_ops dec_vp8_ops = { 1069 .scratch = vpxd_scratch_size, 1070 .scratch1 = vp8d_scratch1_size, 1071 .persist1 = vp8d_persist1_size, 1072 }; 1073 1074 static struct dec_bufsize_ops dec_vp9_ops = { 1075 .scratch = vpxd_scratch_size, 1076 .scratch1 = vp9d_scratch1_size, 1077 .persist1 = vp9d_persist1_size, 1078 }; 1079 1080 static struct dec_bufsize_ops dec_mpeg2_ops = { 1081 .scratch = mpeg2d_scratch_size, 1082 .scratch1 = mpeg2d_scratch1_size, 1083 .persist1 = mpeg2d_persist1_size, 1084 }; 1085 1086 static struct enc_bufsize_ops enc_h264_ops = { 1087 .scratch = h264e_scratch_size, 1088 .scratch1 = h264e_scratch1_size, 1089 .scratch2 = enc_scratch2_size, 1090 .persist = enc_persist_size, 1091 }; 1092 1093 static struct enc_bufsize_ops enc_h265_ops = { 1094 .scratch = h265e_scratch_size, 1095 .scratch1 = h265e_scratch1_size, 1096 .scratch2 = enc_scratch2_size, 1097 .persist = enc_persist_size, 1098 }; 1099 1100 static struct enc_bufsize_ops enc_vp8_ops = { 1101 .scratch = vp8e_scratch_size, 1102 .scratch1 = vp8e_scratch1_size, 1103 .scratch2 = enc_scratch2_size, 1104 .persist = enc_persist_size, 1105 }; 1106 1107 static u32 1108 calculate_dec_input_frame_size(u32 width, u32 height, u32 codec, 1109 u32 max_mbs_per_frame, u32 buffer_size_limit) 1110 { 1111 u32 frame_size, num_mbs; 1112 u32 div_factor = 1; 1113 u32 base_res_mbs = NUM_MBS_4K; 1114 1115 /* 1116 * Decoder input size calculation: 1117 * If clip is 8k buffer size is calculated for 8k : 8k mbs/4 1118 * For 8k cases we expect width/height to be set always. 1119 * In all other cases size is calculated for 4k: 1120 * 4k mbs for VP8/VP9 and 4k/2 for remaining codecs 1121 */ 1122 num_mbs = (ALIGN(height, 16) * ALIGN(width, 16)) / 256; 1123 if (num_mbs > NUM_MBS_4K) { 1124 div_factor = 4; 1125 base_res_mbs = max_mbs_per_frame; 1126 } else { 1127 base_res_mbs = NUM_MBS_4K; 1128 if (codec == V4L2_PIX_FMT_VP9) 1129 div_factor = 1; 1130 else 1131 div_factor = 2; 1132 } 1133 1134 frame_size = base_res_mbs * MB_SIZE_IN_PIXEL * 3 / 2 / div_factor; 1135 1136 /* multiply by 10/8 (1.25) to get size for 10 bit case */ 1137 if (codec == V4L2_PIX_FMT_VP9 || codec == V4L2_PIX_FMT_HEVC) 1138 frame_size = frame_size + (frame_size >> 2); 1139 1140 if (buffer_size_limit && buffer_size_limit < frame_size) 1141 frame_size = buffer_size_limit; 1142 1143 return ALIGN(frame_size, SZ_4K); 1144 } 1145 1146 static int output_buffer_count(u32 session_type, u32 codec) 1147 { 1148 u32 output_min_count; 1149 1150 if (session_type == VIDC_SESSION_TYPE_DEC) { 1151 switch (codec) { 1152 case V4L2_PIX_FMT_MPEG2: 1153 case V4L2_PIX_FMT_VP8: 1154 output_min_count = 6; 1155 break; 1156 case V4L2_PIX_FMT_VP9: 1157 output_min_count = 9; 1158 break; 1159 case V4L2_PIX_FMT_H264: 1160 case V4L2_PIX_FMT_HEVC: 1161 default: 1162 output_min_count = 8; 1163 break; 1164 } 1165 } else { 1166 output_min_count = MIN_ENC_OUTPUT_BUFFERS; 1167 } 1168 1169 return output_min_count; 1170 } 1171 1172 static int bufreq_dec(struct hfi_plat_buffers_params *params, u32 buftype, 1173 struct hfi_buffer_requirements *bufreq) 1174 { 1175 enum hfi_version version = params->version; 1176 u32 codec = params->codec; 1177 u32 width = params->width, height = params->height, out_min_count; 1178 struct dec_bufsize_ops *dec_ops; 1179 bool is_secondary_output = params->dec.is_secondary_output; 1180 bool is_interlaced = params->dec.is_interlaced; 1181 u32 max_mbs_per_frame = params->dec.max_mbs_per_frame; 1182 u32 buffer_size_limit = params->dec.buffer_size_limit; 1183 u32 num_vpp_pipes = params->num_vpp_pipes; 1184 1185 switch (codec) { 1186 case V4L2_PIX_FMT_H264: 1187 dec_ops = &dec_h264_ops; 1188 break; 1189 case V4L2_PIX_FMT_HEVC: 1190 dec_ops = &dec_h265_ops; 1191 break; 1192 case V4L2_PIX_FMT_VP8: 1193 dec_ops = &dec_vp8_ops; 1194 break; 1195 case V4L2_PIX_FMT_VP9: 1196 dec_ops = &dec_vp9_ops; 1197 break; 1198 case V4L2_PIX_FMT_MPEG2: 1199 dec_ops = &dec_mpeg2_ops; 1200 break; 1201 default: 1202 return -EINVAL; 1203 } 1204 1205 out_min_count = output_buffer_count(VIDC_SESSION_TYPE_DEC, codec); 1206 1207 bufreq->type = buftype; 1208 bufreq->region_size = 0; 1209 bufreq->count_min = 1; 1210 bufreq->count_actual = 1; 1211 bufreq->hold_count = 1; 1212 bufreq->contiguous = 1; 1213 bufreq->alignment = 256; 1214 1215 if (buftype == HFI_BUFFER_INPUT) { 1216 bufreq->count_min = MIN_INPUT_BUFFERS; 1217 bufreq->size = 1218 calculate_dec_input_frame_size(width, height, codec, 1219 max_mbs_per_frame, 1220 buffer_size_limit); 1221 } else if (buftype == HFI_BUFFER_OUTPUT || 1222 buftype == HFI_BUFFER_OUTPUT2) { 1223 bufreq->count_min = out_min_count; 1224 bufreq->size = 1225 venus_helper_get_framesz_raw(params->hfi_color_fmt, 1226 width, height); 1227 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH(version)) { 1228 bufreq->size = dec_ops->scratch(width, height, is_interlaced); 1229 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_1(version)) { 1230 bufreq->size = dec_ops->scratch1(width, height, out_min_count, 1231 is_secondary_output, 1232 num_vpp_pipes); 1233 } else if (buftype == HFI_BUFFER_INTERNAL_PERSIST_1) { 1234 bufreq->size = dec_ops->persist1(); 1235 } else { 1236 return -EINVAL; 1237 } 1238 1239 return 0; 1240 } 1241 1242 int bufreq_enc(struct hfi_plat_buffers_params *params, u32 buftype, 1243 struct hfi_buffer_requirements *bufreq) 1244 { 1245 enum hfi_version version = params->version; 1246 struct enc_bufsize_ops *enc_ops; 1247 u32 width = params->width; 1248 u32 height = params->height; 1249 bool is_tenbit = params->enc.is_tenbit; 1250 u32 num_bframes = params->enc.num_b_frames; 1251 u32 codec = params->codec; 1252 u32 work_mode = params->enc.work_mode; 1253 u32 rc_type = params->enc.rc_type; 1254 u32 num_vpp_pipes = params->num_vpp_pipes; 1255 u32 num_ref; 1256 1257 switch (codec) { 1258 case V4L2_PIX_FMT_H264: 1259 enc_ops = &enc_h264_ops; 1260 break; 1261 case V4L2_PIX_FMT_HEVC: 1262 enc_ops = &enc_h265_ops; 1263 break; 1264 case V4L2_PIX_FMT_VP8: 1265 enc_ops = &enc_vp8_ops; 1266 break; 1267 default: 1268 return -EINVAL; 1269 } 1270 1271 num_ref = num_bframes > 0 ? num_bframes + 1 : 1; 1272 1273 bufreq->type = buftype; 1274 bufreq->region_size = 0; 1275 bufreq->count_min = 1; 1276 bufreq->count_actual = 1; 1277 bufreq->hold_count = 1; 1278 bufreq->contiguous = 1; 1279 bufreq->alignment = 256; 1280 1281 if (buftype == HFI_BUFFER_INPUT) { 1282 bufreq->count_min = MIN_INPUT_BUFFERS; 1283 bufreq->size = 1284 venus_helper_get_framesz_raw(params->hfi_color_fmt, 1285 width, height); 1286 } else if (buftype == HFI_BUFFER_OUTPUT || 1287 buftype == HFI_BUFFER_OUTPUT2) { 1288 bufreq->count_min = 1289 output_buffer_count(VIDC_SESSION_TYPE_ENC, codec); 1290 bufreq->size = calculate_enc_output_frame_size(width, height, 1291 rc_type); 1292 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH(version)) { 1293 bufreq->size = enc_ops->scratch(width, height, work_mode, 1294 num_vpp_pipes, rc_type); 1295 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_1(version)) { 1296 bufreq->size = enc_ops->scratch1(width, height, num_ref, 1297 is_tenbit, num_vpp_pipes); 1298 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_2(version)) { 1299 bufreq->size = enc_ops->scratch2(width, height, num_ref, 1300 is_tenbit); 1301 } else if (buftype == HFI_BUFFER_INTERNAL_PERSIST) { 1302 bufreq->size = enc_ops->persist(); 1303 } else { 1304 return -EINVAL; 1305 } 1306 1307 return 0; 1308 } 1309 1310 int hfi_plat_bufreq_v6(struct hfi_plat_buffers_params *params, u32 session_type, 1311 u32 buftype, struct hfi_buffer_requirements *bufreq) 1312 { 1313 if (session_type == VIDC_SESSION_TYPE_DEC) 1314 return bufreq_dec(params, buftype, bufreq); 1315 else 1316 return bufreq_enc(params, buftype, bufreq); 1317 } 1318