1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2020, The Linux Foundation. All rights reserved. 4 */ 5 #include <linux/kernel.h> 6 #include <linux/sizes.h> 7 #include <linux/videodev2.h> 8 9 #include "hfi.h" 10 #include "hfi_plat_bufs.h" 11 #include "helpers.h" 12 13 #define MIN_INPUT_BUFFERS 4 14 #define MIN_ENC_OUTPUT_BUFFERS 4 15 16 #define NV12_UBWC_Y_TILE_WIDTH 32 17 #define NV12_UBWC_Y_TILE_HEIGHT 8 18 #define NV12_UBWC_UV_TILE_WIDTH 16 19 #define NV12_UBWC_UV_TILE_HEIGHT 8 20 #define TP10_UBWC_Y_TILE_WIDTH 48 21 #define TP10_UBWC_Y_TILE_HEIGHT 4 22 #define METADATA_STRIDE_MULTIPLE 64 23 #define METADATA_HEIGHT_MULTIPLE 16 24 #define HFI_DMA_ALIGNMENT 256 25 26 #define MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE 64 27 #define MAX_FE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE 64 28 #define MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE 64 29 #define MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE 640 30 #define MAX_FE_NBR_DATA_CB_LINE_BUFFER_SIZE 320 31 #define MAX_FE_NBR_DATA_CR_LINE_BUFFER_SIZE 320 32 33 #define MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE (128 / 8) 34 #define MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE (128 / 8) 35 #define MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE (128 / 8) 36 37 #define MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE (64 * 2 * 3) 38 #define MAX_PE_NBR_DATA_LCU32_LINE_BUFFER_SIZE (32 * 2 * 3) 39 #define MAX_PE_NBR_DATA_LCU16_LINE_BUFFER_SIZE (16 * 2 * 3) 40 41 #define MAX_TILE_COLUMNS 32 /* 8K/256 */ 42 43 #define VPP_CMD_MAX_SIZE BIT(20) 44 #define NUM_HW_PIC_BUF 32 45 #define BIN_BUFFER_THRESHOLD (1280 * 736) 46 #define H264D_MAX_SLICE 1800 47 /* sizeof(h264d_buftab_t) aligned to 256 */ 48 #define SIZE_H264D_BUFTAB_T 256 49 /* sizeof(h264d_hw_pic_t) aligned to 32 */ 50 #define SIZE_H264D_HW_PIC_T BIT(11) 51 #define SIZE_H264D_BSE_CMD_PER_BUF (32 * 4) 52 #define SIZE_H264D_VPP_CMD_PER_BUF 512 53 54 /* Line Buffer definitions, One for Luma and 1/2 for each Chroma */ 55 #define SIZE_H264D_LB_FE_TOP_DATA(width, height) \ 56 (MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE * ALIGN((width), 16) * 3) 57 58 #define SIZE_H264D_LB_FE_TOP_CTRL(width, height) \ 59 (MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4)) 60 61 #define SIZE_H264D_LB_FE_LEFT_CTRL(width, height) \ 62 (MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((height) + 15) >> 4)) 63 64 #define SIZE_H264D_LB_SE_TOP_CTRL(width, height) \ 65 (MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4)) 66 67 #define SIZE_H264D_LB_SE_LEFT_CTRL(width, height) \ 68 (MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * (((height) + 15) >> 4)) 69 70 #define SIZE_H264D_LB_PE_TOP_DATA(width, height) \ 71 (MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE * (((width) + 15) >> 4)) 72 73 #define SIZE_H264D_LB_VSP_TOP(width, height) (((((width) + 15) >> 4) << 7)) 74 75 #define SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height) \ 76 (ALIGN((height), 16) * 32) 77 78 #define SIZE_H264D_QP(width, height) \ 79 ((((width) + 63) >> 6) * (((height) + 63) >> 6) * 128) 80 81 #define SIZE_HW_PIC(size_per_buf) (NUM_HW_PIC_BUF * (size_per_buf)) 82 83 #define H264_CABAC_HDR_RATIO_HD_TOT 1 84 #define H264_CABAC_RES_RATIO_HD_TOT 3 85 86 /* 87 * Some content need more bin buffer, but limit buffer 88 * size for high resolution 89 */ 90 #define NUM_SLIST_BUF_H264 (256 + 32) 91 #define SIZE_SLIST_BUF_H264 512 92 #define LCU_MAX_SIZE_PELS 64 93 #define LCU_MIN_SIZE_PELS 16 94 #define SIZE_SEI_USERDATA 4096 95 96 #define H265D_MAX_SLICE 600 97 #define SIZE_H265D_HW_PIC_T SIZE_H264D_HW_PIC_T 98 #define SIZE_H265D_BSE_CMD_PER_BUF (16 * sizeof(u32)) 99 #define SIZE_H265D_VPP_CMD_PER_BUF 256 100 101 #define SIZE_H265D_LB_FE_TOP_DATA(width, height) \ 102 (MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE * (ALIGN(width, 64) + 8) * 2) 103 104 #define SIZE_H265D_LB_FE_TOP_CTRL(width, height) \ 105 (MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * \ 106 (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS)) 107 108 #define SIZE_H265D_LB_FE_LEFT_CTRL(width, height) \ 109 (MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE * \ 110 (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS)) 111 112 #define SIZE_H265D_LB_SE_TOP_CTRL(width, height) \ 113 ((LCU_MAX_SIZE_PELS / 8 * (128 / 8)) * (((width) + 15) >> 4)) 114 115 static inline u32 size_h265d_lb_se_left_ctrl(u32 width, u32 height) 116 { 117 u32 x, y, z; 118 119 x = ((height + 16 - 1) / 8) * MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE; 120 y = ((height + 32 - 1) / 8) * MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE; 121 z = ((height + 64 - 1) / 8) * MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE; 122 123 return max3(x, y, z); 124 } 125 126 #define SIZE_H265D_LB_PE_TOP_DATA(width, height) \ 127 (MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE * \ 128 (ALIGN(width, LCU_MIN_SIZE_PELS) / LCU_MIN_SIZE_PELS)) 129 130 #define SIZE_H265D_LB_VSP_TOP(width, height) ((((width) + 63) >> 6) * 128) 131 132 #define SIZE_H265D_LB_VSP_LEFT(width, height) ((((height) + 63) >> 6) * 128) 133 134 #define SIZE_H265D_LB_RECON_DMA_METADATA_WR(width, height) \ 135 SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height) 136 137 #define SIZE_H265D_QP(width, height) SIZE_H264D_QP(width, height) 138 139 #define H265_CABAC_HDR_RATIO_HD_TOT 2 140 #define H265_CABAC_RES_RATIO_HD_TOT 2 141 142 /* 143 * Some content need more bin buffer, but limit buffer size 144 * for high resolution 145 */ 146 #define SIZE_SLIST_BUF_H265 BIT(10) 147 #define NUM_SLIST_BUF_H265 (80 + 20) 148 #define H265_NUM_TILE_COL 32 149 #define H265_NUM_TILE_ROW 128 150 #define H265_NUM_TILE (H265_NUM_TILE_ROW * H265_NUM_TILE_COL + 1) 151 152 static inline u32 size_vpxd_lb_fe_left_ctrl(u32 width, u32 height) 153 { 154 u32 x, y, z; 155 156 x = ((height + 15) >> 4) * MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE; 157 y = ((height + 31) >> 5) * MAX_FE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE; 158 z = ((height + 63) >> 6) * MAX_FE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE; 159 160 return max3(x, y, z); 161 } 162 163 #define SIZE_VPXD_LB_FE_TOP_CTRL(width, height) \ 164 (((ALIGN(width, 64) + 8) * 10 * 2)) /* small line */ 165 #define SIZE_VPXD_LB_SE_TOP_CTRL(width, height) \ 166 ((((width) + 15) >> 4) * MAX_FE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE) 167 168 static inline u32 size_vpxd_lb_se_left_ctrl(u32 width, u32 height) 169 { 170 u32 x, y, z; 171 172 x = ((height + 15) >> 4) * MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE; 173 y = ((height + 31) >> 5) * MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE; 174 z = ((height + 63) >> 6) * MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE; 175 176 return max3(x, y, z); 177 } 178 179 #define SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height) \ 180 ALIGN((ALIGN(height, 16) / (4 / 2)) * 64, 32) 181 #define SIZE_VP8D_LB_FE_TOP_DATA(width, height) \ 182 ((ALIGN(width, 16) + 8) * 10 * 2) 183 #define SIZE_VP9D_LB_FE_TOP_DATA(width, height) \ 184 ((ALIGN(ALIGN(width, 16), 64) + 8) * 10 * 2) 185 #define SIZE_VP8D_LB_PE_TOP_DATA(width, height) \ 186 ((ALIGN(width, 16) >> 4) * 64) 187 #define SIZE_VP9D_LB_PE_TOP_DATA(width, height) \ 188 ((ALIGN(ALIGN(width, 16), 64) >> 6) * 176) 189 #define SIZE_VP8D_LB_VSP_TOP(width, height) \ 190 (((ALIGN(width, 16) >> 4) * 64 / 2) + 256) 191 #define SIZE_VP9D_LB_VSP_TOP(width, height) \ 192 (((ALIGN(ALIGN(width, 16), 64) >> 6) * 64 * 8) + 256) 193 194 #define HFI_IRIS2_VP9D_COMV_SIZE \ 195 ((((8192 + 63) >> 6) * ((4320 + 63) >> 6) * 8 * 8 * 2 * 8)) 196 197 #define VPX_DECODER_FRAME_CONCURENCY_LVL 2 198 #define VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_NUM 1 199 #define VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_DEN 2 200 #define VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_NUM 3 201 #define VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_DEN 2 202 203 #define VP8_NUM_FRAME_INFO_BUF (5 + 1) 204 #define VP9_NUM_FRAME_INFO_BUF 32 205 #define VP8_NUM_PROBABILITY_TABLE_BUF VP8_NUM_FRAME_INFO_BUF 206 #define VP9_NUM_PROBABILITY_TABLE_BUF (VP9_NUM_FRAME_INFO_BUF + 4) 207 #define VP8_PROB_TABLE_SIZE 3840 208 #define VP9_PROB_TABLE_SIZE 3840 209 210 #define VP9_UDC_HEADER_BUF_SIZE (3 * 128) 211 #define MAX_SUPERFRAME_HEADER_LEN 34 212 #define CCE_TILE_OFFSET_SIZE ALIGN(32 * 4 * 4, 32) 213 214 #define QMATRIX_SIZE (sizeof(u32) * 128 + 256) 215 #define MP2D_QPDUMP_SIZE 115200 216 #define HFI_IRIS2_ENC_PERSIST_SIZE 204800 217 #define HFI_MAX_COL_FRAME 6 218 #define HFI_VENUS_VENC_TRE_WB_BUFF_SIZE (65 << 4) /* in Bytes */ 219 #define HFI_VENUS_VENC_DB_LINE_BUFF_PER_MB 512 220 #define HFI_VENUS_VPPSG_MAX_REGISTERS 2048 221 #define HFI_VENUS_WIDTH_ALIGNMENT 128 222 #define HFI_VENUS_WIDTH_TEN_BIT_ALIGNMENT 192 223 #define HFI_VENUS_HEIGHT_ALIGNMENT 32 224 225 #define SYSTEM_LAL_TILE10 192 226 #define NUM_MBS_720P (((1280 + 15) >> 4) * ((720 + 15) >> 4)) 227 #define NUM_MBS_4K (((4096 + 15) >> 4) * ((2304 + 15) >> 4)) 228 #define MB_SIZE_IN_PIXEL (16 * 16) 229 #define HDR10PLUS_PAYLOAD_SIZE 1024 230 #define HDR10_HIST_EXTRADATA_SIZE 4096 231 232 static u32 size_vpss_lb(u32 width, u32 height, u32 num_vpp_pipes) 233 { 234 u32 vpss_4tap_top_buffer_size, vpss_div2_top_buffer_size; 235 u32 vpss_4tap_left_buffer_size, vpss_div2_left_buffer_size; 236 u32 opb_wr_top_line_luma_buf_size, opb_wr_top_line_chroma_buf_size; 237 u32 opb_lb_wr_llb_y_buffer_size, opb_lb_wr_llb_uv_buffer_size; 238 u32 macrotiling_size; 239 u32 size = 0; 240 241 vpss_4tap_top_buffer_size = 0; 242 vpss_div2_top_buffer_size = 0; 243 vpss_4tap_left_buffer_size = 0; 244 vpss_div2_left_buffer_size = 0; 245 246 macrotiling_size = 32; 247 opb_wr_top_line_luma_buf_size = 248 ALIGN(width, macrotiling_size) / macrotiling_size * 256; 249 opb_wr_top_line_luma_buf_size = 250 ALIGN(opb_wr_top_line_luma_buf_size, HFI_DMA_ALIGNMENT) + 251 (MAX_TILE_COLUMNS - 1) * 256; 252 opb_wr_top_line_luma_buf_size = 253 max(opb_wr_top_line_luma_buf_size, (32 * ALIGN(height, 16))); 254 opb_wr_top_line_chroma_buf_size = opb_wr_top_line_luma_buf_size; 255 opb_lb_wr_llb_y_buffer_size = ALIGN((ALIGN(height, 16) / 2) * 64, 32); 256 opb_lb_wr_llb_uv_buffer_size = opb_lb_wr_llb_y_buffer_size; 257 size = num_vpp_pipes * 258 2 * (vpss_4tap_top_buffer_size + vpss_div2_top_buffer_size) + 259 2 * (vpss_4tap_left_buffer_size + vpss_div2_left_buffer_size) + 260 opb_wr_top_line_luma_buf_size + 261 opb_wr_top_line_chroma_buf_size + 262 opb_lb_wr_llb_uv_buffer_size + 263 opb_lb_wr_llb_y_buffer_size; 264 265 return size; 266 } 267 268 static u32 size_h264d_hw_bin_buffer(u32 width, u32 height) 269 { 270 u32 size_yuv, size_bin_hdr, size_bin_res; 271 u32 size = 0; 272 u32 product; 273 274 product = width * height; 275 size_yuv = (product <= BIN_BUFFER_THRESHOLD) ? 276 ((BIN_BUFFER_THRESHOLD * 3) >> 1) : ((product * 3) >> 1); 277 278 size_bin_hdr = size_yuv * H264_CABAC_HDR_RATIO_HD_TOT; 279 size_bin_res = size_yuv * H264_CABAC_RES_RATIO_HD_TOT; 280 size_bin_hdr = ALIGN(size_bin_hdr, HFI_DMA_ALIGNMENT); 281 size_bin_res = ALIGN(size_bin_res, HFI_DMA_ALIGNMENT); 282 size = size_bin_hdr + size_bin_res; 283 284 return size; 285 } 286 287 static u32 h264d_scratch_size(u32 width, u32 height, bool is_interlaced) 288 { 289 u32 aligned_width = ALIGN(width, 16); 290 u32 aligned_height = ALIGN(height, 16); 291 u32 size = 0; 292 293 if (!is_interlaced) 294 size = size_h264d_hw_bin_buffer(aligned_width, aligned_height); 295 296 return size; 297 } 298 299 static u32 size_h265d_hw_bin_buffer(u32 width, u32 height) 300 { 301 u32 size_yuv, size_bin_hdr, size_bin_res; 302 u32 size = 0; 303 u32 product; 304 305 product = width * height; 306 size_yuv = (product <= BIN_BUFFER_THRESHOLD) ? 307 ((BIN_BUFFER_THRESHOLD * 3) >> 1) : ((product * 3) >> 1); 308 size_bin_hdr = size_yuv * H265_CABAC_HDR_RATIO_HD_TOT; 309 size_bin_res = size_yuv * H265_CABAC_RES_RATIO_HD_TOT; 310 size_bin_hdr = ALIGN(size_bin_hdr, HFI_DMA_ALIGNMENT); 311 size_bin_res = ALIGN(size_bin_res, HFI_DMA_ALIGNMENT); 312 size = size_bin_hdr + size_bin_res; 313 314 return size; 315 } 316 317 static u32 h265d_scratch_size(u32 width, u32 height, bool is_interlaced) 318 { 319 u32 aligned_width = ALIGN(width, 16); 320 u32 aligned_height = ALIGN(height, 16); 321 u32 size = 0; 322 323 if (!is_interlaced) 324 size = size_h265d_hw_bin_buffer(aligned_width, aligned_height); 325 326 return size; 327 } 328 329 static u32 vpxd_scratch_size(u32 width, u32 height, bool is_interlaced) 330 { 331 u32 aligned_width = ALIGN(width, 16); 332 u32 aligned_height = ALIGN(height, 16); 333 u32 size_yuv = aligned_width * aligned_height * 3 / 2; 334 u32 size = 0; 335 336 if (!is_interlaced) { 337 u32 binbuffer1_size, binbufer2_size; 338 339 binbuffer1_size = max_t(u32, size_yuv, 340 ((BIN_BUFFER_THRESHOLD * 3) >> 1)); 341 binbuffer1_size *= VPX_DECODER_FRAME_CONCURENCY_LVL * 342 VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_NUM / 343 VPX_DECODER_FRAME_BIN_HDR_BUDGET_RATIO_DEN; 344 binbufer2_size = max_t(u32, size_yuv, 345 ((BIN_BUFFER_THRESHOLD * 3) >> 1)); 346 binbufer2_size *= VPX_DECODER_FRAME_CONCURENCY_LVL * 347 VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_NUM / 348 VPX_DECODER_FRAME_BIN_RES_BUDGET_RATIO_DEN; 349 size = ALIGN(binbuffer1_size + binbufer2_size, 350 HFI_DMA_ALIGNMENT); 351 } 352 353 return size; 354 } 355 356 static u32 mpeg2d_scratch_size(u32 width, u32 height, bool is_interlaced) 357 { 358 return 0; 359 } 360 361 static u32 calculate_enc_output_frame_size(u32 width, u32 height, u32 rc_type) 362 { 363 u32 aligned_width, aligned_height; 364 u32 mbs_per_frame; 365 u32 frame_size; 366 367 /* 368 * Encoder output size calculation: 32 Align width/height 369 * For resolution < 720p : YUVsize * 4 370 * For resolution > 720p & <= 4K : YUVsize / 2 371 * For resolution > 4k : YUVsize / 4 372 * Initially frame_size = YUVsize * 2; 373 */ 374 aligned_width = ALIGN(width, 32); 375 aligned_height = ALIGN(height, 32); 376 mbs_per_frame = (ALIGN(aligned_height, 16) * 377 ALIGN(aligned_width, 16)) / 256; 378 frame_size = width * height * 3; 379 380 if (mbs_per_frame < NUM_MBS_720P) 381 frame_size = frame_size << 1; 382 else if (mbs_per_frame <= NUM_MBS_4K) 383 frame_size = frame_size >> 2; 384 else 385 frame_size = frame_size >> 3; 386 387 if (rc_type == HFI_RATE_CONTROL_OFF || rc_type == HFI_RATE_CONTROL_CQ) 388 frame_size = frame_size << 1; 389 390 /* 391 * In case of opaque color format bitdepth will be known 392 * with first ETB, buffers allocated already with 8 bit 393 * won't be sufficient for 10 bit 394 * calculate size considering 10-bit by default 395 * For 10-bit cases size = size * 1.25 396 */ 397 frame_size *= 5; 398 frame_size /= 4; 399 400 return ALIGN(frame_size, SZ_4K); 401 } 402 403 static u32 calculate_enc_scratch_size(u32 width, u32 height, u32 work_mode, 404 u32 lcu_size, u32 num_vpp_pipes, 405 u32 rc_type) 406 { 407 u32 aligned_width, aligned_height, bitstream_size; 408 u32 total_bitbin_buffers, size_single_pipe, bitbin_size; 409 u32 sao_bin_buffer_size, padded_bin_size, size; 410 411 aligned_width = ALIGN(width, lcu_size); 412 aligned_height = ALIGN(height, lcu_size); 413 bitstream_size = 414 calculate_enc_output_frame_size(width, height, rc_type); 415 416 bitstream_size = ALIGN(bitstream_size, HFI_DMA_ALIGNMENT); 417 418 if (work_mode == VIDC_WORK_MODE_2) { 419 total_bitbin_buffers = 3; 420 bitbin_size = bitstream_size * 17 / 10; 421 bitbin_size = ALIGN(bitbin_size, HFI_DMA_ALIGNMENT); 422 } else { 423 total_bitbin_buffers = 1; 424 bitstream_size = aligned_width * aligned_height * 3; 425 bitbin_size = ALIGN(bitstream_size, HFI_DMA_ALIGNMENT); 426 } 427 428 if (num_vpp_pipes > 2) 429 size_single_pipe = bitbin_size / 2; 430 else 431 size_single_pipe = bitbin_size; 432 433 size_single_pipe = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT); 434 sao_bin_buffer_size = 435 (64 * (((width + 32) * (height + 32)) >> 10)) + 384; 436 padded_bin_size = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT); 437 size_single_pipe = sao_bin_buffer_size + padded_bin_size; 438 size_single_pipe = ALIGN(size_single_pipe, HFI_DMA_ALIGNMENT); 439 bitbin_size = size_single_pipe * num_vpp_pipes; 440 size = ALIGN(bitbin_size, HFI_DMA_ALIGNMENT) * 441 total_bitbin_buffers + 512; 442 443 return size; 444 } 445 446 static u32 h264e_scratch_size(u32 width, u32 height, u32 work_mode, 447 u32 num_vpp_pipes, u32 rc_type) 448 { 449 return calculate_enc_scratch_size(width, height, work_mode, 16, 450 num_vpp_pipes, rc_type); 451 } 452 453 static u32 h265e_scratch_size(u32 width, u32 height, u32 work_mode, 454 u32 num_vpp_pipes, u32 rc_type) 455 { 456 return calculate_enc_scratch_size(width, height, work_mode, 32, 457 num_vpp_pipes, rc_type); 458 } 459 460 static u32 vp8e_scratch_size(u32 width, u32 height, u32 work_mode, 461 u32 num_vpp_pipes, u32 rc_type) 462 { 463 return calculate_enc_scratch_size(width, height, work_mode, 16, 464 num_vpp_pipes, rc_type); 465 } 466 467 static u32 hfi_iris2_h264d_comv_size(u32 width, u32 height, 468 u32 yuv_buf_min_count) 469 { 470 u32 frame_width_in_mbs = ((width + 15) >> 4); 471 u32 frame_height_in_mbs = ((height + 15) >> 4); 472 u32 col_mv_aligned_width = (frame_width_in_mbs << 7); 473 u32 col_zero_aligned_width = (frame_width_in_mbs << 2); 474 u32 col_zero_size = 0, size_colloc = 0, comv_size = 0; 475 476 col_mv_aligned_width = ALIGN(col_mv_aligned_width, 16); 477 col_zero_aligned_width = ALIGN(col_zero_aligned_width, 16); 478 col_zero_size = 479 col_zero_aligned_width * ((frame_height_in_mbs + 1) >> 1); 480 col_zero_size = ALIGN(col_zero_size, 64); 481 col_zero_size <<= 1; 482 col_zero_size = ALIGN(col_zero_size, 512); 483 size_colloc = col_mv_aligned_width * ((frame_height_in_mbs + 1) >> 1); 484 size_colloc = ALIGN(size_colloc, 64); 485 size_colloc <<= 1; 486 size_colloc = ALIGN(size_colloc, 512); 487 size_colloc += (col_zero_size + SIZE_H264D_BUFTAB_T * 2); 488 comv_size = size_colloc * yuv_buf_min_count; 489 comv_size += 512; 490 491 return comv_size; 492 } 493 494 static u32 size_h264d_bse_cmd_buf(u32 height) 495 { 496 u32 aligned_height = ALIGN(height, 32); 497 498 return min_t(u32, (((aligned_height + 15) >> 4) * 3 * 4), 499 H264D_MAX_SLICE) * SIZE_H264D_BSE_CMD_PER_BUF; 500 } 501 502 static u32 size_h264d_vpp_cmd_buf(u32 height) 503 { 504 u32 aligned_height = ALIGN(height, 32); 505 u32 size; 506 507 size = min_t(u32, (((aligned_height + 15) >> 4) * 3 * 4), 508 H264D_MAX_SLICE) * SIZE_H264D_VPP_CMD_PER_BUF; 509 if (size > VPP_CMD_MAX_SIZE) 510 size = VPP_CMD_MAX_SIZE; 511 512 return size; 513 } 514 515 static u32 hfi_iris2_h264d_non_comv_size(u32 width, u32 height, 516 u32 num_vpp_pipes) 517 { 518 u32 size_bse, size_vpp, size; 519 520 size_bse = size_h264d_bse_cmd_buf(height); 521 size_vpp = size_h264d_vpp_cmd_buf(height); 522 size = 523 ALIGN(size_bse, HFI_DMA_ALIGNMENT) + 524 ALIGN(size_vpp, HFI_DMA_ALIGNMENT) + 525 ALIGN(SIZE_HW_PIC(SIZE_H264D_HW_PIC_T), HFI_DMA_ALIGNMENT) + 526 ALIGN(SIZE_H264D_LB_FE_TOP_DATA(width, height), 527 HFI_DMA_ALIGNMENT) + 528 ALIGN(SIZE_H264D_LB_FE_TOP_CTRL(width, height), 529 HFI_DMA_ALIGNMENT) + 530 ALIGN(SIZE_H264D_LB_FE_LEFT_CTRL(width, height), 531 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 532 ALIGN(SIZE_H264D_LB_SE_TOP_CTRL(width, height), 533 HFI_DMA_ALIGNMENT) + 534 ALIGN(SIZE_H264D_LB_SE_LEFT_CTRL(width, height), 535 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 536 ALIGN(SIZE_H264D_LB_PE_TOP_DATA(width, height), 537 HFI_DMA_ALIGNMENT) + 538 ALIGN(SIZE_H264D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) + 539 ALIGN(SIZE_H264D_LB_RECON_DMA_METADATA_WR(width, height), 540 HFI_DMA_ALIGNMENT) * 2 + 541 ALIGN(SIZE_H264D_QP(width, height), HFI_DMA_ALIGNMENT); 542 543 return ALIGN(size, HFI_DMA_ALIGNMENT); 544 } 545 546 static u32 size_h265d_bse_cmd_buf(u32 width, u32 height) 547 { 548 u32 size; 549 550 size = (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) * 551 (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) * 552 NUM_HW_PIC_BUF; 553 size = min_t(u32, size, H265D_MAX_SLICE + 1); 554 size = 2 * size * SIZE_H265D_BSE_CMD_PER_BUF; 555 556 return ALIGN(size, HFI_DMA_ALIGNMENT); 557 } 558 559 static u32 size_h265d_vpp_cmd_buf(u32 width, u32 height) 560 { 561 u32 size; 562 563 size = (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) * 564 (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) * 565 NUM_HW_PIC_BUF; 566 size = min_t(u32, size, H265D_MAX_SLICE + 1); 567 size = ALIGN(size, 4); 568 size = 2 * size * SIZE_H265D_VPP_CMD_PER_BUF; 569 size = ALIGN(size, HFI_DMA_ALIGNMENT); 570 if (size > VPP_CMD_MAX_SIZE) 571 size = VPP_CMD_MAX_SIZE; 572 573 return size; 574 } 575 576 static u32 hfi_iris2_h265d_comv_size(u32 width, u32 height, 577 u32 yuv_buf_count_min) 578 { 579 u32 size; 580 581 size = ALIGN(((((width + 15) >> 4) * ((height + 15) >> 4)) << 8), 512); 582 size *= yuv_buf_count_min; 583 size += 512; 584 585 return size; 586 } 587 588 static u32 hfi_iris2_h265d_non_comv_size(u32 width, u32 height, 589 u32 num_vpp_pipes) 590 { 591 u32 size_bse, size_vpp, size; 592 593 size_bse = size_h265d_bse_cmd_buf(width, height); 594 size_vpp = size_h265d_vpp_cmd_buf(width, height); 595 size = 596 ALIGN(size_bse, HFI_DMA_ALIGNMENT) + 597 ALIGN(size_vpp, HFI_DMA_ALIGNMENT) + 598 ALIGN(NUM_HW_PIC_BUF * 20 * 22 * 4, HFI_DMA_ALIGNMENT) + 599 ALIGN(2 * sizeof(u16) * 600 (ALIGN(width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) * 601 (ALIGN(height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS), 602 HFI_DMA_ALIGNMENT) + 603 ALIGN(SIZE_HW_PIC(SIZE_H265D_HW_PIC_T), HFI_DMA_ALIGNMENT) + 604 ALIGN(SIZE_H265D_LB_FE_TOP_DATA(width, height), 605 HFI_DMA_ALIGNMENT) + 606 ALIGN(SIZE_H265D_LB_FE_TOP_CTRL(width, height), 607 HFI_DMA_ALIGNMENT) + 608 ALIGN(SIZE_H265D_LB_FE_LEFT_CTRL(width, height), 609 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 610 ALIGN(size_h265d_lb_se_left_ctrl(width, height), 611 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 612 ALIGN(SIZE_H265D_LB_SE_TOP_CTRL(width, height), 613 HFI_DMA_ALIGNMENT) + 614 ALIGN(SIZE_H265D_LB_PE_TOP_DATA(width, height), 615 HFI_DMA_ALIGNMENT) + 616 ALIGN(SIZE_H265D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) + 617 ALIGN(SIZE_H265D_LB_VSP_LEFT(width, height), 618 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 619 ALIGN(SIZE_H265D_LB_RECON_DMA_METADATA_WR(width, height), 620 HFI_DMA_ALIGNMENT) 621 * 4 + 622 ALIGN(SIZE_H265D_QP(width, height), HFI_DMA_ALIGNMENT); 623 624 return ALIGN(size, HFI_DMA_ALIGNMENT); 625 } 626 627 static u32 hfi_iris2_vp8d_comv_size(u32 width, u32 height, 628 u32 yuv_min_buf_count) 629 { 630 return (((width + 15) >> 4) * ((height + 15) >> 4) * 8 * 2); 631 } 632 633 static u32 h264d_scratch1_size(u32 width, u32 height, u32 min_buf_count, 634 bool split_mode_enabled, u32 num_vpp_pipes) 635 { 636 u32 co_mv_size, nonco_mv_size, vpss_lb_size = 0; 637 638 co_mv_size = hfi_iris2_h264d_comv_size(width, height, min_buf_count); 639 nonco_mv_size = hfi_iris2_h264d_non_comv_size(width, height, 640 num_vpp_pipes); 641 if (split_mode_enabled) 642 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes); 643 644 return co_mv_size + nonco_mv_size + vpss_lb_size; 645 } 646 647 static u32 h265d_scratch1_size(u32 width, u32 height, u32 min_buf_count, 648 bool split_mode_enabled, u32 num_vpp_pipes) 649 { 650 u32 co_mv_size, nonco_mv_size, vpss_lb_size = 0; 651 652 co_mv_size = hfi_iris2_h265d_comv_size(width, height, min_buf_count); 653 nonco_mv_size = hfi_iris2_h265d_non_comv_size(width, height, 654 num_vpp_pipes); 655 if (split_mode_enabled) 656 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes); 657 658 return co_mv_size + nonco_mv_size + vpss_lb_size + 659 HDR10_HIST_EXTRADATA_SIZE; 660 } 661 662 static u32 vp8d_scratch1_size(u32 width, u32 height, u32 min_buf_count, 663 bool split_mode_enabled, u32 num_vpp_pipes) 664 { 665 u32 vpss_lb_size = 0, size; 666 667 size = hfi_iris2_vp8d_comv_size(width, height, 0); 668 size += ALIGN(size_vpxd_lb_fe_left_ctrl(width, height), 669 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 670 ALIGN(size_vpxd_lb_se_left_ctrl(width, height), 671 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 672 ALIGN(SIZE_VP8D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) + 673 ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height), 674 HFI_DMA_ALIGNMENT) + 675 2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height), 676 HFI_DMA_ALIGNMENT) + 677 ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height), 678 HFI_DMA_ALIGNMENT) + 679 ALIGN(SIZE_VP8D_LB_PE_TOP_DATA(width, height), 680 HFI_DMA_ALIGNMENT) + 681 ALIGN(SIZE_VP8D_LB_FE_TOP_DATA(width, height), 682 HFI_DMA_ALIGNMENT); 683 if (split_mode_enabled) 684 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes); 685 686 size += vpss_lb_size; 687 688 return size; 689 } 690 691 static u32 vp9d_scratch1_size(u32 width, u32 height, u32 min_buf_count, 692 bool split_mode_enabled, u32 num_vpp_pipes) 693 { 694 u32 vpss_lb_size = 0; 695 u32 size; 696 697 size = 698 ALIGN(size_vpxd_lb_fe_left_ctrl(width, height), 699 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 700 ALIGN(size_vpxd_lb_se_left_ctrl(width, height), 701 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 702 ALIGN(SIZE_VP9D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) + 703 ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height), 704 HFI_DMA_ALIGNMENT) + 705 2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height), 706 HFI_DMA_ALIGNMENT) + 707 ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height), 708 HFI_DMA_ALIGNMENT) + 709 ALIGN(SIZE_VP9D_LB_PE_TOP_DATA(width, height), 710 HFI_DMA_ALIGNMENT) + 711 ALIGN(SIZE_VP9D_LB_FE_TOP_DATA(width, height), 712 HFI_DMA_ALIGNMENT); 713 714 if (split_mode_enabled) 715 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes); 716 717 size += vpss_lb_size + HDR10_HIST_EXTRADATA_SIZE; 718 719 return size; 720 } 721 722 static u32 mpeg2d_scratch1_size(u32 width, u32 height, u32 min_buf_count, 723 bool split_mode_enabled, u32 num_vpp_pipes) 724 { 725 u32 vpss_lb_size = 0; 726 u32 size; 727 728 size = 729 ALIGN(size_vpxd_lb_fe_left_ctrl(width, height), 730 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 731 ALIGN(size_vpxd_lb_se_left_ctrl(width, height), 732 HFI_DMA_ALIGNMENT) * num_vpp_pipes + 733 ALIGN(SIZE_VP8D_LB_VSP_TOP(width, height), HFI_DMA_ALIGNMENT) + 734 ALIGN(SIZE_VPXD_LB_FE_TOP_CTRL(width, height), 735 HFI_DMA_ALIGNMENT) + 736 2 * ALIGN(SIZE_VPXD_LB_RECON_DMA_METADATA_WR(width, height), 737 HFI_DMA_ALIGNMENT) + 738 ALIGN(SIZE_VPXD_LB_SE_TOP_CTRL(width, height), 739 HFI_DMA_ALIGNMENT) + 740 ALIGN(SIZE_VP8D_LB_PE_TOP_DATA(width, height), 741 HFI_DMA_ALIGNMENT) + 742 ALIGN(SIZE_VP8D_LB_FE_TOP_DATA(width, height), 743 HFI_DMA_ALIGNMENT); 744 745 if (split_mode_enabled) 746 vpss_lb_size = size_vpss_lb(width, height, num_vpp_pipes); 747 748 size += vpss_lb_size; 749 750 return size; 751 } 752 753 static u32 754 calculate_enc_scratch1_size(u32 width, u32 height, u32 lcu_size, u32 num_ref, 755 bool ten_bit, u32 num_vpp_pipes, bool is_h265) 756 { 757 u32 line_buf_ctrl_size, line_buf_data_size, leftline_buf_ctrl_size; 758 u32 line_buf_sde_size, sps_pps_slice_hdr, topline_buf_ctrl_size_FE; 759 u32 leftline_buf_ctrl_size_FE, line_buf_recon_pix_size; 760 u32 leftline_buf_recon_pix_size, lambda_lut_size, override_buffer_size; 761 u32 col_mv_buf_size, vpp_reg_buffer_size, ir_buffer_size; 762 u32 vpss_line_buf, leftline_buf_meta_recony, h265e_colrcbuf_size; 763 u32 h265e_framerc_bufsize, h265e_lcubitcnt_bufsize; 764 u32 h265e_lcubitmap_bufsize, se_stats_bufsize; 765 u32 bse_reg_buffer_size, bse_slice_cmd_buffer_size, slice_info_bufsize; 766 u32 line_buf_ctrl_size_buffid2, slice_cmd_buffer_size; 767 u32 width_lcu_num, height_lcu_num, width_coded, height_coded; 768 u32 frame_num_lcu, linebuf_meta_recon_uv, topline_bufsize_fe_1stg_sao; 769 u32 size, bit_depth, num_lcu_mb; 770 u32 vpss_line_buffer_size_1; 771 772 width_lcu_num = (width + lcu_size - 1) / lcu_size; 773 height_lcu_num = (height + lcu_size - 1) / lcu_size; 774 frame_num_lcu = width_lcu_num * height_lcu_num; 775 width_coded = width_lcu_num * lcu_size; 776 height_coded = height_lcu_num * lcu_size; 777 num_lcu_mb = (height_coded / lcu_size) * 778 ((width_coded + lcu_size * 8) / lcu_size); 779 slice_info_bufsize = 256 + (frame_num_lcu << 4); 780 slice_info_bufsize = ALIGN(slice_info_bufsize, HFI_DMA_ALIGNMENT); 781 line_buf_ctrl_size = ALIGN(width_coded, HFI_DMA_ALIGNMENT); 782 line_buf_ctrl_size_buffid2 = ALIGN(width_coded, HFI_DMA_ALIGNMENT); 783 784 bit_depth = ten_bit ? 10 : 8; 785 line_buf_data_size = 786 (((((bit_depth * width_coded + 1024) + 787 (HFI_DMA_ALIGNMENT - 1)) & (~(HFI_DMA_ALIGNMENT - 1))) * 1) + 788 (((((bit_depth * width_coded + 1024) >> 1) + 789 (HFI_DMA_ALIGNMENT - 1)) & (~(HFI_DMA_ALIGNMENT - 1))) * 2)); 790 791 leftline_buf_ctrl_size = is_h265 ? 792 ((height_coded + 32) / 32 * 4 * 16) : 793 ((height_coded + 15) / 16 * 5 * 16); 794 795 if (num_vpp_pipes > 1) { 796 leftline_buf_ctrl_size += 512; 797 leftline_buf_ctrl_size = 798 ALIGN(leftline_buf_ctrl_size, 512) * num_vpp_pipes; 799 } 800 801 leftline_buf_ctrl_size = 802 ALIGN(leftline_buf_ctrl_size, HFI_DMA_ALIGNMENT); 803 leftline_buf_recon_pix_size = (((ten_bit + 1) * 2 * 804 (height_coded) + HFI_DMA_ALIGNMENT) + 805 (HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1) & 806 (~((HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1)) * 1; 807 808 topline_buf_ctrl_size_FE = is_h265 ? (64 * (width_coded >> 5)) : 809 (HFI_DMA_ALIGNMENT + 16 * (width_coded >> 4)); 810 topline_buf_ctrl_size_FE = 811 ALIGN(topline_buf_ctrl_size_FE, HFI_DMA_ALIGNMENT); 812 leftline_buf_ctrl_size_FE = 813 (((HFI_DMA_ALIGNMENT + 64 * (height_coded >> 4)) + 814 (HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1) & 815 (~((HFI_DMA_ALIGNMENT << (num_vpp_pipes - 1)) - 1)) * 1) * 816 num_vpp_pipes; 817 leftline_buf_meta_recony = (HFI_DMA_ALIGNMENT + 64 * 818 ((height_coded) / (8 * (ten_bit ? 4 : 8)))); 819 leftline_buf_meta_recony = 820 ALIGN(leftline_buf_meta_recony, HFI_DMA_ALIGNMENT); 821 leftline_buf_meta_recony = leftline_buf_meta_recony * num_vpp_pipes; 822 linebuf_meta_recon_uv = (HFI_DMA_ALIGNMENT + 64 * 823 ((height_coded) / (4 * (ten_bit ? 4 : 8)))); 824 linebuf_meta_recon_uv = ALIGN(linebuf_meta_recon_uv, HFI_DMA_ALIGNMENT); 825 linebuf_meta_recon_uv = linebuf_meta_recon_uv * num_vpp_pipes; 826 line_buf_recon_pix_size = ((ten_bit ? 3 : 2) * width_coded); 827 line_buf_recon_pix_size = 828 ALIGN(line_buf_recon_pix_size, HFI_DMA_ALIGNMENT); 829 slice_cmd_buffer_size = ALIGN(20480, HFI_DMA_ALIGNMENT); 830 sps_pps_slice_hdr = 2048 + 4096; 831 col_mv_buf_size = is_h265 ? (16 * ((frame_num_lcu << 2) + 32)) : 832 (3 * 16 * (width_lcu_num * height_lcu_num + 32)); 833 col_mv_buf_size = 834 ALIGN(col_mv_buf_size, HFI_DMA_ALIGNMENT) * (num_ref + 1); 835 h265e_colrcbuf_size = 836 (((width_lcu_num + 7) >> 3) * 16 * 2 * height_lcu_num); 837 if (num_vpp_pipes > 1) 838 h265e_colrcbuf_size = 839 ALIGN(h265e_colrcbuf_size, HFI_DMA_ALIGNMENT) * 840 num_vpp_pipes; 841 842 h265e_colrcbuf_size = ALIGN(h265e_colrcbuf_size, HFI_DMA_ALIGNMENT) * 843 HFI_MAX_COL_FRAME; 844 h265e_framerc_bufsize = (is_h265) ? (256 + 16 * 845 (14 + (((height_coded >> 5) + 7) >> 3))) : 846 (256 + 16 * (14 + (((height_coded >> 4) + 7) >> 3))); 847 h265e_framerc_bufsize *= 6; /* multiply by max numtilescol */ 848 if (num_vpp_pipes > 1) 849 h265e_framerc_bufsize = 850 ALIGN(h265e_framerc_bufsize, HFI_DMA_ALIGNMENT) * 851 num_vpp_pipes; 852 853 h265e_framerc_bufsize = ALIGN(h265e_framerc_bufsize, 512) * 854 HFI_MAX_COL_FRAME; 855 h265e_lcubitcnt_bufsize = 256 + 4 * frame_num_lcu; 856 h265e_lcubitcnt_bufsize = 857 ALIGN(h265e_lcubitcnt_bufsize, HFI_DMA_ALIGNMENT); 858 h265e_lcubitmap_bufsize = 256 + (frame_num_lcu >> 3); 859 h265e_lcubitmap_bufsize = 860 ALIGN(h265e_lcubitmap_bufsize, HFI_DMA_ALIGNMENT); 861 line_buf_sde_size = 256 + 16 * (width_coded >> 4); 862 line_buf_sde_size = ALIGN(line_buf_sde_size, HFI_DMA_ALIGNMENT); 863 if ((width_coded * height_coded) > (4096 * 2160)) 864 se_stats_bufsize = 0; 865 else if ((width_coded * height_coded) > (1920 * 1088)) 866 se_stats_bufsize = (40 * 4 * frame_num_lcu + 256 + 256); 867 else 868 se_stats_bufsize = (1024 * frame_num_lcu + 256 + 256); 869 870 se_stats_bufsize = ALIGN(se_stats_bufsize, HFI_DMA_ALIGNMENT) * 2; 871 bse_slice_cmd_buffer_size = (((8192 << 2) + 7) & (~7)) * 6; 872 bse_reg_buffer_size = (((512 << 3) + 7) & (~7)) * 4; 873 vpp_reg_buffer_size = 874 (((HFI_VENUS_VPPSG_MAX_REGISTERS << 3) + 31) & (~31)) * 10; 875 lambda_lut_size = 256 * 11; 876 override_buffer_size = 16 * ((num_lcu_mb + 7) >> 3); 877 override_buffer_size = 878 ALIGN(override_buffer_size, HFI_DMA_ALIGNMENT) * 2; 879 ir_buffer_size = (((frame_num_lcu << 1) + 7) & (~7)) * 3; 880 vpss_line_buffer_size_1 = (((8192 >> 2) << 5) * num_vpp_pipes) + 64; 881 vpss_line_buf = 882 (((((max(width_coded, height_coded) + 3) >> 2) << 5) + 256) * 883 16) + vpss_line_buffer_size_1; 884 topline_bufsize_fe_1stg_sao = 16 * (width_coded >> 5); 885 topline_bufsize_fe_1stg_sao = 886 ALIGN(topline_bufsize_fe_1stg_sao, HFI_DMA_ALIGNMENT); 887 888 size = 889 line_buf_ctrl_size + line_buf_data_size + 890 line_buf_ctrl_size_buffid2 + leftline_buf_ctrl_size + 891 vpss_line_buf + col_mv_buf_size + topline_buf_ctrl_size_FE + 892 leftline_buf_ctrl_size_FE + line_buf_recon_pix_size + 893 leftline_buf_recon_pix_size + 894 leftline_buf_meta_recony + linebuf_meta_recon_uv + 895 h265e_colrcbuf_size + h265e_framerc_bufsize + 896 h265e_lcubitcnt_bufsize + h265e_lcubitmap_bufsize + 897 line_buf_sde_size + 898 topline_bufsize_fe_1stg_sao + override_buffer_size + 899 bse_reg_buffer_size + vpp_reg_buffer_size + sps_pps_slice_hdr + 900 slice_cmd_buffer_size + bse_slice_cmd_buffer_size + 901 ir_buffer_size + slice_info_bufsize + lambda_lut_size + 902 se_stats_bufsize + 1024; 903 904 return size; 905 } 906 907 static u32 h264e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit, 908 u32 num_vpp_pipes) 909 { 910 return calculate_enc_scratch1_size(width, height, 16, num_ref, ten_bit, 911 num_vpp_pipes, false); 912 } 913 914 static u32 h265e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit, 915 u32 num_vpp_pipes) 916 { 917 return calculate_enc_scratch1_size(width, height, 32, num_ref, ten_bit, 918 num_vpp_pipes, true); 919 } 920 921 static u32 vp8e_scratch1_size(u32 width, u32 height, u32 num_ref, bool ten_bit, 922 u32 num_vpp_pipes) 923 { 924 return calculate_enc_scratch1_size(width, height, 16, num_ref, ten_bit, 925 1, false); 926 } 927 928 static u32 ubwc_metadata_plane_stride(u32 width, u32 metadata_stride_multi, 929 u32 tile_width_pels) 930 { 931 return ALIGN(((width + (tile_width_pels - 1)) / tile_width_pels), 932 metadata_stride_multi); 933 } 934 935 static u32 ubwc_metadata_plane_bufheight(u32 height, u32 metadata_height_multi, 936 u32 tile_height_pels) 937 { 938 return ALIGN(((height + (tile_height_pels - 1)) / tile_height_pels), 939 metadata_height_multi); 940 } 941 942 static u32 ubwc_metadata_plane_buffer_size(u32 metadata_stride, 943 u32 metadata_buf_height) 944 { 945 return ALIGN(metadata_stride * metadata_buf_height, SZ_4K); 946 } 947 948 static u32 enc_scratch2_size(u32 width, u32 height, u32 num_ref, bool ten_bit) 949 { 950 u32 aligned_width, aligned_height, chroma_height, ref_buf_height; 951 u32 luma_size, chroma_size; 952 u32 metadata_stride, meta_buf_height, meta_size_y, meta_size_c; 953 u32 ref_luma_stride_bytes, ref_chroma_height_bytes; 954 u32 ref_buf_size, ref_stride; 955 u32 size; 956 957 if (!ten_bit) { 958 aligned_height = ALIGN(height, HFI_VENUS_HEIGHT_ALIGNMENT); 959 chroma_height = height >> 1; 960 chroma_height = ALIGN(chroma_height, 961 HFI_VENUS_HEIGHT_ALIGNMENT); 962 aligned_width = ALIGN(width, HFI_VENUS_WIDTH_ALIGNMENT); 963 metadata_stride = 964 ubwc_metadata_plane_stride(width, 64, 965 NV12_UBWC_Y_TILE_WIDTH); 966 meta_buf_height = 967 ubwc_metadata_plane_bufheight(height, 16, 968 NV12_UBWC_Y_TILE_HEIGHT); 969 meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride, 970 meta_buf_height); 971 meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride, 972 meta_buf_height); 973 size = (aligned_height + chroma_height) * aligned_width + 974 meta_size_y + meta_size_c; 975 size = (size * (num_ref + 3)) + 4096; 976 } else { 977 ref_buf_height = (height + (HFI_VENUS_HEIGHT_ALIGNMENT - 1)) 978 & (~(HFI_VENUS_HEIGHT_ALIGNMENT - 1)); 979 ref_luma_stride_bytes = 980 ((width + SYSTEM_LAL_TILE10 - 1) / SYSTEM_LAL_TILE10) * 981 SYSTEM_LAL_TILE10; 982 ref_stride = 4 * (ref_luma_stride_bytes / 3); 983 ref_stride = (ref_stride + (128 - 1)) & (~(128 - 1)); 984 luma_size = ref_buf_height * ref_stride; 985 ref_chroma_height_bytes = (((height + 1) >> 1) + 986 (32 - 1)) & (~(32 - 1)); 987 chroma_size = ref_stride * ref_chroma_height_bytes; 988 luma_size = (luma_size + (SZ_4K - 1)) & (~(SZ_4K - 1)); 989 chroma_size = (chroma_size + (SZ_4K - 1)) & (~(SZ_4K - 1)); 990 ref_buf_size = luma_size + chroma_size; 991 metadata_stride = 992 ubwc_metadata_plane_stride(width, 993 METADATA_STRIDE_MULTIPLE, 994 TP10_UBWC_Y_TILE_WIDTH); 995 meta_buf_height = 996 ubwc_metadata_plane_bufheight(height, 997 METADATA_HEIGHT_MULTIPLE, 998 TP10_UBWC_Y_TILE_HEIGHT); 999 meta_size_y = ubwc_metadata_plane_buffer_size(metadata_stride, 1000 meta_buf_height); 1001 meta_size_c = ubwc_metadata_plane_buffer_size(metadata_stride, 1002 meta_buf_height); 1003 size = ref_buf_size + meta_size_y + meta_size_c; 1004 size = (size * (num_ref + 3)) + 4096; 1005 } 1006 1007 return size; 1008 } 1009 1010 static u32 enc_persist_size(void) 1011 { 1012 return HFI_IRIS2_ENC_PERSIST_SIZE; 1013 } 1014 1015 static u32 h264d_persist1_size(void) 1016 { 1017 return ALIGN((SIZE_SLIST_BUF_H264 * NUM_SLIST_BUF_H264 1018 + NUM_HW_PIC_BUF * SIZE_SEI_USERDATA), HFI_DMA_ALIGNMENT); 1019 } 1020 1021 static u32 h265d_persist1_size(void) 1022 { 1023 return ALIGN((SIZE_SLIST_BUF_H265 * NUM_SLIST_BUF_H265 + H265_NUM_TILE 1024 * sizeof(u32)), HFI_DMA_ALIGNMENT); 1025 } 1026 1027 static u32 vp8d_persist1_size(void) 1028 { 1029 return ALIGN(VP8_NUM_PROBABILITY_TABLE_BUF * VP8_PROB_TABLE_SIZE, 1030 HFI_DMA_ALIGNMENT); 1031 } 1032 1033 static u32 vp9d_persist1_size(void) 1034 { 1035 return 1036 ALIGN(VP9_NUM_PROBABILITY_TABLE_BUF * VP9_PROB_TABLE_SIZE, 1037 HFI_DMA_ALIGNMENT) + 1038 ALIGN(HFI_IRIS2_VP9D_COMV_SIZE, HFI_DMA_ALIGNMENT) + 1039 ALIGN(MAX_SUPERFRAME_HEADER_LEN, HFI_DMA_ALIGNMENT) + 1040 ALIGN(VP9_UDC_HEADER_BUF_SIZE, HFI_DMA_ALIGNMENT) + 1041 ALIGN(VP9_NUM_FRAME_INFO_BUF * CCE_TILE_OFFSET_SIZE, 1042 HFI_DMA_ALIGNMENT); 1043 } 1044 1045 static u32 mpeg2d_persist1_size(void) 1046 { 1047 return QMATRIX_SIZE + MP2D_QPDUMP_SIZE; 1048 } 1049 1050 struct dec_bufsize_ops { 1051 u32 (*scratch)(u32 width, u32 height, bool is_interlaced); 1052 u32 (*scratch1)(u32 width, u32 height, u32 min_buf_count, 1053 bool split_mode_enabled, u32 num_vpp_pipes); 1054 u32 (*persist1)(void); 1055 }; 1056 1057 struct enc_bufsize_ops { 1058 u32 (*scratch)(u32 width, u32 height, u32 work_mode, u32 num_vpp_pipes, 1059 u32 rc_type); 1060 u32 (*scratch1)(u32 width, u32 height, u32 num_ref, bool ten_bit, 1061 u32 num_vpp_pipes); 1062 u32 (*scratch2)(u32 width, u32 height, u32 num_ref, bool ten_bit); 1063 u32 (*persist)(void); 1064 }; 1065 1066 static struct dec_bufsize_ops dec_h264_ops = { 1067 .scratch = h264d_scratch_size, 1068 .scratch1 = h264d_scratch1_size, 1069 .persist1 = h264d_persist1_size, 1070 }; 1071 1072 static struct dec_bufsize_ops dec_h265_ops = { 1073 .scratch = h265d_scratch_size, 1074 .scratch1 = h265d_scratch1_size, 1075 .persist1 = h265d_persist1_size, 1076 }; 1077 1078 static struct dec_bufsize_ops dec_vp8_ops = { 1079 .scratch = vpxd_scratch_size, 1080 .scratch1 = vp8d_scratch1_size, 1081 .persist1 = vp8d_persist1_size, 1082 }; 1083 1084 static struct dec_bufsize_ops dec_vp9_ops = { 1085 .scratch = vpxd_scratch_size, 1086 .scratch1 = vp9d_scratch1_size, 1087 .persist1 = vp9d_persist1_size, 1088 }; 1089 1090 static struct dec_bufsize_ops dec_mpeg2_ops = { 1091 .scratch = mpeg2d_scratch_size, 1092 .scratch1 = mpeg2d_scratch1_size, 1093 .persist1 = mpeg2d_persist1_size, 1094 }; 1095 1096 static struct enc_bufsize_ops enc_h264_ops = { 1097 .scratch = h264e_scratch_size, 1098 .scratch1 = h264e_scratch1_size, 1099 .scratch2 = enc_scratch2_size, 1100 .persist = enc_persist_size, 1101 }; 1102 1103 static struct enc_bufsize_ops enc_h265_ops = { 1104 .scratch = h265e_scratch_size, 1105 .scratch1 = h265e_scratch1_size, 1106 .scratch2 = enc_scratch2_size, 1107 .persist = enc_persist_size, 1108 }; 1109 1110 static struct enc_bufsize_ops enc_vp8_ops = { 1111 .scratch = vp8e_scratch_size, 1112 .scratch1 = vp8e_scratch1_size, 1113 .scratch2 = enc_scratch2_size, 1114 .persist = enc_persist_size, 1115 }; 1116 1117 static u32 1118 calculate_dec_input_frame_size(u32 width, u32 height, u32 codec, 1119 u32 max_mbs_per_frame, u32 buffer_size_limit) 1120 { 1121 u32 frame_size, num_mbs; 1122 u32 div_factor = 1; 1123 u32 base_res_mbs = NUM_MBS_4K; 1124 1125 /* 1126 * Decoder input size calculation: 1127 * If clip is 8k buffer size is calculated for 8k : 8k mbs/4 1128 * For 8k cases we expect width/height to be set always. 1129 * In all other cases size is calculated for 4k: 1130 * 4k mbs for VP8/VP9 and 4k/2 for remaining codecs 1131 */ 1132 num_mbs = (ALIGN(height, 16) * ALIGN(width, 16)) / 256; 1133 if (num_mbs > NUM_MBS_4K) { 1134 div_factor = 4; 1135 base_res_mbs = max_mbs_per_frame; 1136 } else { 1137 base_res_mbs = NUM_MBS_4K; 1138 if (codec == V4L2_PIX_FMT_VP9) 1139 div_factor = 1; 1140 else 1141 div_factor = 2; 1142 } 1143 1144 frame_size = base_res_mbs * MB_SIZE_IN_PIXEL * 3 / 2 / div_factor; 1145 1146 /* multiply by 10/8 (1.25) to get size for 10 bit case */ 1147 if (codec == V4L2_PIX_FMT_VP9 || codec == V4L2_PIX_FMT_HEVC) 1148 frame_size = frame_size + (frame_size >> 2); 1149 1150 if (buffer_size_limit && buffer_size_limit < frame_size) 1151 frame_size = buffer_size_limit; 1152 1153 return ALIGN(frame_size, SZ_4K); 1154 } 1155 1156 static int output_buffer_count(u32 session_type, u32 codec) 1157 { 1158 u32 output_min_count; 1159 1160 if (session_type == VIDC_SESSION_TYPE_DEC) { 1161 switch (codec) { 1162 case V4L2_PIX_FMT_MPEG2: 1163 case V4L2_PIX_FMT_VP8: 1164 output_min_count = 6; 1165 break; 1166 case V4L2_PIX_FMT_VP9: 1167 output_min_count = 11; 1168 break; 1169 case V4L2_PIX_FMT_H264: 1170 case V4L2_PIX_FMT_HEVC: 1171 default: 1172 output_min_count = 18; 1173 break; 1174 } 1175 } else { 1176 output_min_count = MIN_ENC_OUTPUT_BUFFERS; 1177 } 1178 1179 return output_min_count; 1180 } 1181 1182 static int bufreq_dec(struct hfi_plat_buffers_params *params, u32 buftype, 1183 struct hfi_buffer_requirements *bufreq) 1184 { 1185 enum hfi_version version = params->version; 1186 u32 codec = params->codec; 1187 u32 width = params->width, height = params->height, out_min_count; 1188 struct dec_bufsize_ops *dec_ops; 1189 bool is_secondary_output = params->dec.is_secondary_output; 1190 bool is_interlaced = params->dec.is_interlaced; 1191 u32 max_mbs_per_frame = params->dec.max_mbs_per_frame; 1192 u32 buffer_size_limit = params->dec.buffer_size_limit; 1193 u32 num_vpp_pipes = params->num_vpp_pipes; 1194 1195 switch (codec) { 1196 case V4L2_PIX_FMT_H264: 1197 dec_ops = &dec_h264_ops; 1198 break; 1199 case V4L2_PIX_FMT_HEVC: 1200 dec_ops = &dec_h265_ops; 1201 break; 1202 case V4L2_PIX_FMT_VP8: 1203 dec_ops = &dec_vp8_ops; 1204 break; 1205 case V4L2_PIX_FMT_VP9: 1206 dec_ops = &dec_vp9_ops; 1207 break; 1208 case V4L2_PIX_FMT_MPEG2: 1209 dec_ops = &dec_mpeg2_ops; 1210 break; 1211 default: 1212 return -EINVAL; 1213 } 1214 1215 out_min_count = output_buffer_count(VIDC_SESSION_TYPE_DEC, codec); 1216 /* Max of driver and FW count */ 1217 out_min_count = max(out_min_count, bufreq->count_min); 1218 1219 bufreq->type = buftype; 1220 bufreq->region_size = 0; 1221 bufreq->count_min = 1; 1222 bufreq->count_actual = 1; 1223 bufreq->hold_count = 1; 1224 bufreq->contiguous = 1; 1225 bufreq->alignment = 256; 1226 1227 if (buftype == HFI_BUFFER_INPUT) { 1228 bufreq->count_min = MIN_INPUT_BUFFERS; 1229 bufreq->size = 1230 calculate_dec_input_frame_size(width, height, codec, 1231 max_mbs_per_frame, 1232 buffer_size_limit); 1233 } else if (buftype == HFI_BUFFER_OUTPUT || 1234 buftype == HFI_BUFFER_OUTPUT2) { 1235 bufreq->count_min = out_min_count; 1236 bufreq->size = 1237 venus_helper_get_framesz_raw(params->hfi_color_fmt, 1238 width, height); 1239 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH(version)) { 1240 bufreq->size = dec_ops->scratch(width, height, is_interlaced); 1241 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_1(version)) { 1242 bufreq->size = dec_ops->scratch1(width, height, VB2_MAX_FRAME, 1243 is_secondary_output, 1244 num_vpp_pipes); 1245 } else if (buftype == HFI_BUFFER_INTERNAL_PERSIST_1) { 1246 bufreq->size = dec_ops->persist1(); 1247 } else { 1248 bufreq->size = 0; 1249 } 1250 1251 return 0; 1252 } 1253 1254 static int bufreq_enc(struct hfi_plat_buffers_params *params, u32 buftype, 1255 struct hfi_buffer_requirements *bufreq) 1256 { 1257 enum hfi_version version = params->version; 1258 struct enc_bufsize_ops *enc_ops; 1259 u32 width = params->width; 1260 u32 height = params->height; 1261 bool is_tenbit = params->enc.is_tenbit; 1262 u32 num_bframes = params->enc.num_b_frames; 1263 u32 codec = params->codec; 1264 u32 work_mode = params->enc.work_mode; 1265 u32 rc_type = params->enc.rc_type; 1266 u32 num_vpp_pipes = params->num_vpp_pipes; 1267 u32 num_ref; 1268 1269 switch (codec) { 1270 case V4L2_PIX_FMT_H264: 1271 enc_ops = &enc_h264_ops; 1272 break; 1273 case V4L2_PIX_FMT_HEVC: 1274 enc_ops = &enc_h265_ops; 1275 break; 1276 case V4L2_PIX_FMT_VP8: 1277 enc_ops = &enc_vp8_ops; 1278 break; 1279 default: 1280 return -EINVAL; 1281 } 1282 1283 num_ref = num_bframes > 0 ? num_bframes + 1 : 1; 1284 1285 bufreq->type = buftype; 1286 bufreq->region_size = 0; 1287 bufreq->count_min = 1; 1288 bufreq->count_actual = 1; 1289 bufreq->hold_count = 1; 1290 bufreq->contiguous = 1; 1291 bufreq->alignment = 256; 1292 1293 if (buftype == HFI_BUFFER_INPUT) { 1294 bufreq->count_min = MIN_INPUT_BUFFERS; 1295 bufreq->size = 1296 venus_helper_get_framesz_raw(params->hfi_color_fmt, 1297 width, height); 1298 } else if (buftype == HFI_BUFFER_OUTPUT || 1299 buftype == HFI_BUFFER_OUTPUT2) { 1300 bufreq->count_min = 1301 output_buffer_count(VIDC_SESSION_TYPE_ENC, codec); 1302 bufreq->size = calculate_enc_output_frame_size(width, height, 1303 rc_type); 1304 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH(version)) { 1305 bufreq->size = enc_ops->scratch(width, height, work_mode, 1306 num_vpp_pipes, rc_type); 1307 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_1(version)) { 1308 bufreq->size = enc_ops->scratch1(width, height, num_ref, 1309 is_tenbit, num_vpp_pipes); 1310 } else if (buftype == HFI_BUFFER_INTERNAL_SCRATCH_2(version)) { 1311 bufreq->size = enc_ops->scratch2(width, height, num_ref, 1312 is_tenbit); 1313 } else if (buftype == HFI_BUFFER_INTERNAL_PERSIST) { 1314 bufreq->size = enc_ops->persist(); 1315 } else { 1316 bufreq->size = 0; 1317 } 1318 1319 return 0; 1320 } 1321 1322 int hfi_plat_bufreq_v6(struct hfi_plat_buffers_params *params, u32 session_type, 1323 u32 buftype, struct hfi_buffer_requirements *bufreq) 1324 { 1325 if (session_type == VIDC_SESSION_TYPE_DEC) 1326 return bufreq_dec(params, buftype, bufreq); 1327 else 1328 return bufreq_enc(params, buftype, bufreq); 1329 } 1330