1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2023 MediaTek Inc. 4 * Author: Xiaoyong Lu <xiaoyong.lu@mediatek.com> 5 */ 6 7 #include <linux/module.h> 8 #include <linux/slab.h> 9 #include <media/videobuf2-dma-contig.h> 10 11 #include "../mtk_vcodec_dec.h" 12 #include "../../common/mtk_vcodec_intr.h" 13 #include "../vdec_drv_base.h" 14 #include "../vdec_drv_if.h" 15 #include "../vdec_vpu_if.h" 16 17 #define AV1_MAX_FRAME_BUF_COUNT (V4L2_AV1_TOTAL_REFS_PER_FRAME + 1) 18 #define AV1_TILE_BUF_SIZE 64 19 #define AV1_SCALE_SUBPEL_BITS 10 20 #define AV1_REF_SCALE_SHIFT 14 21 #define AV1_REF_NO_SCALE BIT(AV1_REF_SCALE_SHIFT) 22 #define AV1_REF_INVALID_SCALE -1 23 #define AV1_CDF_TABLE_BUFFER_SIZE 16384 24 #define AV1_PRIMARY_REF_NONE 7 25 26 #define AV1_INVALID_IDX -1 27 28 #define AV1_DIV_ROUND_UP_POW2(value, n) \ 29 ({ \ 30 typeof(n) _n = n; \ 31 typeof(value) _value = value; \ 32 (_value + (BIT(_n) >> 1)) >> _n; \ 33 }) 34 35 #define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n) \ 36 ({ \ 37 typeof(n) _n_ = n; \ 38 typeof(value) _value_ = value; \ 39 (((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_)) \ 40 : AV1_DIV_ROUND_UP_POW2((_value_), (_n_))); \ 41 }) 42 43 #define BIT_FLAG(x, bit) (!!((x)->flags & (bit))) 44 #define SEGMENTATION_FLAG(x, name) (!!((x)->flags & V4L2_AV1_SEGMENTATION_FLAG_##name)) 45 #define QUANT_FLAG(x, name) (!!((x)->flags & V4L2_AV1_QUANTIZATION_FLAG_##name)) 46 #define SEQUENCE_FLAG(x, name) (!!((x)->flags & V4L2_AV1_SEQUENCE_FLAG_##name)) 47 #define FH_FLAG(x, name) (!!((x)->flags & V4L2_AV1_FRAME_FLAG_##name)) 48 49 #define MINQ 0 50 #define MAXQ 255 51 52 #define DIV_LUT_PREC_BITS 14 53 #define DIV_LUT_BITS 8 54 #define DIV_LUT_NUM BIT(DIV_LUT_BITS) 55 #define WARP_PARAM_REDUCE_BITS 6 56 #define WARPEDMODEL_PREC_BITS 16 57 58 #define SEG_LVL_ALT_Q 0 59 #define SECONDARY_FILTER_STRENGTH_NUM_BITS 2 60 61 static const short div_lut[DIV_LUT_NUM + 1] = { 62 16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768, 63 15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142, 64 15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564, 65 14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028, 66 13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530, 67 13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066, 68 13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633, 69 12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228, 70 12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848, 71 11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491, 72 11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155, 73 11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838, 74 10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538, 75 10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255, 76 10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986, 77 9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732, 78 9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489, 79 9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259, 80 9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039, 81 9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830, 82 8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630, 83 8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439, 84 8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257, 85 8240, 8224, 8208, 8192, 86 }; 87 88 /** 89 * struct vdec_av1_slice_init_vsi - VSI used to initialize instance 90 * @architecture: architecture type 91 * @reserved: reserved 92 * @core_vsi: for core vsi 93 * @cdf_table_addr: cdf table addr 94 * @cdf_table_size: cdf table size 95 * @iq_table_addr: iq table addr 96 * @iq_table_size: iq table size 97 * @vsi_size: share vsi structure size 98 */ 99 struct vdec_av1_slice_init_vsi { 100 u32 architecture; 101 u32 reserved; 102 u64 core_vsi; 103 u64 cdf_table_addr; 104 u32 cdf_table_size; 105 u64 iq_table_addr; 106 u32 iq_table_size; 107 u32 vsi_size; 108 }; 109 110 /** 111 * struct vdec_av1_slice_mem - memory address and size 112 * @buf: dma_addr padding 113 * @dma_addr: buffer address 114 * @size: buffer size 115 * @dma_addr_end: buffer end address 116 * @padding: for padding 117 */ 118 struct vdec_av1_slice_mem { 119 union { 120 u64 buf; 121 dma_addr_t dma_addr; 122 }; 123 union { 124 size_t size; 125 dma_addr_t dma_addr_end; 126 u64 padding; 127 }; 128 }; 129 130 /** 131 * struct vdec_av1_slice_state - decoding state 132 * @err : err type for decode 133 * @full : transcoded buffer is full or not 134 * @timeout : decode timeout or not 135 * @perf : performance enable 136 * @crc : hw checksum 137 * @out_size : hw output size 138 */ 139 struct vdec_av1_slice_state { 140 int err; 141 u32 full; 142 u32 timeout; 143 u32 perf; 144 u32 crc[16]; 145 u32 out_size; 146 }; 147 148 /* 149 * enum vdec_av1_slice_resolution_level - resolution level 150 */ 151 enum vdec_av1_slice_resolution_level { 152 AV1_RES_NONE, 153 AV1_RES_FHD, 154 AV1_RES_4K, 155 AV1_RES_8K, 156 }; 157 158 /* 159 * enum vdec_av1_slice_frame_type - av1 frame type 160 */ 161 enum vdec_av1_slice_frame_type { 162 AV1_KEY_FRAME = 0, 163 AV1_INTER_FRAME, 164 AV1_INTRA_ONLY_FRAME, 165 AV1_SWITCH_FRAME, 166 AV1_FRAME_TYPES, 167 }; 168 169 /* 170 * enum vdec_av1_slice_reference_mode - reference mode type 171 */ 172 enum vdec_av1_slice_reference_mode { 173 AV1_SINGLE_REFERENCE = 0, 174 AV1_COMPOUND_REFERENCE, 175 AV1_REFERENCE_MODE_SELECT, 176 AV1_REFERENCE_MODES, 177 }; 178 179 /** 180 * struct vdec_av1_slice_tile_group - info for each tile 181 * @num_tiles: tile number 182 * @tile_size: input size for each tile 183 * @tile_start_offset: tile offset to input buffer 184 */ 185 struct vdec_av1_slice_tile_group { 186 u32 num_tiles; 187 u32 tile_size[V4L2_AV1_MAX_TILE_COUNT]; 188 u32 tile_start_offset[V4L2_AV1_MAX_TILE_COUNT]; 189 }; 190 191 /** 192 * struct vdec_av1_slice_scale_factors - scale info for each ref frame 193 * @is_scaled: frame is scaled or not 194 * @x_scale: frame width scale coefficient 195 * @y_scale: frame height scale coefficient 196 * @x_step: width step for x_scale 197 * @y_step: height step for y_scale 198 */ 199 struct vdec_av1_slice_scale_factors { 200 u8 is_scaled; 201 int x_scale; 202 int y_scale; 203 int x_step; 204 int y_step; 205 }; 206 207 /** 208 * struct vdec_av1_slice_frame_refs - ref frame info 209 * @ref_fb_idx: ref slot index 210 * @ref_map_idx: ref frame index 211 * @scale_factors: scale factors for each ref frame 212 */ 213 struct vdec_av1_slice_frame_refs { 214 int ref_fb_idx; 215 int ref_map_idx; 216 struct vdec_av1_slice_scale_factors scale_factors; 217 }; 218 219 /** 220 * struct vdec_av1_slice_gm - AV1 Global Motion parameters 221 * @wmtype: The type of global motion transform used 222 * @wmmat: gm_params 223 * @alpha: alpha info 224 * @beta: beta info 225 * @gamma: gamma info 226 * @delta: delta info 227 * @invalid: is invalid or not 228 */ 229 struct vdec_av1_slice_gm { 230 int wmtype; 231 int wmmat[8]; 232 short alpha; 233 short beta; 234 short gamma; 235 short delta; 236 char invalid; 237 }; 238 239 /** 240 * struct vdec_av1_slice_sm - AV1 Skip Mode parameters 241 * @skip_mode_allowed: Skip Mode is allowed or not 242 * @skip_mode_present: specified that the skip_mode will be present or not 243 * @skip_mode_frame: specifies the frames to use for compound prediction 244 */ 245 struct vdec_av1_slice_sm { 246 u8 skip_mode_allowed; 247 u8 skip_mode_present; 248 int skip_mode_frame[2]; 249 }; 250 251 /** 252 * struct vdec_av1_slice_seg - AV1 Segmentation params 253 * @segmentation_enabled: this frame makes use of the segmentation tool or not 254 * @segmentation_update_map: segmentation map are updated during the decoding frame 255 * @segmentation_temporal_update:segmentation map are coded relative the existing segmentaion map 256 * @segmentation_update_data: new parameters are about to be specified for each segment 257 * @feature_data: specifies the feature data for a segment feature 258 * @feature_enabled_mask: the corresponding feature value is coded or not. 259 * @segid_preskip: segment id will be read before the skip syntax element. 260 * @last_active_segid: the highest numbered segment id that has some enabled feature 261 */ 262 struct vdec_av1_slice_seg { 263 u8 segmentation_enabled; 264 u8 segmentation_update_map; 265 u8 segmentation_temporal_update; 266 u8 segmentation_update_data; 267 int feature_data[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX]; 268 u16 feature_enabled_mask[V4L2_AV1_MAX_SEGMENTS]; 269 int segid_preskip; 270 int last_active_segid; 271 }; 272 273 /** 274 * struct vdec_av1_slice_delta_q_lf - AV1 Loop Filter delta parameters 275 * @delta_q_present: specified whether quantizer index delta values are present 276 * @delta_q_res: specifies the left shift which should be applied to decoded quantizer index 277 * @delta_lf_present: specifies whether loop filter delta values are present 278 * @delta_lf_res: specifies the left shift which should be applied to decoded 279 * loop filter delta values 280 * @delta_lf_multi: specifies that separate loop filter deltas are sent for horizontal 281 * luma edges,vertical luma edges,the u edges, and the v edges. 282 */ 283 struct vdec_av1_slice_delta_q_lf { 284 u8 delta_q_present; 285 u8 delta_q_res; 286 u8 delta_lf_present; 287 u8 delta_lf_res; 288 u8 delta_lf_multi; 289 }; 290 291 /** 292 * struct vdec_av1_slice_quantization - AV1 Quantization params 293 * @base_q_idx: indicates the base frame qindex. This is used for Y AC 294 * coefficients and as the base value for the other quantizers. 295 * @qindex: qindex 296 * @delta_qydc: indicates the Y DC quantizer relative to base_q_idx 297 * @delta_qudc: indicates the U DC quantizer relative to base_q_idx. 298 * @delta_quac: indicates the U AC quantizer relative to base_q_idx 299 * @delta_qvdc: indicates the V DC quantizer relative to base_q_idx 300 * @delta_qvac: indicates the V AC quantizer relative to base_q_idx 301 * @using_qmatrix: specifies that the quantizer matrix will be used to 302 * compute quantizers 303 * @qm_y: specifies the level in the quantizer matrix that should 304 * be used for luma plane decoding 305 * @qm_u: specifies the level in the quantizer matrix that should 306 * be used for chroma U plane decoding. 307 * @qm_v: specifies the level in the quantizer matrix that should be 308 * used for chroma V plane decoding 309 */ 310 struct vdec_av1_slice_quantization { 311 int base_q_idx; 312 int qindex[V4L2_AV1_MAX_SEGMENTS]; 313 int delta_qydc; 314 int delta_qudc; 315 int delta_quac; 316 int delta_qvdc; 317 int delta_qvac; 318 u8 using_qmatrix; 319 u8 qm_y; 320 u8 qm_u; 321 u8 qm_v; 322 }; 323 324 /** 325 * struct vdec_av1_slice_lr - AV1 Loop Restauration parameters 326 * @use_lr: whether to use loop restoration 327 * @use_chroma_lr: whether to use chroma loop restoration 328 * @frame_restoration_type: specifies the type of restoration used for each plane 329 * @loop_restoration_size: pecifies the size of loop restoration units in units 330 * of samples in the current plane 331 */ 332 struct vdec_av1_slice_lr { 333 u8 use_lr; 334 u8 use_chroma_lr; 335 u8 frame_restoration_type[V4L2_AV1_NUM_PLANES_MAX]; 336 u32 loop_restoration_size[V4L2_AV1_NUM_PLANES_MAX]; 337 }; 338 339 /** 340 * struct vdec_av1_slice_loop_filter - AV1 Loop filter parameters 341 * @loop_filter_level: an array containing loop filter strength values. 342 * @loop_filter_ref_deltas: contains the adjustment needed for the filter 343 * level based on the chosen reference frame 344 * @loop_filter_mode_deltas: contains the adjustment needed for the filter 345 * level based on the chosen mode 346 * @loop_filter_sharpness: indicates the sharpness level. The loop_filter_level 347 * and loop_filter_sharpness together determine when 348 * a block edge is filtered, and by how much the 349 * filtering can change the sample values 350 * @loop_filter_delta_enabled: filetr level depends on the mode and reference 351 * frame used to predict a block 352 */ 353 struct vdec_av1_slice_loop_filter { 354 u8 loop_filter_level[4]; 355 int loop_filter_ref_deltas[V4L2_AV1_TOTAL_REFS_PER_FRAME]; 356 int loop_filter_mode_deltas[2]; 357 u8 loop_filter_sharpness; 358 u8 loop_filter_delta_enabled; 359 }; 360 361 /** 362 * struct vdec_av1_slice_cdef - AV1 CDEF parameters 363 * @cdef_damping: controls the amount of damping in the deringing filter 364 * @cdef_y_strength: specifies the strength of the primary filter and secondary filter 365 * @cdef_uv_strength: specifies the strength of the primary filter and secondary filter 366 * @cdef_bits: specifies the number of bits needed to specify which 367 * CDEF filter to apply 368 */ 369 struct vdec_av1_slice_cdef { 370 u8 cdef_damping; 371 u8 cdef_y_strength[8]; 372 u8 cdef_uv_strength[8]; 373 u8 cdef_bits; 374 }; 375 376 /** 377 * struct vdec_av1_slice_mfmv - AV1 mfmv parameters 378 * @mfmv_valid_ref: mfmv_valid_ref 379 * @mfmv_dir: mfmv_dir 380 * @mfmv_ref_to_cur: mfmv_ref_to_cur 381 * @mfmv_ref_frame_idx: mfmv_ref_frame_idx 382 * @mfmv_count: mfmv_count 383 */ 384 struct vdec_av1_slice_mfmv { 385 u32 mfmv_valid_ref[3]; 386 u32 mfmv_dir[3]; 387 int mfmv_ref_to_cur[3]; 388 int mfmv_ref_frame_idx[3]; 389 int mfmv_count; 390 }; 391 392 /** 393 * struct vdec_av1_slice_tile - AV1 Tile info 394 * @tile_cols: specifies the number of tiles across the frame 395 * @tile_rows: pecifies the number of tiles down the frame 396 * @mi_col_starts: an array specifying the start column 397 * @mi_row_starts: an array specifying the start row 398 * @context_update_tile_id: specifies which tile to use for the CDF update 399 * @uniform_tile_spacing_flag: tiles are uniformly spaced across the frame 400 * or the tile sizes are coded 401 */ 402 struct vdec_av1_slice_tile { 403 u8 tile_cols; 404 u8 tile_rows; 405 int mi_col_starts[V4L2_AV1_MAX_TILE_COLS + 1]; 406 int mi_row_starts[V4L2_AV1_MAX_TILE_ROWS + 1]; 407 u8 context_update_tile_id; 408 u8 uniform_tile_spacing_flag; 409 }; 410 411 /** 412 * struct vdec_av1_slice_uncompressed_header - Represents an AV1 Frame Header OBU 413 * @use_ref_frame_mvs: use_ref_frame_mvs flag 414 * @order_hint: specifies OrderHintBits least significant bits of the expected 415 * @gm: global motion param 416 * @upscaled_width: the upscaled width 417 * @frame_width: frame's width 418 * @frame_height: frame's height 419 * @reduced_tx_set: frame is restricted to a reduced subset of the full 420 * set of transform types 421 * @tx_mode: specifies how the transform size is determined 422 * @uniform_tile_spacing_flag: tiles are uniformly spaced across the frame 423 * or the tile sizes are coded 424 * @interpolation_filter: specifies the filter selection used for performing inter prediction 425 * @allow_warped_motion: motion_mode may be present or not 426 * @is_motion_mode_switchable : euqlt to 0 specifies that only the SIMPLE motion mode will be used 427 * @reference_mode : frame reference mode selected 428 * @allow_high_precision_mv: specifies that motion vectors are specified to 429 * quarter pel precision or to eighth pel precision 430 * @allow_intra_bc: ubducates that intra block copy may be used in this frame 431 * @force_integer_mv: specifies motion vectors will always be integers or 432 * can contain fractional bits 433 * @allow_screen_content_tools: intra blocks may use palette encoding 434 * @error_resilient_mode: error resislent mode is enable/disable 435 * @frame_type: specifies the AV1 frame type 436 * @primary_ref_frame: specifies which reference frame contains the CDF values 437 * and other state that should be loaded at the start of the frame 438 * slots will be updated with the current frame after it is decoded 439 * @disable_frame_end_update_cdf:indicates the end of frame CDF update is disable or enable 440 * @disable_cdf_update: specified whether the CDF update in the symbol 441 * decoding process should be disables 442 * @skip_mode: av1 skip mode parameters 443 * @seg: av1 segmentaon parameters 444 * @delta_q_lf: av1 delta loop fileter 445 * @quant: av1 Quantization params 446 * @lr: av1 Loop Restauration parameters 447 * @superres_denom: the denominator for the upscaling ratio 448 * @loop_filter: av1 Loop filter parameters 449 * @cdef: av1 CDEF parameters 450 * @mfmv: av1 mfmv parameters 451 * @tile: av1 Tile info 452 * @frame_is_intra: intra frame 453 * @loss_less_array: loss less array 454 * @coded_loss_less: coded lsss less 455 * @mi_rows: size of mi unit in rows 456 * @mi_cols: size of mi unit in cols 457 */ 458 struct vdec_av1_slice_uncompressed_header { 459 u8 use_ref_frame_mvs; 460 int order_hint; 461 struct vdec_av1_slice_gm gm[V4L2_AV1_TOTAL_REFS_PER_FRAME]; 462 u32 upscaled_width; 463 u32 frame_width; 464 u32 frame_height; 465 u8 reduced_tx_set; 466 u8 tx_mode; 467 u8 uniform_tile_spacing_flag; 468 u8 interpolation_filter; 469 u8 allow_warped_motion; 470 u8 is_motion_mode_switchable; 471 u8 reference_mode; 472 u8 allow_high_precision_mv; 473 u8 allow_intra_bc; 474 u8 force_integer_mv; 475 u8 allow_screen_content_tools; 476 u8 error_resilient_mode; 477 u8 frame_type; 478 u8 primary_ref_frame; 479 u8 disable_frame_end_update_cdf; 480 u32 disable_cdf_update; 481 struct vdec_av1_slice_sm skip_mode; 482 struct vdec_av1_slice_seg seg; 483 struct vdec_av1_slice_delta_q_lf delta_q_lf; 484 struct vdec_av1_slice_quantization quant; 485 struct vdec_av1_slice_lr lr; 486 u32 superres_denom; 487 struct vdec_av1_slice_loop_filter loop_filter; 488 struct vdec_av1_slice_cdef cdef; 489 struct vdec_av1_slice_mfmv mfmv; 490 struct vdec_av1_slice_tile tile; 491 u8 frame_is_intra; 492 u8 loss_less_array[V4L2_AV1_MAX_SEGMENTS]; 493 u8 coded_loss_less; 494 u32 mi_rows; 495 u32 mi_cols; 496 }; 497 498 /** 499 * struct vdec_av1_slice_seq_header - Represents an AV1 Sequence OBU 500 * @bitdepth: the bitdepth to use for the sequence 501 * @enable_superres: specifies whether the use_superres syntax element may be present 502 * @enable_filter_intra: specifies the use_filter_intra syntax element may be present 503 * @enable_intra_edge_filter: whether the intra edge filtering process should be enabled 504 * @enable_interintra_compound: specifies the mode info fo rinter blocks may 505 * contain the syntax element interintra 506 * @enable_masked_compound: specifies the mode info fo rinter blocks may 507 * contain the syntax element compound_type 508 * @enable_dual_filter: the inter prediction filter type may be specified independently 509 * @enable_jnt_comp: distance weights process may be used for inter prediction 510 * @mono_chrome: indicates the video does not contain U and V color planes 511 * @enable_order_hint: tools based on the values of order hints may be used 512 * @order_hint_bits: the number of bits used for the order_hint field at each frame 513 * @use_128x128_superblock: indicates superblocks contain 128*128 luma samples 514 * @subsampling_x: the chroma subsamling format 515 * @subsampling_y: the chroma subsamling format 516 * @max_frame_width: the maximum frame width for the frames represented by sequence 517 * @max_frame_height: the maximum frame height for the frames represented by sequence 518 */ 519 struct vdec_av1_slice_seq_header { 520 u8 bitdepth; 521 u8 enable_superres; 522 u8 enable_filter_intra; 523 u8 enable_intra_edge_filter; 524 u8 enable_interintra_compound; 525 u8 enable_masked_compound; 526 u8 enable_dual_filter; 527 u8 enable_jnt_comp; 528 u8 mono_chrome; 529 u8 enable_order_hint; 530 u8 order_hint_bits; 531 u8 use_128x128_superblock; 532 u8 subsampling_x; 533 u8 subsampling_y; 534 u32 max_frame_width; 535 u32 max_frame_height; 536 }; 537 538 /** 539 * struct vdec_av1_slice_frame - Represents current Frame info 540 * @uh: uncompressed header info 541 * @seq: sequence header info 542 * @large_scale_tile: is large scale mode 543 * @cur_ts: current frame timestamp 544 * @prev_fb_idx: prev slot id 545 * @ref_frame_sign_bias: arrays for ref_frame sign bias 546 * @order_hints: arrays for ref_frame order hint 547 * @ref_frame_valid: arrays for valid ref_frame 548 * @ref_frame_map: map to slot frame info 549 * @frame_refs: ref_frame info 550 */ 551 struct vdec_av1_slice_frame { 552 struct vdec_av1_slice_uncompressed_header uh; 553 struct vdec_av1_slice_seq_header seq; 554 u8 large_scale_tile; 555 u64 cur_ts; 556 int prev_fb_idx; 557 u8 ref_frame_sign_bias[V4L2_AV1_TOTAL_REFS_PER_FRAME]; 558 u32 order_hints[V4L2_AV1_REFS_PER_FRAME]; 559 u32 ref_frame_valid[V4L2_AV1_REFS_PER_FRAME]; 560 int ref_frame_map[V4L2_AV1_TOTAL_REFS_PER_FRAME]; 561 struct vdec_av1_slice_frame_refs frame_refs[V4L2_AV1_REFS_PER_FRAME]; 562 }; 563 564 /** 565 * struct vdec_av1_slice_work_buffer - work buffer for lat 566 * @mv_addr: mv buffer memory info 567 * @cdf_addr: cdf buffer memory info 568 * @segid_addr: segid buffer memory info 569 */ 570 struct vdec_av1_slice_work_buffer { 571 struct vdec_av1_slice_mem mv_addr; 572 struct vdec_av1_slice_mem cdf_addr; 573 struct vdec_av1_slice_mem segid_addr; 574 }; 575 576 /** 577 * struct vdec_av1_slice_frame_info - frame info for each slot 578 * @frame_type: frame type 579 * @frame_is_intra: is intra frame 580 * @order_hint: order hint 581 * @order_hints: referece frame order hint 582 * @upscaled_width: upscale width 583 * @pic_pitch: buffer pitch 584 * @frame_width: frane width 585 * @frame_height: frame height 586 * @mi_rows: rows in mode info 587 * @mi_cols: cols in mode info 588 * @ref_count: mark to reference frame counts 589 */ 590 struct vdec_av1_slice_frame_info { 591 u8 frame_type; 592 u8 frame_is_intra; 593 int order_hint; 594 u32 order_hints[V4L2_AV1_REFS_PER_FRAME]; 595 u32 upscaled_width; 596 u32 pic_pitch; 597 u32 frame_width; 598 u32 frame_height; 599 u32 mi_rows; 600 u32 mi_cols; 601 int ref_count; 602 }; 603 604 /** 605 * struct vdec_av1_slice_slot - slot info that needs to be saved in the global instance 606 * @frame_info: frame info for each slot 607 * @timestamp: time stamp info 608 */ 609 struct vdec_av1_slice_slot { 610 struct vdec_av1_slice_frame_info frame_info[AV1_MAX_FRAME_BUF_COUNT]; 611 u64 timestamp[AV1_MAX_FRAME_BUF_COUNT]; 612 }; 613 614 /** 615 * struct vdec_av1_slice_fb - frame buffer for decoding 616 * @y: current y buffer address info 617 * @c: current c buffer address info 618 */ 619 struct vdec_av1_slice_fb { 620 struct vdec_av1_slice_mem y; 621 struct vdec_av1_slice_mem c; 622 }; 623 624 /** 625 * struct vdec_av1_slice_vsi - exchange frame information between Main CPU and MicroP 626 * @bs: input buffer info 627 * @work_buffer: working buffe for hw 628 * @cdf_table: cdf_table buffer 629 * @cdf_tmp: cdf temp buffer 630 * @rd_mv: mv buffer for lat output , core input 631 * @ube: ube buffer 632 * @trans: transcoded buffer 633 * @err_map: err map buffer 634 * @row_info: row info buffer 635 * @fb: current y/c buffer 636 * @ref: ref y/c buffer 637 * @iq_table: iq table buffer 638 * @tile: tile buffer 639 * @slots: slots info for each frame 640 * @slot_id: current frame slot id 641 * @frame: current frame info 642 * @state: status after decode done 643 * @cur_lst_tile_id: tile id for large scale 644 */ 645 struct vdec_av1_slice_vsi { 646 /* lat */ 647 struct vdec_av1_slice_mem bs; 648 struct vdec_av1_slice_work_buffer work_buffer[AV1_MAX_FRAME_BUF_COUNT]; 649 struct vdec_av1_slice_mem cdf_table; 650 struct vdec_av1_slice_mem cdf_tmp; 651 /* LAT stage's output, Core stage's input */ 652 struct vdec_av1_slice_mem rd_mv; 653 struct vdec_av1_slice_mem ube; 654 struct vdec_av1_slice_mem trans; 655 struct vdec_av1_slice_mem err_map; 656 struct vdec_av1_slice_mem row_info; 657 /* core */ 658 struct vdec_av1_slice_fb fb; 659 struct vdec_av1_slice_fb ref[V4L2_AV1_REFS_PER_FRAME]; 660 struct vdec_av1_slice_mem iq_table; 661 /* lat and core share*/ 662 struct vdec_av1_slice_mem tile; 663 struct vdec_av1_slice_slot slots; 664 s8 slot_id; 665 struct vdec_av1_slice_frame frame; 666 struct vdec_av1_slice_state state; 667 u32 cur_lst_tile_id; 668 }; 669 670 /** 671 * struct vdec_av1_slice_pfc - per-frame context that contains a local vsi. 672 * pass it from lat to core 673 * @vsi: local vsi. copy to/from remote vsi before/after decoding 674 * @ref_idx: reference buffer timestamp 675 * @seq: picture sequence 676 */ 677 struct vdec_av1_slice_pfc { 678 struct vdec_av1_slice_vsi vsi; 679 u64 ref_idx[V4L2_AV1_REFS_PER_FRAME]; 680 int seq; 681 }; 682 683 /** 684 * struct vdec_av1_slice_instance - represent one av1 instance 685 * @ctx: pointer to codec's context 686 * @vpu: VPU instance 687 * @iq_table: iq table buffer 688 * @cdf_table: cdf table buffer 689 * @mv: mv working buffer 690 * @cdf: cdf working buffer 691 * @seg: segmentation working buffer 692 * @cdf_temp: cdf temp buffer 693 * @tile: tile buffer 694 * @slots: slots info 695 * @tile_group: tile_group entry 696 * @level: level of current resolution 697 * @width: width of last picture 698 * @height: height of last picture 699 * @frame_type: frame_type of last picture 700 * @irq_enabled: irq to Main CPU or MicroP 701 * @inneracing_mode: is inneracing mode 702 * @init_vsi: vsi used for initialized AV1 instance 703 * @vsi: vsi used for decoding/flush ... 704 * @core_vsi: vsi used for Core stage 705 * @seq: global picture sequence 706 */ 707 struct vdec_av1_slice_instance { 708 struct mtk_vcodec_dec_ctx *ctx; 709 struct vdec_vpu_inst vpu; 710 711 struct mtk_vcodec_mem iq_table; 712 struct mtk_vcodec_mem cdf_table; 713 714 struct mtk_vcodec_mem mv[AV1_MAX_FRAME_BUF_COUNT]; 715 struct mtk_vcodec_mem cdf[AV1_MAX_FRAME_BUF_COUNT]; 716 struct mtk_vcodec_mem seg[AV1_MAX_FRAME_BUF_COUNT]; 717 struct mtk_vcodec_mem cdf_temp; 718 struct mtk_vcodec_mem tile; 719 struct vdec_av1_slice_slot slots; 720 struct vdec_av1_slice_tile_group tile_group; 721 722 /* for resolution change and get_pic_info */ 723 enum vdec_av1_slice_resolution_level level; 724 u32 width; 725 u32 height; 726 727 u32 frame_type; 728 u32 irq_enabled; 729 u32 inneracing_mode; 730 731 /* MicroP vsi */ 732 union { 733 struct vdec_av1_slice_init_vsi *init_vsi; 734 struct vdec_av1_slice_vsi *vsi; 735 }; 736 struct vdec_av1_slice_vsi *core_vsi; 737 int seq; 738 }; 739 740 static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf); 741 vdec_av1_slice_get_msb(u32 n)742 static inline int vdec_av1_slice_get_msb(u32 n) 743 { 744 if (n == 0) 745 return 0; 746 return 31 ^ __builtin_clz(n); 747 } 748 vdec_av1_slice_need_scale(u32 ref_width,u32 ref_height,u32 this_width,u32 this_height)749 static inline bool vdec_av1_slice_need_scale(u32 ref_width, u32 ref_height, 750 u32 this_width, u32 this_height) 751 { 752 return ((this_width << 1) >= ref_width) && 753 ((this_height << 1) >= ref_height) && 754 (this_width <= (ref_width << 4)) && 755 (this_height <= (ref_height << 4)); 756 } 757 vdec_av1_get_ctrl_ptr(struct mtk_vcodec_dec_ctx * ctx,int id)758 static void *vdec_av1_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id) 759 { 760 struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id); 761 762 if (!ctrl) 763 return ERR_PTR(-EINVAL); 764 765 return ctrl->p_cur.p; 766 } 767 vdec_av1_slice_init_cdf_table(struct vdec_av1_slice_instance * instance)768 static int vdec_av1_slice_init_cdf_table(struct vdec_av1_slice_instance *instance) 769 { 770 u8 *remote_cdf_table; 771 struct mtk_vcodec_dec_ctx *ctx; 772 struct vdec_av1_slice_init_vsi *vsi; 773 int ret; 774 775 ctx = instance->ctx; 776 vsi = instance->vpu.vsi; 777 remote_cdf_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, 778 (u32)vsi->cdf_table_addr); 779 if (IS_ERR(remote_cdf_table)) { 780 mtk_vdec_err(ctx, "failed to map cdf table\n"); 781 return PTR_ERR(remote_cdf_table); 782 } 783 784 mtk_vdec_debug(ctx, "map cdf table to 0x%p\n", remote_cdf_table); 785 786 if (instance->cdf_table.va) 787 mtk_vcodec_mem_free(ctx, &instance->cdf_table); 788 instance->cdf_table.size = vsi->cdf_table_size; 789 790 ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_table); 791 if (ret) 792 return ret; 793 794 memcpy(instance->cdf_table.va, remote_cdf_table, vsi->cdf_table_size); 795 796 return 0; 797 } 798 vdec_av1_slice_init_iq_table(struct vdec_av1_slice_instance * instance)799 static int vdec_av1_slice_init_iq_table(struct vdec_av1_slice_instance *instance) 800 { 801 u8 *remote_iq_table; 802 struct mtk_vcodec_dec_ctx *ctx; 803 struct vdec_av1_slice_init_vsi *vsi; 804 int ret; 805 806 ctx = instance->ctx; 807 vsi = instance->vpu.vsi; 808 remote_iq_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, 809 (u32)vsi->iq_table_addr); 810 if (IS_ERR(remote_iq_table)) { 811 mtk_vdec_err(ctx, "failed to map iq table\n"); 812 return PTR_ERR(remote_iq_table); 813 } 814 815 mtk_vdec_debug(ctx, "map iq table to 0x%p\n", remote_iq_table); 816 817 if (instance->iq_table.va) 818 mtk_vcodec_mem_free(ctx, &instance->iq_table); 819 instance->iq_table.size = vsi->iq_table_size; 820 821 ret = mtk_vcodec_mem_alloc(ctx, &instance->iq_table); 822 if (ret) 823 return ret; 824 825 memcpy(instance->iq_table.va, remote_iq_table, vsi->iq_table_size); 826 827 return 0; 828 } 829 vdec_av1_slice_get_new_slot(struct vdec_av1_slice_vsi * vsi)830 static int vdec_av1_slice_get_new_slot(struct vdec_av1_slice_vsi *vsi) 831 { 832 struct vdec_av1_slice_slot *slots = &vsi->slots; 833 int new_slot_idx = AV1_INVALID_IDX; 834 int i; 835 836 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) { 837 if (slots->frame_info[i].ref_count == 0) { 838 new_slot_idx = i; 839 break; 840 } 841 } 842 843 if (new_slot_idx != AV1_INVALID_IDX) { 844 slots->frame_info[new_slot_idx].ref_count++; 845 slots->timestamp[new_slot_idx] = vsi->frame.cur_ts; 846 } 847 848 return new_slot_idx; 849 } 850 vdec_av1_slice_clear_fb(struct vdec_av1_slice_frame_info * frame_info)851 static inline void vdec_av1_slice_clear_fb(struct vdec_av1_slice_frame_info *frame_info) 852 { 853 memset((void *)frame_info, 0, sizeof(struct vdec_av1_slice_frame_info)); 854 } 855 vdec_av1_slice_decrease_ref_count(struct vdec_av1_slice_slot * slots,int fb_idx)856 static void vdec_av1_slice_decrease_ref_count(struct vdec_av1_slice_slot *slots, int fb_idx) 857 { 858 struct vdec_av1_slice_frame_info *frame_info = slots->frame_info; 859 860 frame_info[fb_idx].ref_count--; 861 if (frame_info[fb_idx].ref_count < 0) { 862 frame_info[fb_idx].ref_count = 0; 863 pr_err(MTK_DBG_V4L2_STR "av1_error: %s() fb_idx %d decrease ref_count error\n", 864 __func__, fb_idx); 865 } 866 867 vdec_av1_slice_clear_fb(&frame_info[fb_idx]); 868 } 869 vdec_av1_slice_cleanup_slots(struct vdec_av1_slice_slot * slots,struct vdec_av1_slice_frame * frame,struct v4l2_ctrl_av1_frame * ctrl_fh)870 static void vdec_av1_slice_cleanup_slots(struct vdec_av1_slice_slot *slots, 871 struct vdec_av1_slice_frame *frame, 872 struct v4l2_ctrl_av1_frame *ctrl_fh) 873 { 874 int slot_id, ref_id; 875 876 for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) 877 frame->ref_frame_map[ref_id] = AV1_INVALID_IDX; 878 879 for (slot_id = 0; slot_id < AV1_MAX_FRAME_BUF_COUNT; slot_id++) { 880 u64 timestamp = slots->timestamp[slot_id]; 881 bool ref_used = false; 882 883 /* ignored unused slots */ 884 if (slots->frame_info[slot_id].ref_count == 0) 885 continue; 886 887 for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) { 888 if (ctrl_fh->reference_frame_ts[ref_id] == timestamp) { 889 frame->ref_frame_map[ref_id] = slot_id; 890 ref_used = true; 891 } 892 } 893 894 if (!ref_used) 895 vdec_av1_slice_decrease_ref_count(slots, slot_id); 896 } 897 } 898 vdec_av1_slice_setup_slot(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi,struct v4l2_ctrl_av1_frame * ctrl_fh)899 static void vdec_av1_slice_setup_slot(struct vdec_av1_slice_instance *instance, 900 struct vdec_av1_slice_vsi *vsi, 901 struct v4l2_ctrl_av1_frame *ctrl_fh) 902 { 903 struct vdec_av1_slice_frame_info *cur_frame_info; 904 struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh; 905 int ref_id; 906 907 memcpy(&vsi->slots, &instance->slots, sizeof(instance->slots)); 908 vdec_av1_slice_cleanup_slots(&vsi->slots, &vsi->frame, ctrl_fh); 909 vsi->slot_id = vdec_av1_slice_get_new_slot(vsi); 910 911 if (vsi->slot_id == AV1_INVALID_IDX) { 912 mtk_v4l2_vdec_err(instance->ctx, "warning:av1 get invalid index slot\n"); 913 vsi->slot_id = 0; 914 } 915 cur_frame_info = &vsi->slots.frame_info[vsi->slot_id]; 916 cur_frame_info->frame_type = uh->frame_type; 917 cur_frame_info->frame_is_intra = ((uh->frame_type == AV1_INTRA_ONLY_FRAME) || 918 (uh->frame_type == AV1_KEY_FRAME)); 919 cur_frame_info->order_hint = uh->order_hint; 920 cur_frame_info->upscaled_width = uh->upscaled_width; 921 cur_frame_info->pic_pitch = 0; 922 cur_frame_info->frame_width = uh->frame_width; 923 cur_frame_info->frame_height = uh->frame_height; 924 cur_frame_info->mi_cols = ((uh->frame_width + 7) >> 3) << 1; 925 cur_frame_info->mi_rows = ((uh->frame_height + 7) >> 3) << 1; 926 927 /* ensure current frame is properly mapped if referenced */ 928 for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) { 929 u64 timestamp = vsi->slots.timestamp[vsi->slot_id]; 930 931 if (ctrl_fh->reference_frame_ts[ref_id] == timestamp) 932 vsi->frame.ref_frame_map[ref_id] = vsi->slot_id; 933 } 934 } 935 vdec_av1_slice_alloc_working_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi)936 static int vdec_av1_slice_alloc_working_buffer(struct vdec_av1_slice_instance *instance, 937 struct vdec_av1_slice_vsi *vsi) 938 { 939 struct mtk_vcodec_dec_ctx *ctx = instance->ctx; 940 enum vdec_av1_slice_resolution_level level; 941 u32 max_sb_w, max_sb_h, max_w, max_h, w, h; 942 int i, ret; 943 944 w = vsi->frame.uh.frame_width; 945 h = vsi->frame.uh.frame_height; 946 947 if (w > VCODEC_DEC_4K_CODED_WIDTH || h > VCODEC_DEC_4K_CODED_HEIGHT) 948 /* 8K */ 949 return -EINVAL; 950 951 if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) { 952 /* 4K */ 953 level = AV1_RES_4K; 954 max_w = VCODEC_DEC_4K_CODED_WIDTH; 955 max_h = VCODEC_DEC_4K_CODED_HEIGHT; 956 } else { 957 /* FHD */ 958 level = AV1_RES_FHD; 959 max_w = MTK_VDEC_MAX_W; 960 max_h = MTK_VDEC_MAX_H; 961 } 962 963 if (level == instance->level) 964 return 0; 965 966 mtk_vdec_debug(ctx, "resolution level changed from %u to %u, %ux%u", 967 instance->level, level, w, h); 968 969 max_sb_w = DIV_ROUND_UP(max_w, 128); 970 max_sb_h = DIV_ROUND_UP(max_h, 128); 971 972 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) { 973 if (instance->mv[i].va) 974 mtk_vcodec_mem_free(ctx, &instance->mv[i]); 975 instance->mv[i].size = max_sb_w * max_sb_h * SZ_1K; 976 ret = mtk_vcodec_mem_alloc(ctx, &instance->mv[i]); 977 if (ret) 978 goto err; 979 980 if (instance->seg[i].va) 981 mtk_vcodec_mem_free(ctx, &instance->seg[i]); 982 instance->seg[i].size = max_sb_w * max_sb_h * 512; 983 ret = mtk_vcodec_mem_alloc(ctx, &instance->seg[i]); 984 if (ret) 985 goto err; 986 987 if (instance->cdf[i].va) 988 mtk_vcodec_mem_free(ctx, &instance->cdf[i]); 989 instance->cdf[i].size = AV1_CDF_TABLE_BUFFER_SIZE; 990 ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf[i]); 991 if (ret) 992 goto err; 993 } 994 995 if (!instance->cdf_temp.va) { 996 instance->cdf_temp.size = (SZ_1K * 16 * 100); 997 ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_temp); 998 if (ret) 999 goto err; 1000 vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr; 1001 vsi->cdf_tmp.size = instance->cdf_temp.size; 1002 } 1003 1004 if (instance->tile.va) 1005 mtk_vcodec_mem_free(ctx, &instance->tile); 1006 1007 instance->tile.size = AV1_TILE_BUF_SIZE * V4L2_AV1_MAX_TILE_COUNT; 1008 ret = mtk_vcodec_mem_alloc(ctx, &instance->tile); 1009 if (ret) 1010 goto err; 1011 1012 instance->level = level; 1013 return 0; 1014 1015 err: 1016 instance->level = AV1_RES_NONE; 1017 return ret; 1018 } 1019 vdec_av1_slice_free_working_buffer(struct vdec_av1_slice_instance * instance)1020 static void vdec_av1_slice_free_working_buffer(struct vdec_av1_slice_instance *instance) 1021 { 1022 struct mtk_vcodec_dec_ctx *ctx = instance->ctx; 1023 int i; 1024 1025 for (i = 0; i < ARRAY_SIZE(instance->mv); i++) 1026 if (instance->mv[i].va) 1027 mtk_vcodec_mem_free(ctx, &instance->mv[i]); 1028 1029 for (i = 0; i < ARRAY_SIZE(instance->seg); i++) 1030 if (instance->seg[i].va) 1031 mtk_vcodec_mem_free(ctx, &instance->seg[i]); 1032 1033 for (i = 0; i < ARRAY_SIZE(instance->cdf); i++) 1034 if (instance->cdf[i].va) 1035 mtk_vcodec_mem_free(ctx, &instance->cdf[i]); 1036 1037 1038 if (instance->tile.va) 1039 mtk_vcodec_mem_free(ctx, &instance->tile); 1040 if (instance->cdf_temp.va) 1041 mtk_vcodec_mem_free(ctx, &instance->cdf_temp); 1042 if (instance->cdf_table.va) 1043 mtk_vcodec_mem_free(ctx, &instance->cdf_table); 1044 if (instance->iq_table.va) 1045 mtk_vcodec_mem_free(ctx, &instance->iq_table); 1046 1047 instance->level = AV1_RES_NONE; 1048 } 1049 vdec_av1_slice_vsi_from_remote(struct vdec_av1_slice_vsi * vsi,struct vdec_av1_slice_vsi * remote_vsi)1050 static inline void vdec_av1_slice_vsi_from_remote(struct vdec_av1_slice_vsi *vsi, 1051 struct vdec_av1_slice_vsi *remote_vsi) 1052 { 1053 memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans)); 1054 memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state)); 1055 } 1056 vdec_av1_slice_vsi_to_remote(struct vdec_av1_slice_vsi * vsi,struct vdec_av1_slice_vsi * remote_vsi)1057 static inline void vdec_av1_slice_vsi_to_remote(struct vdec_av1_slice_vsi *vsi, 1058 struct vdec_av1_slice_vsi *remote_vsi) 1059 { 1060 memcpy(remote_vsi, vsi, sizeof(*vsi)); 1061 } 1062 vdec_av1_slice_setup_lat_from_src_buf(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi,struct vdec_lat_buf * lat_buf)1063 static int vdec_av1_slice_setup_lat_from_src_buf(struct vdec_av1_slice_instance *instance, 1064 struct vdec_av1_slice_vsi *vsi, 1065 struct vdec_lat_buf *lat_buf) 1066 { 1067 struct vb2_v4l2_buffer *src; 1068 struct vb2_v4l2_buffer *dst; 1069 1070 src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx); 1071 if (!src) 1072 return -EINVAL; 1073 1074 lat_buf->src_buf_req = src->vb2_buf.req_obj.req; 1075 dst = &lat_buf->ts_info; 1076 v4l2_m2m_buf_copy_metadata(src, dst, true); 1077 vsi->frame.cur_ts = dst->vb2_buf.timestamp; 1078 1079 return 0; 1080 } 1081 vdec_av1_slice_resolve_divisor_32(u32 D,short * shift)1082 static short vdec_av1_slice_resolve_divisor_32(u32 D, short *shift) 1083 { 1084 int f; 1085 int e; 1086 1087 *shift = vdec_av1_slice_get_msb(D); 1088 /* e is obtained from D after resetting the most significant 1 bit. */ 1089 e = D - ((u32)1 << *shift); 1090 /* Get the most significant DIV_LUT_BITS (8) bits of e into f */ 1091 if (*shift > DIV_LUT_BITS) 1092 f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS); 1093 else 1094 f = e << (DIV_LUT_BITS - *shift); 1095 if (f > DIV_LUT_NUM) 1096 return -1; 1097 *shift += DIV_LUT_PREC_BITS; 1098 /* Use f as lookup into the precomputed table of multipliers */ 1099 return div_lut[f]; 1100 } 1101 vdec_av1_slice_get_shear_params(struct vdec_av1_slice_gm * gm_params)1102 static void vdec_av1_slice_get_shear_params(struct vdec_av1_slice_gm *gm_params) 1103 { 1104 const int *mat = gm_params->wmmat; 1105 short shift; 1106 short y; 1107 long long gv, dv; 1108 1109 if (gm_params->wmmat[2] <= 0) 1110 return; 1111 1112 gm_params->alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX); 1113 gm_params->beta = clamp_val(mat[3], S16_MIN, S16_MAX); 1114 1115 y = vdec_av1_slice_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1); 1116 1117 gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y; 1118 gm_params->gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), 1119 S16_MIN, S16_MAX); 1120 1121 dv = ((long long)mat[3] * mat[4]) * y; 1122 gm_params->delta = clamp_val(mat[5] - (int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - 1123 (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX); 1124 1125 gm_params->alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->alpha, WARP_PARAM_REDUCE_BITS) * 1126 (1 << WARP_PARAM_REDUCE_BITS); 1127 gm_params->beta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->beta, WARP_PARAM_REDUCE_BITS) * 1128 (1 << WARP_PARAM_REDUCE_BITS); 1129 gm_params->gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->gamma, WARP_PARAM_REDUCE_BITS) * 1130 (1 << WARP_PARAM_REDUCE_BITS); 1131 gm_params->delta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->delta, WARP_PARAM_REDUCE_BITS) * 1132 (1 << WARP_PARAM_REDUCE_BITS); 1133 } 1134 vdec_av1_slice_setup_gm(struct vdec_av1_slice_gm * gm,struct v4l2_av1_global_motion * ctrl_gm)1135 static void vdec_av1_slice_setup_gm(struct vdec_av1_slice_gm *gm, 1136 struct v4l2_av1_global_motion *ctrl_gm) 1137 { 1138 u32 i, j; 1139 1140 for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) { 1141 gm[i].wmtype = ctrl_gm->type[i]; 1142 for (j = 0; j < 6; j++) 1143 gm[i].wmmat[j] = ctrl_gm->params[i][j]; 1144 1145 gm[i].invalid = !!(ctrl_gm->invalid & BIT(i)); 1146 gm[i].alpha = 0; 1147 gm[i].beta = 0; 1148 gm[i].gamma = 0; 1149 gm[i].delta = 0; 1150 if (gm[i].wmtype <= V4L2_AV1_WARP_MODEL_AFFINE) 1151 vdec_av1_slice_get_shear_params(&gm[i]); 1152 } 1153 } 1154 vdec_av1_slice_setup_seg(struct vdec_av1_slice_seg * seg,struct v4l2_av1_segmentation * ctrl_seg)1155 static void vdec_av1_slice_setup_seg(struct vdec_av1_slice_seg *seg, 1156 struct v4l2_av1_segmentation *ctrl_seg) 1157 { 1158 u32 i, j; 1159 1160 seg->segmentation_enabled = SEGMENTATION_FLAG(ctrl_seg, ENABLED); 1161 seg->segmentation_update_map = SEGMENTATION_FLAG(ctrl_seg, UPDATE_MAP); 1162 seg->segmentation_temporal_update = SEGMENTATION_FLAG(ctrl_seg, TEMPORAL_UPDATE); 1163 seg->segmentation_update_data = SEGMENTATION_FLAG(ctrl_seg, UPDATE_DATA); 1164 seg->segid_preskip = SEGMENTATION_FLAG(ctrl_seg, SEG_ID_PRE_SKIP); 1165 seg->last_active_segid = ctrl_seg->last_active_seg_id; 1166 1167 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) { 1168 seg->feature_enabled_mask[i] = ctrl_seg->feature_enabled[i]; 1169 for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) 1170 seg->feature_data[i][j] = ctrl_seg->feature_data[i][j]; 1171 } 1172 } 1173 vdec_av1_slice_setup_quant(struct vdec_av1_slice_quantization * quant,struct v4l2_av1_quantization * ctrl_quant)1174 static void vdec_av1_slice_setup_quant(struct vdec_av1_slice_quantization *quant, 1175 struct v4l2_av1_quantization *ctrl_quant) 1176 { 1177 quant->base_q_idx = ctrl_quant->base_q_idx; 1178 quant->delta_qydc = ctrl_quant->delta_q_y_dc; 1179 quant->delta_qudc = ctrl_quant->delta_q_u_dc; 1180 quant->delta_quac = ctrl_quant->delta_q_u_ac; 1181 quant->delta_qvdc = ctrl_quant->delta_q_v_dc; 1182 quant->delta_qvac = ctrl_quant->delta_q_v_ac; 1183 quant->qm_y = ctrl_quant->qm_y; 1184 quant->qm_u = ctrl_quant->qm_u; 1185 quant->qm_v = ctrl_quant->qm_v; 1186 quant->using_qmatrix = QUANT_FLAG(ctrl_quant, USING_QMATRIX); 1187 } 1188 vdec_av1_slice_get_qindex(struct vdec_av1_slice_uncompressed_header * uh,int segmentation_id)1189 static int vdec_av1_slice_get_qindex(struct vdec_av1_slice_uncompressed_header *uh, 1190 int segmentation_id) 1191 { 1192 struct vdec_av1_slice_seg *seg = &uh->seg; 1193 struct vdec_av1_slice_quantization *quant = &uh->quant; 1194 int data = 0, qindex = 0; 1195 1196 if (seg->segmentation_enabled && 1197 (seg->feature_enabled_mask[segmentation_id] & BIT(SEG_LVL_ALT_Q))) { 1198 data = seg->feature_data[segmentation_id][SEG_LVL_ALT_Q]; 1199 qindex = quant->base_q_idx + data; 1200 return clamp_val(qindex, 0, MAXQ); 1201 } 1202 1203 return quant->base_q_idx; 1204 } 1205 vdec_av1_slice_setup_lr(struct vdec_av1_slice_lr * lr,struct v4l2_av1_loop_restoration * ctrl_lr)1206 static void vdec_av1_slice_setup_lr(struct vdec_av1_slice_lr *lr, 1207 struct v4l2_av1_loop_restoration *ctrl_lr) 1208 { 1209 int i; 1210 1211 lr->use_lr = 0; 1212 lr->use_chroma_lr = 0; 1213 for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) { 1214 lr->frame_restoration_type[i] = ctrl_lr->frame_restoration_type[i]; 1215 lr->loop_restoration_size[i] = ctrl_lr->loop_restoration_size[i]; 1216 if (lr->frame_restoration_type[i]) { 1217 lr->use_lr = 1; 1218 if (i > 0) 1219 lr->use_chroma_lr = 1; 1220 } 1221 } 1222 } 1223 vdec_av1_slice_setup_lf(struct vdec_av1_slice_loop_filter * lf,struct v4l2_av1_loop_filter * ctrl_lf)1224 static void vdec_av1_slice_setup_lf(struct vdec_av1_slice_loop_filter *lf, 1225 struct v4l2_av1_loop_filter *ctrl_lf) 1226 { 1227 int i; 1228 1229 for (i = 0; i < ARRAY_SIZE(lf->loop_filter_level); i++) 1230 lf->loop_filter_level[i] = ctrl_lf->level[i]; 1231 1232 for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) 1233 lf->loop_filter_ref_deltas[i] = ctrl_lf->ref_deltas[i]; 1234 1235 for (i = 0; i < ARRAY_SIZE(lf->loop_filter_mode_deltas); i++) 1236 lf->loop_filter_mode_deltas[i] = ctrl_lf->mode_deltas[i]; 1237 1238 lf->loop_filter_sharpness = ctrl_lf->sharpness; 1239 lf->loop_filter_delta_enabled = 1240 BIT_FLAG(ctrl_lf, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED); 1241 } 1242 vdec_av1_slice_setup_cdef(struct vdec_av1_slice_cdef * cdef,struct v4l2_av1_cdef * ctrl_cdef)1243 static void vdec_av1_slice_setup_cdef(struct vdec_av1_slice_cdef *cdef, 1244 struct v4l2_av1_cdef *ctrl_cdef) 1245 { 1246 int i; 1247 1248 cdef->cdef_damping = ctrl_cdef->damping_minus_3 + 3; 1249 cdef->cdef_bits = ctrl_cdef->bits; 1250 1251 for (i = 0; i < V4L2_AV1_CDEF_MAX; i++) { 1252 if (ctrl_cdef->y_sec_strength[i] == 4) 1253 ctrl_cdef->y_sec_strength[i] -= 1; 1254 1255 if (ctrl_cdef->uv_sec_strength[i] == 4) 1256 ctrl_cdef->uv_sec_strength[i] -= 1; 1257 1258 cdef->cdef_y_strength[i] = 1259 ctrl_cdef->y_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS | 1260 ctrl_cdef->y_sec_strength[i]; 1261 cdef->cdef_uv_strength[i] = 1262 ctrl_cdef->uv_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS | 1263 ctrl_cdef->uv_sec_strength[i]; 1264 } 1265 } 1266 vdec_av1_slice_setup_seq(struct vdec_av1_slice_seq_header * seq,struct v4l2_ctrl_av1_sequence * ctrl_seq)1267 static void vdec_av1_slice_setup_seq(struct vdec_av1_slice_seq_header *seq, 1268 struct v4l2_ctrl_av1_sequence *ctrl_seq) 1269 { 1270 seq->bitdepth = ctrl_seq->bit_depth; 1271 seq->max_frame_width = ctrl_seq->max_frame_width_minus_1 + 1; 1272 seq->max_frame_height = ctrl_seq->max_frame_height_minus_1 + 1; 1273 seq->enable_superres = SEQUENCE_FLAG(ctrl_seq, ENABLE_SUPERRES); 1274 seq->enable_filter_intra = SEQUENCE_FLAG(ctrl_seq, ENABLE_FILTER_INTRA); 1275 seq->enable_intra_edge_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTRA_EDGE_FILTER); 1276 seq->enable_interintra_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTERINTRA_COMPOUND); 1277 seq->enable_masked_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_MASKED_COMPOUND); 1278 seq->enable_dual_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_DUAL_FILTER); 1279 seq->enable_jnt_comp = SEQUENCE_FLAG(ctrl_seq, ENABLE_JNT_COMP); 1280 seq->mono_chrome = SEQUENCE_FLAG(ctrl_seq, MONO_CHROME); 1281 seq->enable_order_hint = SEQUENCE_FLAG(ctrl_seq, ENABLE_ORDER_HINT); 1282 seq->order_hint_bits = ctrl_seq->order_hint_bits; 1283 seq->use_128x128_superblock = SEQUENCE_FLAG(ctrl_seq, USE_128X128_SUPERBLOCK); 1284 seq->subsampling_x = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_X); 1285 seq->subsampling_y = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_Y); 1286 } 1287 vdec_av1_slice_setup_tile(struct vdec_av1_slice_frame * frame,struct v4l2_av1_tile_info * ctrl_tile)1288 static void vdec_av1_slice_setup_tile(struct vdec_av1_slice_frame *frame, 1289 struct v4l2_av1_tile_info *ctrl_tile) 1290 { 1291 struct vdec_av1_slice_seq_header *seq = &frame->seq; 1292 struct vdec_av1_slice_tile *tile = &frame->uh.tile; 1293 u32 mib_size_log2 = seq->use_128x128_superblock ? 5 : 4; 1294 int i; 1295 1296 tile->tile_cols = ctrl_tile->tile_cols; 1297 tile->tile_rows = ctrl_tile->tile_rows; 1298 tile->context_update_tile_id = ctrl_tile->context_update_tile_id; 1299 tile->uniform_tile_spacing_flag = 1300 BIT_FLAG(ctrl_tile, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING); 1301 1302 for (i = 0; i < tile->tile_cols + 1; i++) 1303 tile->mi_col_starts[i] = 1304 ALIGN(ctrl_tile->mi_col_starts[i], BIT(mib_size_log2)) >> mib_size_log2; 1305 1306 for (i = 0; i < tile->tile_rows + 1; i++) 1307 tile->mi_row_starts[i] = 1308 ALIGN(ctrl_tile->mi_row_starts[i], BIT(mib_size_log2)) >> mib_size_log2; 1309 } 1310 vdec_av1_slice_setup_uh(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_frame * frame,struct v4l2_ctrl_av1_frame * ctrl_fh)1311 static void vdec_av1_slice_setup_uh(struct vdec_av1_slice_instance *instance, 1312 struct vdec_av1_slice_frame *frame, 1313 struct v4l2_ctrl_av1_frame *ctrl_fh) 1314 { 1315 struct vdec_av1_slice_uncompressed_header *uh = &frame->uh; 1316 int i; 1317 1318 uh->use_ref_frame_mvs = FH_FLAG(ctrl_fh, USE_REF_FRAME_MVS); 1319 uh->order_hint = ctrl_fh->order_hint; 1320 vdec_av1_slice_setup_gm(uh->gm, &ctrl_fh->global_motion); 1321 uh->upscaled_width = ctrl_fh->upscaled_width; 1322 uh->frame_width = ctrl_fh->frame_width_minus_1 + 1; 1323 uh->frame_height = ctrl_fh->frame_height_minus_1 + 1; 1324 uh->mi_cols = ((uh->frame_width + 7) >> 3) << 1; 1325 uh->mi_rows = ((uh->frame_height + 7) >> 3) << 1; 1326 uh->reduced_tx_set = FH_FLAG(ctrl_fh, REDUCED_TX_SET); 1327 uh->tx_mode = ctrl_fh->tx_mode; 1328 uh->uniform_tile_spacing_flag = 1329 BIT_FLAG(&ctrl_fh->tile_info, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING); 1330 uh->interpolation_filter = ctrl_fh->interpolation_filter; 1331 uh->allow_warped_motion = FH_FLAG(ctrl_fh, ALLOW_WARPED_MOTION); 1332 uh->is_motion_mode_switchable = FH_FLAG(ctrl_fh, IS_MOTION_MODE_SWITCHABLE); 1333 uh->frame_type = ctrl_fh->frame_type; 1334 uh->frame_is_intra = (uh->frame_type == V4L2_AV1_INTRA_ONLY_FRAME || 1335 uh->frame_type == V4L2_AV1_KEY_FRAME); 1336 1337 if (!uh->frame_is_intra && FH_FLAG(ctrl_fh, REFERENCE_SELECT)) 1338 uh->reference_mode = AV1_REFERENCE_MODE_SELECT; 1339 else 1340 uh->reference_mode = AV1_SINGLE_REFERENCE; 1341 1342 uh->allow_high_precision_mv = FH_FLAG(ctrl_fh, ALLOW_HIGH_PRECISION_MV); 1343 uh->allow_intra_bc = FH_FLAG(ctrl_fh, ALLOW_INTRABC); 1344 uh->force_integer_mv = FH_FLAG(ctrl_fh, FORCE_INTEGER_MV); 1345 uh->allow_screen_content_tools = FH_FLAG(ctrl_fh, ALLOW_SCREEN_CONTENT_TOOLS); 1346 uh->error_resilient_mode = FH_FLAG(ctrl_fh, ERROR_RESILIENT_MODE); 1347 uh->primary_ref_frame = ctrl_fh->primary_ref_frame; 1348 uh->disable_frame_end_update_cdf = 1349 FH_FLAG(ctrl_fh, DISABLE_FRAME_END_UPDATE_CDF); 1350 uh->disable_cdf_update = FH_FLAG(ctrl_fh, DISABLE_CDF_UPDATE); 1351 uh->skip_mode.skip_mode_present = FH_FLAG(ctrl_fh, SKIP_MODE_PRESENT); 1352 uh->skip_mode.skip_mode_frame[0] = 1353 ctrl_fh->skip_mode_frame[0] - V4L2_AV1_REF_LAST_FRAME; 1354 uh->skip_mode.skip_mode_frame[1] = 1355 ctrl_fh->skip_mode_frame[1] - V4L2_AV1_REF_LAST_FRAME; 1356 uh->skip_mode.skip_mode_allowed = ctrl_fh->skip_mode_frame[0] ? 1 : 0; 1357 1358 vdec_av1_slice_setup_seg(&uh->seg, &ctrl_fh->segmentation); 1359 uh->delta_q_lf.delta_q_present = QUANT_FLAG(&ctrl_fh->quantization, DELTA_Q_PRESENT); 1360 uh->delta_q_lf.delta_q_res = 1 << ctrl_fh->quantization.delta_q_res; 1361 uh->delta_q_lf.delta_lf_present = 1362 BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT); 1363 uh->delta_q_lf.delta_lf_res = ctrl_fh->loop_filter.delta_lf_res; 1364 uh->delta_q_lf.delta_lf_multi = 1365 BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI); 1366 vdec_av1_slice_setup_quant(&uh->quant, &ctrl_fh->quantization); 1367 1368 uh->coded_loss_less = 1; 1369 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) { 1370 uh->quant.qindex[i] = vdec_av1_slice_get_qindex(uh, i); 1371 uh->loss_less_array[i] = 1372 (uh->quant.qindex[i] == 0 && uh->quant.delta_qydc == 0 && 1373 uh->quant.delta_quac == 0 && uh->quant.delta_qudc == 0 && 1374 uh->quant.delta_qvac == 0 && uh->quant.delta_qvdc == 0); 1375 1376 if (!uh->loss_less_array[i]) 1377 uh->coded_loss_less = 0; 1378 } 1379 1380 vdec_av1_slice_setup_lr(&uh->lr, &ctrl_fh->loop_restoration); 1381 uh->superres_denom = ctrl_fh->superres_denom; 1382 vdec_av1_slice_setup_lf(&uh->loop_filter, &ctrl_fh->loop_filter); 1383 vdec_av1_slice_setup_cdef(&uh->cdef, &ctrl_fh->cdef); 1384 vdec_av1_slice_setup_tile(frame, &ctrl_fh->tile_info); 1385 } 1386 vdec_av1_slice_setup_tile_group(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi)1387 static int vdec_av1_slice_setup_tile_group(struct vdec_av1_slice_instance *instance, 1388 struct vdec_av1_slice_vsi *vsi) 1389 { 1390 struct v4l2_ctrl_av1_tile_group_entry *ctrl_tge; 1391 struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group; 1392 struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh; 1393 struct vdec_av1_slice_tile *tile = &uh->tile; 1394 struct v4l2_ctrl *ctrl; 1395 u32 tge_size; 1396 int i; 1397 1398 ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY); 1399 if (!ctrl) 1400 return -EINVAL; 1401 1402 tge_size = ctrl->elems; 1403 ctrl_tge = (struct v4l2_ctrl_av1_tile_group_entry *)ctrl->p_cur.p; 1404 1405 tile_group->num_tiles = tile->tile_cols * tile->tile_rows; 1406 1407 if (tile_group->num_tiles != tge_size || 1408 tile_group->num_tiles > V4L2_AV1_MAX_TILE_COUNT) { 1409 mtk_vdec_err(instance->ctx, "invalid tge_size %d, tile_num:%d\n", 1410 tge_size, tile_group->num_tiles); 1411 return -EINVAL; 1412 } 1413 1414 for (i = 0; i < tge_size; i++) { 1415 if (i != ctrl_tge[i].tile_row * vsi->frame.uh.tile.tile_cols + 1416 ctrl_tge[i].tile_col) { 1417 mtk_vdec_err(instance->ctx, "invalid tge info %d, %d %d %d\n", 1418 i, ctrl_tge[i].tile_row, ctrl_tge[i].tile_col, 1419 vsi->frame.uh.tile.tile_rows); 1420 return -EINVAL; 1421 } 1422 tile_group->tile_size[i] = ctrl_tge[i].tile_size; 1423 tile_group->tile_start_offset[i] = ctrl_tge[i].tile_offset; 1424 } 1425 1426 return 0; 1427 } 1428 vdec_av1_slice_setup_state(struct vdec_av1_slice_vsi * vsi)1429 static inline void vdec_av1_slice_setup_state(struct vdec_av1_slice_vsi *vsi) 1430 { 1431 memset(&vsi->state, 0, sizeof(vsi->state)); 1432 } 1433 vdec_av1_slice_setup_scale_factors(struct vdec_av1_slice_frame_refs * frame_ref,struct vdec_av1_slice_frame_info * ref_frame_info,struct vdec_av1_slice_uncompressed_header * uh)1434 static void vdec_av1_slice_setup_scale_factors(struct vdec_av1_slice_frame_refs *frame_ref, 1435 struct vdec_av1_slice_frame_info *ref_frame_info, 1436 struct vdec_av1_slice_uncompressed_header *uh) 1437 { 1438 struct vdec_av1_slice_scale_factors *scale_factors = &frame_ref->scale_factors; 1439 u32 ref_upscaled_width = ref_frame_info->upscaled_width; 1440 u32 ref_frame_height = ref_frame_info->frame_height; 1441 u32 frame_width = uh->frame_width; 1442 u32 frame_height = uh->frame_height; 1443 1444 if (!vdec_av1_slice_need_scale(ref_upscaled_width, ref_frame_height, 1445 frame_width, frame_height)) { 1446 scale_factors->x_scale = -1; 1447 scale_factors->y_scale = -1; 1448 scale_factors->is_scaled = 0; 1449 return; 1450 } 1451 1452 scale_factors->x_scale = 1453 ((ref_upscaled_width << AV1_REF_SCALE_SHIFT) + (frame_width >> 1)) / frame_width; 1454 scale_factors->y_scale = 1455 ((ref_frame_height << AV1_REF_SCALE_SHIFT) + (frame_height >> 1)) / frame_height; 1456 scale_factors->is_scaled = 1457 (scale_factors->x_scale != AV1_REF_INVALID_SCALE) && 1458 (scale_factors->y_scale != AV1_REF_INVALID_SCALE) && 1459 (scale_factors->x_scale != AV1_REF_NO_SCALE || 1460 scale_factors->y_scale != AV1_REF_NO_SCALE); 1461 scale_factors->x_step = 1462 AV1_DIV_ROUND_UP_POW2(scale_factors->x_scale, 1463 AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS); 1464 scale_factors->y_step = 1465 AV1_DIV_ROUND_UP_POW2(scale_factors->y_scale, 1466 AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS); 1467 } 1468 vdec_av1_slice_get_sign_bias(int a,int b,u8 enable_order_hint,u8 order_hint_bits)1469 static unsigned char vdec_av1_slice_get_sign_bias(int a, 1470 int b, 1471 u8 enable_order_hint, 1472 u8 order_hint_bits) 1473 { 1474 int diff = 0; 1475 int m = 0; 1476 unsigned char result = 0; 1477 1478 if (!enable_order_hint) 1479 return 0; 1480 1481 diff = a - b; 1482 m = 1 << (order_hint_bits - 1); 1483 diff = (diff & (m - 1)) - (diff & m); 1484 1485 if (diff > 0) 1486 result = 1; 1487 1488 return result; 1489 } 1490 vdec_av1_slice_setup_ref(struct vdec_av1_slice_pfc * pfc,struct v4l2_ctrl_av1_frame * ctrl_fh)1491 static void vdec_av1_slice_setup_ref(struct vdec_av1_slice_pfc *pfc, 1492 struct v4l2_ctrl_av1_frame *ctrl_fh) 1493 { 1494 struct vdec_av1_slice_vsi *vsi = &pfc->vsi; 1495 struct vdec_av1_slice_frame *frame = &vsi->frame; 1496 struct vdec_av1_slice_slot *slots = &vsi->slots; 1497 struct vdec_av1_slice_uncompressed_header *uh = &frame->uh; 1498 struct vdec_av1_slice_seq_header *seq = &frame->seq; 1499 struct vdec_av1_slice_frame_info *cur_frame_info = 1500 &slots->frame_info[vsi->slot_id]; 1501 struct vdec_av1_slice_frame_info *frame_info; 1502 int i, slot_id; 1503 1504 if (uh->frame_is_intra) 1505 return; 1506 1507 for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) { 1508 int ref_idx = ctrl_fh->ref_frame_idx[i]; 1509 1510 pfc->ref_idx[i] = ctrl_fh->reference_frame_ts[ref_idx]; 1511 slot_id = frame->ref_frame_map[ref_idx]; 1512 frame_info = &slots->frame_info[slot_id]; 1513 if (slot_id == AV1_INVALID_IDX) { 1514 pr_err(MTK_DBG_V4L2_STR "cannot match reference[%d] 0x%llx\n", i, 1515 ctrl_fh->reference_frame_ts[ref_idx]); 1516 frame->order_hints[i] = 0; 1517 frame->ref_frame_valid[i] = 0; 1518 continue; 1519 } 1520 1521 frame->frame_refs[i].ref_fb_idx = slot_id; 1522 vdec_av1_slice_setup_scale_factors(&frame->frame_refs[i], 1523 frame_info, uh); 1524 if (!seq->enable_order_hint) 1525 frame->ref_frame_sign_bias[i + 1] = 0; 1526 else 1527 frame->ref_frame_sign_bias[i + 1] = 1528 vdec_av1_slice_get_sign_bias(frame_info->order_hint, 1529 uh->order_hint, 1530 seq->enable_order_hint, 1531 seq->order_hint_bits); 1532 1533 frame->order_hints[i] = ctrl_fh->order_hints[i + 1]; 1534 cur_frame_info->order_hints[i] = frame->order_hints[i]; 1535 frame->ref_frame_valid[i] = 1; 1536 } 1537 } 1538 vdec_av1_slice_get_previous(struct vdec_av1_slice_vsi * vsi)1539 static void vdec_av1_slice_get_previous(struct vdec_av1_slice_vsi *vsi) 1540 { 1541 struct vdec_av1_slice_frame *frame = &vsi->frame; 1542 1543 if (frame->uh.primary_ref_frame == AV1_PRIMARY_REF_NONE) 1544 frame->prev_fb_idx = AV1_INVALID_IDX; 1545 else 1546 frame->prev_fb_idx = frame->frame_refs[frame->uh.primary_ref_frame].ref_fb_idx; 1547 } 1548 vdec_av1_slice_setup_operating_mode(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_frame * frame)1549 static inline void vdec_av1_slice_setup_operating_mode(struct vdec_av1_slice_instance *instance, 1550 struct vdec_av1_slice_frame *frame) 1551 { 1552 frame->large_scale_tile = 0; 1553 } 1554 vdec_av1_slice_setup_pfc(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_pfc * pfc)1555 static int vdec_av1_slice_setup_pfc(struct vdec_av1_slice_instance *instance, 1556 struct vdec_av1_slice_pfc *pfc) 1557 { 1558 struct v4l2_ctrl_av1_frame *ctrl_fh; 1559 struct v4l2_ctrl_av1_sequence *ctrl_seq; 1560 struct vdec_av1_slice_vsi *vsi = &pfc->vsi; 1561 int ret = 0; 1562 1563 /* frame header */ 1564 ctrl_fh = (struct v4l2_ctrl_av1_frame *) 1565 vdec_av1_get_ctrl_ptr(instance->ctx, 1566 V4L2_CID_STATELESS_AV1_FRAME); 1567 if (IS_ERR(ctrl_fh)) 1568 return PTR_ERR(ctrl_fh); 1569 1570 ctrl_seq = (struct v4l2_ctrl_av1_sequence *) 1571 vdec_av1_get_ctrl_ptr(instance->ctx, 1572 V4L2_CID_STATELESS_AV1_SEQUENCE); 1573 if (IS_ERR(ctrl_seq)) 1574 return PTR_ERR(ctrl_seq); 1575 1576 /* setup vsi information */ 1577 vdec_av1_slice_setup_seq(&vsi->frame.seq, ctrl_seq); 1578 vdec_av1_slice_setup_uh(instance, &vsi->frame, ctrl_fh); 1579 vdec_av1_slice_setup_operating_mode(instance, &vsi->frame); 1580 1581 vdec_av1_slice_setup_state(vsi); 1582 vdec_av1_slice_setup_slot(instance, vsi, ctrl_fh); 1583 vdec_av1_slice_setup_ref(pfc, ctrl_fh); 1584 vdec_av1_slice_get_previous(vsi); 1585 1586 pfc->seq = instance->seq; 1587 instance->seq++; 1588 1589 return ret; 1590 } 1591 vdec_av1_slice_setup_lat_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi,struct mtk_vcodec_mem * bs,struct vdec_lat_buf * lat_buf)1592 static void vdec_av1_slice_setup_lat_buffer(struct vdec_av1_slice_instance *instance, 1593 struct vdec_av1_slice_vsi *vsi, 1594 struct mtk_vcodec_mem *bs, 1595 struct vdec_lat_buf *lat_buf) 1596 { 1597 struct vdec_av1_slice_work_buffer *work_buffer; 1598 int i; 1599 1600 vsi->bs.dma_addr = bs->dma_addr; 1601 vsi->bs.size = bs->size; 1602 1603 vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr; 1604 vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size; 1605 vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr; 1606 /* used to store trans end */ 1607 vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr; 1608 vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr; 1609 vsi->err_map.size = lat_buf->wdma_err_addr.size; 1610 vsi->rd_mv.dma_addr = lat_buf->rd_mv_addr.dma_addr; 1611 vsi->rd_mv.size = lat_buf->rd_mv_addr.size; 1612 1613 vsi->row_info.buf = 0; 1614 vsi->row_info.size = 0; 1615 1616 work_buffer = vsi->work_buffer; 1617 1618 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) { 1619 work_buffer[i].mv_addr.buf = instance->mv[i].dma_addr; 1620 work_buffer[i].mv_addr.size = instance->mv[i].size; 1621 work_buffer[i].segid_addr.buf = instance->seg[i].dma_addr; 1622 work_buffer[i].segid_addr.size = instance->seg[i].size; 1623 work_buffer[i].cdf_addr.buf = instance->cdf[i].dma_addr; 1624 work_buffer[i].cdf_addr.size = instance->cdf[i].size; 1625 } 1626 1627 vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr; 1628 vsi->cdf_tmp.size = instance->cdf_temp.size; 1629 1630 vsi->tile.buf = instance->tile.dma_addr; 1631 vsi->tile.size = instance->tile.size; 1632 memcpy(lat_buf->tile_addr.va, instance->tile.va, 64 * instance->tile_group.num_tiles); 1633 1634 vsi->cdf_table.buf = instance->cdf_table.dma_addr; 1635 vsi->cdf_table.size = instance->cdf_table.size; 1636 vsi->iq_table.buf = instance->iq_table.dma_addr; 1637 vsi->iq_table.size = instance->iq_table.size; 1638 } 1639 vdec_av1_slice_setup_seg_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi)1640 static void vdec_av1_slice_setup_seg_buffer(struct vdec_av1_slice_instance *instance, 1641 struct vdec_av1_slice_vsi *vsi) 1642 { 1643 struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh; 1644 struct mtk_vcodec_mem *buf; 1645 1646 /* reset segment buffer */ 1647 if (uh->primary_ref_frame == AV1_PRIMARY_REF_NONE || !uh->seg.segmentation_enabled) { 1648 mtk_vdec_debug(instance->ctx, "reset seg %d\n", vsi->slot_id); 1649 if (vsi->slot_id != AV1_INVALID_IDX) { 1650 buf = &instance->seg[vsi->slot_id]; 1651 memset(buf->va, 0, buf->size); 1652 } 1653 } 1654 } 1655 vdec_av1_slice_setup_tile_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi,struct mtk_vcodec_mem * bs)1656 static void vdec_av1_slice_setup_tile_buffer(struct vdec_av1_slice_instance *instance, 1657 struct vdec_av1_slice_vsi *vsi, 1658 struct mtk_vcodec_mem *bs) 1659 { 1660 struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group; 1661 struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh; 1662 struct vdec_av1_slice_tile *tile = &uh->tile; 1663 u32 tile_num, tile_row, tile_col; 1664 u32 allow_update_cdf = 0; 1665 u32 sb_boundary_x_m1 = 0, sb_boundary_y_m1 = 0; 1666 int tile_info_base; 1667 u64 tile_buf_pa; 1668 u32 *tile_info_buf = instance->tile.va; 1669 u64 pa = (u64)bs->dma_addr; 1670 1671 if (uh->disable_cdf_update == 0) 1672 allow_update_cdf = 1; 1673 1674 for (tile_num = 0; tile_num < tile_group->num_tiles; tile_num++) { 1675 /* each uint32 takes place of 4 bytes */ 1676 tile_info_base = (AV1_TILE_BUF_SIZE * tile_num) >> 2; 1677 tile_row = tile_num / tile->tile_cols; 1678 tile_col = tile_num % tile->tile_cols; 1679 tile_info_buf[tile_info_base + 0] = (tile_group->tile_size[tile_num] << 3); 1680 tile_buf_pa = pa + tile_group->tile_start_offset[tile_num]; 1681 1682 /* save av1 tile high 4bits(bit 32-35) address in lower 4 bits position 1683 * and clear original for hw requirement. 1684 */ 1685 tile_info_buf[tile_info_base + 1] = (tile_buf_pa & 0xFFFFFFF0ull) | 1686 ((tile_buf_pa & 0xF00000000ull) >> 32); 1687 tile_info_buf[tile_info_base + 2] = (tile_buf_pa & 0xFull) << 3; 1688 1689 sb_boundary_x_m1 = 1690 (tile->mi_col_starts[tile_col + 1] - tile->mi_col_starts[tile_col] - 1) & 1691 0x3f; 1692 sb_boundary_y_m1 = 1693 (tile->mi_row_starts[tile_row + 1] - tile->mi_row_starts[tile_row] - 1) & 1694 0x1ff; 1695 1696 tile_info_buf[tile_info_base + 3] = (sb_boundary_y_m1 << 7) | sb_boundary_x_m1; 1697 tile_info_buf[tile_info_base + 4] = ((allow_update_cdf << 18) | (1 << 16)); 1698 1699 if (tile_num == tile->context_update_tile_id && 1700 uh->disable_frame_end_update_cdf == 0) 1701 tile_info_buf[tile_info_base + 4] |= (1 << 17); 1702 1703 mtk_vdec_debug(instance->ctx, "// tile buf %d pos(%dx%d) offset 0x%x\n", 1704 tile_num, tile_row, tile_col, tile_info_base); 1705 mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n", 1706 tile_info_buf[tile_info_base + 0], 1707 tile_info_buf[tile_info_base + 1], 1708 tile_info_buf[tile_info_base + 2], 1709 tile_info_buf[tile_info_base + 3]); 1710 mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n", 1711 tile_info_buf[tile_info_base + 4], 1712 tile_info_buf[tile_info_base + 5], 1713 tile_info_buf[tile_info_base + 6], 1714 tile_info_buf[tile_info_base + 7]); 1715 } 1716 } 1717 vdec_av1_slice_setup_lat(struct vdec_av1_slice_instance * instance,struct mtk_vcodec_mem * bs,struct vdec_lat_buf * lat_buf,struct vdec_av1_slice_pfc * pfc)1718 static int vdec_av1_slice_setup_lat(struct vdec_av1_slice_instance *instance, 1719 struct mtk_vcodec_mem *bs, 1720 struct vdec_lat_buf *lat_buf, 1721 struct vdec_av1_slice_pfc *pfc) 1722 { 1723 struct vdec_av1_slice_vsi *vsi = &pfc->vsi; 1724 int ret; 1725 1726 ret = vdec_av1_slice_setup_lat_from_src_buf(instance, vsi, lat_buf); 1727 if (ret) 1728 return ret; 1729 1730 ret = vdec_av1_slice_setup_pfc(instance, pfc); 1731 if (ret) 1732 return ret; 1733 1734 ret = vdec_av1_slice_setup_tile_group(instance, vsi); 1735 if (ret) 1736 return ret; 1737 1738 ret = vdec_av1_slice_alloc_working_buffer(instance, vsi); 1739 if (ret) 1740 return ret; 1741 1742 vdec_av1_slice_setup_seg_buffer(instance, vsi); 1743 vdec_av1_slice_setup_tile_buffer(instance, vsi, bs); 1744 vdec_av1_slice_setup_lat_buffer(instance, vsi, bs, lat_buf); 1745 1746 return 0; 1747 } 1748 vdec_av1_slice_update_lat(struct vdec_av1_slice_instance * instance,struct vdec_lat_buf * lat_buf,struct vdec_av1_slice_pfc * pfc)1749 static int vdec_av1_slice_update_lat(struct vdec_av1_slice_instance *instance, 1750 struct vdec_lat_buf *lat_buf, 1751 struct vdec_av1_slice_pfc *pfc) 1752 { 1753 struct vdec_av1_slice_vsi *vsi; 1754 1755 vsi = &pfc->vsi; 1756 mtk_vdec_debug(instance->ctx, "frame %u LAT CRC 0x%08x, output size is %d\n", 1757 pfc->seq, vsi->state.crc[0], vsi->state.out_size); 1758 1759 /* buffer full, need to re-decode */ 1760 if (vsi->state.full) { 1761 /* buffer not enough */ 1762 if (vsi->trans.dma_addr_end - vsi->trans.dma_addr == vsi->ube.size) 1763 return -ENOMEM; 1764 return -EAGAIN; 1765 } 1766 1767 instance->width = vsi->frame.uh.upscaled_width; 1768 instance->height = vsi->frame.uh.frame_height; 1769 instance->frame_type = vsi->frame.uh.frame_type; 1770 1771 return 0; 1772 } 1773 vdec_av1_slice_setup_core_to_dst_buf(struct vdec_av1_slice_instance * instance,struct vdec_lat_buf * lat_buf)1774 static int vdec_av1_slice_setup_core_to_dst_buf(struct vdec_av1_slice_instance *instance, 1775 struct vdec_lat_buf *lat_buf) 1776 { 1777 struct vb2_v4l2_buffer *dst; 1778 1779 dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx); 1780 if (!dst) 1781 return -EINVAL; 1782 1783 v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true); 1784 1785 return 0; 1786 } 1787 vdec_av1_slice_setup_core_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_pfc * pfc,struct vdec_av1_slice_vsi * vsi,struct vdec_fb * fb,struct vdec_lat_buf * lat_buf)1788 static int vdec_av1_slice_setup_core_buffer(struct vdec_av1_slice_instance *instance, 1789 struct vdec_av1_slice_pfc *pfc, 1790 struct vdec_av1_slice_vsi *vsi, 1791 struct vdec_fb *fb, 1792 struct vdec_lat_buf *lat_buf) 1793 { 1794 struct vb2_buffer *vb; 1795 struct vb2_queue *vq; 1796 int w, h, plane, size; 1797 int i; 1798 1799 plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes; 1800 w = vsi->frame.uh.upscaled_width; 1801 h = vsi->frame.uh.frame_height; 1802 size = ALIGN(w, VCODEC_DEC_ALIGNED_64) * ALIGN(h, VCODEC_DEC_ALIGNED_64); 1803 1804 /* frame buffer */ 1805 vsi->fb.y.dma_addr = fb->base_y.dma_addr; 1806 if (plane == 1) 1807 vsi->fb.c.dma_addr = fb->base_y.dma_addr + size; 1808 else 1809 vsi->fb.c.dma_addr = fb->base_c.dma_addr; 1810 1811 /* reference buffers */ 1812 vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); 1813 if (!vq) 1814 return -EINVAL; 1815 1816 /* get current output buffer */ 1817 vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf; 1818 if (!vb) 1819 return -EINVAL; 1820 1821 /* get buffer address from vb2buf */ 1822 for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) { 1823 struct vdec_av1_slice_fb *vref = &vsi->ref[i]; 1824 1825 vb = vb2_find_buffer(vq, pfc->ref_idx[i]); 1826 if (!vb) { 1827 memset(vref, 0, sizeof(*vref)); 1828 continue; 1829 } 1830 1831 vref->y.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0); 1832 if (plane == 1) 1833 vref->c.dma_addr = vref->y.dma_addr + size; 1834 else 1835 vref->c.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 1); 1836 } 1837 vsi->tile.dma_addr = lat_buf->tile_addr.dma_addr; 1838 vsi->tile.size = lat_buf->tile_addr.size; 1839 1840 return 0; 1841 } 1842 vdec_av1_slice_setup_core(struct vdec_av1_slice_instance * instance,struct vdec_fb * fb,struct vdec_lat_buf * lat_buf,struct vdec_av1_slice_pfc * pfc)1843 static int vdec_av1_slice_setup_core(struct vdec_av1_slice_instance *instance, 1844 struct vdec_fb *fb, 1845 struct vdec_lat_buf *lat_buf, 1846 struct vdec_av1_slice_pfc *pfc) 1847 { 1848 struct vdec_av1_slice_vsi *vsi = &pfc->vsi; 1849 int ret; 1850 1851 ret = vdec_av1_slice_setup_core_to_dst_buf(instance, lat_buf); 1852 if (ret) 1853 return ret; 1854 1855 ret = vdec_av1_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf); 1856 if (ret) 1857 return ret; 1858 1859 return 0; 1860 } 1861 vdec_av1_slice_update_core(struct vdec_av1_slice_instance * instance,struct vdec_lat_buf * lat_buf,struct vdec_av1_slice_pfc * pfc)1862 static int vdec_av1_slice_update_core(struct vdec_av1_slice_instance *instance, 1863 struct vdec_lat_buf *lat_buf, 1864 struct vdec_av1_slice_pfc *pfc) 1865 { 1866 struct vdec_av1_slice_vsi *vsi = instance->core_vsi; 1867 1868 mtk_vdec_debug(instance->ctx, "frame %u Y_CRC %08x %08x %08x %08x\n", 1869 pfc->seq, vsi->state.crc[0], vsi->state.crc[1], 1870 vsi->state.crc[2], vsi->state.crc[3]); 1871 mtk_vdec_debug(instance->ctx, "frame %u C_CRC %08x %08x %08x %08x\n", 1872 pfc->seq, vsi->state.crc[8], vsi->state.crc[9], 1873 vsi->state.crc[10], vsi->state.crc[11]); 1874 1875 return 0; 1876 } 1877 vdec_av1_slice_init(struct mtk_vcodec_dec_ctx * ctx)1878 static int vdec_av1_slice_init(struct mtk_vcodec_dec_ctx *ctx) 1879 { 1880 struct vdec_av1_slice_instance *instance; 1881 struct vdec_av1_slice_init_vsi *vsi; 1882 int ret; 1883 1884 instance = kzalloc(sizeof(*instance), GFP_KERNEL); 1885 if (!instance) 1886 return -ENOMEM; 1887 1888 instance->ctx = ctx; 1889 instance->vpu.id = SCP_IPI_VDEC_LAT; 1890 instance->vpu.core_id = SCP_IPI_VDEC_CORE; 1891 instance->vpu.ctx = ctx; 1892 instance->vpu.codec_type = ctx->current_codec; 1893 1894 ret = vpu_dec_init(&instance->vpu); 1895 if (ret) { 1896 mtk_vdec_err(ctx, "failed to init vpu dec, ret %d\n", ret); 1897 goto error_vpu_init; 1898 } 1899 1900 /* init vsi and global flags */ 1901 vsi = instance->vpu.vsi; 1902 if (!vsi) { 1903 mtk_vdec_err(ctx, "failed to get AV1 vsi\n"); 1904 ret = -EINVAL; 1905 goto error_vsi; 1906 } 1907 instance->init_vsi = vsi; 1908 instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, (u32)vsi->core_vsi); 1909 1910 if (!instance->core_vsi) { 1911 mtk_vdec_err(ctx, "failed to get AV1 core vsi\n"); 1912 ret = -EINVAL; 1913 goto error_vsi; 1914 } 1915 1916 if (vsi->vsi_size != sizeof(struct vdec_av1_slice_vsi)) 1917 mtk_vdec_err(ctx, "remote vsi size 0x%x mismatch! expected: 0x%zx\n", 1918 vsi->vsi_size, sizeof(struct vdec_av1_slice_vsi)); 1919 1920 instance->irq_enabled = 1; 1921 instance->inneracing_mode = IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability); 1922 1923 mtk_vdec_debug(ctx, "vsi 0x%p core_vsi 0x%llx 0x%p, inneracing_mode %d\n", 1924 vsi, vsi->core_vsi, instance->core_vsi, instance->inneracing_mode); 1925 1926 ret = vdec_av1_slice_init_cdf_table(instance); 1927 if (ret) 1928 goto error_vsi; 1929 1930 ret = vdec_av1_slice_init_iq_table(instance); 1931 if (ret) 1932 goto error_vsi; 1933 1934 ctx->drv_handle = instance; 1935 1936 return 0; 1937 error_vsi: 1938 vpu_dec_deinit(&instance->vpu); 1939 error_vpu_init: 1940 kfree(instance); 1941 1942 return ret; 1943 } 1944 vdec_av1_slice_deinit(void * h_vdec)1945 static void vdec_av1_slice_deinit(void *h_vdec) 1946 { 1947 struct vdec_av1_slice_instance *instance = h_vdec; 1948 1949 if (!instance) 1950 return; 1951 mtk_vdec_debug(instance->ctx, "h_vdec 0x%p\n", h_vdec); 1952 vpu_dec_deinit(&instance->vpu); 1953 vdec_av1_slice_free_working_buffer(instance); 1954 vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx); 1955 kfree(instance); 1956 } 1957 vdec_av1_slice_flush(void * h_vdec,struct mtk_vcodec_mem * bs,struct vdec_fb * fb,bool * res_chg)1958 static int vdec_av1_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs, 1959 struct vdec_fb *fb, bool *res_chg) 1960 { 1961 struct vdec_av1_slice_instance *instance = h_vdec; 1962 int i; 1963 1964 mtk_vdec_debug(instance->ctx, "flush ...\n"); 1965 1966 vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue); 1967 1968 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) 1969 vdec_av1_slice_clear_fb(&instance->slots.frame_info[i]); 1970 1971 return vpu_dec_reset(&instance->vpu); 1972 } 1973 vdec_av1_slice_get_pic_info(struct vdec_av1_slice_instance * instance)1974 static void vdec_av1_slice_get_pic_info(struct vdec_av1_slice_instance *instance) 1975 { 1976 struct mtk_vcodec_dec_ctx *ctx = instance->ctx; 1977 u32 data[3]; 1978 1979 mtk_vdec_debug(ctx, "w %u h %u\n", ctx->picinfo.pic_w, ctx->picinfo.pic_h); 1980 1981 data[0] = ctx->picinfo.pic_w; 1982 data[1] = ctx->picinfo.pic_h; 1983 data[2] = ctx->capture_fourcc; 1984 vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO); 1985 1986 ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64); 1987 ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64); 1988 ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0]; 1989 ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1]; 1990 } 1991 vdec_av1_slice_get_dpb_size(struct vdec_av1_slice_instance * instance,u32 * dpb_sz)1992 static inline void vdec_av1_slice_get_dpb_size(struct vdec_av1_slice_instance *instance, 1993 u32 *dpb_sz) 1994 { 1995 /* refer av1 specification */ 1996 *dpb_sz = V4L2_AV1_TOTAL_REFS_PER_FRAME + 1; 1997 } 1998 vdec_av1_slice_get_crop_info(struct vdec_av1_slice_instance * instance,struct v4l2_rect * cr)1999 static void vdec_av1_slice_get_crop_info(struct vdec_av1_slice_instance *instance, 2000 struct v4l2_rect *cr) 2001 { 2002 struct mtk_vcodec_dec_ctx *ctx = instance->ctx; 2003 2004 cr->left = 0; 2005 cr->top = 0; 2006 cr->width = ctx->picinfo.pic_w; 2007 cr->height = ctx->picinfo.pic_h; 2008 2009 mtk_vdec_debug(ctx, "l=%d, t=%d, w=%d, h=%d\n", 2010 cr->left, cr->top, cr->width, cr->height); 2011 } 2012 vdec_av1_slice_get_param(void * h_vdec,enum vdec_get_param_type type,void * out)2013 static int vdec_av1_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out) 2014 { 2015 struct vdec_av1_slice_instance *instance = h_vdec; 2016 2017 switch (type) { 2018 case GET_PARAM_PIC_INFO: 2019 vdec_av1_slice_get_pic_info(instance); 2020 break; 2021 case GET_PARAM_DPB_SIZE: 2022 vdec_av1_slice_get_dpb_size(instance, out); 2023 break; 2024 case GET_PARAM_CROP_INFO: 2025 vdec_av1_slice_get_crop_info(instance, out); 2026 break; 2027 default: 2028 mtk_vdec_err(instance->ctx, "invalid get parameter type=%d\n", type); 2029 return -EINVAL; 2030 } 2031 2032 return 0; 2033 } 2034 vdec_av1_slice_lat_decode(void * h_vdec,struct mtk_vcodec_mem * bs,struct vdec_fb * fb,bool * res_chg)2035 static int vdec_av1_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs, 2036 struct vdec_fb *fb, bool *res_chg) 2037 { 2038 struct vdec_av1_slice_instance *instance = h_vdec; 2039 struct vdec_lat_buf *lat_buf; 2040 struct vdec_av1_slice_pfc *pfc; 2041 struct vdec_av1_slice_vsi *vsi; 2042 struct mtk_vcodec_dec_ctx *ctx; 2043 int ret; 2044 2045 if (!instance || !instance->ctx) 2046 return -EINVAL; 2047 2048 ctx = instance->ctx; 2049 /* init msgQ for the first time */ 2050 if (vdec_msg_queue_init(&ctx->msg_queue, ctx, 2051 vdec_av1_slice_core_decode, sizeof(*pfc))) { 2052 mtk_vdec_err(ctx, "failed to init AV1 msg queue\n"); 2053 return -ENOMEM; 2054 } 2055 2056 /* bs NULL means flush decoder */ 2057 if (!bs) 2058 return vdec_av1_slice_flush(h_vdec, bs, fb, res_chg); 2059 2060 lat_buf = vdec_msg_queue_dqbuf(&ctx->msg_queue.lat_ctx); 2061 if (!lat_buf) { 2062 mtk_vdec_err(ctx, "failed to get AV1 lat buf\n"); 2063 return -EAGAIN; 2064 } 2065 pfc = (struct vdec_av1_slice_pfc *)lat_buf->private_data; 2066 if (!pfc) { 2067 ret = -EINVAL; 2068 goto err_free_fb_out; 2069 } 2070 vsi = &pfc->vsi; 2071 2072 ret = vdec_av1_slice_setup_lat(instance, bs, lat_buf, pfc); 2073 if (ret) { 2074 mtk_vdec_err(ctx, "failed to setup AV1 lat ret %d\n", ret); 2075 goto err_free_fb_out; 2076 } 2077 2078 vdec_av1_slice_vsi_to_remote(vsi, instance->vsi); 2079 ret = vpu_dec_start(&instance->vpu, NULL, 0); 2080 if (ret) { 2081 mtk_vdec_err(ctx, "failed to dec AV1 ret %d\n", ret); 2082 goto err_free_fb_out; 2083 } 2084 if (instance->inneracing_mode) 2085 vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf); 2086 2087 if (instance->irq_enabled) { 2088 ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, 2089 WAIT_INTR_TIMEOUT_MS, 2090 MTK_VDEC_LAT0); 2091 /* update remote vsi if decode timeout */ 2092 if (ret) { 2093 mtk_vdec_err(ctx, "AV1 Frame %d decode timeout %d\n", pfc->seq, ret); 2094 WRITE_ONCE(instance->vsi->state.timeout, 1); 2095 } 2096 vpu_dec_end(&instance->vpu); 2097 } 2098 2099 vdec_av1_slice_vsi_from_remote(vsi, instance->vsi); 2100 ret = vdec_av1_slice_update_lat(instance, lat_buf, pfc); 2101 2102 /* LAT trans full, re-decode */ 2103 if (ret == -EAGAIN) { 2104 mtk_vdec_err(ctx, "AV1 Frame %d trans full\n", pfc->seq); 2105 if (!instance->inneracing_mode) 2106 vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf); 2107 return 0; 2108 } 2109 2110 /* LAT trans full, no more UBE or decode timeout */ 2111 if (ret == -ENOMEM || vsi->state.timeout) { 2112 mtk_vdec_err(ctx, "AV1 Frame %d insufficient buffer or timeout\n", pfc->seq); 2113 if (!instance->inneracing_mode) 2114 vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf); 2115 return -EBUSY; 2116 } 2117 vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr; 2118 mtk_vdec_debug(ctx, "lat dma 1 0x%pad 0x%pad\n", 2119 &pfc->vsi.trans.dma_addr, &pfc->vsi.trans.dma_addr_end); 2120 2121 vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi->trans.dma_addr_end); 2122 2123 if (!instance->inneracing_mode) 2124 vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf); 2125 memcpy(&instance->slots, &vsi->slots, sizeof(instance->slots)); 2126 2127 return 0; 2128 2129 err_free_fb_out: 2130 vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf); 2131 2132 if (pfc) 2133 mtk_vdec_err(ctx, "slice dec number: %d err: %d", pfc->seq, ret); 2134 2135 return ret; 2136 } 2137 vdec_av1_slice_core_decode(struct vdec_lat_buf * lat_buf)2138 static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf) 2139 { 2140 struct vdec_av1_slice_instance *instance; 2141 struct vdec_av1_slice_pfc *pfc; 2142 struct mtk_vcodec_dec_ctx *ctx = NULL; 2143 struct vdec_fb *fb = NULL; 2144 int ret = -EINVAL; 2145 2146 if (!lat_buf) 2147 return -EINVAL; 2148 2149 pfc = lat_buf->private_data; 2150 ctx = lat_buf->ctx; 2151 if (!pfc || !ctx) 2152 return -EINVAL; 2153 2154 instance = ctx->drv_handle; 2155 if (!instance) 2156 goto err; 2157 2158 fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); 2159 if (!fb) { 2160 ret = -EBUSY; 2161 goto err; 2162 } 2163 2164 ret = vdec_av1_slice_setup_core(instance, fb, lat_buf, pfc); 2165 if (ret) { 2166 mtk_vdec_err(ctx, "vdec_av1_slice_setup_core\n"); 2167 goto err; 2168 } 2169 vdec_av1_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi); 2170 ret = vpu_dec_core(&instance->vpu); 2171 if (ret) { 2172 mtk_vdec_err(ctx, "vpu_dec_core\n"); 2173 goto err; 2174 } 2175 2176 if (instance->irq_enabled) { 2177 ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, 2178 WAIT_INTR_TIMEOUT_MS, 2179 MTK_VDEC_CORE); 2180 /* update remote vsi if decode timeout */ 2181 if (ret) { 2182 mtk_vdec_err(ctx, "AV1 frame %d core timeout\n", pfc->seq); 2183 WRITE_ONCE(instance->vsi->state.timeout, 1); 2184 } 2185 vpu_dec_core_end(&instance->vpu); 2186 } 2187 2188 ret = vdec_av1_slice_update_core(instance, lat_buf, pfc); 2189 if (ret) { 2190 mtk_vdec_err(ctx, "vdec_av1_slice_update_core\n"); 2191 goto err; 2192 } 2193 2194 mtk_vdec_debug(ctx, "core dma_addr_end 0x%pad\n", 2195 &instance->core_vsi->trans.dma_addr_end); 2196 vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, instance->core_vsi->trans.dma_addr_end); 2197 2198 ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req); 2199 2200 return 0; 2201 2202 err: 2203 /* always update read pointer */ 2204 vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end); 2205 2206 if (fb) 2207 ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req); 2208 2209 return ret; 2210 } 2211 2212 const struct vdec_common_if vdec_av1_slice_lat_if = { 2213 .init = vdec_av1_slice_init, 2214 .decode = vdec_av1_slice_lat_decode, 2215 .get_param = vdec_av1_slice_get_param, 2216 .deinit = vdec_av1_slice_deinit, 2217 }; 2218