xref: /openbmc/linux/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_av1_req_lat_if.c (revision ee1cd5048959de496cd005c50b137212a5b62062)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * Copyright (c) 2023 MediaTek Inc.
4   * Author: Xiaoyong Lu <xiaoyong.lu@mediatek.com>
5   */
6  
7  #include <linux/module.h>
8  #include <linux/slab.h>
9  #include <media/videobuf2-dma-contig.h>
10  
11  #include "../mtk_vcodec_dec.h"
12  #include "../../common/mtk_vcodec_intr.h"
13  #include "../vdec_drv_base.h"
14  #include "../vdec_drv_if.h"
15  #include "../vdec_vpu_if.h"
16  
/* Sizing, fixed-point and sentinel constants used by the AV1 decoder below. */
17  #define AV1_MAX_FRAME_BUF_COUNT		(V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
18  #define AV1_TILE_BUF_SIZE		64
19  #define AV1_SCALE_SUBPEL_BITS		10
20  #define AV1_REF_SCALE_SHIFT		14
/* 1 << AV1_REF_SCALE_SHIFT; presumably the fixed-point identity scale - confirm at use sites */
21  #define AV1_REF_NO_SCALE		BIT(AV1_REF_SCALE_SHIFT)
22  #define AV1_REF_INVALID_SCALE		-1
23  #define AV1_CDF_TABLE_BUFFER_SIZE	16384
24  #define AV1_PRIMARY_REF_NONE		7
25  
/* Sentinel for "no index/slot found" */
26  #define AV1_INVALID_IDX			-1
27  
/*
 * Shift @value right by @n after adding half of 2^@n, i.e. divide by 2^@n and
 * round to nearest. Each argument is evaluated exactly once via a local copy.
 * For @n == 0 the added bias is 0 and @value is returned unchanged.
 */
28  #define AV1_DIV_ROUND_UP_POW2(value, n)			\
29  ({							\
30  	typeof(n) _n  = n;				\
31  	typeof(value) _value = value;			\
32  	(_value + (BIT(_n) >> 1)) >> _n;		\
33  })
34  
/*
 * Signed variant of AV1_DIV_ROUND_UP_POW2(): the magnitude of @value is
 * rounded and the sign is re-applied afterwards.
 * The locals use a trailing underscore (_n_, _value_) so they cannot collide
 * with the _n/_value locals declared by the nested AV1_DIV_ROUND_UP_POW2()
 * expansion.
 */
35  #define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
36  ({									\
37  	typeof(n) _n_  = n;						\
38  	typeof(value) _value_ = value;					\
39  	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
40  		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
41  })
42  
/*
 * Flag helpers: test a bit in (x)->flags and normalise the result to 0 or 1.
 * BIT_FLAG() takes the mask directly; the other helpers paste @name onto the
 * matching V4L2_AV1_*_FLAG_ prefix.
 */
43  #define BIT_FLAG(x, bit)		(!!((x)->flags & (bit)))
44  #define SEGMENTATION_FLAG(x, name)	(!!((x)->flags & V4L2_AV1_SEGMENTATION_FLAG_##name))
45  #define QUANT_FLAG(x, name)		(!!((x)->flags & V4L2_AV1_QUANTIZATION_FLAG_##name))
46  #define SEQUENCE_FLAG(x, name)		(!!((x)->flags & V4L2_AV1_SEQUENCE_FLAG_##name))
47  #define FH_FLAG(x, name)		(!!((x)->flags & V4L2_AV1_FRAME_FLAG_##name))
48  
/* Lower/upper bound constants; presumably the valid quantizer index range - confirm at use sites */
49  #define MINQ 0
50  #define MAXQ 255
51  
/* div_lut[] geometry: DIV_LUT_NUM is 1 << DIV_LUT_BITS, the table holds DIV_LUT_NUM + 1 entries */
52  #define DIV_LUT_PREC_BITS 14
53  #define DIV_LUT_BITS 8
54  #define DIV_LUT_NUM BIT(DIV_LUT_BITS)
55  #define WARP_PARAM_REDUCE_BITS 6
56  #define WARPEDMODEL_PREC_BITS 16
57  
58  #define SEG_LVL_ALT_Q 0
59  #define SECONDARY_FILTER_STRENGTH_NUM_BITS 2
60  
/*
 * Reciprocal lookup table with DIV_LUT_NUM + 1 (257) entries, falling from
 * 16384 (1 << DIV_LUT_PREC_BITS) at index 0 to 8192 at index DIV_LUT_NUM.
 * The entries spot-checked (0, 1, 2, 3, 255, 256) equal
 * round((1 << (DIV_LUT_PREC_BITS + DIV_LUT_BITS)) / (DIV_LUT_NUM + i)).
 */
61  static const short div_lut[DIV_LUT_NUM + 1] = {
62  	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
63  	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
64  	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
65  	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
66  	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
67  	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
68  	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
69  	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
70  	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
71  	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
72  	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
73  	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
74  	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
75  	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
76  	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
77  	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
78  	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
79  	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
80  	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
81  	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
82  	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
83  	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
84  	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
85  	8240,  8224,  8208,  8192,
86  };
87  
88  /**
89   * struct vdec_av1_slice_init_vsi - VSI used to initialize instance
90   * @architecture:	architecture type
91   * @reserved:		reserved
92   * @core_vsi:		address of the core vsi
93   * @cdf_table_addr:	cdf table address
94   * @cdf_table_size:	cdf table size in bytes (a u32 followed by a u64 member:
     *			implicit padding on ABIs that align u64 to 8 bytes)
95   * @iq_table_addr:	iq table address
96   * @iq_table_size:	iq table size in bytes
97   * @vsi_size:		size of the shared vsi structure
98   */
99  struct vdec_av1_slice_init_vsi {
100  	u32 architecture;
101  	u32 reserved;
102  	u64 core_vsi;
103  	u64 cdf_table_addr;
104  	u32 cdf_table_size;
105  	u64 iq_table_addr;
106  	u32 iq_table_size;
107  	u32 vsi_size;
108  };
109  
110  /**
111   * struct vdec_av1_slice_mem - memory address and size
112   * @buf:		64-bit view of @dma_addr (keeps the first union 64 bits wide)
113   * @dma_addr:		buffer address
114   * @size:		buffer size
115   * @dma_addr_end:	buffer end address (shares storage with @size)
116   * @padding:		64-bit view keeping the second union 64 bits wide
117   */
118  struct vdec_av1_slice_mem {
119  	union {
120  		u64 buf;
121  		dma_addr_t dma_addr;
122  	};
123  	union {
124  		size_t size;
125  		dma_addr_t dma_addr_end;
126  		u64 padding;
127  	};
128  };
129  
130  /**
131   * struct vdec_av1_slice_state - decoding state
132   * @err:	error type reported for the decode
133   * @full:	transcoded buffer is full or not
134   * @timeout:	decode timed out or not
135   * @perf:	performance enable
136   * @crc:	hw checksum (16 words)
137   * @out_size:	hw output size
138   */
139  struct vdec_av1_slice_state {
140  	int err;
141  	u32 full;
142  	u32 timeout;
143  	u32 perf;
144  	u32 crc[16];
145  	u32 out_size;
146  };
147  
148  /*
149   * enum vdec_av1_slice_resolution_level - resolution level
150   * (values are implicit, AV1_RES_NONE == 0 up to AV1_RES_8K == 3)
     */
151  enum vdec_av1_slice_resolution_level {
152  	AV1_RES_NONE,
153  	AV1_RES_FHD,
154  	AV1_RES_4K,
155  	AV1_RES_8K,
156  };
157  
158  /*
159   * enum vdec_av1_slice_frame_type - av1 frame type
160   * (AV1_FRAME_TYPES is the number of frame types, not a type itself)
     */
161  enum vdec_av1_slice_frame_type {
162  	AV1_KEY_FRAME = 0,
163  	AV1_INTER_FRAME,
164  	AV1_INTRA_ONLY_FRAME,
165  	AV1_SWITCH_FRAME,
166  	AV1_FRAME_TYPES,
167  };
168  
169  /*
170   * enum vdec_av1_slice_reference_mode - reference mode type
171   * (AV1_REFERENCE_MODES is the number of modes, not a mode itself)
     */
172  enum vdec_av1_slice_reference_mode {
173  	AV1_SINGLE_REFERENCE = 0,
174  	AV1_COMPOUND_REFERENCE,
175  	AV1_REFERENCE_MODE_SELECT,
176  	AV1_REFERENCE_MODES,
177  };
178  
179  /**
180   * struct vdec_av1_slice_tile_group - info for each tile
181   * @num_tiles:			number of tiles
182   * @tile_size:			input size of each tile (V4L2_AV1_MAX_TILE_COUNT entries)
183   * @tile_start_offset:		offset of each tile within the input buffer
184   */
185  struct vdec_av1_slice_tile_group {
186  	u32 num_tiles;
187  	u32 tile_size[V4L2_AV1_MAX_TILE_COUNT];
188  	u32 tile_start_offset[V4L2_AV1_MAX_TILE_COUNT];
189  };
190  
191  /**
192   * struct vdec_av1_slice_scale_factors - scale info for each ref frame
193   * @is_scaled:  whether the frame is scaled
194   * @x_scale:    frame width scale coefficient
195   * @y_scale:    frame height scale coefficient
196   * @x_step:     width step derived from x_scale
197   * @y_step:     height step derived from y_scale
198   */
199  struct vdec_av1_slice_scale_factors {
200  	u8 is_scaled;
201  	int x_scale;
202  	int y_scale;
203  	int x_step;
204  	int y_step;
205  };
206  
207  /**
208   * struct vdec_av1_slice_frame_refs - ref frame info
209   * @ref_fb_idx:         ref slot index
210   * @ref_map_idx:        ref frame index
211   * @scale_factors:      scale factors for this ref frame
212   */
213  struct vdec_av1_slice_frame_refs {
214  	int ref_fb_idx;
215  	int ref_map_idx;
216  	struct vdec_av1_slice_scale_factors scale_factors;
217  };
218  
219  /**
220   * struct vdec_av1_slice_gm - AV1 Global Motion parameters
221   * @wmtype:     the type of global motion transform used
222   * @wmmat:      global motion parameters (8 entries)
223   * @alpha:      alpha info
224   * @beta:       beta info
225   * @gamma:      gamma info
226   * @delta:      delta info
227   * @invalid:    whether the parameters are invalid
228   */
229  struct vdec_av1_slice_gm {
230  	int wmtype;
231  	int wmmat[8];
232  	short alpha;
233  	short beta;
234  	short gamma;
235  	short delta;
236  	char invalid;
237  };
238  
239  /**
240   * struct vdec_av1_slice_sm - AV1 Skip Mode parameters
241   * @skip_mode_allowed:  whether skip mode is allowed
242   * @skip_mode_present:  specifies whether skip_mode will be present
243   * @skip_mode_frame:    specifies the two frames to use for compound prediction
244   */
245  struct vdec_av1_slice_sm {
246  	u8 skip_mode_allowed;
247  	u8 skip_mode_present;
248  	int skip_mode_frame[2];
249  };
250  
251  /**
252   * struct vdec_av1_slice_seg - AV1 Segmentation params
253   * @segmentation_enabled:        whether this frame makes use of the segmentation tool
254   * @segmentation_update_map:     the segmentation map is updated while decoding this frame
255   * @segmentation_temporal_update:the segmentation map is coded relative to the existing segmentation map
256   * @segmentation_update_data:    new parameters are about to be specified for each segment
257   * @feature_data:                specifies the feature data for a segment feature
258   * @feature_enabled_mask:        whether the corresponding feature value is coded
259   * @segid_preskip:               segment id will be read before the skip syntax element
260   * @last_active_segid:           the highest numbered segment id that has some enabled feature
261   */
262  struct vdec_av1_slice_seg {
263  	u8 segmentation_enabled;
264  	u8 segmentation_update_map;
265  	u8 segmentation_temporal_update;
266  	u8 segmentation_update_data;
267  	int feature_data[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX];
268  	u16 feature_enabled_mask[V4L2_AV1_MAX_SEGMENTS];
269  	int segid_preskip;
270  	int last_active_segid;
271  };
272  
273  /**
274   * struct vdec_av1_slice_delta_q_lf - AV1 Loop Filter delta parameters
275   * @delta_q_present:    specifies whether quantizer index delta values are present
276   * @delta_q_res:        specifies the left shift which should be applied to decoded
     *                      quantizer index delta values
277   * @delta_lf_present:   specifies whether loop filter delta values are present
278   * @delta_lf_res:       specifies the left shift which should be applied to decoded
279   *                      loop filter delta values
280   * @delta_lf_multi:     specifies that separate loop filter deltas are sent for horizontal
281   *                      luma edges, vertical luma edges, the u edges, and the v edges.
282   */
283  struct vdec_av1_slice_delta_q_lf {
284  	u8 delta_q_present;
285  	u8 delta_q_res;
286  	u8 delta_lf_present;
287  	u8 delta_lf_res;
288  	u8 delta_lf_multi;
289  };
290  
291  /**
292   * struct vdec_av1_slice_quantization - AV1 Quantization params
293   * @base_q_idx:         indicates the base frame qindex. This is used for Y AC
294   *                      coefficients and as the base value for the other quantizers.
295   * @qindex:             qindex, one entry per segment (V4L2_AV1_MAX_SEGMENTS entries)
296   * @delta_qydc:         indicates the Y DC quantizer relative to base_q_idx
297   * @delta_qudc:         indicates the U DC quantizer relative to base_q_idx
298   * @delta_quac:         indicates the U AC quantizer relative to base_q_idx
299   * @delta_qvdc:         indicates the V DC quantizer relative to base_q_idx
300   * @delta_qvac:         indicates the V AC quantizer relative to base_q_idx
301   * @using_qmatrix:      specifies that the quantizer matrix will be used to
302   *                      compute quantizers
303   * @qm_y:               specifies the level in the quantizer matrix that should
304   *                      be used for luma plane decoding
305   * @qm_u:               specifies the level in the quantizer matrix that should
306   *                      be used for chroma U plane decoding
307   * @qm_v:               specifies the level in the quantizer matrix that should be
308   *                      used for chroma V plane decoding
309   */
310  struct vdec_av1_slice_quantization {
311  	int base_q_idx;
312  	int qindex[V4L2_AV1_MAX_SEGMENTS];
313  	int delta_qydc;
314  	int delta_qudc;
315  	int delta_quac;
316  	int delta_qvdc;
317  	int delta_qvac;
318  	u8 using_qmatrix;
319  	u8 qm_y;
320  	u8 qm_u;
321  	u8 qm_v;
322  };
323  
324  /**
325   * struct vdec_av1_slice_lr - AV1 Loop Restoration parameters
326   * @use_lr:                     whether to use loop restoration
327   * @use_chroma_lr:              whether to use chroma loop restoration
328   * @frame_restoration_type:     specifies the type of restoration used for each plane
329   * @loop_restoration_size:      specifies the size of loop restoration units in units
330   *                              of samples in the current plane
331   */
332  struct vdec_av1_slice_lr {
333  	u8 use_lr;
334  	u8 use_chroma_lr;
335  	u8 frame_restoration_type[V4L2_AV1_NUM_PLANES_MAX];
336  	u32 loop_restoration_size[V4L2_AV1_NUM_PLANES_MAX];
337  };
338  
339  /**
340   * struct vdec_av1_slice_loop_filter - AV1 Loop filter parameters
341   * @loop_filter_level:          an array containing loop filter strength values
342   * @loop_filter_ref_deltas:     contains the adjustment needed for the filter
343   *                              level based on the chosen reference frame
344   * @loop_filter_mode_deltas:    contains the adjustment needed for the filter
345   *                              level based on the chosen mode
346   * @loop_filter_sharpness:      indicates the sharpness level. The loop_filter_level
347   *                              and loop_filter_sharpness together determine when
348   *                              a block edge is filtered, and by how much the
349   *                              filtering can change the sample values
350   * @loop_filter_delta_enabled:  filter level depends on the mode and reference
351   *                              frame used to predict a block
352   */
353  struct vdec_av1_slice_loop_filter {
354  	u8 loop_filter_level[4];
355  	int loop_filter_ref_deltas[V4L2_AV1_TOTAL_REFS_PER_FRAME];
356  	int loop_filter_mode_deltas[2];
357  	u8 loop_filter_sharpness;
358  	u8 loop_filter_delta_enabled;
359  };
360  
361  /**
362   * struct vdec_av1_slice_cdef - AV1 CDEF parameters
363   * @cdef_damping:       controls the amount of damping in the deringing filter
364   * @cdef_y_strength:    strength of the primary and secondary filter, Y (8 entries)
365   * @cdef_uv_strength:   strength of the primary and secondary filter, UV (8 entries)
366   * @cdef_bits:          specifies the number of bits needed to specify which
367   *                      CDEF filter to apply
368   */
369  struct vdec_av1_slice_cdef {
370  	u8 cdef_damping;
371  	u8 cdef_y_strength[8];
372  	u8 cdef_uv_strength[8];
373  	u8 cdef_bits;
374  };
375  
376  /**
377   * struct vdec_av1_slice_mfmv - AV1 mfmv parameters
378   * @mfmv_valid_ref:     mfmv_valid_ref (3 entries)
379   * @mfmv_dir:           mfmv_dir (3 entries)
380   * @mfmv_ref_to_cur:    mfmv_ref_to_cur (3 entries)
381   * @mfmv_ref_frame_idx: mfmv_ref_frame_idx (3 entries)
382   * @mfmv_count:         mfmv_count; presumably the number of used entries - TODO confirm
383   */
384  struct vdec_av1_slice_mfmv {
385  	u32 mfmv_valid_ref[3];
386  	u32 mfmv_dir[3];
387  	int mfmv_ref_to_cur[3];
388  	int mfmv_ref_frame_idx[3];
389  	int mfmv_count;
390  };
391  
392  /**
393   * struct vdec_av1_slice_tile - AV1 Tile info
394   * @tile_cols:                  specifies the number of tiles across the frame
395   * @tile_rows:                  specifies the number of tiles down the frame
396   * @mi_col_starts:              an array specifying the start column
397   * @mi_row_starts:              an array specifying the start row
398   * @context_update_tile_id:     specifies which tile to use for the CDF update
399   * @uniform_tile_spacing_flag:  tiles are uniformly spaced across the frame
400   *                              or the tile sizes are coded
401   */
402  struct vdec_av1_slice_tile {
403  	u8 tile_cols;
404  	u8 tile_rows;
405  	int mi_col_starts[V4L2_AV1_MAX_TILE_COLS + 1];
406  	int mi_row_starts[V4L2_AV1_MAX_TILE_ROWS + 1];
407  	u8 context_update_tile_id;
408  	u8 uniform_tile_spacing_flag;
409  };
410  
411  /**
412   * struct vdec_av1_slice_uncompressed_header - Represents an AV1 Frame Header OBU
413   * @use_ref_frame_mvs:          use_ref_frame_mvs flag
414   * @order_hint:                 specifies OrderHintBits least significant bits of the expected
     *                              output order for this frame
415   * @gm:                         global motion params, one per reference
416   * @upscaled_width:             the upscaled width
417   * @frame_width:                frame's width
418   * @frame_height:               frame's height
419   * @reduced_tx_set:             frame is restricted to a reduced subset of the full
420   *                              set of transform types
421   * @tx_mode:                    specifies how the transform size is determined
422   * @uniform_tile_spacing_flag:  tiles are uniformly spaced across the frame
423   *                              or the tile sizes are coded
424   * @interpolation_filter:       specifies the filter selection used for performing inter prediction
425   * @allow_warped_motion:        motion_mode may be present or not
426   * @is_motion_mode_switchable:  equal to 0 specifies that only the SIMPLE motion mode will be used
427   * @reference_mode:             frame reference mode selected
428   * @allow_high_precision_mv:    specifies that motion vectors are specified to
429   *                              quarter pel precision or to eighth pel precision
430   * @allow_intra_bc:             indicates that intra block copy may be used in this frame
431   * @force_integer_mv:           specifies motion vectors will always be integers or
432   *                              can contain fractional bits
433   * @allow_screen_content_tools: intra blocks may use palette encoding
434   * @error_resilient_mode:       error resilient mode is enabled/disabled
435   * @frame_type:                 specifies the AV1 frame type
436   * @primary_ref_frame:          specifies which reference frame contains the CDF values
437   *                              and other state that should be loaded at the start of the frame
439   * @disable_frame_end_update_cdf:indicates whether the end of frame CDF update is disabled
440   * @disable_cdf_update:         specifies whether the CDF update in the symbol
441   *                              decoding process should be disabled
442   * @skip_mode:                  av1 skip mode parameters
443   * @seg:                        av1 segmentation parameters
444   * @delta_q_lf:                 av1 delta loop filter parameters
445   * @quant:                      av1 Quantization params
446   * @lr:                         av1 Loop Restoration parameters
447   * @superres_denom:             the denominator for the upscaling ratio
448   * @loop_filter:                av1 Loop filter parameters
449   * @cdef:                       av1 CDEF parameters
450   * @mfmv:                       av1 mfmv parameters
451   * @tile:                       av1 Tile info
452   * @frame_is_intra:             intra frame
453   * @loss_less_array:            lossless array, one entry per segment
454   * @coded_loss_less:            coded lossless
455   * @mi_rows:                    size of mi unit in rows
456   * @mi_cols:                    size of mi unit in cols
457   */
458  struct vdec_av1_slice_uncompressed_header {
459  	u8 use_ref_frame_mvs;
460  	int order_hint;
461  	struct vdec_av1_slice_gm gm[V4L2_AV1_TOTAL_REFS_PER_FRAME];
462  	u32 upscaled_width;
463  	u32 frame_width;
464  	u32 frame_height;
465  	u8 reduced_tx_set;
466  	u8 tx_mode;
467  	u8 uniform_tile_spacing_flag;
468  	u8 interpolation_filter;
469  	u8 allow_warped_motion;
470  	u8 is_motion_mode_switchable;
471  	u8 reference_mode;
472  	u8 allow_high_precision_mv;
473  	u8 allow_intra_bc;
474  	u8 force_integer_mv;
475  	u8 allow_screen_content_tools;
476  	u8 error_resilient_mode;
477  	u8 frame_type;
478  	u8 primary_ref_frame;
479  	u8 disable_frame_end_update_cdf;
480  	u32 disable_cdf_update;
481  	struct vdec_av1_slice_sm skip_mode;
482  	struct vdec_av1_slice_seg seg;
483  	struct vdec_av1_slice_delta_q_lf delta_q_lf;
484  	struct vdec_av1_slice_quantization quant;
485  	struct vdec_av1_slice_lr lr;
486  	u32 superres_denom;
487  	struct vdec_av1_slice_loop_filter loop_filter;
488  	struct vdec_av1_slice_cdef cdef;
489  	struct vdec_av1_slice_mfmv mfmv;
490  	struct vdec_av1_slice_tile tile;
491  	u8 frame_is_intra;
492  	u8 loss_less_array[V4L2_AV1_MAX_SEGMENTS];
493  	u8 coded_loss_less;
494  	u32 mi_rows;
495  	u32 mi_cols;
496  };
497  
498  /**
499   * struct vdec_av1_slice_seq_header - Represents an AV1 Sequence OBU
500   * @bitdepth:                   the bitdepth to use for the sequence
501   * @enable_superres:            specifies whether the use_superres syntax element may be present
502   * @enable_filter_intra:        specifies the use_filter_intra syntax element may be present
503   * @enable_intra_edge_filter:   whether the intra edge filtering process should be enabled
504   * @enable_interintra_compound: specifies the mode info for inter blocks may
505   *                              contain the syntax element interintra
506   * @enable_masked_compound:     specifies the mode info for inter blocks may
507   *                              contain the syntax element compound_type
508   * @enable_dual_filter:         the inter prediction filter type may be specified independently
509   * @enable_jnt_comp:            distance weights process may be used for inter prediction
510   * @mono_chrome:                indicates the video does not contain U and V color planes
511   * @enable_order_hint:          tools based on the values of order hints may be used
512   * @order_hint_bits:            the number of bits used for the order_hint field at each frame
513   * @use_128x128_superblock:     indicates superblocks contain 128*128 luma samples
514   * @subsampling_x:              the chroma subsampling format
515   * @subsampling_y:              the chroma subsampling format
516   * @max_frame_width:            the maximum frame width for the frames represented by sequence
517   * @max_frame_height:           the maximum frame height for the frames represented by sequence
518   */
519  struct vdec_av1_slice_seq_header {
520  	u8 bitdepth;
521  	u8 enable_superres;
522  	u8 enable_filter_intra;
523  	u8 enable_intra_edge_filter;
524  	u8 enable_interintra_compound;
525  	u8 enable_masked_compound;
526  	u8 enable_dual_filter;
527  	u8 enable_jnt_comp;
528  	u8 mono_chrome;
529  	u8 enable_order_hint;
530  	u8 order_hint_bits;
531  	u8 use_128x128_superblock;
532  	u8 subsampling_x;
533  	u8 subsampling_y;
534  	u32 max_frame_width;
535  	u32 max_frame_height;
536  };
537  
538  /**
539   * struct vdec_av1_slice_frame - Represents current Frame info
540   * @uh:                         uncompressed header info
541   * @seq:                        sequence header info
542   * @large_scale_tile:           large scale tile mode flag
543   * @cur_ts:                     current frame timestamp
544   * @prev_fb_idx:                previous slot id
545   * @ref_frame_sign_bias:        array of ref_frame sign bias values
546   * @order_hints:                array of ref_frame order hints
547   * @ref_frame_valid:            array marking valid ref_frames
548   * @ref_frame_map:              map to slot frame info
549   * @frame_refs:                 ref_frame info
550   */
551  struct vdec_av1_slice_frame {
552  	struct vdec_av1_slice_uncompressed_header uh;
553  	struct vdec_av1_slice_seq_header seq;
554  	u8 large_scale_tile;
555  	u64 cur_ts;
556  	int prev_fb_idx;
557  	u8 ref_frame_sign_bias[V4L2_AV1_TOTAL_REFS_PER_FRAME];
558  	u32 order_hints[V4L2_AV1_REFS_PER_FRAME];
559  	u32 ref_frame_valid[V4L2_AV1_REFS_PER_FRAME];
560  	int ref_frame_map[V4L2_AV1_TOTAL_REFS_PER_FRAME];
561  	struct vdec_av1_slice_frame_refs frame_refs[V4L2_AV1_REFS_PER_FRAME];
562  };
563  
564  /**
565   * struct vdec_av1_slice_work_buffer - work buffer for lat
566   * @mv_addr:    mv buffer address and size
567   * @cdf_addr:   cdf buffer address and size
568   * @segid_addr: segid buffer address and size
569   */
570  struct vdec_av1_slice_work_buffer {
571  	struct vdec_av1_slice_mem mv_addr;
572  	struct vdec_av1_slice_mem cdf_addr;
573  	struct vdec_av1_slice_mem segid_addr;
574  };
575  
576  /**
577   * struct vdec_av1_slice_frame_info - frame info for each slot
578   * @frame_type:         frame type
579   * @frame_is_intra:     is intra frame
580   * @order_hint:         order hint
581   * @order_hints:        reference frame order hints
582   * @upscaled_width:     upscaled width
583   * @pic_pitch:          buffer pitch
584   * @frame_width:        frame width
585   * @frame_height:       frame height
586   * @mi_rows:            rows in mode info
587   * @mi_cols:            cols in mode info
588   * @ref_count:          reference count of the slot; 0 marks the slot as free
589   */
590  struct vdec_av1_slice_frame_info {
591  	u8 frame_type;
592  	u8 frame_is_intra;
593  	int order_hint;
594  	u32 order_hints[V4L2_AV1_REFS_PER_FRAME];
595  	u32 upscaled_width;
596  	u32 pic_pitch;
597  	u32 frame_width;
598  	u32 frame_height;
599  	u32 mi_rows;
600  	u32 mi_cols;
601  	int ref_count;
602  };
603  
604  /**
605   * struct vdec_av1_slice_slot - slot info that needs to be saved in the global instance
606   * @frame_info: frame info for each slot (AV1_MAX_FRAME_BUF_COUNT entries)
607   * @timestamp:  timestamp for each slot, indexed like @frame_info
608   */
609  struct vdec_av1_slice_slot {
610  	struct vdec_av1_slice_frame_info frame_info[AV1_MAX_FRAME_BUF_COUNT];
611  	u64 timestamp[AV1_MAX_FRAME_BUF_COUNT];
612  };
613  
614  /**
615   * struct vdec_av1_slice_fb - frame buffer for decoding
616   * @y:  y buffer address and size
617   * @c:  c buffer address and size
618   */
619  struct vdec_av1_slice_fb {
620  	struct vdec_av1_slice_mem y;
621  	struct vdec_av1_slice_mem c;
622  };
623  
624  /**
625   * struct vdec_av1_slice_vsi - exchange frame information between Main CPU and MicroP
626   * @bs:			input buffer info
627   * @work_buffer:	working buffers for hw, one per frame buffer slot
628   * @cdf_table:		cdf_table buffer
629   * @cdf_tmp:		cdf temp buffer
630   * @rd_mv:		mv buffer, lat output and core input
631   * @ube:		ube buffer
632   * @trans:		transcoded buffer
633   * @err_map:		err map buffer
634   * @row_info:		row info buffer
635   * @fb:			current y/c buffer
636   * @ref:		ref y/c buffers
637   * @iq_table:		iq table buffer
638   * @tile:		tile buffer
639   * @slots:		slots info for each frame
640   * @slot_id:		current frame slot id
641   * @frame:		current frame info
642   * @state:		status after decode done
643   * @cur_lst_tile_id:	tile id for large scale
644   */
645  struct vdec_av1_slice_vsi {
646  	/* lat */
647  	struct vdec_av1_slice_mem bs;
648  	struct vdec_av1_slice_work_buffer work_buffer[AV1_MAX_FRAME_BUF_COUNT];
649  	struct vdec_av1_slice_mem cdf_table;
650  	struct vdec_av1_slice_mem cdf_tmp;
651  	/* LAT stage's output, Core stage's input */
652  	struct vdec_av1_slice_mem rd_mv;
653  	struct vdec_av1_slice_mem ube;
654  	struct vdec_av1_slice_mem trans;
655  	struct vdec_av1_slice_mem err_map;
656  	struct vdec_av1_slice_mem row_info;
657  	/* core */
658  	struct vdec_av1_slice_fb fb;
659  	struct vdec_av1_slice_fb ref[V4L2_AV1_REFS_PER_FRAME];
660  	struct vdec_av1_slice_mem iq_table;
661  	/* shared by lat and core */
662  	struct vdec_av1_slice_mem tile;
663  	struct vdec_av1_slice_slot slots;
664  	s8 slot_id;
665  	struct vdec_av1_slice_frame frame;
666  	struct vdec_av1_slice_state state;
667  	u32 cur_lst_tile_id;
668  };
669  
670  /**
671   * struct vdec_av1_slice_pfc - per-frame context that contains a local vsi;
672   *                             it is passed from lat to core
673   * @vsi:        local vsi, copied to/from the remote vsi before/after decoding
674   * @ref_idx:    reference buffer timestamps
675   * @seq:        picture sequence
676   */
677  struct vdec_av1_slice_pfc {
678  	struct vdec_av1_slice_vsi vsi;
679  	u64 ref_idx[V4L2_AV1_REFS_PER_FRAME];
680  	int seq;
681  };
682  
683  /**
684   * struct vdec_av1_slice_instance - represent one av1 instance
685   * @ctx:                pointer to codec's context
686   * @vpu:                VPU instance
687   * @iq_table:           iq table buffer
688   * @cdf_table:          cdf table buffer
689   * @mv:                 mv working buffers
690   * @cdf:                cdf working buffers
691   * @seg:                segmentation working buffers
692   * @cdf_temp:           cdf temp buffer
693   * @tile:               tile buffer
694   * @slots:              slots info
695   * @tile_group:         tile_group entry
696   * @level:              level of current resolution
697   * @width:              width of last picture
698   * @height:             height of last picture
699   * @frame_type:         frame_type of last picture
700   * @irq_enabled:        irq to Main CPU or MicroP
701   * @inneracing_mode:    is inneracing mode
702   * @init_vsi:           vsi used to initialize the AV1 instance (same pointer as @vsi)
703   * @vsi:                vsi used for decoding/flush ... (same pointer as @init_vsi)
704   * @core_vsi:           vsi used for Core stage
705   * @seq:                global picture sequence
706   */
707  struct vdec_av1_slice_instance {
708  	struct mtk_vcodec_dec_ctx *ctx;
709  	struct vdec_vpu_inst vpu;
710  
711  	struct mtk_vcodec_mem iq_table;
712  	struct mtk_vcodec_mem cdf_table;
713  
714  	struct mtk_vcodec_mem mv[AV1_MAX_FRAME_BUF_COUNT];
715  	struct mtk_vcodec_mem cdf[AV1_MAX_FRAME_BUF_COUNT];
716  	struct mtk_vcodec_mem seg[AV1_MAX_FRAME_BUF_COUNT];
717  	struct mtk_vcodec_mem cdf_temp;
718  	struct mtk_vcodec_mem tile;
719  	struct vdec_av1_slice_slot slots;
720  	struct vdec_av1_slice_tile_group tile_group;
721  
722  	/* for resolution change and get_pic_info */
723  	enum vdec_av1_slice_resolution_level level;
724  	u32 width;
725  	u32 height;
726  
727  	u32 frame_type;
728  	u32 irq_enabled;
729  	u32 inneracing_mode;
730  
731  	/* MicroP vsi: one pointer viewed as either the init layout or the decode layout */
732  	union {
733  		struct vdec_av1_slice_init_vsi *init_vsi;
734  		struct vdec_av1_slice_vsi *vsi;
735  	};
736  	struct vdec_av1_slice_vsi *core_vsi;
737  	int seq;
738  };
739  
/* Forward declaration only; the definition is not part of this section of the file. */
740  static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf);
741  
/**
 * vdec_av1_slice_get_msb() - position of the most significant set bit
 * @n: value to inspect
 *
 * Return: zero-based index of the highest set bit in @n. An input of 0 also
 * yields 0, so callers cannot tell @n == 0 apart from @n == 1.
 */
static inline int vdec_av1_slice_get_msb(u32 n)
{
	/* __builtin_clz() is undefined for 0, so that input is answered directly. */
	return n ? 31 - __builtin_clz(n) : 0;
}
748  
/**
 * vdec_av1_slice_need_scale() - check the size ratio between a reference and
 *                               the current frame
 * @ref_width:   reference frame width
 * @ref_height:  reference frame height
 * @this_width:  current frame width
 * @this_height: current frame height
 *
 * NOTE(review): despite the name, the test below only checks that the ratio
 * lies inside a fixed window - confirm the intended meaning against callers.
 *
 * Return: true when, in both dimensions, twice the current size is at least
 * the reference size and the current size is at most 16 times the reference
 * size; false otherwise.
 */
static inline bool vdec_av1_slice_need_scale(u32 ref_width, u32 ref_height,
					     u32 this_width, u32 this_height)
{
	/* Reference more than twice as large as the current frame. */
	if ((this_width << 1) < ref_width || (this_height << 1) < ref_height)
		return false;

	/* Current frame more than 16 times as large as the reference. */
	if (this_width > (ref_width << 4) || this_height > (ref_height << 4))
		return false;

	return true;
}
757  
vdec_av1_get_ctrl_ptr(struct mtk_vcodec_dec_ctx * ctx,int id)758  static void *vdec_av1_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id)
759  {
760  	struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id);
761  
762  	if (!ctrl)
763  		return ERR_PTR(-EINVAL);
764  
765  	return ctrl->p_cur.p;
766  }
767  
vdec_av1_slice_init_cdf_table(struct vdec_av1_slice_instance * instance)768  static int vdec_av1_slice_init_cdf_table(struct vdec_av1_slice_instance *instance)
769  {
770  	u8 *remote_cdf_table;
771  	struct mtk_vcodec_dec_ctx *ctx;
772  	struct vdec_av1_slice_init_vsi *vsi;
773  	int ret;
774  
775  	ctx = instance->ctx;
776  	vsi = instance->vpu.vsi;
777  	remote_cdf_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
778  						     (u32)vsi->cdf_table_addr);
779  	if (IS_ERR(remote_cdf_table)) {
780  		mtk_vdec_err(ctx, "failed to map cdf table\n");
781  		return PTR_ERR(remote_cdf_table);
782  	}
783  
784  	mtk_vdec_debug(ctx, "map cdf table to 0x%p\n", remote_cdf_table);
785  
786  	if (instance->cdf_table.va)
787  		mtk_vcodec_mem_free(ctx, &instance->cdf_table);
788  	instance->cdf_table.size = vsi->cdf_table_size;
789  
790  	ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_table);
791  	if (ret)
792  		return ret;
793  
794  	memcpy(instance->cdf_table.va, remote_cdf_table, vsi->cdf_table_size);
795  
796  	return 0;
797  }
798  
vdec_av1_slice_init_iq_table(struct vdec_av1_slice_instance * instance)799  static int vdec_av1_slice_init_iq_table(struct vdec_av1_slice_instance *instance)
800  {
801  	u8 *remote_iq_table;
802  	struct mtk_vcodec_dec_ctx *ctx;
803  	struct vdec_av1_slice_init_vsi *vsi;
804  	int ret;
805  
806  	ctx = instance->ctx;
807  	vsi = instance->vpu.vsi;
808  	remote_iq_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
809  						    (u32)vsi->iq_table_addr);
810  	if (IS_ERR(remote_iq_table)) {
811  		mtk_vdec_err(ctx, "failed to map iq table\n");
812  		return PTR_ERR(remote_iq_table);
813  	}
814  
815  	mtk_vdec_debug(ctx, "map iq table to 0x%p\n", remote_iq_table);
816  
817  	if (instance->iq_table.va)
818  		mtk_vcodec_mem_free(ctx, &instance->iq_table);
819  	instance->iq_table.size = vsi->iq_table_size;
820  
821  	ret = mtk_vcodec_mem_alloc(ctx, &instance->iq_table);
822  	if (ret)
823  		return ret;
824  
825  	memcpy(instance->iq_table.va, remote_iq_table, vsi->iq_table_size);
826  
827  	return 0;
828  }
829  
vdec_av1_slice_get_new_slot(struct vdec_av1_slice_vsi * vsi)830  static int vdec_av1_slice_get_new_slot(struct vdec_av1_slice_vsi *vsi)
831  {
832  	struct vdec_av1_slice_slot *slots = &vsi->slots;
833  	int new_slot_idx = AV1_INVALID_IDX;
834  	int i;
835  
836  	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
837  		if (slots->frame_info[i].ref_count == 0) {
838  			new_slot_idx = i;
839  			break;
840  		}
841  	}
842  
843  	if (new_slot_idx != AV1_INVALID_IDX) {
844  		slots->frame_info[new_slot_idx].ref_count++;
845  		slots->timestamp[new_slot_idx] = vsi->frame.cur_ts;
846  	}
847  
848  	return new_slot_idx;
849  }
850  
vdec_av1_slice_clear_fb(struct vdec_av1_slice_frame_info * frame_info)851  static inline void vdec_av1_slice_clear_fb(struct vdec_av1_slice_frame_info *frame_info)
852  {
853  	memset((void *)frame_info, 0, sizeof(struct vdec_av1_slice_frame_info));
854  }
855  
vdec_av1_slice_decrease_ref_count(struct vdec_av1_slice_slot * slots,int fb_idx)856  static void vdec_av1_slice_decrease_ref_count(struct vdec_av1_slice_slot *slots, int fb_idx)
857  {
858  	struct vdec_av1_slice_frame_info *frame_info = slots->frame_info;
859  
860  	frame_info[fb_idx].ref_count--;
861  	if (frame_info[fb_idx].ref_count < 0) {
862  		frame_info[fb_idx].ref_count = 0;
863  		pr_err(MTK_DBG_V4L2_STR "av1_error: %s() fb_idx %d decrease ref_count error\n",
864  		       __func__, fb_idx);
865  	}
866  
867  	vdec_av1_slice_clear_fb(&frame_info[fb_idx]);
868  }
869  
vdec_av1_slice_cleanup_slots(struct vdec_av1_slice_slot * slots,struct vdec_av1_slice_frame * frame,struct v4l2_ctrl_av1_frame * ctrl_fh)870  static void vdec_av1_slice_cleanup_slots(struct vdec_av1_slice_slot *slots,
871  					 struct vdec_av1_slice_frame *frame,
872  					 struct v4l2_ctrl_av1_frame *ctrl_fh)
873  {
874  	int slot_id, ref_id;
875  
876  	for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++)
877  		frame->ref_frame_map[ref_id] = AV1_INVALID_IDX;
878  
879  	for (slot_id = 0; slot_id < AV1_MAX_FRAME_BUF_COUNT; slot_id++) {
880  		u64 timestamp = slots->timestamp[slot_id];
881  		bool ref_used = false;
882  
883  		/* ignored unused slots */
884  		if (slots->frame_info[slot_id].ref_count == 0)
885  			continue;
886  
887  		for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) {
888  			if (ctrl_fh->reference_frame_ts[ref_id] == timestamp) {
889  				frame->ref_frame_map[ref_id] = slot_id;
890  				ref_used = true;
891  			}
892  		}
893  
894  		if (!ref_used)
895  			vdec_av1_slice_decrease_ref_count(slots, slot_id);
896  	}
897  }
898  
vdec_av1_slice_setup_slot(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi,struct v4l2_ctrl_av1_frame * ctrl_fh)899  static void vdec_av1_slice_setup_slot(struct vdec_av1_slice_instance *instance,
900  				      struct vdec_av1_slice_vsi *vsi,
901  				      struct v4l2_ctrl_av1_frame *ctrl_fh)
902  {
903  	struct vdec_av1_slice_frame_info *cur_frame_info;
904  	struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
905  	int ref_id;
906  
907  	memcpy(&vsi->slots, &instance->slots, sizeof(instance->slots));
908  	vdec_av1_slice_cleanup_slots(&vsi->slots, &vsi->frame, ctrl_fh);
909  	vsi->slot_id = vdec_av1_slice_get_new_slot(vsi);
910  
911  	if (vsi->slot_id == AV1_INVALID_IDX) {
912  		mtk_v4l2_vdec_err(instance->ctx, "warning:av1 get invalid index slot\n");
913  		vsi->slot_id = 0;
914  	}
915  	cur_frame_info = &vsi->slots.frame_info[vsi->slot_id];
916  	cur_frame_info->frame_type = uh->frame_type;
917  	cur_frame_info->frame_is_intra = ((uh->frame_type == AV1_INTRA_ONLY_FRAME) ||
918  					  (uh->frame_type == AV1_KEY_FRAME));
919  	cur_frame_info->order_hint = uh->order_hint;
920  	cur_frame_info->upscaled_width = uh->upscaled_width;
921  	cur_frame_info->pic_pitch = 0;
922  	cur_frame_info->frame_width = uh->frame_width;
923  	cur_frame_info->frame_height = uh->frame_height;
924  	cur_frame_info->mi_cols = ((uh->frame_width + 7) >> 3) << 1;
925  	cur_frame_info->mi_rows = ((uh->frame_height + 7) >> 3) << 1;
926  
927  	/* ensure current frame is properly mapped if referenced */
928  	for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) {
929  		u64 timestamp = vsi->slots.timestamp[vsi->slot_id];
930  
931  		if (ctrl_fh->reference_frame_ts[ref_id] == timestamp)
932  			vsi->frame.ref_frame_map[ref_id] = vsi->slot_id;
933  	}
934  }
935  
vdec_av1_slice_alloc_working_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi)936  static int vdec_av1_slice_alloc_working_buffer(struct vdec_av1_slice_instance *instance,
937  					       struct vdec_av1_slice_vsi *vsi)
938  {
939  	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
940  	enum vdec_av1_slice_resolution_level level;
941  	u32 max_sb_w, max_sb_h, max_w, max_h, w, h;
942  	int i, ret;
943  
944  	w = vsi->frame.uh.frame_width;
945  	h = vsi->frame.uh.frame_height;
946  
947  	if (w > VCODEC_DEC_4K_CODED_WIDTH || h > VCODEC_DEC_4K_CODED_HEIGHT)
948  		/* 8K */
949  		return -EINVAL;
950  
951  	if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) {
952  		/* 4K */
953  		level = AV1_RES_4K;
954  		max_w = VCODEC_DEC_4K_CODED_WIDTH;
955  		max_h = VCODEC_DEC_4K_CODED_HEIGHT;
956  	} else {
957  		/* FHD */
958  		level = AV1_RES_FHD;
959  		max_w = MTK_VDEC_MAX_W;
960  		max_h = MTK_VDEC_MAX_H;
961  	}
962  
963  	if (level == instance->level)
964  		return 0;
965  
966  	mtk_vdec_debug(ctx, "resolution level changed from %u to %u, %ux%u",
967  		       instance->level, level, w, h);
968  
969  	max_sb_w = DIV_ROUND_UP(max_w, 128);
970  	max_sb_h = DIV_ROUND_UP(max_h, 128);
971  
972  	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
973  		if (instance->mv[i].va)
974  			mtk_vcodec_mem_free(ctx, &instance->mv[i]);
975  		instance->mv[i].size = max_sb_w * max_sb_h * SZ_1K;
976  		ret = mtk_vcodec_mem_alloc(ctx, &instance->mv[i]);
977  		if (ret)
978  			goto err;
979  
980  		if (instance->seg[i].va)
981  			mtk_vcodec_mem_free(ctx, &instance->seg[i]);
982  		instance->seg[i].size = max_sb_w * max_sb_h * 512;
983  		ret = mtk_vcodec_mem_alloc(ctx, &instance->seg[i]);
984  		if (ret)
985  			goto err;
986  
987  		if (instance->cdf[i].va)
988  			mtk_vcodec_mem_free(ctx, &instance->cdf[i]);
989  		instance->cdf[i].size = AV1_CDF_TABLE_BUFFER_SIZE;
990  		ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf[i]);
991  		if (ret)
992  			goto err;
993  	}
994  
995  	if (!instance->cdf_temp.va) {
996  		instance->cdf_temp.size = (SZ_1K * 16 * 100);
997  		ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_temp);
998  		if (ret)
999  			goto err;
1000  		vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr;
1001  		vsi->cdf_tmp.size = instance->cdf_temp.size;
1002  	}
1003  
1004  	if (instance->tile.va)
1005  		mtk_vcodec_mem_free(ctx, &instance->tile);
1006  
1007  	instance->tile.size = AV1_TILE_BUF_SIZE * V4L2_AV1_MAX_TILE_COUNT;
1008  	ret = mtk_vcodec_mem_alloc(ctx, &instance->tile);
1009  	if (ret)
1010  		goto err;
1011  
1012  	instance->level = level;
1013  	return 0;
1014  
1015  err:
1016  	instance->level = AV1_RES_NONE;
1017  	return ret;
1018  }
1019  
vdec_av1_slice_free_working_buffer(struct vdec_av1_slice_instance * instance)1020  static void vdec_av1_slice_free_working_buffer(struct vdec_av1_slice_instance *instance)
1021  {
1022  	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
1023  	int i;
1024  
1025  	for (i = 0; i < ARRAY_SIZE(instance->mv); i++)
1026  		if (instance->mv[i].va)
1027  			mtk_vcodec_mem_free(ctx, &instance->mv[i]);
1028  
1029  	for (i = 0; i < ARRAY_SIZE(instance->seg); i++)
1030  		if (instance->seg[i].va)
1031  			mtk_vcodec_mem_free(ctx, &instance->seg[i]);
1032  
1033  	for (i = 0; i < ARRAY_SIZE(instance->cdf); i++)
1034  		if (instance->cdf[i].va)
1035  			mtk_vcodec_mem_free(ctx, &instance->cdf[i]);
1036  
1037  
1038  	if (instance->tile.va)
1039  		mtk_vcodec_mem_free(ctx, &instance->tile);
1040  	if (instance->cdf_temp.va)
1041  		mtk_vcodec_mem_free(ctx, &instance->cdf_temp);
1042  	if (instance->cdf_table.va)
1043  		mtk_vcodec_mem_free(ctx, &instance->cdf_table);
1044  	if (instance->iq_table.va)
1045  		mtk_vcodec_mem_free(ctx, &instance->iq_table);
1046  
1047  	instance->level = AV1_RES_NONE;
1048  }
1049  
vdec_av1_slice_vsi_from_remote(struct vdec_av1_slice_vsi * vsi,struct vdec_av1_slice_vsi * remote_vsi)1050  static inline void vdec_av1_slice_vsi_from_remote(struct vdec_av1_slice_vsi *vsi,
1051  						  struct vdec_av1_slice_vsi *remote_vsi)
1052  {
1053  	memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans));
1054  	memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state));
1055  }
1056  
vdec_av1_slice_vsi_to_remote(struct vdec_av1_slice_vsi * vsi,struct vdec_av1_slice_vsi * remote_vsi)1057  static inline void vdec_av1_slice_vsi_to_remote(struct vdec_av1_slice_vsi *vsi,
1058  						struct vdec_av1_slice_vsi *remote_vsi)
1059  {
1060  	memcpy(remote_vsi, vsi, sizeof(*vsi));
1061  }
1062  
vdec_av1_slice_setup_lat_from_src_buf(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi,struct vdec_lat_buf * lat_buf)1063  static int vdec_av1_slice_setup_lat_from_src_buf(struct vdec_av1_slice_instance *instance,
1064  						 struct vdec_av1_slice_vsi *vsi,
1065  						 struct vdec_lat_buf *lat_buf)
1066  {
1067  	struct vb2_v4l2_buffer *src;
1068  	struct vb2_v4l2_buffer *dst;
1069  
1070  	src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
1071  	if (!src)
1072  		return -EINVAL;
1073  
1074  	lat_buf->src_buf_req = src->vb2_buf.req_obj.req;
1075  	dst = &lat_buf->ts_info;
1076  	v4l2_m2m_buf_copy_metadata(src, dst, true);
1077  	vsi->frame.cur_ts = dst->vb2_buf.timestamp;
1078  
1079  	return 0;
1080  }
1081  
/*
 * Look up the div_lut multiplier for divisor @D.
 *
 * @shift receives the position of the most significant set bit of @D and,
 * on success, is increased by DIV_LUT_PREC_BITS.
 *
 * Return: the div_lut entry selected by the bits of @D below its leading
 * one, or -1 when the computed table index exceeds DIV_LUT_NUM (in which
 * case @shift only holds the bit position).
 */
static short vdec_av1_slice_resolve_divisor_32(u32 D, short *shift)
{
	int msb = vdec_av1_slice_get_msb(D);
	int rem;
	int idx;

	*shift = msb;

	/* strip the most significant 1 bit of D; the rest selects the entry */
	rem = D - ((u32)1 << msb);

	/* reduce or expand the remainder to DIV_LUT_BITS (8) bits */
	if (msb > DIV_LUT_BITS)
		idx = AV1_DIV_ROUND_UP_POW2(rem, msb - DIV_LUT_BITS);
	else
		idx = rem << (DIV_LUT_BITS - msb);

	if (idx > DIV_LUT_NUM)
		return -1;

	*shift += DIV_LUT_PREC_BITS;

	/* idx indexes the precomputed table of multipliers */
	return div_lut[idx];
}
1101  
vdec_av1_slice_get_shear_params(struct vdec_av1_slice_gm * gm_params)1102  static void vdec_av1_slice_get_shear_params(struct vdec_av1_slice_gm *gm_params)
1103  {
1104  	const int *mat = gm_params->wmmat;
1105  	short shift;
1106  	short y;
1107  	long long gv, dv;
1108  
1109  	if (gm_params->wmmat[2] <= 0)
1110  		return;
1111  
1112  	gm_params->alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
1113  	gm_params->beta = clamp_val(mat[3], S16_MIN, S16_MAX);
1114  
1115  	y = vdec_av1_slice_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
1116  
1117  	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
1118  	gm_params->gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift),
1119  				     S16_MIN, S16_MAX);
1120  
1121  	dv = ((long long)mat[3] * mat[4]) * y;
1122  	gm_params->delta = clamp_val(mat[5] - (int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) -
1123  				     (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
1124  
1125  	gm_params->alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->alpha, WARP_PARAM_REDUCE_BITS) *
1126  							(1 << WARP_PARAM_REDUCE_BITS);
1127  	gm_params->beta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->beta, WARP_PARAM_REDUCE_BITS) *
1128  						       (1 << WARP_PARAM_REDUCE_BITS);
1129  	gm_params->gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->gamma, WARP_PARAM_REDUCE_BITS) *
1130  							(1 << WARP_PARAM_REDUCE_BITS);
1131  	gm_params->delta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->delta, WARP_PARAM_REDUCE_BITS) *
1132  							(1 << WARP_PARAM_REDUCE_BITS);
1133  }
1134  
vdec_av1_slice_setup_gm(struct vdec_av1_slice_gm * gm,struct v4l2_av1_global_motion * ctrl_gm)1135  static void vdec_av1_slice_setup_gm(struct vdec_av1_slice_gm *gm,
1136  				    struct v4l2_av1_global_motion *ctrl_gm)
1137  {
1138  	u32 i, j;
1139  
1140  	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
1141  		gm[i].wmtype = ctrl_gm->type[i];
1142  		for (j = 0; j < 6; j++)
1143  			gm[i].wmmat[j] = ctrl_gm->params[i][j];
1144  
1145  		gm[i].invalid = !!(ctrl_gm->invalid & BIT(i));
1146  		gm[i].alpha = 0;
1147  		gm[i].beta = 0;
1148  		gm[i].gamma = 0;
1149  		gm[i].delta = 0;
1150  		if (gm[i].wmtype <= V4L2_AV1_WARP_MODEL_AFFINE)
1151  			vdec_av1_slice_get_shear_params(&gm[i]);
1152  	}
1153  }
1154  
vdec_av1_slice_setup_seg(struct vdec_av1_slice_seg * seg,struct v4l2_av1_segmentation * ctrl_seg)1155  static void vdec_av1_slice_setup_seg(struct vdec_av1_slice_seg *seg,
1156  				     struct v4l2_av1_segmentation *ctrl_seg)
1157  {
1158  	u32 i, j;
1159  
1160  	seg->segmentation_enabled = SEGMENTATION_FLAG(ctrl_seg, ENABLED);
1161  	seg->segmentation_update_map = SEGMENTATION_FLAG(ctrl_seg, UPDATE_MAP);
1162  	seg->segmentation_temporal_update = SEGMENTATION_FLAG(ctrl_seg, TEMPORAL_UPDATE);
1163  	seg->segmentation_update_data = SEGMENTATION_FLAG(ctrl_seg, UPDATE_DATA);
1164  	seg->segid_preskip = SEGMENTATION_FLAG(ctrl_seg, SEG_ID_PRE_SKIP);
1165  	seg->last_active_segid = ctrl_seg->last_active_seg_id;
1166  
1167  	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1168  		seg->feature_enabled_mask[i] = ctrl_seg->feature_enabled[i];
1169  		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++)
1170  			seg->feature_data[i][j] = ctrl_seg->feature_data[i][j];
1171  	}
1172  }
1173  
vdec_av1_slice_setup_quant(struct vdec_av1_slice_quantization * quant,struct v4l2_av1_quantization * ctrl_quant)1174  static void vdec_av1_slice_setup_quant(struct vdec_av1_slice_quantization *quant,
1175  				       struct v4l2_av1_quantization *ctrl_quant)
1176  {
1177  	quant->base_q_idx = ctrl_quant->base_q_idx;
1178  	quant->delta_qydc = ctrl_quant->delta_q_y_dc;
1179  	quant->delta_qudc = ctrl_quant->delta_q_u_dc;
1180  	quant->delta_quac = ctrl_quant->delta_q_u_ac;
1181  	quant->delta_qvdc = ctrl_quant->delta_q_v_dc;
1182  	quant->delta_qvac = ctrl_quant->delta_q_v_ac;
1183  	quant->qm_y = ctrl_quant->qm_y;
1184  	quant->qm_u = ctrl_quant->qm_u;
1185  	quant->qm_v = ctrl_quant->qm_v;
1186  	quant->using_qmatrix = QUANT_FLAG(ctrl_quant, USING_QMATRIX);
1187  }
1188  
vdec_av1_slice_get_qindex(struct vdec_av1_slice_uncompressed_header * uh,int segmentation_id)1189  static int vdec_av1_slice_get_qindex(struct vdec_av1_slice_uncompressed_header *uh,
1190  				     int segmentation_id)
1191  {
1192  	struct vdec_av1_slice_seg *seg = &uh->seg;
1193  	struct vdec_av1_slice_quantization *quant = &uh->quant;
1194  	int data = 0, qindex = 0;
1195  
1196  	if (seg->segmentation_enabled &&
1197  	    (seg->feature_enabled_mask[segmentation_id] & BIT(SEG_LVL_ALT_Q))) {
1198  		data = seg->feature_data[segmentation_id][SEG_LVL_ALT_Q];
1199  		qindex = quant->base_q_idx + data;
1200  		return clamp_val(qindex, 0, MAXQ);
1201  	}
1202  
1203  	return quant->base_q_idx;
1204  }
1205  
vdec_av1_slice_setup_lr(struct vdec_av1_slice_lr * lr,struct v4l2_av1_loop_restoration * ctrl_lr)1206  static void vdec_av1_slice_setup_lr(struct vdec_av1_slice_lr *lr,
1207  				    struct v4l2_av1_loop_restoration  *ctrl_lr)
1208  {
1209  	int i;
1210  
1211  	lr->use_lr = 0;
1212  	lr->use_chroma_lr = 0;
1213  	for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1214  		lr->frame_restoration_type[i] = ctrl_lr->frame_restoration_type[i];
1215  		lr->loop_restoration_size[i] = ctrl_lr->loop_restoration_size[i];
1216  		if (lr->frame_restoration_type[i]) {
1217  			lr->use_lr = 1;
1218  			if (i > 0)
1219  				lr->use_chroma_lr = 1;
1220  		}
1221  	}
1222  }
1223  
vdec_av1_slice_setup_lf(struct vdec_av1_slice_loop_filter * lf,struct v4l2_av1_loop_filter * ctrl_lf)1224  static void vdec_av1_slice_setup_lf(struct vdec_av1_slice_loop_filter *lf,
1225  				    struct v4l2_av1_loop_filter *ctrl_lf)
1226  {
1227  	int i;
1228  
1229  	for (i = 0; i < ARRAY_SIZE(lf->loop_filter_level); i++)
1230  		lf->loop_filter_level[i] = ctrl_lf->level[i];
1231  
1232  	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
1233  		lf->loop_filter_ref_deltas[i] = ctrl_lf->ref_deltas[i];
1234  
1235  	for (i = 0; i < ARRAY_SIZE(lf->loop_filter_mode_deltas); i++)
1236  		lf->loop_filter_mode_deltas[i] = ctrl_lf->mode_deltas[i];
1237  
1238  	lf->loop_filter_sharpness = ctrl_lf->sharpness;
1239  	lf->loop_filter_delta_enabled =
1240  		   BIT_FLAG(ctrl_lf, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED);
1241  }
1242  
vdec_av1_slice_setup_cdef(struct vdec_av1_slice_cdef * cdef,struct v4l2_av1_cdef * ctrl_cdef)1243  static void vdec_av1_slice_setup_cdef(struct vdec_av1_slice_cdef *cdef,
1244  				      struct v4l2_av1_cdef *ctrl_cdef)
1245  {
1246  	int i;
1247  
1248  	cdef->cdef_damping = ctrl_cdef->damping_minus_3 + 3;
1249  	cdef->cdef_bits = ctrl_cdef->bits;
1250  
1251  	for (i = 0; i < V4L2_AV1_CDEF_MAX; i++) {
1252  		if (ctrl_cdef->y_sec_strength[i] == 4)
1253  			ctrl_cdef->y_sec_strength[i] -= 1;
1254  
1255  		if (ctrl_cdef->uv_sec_strength[i] == 4)
1256  			ctrl_cdef->uv_sec_strength[i] -= 1;
1257  
1258  		cdef->cdef_y_strength[i] =
1259  			ctrl_cdef->y_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS |
1260  			ctrl_cdef->y_sec_strength[i];
1261  		cdef->cdef_uv_strength[i] =
1262  			ctrl_cdef->uv_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS |
1263  			ctrl_cdef->uv_sec_strength[i];
1264  	}
1265  }
1266  
vdec_av1_slice_setup_seq(struct vdec_av1_slice_seq_header * seq,struct v4l2_ctrl_av1_sequence * ctrl_seq)1267  static void vdec_av1_slice_setup_seq(struct vdec_av1_slice_seq_header *seq,
1268  				     struct v4l2_ctrl_av1_sequence *ctrl_seq)
1269  {
1270  	seq->bitdepth = ctrl_seq->bit_depth;
1271  	seq->max_frame_width = ctrl_seq->max_frame_width_minus_1 + 1;
1272  	seq->max_frame_height = ctrl_seq->max_frame_height_minus_1 + 1;
1273  	seq->enable_superres = SEQUENCE_FLAG(ctrl_seq, ENABLE_SUPERRES);
1274  	seq->enable_filter_intra = SEQUENCE_FLAG(ctrl_seq, ENABLE_FILTER_INTRA);
1275  	seq->enable_intra_edge_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTRA_EDGE_FILTER);
1276  	seq->enable_interintra_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTERINTRA_COMPOUND);
1277  	seq->enable_masked_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_MASKED_COMPOUND);
1278  	seq->enable_dual_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_DUAL_FILTER);
1279  	seq->enable_jnt_comp = SEQUENCE_FLAG(ctrl_seq, ENABLE_JNT_COMP);
1280  	seq->mono_chrome = SEQUENCE_FLAG(ctrl_seq, MONO_CHROME);
1281  	seq->enable_order_hint = SEQUENCE_FLAG(ctrl_seq, ENABLE_ORDER_HINT);
1282  	seq->order_hint_bits = ctrl_seq->order_hint_bits;
1283  	seq->use_128x128_superblock = SEQUENCE_FLAG(ctrl_seq, USE_128X128_SUPERBLOCK);
1284  	seq->subsampling_x = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_X);
1285  	seq->subsampling_y = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_Y);
1286  }
1287  
vdec_av1_slice_setup_tile(struct vdec_av1_slice_frame * frame,struct v4l2_av1_tile_info * ctrl_tile)1288  static void vdec_av1_slice_setup_tile(struct vdec_av1_slice_frame *frame,
1289  				      struct v4l2_av1_tile_info *ctrl_tile)
1290  {
1291  	struct vdec_av1_slice_seq_header *seq = &frame->seq;
1292  	struct vdec_av1_slice_tile *tile = &frame->uh.tile;
1293  	u32 mib_size_log2 = seq->use_128x128_superblock ? 5 : 4;
1294  	int i;
1295  
1296  	tile->tile_cols = ctrl_tile->tile_cols;
1297  	tile->tile_rows = ctrl_tile->tile_rows;
1298  	tile->context_update_tile_id = ctrl_tile->context_update_tile_id;
1299  	tile->uniform_tile_spacing_flag =
1300  		BIT_FLAG(ctrl_tile, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING);
1301  
1302  	for (i = 0; i < tile->tile_cols + 1; i++)
1303  		tile->mi_col_starts[i] =
1304  			ALIGN(ctrl_tile->mi_col_starts[i], BIT(mib_size_log2)) >> mib_size_log2;
1305  
1306  	for (i = 0; i < tile->tile_rows + 1; i++)
1307  		tile->mi_row_starts[i] =
1308  			ALIGN(ctrl_tile->mi_row_starts[i], BIT(mib_size_log2)) >> mib_size_log2;
1309  }
1310  
vdec_av1_slice_setup_uh(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_frame * frame,struct v4l2_ctrl_av1_frame * ctrl_fh)1311  static void vdec_av1_slice_setup_uh(struct vdec_av1_slice_instance *instance,
1312  				    struct vdec_av1_slice_frame *frame,
1313  				    struct v4l2_ctrl_av1_frame *ctrl_fh)
1314  {
1315  	struct vdec_av1_slice_uncompressed_header *uh = &frame->uh;
1316  	int i;
1317  
1318  	uh->use_ref_frame_mvs = FH_FLAG(ctrl_fh, USE_REF_FRAME_MVS);
1319  	uh->order_hint = ctrl_fh->order_hint;
1320  	vdec_av1_slice_setup_gm(uh->gm, &ctrl_fh->global_motion);
1321  	uh->upscaled_width = ctrl_fh->upscaled_width;
1322  	uh->frame_width = ctrl_fh->frame_width_minus_1 + 1;
1323  	uh->frame_height = ctrl_fh->frame_height_minus_1 + 1;
1324  	uh->mi_cols = ((uh->frame_width + 7) >> 3) << 1;
1325  	uh->mi_rows = ((uh->frame_height + 7) >> 3) << 1;
1326  	uh->reduced_tx_set = FH_FLAG(ctrl_fh, REDUCED_TX_SET);
1327  	uh->tx_mode = ctrl_fh->tx_mode;
1328  	uh->uniform_tile_spacing_flag =
1329  		BIT_FLAG(&ctrl_fh->tile_info, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING);
1330  	uh->interpolation_filter = ctrl_fh->interpolation_filter;
1331  	uh->allow_warped_motion = FH_FLAG(ctrl_fh, ALLOW_WARPED_MOTION);
1332  	uh->is_motion_mode_switchable = FH_FLAG(ctrl_fh, IS_MOTION_MODE_SWITCHABLE);
1333  	uh->frame_type = ctrl_fh->frame_type;
1334  	uh->frame_is_intra = (uh->frame_type == V4L2_AV1_INTRA_ONLY_FRAME ||
1335  			      uh->frame_type == V4L2_AV1_KEY_FRAME);
1336  
1337  	if (!uh->frame_is_intra && FH_FLAG(ctrl_fh, REFERENCE_SELECT))
1338  		uh->reference_mode = AV1_REFERENCE_MODE_SELECT;
1339  	else
1340  		uh->reference_mode = AV1_SINGLE_REFERENCE;
1341  
1342  	uh->allow_high_precision_mv = FH_FLAG(ctrl_fh, ALLOW_HIGH_PRECISION_MV);
1343  	uh->allow_intra_bc = FH_FLAG(ctrl_fh, ALLOW_INTRABC);
1344  	uh->force_integer_mv = FH_FLAG(ctrl_fh, FORCE_INTEGER_MV);
1345  	uh->allow_screen_content_tools = FH_FLAG(ctrl_fh, ALLOW_SCREEN_CONTENT_TOOLS);
1346  	uh->error_resilient_mode = FH_FLAG(ctrl_fh, ERROR_RESILIENT_MODE);
1347  	uh->primary_ref_frame = ctrl_fh->primary_ref_frame;
1348  	uh->disable_frame_end_update_cdf =
1349  			FH_FLAG(ctrl_fh, DISABLE_FRAME_END_UPDATE_CDF);
1350  	uh->disable_cdf_update = FH_FLAG(ctrl_fh, DISABLE_CDF_UPDATE);
1351  	uh->skip_mode.skip_mode_present = FH_FLAG(ctrl_fh, SKIP_MODE_PRESENT);
1352  	uh->skip_mode.skip_mode_frame[0] =
1353  		ctrl_fh->skip_mode_frame[0] - V4L2_AV1_REF_LAST_FRAME;
1354  	uh->skip_mode.skip_mode_frame[1] =
1355  		ctrl_fh->skip_mode_frame[1] - V4L2_AV1_REF_LAST_FRAME;
1356  	uh->skip_mode.skip_mode_allowed = ctrl_fh->skip_mode_frame[0] ? 1 : 0;
1357  
1358  	vdec_av1_slice_setup_seg(&uh->seg, &ctrl_fh->segmentation);
1359  	uh->delta_q_lf.delta_q_present = QUANT_FLAG(&ctrl_fh->quantization, DELTA_Q_PRESENT);
1360  	uh->delta_q_lf.delta_q_res = 1 << ctrl_fh->quantization.delta_q_res;
1361  	uh->delta_q_lf.delta_lf_present =
1362  		BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT);
1363  	uh->delta_q_lf.delta_lf_res = ctrl_fh->loop_filter.delta_lf_res;
1364  	uh->delta_q_lf.delta_lf_multi =
1365  		BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI);
1366  	vdec_av1_slice_setup_quant(&uh->quant, &ctrl_fh->quantization);
1367  
1368  	uh->coded_loss_less = 1;
1369  	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1370  		uh->quant.qindex[i] = vdec_av1_slice_get_qindex(uh, i);
1371  		uh->loss_less_array[i] =
1372  			(uh->quant.qindex[i] == 0 && uh->quant.delta_qydc == 0 &&
1373  			uh->quant.delta_quac == 0 && uh->quant.delta_qudc == 0 &&
1374  			uh->quant.delta_qvac == 0 && uh->quant.delta_qvdc == 0);
1375  
1376  		if (!uh->loss_less_array[i])
1377  			uh->coded_loss_less = 0;
1378  	}
1379  
1380  	vdec_av1_slice_setup_lr(&uh->lr, &ctrl_fh->loop_restoration);
1381  	uh->superres_denom = ctrl_fh->superres_denom;
1382  	vdec_av1_slice_setup_lf(&uh->loop_filter, &ctrl_fh->loop_filter);
1383  	vdec_av1_slice_setup_cdef(&uh->cdef, &ctrl_fh->cdef);
1384  	vdec_av1_slice_setup_tile(frame, &ctrl_fh->tile_info);
1385  }
1386  
vdec_av1_slice_setup_tile_group(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi)1387  static int vdec_av1_slice_setup_tile_group(struct vdec_av1_slice_instance *instance,
1388  					   struct vdec_av1_slice_vsi *vsi)
1389  {
1390  	struct v4l2_ctrl_av1_tile_group_entry *ctrl_tge;
1391  	struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group;
1392  	struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
1393  	struct vdec_av1_slice_tile *tile = &uh->tile;
1394  	struct v4l2_ctrl *ctrl;
1395  	u32 tge_size;
1396  	int i;
1397  
1398  	ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
1399  	if (!ctrl)
1400  		return -EINVAL;
1401  
1402  	tge_size = ctrl->elems;
1403  	ctrl_tge = (struct v4l2_ctrl_av1_tile_group_entry *)ctrl->p_cur.p;
1404  
1405  	tile_group->num_tiles = tile->tile_cols * tile->tile_rows;
1406  
1407  	if (tile_group->num_tiles != tge_size ||
1408  	    tile_group->num_tiles > V4L2_AV1_MAX_TILE_COUNT) {
1409  		mtk_vdec_err(instance->ctx, "invalid tge_size %d, tile_num:%d\n",
1410  			     tge_size, tile_group->num_tiles);
1411  		return -EINVAL;
1412  	}
1413  
1414  	for (i = 0; i < tge_size; i++) {
1415  		if (i != ctrl_tge[i].tile_row * vsi->frame.uh.tile.tile_cols +
1416  		    ctrl_tge[i].tile_col) {
1417  			mtk_vdec_err(instance->ctx, "invalid tge info %d, %d %d %d\n",
1418  				     i, ctrl_tge[i].tile_row, ctrl_tge[i].tile_col,
1419  				     vsi->frame.uh.tile.tile_rows);
1420  			return -EINVAL;
1421  		}
1422  		tile_group->tile_size[i] = ctrl_tge[i].tile_size;
1423  		tile_group->tile_start_offset[i] = ctrl_tge[i].tile_offset;
1424  	}
1425  
1426  	return 0;
1427  }
1428  
vdec_av1_slice_setup_state(struct vdec_av1_slice_vsi * vsi)1429  static inline void vdec_av1_slice_setup_state(struct vdec_av1_slice_vsi *vsi)
1430  {
1431  	memset(&vsi->state, 0, sizeof(vsi->state));
1432  }
1433  
vdec_av1_slice_setup_scale_factors(struct vdec_av1_slice_frame_refs * frame_ref,struct vdec_av1_slice_frame_info * ref_frame_info,struct vdec_av1_slice_uncompressed_header * uh)1434  static void vdec_av1_slice_setup_scale_factors(struct vdec_av1_slice_frame_refs *frame_ref,
1435  					       struct vdec_av1_slice_frame_info *ref_frame_info,
1436  					       struct vdec_av1_slice_uncompressed_header *uh)
1437  {
1438  	struct vdec_av1_slice_scale_factors *scale_factors = &frame_ref->scale_factors;
1439  	u32 ref_upscaled_width = ref_frame_info->upscaled_width;
1440  	u32 ref_frame_height = ref_frame_info->frame_height;
1441  	u32 frame_width = uh->frame_width;
1442  	u32 frame_height = uh->frame_height;
1443  
1444  	if (!vdec_av1_slice_need_scale(ref_upscaled_width, ref_frame_height,
1445  				       frame_width, frame_height)) {
1446  		scale_factors->x_scale = -1;
1447  		scale_factors->y_scale = -1;
1448  		scale_factors->is_scaled = 0;
1449  		return;
1450  	}
1451  
1452  	scale_factors->x_scale =
1453  		((ref_upscaled_width << AV1_REF_SCALE_SHIFT) + (frame_width >> 1)) / frame_width;
1454  	scale_factors->y_scale =
1455  		((ref_frame_height << AV1_REF_SCALE_SHIFT) + (frame_height >> 1)) / frame_height;
1456  	scale_factors->is_scaled =
1457  		(scale_factors->x_scale != AV1_REF_INVALID_SCALE) &&
1458  		(scale_factors->y_scale != AV1_REF_INVALID_SCALE) &&
1459  		(scale_factors->x_scale != AV1_REF_NO_SCALE ||
1460  		 scale_factors->y_scale != AV1_REF_NO_SCALE);
1461  	scale_factors->x_step =
1462  		AV1_DIV_ROUND_UP_POW2(scale_factors->x_scale,
1463  				      AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS);
1464  	scale_factors->y_step =
1465  		AV1_DIV_ROUND_UP_POW2(scale_factors->y_scale,
1466  				      AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS);
1467  }
1468  
/*
 * Tell whether order hint @a lies after order hint @b.
 *
 * The difference a - b is interpreted as a signed value of
 * @order_hint_bits bits, so that order hints which wrapped around still
 * compare correctly.
 *
 * Return: 1 when that wrapped difference is positive, 0 otherwise. 0 is also
 * returned when order hints are disabled or when @order_hint_bits is outside
 * 1..31, for which the shift below would be undefined.
 */
static unsigned char vdec_av1_slice_get_sign_bias(int a,
						  int b,
						  u8 enable_order_hint,
						  u8 order_hint_bits)
{
	int diff;
	int m;

	if (!enable_order_hint)
		return 0;

	/* 1 << (order_hint_bits - 1) must stay a valid, positive int */
	if (order_hint_bits == 0 || order_hint_bits >= 8 * sizeof(int))
		return 0;

	m = 1 << (order_hint_bits - 1);

	/* sign-extend the low order_hint_bits bits of the difference */
	diff = a - b;
	diff = (diff & (m - 1)) - (diff & m);

	return diff > 0;
}
1490  
vdec_av1_slice_setup_ref(struct vdec_av1_slice_pfc * pfc,struct v4l2_ctrl_av1_frame * ctrl_fh)1491  static void vdec_av1_slice_setup_ref(struct vdec_av1_slice_pfc *pfc,
1492  				     struct v4l2_ctrl_av1_frame *ctrl_fh)
1493  {
1494  	struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
1495  	struct vdec_av1_slice_frame *frame = &vsi->frame;
1496  	struct vdec_av1_slice_slot *slots = &vsi->slots;
1497  	struct vdec_av1_slice_uncompressed_header *uh = &frame->uh;
1498  	struct vdec_av1_slice_seq_header *seq = &frame->seq;
1499  	struct vdec_av1_slice_frame_info *cur_frame_info =
1500  		&slots->frame_info[vsi->slot_id];
1501  	struct vdec_av1_slice_frame_info *frame_info;
1502  	int i, slot_id;
1503  
1504  	if (uh->frame_is_intra)
1505  		return;
1506  
1507  	for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
1508  		int ref_idx = ctrl_fh->ref_frame_idx[i];
1509  
1510  		pfc->ref_idx[i] = ctrl_fh->reference_frame_ts[ref_idx];
1511  		slot_id = frame->ref_frame_map[ref_idx];
1512  		frame_info = &slots->frame_info[slot_id];
1513  		if (slot_id == AV1_INVALID_IDX) {
1514  			pr_err(MTK_DBG_V4L2_STR "cannot match reference[%d] 0x%llx\n", i,
1515  			       ctrl_fh->reference_frame_ts[ref_idx]);
1516  			frame->order_hints[i] = 0;
1517  			frame->ref_frame_valid[i] = 0;
1518  			continue;
1519  		}
1520  
1521  		frame->frame_refs[i].ref_fb_idx = slot_id;
1522  		vdec_av1_slice_setup_scale_factors(&frame->frame_refs[i],
1523  						   frame_info, uh);
1524  		if (!seq->enable_order_hint)
1525  			frame->ref_frame_sign_bias[i + 1] = 0;
1526  		else
1527  			frame->ref_frame_sign_bias[i + 1] =
1528  				vdec_av1_slice_get_sign_bias(frame_info->order_hint,
1529  							     uh->order_hint,
1530  							     seq->enable_order_hint,
1531  							     seq->order_hint_bits);
1532  
1533  		frame->order_hints[i] = ctrl_fh->order_hints[i + 1];
1534  		cur_frame_info->order_hints[i] = frame->order_hints[i];
1535  		frame->ref_frame_valid[i] = 1;
1536  	}
1537  }
1538  
vdec_av1_slice_get_previous(struct vdec_av1_slice_vsi * vsi)1539  static void vdec_av1_slice_get_previous(struct vdec_av1_slice_vsi *vsi)
1540  {
1541  	struct vdec_av1_slice_frame *frame = &vsi->frame;
1542  
1543  	if (frame->uh.primary_ref_frame == AV1_PRIMARY_REF_NONE)
1544  		frame->prev_fb_idx = AV1_INVALID_IDX;
1545  	else
1546  		frame->prev_fb_idx = frame->frame_refs[frame->uh.primary_ref_frame].ref_fb_idx;
1547  }
1548  
vdec_av1_slice_setup_operating_mode(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_frame * frame)1549  static inline void vdec_av1_slice_setup_operating_mode(struct vdec_av1_slice_instance *instance,
1550  						       struct vdec_av1_slice_frame *frame)
1551  {
1552  	frame->large_scale_tile = 0;
1553  }
1554  
vdec_av1_slice_setup_pfc(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_pfc * pfc)1555  static int vdec_av1_slice_setup_pfc(struct vdec_av1_slice_instance *instance,
1556  				    struct vdec_av1_slice_pfc *pfc)
1557  {
1558  	struct v4l2_ctrl_av1_frame *ctrl_fh;
1559  	struct v4l2_ctrl_av1_sequence *ctrl_seq;
1560  	struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
1561  	int ret = 0;
1562  
1563  	/* frame header */
1564  	ctrl_fh = (struct v4l2_ctrl_av1_frame *)
1565  		  vdec_av1_get_ctrl_ptr(instance->ctx,
1566  					V4L2_CID_STATELESS_AV1_FRAME);
1567  	if (IS_ERR(ctrl_fh))
1568  		return PTR_ERR(ctrl_fh);
1569  
1570  	ctrl_seq = (struct v4l2_ctrl_av1_sequence *)
1571  		   vdec_av1_get_ctrl_ptr(instance->ctx,
1572  					 V4L2_CID_STATELESS_AV1_SEQUENCE);
1573  	if (IS_ERR(ctrl_seq))
1574  		return PTR_ERR(ctrl_seq);
1575  
1576  	/* setup vsi information */
1577  	vdec_av1_slice_setup_seq(&vsi->frame.seq, ctrl_seq);
1578  	vdec_av1_slice_setup_uh(instance, &vsi->frame, ctrl_fh);
1579  	vdec_av1_slice_setup_operating_mode(instance, &vsi->frame);
1580  
1581  	vdec_av1_slice_setup_state(vsi);
1582  	vdec_av1_slice_setup_slot(instance, vsi, ctrl_fh);
1583  	vdec_av1_slice_setup_ref(pfc, ctrl_fh);
1584  	vdec_av1_slice_get_previous(vsi);
1585  
1586  	pfc->seq = instance->seq;
1587  	instance->seq++;
1588  
1589  	return ret;
1590  }
1591  
vdec_av1_slice_setup_lat_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi,struct mtk_vcodec_mem * bs,struct vdec_lat_buf * lat_buf)1592  static void vdec_av1_slice_setup_lat_buffer(struct vdec_av1_slice_instance *instance,
1593  					    struct vdec_av1_slice_vsi *vsi,
1594  					    struct mtk_vcodec_mem *bs,
1595  					    struct vdec_lat_buf *lat_buf)
1596  {
1597  	struct vdec_av1_slice_work_buffer *work_buffer;
1598  	int i;
1599  
1600  	vsi->bs.dma_addr = bs->dma_addr;
1601  	vsi->bs.size = bs->size;
1602  
1603  	vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
1604  	vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
1605  	vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr;
1606  	/* used to store trans end */
1607  	vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr;
1608  	vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
1609  	vsi->err_map.size = lat_buf->wdma_err_addr.size;
1610  	vsi->rd_mv.dma_addr = lat_buf->rd_mv_addr.dma_addr;
1611  	vsi->rd_mv.size = lat_buf->rd_mv_addr.size;
1612  
1613  	vsi->row_info.buf = 0;
1614  	vsi->row_info.size = 0;
1615  
1616  	work_buffer = vsi->work_buffer;
1617  
1618  	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
1619  		work_buffer[i].mv_addr.buf = instance->mv[i].dma_addr;
1620  		work_buffer[i].mv_addr.size = instance->mv[i].size;
1621  		work_buffer[i].segid_addr.buf = instance->seg[i].dma_addr;
1622  		work_buffer[i].segid_addr.size = instance->seg[i].size;
1623  		work_buffer[i].cdf_addr.buf = instance->cdf[i].dma_addr;
1624  		work_buffer[i].cdf_addr.size = instance->cdf[i].size;
1625  	}
1626  
1627  	vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr;
1628  	vsi->cdf_tmp.size = instance->cdf_temp.size;
1629  
1630  	vsi->tile.buf = instance->tile.dma_addr;
1631  	vsi->tile.size = instance->tile.size;
1632  	memcpy(lat_buf->tile_addr.va, instance->tile.va, 64 * instance->tile_group.num_tiles);
1633  
1634  	vsi->cdf_table.buf = instance->cdf_table.dma_addr;
1635  	vsi->cdf_table.size = instance->cdf_table.size;
1636  	vsi->iq_table.buf = instance->iq_table.dma_addr;
1637  	vsi->iq_table.size = instance->iq_table.size;
1638  }
1639  
vdec_av1_slice_setup_seg_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi)1640  static void vdec_av1_slice_setup_seg_buffer(struct vdec_av1_slice_instance *instance,
1641  					    struct vdec_av1_slice_vsi *vsi)
1642  {
1643  	struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
1644  	struct mtk_vcodec_mem *buf;
1645  
1646  	/* reset segment buffer */
1647  	if (uh->primary_ref_frame == AV1_PRIMARY_REF_NONE || !uh->seg.segmentation_enabled) {
1648  		mtk_vdec_debug(instance->ctx, "reset seg %d\n", vsi->slot_id);
1649  		if (vsi->slot_id != AV1_INVALID_IDX) {
1650  			buf = &instance->seg[vsi->slot_id];
1651  			memset(buf->va, 0, buf->size);
1652  		}
1653  	}
1654  }
1655  
vdec_av1_slice_setup_tile_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_vsi * vsi,struct mtk_vcodec_mem * bs)1656  static void vdec_av1_slice_setup_tile_buffer(struct vdec_av1_slice_instance *instance,
1657  					     struct vdec_av1_slice_vsi *vsi,
1658  					     struct mtk_vcodec_mem *bs)
1659  {
1660  	struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group;
1661  	struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
1662  	struct vdec_av1_slice_tile *tile = &uh->tile;
1663  	u32 tile_num, tile_row, tile_col;
1664  	u32 allow_update_cdf = 0;
1665  	u32 sb_boundary_x_m1 = 0, sb_boundary_y_m1 = 0;
1666  	int tile_info_base;
1667  	u64 tile_buf_pa;
1668  	u32 *tile_info_buf = instance->tile.va;
1669  	u64 pa = (u64)bs->dma_addr;
1670  
1671  	if (uh->disable_cdf_update == 0)
1672  		allow_update_cdf = 1;
1673  
1674  	for (tile_num = 0; tile_num < tile_group->num_tiles; tile_num++) {
1675  		/* each uint32 takes place of 4 bytes */
1676  		tile_info_base = (AV1_TILE_BUF_SIZE * tile_num) >> 2;
1677  		tile_row = tile_num / tile->tile_cols;
1678  		tile_col = tile_num % tile->tile_cols;
1679  		tile_info_buf[tile_info_base + 0] = (tile_group->tile_size[tile_num] << 3);
1680  		tile_buf_pa = pa + tile_group->tile_start_offset[tile_num];
1681  
1682  		/* save av1 tile high 4bits(bit 32-35) address in lower 4 bits position
1683  		 * and clear original for hw requirement.
1684  		 */
1685  		tile_info_buf[tile_info_base + 1] = (tile_buf_pa & 0xFFFFFFF0ull) |
1686  			((tile_buf_pa & 0xF00000000ull) >> 32);
1687  		tile_info_buf[tile_info_base + 2] = (tile_buf_pa & 0xFull) << 3;
1688  
1689  		sb_boundary_x_m1 =
1690  			(tile->mi_col_starts[tile_col + 1] - tile->mi_col_starts[tile_col] - 1) &
1691  			0x3f;
1692  		sb_boundary_y_m1 =
1693  			(tile->mi_row_starts[tile_row + 1] - tile->mi_row_starts[tile_row] - 1) &
1694  			0x1ff;
1695  
1696  		tile_info_buf[tile_info_base + 3] = (sb_boundary_y_m1 << 7) | sb_boundary_x_m1;
1697  		tile_info_buf[tile_info_base + 4] = ((allow_update_cdf << 18) | (1 << 16));
1698  
1699  		if (tile_num == tile->context_update_tile_id &&
1700  		    uh->disable_frame_end_update_cdf == 0)
1701  			tile_info_buf[tile_info_base + 4] |= (1 << 17);
1702  
1703  		mtk_vdec_debug(instance->ctx, "// tile buf %d pos(%dx%d) offset 0x%x\n",
1704  			       tile_num, tile_row, tile_col, tile_info_base);
1705  		mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n",
1706  			       tile_info_buf[tile_info_base + 0],
1707  			       tile_info_buf[tile_info_base + 1],
1708  			       tile_info_buf[tile_info_base + 2],
1709  			       tile_info_buf[tile_info_base + 3]);
1710  		mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n",
1711  			       tile_info_buf[tile_info_base + 4],
1712  			       tile_info_buf[tile_info_base + 5],
1713  			       tile_info_buf[tile_info_base + 6],
1714  			       tile_info_buf[tile_info_base + 7]);
1715  	}
1716  }
1717  
vdec_av1_slice_setup_lat(struct vdec_av1_slice_instance * instance,struct mtk_vcodec_mem * bs,struct vdec_lat_buf * lat_buf,struct vdec_av1_slice_pfc * pfc)1718  static int vdec_av1_slice_setup_lat(struct vdec_av1_slice_instance *instance,
1719  				    struct mtk_vcodec_mem *bs,
1720  				    struct vdec_lat_buf *lat_buf,
1721  				    struct vdec_av1_slice_pfc *pfc)
1722  {
1723  	struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
1724  	int ret;
1725  
1726  	ret = vdec_av1_slice_setup_lat_from_src_buf(instance, vsi, lat_buf);
1727  	if (ret)
1728  		return ret;
1729  
1730  	ret = vdec_av1_slice_setup_pfc(instance, pfc);
1731  	if (ret)
1732  		return ret;
1733  
1734  	ret = vdec_av1_slice_setup_tile_group(instance, vsi);
1735  	if (ret)
1736  		return ret;
1737  
1738  	ret = vdec_av1_slice_alloc_working_buffer(instance, vsi);
1739  	if (ret)
1740  		return ret;
1741  
1742  	vdec_av1_slice_setup_seg_buffer(instance, vsi);
1743  	vdec_av1_slice_setup_tile_buffer(instance, vsi, bs);
1744  	vdec_av1_slice_setup_lat_buffer(instance, vsi, bs, lat_buf);
1745  
1746  	return 0;
1747  }
1748  
vdec_av1_slice_update_lat(struct vdec_av1_slice_instance * instance,struct vdec_lat_buf * lat_buf,struct vdec_av1_slice_pfc * pfc)1749  static int vdec_av1_slice_update_lat(struct vdec_av1_slice_instance *instance,
1750  				     struct vdec_lat_buf *lat_buf,
1751  				     struct vdec_av1_slice_pfc *pfc)
1752  {
1753  	struct vdec_av1_slice_vsi *vsi;
1754  
1755  	vsi = &pfc->vsi;
1756  	mtk_vdec_debug(instance->ctx, "frame %u LAT CRC 0x%08x, output size is %d\n",
1757  		       pfc->seq, vsi->state.crc[0], vsi->state.out_size);
1758  
1759  	/* buffer full, need to re-decode */
1760  	if (vsi->state.full) {
1761  		/* buffer not enough */
1762  		if (vsi->trans.dma_addr_end - vsi->trans.dma_addr == vsi->ube.size)
1763  			return -ENOMEM;
1764  		return -EAGAIN;
1765  	}
1766  
1767  	instance->width = vsi->frame.uh.upscaled_width;
1768  	instance->height = vsi->frame.uh.frame_height;
1769  	instance->frame_type = vsi->frame.uh.frame_type;
1770  
1771  	return 0;
1772  }
1773  
vdec_av1_slice_setup_core_to_dst_buf(struct vdec_av1_slice_instance * instance,struct vdec_lat_buf * lat_buf)1774  static int vdec_av1_slice_setup_core_to_dst_buf(struct vdec_av1_slice_instance *instance,
1775  						struct vdec_lat_buf *lat_buf)
1776  {
1777  	struct vb2_v4l2_buffer *dst;
1778  
1779  	dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
1780  	if (!dst)
1781  		return -EINVAL;
1782  
1783  	v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true);
1784  
1785  	return 0;
1786  }
1787  
vdec_av1_slice_setup_core_buffer(struct vdec_av1_slice_instance * instance,struct vdec_av1_slice_pfc * pfc,struct vdec_av1_slice_vsi * vsi,struct vdec_fb * fb,struct vdec_lat_buf * lat_buf)1788  static int vdec_av1_slice_setup_core_buffer(struct vdec_av1_slice_instance *instance,
1789  					    struct vdec_av1_slice_pfc *pfc,
1790  					    struct vdec_av1_slice_vsi *vsi,
1791  					    struct vdec_fb *fb,
1792  					    struct vdec_lat_buf *lat_buf)
1793  {
1794  	struct vb2_buffer *vb;
1795  	struct vb2_queue *vq;
1796  	int w, h, plane, size;
1797  	int i;
1798  
1799  	plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes;
1800  	w = vsi->frame.uh.upscaled_width;
1801  	h = vsi->frame.uh.frame_height;
1802  	size = ALIGN(w, VCODEC_DEC_ALIGNED_64) * ALIGN(h, VCODEC_DEC_ALIGNED_64);
1803  
1804  	/* frame buffer */
1805  	vsi->fb.y.dma_addr = fb->base_y.dma_addr;
1806  	if (plane == 1)
1807  		vsi->fb.c.dma_addr = fb->base_y.dma_addr + size;
1808  	else
1809  		vsi->fb.c.dma_addr = fb->base_c.dma_addr;
1810  
1811  	/* reference buffers */
1812  	vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
1813  	if (!vq)
1814  		return -EINVAL;
1815  
1816  	/* get current output buffer */
1817  	vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf;
1818  	if (!vb)
1819  		return -EINVAL;
1820  
1821  	/* get buffer address from vb2buf */
1822  	for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
1823  		struct vdec_av1_slice_fb *vref = &vsi->ref[i];
1824  
1825  		vb = vb2_find_buffer(vq, pfc->ref_idx[i]);
1826  		if (!vb) {
1827  			memset(vref, 0, sizeof(*vref));
1828  			continue;
1829  		}
1830  
1831  		vref->y.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
1832  		if (plane == 1)
1833  			vref->c.dma_addr = vref->y.dma_addr + size;
1834  		else
1835  			vref->c.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 1);
1836  	}
1837  	vsi->tile.dma_addr = lat_buf->tile_addr.dma_addr;
1838  	vsi->tile.size = lat_buf->tile_addr.size;
1839  
1840  	return 0;
1841  }
1842  
vdec_av1_slice_setup_core(struct vdec_av1_slice_instance * instance,struct vdec_fb * fb,struct vdec_lat_buf * lat_buf,struct vdec_av1_slice_pfc * pfc)1843  static int vdec_av1_slice_setup_core(struct vdec_av1_slice_instance *instance,
1844  				     struct vdec_fb *fb,
1845  				     struct vdec_lat_buf *lat_buf,
1846  				     struct vdec_av1_slice_pfc *pfc)
1847  {
1848  	struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
1849  	int ret;
1850  
1851  	ret = vdec_av1_slice_setup_core_to_dst_buf(instance, lat_buf);
1852  	if (ret)
1853  		return ret;
1854  
1855  	ret = vdec_av1_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf);
1856  	if (ret)
1857  		return ret;
1858  
1859  	return 0;
1860  }
1861  
vdec_av1_slice_update_core(struct vdec_av1_slice_instance * instance,struct vdec_lat_buf * lat_buf,struct vdec_av1_slice_pfc * pfc)1862  static int vdec_av1_slice_update_core(struct vdec_av1_slice_instance *instance,
1863  				      struct vdec_lat_buf *lat_buf,
1864  				      struct vdec_av1_slice_pfc *pfc)
1865  {
1866  	struct vdec_av1_slice_vsi *vsi = instance->core_vsi;
1867  
1868  	mtk_vdec_debug(instance->ctx, "frame %u Y_CRC %08x %08x %08x %08x\n",
1869  		       pfc->seq, vsi->state.crc[0], vsi->state.crc[1],
1870  		       vsi->state.crc[2], vsi->state.crc[3]);
1871  	mtk_vdec_debug(instance->ctx, "frame %u C_CRC %08x %08x %08x %08x\n",
1872  		       pfc->seq, vsi->state.crc[8], vsi->state.crc[9],
1873  		       vsi->state.crc[10], vsi->state.crc[11]);
1874  
1875  	return 0;
1876  }
1877  
vdec_av1_slice_init(struct mtk_vcodec_dec_ctx * ctx)1878  static int vdec_av1_slice_init(struct mtk_vcodec_dec_ctx *ctx)
1879  {
1880  	struct vdec_av1_slice_instance *instance;
1881  	struct vdec_av1_slice_init_vsi *vsi;
1882  	int ret;
1883  
1884  	instance = kzalloc(sizeof(*instance), GFP_KERNEL);
1885  	if (!instance)
1886  		return -ENOMEM;
1887  
1888  	instance->ctx = ctx;
1889  	instance->vpu.id = SCP_IPI_VDEC_LAT;
1890  	instance->vpu.core_id = SCP_IPI_VDEC_CORE;
1891  	instance->vpu.ctx = ctx;
1892  	instance->vpu.codec_type = ctx->current_codec;
1893  
1894  	ret = vpu_dec_init(&instance->vpu);
1895  	if (ret) {
1896  		mtk_vdec_err(ctx, "failed to init vpu dec, ret %d\n", ret);
1897  		goto error_vpu_init;
1898  	}
1899  
1900  	/* init vsi and global flags */
1901  	vsi = instance->vpu.vsi;
1902  	if (!vsi) {
1903  		mtk_vdec_err(ctx, "failed to get AV1 vsi\n");
1904  		ret = -EINVAL;
1905  		goto error_vsi;
1906  	}
1907  	instance->init_vsi = vsi;
1908  	instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, (u32)vsi->core_vsi);
1909  
1910  	if (!instance->core_vsi) {
1911  		mtk_vdec_err(ctx, "failed to get AV1 core vsi\n");
1912  		ret = -EINVAL;
1913  		goto error_vsi;
1914  	}
1915  
1916  	if (vsi->vsi_size != sizeof(struct vdec_av1_slice_vsi))
1917  		mtk_vdec_err(ctx, "remote vsi size 0x%x mismatch! expected: 0x%zx\n",
1918  			     vsi->vsi_size, sizeof(struct vdec_av1_slice_vsi));
1919  
1920  	instance->irq_enabled = 1;
1921  	instance->inneracing_mode = IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability);
1922  
1923  	mtk_vdec_debug(ctx, "vsi 0x%p core_vsi 0x%llx 0x%p, inneracing_mode %d\n",
1924  		       vsi, vsi->core_vsi, instance->core_vsi, instance->inneracing_mode);
1925  
1926  	ret = vdec_av1_slice_init_cdf_table(instance);
1927  	if (ret)
1928  		goto error_vsi;
1929  
1930  	ret = vdec_av1_slice_init_iq_table(instance);
1931  	if (ret)
1932  		goto error_vsi;
1933  
1934  	ctx->drv_handle = instance;
1935  
1936  	return 0;
1937  error_vsi:
1938  	vpu_dec_deinit(&instance->vpu);
1939  error_vpu_init:
1940  	kfree(instance);
1941  
1942  	return ret;
1943  }
1944  
vdec_av1_slice_deinit(void * h_vdec)1945  static void vdec_av1_slice_deinit(void *h_vdec)
1946  {
1947  	struct vdec_av1_slice_instance *instance = h_vdec;
1948  
1949  	if (!instance)
1950  		return;
1951  	mtk_vdec_debug(instance->ctx, "h_vdec 0x%p\n", h_vdec);
1952  	vpu_dec_deinit(&instance->vpu);
1953  	vdec_av1_slice_free_working_buffer(instance);
1954  	vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx);
1955  	kfree(instance);
1956  }
1957  
/*
 * Flush the decoder: wait on the message queue via
 * vdec_msg_queue_wait_lat_buf_full(), clear every frame slot of the
 * instance and reset the VPU decoder.
 *
 * @bs, @fb and @res_chg are unused. Returns the result of vpu_dec_reset().
 */
static int vdec_av1_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs,
				struct vdec_fb *fb, bool *res_chg)
{
	struct vdec_av1_slice_instance *instance = h_vdec;
	int slot;

	mtk_vdec_debug(instance->ctx, "flush ...\n");

	vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue);

	slot = 0;
	while (slot < AV1_MAX_FRAME_BUF_COUNT) {
		vdec_av1_slice_clear_fb(&instance->slots.frame_info[slot]);
		slot++;
	}

	return vpu_dec_reset(&instance->vpu);
}
1973  
vdec_av1_slice_get_pic_info(struct vdec_av1_slice_instance * instance)1974  static void vdec_av1_slice_get_pic_info(struct vdec_av1_slice_instance *instance)
1975  {
1976  	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
1977  	u32 data[3];
1978  
1979  	mtk_vdec_debug(ctx, "w %u h %u\n", ctx->picinfo.pic_w, ctx->picinfo.pic_h);
1980  
1981  	data[0] = ctx->picinfo.pic_w;
1982  	data[1] = ctx->picinfo.pic_h;
1983  	data[2] = ctx->capture_fourcc;
1984  	vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO);
1985  
1986  	ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64);
1987  	ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64);
1988  	ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0];
1989  	ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1];
1990  }
1991  
/*
 * Report the DPB size in @dpb_sz: the total number of AV1 references per
 * frame plus one. @instance is unused.
 */
static inline void vdec_av1_slice_get_dpb_size(struct vdec_av1_slice_instance *instance,
					       u32 *dpb_sz)
{
	/* refer av1 specification */
	*dpb_sz = 1 + V4L2_AV1_TOTAL_REFS_PER_FRAME;
}
1998  
vdec_av1_slice_get_crop_info(struct vdec_av1_slice_instance * instance,struct v4l2_rect * cr)1999  static void vdec_av1_slice_get_crop_info(struct vdec_av1_slice_instance *instance,
2000  					 struct v4l2_rect *cr)
2001  {
2002  	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
2003  
2004  	cr->left = 0;
2005  	cr->top = 0;
2006  	cr->width = ctx->picinfo.pic_w;
2007  	cr->height = ctx->picinfo.pic_h;
2008  
2009  	mtk_vdec_debug(ctx, "l=%d, t=%d, w=%d, h=%d\n",
2010  		       cr->left, cr->top, cr->width, cr->height);
2011  }
2012  
vdec_av1_slice_get_param(void * h_vdec,enum vdec_get_param_type type,void * out)2013  static int vdec_av1_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out)
2014  {
2015  	struct vdec_av1_slice_instance *instance = h_vdec;
2016  
2017  	switch (type) {
2018  	case GET_PARAM_PIC_INFO:
2019  		vdec_av1_slice_get_pic_info(instance);
2020  		break;
2021  	case GET_PARAM_DPB_SIZE:
2022  		vdec_av1_slice_get_dpb_size(instance, out);
2023  		break;
2024  	case GET_PARAM_CROP_INFO:
2025  		vdec_av1_slice_get_crop_info(instance, out);
2026  		break;
2027  	default:
2028  		mtk_vdec_err(instance->ctx, "invalid get parameter type=%d\n", type);
2029  		return -EINVAL;
2030  	}
2031  
2032  	return 0;
2033  }
2034  
/*
 * Run the LAT stage for one bitstream buffer.
 *
 * @h_vdec: decoder instance handle
 * @bs: bitstream buffer; NULL requests a flush
 * @fb: passed through to the flush path only
 * @res_chg: passed through to the flush path only
 *
 * A lat_buf is dequeued, set up, handed to the firmware and, when interrupts
 * are enabled, waited on. Depending on the result the lat_buf is queued to
 * the core stage or given back to the LAT queue.
 *
 * Returns 0 on success and also when the frame must be re-decoded because
 * the transfer buffer was full, -EINVAL on a bad handle or missing private
 * data, -ENOMEM when the message queue cannot be initialised, -EAGAIN when
 * no lat_buf is available, -EBUSY on timeout or when the transfer buffer
 * can never fit the frame, or the error of the setup/start step that failed.
 */
static int vdec_av1_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
				     struct vdec_fb *fb, bool *res_chg)
{
	struct vdec_av1_slice_instance *instance = h_vdec;
	struct vdec_lat_buf *lat_buf;
	struct vdec_av1_slice_pfc *pfc;
	struct vdec_av1_slice_vsi *vsi;
	struct mtk_vcodec_dec_ctx *ctx;
	int ret;

	if (!instance || !instance->ctx)
		return -EINVAL;

	ctx = instance->ctx;
	/* init msgQ for the first time */
	if (vdec_msg_queue_init(&ctx->msg_queue, ctx,
				vdec_av1_slice_core_decode, sizeof(*pfc))) {
		mtk_vdec_err(ctx, "failed to init AV1 msg queue\n");
		return -ENOMEM;
	}

	/* bs NULL means flush decoder */
	if (!bs)
		return vdec_av1_slice_flush(h_vdec, bs, fb, res_chg);

	lat_buf = vdec_msg_queue_dqbuf(&ctx->msg_queue.lat_ctx);
	if (!lat_buf) {
		mtk_vdec_err(ctx, "failed to get AV1 lat buf\n");
		return -EAGAIN;
	}
	pfc = (struct vdec_av1_slice_pfc *)lat_buf->private_data;
	if (!pfc) {
		ret = -EINVAL;
		goto err_free_fb_out;
	}
	vsi = &pfc->vsi;

	ret = vdec_av1_slice_setup_lat(instance, bs, lat_buf, pfc);
	if (ret) {
		mtk_vdec_err(ctx, "failed to setup AV1 lat ret %d\n", ret);
		goto err_free_fb_out;
	}

	/* hand the prepared vsi to the firmware and start decoding */
	vdec_av1_slice_vsi_to_remote(vsi, instance->vsi);
	ret = vpu_dec_start(&instance->vpu, NULL, 0);
	if (ret) {
		mtk_vdec_err(ctx, "failed to dec AV1 ret %d\n", ret);
		goto err_free_fb_out;
	}
	/* in inner racing mode the core stage is queued before LAT completes */
	if (instance->inneracing_mode)
		vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf);

	if (instance->irq_enabled) {
		ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
						   WAIT_INTR_TIMEOUT_MS,
						   MTK_VDEC_LAT0);
		/* update remote vsi if decode timeout */
		if (ret) {
			mtk_vdec_err(ctx, "AV1 Frame %d decode timeout %d\n", pfc->seq, ret);
			WRITE_ONCE(instance->vsi->state.timeout, 1);
		}
		vpu_dec_end(&instance->vpu);
	}

	/* read back the state the firmware left in the remote vsi */
	vdec_av1_slice_vsi_from_remote(vsi, instance->vsi);
	ret = vdec_av1_slice_update_lat(instance, lat_buf, pfc);

	/* LAT trans full, re-decode */
	if (ret == -EAGAIN) {
		mtk_vdec_err(ctx, "AV1 Frame %d trans full\n", pfc->seq);
		if (!instance->inneracing_mode)
			vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
		return 0;
	}

	/* LAT trans full, no more UBE or decode timeout */
	if (ret == -ENOMEM || vsi->state.timeout) {
		mtk_vdec_err(ctx, "AV1 Frame %d insufficient buffer or timeout\n", pfc->seq);
		if (!instance->inneracing_mode)
			vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
		return -EBUSY;
	}
	/* the reported transfer end is rebased onto the UBE DMA address */
	vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
	mtk_vdec_debug(ctx, "lat dma 1 0x%pad 0x%pad\n",
		       &pfc->vsi.trans.dma_addr, &pfc->vsi.trans.dma_addr_end);

	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi->trans.dma_addr_end);

	if (!instance->inneracing_mode)
		vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf);
	/* keep the slot state of this frame for the next one */
	memcpy(&instance->slots, &vsi->slots, sizeof(instance->slots));

	return 0;

err_free_fb_out:
	/* give the unused lat_buf back to the LAT queue */
	vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);

	if (pfc)
		mtk_vdec_err(ctx, "slice dec number: %d err: %d\n", pfc->seq, ret);

	return ret;
}
2137  
vdec_av1_slice_core_decode(struct vdec_lat_buf * lat_buf)2138  static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf)
2139  {
2140  	struct vdec_av1_slice_instance *instance;
2141  	struct vdec_av1_slice_pfc *pfc;
2142  	struct mtk_vcodec_dec_ctx *ctx = NULL;
2143  	struct vdec_fb *fb = NULL;
2144  	int ret = -EINVAL;
2145  
2146  	if (!lat_buf)
2147  		return -EINVAL;
2148  
2149  	pfc = lat_buf->private_data;
2150  	ctx = lat_buf->ctx;
2151  	if (!pfc || !ctx)
2152  		return -EINVAL;
2153  
2154  	instance = ctx->drv_handle;
2155  	if (!instance)
2156  		goto err;
2157  
2158  	fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
2159  	if (!fb) {
2160  		ret = -EBUSY;
2161  		goto err;
2162  	}
2163  
2164  	ret = vdec_av1_slice_setup_core(instance, fb, lat_buf, pfc);
2165  	if (ret) {
2166  		mtk_vdec_err(ctx, "vdec_av1_slice_setup_core\n");
2167  		goto err;
2168  	}
2169  	vdec_av1_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
2170  	ret = vpu_dec_core(&instance->vpu);
2171  	if (ret) {
2172  		mtk_vdec_err(ctx, "vpu_dec_core\n");
2173  		goto err;
2174  	}
2175  
2176  	if (instance->irq_enabled) {
2177  		ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2178  						   WAIT_INTR_TIMEOUT_MS,
2179  						   MTK_VDEC_CORE);
2180  		/* update remote vsi if decode timeout */
2181  		if (ret) {
2182  			mtk_vdec_err(ctx, "AV1 frame %d core timeout\n", pfc->seq);
2183  			WRITE_ONCE(instance->vsi->state.timeout, 1);
2184  		}
2185  		vpu_dec_core_end(&instance->vpu);
2186  	}
2187  
2188  	ret = vdec_av1_slice_update_core(instance, lat_buf, pfc);
2189  	if (ret) {
2190  		mtk_vdec_err(ctx, "vdec_av1_slice_update_core\n");
2191  		goto err;
2192  	}
2193  
2194  	mtk_vdec_debug(ctx, "core dma_addr_end 0x%pad\n",
2195  		       &instance->core_vsi->trans.dma_addr_end);
2196  	vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, instance->core_vsi->trans.dma_addr_end);
2197  
2198  	ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
2199  
2200  	return 0;
2201  
2202  err:
2203  	/* always update read pointer */
2204  	vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
2205  
2206  	if (fb)
2207  		ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req);
2208  
2209  	return ret;
2210  }
2211  
2212  const struct vdec_common_if vdec_av1_slice_lat_if = {
2213  	.init		= vdec_av1_slice_init,
2214  	.decode		= vdec_av1_slice_lat_decode,
2215  	.get_param	= vdec_av1_slice_get_param,
2216  	.deinit		= vdec_av1_slice_deinit,
2217  };
2218