1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2023, Collabora
4  *
5  * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
6  */
7 
8 #include <media/v4l2-mem2mem.h>
9 #include "hantro.h"
10 #include "hantro_v4l2.h"
11 #include "rockchip_vpu981_regs.h"
12 
13 #define AV1_DEC_MODE		17
14 #define GM_GLOBAL_MODELS_PER_FRAME	7
15 #define GLOBAL_MODEL_TOTAL_SIZE	(6 * 4 + 4 * 2)
16 #define GLOBAL_MODEL_SIZE	ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
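/*
 * Layout note: each of the GM_GLOBAL_MODELS_PER_FRAME global motion models
 * is stored as six 32-bit warp parameters followed by four 16-bit shear
 * parameters (alpha, beta, gamma, delta), i.e. 6 * 4 + 4 * 2 = 32 bytes per
 * model; the table is then padded to a 2048-byte multiple. See
 * rockchip_vpu981_av1_dec_set_global_model() for the exact layout.
 */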
17 #define AV1_MAX_TILES		128
18 #define AV1_TILE_INFO_SIZE	(AV1_MAX_TILES * 16)
19 #define AV1DEC_MAX_PIC_BUFFERS	24
20 #define AV1_REF_SCALE_SHIFT	14
21 #define AV1_INVALID_IDX		-1
22 #define MAX_FRAME_DISTANCE	31
23 #define AV1_PRIMARY_REF_NONE	7
24 #define AV1_TILE_SIZE		ALIGN(32 * 128, 4096)
25 /*
26  * These 3 values aren't defined in enum v4l2_av1_segment_feature because
27  * they are not part of the specification
28  */
29 #define V4L2_AV1_SEG_LVL_ALT_LF_Y_H	2
30 #define V4L2_AV1_SEG_LVL_ALT_LF_U	3
31 #define V4L2_AV1_SEG_LVL_ALT_LF_V	4
32 
33 #define SUPERRES_SCALE_BITS 3
34 #define SCALE_NUMERATOR 8
35 #define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
36 
37 #define RS_SUBPEL_BITS 6
38 #define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
39 #define RS_SCALE_SUBPEL_BITS 14
40 #define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
41 #define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
42 #define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
43 
44 #define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
45 
46 #define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
47 #define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
48 #define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
49 #define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
50 #define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
51 #define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
52 #define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
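/*
 * The *_BUF_IDX macros map the V4L2 reference frame names onto the 0-based
 * reference slots used by the hardware, e.g. LST_BUF_IDX == 0 and
 * ALT_BUF_IDX == 6.
 */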
53 
54 #define DIV_LUT_PREC_BITS 14
55 #define DIV_LUT_BITS 8
56 #define DIV_LUT_NUM BIT(DIV_LUT_BITS)
57 #define WARP_PARAM_REDUCE_BITS 6
58 #define WARPEDMODEL_PREC_BITS 16
59 
60 #define AV1_DIV_ROUND_UP_POW2(value, n)			\
61 ({							\
62 	typeof(n) _n  = n;				\
63 	typeof(value) _value = value;			\
64 	(_value + (BIT(_n) >> 1)) >> _n;		\
65 })
66 
67 #define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
68 ({									\
69 	typeof(n) _n_  = n;						\
70 	typeof(value) _value_ = value;					\
71 	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
72 		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
73 })
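/*
 * Worked example: AV1_DIV_ROUND_UP_POW2(5, 1) is (5 + 1) >> 1 = 3, i.e.
 * division by 2^n rounding ties upward; the _SIGNED variant mirrors the
 * magnitude for negative inputs, so AV1_DIV_ROUND_UP_POW2_SIGNED(-5, 1)
 * gives -3 (ties rounded away from zero).
 */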
74 
75 struct rockchip_av1_film_grain {
76 	u8 scaling_lut_y[256];
77 	u8 scaling_lut_cb[256];
78 	u8 scaling_lut_cr[256];
79 	s16 cropped_luma_grain_block[4096];
80 	s16 cropped_chroma_grain_block[1024 * 2];
81 };
82 
83 static const short div_lut[DIV_LUT_NUM + 1] = {
84 	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
85 	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
86 	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
87 	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
88 	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
89 	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
90 	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
91 	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
92 	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
93 	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
94 	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
95 	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
96 	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
97 	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
98 	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
99 	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
100 	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
101 	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
102 	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
103 	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
104 	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
105 	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
106 	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
107 	8240,  8224,  8208,  8192,
108 };
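/*
 * Reciprocal table used by rockchip_vpu981_av1_dec_resolve_divisor_32():
 * entry f is roughly (1 << DIV_LUT_PREC_BITS) * 256 / (256 + f), e.g.
 * div_lut[0] = 16384, div_lut[128] = 10923 and div_lut[256] = 8192, so a
 * division can be replaced by a multiply and a right shift.
 */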
109 
110 static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
111 {
112 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
113 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
114 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
115 	u64 timestamp;
116 	int i, idx = frame->ref_frame_idx[ref];
117 
118 	if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
119 		return AV1_INVALID_IDX;
120 
121 	timestamp = frame->reference_frame_ts[idx];
122 	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
123 		if (!av1_dec->frame_refs[i].used)
124 			continue;
125 		if (av1_dec->frame_refs[i].timestamp == timestamp)
126 			return i;
127 	}
128 
129 	return AV1_INVALID_IDX;
130 }
131 
132 static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
133 {
134 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
135 	int idx = rockchip_vpu981_get_frame_index(ctx, ref);
136 
137 	if (idx != AV1_INVALID_IDX)
138 		return av1_dec->frame_refs[idx].order_hint;
139 
140 	return 0;
141 }
142 
143 static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
144 					     u64 timestamp)
145 {
146 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
147 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
148 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
149 	int i;
150 
151 	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
152 		int j;
153 
154 		if (av1_dec->frame_refs[i].used)
155 			continue;
156 
157 		av1_dec->frame_refs[i].width = frame->frame_width_minus_1 + 1;
158 		av1_dec->frame_refs[i].height = frame->frame_height_minus_1 + 1;
159 		av1_dec->frame_refs[i].mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
160 		av1_dec->frame_refs[i].mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
161 		av1_dec->frame_refs[i].timestamp = timestamp;
162 		av1_dec->frame_refs[i].frame_type = frame->frame_type;
163 		av1_dec->frame_refs[i].order_hint = frame->order_hint;
164 		av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);
165 
166 		for (j = 0; j < V4L2_AV1_TOTAL_REFS_PER_FRAME; j++)
167 			av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
168 		av1_dec->frame_refs[i].used = true;
169 		av1_dec->current_frame_index = i;
170 
171 		return i;
172 	}
173 
174 	return AV1_INVALID_IDX;
175 }
176 
177 static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
178 {
179 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
180 
181 	if (idx >= 0)
182 		av1_dec->frame_refs[idx].used = false;
183 }
184 
185 static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
186 {
187 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
188 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
189 
190 	int ref, idx;
191 
192 	for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
193 		u64 timestamp = av1_dec->frame_refs[idx].timestamp;
194 		bool used = false;
195 
196 		if (!av1_dec->frame_refs[idx].used)
197 			continue;
198 
199 		for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
200 			if (ctrls->frame->reference_frame_ts[ref] == timestamp)
201 				used = true;
202 		}
203 
204 		if (!used)
205 			rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
206 	}
207 }
208 
209 static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
210 {
211 	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
212 }
213 
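/*
 * Note: this returns the combined luma + chroma size (3/2 of the luma
 * plane, 64-byte aligned), which callers below use as the offset of the
 * per-frame motion vector buffer inside the capture buffer.
 */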
214 static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
215 {
216 	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
217 
218 	return ALIGN((cr_offset * 3) / 2, 64);
219 }
220 
221 static void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
222 {
223 	struct hantro_dev *vpu = ctx->dev;
224 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
225 
226 	if (av1_dec->db_data_col.cpu)
227 		dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
228 				  av1_dec->db_data_col.cpu,
229 				  av1_dec->db_data_col.dma);
230 	av1_dec->db_data_col.cpu = NULL;
231 
232 	if (av1_dec->db_ctrl_col.cpu)
233 		dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
234 				  av1_dec->db_ctrl_col.cpu,
235 				  av1_dec->db_ctrl_col.dma);
236 	av1_dec->db_ctrl_col.cpu = NULL;
237 
238 	if (av1_dec->cdef_col.cpu)
239 		dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
240 				  av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
241 	av1_dec->cdef_col.cpu = NULL;
242 
243 	if (av1_dec->sr_col.cpu)
244 		dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
245 				  av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
246 	av1_dec->sr_col.cpu = NULL;
247 
248 	if (av1_dec->lr_col.cpu)
249 		dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
250 				  av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
251 	av1_dec->lr_col.cpu = NULL;
252 }
253 
254 static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
255 {
256 	struct hantro_dev *vpu = ctx->dev;
257 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
258 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
259 	unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
260 	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
261 	unsigned int height_in_sb = height / 64;
262 	unsigned int stripe_num = ((height + 8) + 63) / 64;
263 	size_t size;
264 
265 	if (av1_dec->db_data_col.size >=
266 	    ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
267 		return 0;
268 
269 	rockchip_vpu981_av1_dec_tiles_free(ctx);
270 
271 	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
272 	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
273 						      &av1_dec->db_data_col.dma,
274 						      GFP_KERNEL);
275 	if (!av1_dec->db_data_col.cpu)
276 		goto buffer_allocation_error;
277 	av1_dec->db_data_col.size = size;
278 
279 	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
280 	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
281 						      &av1_dec->db_ctrl_col.dma,
282 						      GFP_KERNEL);
283 	if (!av1_dec->db_ctrl_col.cpu)
284 		goto buffer_allocation_error;
285 	av1_dec->db_ctrl_col.size = size;
286 
287 	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
288 	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
289 						   &av1_dec->cdef_col.dma,
290 						   GFP_KERNEL);
291 	if (!av1_dec->cdef_col.cpu)
292 		goto buffer_allocation_error;
293 	av1_dec->cdef_col.size = size;
294 
295 	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
296 	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
297 						 &av1_dec->sr_col.dma,
298 						 GFP_KERNEL);
299 	if (!av1_dec->sr_col.cpu)
300 		goto buffer_allocation_error;
301 	av1_dec->sr_col.size = size;
302 
303 	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
304 	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
305 						 &av1_dec->lr_col.dma,
306 						 GFP_KERNEL);
307 	if (!av1_dec->lr_col.cpu)
308 		goto buffer_allocation_error;
309 	av1_dec->lr_col.size = size;
310 
311 	av1_dec->num_tile_cols_allocated = num_tile_cols;
312 	return 0;
313 
314 buffer_allocation_error:
315 	rockchip_vpu981_av1_dec_tiles_free(ctx);
316 	return -ENOMEM;
317 }
318 
319 void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
320 {
321 	struct hantro_dev *vpu = ctx->dev;
322 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
323 
324 	if (av1_dec->global_model.cpu)
325 		dma_free_coherent(vpu->dev, av1_dec->global_model.size,
326 				  av1_dec->global_model.cpu,
327 				  av1_dec->global_model.dma);
328 	av1_dec->global_model.cpu = NULL;
329 
330 	if (av1_dec->tile_info.cpu)
331 		dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
332 				  av1_dec->tile_info.cpu,
333 				  av1_dec->tile_info.dma);
334 	av1_dec->tile_info.cpu = NULL;
335 
336 	if (av1_dec->film_grain.cpu)
337 		dma_free_coherent(vpu->dev, av1_dec->film_grain.size,
338 				  av1_dec->film_grain.cpu,
339 				  av1_dec->film_grain.dma);
340 	av1_dec->film_grain.cpu = NULL;
341 
342 	if (av1_dec->prob_tbl.cpu)
343 		dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
344 				  av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
345 	av1_dec->prob_tbl.cpu = NULL;
346 
347 	if (av1_dec->prob_tbl_out.cpu)
348 		dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
349 				  av1_dec->prob_tbl_out.cpu,
350 				  av1_dec->prob_tbl_out.dma);
351 	av1_dec->prob_tbl_out.cpu = NULL;
352 
353 	if (av1_dec->tile_buf.cpu)
354 		dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
355 				  av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
356 	av1_dec->tile_buf.cpu = NULL;
357 
358 	rockchip_vpu981_av1_dec_tiles_free(ctx);
359 }
360 
361 int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
362 {
363 	struct hantro_dev *vpu = ctx->dev;
364 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
365 
366 	memset(av1_dec, 0, sizeof(*av1_dec));
367 
368 	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
369 						       &av1_dec->global_model.dma,
370 						       GFP_KERNEL);
371 	if (!av1_dec->global_model.cpu)
372 		return -ENOMEM;
373 	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
374 
375 	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
376 						    &av1_dec->tile_info.dma,
377 						    GFP_KERNEL);
378 	if (!av1_dec->tile_info.cpu)
379 		return -ENOMEM;
380 	av1_dec->tile_info.size = AV1_MAX_TILES;
381 
382 	av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev,
383 						     ALIGN(sizeof(struct rockchip_av1_film_grain), 2048),
384 						     &av1_dec->film_grain.dma,
385 						     GFP_KERNEL);
386 	if (!av1_dec->film_grain.cpu)
387 		return -ENOMEM;
388 	av1_dec->film_grain.size = ALIGN(sizeof(struct rockchip_av1_film_grain), 2048);
389 
390 	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
391 						   ALIGN(sizeof(struct av1cdfs), 2048),
392 						   &av1_dec->prob_tbl.dma,
393 						   GFP_KERNEL);
394 	if (!av1_dec->prob_tbl.cpu)
395 		return -ENOMEM;
396 	av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
397 
398 	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
399 						       ALIGN(sizeof(struct av1cdfs), 2048),
400 						       &av1_dec->prob_tbl_out.dma,
401 						       GFP_KERNEL);
402 	if (!av1_dec->prob_tbl_out.cpu)
403 		return -ENOMEM;
404 	av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
405 	av1_dec->cdfs = &av1_dec->default_cdfs;
406 	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
407 
408 	rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
409 
410 	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
411 						   AV1_TILE_SIZE,
412 						   &av1_dec->tile_buf.dma,
413 						   GFP_KERNEL);
414 	if (!av1_dec->tile_buf.cpu)
415 		return -ENOMEM;
416 	av1_dec->tile_buf.size = AV1_TILE_SIZE;
417 
418 	return 0;
419 }
420 
421 static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
422 {
423 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
424 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
425 
426 	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
427 	if (WARN_ON(!ctrls->sequence))
428 		return -EINVAL;
429 
430 	ctrls->tile_group_entry =
431 	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
432 	if (WARN_ON(!ctrls->tile_group_entry))
433 		return -EINVAL;
434 
435 	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
436 	if (WARN_ON(!ctrls->frame))
437 		return -EINVAL;
438 
439 	ctrls->film_grain =
440 	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
441 
442 	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
443 }
444 
445 static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
446 {
447 	if (n == 0)
448 		return 0;
449 	return 31 ^ __builtin_clz(n);
450 }
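/*
 * Example: rockchip_vpu981_av1_dec_get_msb(12) == 3, i.e. the index of
 * the highest set bit (floor(log2(n))); 31 ^ __builtin_clz(n) is the same
 * as 31 - __builtin_clz(n) for any non-zero n.
 */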
451 
452 static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
453 {
454 	int f;
455 	u64 e;
456 
457 	*shift = rockchip_vpu981_av1_dec_get_msb(d);
458 	/* e is obtained from D after resetting the most significant 1 bit. */
459 	e = d - ((u32)1 << *shift);
460 	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
461 	if (*shift > DIV_LUT_BITS)
462 		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
463 	else
464 		f = e << (DIV_LUT_BITS - *shift);
465 	if (f > DIV_LUT_NUM)
466 		return -1;
467 	*shift += DIV_LUT_PREC_BITS;
468 	/* Use f as lookup into the precomputed table of multipliers */
469 	return div_lut[f];
470 }
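/*
 * Example: d = 640 = 2^9 * 1.25 gives shift = 9, e = 128, f = 64 and
 * div_lut[64] = 13107; shift is then bumped to 23, so x / 640 can be
 * approximated as (x * 13107) >> 23.
 */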
471 
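/*
 * Derive the warp filter shear parameters (alpha, beta, gamma, delta) from
 * the six global motion parameters, along the lines of the AV1 spec's shear
 * parameter setup; the results are rounded to WARP_PARAM_REDUCE_BITS
 * precision before being stored in the global model table.
 */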
472 static void
473 rockchip_vpu981_av1_dec_get_shear_params(const u32 *params, s64 *alpha,
474 					 s64 *beta, s64 *gamma, s64 *delta)
475 {
476 	const int *mat = params;
477 	short shift;
478 	short y;
479 	long long gv, dv;
480 
481 	if (mat[2] <= 0)
482 		return;
483 
484 	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
485 	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);
486 
487 	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
488 
489 	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
490 
491 	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
492 
493 	dv = ((long long)mat[3] * mat[4]) * y;
494 	*delta = clamp_val(mat[5] -
495 		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
496 		S16_MIN, S16_MAX);
497 
498 	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
499 		 * (1 << WARP_PARAM_REDUCE_BITS);
500 	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
501 		* (1 << WARP_PARAM_REDUCE_BITS);
502 	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
503 		 * (1 << WARP_PARAM_REDUCE_BITS);
504 	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
505 		* (1 << WARP_PARAM_REDUCE_BITS);
506 }
507 
508 static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
509 {
510 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
511 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
512 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
513 	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
514 	u8 *dst = av1_dec->global_model.cpu;
515 	struct hantro_dev *vpu = ctx->dev;
516 	int ref_frame, i;
517 
518 	memset(dst, 0, GLOBAL_MODEL_SIZE);
519 	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
520 		s64 alpha = 0, beta = 0, gamma = 0, delta = 0;
521 
522 		for (i = 0; i < 6; ++i) {
523 			if (i == 2)
524 				*(s32 *)dst =
525 					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
526 			else if (i == 3)
527 				*(s32 *)dst =
528 					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
529 			else
530 				*(s32 *)dst =
531 					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
532 			dst += 4;
533 		}
534 
535 		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
536 			rockchip_vpu981_av1_dec_get_shear_params(&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
537 								 &alpha, &beta, &gamma, &delta);
538 
539 		*(s16 *)dst = alpha;
540 		dst += 2;
541 		*(s16 *)dst = beta;
542 		dst += 2;
543 		*(s16 *)dst = gamma;
544 		dst += 2;
545 		*(s16 *)dst = delta;
546 		dst += 2;
547 	}
548 
549 	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
550 }
551 
552 static int rockchip_vpu981_av1_tile_log2(int target)
553 {
554 	int k;
555 
556 	/*
557 	 * returns the smallest value for k such that 1 << k is greater
558 	 * than or equal to target
559 	 */
560 	for (k = 0; (1 << k) < target; k++);
561 
562 	return k;
563 }
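/*
 * Examples: rockchip_vpu981_av1_tile_log2(1) == 0,
 * rockchip_vpu981_av1_tile_log2(3) == 2 and
 * rockchip_vpu981_av1_tile_log2(4) == 2.
 */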
564 
565 static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
566 {
567 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
568 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
569 	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
570 	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
571 	    ctrls->tile_group_entry;
572 	int context_update_y =
573 	    tile_info->context_update_tile_id / tile_info->tile_cols;
574 	int context_update_x =
575 	    tile_info->context_update_tile_id % tile_info->tile_cols;
576 	int context_update_tile_id =
577 	    context_update_x * tile_info->tile_rows + context_update_y;
578 	u8 *dst = av1_dec->tile_info.cpu;
579 	struct hantro_dev *vpu = ctx->dev;
580 	int tile0, tile1;
581 
582 	memset(dst, 0, av1_dec->tile_info.size);
583 
584 	for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
585 		for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
586 			int tile_id = tile1 * tile_info->tile_cols + tile0;
587 			u32 start, end;
588 			u32 y0 =
589 			    tile_info->height_in_sbs_minus_1[tile1] + 1;
590 			u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;
591 
592 			/* tile size in SB units (width,height) */
593 			*dst++ = x0;
594 			*dst++ = 0;
595 			*dst++ = 0;
596 			*dst++ = 0;
597 			*dst++ = y0;
598 			*dst++ = 0;
599 			*dst++ = 0;
600 			*dst++ = 0;
601 
602 			/* tile start position */
603 			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
604 			*dst++ = start & 255;
605 			*dst++ = (start >> 8) & 255;
606 			*dst++ = (start >> 16) & 255;
607 			*dst++ = (start >> 24) & 255;
608 
609 			/* number of bytes in tile data */
610 			end = start + group_entry[tile_id].tile_size;
611 			*dst++ = end & 255;
612 			*dst++ = (end >> 8) & 255;
613 			*dst++ = (end >> 16) & 255;
614 			*dst++ = (end >> 24) & 255;
615 		}
616 	}
617 
618 	hantro_reg_write(vpu, &av1_multicore_expect_context_update, !!(context_update_x == 0));
619 	hantro_reg_write(vpu, &av1_tile_enable,
620 			 !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1)));
621 	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
622 	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
623 	hantro_reg_write(vpu, &av1_context_update_tile_id, context_update_tile_id);
624 	hantro_reg_write(vpu, &av1_tile_transpose, 1);
625 	if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
626 	    rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
627 		hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
628 	else
629 		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
630 
631 	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
632 }
633 
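/*
 * Signed distance between two order hints, wrapping modulo the order hint
 * range like get_relative_dist() in the AV1 spec. Example: with
 * order_hint_bits == 8, a == 250 and b == 3, diff = 247, m = 128 and
 * (247 & 127) - (247 & 128) = -9, so frame 250 counts as 9 frames before
 * frame 3.
 */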
634 static int rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx *ctx,
635 					    int a, int b)
636 {
637 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
638 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
639 	int bits = ctrls->sequence->order_hint_bits - 1;
640 	int diff, m;
641 
642 	if (!ctrls->sequence->order_hint_bits)
643 		return 0;
644 
645 	diff = a - b;
646 	m = 1 << bits;
647 	diff = (diff & (m - 1)) - (diff & m);
648 
649 	return diff;
650 }
651 
652 static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
653 {
654 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
655 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
656 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
657 	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
658 	int i;
659 
660 	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
661 		for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
662 			av1_dec->ref_frame_sign_bias[i] = 0;
663 
664 		return;
665 	}
666 	// Set the per-reference sign bias: 1 if the reference comes after the current frame in display order, 0 otherwise.
667 	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; i++) {
668 		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
669 			int rel_off =
670 			    rockchip_vpu981_av1_dec_get_dist(ctx,
671 							     rockchip_vpu981_get_order_hint(ctx, i),
672 							     frame->order_hint);
673 			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
674 		}
675 	}
676 }
677 
678 static bool
679 rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
680 				int width, int height)
681 {
682 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
683 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
684 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
685 	struct hantro_dev *vpu = ctx->dev;
686 	struct hantro_decoded_buffer *dst;
687 	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
688 	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
689 	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
690 	int cur_width = frame->frame_width_minus_1 + 1;
691 	int cur_height = frame->frame_height_minus_1 + 1;
692 	int scale_width =
693 	    ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
694 	int scale_height =
695 	    ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;
696 
697 	switch (ref) {
698 	case 0:
699 		hantro_reg_write(vpu, &av1_ref0_height, height);
700 		hantro_reg_write(vpu, &av1_ref0_width, width);
701 		hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
702 		hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
703 		break;
704 	case 1:
705 		hantro_reg_write(vpu, &av1_ref1_height, height);
706 		hantro_reg_write(vpu, &av1_ref1_width, width);
707 		hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
708 		hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
709 		break;
710 	case 2:
711 		hantro_reg_write(vpu, &av1_ref2_height, height);
712 		hantro_reg_write(vpu, &av1_ref2_width, width);
713 		hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
714 		hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
715 		break;
716 	case 3:
717 		hantro_reg_write(vpu, &av1_ref3_height, height);
718 		hantro_reg_write(vpu, &av1_ref3_width, width);
719 		hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
720 		hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
721 		break;
722 	case 4:
723 		hantro_reg_write(vpu, &av1_ref4_height, height);
724 		hantro_reg_write(vpu, &av1_ref4_width, width);
725 		hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
726 		hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
727 		break;
728 	case 5:
729 		hantro_reg_write(vpu, &av1_ref5_height, height);
730 		hantro_reg_write(vpu, &av1_ref5_width, width);
731 		hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
732 		hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
733 		break;
734 	case 6:
735 		hantro_reg_write(vpu, &av1_ref6_height, height);
736 		hantro_reg_write(vpu, &av1_ref6_width, width);
737 		hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
738 		hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
739 		break;
740 	default:
741 		pr_warn("AV1 invalid reference frame index\n");
742 	}
743 
744 	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
745 	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
746 	chroma_addr = luma_addr + cr_offset;
747 	mv_addr = luma_addr + mv_offset;
748 
749 	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
750 	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
751 	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);
752 
753 	return (scale_width != (1 << AV1_REF_SCALE_SHIFT)) ||
754 		(scale_height != (1 << AV1_REF_SCALE_SHIFT));
755 }
756 
757 static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
758 						  int ref, int val)
759 {
760 	struct hantro_dev *vpu = ctx->dev;
761 
762 	switch (ref) {
763 	case 0:
764 		hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
765 		break;
766 	case 1:
767 		hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
768 		break;
769 	case 2:
770 		hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
771 		break;
772 	case 3:
773 		hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
774 		break;
775 	case 4:
776 		hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
777 		break;
778 	case 5:
779 		hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
780 		break;
781 	case 6:
782 		hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
783 		break;
784 	default:
785 		pr_warn("AV1 invalid sign bias index\n");
786 		break;
787 	}
788 }
789 
790 static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
791 {
792 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
793 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
794 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
795 	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
796 	u32 segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
797 	struct hantro_dev *vpu = ctx->dev;
798 	u8 segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
799 
800 	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) &&
801 	    frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME) {
802 		int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
803 
804 		if (idx >= 0) {
805 			dma_addr_t luma_addr, mv_addr = 0;
806 			struct hantro_decoded_buffer *seg;
807 			size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
808 
809 			seg = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
810 			luma_addr = hantro_get_dec_buf_addr(ctx, &seg->base.vb.vb2_buf);
811 			mv_addr = luma_addr + mv_offset;
812 
813 			hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
814 			hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
815 		}
816 	}
817 
818 	hantro_reg_write(vpu, &av1_segment_temp_upd_e,
819 			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
820 	hantro_reg_write(vpu, &av1_segment_upd_e,
821 			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
822 	hantro_reg_write(vpu, &av1_segment_e,
823 			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
824 
825 	hantro_reg_write(vpu, &av1_error_resilient,
826 			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
827 
828 	if (IS_INTRA(frame->frame_type) ||
829 	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
830 		hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
831 	}
832 
833 	if (seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) {
834 		int s;
835 
836 		for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
837 			if (seg->feature_enabled[s] &
838 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
839 				segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
840 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]),
841 					  0, 255);
842 				segsign |=
843 					(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
844 			}
845 
846 			if (seg->feature_enabled[s] &
847 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
848 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
849 					clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]),
850 					      -63, 63);
851 
852 			if (seg->feature_enabled[s] &
853 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
854 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
855 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]),
856 					  -63, 63);
857 
858 			if (seg->feature_enabled[s] &
859 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
860 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
861 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]),
862 					  -63, 63);
863 
864 			if (seg->feature_enabled[s] &
865 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
866 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
867 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]),
868 					  -63, 63);
869 
870 			if (frame->frame_type && seg->feature_enabled[s] &
871 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
872 				segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
873 
874 			if (seg->feature_enabled[s] &
875 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
876 				segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
877 
878 			if (seg->feature_enabled[s] &
879 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
880 				segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
881 		}
882 	}
883 
884 	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
885 		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
886 			if (seg->feature_enabled[i]
887 			    & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
888 				preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
889 				last_active_seg = max(i, last_active_seg);
890 			}
891 		}
892 	}
893 
894 	hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
895 	hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
896 
897 	hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
898 
899 	/* Write QP, filter level, ref frame and skip for every segment */
900 	hantro_reg_write(vpu, &av1_quant_seg0,
901 			 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
902 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
903 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
904 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
905 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
906 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
907 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
908 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
909 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
910 	hantro_reg_write(vpu, &av1_refpic_seg0,
911 			 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
912 	hantro_reg_write(vpu, &av1_skip_seg0,
913 			 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
914 	hantro_reg_write(vpu, &av1_global_mv_seg0,
915 			 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
916 
917 	hantro_reg_write(vpu, &av1_quant_seg1,
918 			 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
919 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
920 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
921 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
922 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
923 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
924 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
925 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
926 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
927 	hantro_reg_write(vpu, &av1_refpic_seg1,
928 			 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
929 	hantro_reg_write(vpu, &av1_skip_seg1,
930 			 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
931 	hantro_reg_write(vpu, &av1_global_mv_seg1,
932 			 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
933 
934 	hantro_reg_write(vpu, &av1_quant_seg2,
935 			 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
936 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
937 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
938 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
939 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
940 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
941 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
942 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
943 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
944 	hantro_reg_write(vpu, &av1_refpic_seg2,
945 			 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
946 	hantro_reg_write(vpu, &av1_skip_seg2,
947 			 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
948 	hantro_reg_write(vpu, &av1_global_mv_seg2,
949 			 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
950 
951 	hantro_reg_write(vpu, &av1_quant_seg3,
952 			 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
953 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
954 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
955 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
956 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
957 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
958 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
959 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
960 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
961 	hantro_reg_write(vpu, &av1_refpic_seg3,
962 			 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
963 	hantro_reg_write(vpu, &av1_skip_seg3,
964 			 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
965 	hantro_reg_write(vpu, &av1_global_mv_seg3,
966 			 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
967 
968 	hantro_reg_write(vpu, &av1_quant_seg4,
969 			 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
970 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
971 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
972 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
973 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
974 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
975 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
976 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
977 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
978 	hantro_reg_write(vpu, &av1_refpic_seg4,
979 			 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
980 	hantro_reg_write(vpu, &av1_skip_seg4,
981 			 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
982 	hantro_reg_write(vpu, &av1_global_mv_seg4,
983 			 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
984 
985 	hantro_reg_write(vpu, &av1_quant_seg5,
986 			 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
987 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
988 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
989 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
990 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
991 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
992 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
993 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
994 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
995 	hantro_reg_write(vpu, &av1_refpic_seg5,
996 			 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
997 	hantro_reg_write(vpu, &av1_skip_seg5,
998 			 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
999 	hantro_reg_write(vpu, &av1_global_mv_seg5,
1000 			 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1001 
1002 	hantro_reg_write(vpu, &av1_quant_seg6,
1003 			 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
1004 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
1005 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1006 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
1007 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1008 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
1009 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1010 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
1011 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1012 	hantro_reg_write(vpu, &av1_refpic_seg6,
1013 			 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
1014 	hantro_reg_write(vpu, &av1_skip_seg6,
1015 			 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
1016 	hantro_reg_write(vpu, &av1_global_mv_seg6,
1017 			 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1018 
1019 	hantro_reg_write(vpu, &av1_quant_seg7,
1020 			 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
1021 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
1022 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1023 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
1024 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1025 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
1026 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1027 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
1028 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1029 	hantro_reg_write(vpu, &av1_refpic_seg7,
1030 			 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
1031 	hantro_reg_write(vpu, &av1_skip_seg7,
1032 			 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
1033 	hantro_reg_write(vpu, &av1_global_mv_seg7,
1034 			 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1035 }
1036 
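/*
 * Equivalent of the CodedLossless derivation in the AV1 spec: the frame is
 * lossless only if every segment's effective qindex (base_q_idx plus any
 * SEG_LVL_ALT_Q delta, clamped to 0..255) is zero and all DC/AC delta_q
 * values are zero.
 */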
1037 static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
1038 {
1039 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1040 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1041 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1042 	const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
1043 	const struct v4l2_av1_quantization *quantization = &frame->quantization;
1044 	int i;
1045 
1046 	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1047 		int qindex = quantization->base_q_idx;
1048 
1049 		if (segmentation->feature_enabled[i] &
1050 		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
1051 			qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
1052 		}
1053 		qindex = clamp(qindex, 0, 255);
1054 
1055 		if (qindex ||
1056 		    quantization->delta_q_y_dc ||
1057 		    quantization->delta_q_u_dc ||
1058 		    quantization->delta_q_u_ac ||
1059 		    quantization->delta_q_v_dc ||
1060 		    quantization->delta_q_v_ac)
1061 			return false;
1062 	}
1063 	return true;
1064 }
1065 
1066 static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
1067 {
1068 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1069 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1070 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1071 	const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
1072 	bool filtering_dis = (loop_filter->level[0] == 0) && (loop_filter->level[1] == 0);
1073 	struct hantro_dev *vpu = ctx->dev;
1074 
1075 	hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
1076 	hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
1077 	hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
1078 
1079 	hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
1080 	hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
1081 	hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
1082 	hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
1083 
1084 	if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED &&
1085 	    !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
1086 	    !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
1087 		hantro_reg_write(vpu, &av1_filt_ref_adj_0,
1088 				 loop_filter->ref_deltas[0]);
1089 		hantro_reg_write(vpu, &av1_filt_ref_adj_1,
1090 				 loop_filter->ref_deltas[1]);
1091 		hantro_reg_write(vpu, &av1_filt_ref_adj_2,
1092 				 loop_filter->ref_deltas[2]);
1093 		hantro_reg_write(vpu, &av1_filt_ref_adj_3,
1094 				 loop_filter->ref_deltas[3]);
1095 		hantro_reg_write(vpu, &av1_filt_ref_adj_4,
1096 				 loop_filter->ref_deltas[4]);
1097 		hantro_reg_write(vpu, &av1_filt_ref_adj_5,
1098 				 loop_filter->ref_deltas[5]);
1099 		hantro_reg_write(vpu, &av1_filt_ref_adj_6,
1100 				 loop_filter->ref_deltas[6]);
1101 		hantro_reg_write(vpu, &av1_filt_ref_adj_7,
1102 				 loop_filter->ref_deltas[7]);
1103 		hantro_reg_write(vpu, &av1_filt_mb_adj_0,
1104 				 loop_filter->mode_deltas[0]);
1105 		hantro_reg_write(vpu, &av1_filt_mb_adj_1,
1106 				 loop_filter->mode_deltas[1]);
1107 	} else {
1108 		hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
1109 		hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
1110 		hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
1111 		hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
1112 		hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
1113 		hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
1114 		hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
1115 		hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
1116 		hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
1117 		hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
1118 	}
1119 
1120 	hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
1121 	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
1122 }
1123 
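/*
 * Frame-end CDF adaptation: unless DISABLE_FRAME_END_UPDATE_CDF is set, the
 * CDFs the hardware wrote to prob_tbl_out become the new entropy context;
 * rockchip_av1_store_cdfs() then stores them for the slots selected by
 * refresh_frame_flags. For intra frames the previous MV CDF is preserved
 * and the newly decoded MV CDF is kept separately as the intrabc (NDVC)
 * context.
 */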
1124 static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
1125 {
1126 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1127 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1128 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1129 	bool frame_is_intra = IS_INTRA(frame->frame_type);
1130 	struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
1131 	int i;
1132 
1133 	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
1134 		return;
1135 
1136 	for (i = 0; i < NUM_REF_FRAMES; i++) {
1137 		if (frame->refresh_frame_flags & BIT(i)) {
1138 			struct mvcdfs stored_mv_cdf;
1139 
1140 			rockchip_av1_get_cdfs(ctx, i);
1141 			stored_mv_cdf = av1_dec->cdfs->mv_cdf;
1142 			*av1_dec->cdfs = *out_cdfs;
1143 			if (frame_is_intra) {
1144 				av1_dec->cdfs->mv_cdf = stored_mv_cdf;
1145 				*av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
1146 			}
1147 			rockchip_av1_store_cdfs(ctx,
1148 						frame->refresh_frame_flags);
1149 			break;
1150 		}
1151 	}
1152 }
1153 
1154 void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
1155 {
1156 	rockchip_vpu981_av1_dec_update_prob(ctx);
1157 }
1158 
1159 static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
1160 {
1161 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1162 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1163 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1164 	const struct v4l2_av1_quantization *quantization = &frame->quantization;
1165 	struct hantro_dev *vpu = ctx->dev;
1166 	bool error_resilient_mode =
1167 	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
1168 	bool frame_is_intra = IS_INTRA(frame->frame_type);
1169 
1170 	if (error_resilient_mode || frame_is_intra ||
1171 	    frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
1172 		av1_dec->cdfs = &av1_dec->default_cdfs;
1173 		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
1174 		rockchip_av1_default_coeff_probs(quantization->base_q_idx,
1175 						 av1_dec->cdfs);
1176 	} else {
1177 		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
1178 	}
1179 	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
1180 
1181 	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
1182 
1183 	if (frame_is_intra) {
1184 		int mv_offset = offsetof(struct av1cdfs, mv_cdf);
1185 		/* Overwrite MV context area with intrabc MV context */
1186 		memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
1187 		       sizeof(struct mvcdfs));
1188 	}
1189 
1190 	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
1191 	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
1192 }
1193 
1194 static void
1195 rockchip_vpu981_av1_dec_init_scaling_function(const u8 *values, const u8 *scaling,
1196 					      u8 num_points, u8 *scaling_lut)
1197 {
1198 	int i, point;
1199 
1200 	if (num_points == 0) {
1201 		memset(scaling_lut, 0, 256);
1202 		return;
1203 	}
1204 
1205 	for (point = 0; point < num_points - 1; point++) {
1206 		int x;
1207 		s32 delta_y = scaling[point + 1] - scaling[point];
1208 		s32 delta_x = values[point + 1] - values[point];
1209 		s64 delta =
1210 		    delta_x ? delta_y * ((65536 + (delta_x >> 1)) /
1211 					 delta_x) : 0;
1212 
1213 		for (x = 0; x < delta_x; x++) {
1214 			scaling_lut[values[point] + x] =
1215 			    scaling[point] +
1216 			    (s32)((x * delta + 32768) >> 16);
1217 		}
1218 	}
1219 
1220 	for (i = values[num_points - 1]; i < 256; i++)
1221 		scaling_lut[i] = scaling[num_points - 1];
1222 }
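/*
 * Example: with points (0, 20) and (100, 120) the code computes
 * delta = 100 * ((65536 + 50) / 100) = 65500, so scaling_lut[50] =
 * 20 + ((50 * 65500 + 32768) >> 16) = 70, i.e. a piecewise-linear
 * interpolation of the film grain scaling function in Q16 arithmetic.
 */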
1223 
1224 static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
1225 {
1226 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1227 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1228 	const struct v4l2_ctrl_av1_film_grain *film_grain = ctrls->film_grain;
1229 	struct rockchip_av1_film_grain *fgmem = av1_dec->film_grain.cpu;
1230 	struct hantro_dev *vpu = ctx->dev;
1231 	bool scaling_from_luma =
1232 		!!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA);
1233 	s32 (*ar_coeffs_y)[24];
1234 	s32 (*ar_coeffs_cb)[25];
1235 	s32 (*ar_coeffs_cr)[25];
1236 	s32 (*luma_grain_block)[73][82];
1237 	s32 (*cb_grain_block)[38][44];
1238 	s32 (*cr_grain_block)[38][44];
1239 	s32 ar_coeff_lag, ar_coeff_shift;
1240 	s32 grain_scale_shift, bitdepth;
1241 	s32 grain_center, grain_min, grain_max;
1242 	int i, j;
1243 
1244 	hantro_reg_write(vpu, &av1_apply_grain, 0);
1245 
1246 	if (!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN)) {
1247 		hantro_reg_write(vpu, &av1_num_y_points_b, 0);
1248 		hantro_reg_write(vpu, &av1_num_cb_points_b, 0);
1249 		hantro_reg_write(vpu, &av1_num_cr_points_b, 0);
1250 		hantro_reg_write(vpu, &av1_scaling_shift, 0);
1251 		hantro_reg_write(vpu, &av1_cb_mult, 0);
1252 		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1253 		hantro_reg_write(vpu, &av1_cb_offset, 0);
1254 		hantro_reg_write(vpu, &av1_cr_mult, 0);
1255 		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1256 		hantro_reg_write(vpu, &av1_cr_offset, 0);
1257 		hantro_reg_write(vpu, &av1_overlap_flag, 0);
1258 		hantro_reg_write(vpu, &av1_clip_to_restricted_range, 0);
1259 		hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, 0);
1260 		hantro_reg_write(vpu, &av1_random_seed, 0);
1261 		hantro_write_addr(vpu, AV1_FILM_GRAIN, 0);
1262 		return;
1263 	}
1264 
1265 	ar_coeffs_y = kzalloc(sizeof(int32_t) * 24, GFP_KERNEL);
1266 	ar_coeffs_cb = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1267 	ar_coeffs_cr = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1268 	luma_grain_block = kzalloc(sizeof(int32_t) * 73 * 82, GFP_KERNEL);
1269 	cb_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1270 	cr_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1271 
1272 	if (!ar_coeffs_y || !ar_coeffs_cb || !ar_coeffs_cr ||
1273 	    !luma_grain_block || !cb_grain_block || !cr_grain_block) {
1274 		pr_warn("Fail allocating memory for film grain parameters\n");
1275 		goto alloc_fail;
1276 	}
1277 
1278 	hantro_reg_write(vpu, &av1_apply_grain, 1);
1279 
1280 	hantro_reg_write(vpu, &av1_num_y_points_b,
1281 			 film_grain->num_y_points > 0);
1282 	hantro_reg_write(vpu, &av1_num_cb_points_b,
1283 			 film_grain->num_cb_points > 0);
1284 	hantro_reg_write(vpu, &av1_num_cr_points_b,
1285 			 film_grain->num_cr_points > 0);
1286 	hantro_reg_write(vpu, &av1_scaling_shift,
1287 			 film_grain->grain_scaling_minus_8 + 8);
1288 
1289 	if (!scaling_from_luma) {
1290 		hantro_reg_write(vpu, &av1_cb_mult, film_grain->cb_mult - 128);
1291 		hantro_reg_write(vpu, &av1_cb_luma_mult, film_grain->cb_luma_mult - 128);
1292 		hantro_reg_write(vpu, &av1_cb_offset, film_grain->cb_offset - 256);
1293 		hantro_reg_write(vpu, &av1_cr_mult, film_grain->cr_mult - 128);
1294 		hantro_reg_write(vpu, &av1_cr_luma_mult, film_grain->cr_luma_mult - 128);
1295 		hantro_reg_write(vpu, &av1_cr_offset, film_grain->cr_offset - 256);
1296 	} else {
1297 		hantro_reg_write(vpu, &av1_cb_mult, 0);
1298 		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1299 		hantro_reg_write(vpu, &av1_cb_offset, 0);
1300 		hantro_reg_write(vpu, &av1_cr_mult, 0);
1301 		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1302 		hantro_reg_write(vpu, &av1_cr_offset, 0);
1303 	}
1304 
1305 	hantro_reg_write(vpu, &av1_overlap_flag,
1306 			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP));
1307 	hantro_reg_write(vpu, &av1_clip_to_restricted_range,
1308 			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE));
1309 	hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, scaling_from_luma);
1310 	hantro_reg_write(vpu, &av1_random_seed, film_grain->grain_seed);
1311 
1312 	rockchip_vpu981_av1_dec_init_scaling_function(film_grain->point_y_value,
1313 						      film_grain->point_y_scaling,
1314 						      film_grain->num_y_points,
1315 						      fgmem->scaling_lut_y);
1316 
1317 	if (film_grain->flags &
1318 	    V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA) {
1319 		memcpy(fgmem->scaling_lut_cb, fgmem->scaling_lut_y,
1320 		       sizeof(*fgmem->scaling_lut_y) * 256);
1321 		memcpy(fgmem->scaling_lut_cr, fgmem->scaling_lut_y,
1322 		       sizeof(*fgmem->scaling_lut_y) * 256);
1323 	} else {
1324 		rockchip_vpu981_av1_dec_init_scaling_function
1325 		    (film_grain->point_cb_value, film_grain->point_cb_scaling,
1326 		     film_grain->num_cb_points, fgmem->scaling_lut_cb);
1327 		rockchip_vpu981_av1_dec_init_scaling_function
1328 		    (film_grain->point_cr_value, film_grain->point_cr_scaling,
1329 		     film_grain->num_cr_points, fgmem->scaling_lut_cr);
1330 	}
1331 
1332 	for (i = 0; i < V4L2_AV1_AR_COEFFS_SIZE; i++) {
1333 		if (i < 24)
1334 			(*ar_coeffs_y)[i] = film_grain->ar_coeffs_y_plus_128[i] - 128;
1335 		(*ar_coeffs_cb)[i] = film_grain->ar_coeffs_cb_plus_128[i] - 128;
1336 		(*ar_coeffs_cr)[i] = film_grain->ar_coeffs_cr_plus_128[i] - 128;
1337 	}
1338 
1339 	ar_coeff_lag = film_grain->ar_coeff_lag;
1340 	ar_coeff_shift = film_grain->ar_coeff_shift_minus_6 + 6;
1341 	grain_scale_shift = film_grain->grain_scale_shift;
1342 	bitdepth = ctx->bit_depth;
1343 	grain_center = 128 << (bitdepth - 8);
1344 	grain_min = 0 - grain_center;
1345 	grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;
1346 
1347 	rockchip_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
1348 					       film_grain->num_y_points, grain_scale_shift,
1349 					       ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
1350 					       grain_min, grain_max, film_grain->grain_seed);
1351 
1352 	rockchip_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
1353 						 cr_grain_block, bitdepth,
1354 						 film_grain->num_y_points,
1355 						 film_grain->num_cb_points,
1356 						 film_grain->num_cr_points,
1357 						 grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
1358 						 ar_coeffs_cr, ar_coeff_shift, grain_min,
1359 						 grain_max,
1360 						 scaling_from_luma,
1361 						 film_grain->grain_seed);
1362 
1363 	for (i = 0; i < 64; i++) {
1364 		for (j = 0; j < 64; j++)
1365 			fgmem->cropped_luma_grain_block[i * 64 + j] =
1366 				(*luma_grain_block)[i + 9][j + 9];
1367 	}
1368 
1369 	for (i = 0; i < 32; i++) {
1370 		for (j = 0; j < 32; j++) {
1371 			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j] =
1372 				(*cb_grain_block)[i + 6][j + 6];
1373 			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j + 1] =
1374 				(*cr_grain_block)[i + 6][j + 6];
1375 		}
1376 	}
1377 
1378 	hantro_write_addr(vpu, AV1_FILM_GRAIN, av1_dec->film_grain.dma);
1379 
1380 alloc_fail:
1381 	kfree(ar_coeffs_y);
1382 	kfree(ar_coeffs_cb);
1383 	kfree(ar_coeffs_cr);
1384 	kfree(luma_grain_block);
1385 	kfree(cb_grain_block);
1386 	kfree(cr_grain_block);
1387 }
1388 
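/*
 * CDEF strengths are packed into per-preset register fields: 4 bits of
 * primary strength and 2 bits of secondary strength for each of the up to
 * BIT(cdef->bits) presets. The AV1 secondary strengths {0, 1, 2, 4} only
 * need two bits, so a value of 4 is presumably encoded as 3 by the
 * hardware.
 */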
1389 static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
1390 {
1391 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1392 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1393 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1394 	const struct v4l2_av1_cdef *cdef = &frame->cdef;
1395 	struct hantro_dev *vpu = ctx->dev;
1396 	u32 luma_pri_strength = 0;
1397 	u16 luma_sec_strength = 0;
1398 	u32 chroma_pri_strength = 0;
1399 	u16 chroma_sec_strength = 0;
1400 	int i;
1401 
1402 	hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
1403 	hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
1404 
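	/*
	 * Editorial note: pack the CDEF strengths for all 1 << cdef_bits
	 * filter presets, 4 bits per preset for primary strengths and
	 * 2 bits per preset for secondary strengths. A secondary strength
	 * of 4 (the AV1 spec skips the value 3) is encoded as 3 in the
	 * 2-bit field.
	 */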
1405 	for (i = 0; i < BIT(cdef->bits); i++) {
1406 		luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
1407 		if (cdef->y_sec_strength[i] == 4)
1408 			luma_sec_strength |= 3 << (i * 2);
1409 		else
1410 			luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
1411 
1412 		chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
1413 		if (cdef->uv_sec_strength[i] == 4)
1414 			chroma_sec_strength |= 3 << (i * 2);
1415 		else
1416 			chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
1417 	}
1418 
1419 	hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
1420 			 luma_pri_strength);
1421 	hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
1422 			 luma_sec_strength);
1423 	hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
1424 			 chroma_pri_strength);
1425 	hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
1426 			 chroma_sec_strength);
1427 
1428 	hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
1429 }
1430 
1431 static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
1432 {
1433 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1434 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1435 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1436 	const struct v4l2_av1_loop_restoration *loop_restoration =
1437 	    &frame->loop_restoration;
1438 	struct hantro_dev *vpu = ctx->dev;
1439 	u16 lr_type = 0, lr_unit_size = 0;
1440 	u8 restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
1441 	int i;
1442 
1443 	if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
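	/*
	 * Editorial note: per-plane restoration unit size codes, presumably
	 * log2 of the unit size in units of 32 samples (1 + lr_unit_shift
	 * maps to 64/128/256, reduced by lr_uv_shift for chroma). Default
	 * to the maximum when loop restoration is not used by this frame.
	 */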
1444 		restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
1445 		restoration_unit_size[1] =
1446 		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1447 		restoration_unit_size[2] =
1448 		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1449 	}
1450 
1451 	for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1452 		lr_type |=
1453 		    loop_restoration->frame_restoration_type[i] << (i * 2);
1454 		lr_unit_size |= restoration_unit_size[i] << (i * 2);
1455 	}
1456 
1457 	hantro_reg_write(vpu, &av1_lr_type, lr_type);
1458 	hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
1459 	hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
1460 }
1461 
1462 static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
1463 {
1464 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1465 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1466 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1467 	struct hantro_dev *vpu = ctx->dev;
1468 	u8 superres_scale_denominator = SCALE_NUMERATOR;
1469 	int superres_luma_step = RS_SCALE_SUBPEL_BITS;
1470 	int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
1471 	int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
1472 	int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
1473 	int superres_init_luma_subpel_x = 0;
1474 	int superres_init_chroma_subpel_x = 0;
1475 	int superres_is_scaled = 0;
1476 	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
1477 	int upscaled_luma, downscaled_luma;
1478 	int downscaled_chroma, upscaled_chroma;
1479 	int step_luma, step_chroma;
1480 	int err_luma, err_chroma;
1481 	int initial_luma, initial_chroma;
1482 	int width = 0;
1483 
1484 	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
1485 		superres_scale_denominator = frame->superres_denom;
1486 
1487 	if (superres_scale_denominator <= SCALE_NUMERATOR)
1488 		goto set_regs;
1489 
1490 	width = (frame->upscaled_width * SCALE_NUMERATOR +
1491 		(superres_scale_denominator / 2)) / superres_scale_denominator;
1492 
1493 	if (width < min_w)
1494 		width = min_w;
1495 
1496 	if (width == frame->upscaled_width)
1497 		goto set_regs;
1498 
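	/*
	 * Editorial note: the frame was coded at a reduced width, so derive
	 * the 14-bit fixed-point horizontal step and the initial subpel
	 * offsets used by the super-resolution upscaler, with chroma at
	 * half the luma width.
	 */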
1499 	superres_is_scaled = 1;
1500 	upscaled_luma = frame->upscaled_width;
1501 	downscaled_luma = width;
1502 	downscaled_chroma = (downscaled_luma + 1) >> 1;
1503 	upscaled_chroma = (upscaled_luma + 1) >> 1;
1504 	step_luma =
1505 		((downscaled_luma << RS_SCALE_SUBPEL_BITS) +
1506 		 (upscaled_luma / 2)) / upscaled_luma;
1507 	step_chroma =
1508 		((downscaled_chroma << RS_SCALE_SUBPEL_BITS) +
1509 		 (upscaled_chroma / 2)) / upscaled_chroma;
1510 	err_luma =
1511 		(upscaled_luma * step_luma)
1512 		- (downscaled_luma << RS_SCALE_SUBPEL_BITS);
1513 	err_chroma =
1514 		(upscaled_chroma * step_chroma)
1515 		- (downscaled_chroma << RS_SCALE_SUBPEL_BITS);
1516 	initial_luma =
1517 		((-((upscaled_luma - downscaled_luma) << (RS_SCALE_SUBPEL_BITS - 1))
1518 		  + upscaled_luma / 2)
1519 		 / upscaled_luma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_luma / 2)
1520 		& RS_SCALE_SUBPEL_MASK;
1521 	initial_chroma =
1522 		((-((upscaled_chroma - downscaled_chroma) << (RS_SCALE_SUBPEL_BITS - 1))
1523 		  + upscaled_chroma / 2)
1524 		 / upscaled_chroma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_chroma / 2)
1525 		& RS_SCALE_SUBPEL_MASK;
1526 	superres_luma_step = step_luma;
1527 	superres_chroma_step = step_chroma;
1528 	superres_luma_step_invra =
1529 		((upscaled_luma << RS_SCALE_SUBPEL_BITS) + (downscaled_luma / 2))
1530 		/ downscaled_luma;
1531 	superres_chroma_step_invra =
1532 		((upscaled_chroma << RS_SCALE_SUBPEL_BITS) + (downscaled_chroma / 2))
1533 		/ downscaled_chroma;
1534 	superres_init_luma_subpel_x = initial_luma;
1535 	superres_init_chroma_subpel_x = initial_chroma;
1536 
1537 set_regs:
1538 	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);
1539 
1540 	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
1541 		hantro_reg_write(vpu, &av1_scale_denom_minus9,
1542 				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
1543 	else
1544 		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);
1545 
1546 	hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
1547 	hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
1548 	hantro_reg_write(vpu, &av1_superres_luma_step_invra,
1549 			 superres_luma_step_invra);
1550 	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
1551 			 superres_chroma_step_invra);
1552 	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
1553 			 superres_init_luma_subpel_x);
1554 	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
1555 			 superres_init_chroma_subpel_x);
1556 	hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);
1557 
1558 	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
1559 }
1560 
1561 static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
1562 {
1563 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1564 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1565 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1566 	struct hantro_dev *vpu = ctx->dev;
1567 	int pic_width_in_cbs = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1568 	int pic_height_in_cbs = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1569 	int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
1570 			    - (frame->frame_width_minus_1 + 1);
1571 	int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
1572 			     - (frame->frame_height_minus_1 + 1);
1573 
1574 	hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
1575 	hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
1576 	hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
1577 	hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
1578 
1579 	rockchip_vpu981_av1_dec_set_superres_params(ctx);
1580 }
1581 
1582 static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
1583 {
1584 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1585 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1586 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1587 	struct hantro_dev *vpu = ctx->dev;
1588 	bool use_ref_frame_mvs =
1589 	    !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
1590 	int cur_frame_offset = frame->order_hint;
1591 	int alt_frame_offset = 0;
1592 	int gld_frame_offset = 0;
1593 	int bwd_frame_offset = 0;
1594 	int alt2_frame_offset = 0;
1595 	int refs_selected[3] = { 0, 0, 0 };
1596 	int cur_mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1597 	int cur_mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1598 	int cur_offset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1599 	int cur_roffset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1600 	int mf_types[3] = { 0, 0, 0 };
1601 	int ref_stamp = 2;
1602 	int ref_ind = 0;
1603 	int rf, idx;
1604 
1605 	alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
1606 	gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
1607 	bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
1608 	alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
1609 
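	/*
	 * Editorial note: select up to three reference frames whose motion
	 * fields can be projected onto the current frame, in the same
	 * priority order as AV1 motion field projection (LAST, BWDREF,
	 * ALTREF2, ALTREF, LAST2), skipping intra-only references and
	 * references with mismatching dimensions.
	 */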
1610 	idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
1611 	if (idx >= 0) {
1612 		int alt_frame_offset_in_lst =
1613 			av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
1614 		bool is_lst_overlay =
1615 		    (alt_frame_offset_in_lst == gld_frame_offset);
1616 
1617 		if (!is_lst_overlay) {
1618 			int lst_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1619 			int lst_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1620 			bool lst_intra_only =
1621 			    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1622 
1623 			if (lst_mi_cols == cur_mi_cols &&
1624 			    lst_mi_rows == cur_mi_rows && !lst_intra_only) {
1625 				mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
1626 				refs_selected[ref_ind++] = LST_BUF_IDX;
1627 			}
1628 		}
1629 		ref_stamp--;
1630 	}
1631 
1632 	idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
1633 	if (rockchip_vpu981_av1_dec_get_dist(ctx, bwd_frame_offset, cur_frame_offset) > 0) {
1634 		int bwd_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1635 		int bwd_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1636 		bool bwd_intra_only =
1637 		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1638 
1639 		if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
1640 		    !bwd_intra_only) {
1641 			mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
1642 			refs_selected[ref_ind++] = BWD_BUF_IDX;
1643 			ref_stamp--;
1644 		}
1645 	}
1646 
1647 	idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
1648 	if (rockchip_vpu981_av1_dec_get_dist(ctx, alt2_frame_offset, cur_frame_offset) > 0) {
1649 		int alt2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1650 		int alt2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1651 		bool alt2_intra_only =
1652 		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1653 
1654 		if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows &&
1655 		    !alt2_intra_only) {
1656 			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
1657 			refs_selected[ref_ind++] = ALT2_BUF_IDX;
1658 			ref_stamp--;
1659 		}
1660 	}
1661 
1662 	idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
1663 	if (rockchip_vpu981_av1_dec_get_dist(ctx, alt_frame_offset, cur_frame_offset) > 0 &&
1664 	    ref_stamp >= 0) {
1665 		int alt_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1666 		int alt_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1667 		bool alt_intra_only =
1668 		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1669 
1670 		if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
1671 		    !alt_intra_only) {
1672 			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
1673 			refs_selected[ref_ind++] = ALT_BUF_IDX;
1674 			ref_stamp--;
1675 		}
1676 	}
1677 
1678 	idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
1679 	if (idx >= 0 && ref_stamp >= 0) {
1680 		int lst2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1681 		int lst2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1682 		bool lst2_intra_only =
1683 		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1684 
1685 		if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows &&
1686 		    !lst2_intra_only) {
1687 			mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
1688 			refs_selected[ref_ind++] = LST2_BUF_IDX;
1689 			ref_stamp--;
1690 		}
1691 	}
1692 
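	/*
	 * Editorial note: signed order-hint distances between the current
	 * frame and each reference, in both directions; these feed the
	 * cur_*_offset / cur_*_roffset registers programmed below.
	 */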
1693 	for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
1694 		idx = rockchip_vpu981_get_frame_index(ctx, rf);
1695 		if (idx >= 0) {
1696 			int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
1697 
1698 			cur_offset[rf] =
1699 			    rockchip_vpu981_av1_dec_get_dist(ctx, cur_frame_offset, rf_order_hint);
1700 			cur_roffset[rf] =
1701 			    rockchip_vpu981_av1_dec_get_dist(ctx, rf_order_hint, cur_frame_offset);
1702 		} else {
1703 			cur_offset[rf] = 0;
1704 			cur_roffset[rf] = 0;
1705 		}
1706 	}
1707 
1708 	hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
1709 	hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
1710 	hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
1711 	hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
1712 
1713 	hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
1714 	hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
1715 	hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
1716 	hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
1717 	hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
1718 	hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
1719 	hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
1720 
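	/*
	 * Editorial note: for each selected motion-field reference that lies
	 * within MAX_FRAME_DISTANCE of the current frame, enable temporal MV
	 * use and program the distances from that reference to each of its
	 * own reference frames.
	 */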
1721 	if (use_ref_frame_mvs && ref_ind > 0 &&
1722 	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1723 	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1724 		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
1725 		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
1726 		u32 *oh = av1_dec->frame_refs[idx].order_hints;
1727 		int val;
1728 
1729 		hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
1730 
1731 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1732 		hantro_reg_write(vpu, &av1_mf1_last_offset, val);
1733 
1734 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1735 		hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
1736 
1737 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1738 		hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
1739 
1740 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1741 		hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
1742 
1743 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1744 		hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
1745 
1746 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1747 		hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
1748 
1749 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1750 		hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
1751 	}
1752 
1753 	hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
1754 	hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
1755 	hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
1756 	hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
1757 	hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
1758 	hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
1759 	hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
1760 
1761 	if (use_ref_frame_mvs && ref_ind > 1 &&
1762 	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1763 	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1764 		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
1765 		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
1766 		u32 *oh = av1_dec->frame_refs[idx].order_hints;
1767 		int val;
1768 
1769 		hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
1770 
1771 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1772 		hantro_reg_write(vpu, &av1_mf2_last_offset, val);
1773 
1774 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1775 		hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
1776 
1777 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1778 		hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
1779 
1780 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1781 		hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
1782 
1783 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1784 		hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
1785 
1786 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1787 		hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
1788 
1789 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1790 		hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
1791 	}
1792 
1793 	hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
1794 	hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
1795 	hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
1796 	hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
1797 	hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
1798 	hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
1799 	hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
1800 
1801 	if (use_ref_frame_mvs && ref_ind > 2 &&
1802 	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1803 	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1804 		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
1805 		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
1806 		u32 *oh = av1_dec->frame_refs[idx].order_hints;
1807 		int val;
1808 
1809 		hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
1810 
1811 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1812 		hantro_reg_write(vpu, &av1_mf3_last_offset, val);
1813 
1814 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1815 		hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
1816 
1817 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1818 		hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
1819 
1820 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1821 		hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
1822 
1823 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1824 		hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
1825 
1826 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1827 		hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
1828 
1829 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1830 		hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
1831 	}
1832 
1833 	hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
1834 	hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
1835 	hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
1836 	hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
1837 	hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
1838 	hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
1839 	hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
1840 
1841 	hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
1842 	hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
1843 	hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
1844 	hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
1845 	hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
1846 	hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
1847 	hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
1848 
1849 	hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
1850 	hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
1851 	hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
1852 }
1853 
1854 static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
1855 {
1856 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1857 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1858 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1859 	int frame_type = frame->frame_type;
1860 	bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
1861 	int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
1862 	struct hantro_dev *vpu = ctx->dev;
1863 	int i, ref_frames = 0;
1864 	bool scale_enable = false;
1865 
1866 	if (IS_INTRA(frame_type) && !allow_intrabc)
1867 		return;
1868 
1869 	if (!allow_intrabc) {
1870 		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
1871 			int idx = rockchip_vpu981_get_frame_index(ctx, i);
1872 
1873 			if (idx >= 0)
1874 				ref_count[idx]++;
1875 		}
1876 
1877 		for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
1878 			if (ref_count[i])
1879 				ref_frames++;
1880 		}
1881 	} else {
1882 		ref_frames = 1;
1883 	}
1884 	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
1885 
1886 	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
1887 
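	/*
	 * Editorial note: program geometry, scaling and sign bias for every
	 * reference slot; with intra block copy the current frame itself
	 * acts as the only reference.
	 */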
1888 	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
1889 		u32 ref = i - 1;
1890 		int idx = 0;
1891 		int width, height;
1892 
1893 		if (allow_intrabc) {
1894 			idx = av1_dec->current_frame_index;
1895 			width = frame->frame_width_minus_1 + 1;
1896 			height = frame->frame_height_minus_1 + 1;
1897 		} else {
1898 			if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
1899 				idx = rockchip_vpu981_get_frame_index(ctx, ref);
1900 			width = av1_dec->frame_refs[idx].width;
1901 			height = av1_dec->frame_refs[idx].height;
1902 		}
1903 
1904 		scale_enable |=
1905 		    rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
1906 						    height);
1907 
1908 		rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
1909 						      av1_dec->ref_frame_sign_bias[i]);
1910 	}
1911 	hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
1912 
1913 	hantro_reg_write(vpu, &av1_ref0_gm_mode,
1914 			 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
1915 	hantro_reg_write(vpu, &av1_ref1_gm_mode,
1916 			 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
1917 	hantro_reg_write(vpu, &av1_ref2_gm_mode,
1918 			 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
1919 	hantro_reg_write(vpu, &av1_ref3_gm_mode,
1920 			 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
1921 	hantro_reg_write(vpu, &av1_ref4_gm_mode,
1922 			 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
1923 	hantro_reg_write(vpu, &av1_ref5_gm_mode,
1924 			 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
1925 	hantro_reg_write(vpu, &av1_ref6_gm_mode,
1926 			 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
1927 
1928 	rockchip_vpu981_av1_dec_set_other_frames(ctx);
1929 }
1930 
1931 static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
1932 {
1933 	struct hantro_dev *vpu = ctx->dev;
1934 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1935 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1936 
1937 	hantro_reg_write(vpu, &av1_skip_mode,
1938 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
1939 	hantro_reg_write(vpu, &av1_tempor_mvp_e,
1940 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
1941 	hantro_reg_write(vpu, &av1_delta_lf_res_log,
1942 			 ctrls->frame->loop_filter.delta_lf_res);
1943 	hantro_reg_write(vpu, &av1_delta_lf_multi,
1944 			 !!(ctrls->frame->loop_filter.flags
1945 			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
1946 	hantro_reg_write(vpu, &av1_delta_lf_present,
1947 			 !!(ctrls->frame->loop_filter.flags
1948 			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
1949 	hantro_reg_write(vpu, &av1_disable_cdf_update,
1950 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
1951 	hantro_reg_write(vpu, &av1_allow_warp,
1952 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
1953 	hantro_reg_write(vpu, &av1_show_frame,
1954 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
1955 	hantro_reg_write(vpu, &av1_switchable_motion_mode,
1956 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
1957 	hantro_reg_write(vpu, &av1_enable_cdef,
1958 			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
1959 	hantro_reg_write(vpu, &av1_allow_masked_compound,
1960 			 !!(ctrls->sequence->flags
1961 			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
1962 	hantro_reg_write(vpu, &av1_allow_interintra,
1963 			 !!(ctrls->sequence->flags
1964 			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
1965 	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
1966 			 !!(ctrls->sequence->flags
1967 			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
1968 	hantro_reg_write(vpu, &av1_allow_filter_intra,
1969 			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
1970 	hantro_reg_write(vpu, &av1_enable_jnt_comp,
1971 			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
1972 	hantro_reg_write(vpu, &av1_enable_dual_filter,
1973 			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
1974 	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
1975 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
1976 	hantro_reg_write(vpu, &av1_allow_screen_content_tools,
1977 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
1978 	hantro_reg_write(vpu, &av1_allow_intrabc,
1979 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));
1980 
1981 	if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
1982 		hantro_reg_write(vpu, &av1_force_interger_mv, 0);
1983 	else
1984 		hantro_reg_write(vpu, &av1_force_interger_mv,
1985 				 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));
1986 
1987 	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
1988 	hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
1989 	hantro_reg_write(vpu, &av1_delta_q_present,
1990 			 !!(ctrls->frame->quantization.flags
1991 			    & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
1992 
1993 	hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
1994 	hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
1995 	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
1996 	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);
1997 
1998 	hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
1999 	hantro_reg_write(vpu, &av1_high_prec_mv_e,
2000 			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
2001 	hantro_reg_write(vpu, &av1_comp_pred_mode,
2002 			 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
2003 	hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
2004 	hantro_reg_write(vpu, &av1_max_cb_size,
2005 			 (ctrls->sequence->flags
2006 			  & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
2007 	hantro_reg_write(vpu, &av1_min_cb_size, 3);
2008 
2009 	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
2010 	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
2011 	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
2012 	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
2013 	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
2014 	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
2015 	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
2016 	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
2017 	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
2018 	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
2019 	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);
2020 
2021 	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
2022 	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
2023 	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
2024 	if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
2025 		hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
2026 		hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
2027 		hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
2028 	} else {
2029 		hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
2030 		hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
2031 		hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
2032 	}
2033 
2034 	hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
2035 	hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
2036 	hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);
2037 
2038 	hantro_reg_write(vpu, &av1_skip_ref0,
2039 			 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
2040 	hantro_reg_write(vpu, &av1_skip_ref1,
2041 			 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);
2042 
2043 	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
2044 	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
2045 }
2046 
2047 static void
2048 rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
2049 					 struct vb2_v4l2_buffer *vb2_src)
2050 {
2051 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2052 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
2053 	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
2054 	    ctrls->tile_group_entry;
2055 	struct hantro_dev *vpu = ctx->dev;
2056 	dma_addr_t src_dma;
2057 	u32 src_len, src_buf_len;
2058 	int start_bit, offset;
2059 
2060 	src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
2061 	src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
2062 	src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
2063 
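	/*
	 * Editorial note: split the first tile offset into a 16-byte-aligned
	 * byte offset and a bit offset within those 16 bytes, so the stream
	 * address programmed below stays aligned.
	 */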
2064 	start_bit = (group_entry[0].tile_offset & 0xf) * 8;
2065 	offset = group_entry[0].tile_offset & ~0xf;
2066 
2067 	hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
2068 	hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
2069 	hantro_reg_write(vpu, &av1_stream_len, src_len);
2070 	hantro_reg_write(vpu, &av1_strm_start_offset, 0);
2071 	hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
2072 }
2073 
2074 static void
2075 rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
2076 {
2077 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2078 	struct hantro_dev *vpu = ctx->dev;
2079 	struct hantro_decoded_buffer *dst;
2080 	struct vb2_v4l2_buffer *vb2_dst;
2081 	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
2082 	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
2083 	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
2084 
2085 	vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
2086 	dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
2087 	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
2088 	chroma_addr = luma_addr + cr_offset;
2089 	mv_addr = luma_addr + mv_offset;
2090 
2091 	hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
2092 	hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
2093 	hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
2094 }
2095 
2096 int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
2097 {
2098 	struct hantro_dev *vpu = ctx->dev;
2099 	struct vb2_v4l2_buffer *vb2_src;
2100 	int ret;
2101 
2102 	hantro_start_prepare_run(ctx);
2103 
2104 	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
2105 	if (ret)
2106 		goto prepare_error;
2107 
2108 	vb2_src = hantro_get_src_buf(ctx);
2109 	if (!vb2_src) {
2110 		ret = -EINVAL;
2111 		goto prepare_error;
2112 	}
2113 
2114 	rockchip_vpu981_av1_dec_clean_refs(ctx);
2115 	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);
2116 
2117 	rockchip_vpu981_av1_dec_set_parameters(ctx);
2118 	rockchip_vpu981_av1_dec_set_global_model(ctx);
2119 	rockchip_vpu981_av1_dec_set_tile_info(ctx);
2120 	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
2121 	rockchip_vpu981_av1_dec_set_segmentation(ctx);
2122 	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
2123 	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
2124 	rockchip_vpu981_av1_dec_set_cdef(ctx);
2125 	rockchip_vpu981_av1_dec_set_lr(ctx);
2126 	rockchip_vpu981_av1_dec_set_fgs(ctx);
2127 	rockchip_vpu981_av1_dec_set_prob(ctx);
2128 
2129 	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
2130 	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
2131 	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
2132 	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
2133 	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);
2134 
2135 	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
2136 	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);
2137 
2138 	hantro_reg_write(vpu, &av1_dec_alignment, 64);
2139 	hantro_reg_write(vpu, &av1_apf_disable, 0);
2140 	hantro_reg_write(vpu, &av1_apf_threshold, 8);
2141 	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
2142 	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
2143 	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
2144 	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
2145 	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);
2146 
2147 	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
2148 	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
2149 	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
2150 	hantro_reg_write(vpu, &av1_timeout_override_e, 1);
2151 
2152 	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
2153 	rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);
2154 
2155 	hantro_end_prepare_run(ctx);
2156 
2157 	hantro_reg_write(vpu, &av1_dec_e, 1);
2158 
2159 	return 0;
2160 
2161 prepare_error:
2162 	hantro_end_prepare_run(ctx);
2163 	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
2164 	return ret;
2165 }
2166 
2167 static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
2168 {
2169 	struct hantro_dev *vpu = ctx->dev;
2170 	int width = ctx->dst_fmt.width;
2171 	int height = ctx->dst_fmt.height;
2172 	struct vb2_v4l2_buffer *vb2_dst;
2173 	size_t chroma_offset;
2174 	dma_addr_t dst_dma;
2175 
2176 	vb2_dst = hantro_get_dst_buf(ctx);
2177 
2178 	dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
2179 	chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
2180 	    ctx->dst_fmt.height;
2181 
2182 	/* enable post processor */
2183 	hantro_reg_write(vpu, &av1_pp_out_e, 1);
2184 	hantro_reg_write(vpu, &av1_pp_in_format, 0);
2185 	hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
2186 	hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
2187 
2188 	hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
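	/*
	 * Editorial note: the input dimensions appear to be programmed in
	 * units of 2 pixels (hence the halved width/height together with the
	 * dup_hor/dup_ver settings above), while the output dimensions are
	 * given in pixels.
	 */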
2189 	hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
2190 	hantro_reg_write(vpu, &av1_pp_out_height, height);
2191 	hantro_reg_write(vpu, &av1_pp_out_width, width);
2192 	hantro_reg_write(vpu, &av1_pp_out_y_stride,
2193 			 ctx->dst_fmt.plane_fmt[0].bytesperline);
2194 	hantro_reg_write(vpu, &av1_pp_out_c_stride,
2195 			 ctx->dst_fmt.plane_fmt[0].bytesperline);
2196 	switch (ctx->dst_fmt.pixelformat) {
2197 	case V4L2_PIX_FMT_P010:
2198 		hantro_reg_write(vpu, &av1_pp_out_format, 1);
2199 		break;
2200 	case V4L2_PIX_FMT_NV12:
2201 		hantro_reg_write(vpu, &av1_pp_out_format, 3);
2202 		break;
2203 	default:
2204 		hantro_reg_write(vpu, &av1_pp_out_format, 0);
2205 	}
2206 
2207 	hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
2208 	hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
2209 	hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
2210 	hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
2211 	hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
2212 	hantro_reg_write(vpu, &av1_pp_up_level, 0);
2213 	hantro_reg_write(vpu, &av1_pp_down_level, 0);
2214 	hantro_reg_write(vpu, &av1_pp_exist, 0);
2215 
2216 	hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
2217 	hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
2218 }
2219 
2220 static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
2221 {
2222 	struct hantro_dev *vpu = ctx->dev;
2223 
2224 	/* disable post processor */
2225 	hantro_reg_write(vpu, &av1_pp_out_e, 0);
2226 }
2227 
2228 const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
2229 	.enable = rockchip_vpu981_postproc_enable,
2230 	.disable = rockchip_vpu981_postproc_disable,
2231 };
2232