1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2023, Collabora
4 *
5 * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
6 */
7
8 #include <media/v4l2-mem2mem.h>
9 #include "hantro.h"
10 #include "hantro_v4l2.h"
11 #include "rockchip_vpu981_regs.h"
12
#define AV1_DEC_MODE 17
#define GM_GLOBAL_MODELS_PER_FRAME 7
/*
 * Bytes stored per global motion model: six 4-byte parameters followed by
 * four 2-byte shear values (see rockchip_vpu981_av1_dec_set_global_model()).
 */
#define GLOBAL_MODEL_TOTAL_SIZE (6 * 4 + 4 * 2)
/* Size of the global model DMA buffer, padded to a multiple of 2048 bytes */
#define GLOBAL_MODEL_SIZE ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
#define AV1_MAX_TILES 128
/*
 * 16 bytes per tile, which is what
 * rockchip_vpu981_av1_dec_set_tile_info() emits for each tile.
 */
#define AV1_TILE_INFO_SIZE (AV1_MAX_TILES * 16)
#define AV1DEC_MAX_PIC_BUFFERS 24
/* Reference scale factors are fixed point; 1 << AV1_REF_SCALE_SHIFT means 1:1 */
#define AV1_REF_SCALE_SHIFT 14
/* Returned by the frame slot helpers when no matching/free slot exists */
#define AV1_INVALID_IDX -1
#define MAX_FRAME_DISTANCE 31
#define AV1_PRIMARY_REF_NONE 7
/* Size of the tile_buf DMA allocation */
#define AV1_TILE_SIZE ALIGN(32 * 128, 4096)
/*
 * These 3 values aren't defined in enum v4l2_av1_segment_feature because
 * they are not part of the specification.
 */
#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H 2
#define V4L2_AV1_SEG_LVL_ALT_LF_U 3
#define V4L2_AV1_SEG_LVL_ALT_LF_V 4

#define SUPERRES_SCALE_BITS 3
#define SCALE_NUMERATOR 8
#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)

#define RS_SUBPEL_BITS 6
#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
#define RS_SCALE_SUBPEL_BITS 14
#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))

/*
 * True for key frames and intra-only frames.
 * NOTE(review): @type is not parenthesised in the expansion, so only pass a
 * simple expression.
 */
#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))

/* Zero-based indexes of the reference names, counted from LAST_FRAME */
#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)

/* Precision of the div_lut[] entries: the first entry is 1 << DIV_LUT_PREC_BITS */
#define DIV_LUT_PREC_BITS 14
/* div_lut[] is indexed with DIV_LUT_BITS bits and holds DIV_LUT_NUM + 1 entries */
#define DIV_LUT_BITS 8
#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
/* Shear parameters are rounded to a multiple of 1 << WARP_PARAM_REDUCE_BITS */
#define WARP_PARAM_REDUCE_BITS 6
#define WARPEDMODEL_PREC_BITS 16
59
/*
 * Divide @value by 2^@n after adding half of the divisor (BIT(n) >> 1).
 * Despite the name this rounds to the nearest integer rather than strictly
 * upwards. Both arguments are evaluated exactly once.
 */
#define AV1_DIV_ROUND_UP_POW2(value, n) \
({ \
	typeof(n) _n = n; \
	typeof(value) _value = value; \
	(_value + (BIT(_n) >> 1)) >> _n; \
})
66
/*
 * Signed variant of AV1_DIV_ROUND_UP_POW2(): the rounding division is applied
 * to the magnitude of @value and the sign is restored afterwards, so negative
 * and positive inputs round symmetrically. Both arguments are evaluated once.
 */
#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n) \
({ \
	typeof(n) _n_ = n; \
	typeof(value) _value_ = value; \
	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_)) \
			 : AV1_DIV_ROUND_UP_POW2((_value_), (_n_))); \
})
74
/*
 * Film grain data block. rockchip_vpu981_av1_dec_init() sizes the film_grain
 * DMA buffer from sizeof() of this structure, rounded up to 2048 bytes.
 * NOTE(review): presumably the field order is the layout the hardware reads;
 * confirm before reordering or resizing any member.
 */
struct rockchip_av1_film_grain {
	/* One 256-entry scaling table per plane (Y, Cb, Cr) */
	u8 scaling_lut_y[256];
	u8 scaling_lut_cb[256];
	u8 scaling_lut_cr[256];
	s16 cropped_luma_grain_block[4096];
	s16 cropped_chroma_grain_block[1024 * 2];
};
82
/*
 * Multiplier table used by rockchip_vpu981_av1_dec_resolve_divisor_32().
 * It holds DIV_LUT_NUM + 1 entries, starting at 16384
 * (1 << DIV_LUT_PREC_BITS) and decreasing to 8192.
 * NOTE(review): entry i looks like
 * round((1 << DIV_LUT_PREC_BITS) * DIV_LUT_NUM / (DIV_LUT_NUM + i));
 * confirm against the AV1 specification before regenerating.
 */
static const short div_lut[DIV_LUT_NUM + 1] = {
	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
	9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732,
	9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489,
	9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259,
	9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039,
	9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830,
	8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630,
	8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439,
	8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257,
	8240, 8224, 8208, 8192,
};
109
rockchip_vpu981_get_frame_index(struct hantro_ctx * ctx,int ref)110 static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
111 {
112 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
113 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
114 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
115 u64 timestamp;
116 int i, idx = frame->ref_frame_idx[ref];
117
118 if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
119 return AV1_INVALID_IDX;
120
121 timestamp = frame->reference_frame_ts[idx];
122 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
123 if (!av1_dec->frame_refs[i].used)
124 continue;
125 if (av1_dec->frame_refs[i].timestamp == timestamp)
126 return i;
127 }
128
129 return AV1_INVALID_IDX;
130 }
131
rockchip_vpu981_get_order_hint(struct hantro_ctx * ctx,int ref)132 static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
133 {
134 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
135 int idx = rockchip_vpu981_get_frame_index(ctx, ref);
136
137 if (idx != AV1_INVALID_IDX)
138 return av1_dec->frame_refs[idx].order_hint;
139
140 return 0;
141 }
142
/*
 * Record the frame being decoded in the first free slot of frame_refs[].
 *
 * The slot stores the frame geometry, type, order hints, @timestamp and the
 * current destination buffer, is marked as used and becomes
 * current_frame_index.
 *
 * Returns the slot index, or AV1_INVALID_IDX when every slot is in use.
 */
static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
					     u64 timestamp)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	int i;

	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
		int j;

		if (av1_dec->frame_refs[i].used)
			continue;

		av1_dec->frame_refs[i].width = frame->frame_width_minus_1 + 1;
		av1_dec->frame_refs[i].height = frame->frame_height_minus_1 + 1;
		av1_dec->frame_refs[i].mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
		av1_dec->frame_refs[i].mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
		av1_dec->frame_refs[i].timestamp = timestamp;
		av1_dec->frame_refs[i].frame_type = frame->frame_type;
		av1_dec->frame_refs[i].order_hint = frame->order_hint;
		/*
		 * Always take the current destination buffer. Releasing a slot
		 * only clears 'used', so a recycled slot still holds the
		 * pointer of the frame it described before; keeping it would
		 * pair this frame's timestamp with another frame's buffer.
		 */
		av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);

		for (j = 0; j < V4L2_AV1_TOTAL_REFS_PER_FRAME; j++)
			av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
		av1_dec->frame_refs[i].used = true;
		av1_dec->current_frame_index = i;

		return i;
	}

	return AV1_INVALID_IDX;
}
177
rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx * ctx,int idx)178 static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
179 {
180 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
181
182 if (idx >= 0)
183 av1_dec->frame_refs[idx].used = false;
184 }
185
rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx * ctx)186 static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
187 {
188 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
189 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
190
191 int ref, idx;
192
193 for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
194 u64 timestamp = av1_dec->frame_refs[idx].timestamp;
195 bool used = false;
196
197 if (!av1_dec->frame_refs[idx].used)
198 continue;
199
200 for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
201 if (ctrls->frame->reference_frame_ts[ref] == timestamp)
202 used = true;
203 }
204
205 if (!used)
206 rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
207 }
208 }
209
rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx * ctx)210 static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
211 {
212 return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
213 }
214
/*
 * Return 1.5 times the luma size, rounded up to a multiple of 64 bytes.
 *
 * Despite the name, callers in this file use the result as the offset, from
 * the start of the buffer, of the data stored after the chroma plane.
 */
static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
{
	size_t luma = rockchip_vpu981_av1_dec_luma_size(ctx);
	size_t luma_and_chroma = (luma * 3) / 2;

	return ALIGN(luma_and_chroma, 64);
}
221
rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx * ctx)222 static void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
223 {
224 struct hantro_dev *vpu = ctx->dev;
225 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
226
227 if (av1_dec->db_data_col.cpu)
228 dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
229 av1_dec->db_data_col.cpu,
230 av1_dec->db_data_col.dma);
231 av1_dec->db_data_col.cpu = NULL;
232
233 if (av1_dec->db_ctrl_col.cpu)
234 dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
235 av1_dec->db_ctrl_col.cpu,
236 av1_dec->db_ctrl_col.dma);
237 av1_dec->db_ctrl_col.cpu = NULL;
238
239 if (av1_dec->cdef_col.cpu)
240 dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
241 av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
242 av1_dec->cdef_col.cpu = NULL;
243
244 if (av1_dec->sr_col.cpu)
245 dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
246 av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
247 av1_dec->sr_col.cpu = NULL;
248
249 if (av1_dec->lr_col.cpu)
250 dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
251 av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
252 av1_dec->lr_col.cpu = NULL;
253 }
254
rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx * ctx)255 static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
256 {
257 struct hantro_dev *vpu = ctx->dev;
258 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
259 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
260 unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
261 unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
262 unsigned int height_in_sb = height / 64;
263 unsigned int stripe_num = ((height + 8) + 63) / 64;
264 size_t size;
265
266 if (av1_dec->db_data_col.size >=
267 ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
268 return 0;
269
270 rockchip_vpu981_av1_dec_tiles_free(ctx);
271
272 size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
273 av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
274 &av1_dec->db_data_col.dma,
275 GFP_KERNEL);
276 if (!av1_dec->db_data_col.cpu)
277 goto buffer_allocation_error;
278 av1_dec->db_data_col.size = size;
279
280 size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
281 av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
282 &av1_dec->db_ctrl_col.dma,
283 GFP_KERNEL);
284 if (!av1_dec->db_ctrl_col.cpu)
285 goto buffer_allocation_error;
286 av1_dec->db_ctrl_col.size = size;
287
288 size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
289 av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
290 &av1_dec->cdef_col.dma,
291 GFP_KERNEL);
292 if (!av1_dec->cdef_col.cpu)
293 goto buffer_allocation_error;
294 av1_dec->cdef_col.size = size;
295
296 size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
297 av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
298 &av1_dec->sr_col.dma,
299 GFP_KERNEL);
300 if (!av1_dec->sr_col.cpu)
301 goto buffer_allocation_error;
302 av1_dec->sr_col.size = size;
303
304 size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
305 av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
306 &av1_dec->lr_col.dma,
307 GFP_KERNEL);
308 if (!av1_dec->lr_col.cpu)
309 goto buffer_allocation_error;
310 av1_dec->lr_col.size = size;
311
312 av1_dec->num_tile_cols_allocated = num_tile_cols;
313 return 0;
314
315 buffer_allocation_error:
316 rockchip_vpu981_av1_dec_tiles_free(ctx);
317 return -ENOMEM;
318 }
319
rockchip_vpu981_av1_dec_exit(struct hantro_ctx * ctx)320 void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
321 {
322 struct hantro_dev *vpu = ctx->dev;
323 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
324
325 if (av1_dec->global_model.cpu)
326 dma_free_coherent(vpu->dev, av1_dec->global_model.size,
327 av1_dec->global_model.cpu,
328 av1_dec->global_model.dma);
329 av1_dec->global_model.cpu = NULL;
330
331 if (av1_dec->tile_info.cpu)
332 dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
333 av1_dec->tile_info.cpu,
334 av1_dec->tile_info.dma);
335 av1_dec->tile_info.cpu = NULL;
336
337 if (av1_dec->film_grain.cpu)
338 dma_free_coherent(vpu->dev, av1_dec->film_grain.size,
339 av1_dec->film_grain.cpu,
340 av1_dec->film_grain.dma);
341 av1_dec->film_grain.cpu = NULL;
342
343 if (av1_dec->prob_tbl.cpu)
344 dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
345 av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
346 av1_dec->prob_tbl.cpu = NULL;
347
348 if (av1_dec->prob_tbl_out.cpu)
349 dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
350 av1_dec->prob_tbl_out.cpu,
351 av1_dec->prob_tbl_out.dma);
352 av1_dec->prob_tbl_out.cpu = NULL;
353
354 if (av1_dec->tile_buf.cpu)
355 dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
356 av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
357 av1_dec->tile_buf.cpu = NULL;
358
359 rockchip_vpu981_av1_dec_tiles_free(ctx);
360 }
361
rockchip_vpu981_av1_dec_init(struct hantro_ctx * ctx)362 int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
363 {
364 struct hantro_dev *vpu = ctx->dev;
365 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
366
367 memset(av1_dec, 0, sizeof(*av1_dec));
368
369 av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
370 &av1_dec->global_model.dma,
371 GFP_KERNEL);
372 if (!av1_dec->global_model.cpu)
373 return -ENOMEM;
374 av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
375
376 av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
377 &av1_dec->tile_info.dma,
378 GFP_KERNEL);
379 if (!av1_dec->tile_info.cpu)
380 return -ENOMEM;
381 av1_dec->tile_info.size = AV1_MAX_TILES;
382
383 av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev,
384 ALIGN(sizeof(struct rockchip_av1_film_grain), 2048),
385 &av1_dec->film_grain.dma,
386 GFP_KERNEL);
387 if (!av1_dec->film_grain.cpu)
388 return -ENOMEM;
389 av1_dec->film_grain.size = ALIGN(sizeof(struct rockchip_av1_film_grain), 2048);
390
391 av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
392 ALIGN(sizeof(struct av1cdfs), 2048),
393 &av1_dec->prob_tbl.dma,
394 GFP_KERNEL);
395 if (!av1_dec->prob_tbl.cpu)
396 return -ENOMEM;
397 av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
398
399 av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
400 ALIGN(sizeof(struct av1cdfs), 2048),
401 &av1_dec->prob_tbl_out.dma,
402 GFP_KERNEL);
403 if (!av1_dec->prob_tbl_out.cpu)
404 return -ENOMEM;
405 av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
406 av1_dec->cdfs = &av1_dec->default_cdfs;
407 av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
408
409 rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
410
411 av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
412 AV1_TILE_SIZE,
413 &av1_dec->tile_buf.dma,
414 GFP_KERNEL);
415 if (!av1_dec->tile_buf.cpu)
416 return -ENOMEM;
417 av1_dec->tile_buf.size = AV1_TILE_SIZE;
418
419 return 0;
420 }
421
rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx * ctx)422 static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
423 {
424 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
425 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
426
427 ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
428 if (WARN_ON(!ctrls->sequence))
429 return -EINVAL;
430
431 ctrls->tile_group_entry =
432 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
433 if (WARN_ON(!ctrls->tile_group_entry))
434 return -EINVAL;
435
436 ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
437 if (WARN_ON(!ctrls->frame))
438 return -EINVAL;
439
440 ctrls->film_grain =
441 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
442
443 return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
444 }
445
/*
 * Return the bit position (0..31) of the most significant set bit of @n.
 * 0 is returned for n == 0, for which __builtin_clz() is undefined.
 */
static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
{
	return n ? 31 ^ __builtin_clz(n) : 0;
}
452
/*
 * Look up a fixed-point multiplier for dividing by @d.
 *
 * @d is split into its most significant set bit and the remainder below it;
 * the top DIV_LUT_BITS bits of the remainder select an entry of div_lut[].
 *
 * On success the table entry is returned and *@shift is set to the MSB
 * position plus DIV_LUT_PREC_BITS. When the computed table index exceeds
 * DIV_LUT_NUM, -1 is returned and *@shift holds only the MSB position.
 */
static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
{
	const short msb = rockchip_vpu981_av1_dec_get_msb(d);
	u64 remainder;
	int lut_idx;

	*shift = msb;

	/* What is left of d once its most significant 1 bit is cleared */
	remainder = d - ((u32)1 << msb);

	/* Reduce (or widen) the remainder to DIV_LUT_BITS significant bits */
	if (msb > DIV_LUT_BITS)
		lut_idx = AV1_DIV_ROUND_UP_POW2(remainder, msb - DIV_LUT_BITS);
	else
		lut_idx = remainder << (DIV_LUT_BITS - msb);

	if (lut_idx > DIV_LUT_NUM)
		return -1;

	*shift = msb + DIV_LUT_PREC_BITS;

	return div_lut[lut_idx];
}
472
/*
 * Derive the four shear values (alpha, beta, gamma, delta) from the six warp
 * parameters in @params.
 *
 * Each value is clamped to the s16 range and then rounded to a multiple of
 * 1 << WARP_PARAM_REDUCE_BITS. When params[2] is not strictly positive the
 * function returns without touching the outputs, so callers must initialise
 * them beforehand.
 */
static void
rockchip_vpu981_av1_dec_get_shear_params(const u32 *params, s64 *alpha,
					 s64 *beta, s64 *gamma, s64 *delta)
{
	/* The parameters are signed values, reinterpreted as int here */
	const int *mat = params;
	short shift;
	short y;
	long long gv, dv;

	if (mat[2] <= 0)
		return;

	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);

	/*
	 * y is the fixed-point multiplier standing in for a division by
	 * mat[2]. mat[2] is known to be positive at this point, so the sign
	 * factor is always 1.
	 * NOTE(review): a -1 error return from the resolver is not checked.
	 */
	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);

	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;

	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);

	dv = ((long long)mat[3] * mat[4]) * y;
	*delta = clamp_val(mat[5] -
		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
		S16_MIN, S16_MAX);

	/* Drop the low WARP_PARAM_REDUCE_BITS bits of precision, with rounding */
	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
		* (1 << WARP_PARAM_REDUCE_BITS);
	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
}
508
rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx * ctx)509 static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
510 {
511 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
512 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
513 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
514 const struct v4l2_av1_global_motion *gm = &frame->global_motion;
515 u8 *dst = av1_dec->global_model.cpu;
516 struct hantro_dev *vpu = ctx->dev;
517 int ref_frame, i;
518
519 memset(dst, 0, GLOBAL_MODEL_SIZE);
520 for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
521 s64 alpha = 0, beta = 0, gamma = 0, delta = 0;
522
523 for (i = 0; i < 6; ++i) {
524 if (i == 2)
525 *(s32 *)dst =
526 gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
527 else if (i == 3)
528 *(s32 *)dst =
529 gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
530 else
531 *(s32 *)dst =
532 gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
533 dst += 4;
534 }
535
536 if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
537 rockchip_vpu981_av1_dec_get_shear_params(&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
538 &alpha, &beta, &gamma, &delta);
539
540 *(s16 *)dst = alpha;
541 dst += 2;
542 *(s16 *)dst = beta;
543 dst += 2;
544 *(s16 *)dst = gamma;
545 dst += 2;
546 *(s16 *)dst = delta;
547 dst += 2;
548 }
549
550 hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
551 }
552
/*
 * Return the smallest k such that (1 << k) >= @target, i.e. the base-2
 * logarithm of @target rounded up. Targets of 1 or less yield 0.
 */
static int rockchip_vpu981_av1_tile_log2(int target)
{
	int k = 0;

	while ((1 << k) < target)
		k++;

	return k;
}
565
rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx * ctx)566 static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
567 {
568 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
569 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
570 const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
571 const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
572 ctrls->tile_group_entry;
573 int context_update_y =
574 tile_info->context_update_tile_id / tile_info->tile_cols;
575 int context_update_x =
576 tile_info->context_update_tile_id % tile_info->tile_cols;
577 int context_update_tile_id =
578 context_update_x * tile_info->tile_rows + context_update_y;
579 u8 *dst = av1_dec->tile_info.cpu;
580 struct hantro_dev *vpu = ctx->dev;
581 int tile0, tile1;
582
583 memset(dst, 0, av1_dec->tile_info.size);
584
585 for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
586 for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
587 int tile_id = tile1 * tile_info->tile_cols + tile0;
588 u32 start, end;
589 u32 y0 =
590 tile_info->height_in_sbs_minus_1[tile1] + 1;
591 u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;
592
593 /* tile size in SB units (width,height) */
594 *dst++ = x0;
595 *dst++ = 0;
596 *dst++ = 0;
597 *dst++ = 0;
598 *dst++ = y0;
599 *dst++ = 0;
600 *dst++ = 0;
601 *dst++ = 0;
602
603 /* tile start position */
604 start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
605 *dst++ = start & 255;
606 *dst++ = (start >> 8) & 255;
607 *dst++ = (start >> 16) & 255;
608 *dst++ = (start >> 24) & 255;
609
610 /* number of bytes in tile data */
611 end = start + group_entry[tile_id].tile_size;
612 *dst++ = end & 255;
613 *dst++ = (end >> 8) & 255;
614 *dst++ = (end >> 16) & 255;
615 *dst++ = (end >> 24) & 255;
616 }
617 }
618
619 hantro_reg_write(vpu, &av1_multicore_expect_context_update, !!(context_update_x == 0));
620 hantro_reg_write(vpu, &av1_tile_enable,
621 !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1)));
622 hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
623 hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
624 hantro_reg_write(vpu, &av1_context_update_tile_id, context_update_tile_id);
625 hantro_reg_write(vpu, &av1_tile_transpose, 1);
626 if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
627 rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
628 hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
629 else
630 hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
631
632 hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
633 }
634
rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx * ctx,int a,int b)635 static int rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx *ctx,
636 int a, int b)
637 {
638 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
639 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
640 int bits = ctrls->sequence->order_hint_bits - 1;
641 int diff, m;
642
643 if (!ctrls->sequence->order_hint_bits)
644 return 0;
645
646 diff = a - b;
647 m = 1 << bits;
648 diff = (diff & (m - 1)) - (diff & m);
649
650 return diff;
651 }
652
rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx * ctx)653 static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
654 {
655 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
656 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
657 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
658 const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
659 int i;
660
661 if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
662 for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
663 av1_dec->ref_frame_sign_bias[i] = 0;
664
665 return;
666 }
667 // Identify the nearest forward and backward references.
668 for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; i++) {
669 if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
670 int rel_off =
671 rockchip_vpu981_av1_dec_get_dist(ctx,
672 rockchip_vpu981_get_order_hint(ctx, i),
673 frame->order_hint);
674 av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
675 }
676 }
677 }
678
679 static bool
rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx * ctx,int ref,int idx,int width,int height)680 rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
681 int width, int height)
682 {
683 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
684 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
685 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
686 struct hantro_dev *vpu = ctx->dev;
687 struct hantro_decoded_buffer *dst;
688 dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
689 size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
690 size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
691 int cur_width = frame->frame_width_minus_1 + 1;
692 int cur_height = frame->frame_height_minus_1 + 1;
693 int scale_width =
694 ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
695 int scale_height =
696 ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;
697
698 switch (ref) {
699 case 0:
700 hantro_reg_write(vpu, &av1_ref0_height, height);
701 hantro_reg_write(vpu, &av1_ref0_width, width);
702 hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
703 hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
704 break;
705 case 1:
706 hantro_reg_write(vpu, &av1_ref1_height, height);
707 hantro_reg_write(vpu, &av1_ref1_width, width);
708 hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
709 hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
710 break;
711 case 2:
712 hantro_reg_write(vpu, &av1_ref2_height, height);
713 hantro_reg_write(vpu, &av1_ref2_width, width);
714 hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
715 hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
716 break;
717 case 3:
718 hantro_reg_write(vpu, &av1_ref3_height, height);
719 hantro_reg_write(vpu, &av1_ref3_width, width);
720 hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
721 hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
722 break;
723 case 4:
724 hantro_reg_write(vpu, &av1_ref4_height, height);
725 hantro_reg_write(vpu, &av1_ref4_width, width);
726 hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
727 hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
728 break;
729 case 5:
730 hantro_reg_write(vpu, &av1_ref5_height, height);
731 hantro_reg_write(vpu, &av1_ref5_width, width);
732 hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
733 hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
734 break;
735 case 6:
736 hantro_reg_write(vpu, &av1_ref6_height, height);
737 hantro_reg_write(vpu, &av1_ref6_width, width);
738 hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
739 hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
740 break;
741 default:
742 pr_warn("AV1 invalid reference frame index\n");
743 }
744
745 dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
746 luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
747 chroma_addr = luma_addr + cr_offset;
748 mv_addr = luma_addr + mv_offset;
749
750 hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
751 hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
752 hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);
753
754 return (scale_width != (1 << AV1_REF_SCALE_SHIFT)) ||
755 (scale_height != (1 << AV1_REF_SCALE_SHIFT));
756 }
757
rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx * ctx,int ref,int val)758 static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
759 int ref, int val)
760 {
761 struct hantro_dev *vpu = ctx->dev;
762
763 switch (ref) {
764 case 0:
765 hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
766 break;
767 case 1:
768 hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
769 break;
770 case 2:
771 hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
772 break;
773 case 3:
774 hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
775 break;
776 case 4:
777 hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
778 break;
779 case 5:
780 hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
781 break;
782 case 6:
783 hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
784 break;
785 default:
786 pr_warn("AV1 invalid sign bias index\n");
787 break;
788 }
789 }
790
rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx * ctx)791 static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
792 {
793 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
794 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
795 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
796 const struct v4l2_av1_segmentation *seg = &frame->segmentation;
797 u32 segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
798 struct hantro_dev *vpu = ctx->dev;
799 u8 segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
800
801 if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) &&
802 frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME) {
803 int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
804
805 if (idx >= 0) {
806 dma_addr_t luma_addr, mv_addr = 0;
807 struct hantro_decoded_buffer *seg;
808 size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
809
810 seg = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
811 luma_addr = hantro_get_dec_buf_addr(ctx, &seg->base.vb.vb2_buf);
812 mv_addr = luma_addr + mv_offset;
813
814 hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
815 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
816 }
817 }
818
819 hantro_reg_write(vpu, &av1_segment_temp_upd_e,
820 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
821 hantro_reg_write(vpu, &av1_segment_upd_e,
822 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
823 hantro_reg_write(vpu, &av1_segment_e,
824 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
825
826 hantro_reg_write(vpu, &av1_error_resilient,
827 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
828
829 if (IS_INTRA(frame->frame_type) ||
830 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
831 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
832 }
833
834 if (seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) {
835 int s;
836
837 for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
838 if (seg->feature_enabled[s] &
839 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
840 segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
841 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]),
842 0, 255);
843 segsign |=
844 (seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
845 }
846
847 if (seg->feature_enabled[s] &
848 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
849 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
850 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]),
851 -63, 63);
852
853 if (seg->feature_enabled[s] &
854 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
855 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
856 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]),
857 -63, 63);
858
859 if (seg->feature_enabled[s] &
860 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
861 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
862 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]),
863 -63, 63);
864
865 if (seg->feature_enabled[s] &
866 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
867 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
868 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]),
869 -63, 63);
870
871 if (frame->frame_type && seg->feature_enabled[s] &
872 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
873 segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
874
875 if (seg->feature_enabled[s] &
876 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
877 segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
878
879 if (seg->feature_enabled[s] &
880 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
881 segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
882 }
883 }
884
885 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
886 for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
887 if (seg->feature_enabled[i]
888 & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
889 preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
890 last_active_seg = max(i, last_active_seg);
891 }
892 }
893 }
894
895 hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
896 hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
897
898 hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
899
900 /* Write QP, filter level, ref frame and skip for every segment */
901 hantro_reg_write(vpu, &av1_quant_seg0,
902 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
903 hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
904 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
905 hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
906 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
907 hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
908 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
909 hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
910 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
911 hantro_reg_write(vpu, &av1_refpic_seg0,
912 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
913 hantro_reg_write(vpu, &av1_skip_seg0,
914 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
915 hantro_reg_write(vpu, &av1_global_mv_seg0,
916 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
917
918 hantro_reg_write(vpu, &av1_quant_seg1,
919 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
920 hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
921 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
922 hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
923 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
924 hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
925 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
926 hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
927 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
928 hantro_reg_write(vpu, &av1_refpic_seg1,
929 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
930 hantro_reg_write(vpu, &av1_skip_seg1,
931 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
932 hantro_reg_write(vpu, &av1_global_mv_seg1,
933 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
934
935 hantro_reg_write(vpu, &av1_quant_seg2,
936 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
937 hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
938 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
939 hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
940 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
941 hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
942 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
943 hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
944 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
945 hantro_reg_write(vpu, &av1_refpic_seg2,
946 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
947 hantro_reg_write(vpu, &av1_skip_seg2,
948 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
949 hantro_reg_write(vpu, &av1_global_mv_seg2,
950 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
951
952 hantro_reg_write(vpu, &av1_quant_seg3,
953 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
954 hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
955 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
956 hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
957 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
958 hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
959 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
960 hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
961 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
962 hantro_reg_write(vpu, &av1_refpic_seg3,
963 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
964 hantro_reg_write(vpu, &av1_skip_seg3,
965 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
966 hantro_reg_write(vpu, &av1_global_mv_seg3,
967 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
968
969 hantro_reg_write(vpu, &av1_quant_seg4,
970 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
971 hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
972 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
973 hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
974 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
975 hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
976 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
977 hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
978 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
979 hantro_reg_write(vpu, &av1_refpic_seg4,
980 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
981 hantro_reg_write(vpu, &av1_skip_seg4,
982 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
983 hantro_reg_write(vpu, &av1_global_mv_seg4,
984 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
985
986 hantro_reg_write(vpu, &av1_quant_seg5,
987 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
988 hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
989 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
990 hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
991 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
992 hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
993 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
994 hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
995 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
996 hantro_reg_write(vpu, &av1_refpic_seg5,
997 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
998 hantro_reg_write(vpu, &av1_skip_seg5,
999 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
1000 hantro_reg_write(vpu, &av1_global_mv_seg5,
1001 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1002
1003 hantro_reg_write(vpu, &av1_quant_seg6,
1004 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
1005 hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
1006 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1007 hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
1008 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1009 hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
1010 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1011 hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
1012 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1013 hantro_reg_write(vpu, &av1_refpic_seg6,
1014 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
1015 hantro_reg_write(vpu, &av1_skip_seg6,
1016 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
1017 hantro_reg_write(vpu, &av1_global_mv_seg6,
1018 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1019
1020 hantro_reg_write(vpu, &av1_quant_seg7,
1021 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
1022 hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
1023 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1024 hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
1025 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1026 hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
1027 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1028 hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
1029 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1030 hantro_reg_write(vpu, &av1_refpic_seg7,
1031 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
1032 hantro_reg_write(vpu, &av1_skip_seg7,
1033 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
1034 hantro_reg_write(vpu, &av1_global_mv_seg7,
1035 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1036 }
1037
rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx * ctx)1038 static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
1039 {
1040 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1041 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1042 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1043 const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
1044 const struct v4l2_av1_quantization *quantization = &frame->quantization;
1045 int i;
1046
1047 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1048 int qindex = quantization->base_q_idx;
1049
1050 if (segmentation->feature_enabled[i] &
1051 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
1052 qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
1053 }
1054 qindex = clamp(qindex, 0, 255);
1055
1056 if (qindex ||
1057 quantization->delta_q_y_dc ||
1058 quantization->delta_q_u_dc ||
1059 quantization->delta_q_u_ac ||
1060 quantization->delta_q_v_dc ||
1061 quantization->delta_q_v_ac)
1062 return false;
1063 }
1064 return true;
1065 }
1066
rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx * ctx)1067 static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
1068 {
1069 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1070 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1071 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1072 const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
1073 bool filtering_dis = (loop_filter->level[0] == 0) && (loop_filter->level[1] == 0);
1074 struct hantro_dev *vpu = ctx->dev;
1075
1076 hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
1077 hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
1078 hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
1079
1080 hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
1081 hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
1082 hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
1083 hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
1084
1085 if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED &&
1086 !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
1087 !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
1088 hantro_reg_write(vpu, &av1_filt_ref_adj_0,
1089 loop_filter->ref_deltas[0]);
1090 hantro_reg_write(vpu, &av1_filt_ref_adj_1,
1091 loop_filter->ref_deltas[1]);
1092 hantro_reg_write(vpu, &av1_filt_ref_adj_2,
1093 loop_filter->ref_deltas[2]);
1094 hantro_reg_write(vpu, &av1_filt_ref_adj_3,
1095 loop_filter->ref_deltas[3]);
1096 hantro_reg_write(vpu, &av1_filt_ref_adj_4,
1097 loop_filter->ref_deltas[4]);
1098 hantro_reg_write(vpu, &av1_filt_ref_adj_5,
1099 loop_filter->ref_deltas[5]);
1100 hantro_reg_write(vpu, &av1_filt_ref_adj_6,
1101 loop_filter->ref_deltas[6]);
1102 hantro_reg_write(vpu, &av1_filt_ref_adj_7,
1103 loop_filter->ref_deltas[7]);
1104 hantro_reg_write(vpu, &av1_filt_mb_adj_0,
1105 loop_filter->mode_deltas[0]);
1106 hantro_reg_write(vpu, &av1_filt_mb_adj_1,
1107 loop_filter->mode_deltas[1]);
1108 } else {
1109 hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
1110 hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
1111 hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
1112 hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
1113 hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
1114 hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
1115 hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
1116 hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
1117 hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
1118 hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
1119 }
1120
1121 hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
1122 hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
1123 }
1124
rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx * ctx)1125 static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
1126 {
1127 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1128 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1129 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1130 bool frame_is_intra = IS_INTRA(frame->frame_type);
1131 struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
1132 int i;
1133
1134 if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
1135 return;
1136
1137 for (i = 0; i < NUM_REF_FRAMES; i++) {
1138 if (frame->refresh_frame_flags & BIT(i)) {
1139 struct mvcdfs stored_mv_cdf;
1140
1141 rockchip_av1_get_cdfs(ctx, i);
1142 stored_mv_cdf = av1_dec->cdfs->mv_cdf;
1143 *av1_dec->cdfs = *out_cdfs;
1144 if (frame_is_intra) {
1145 av1_dec->cdfs->mv_cdf = stored_mv_cdf;
1146 *av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
1147 }
1148 rockchip_av1_store_cdfs(ctx,
1149 frame->refresh_frame_flags);
1150 break;
1151 }
1152 }
1153 }
1154
/*
 * Non-static entry point that only forwards to
 * rockchip_vpu981_av1_dec_update_prob() to save the updated CDFs.
 *
 * NOTE(review): presumably called once the hardware has finished decoding
 * a frame - confirm against the caller.
 */
void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
{
	rockchip_vpu981_av1_dec_update_prob(ctx);
}
1159
rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx * ctx)1160 static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
1161 {
1162 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1163 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1164 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1165 const struct v4l2_av1_quantization *quantization = &frame->quantization;
1166 struct hantro_dev *vpu = ctx->dev;
1167 bool error_resilient_mode =
1168 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
1169 bool frame_is_intra = IS_INTRA(frame->frame_type);
1170
1171 if (error_resilient_mode || frame_is_intra ||
1172 frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
1173 av1_dec->cdfs = &av1_dec->default_cdfs;
1174 av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
1175 rockchip_av1_default_coeff_probs(quantization->base_q_idx,
1176 av1_dec->cdfs);
1177 } else {
1178 rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
1179 }
1180 rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
1181
1182 memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
1183
1184 if (frame_is_intra) {
1185 int mv_offset = offsetof(struct av1cdfs, mv_cdf);
1186 /* Overwrite MV context area with intrabc MV context */
1187 memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
1188 sizeof(struct mvcdfs));
1189 }
1190
1191 hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
1192 hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
1193 }
1194
1195 static void
rockchip_vpu981_av1_dec_init_scaling_function(const u8 * values,const u8 * scaling,u8 num_points,u8 * scaling_lut)1196 rockchip_vpu981_av1_dec_init_scaling_function(const u8 *values, const u8 *scaling,
1197 u8 num_points, u8 *scaling_lut)
1198 {
1199 int i, point;
1200
1201 if (num_points == 0) {
1202 memset(scaling_lut, 0, 256);
1203 return;
1204 }
1205
1206 for (point = 0; point < num_points - 1; point++) {
1207 int x;
1208 s32 delta_y = scaling[point + 1] - scaling[point];
1209 s32 delta_x = values[point + 1] - values[point];
1210 s64 delta =
1211 delta_x ? delta_y * ((65536 + (delta_x >> 1)) /
1212 delta_x) : 0;
1213
1214 for (x = 0; x < delta_x; x++) {
1215 scaling_lut[values[point] + x] =
1216 scaling[point] +
1217 (s32)((x * delta + 32768) >> 16);
1218 }
1219 }
1220
1221 for (i = values[num_points - 1]; i < 256; i++)
1222 scaling_lut[i] = scaling[num_points - 1];
1223 }
1224
rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx * ctx)1225 static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
1226 {
1227 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1228 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1229 const struct v4l2_ctrl_av1_film_grain *film_grain = ctrls->film_grain;
1230 struct rockchip_av1_film_grain *fgmem = av1_dec->film_grain.cpu;
1231 struct hantro_dev *vpu = ctx->dev;
1232 bool scaling_from_luma =
1233 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA);
1234 s32 (*ar_coeffs_y)[24];
1235 s32 (*ar_coeffs_cb)[25];
1236 s32 (*ar_coeffs_cr)[25];
1237 s32 (*luma_grain_block)[73][82];
1238 s32 (*cb_grain_block)[38][44];
1239 s32 (*cr_grain_block)[38][44];
1240 s32 ar_coeff_lag, ar_coeff_shift;
1241 s32 grain_scale_shift, bitdepth;
1242 s32 grain_center, grain_min, grain_max;
1243 int i, j;
1244
1245 hantro_reg_write(vpu, &av1_apply_grain, 0);
1246
1247 if (!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN)) {
1248 hantro_reg_write(vpu, &av1_num_y_points_b, 0);
1249 hantro_reg_write(vpu, &av1_num_cb_points_b, 0);
1250 hantro_reg_write(vpu, &av1_num_cr_points_b, 0);
1251 hantro_reg_write(vpu, &av1_scaling_shift, 0);
1252 hantro_reg_write(vpu, &av1_cb_mult, 0);
1253 hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1254 hantro_reg_write(vpu, &av1_cb_offset, 0);
1255 hantro_reg_write(vpu, &av1_cr_mult, 0);
1256 hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1257 hantro_reg_write(vpu, &av1_cr_offset, 0);
1258 hantro_reg_write(vpu, &av1_overlap_flag, 0);
1259 hantro_reg_write(vpu, &av1_clip_to_restricted_range, 0);
1260 hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, 0);
1261 hantro_reg_write(vpu, &av1_random_seed, 0);
1262 hantro_write_addr(vpu, AV1_FILM_GRAIN, 0);
1263 return;
1264 }
1265
1266 ar_coeffs_y = kzalloc(sizeof(int32_t) * 24, GFP_KERNEL);
1267 ar_coeffs_cb = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1268 ar_coeffs_cr = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1269 luma_grain_block = kzalloc(sizeof(int32_t) * 73 * 82, GFP_KERNEL);
1270 cb_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1271 cr_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1272
1273 if (!ar_coeffs_y || !ar_coeffs_cb || !ar_coeffs_cr ||
1274 !luma_grain_block || !cb_grain_block || !cr_grain_block) {
1275 pr_warn("Fail allocating memory for film grain parameters\n");
1276 goto alloc_fail;
1277 }
1278
1279 hantro_reg_write(vpu, &av1_apply_grain, 1);
1280
1281 hantro_reg_write(vpu, &av1_num_y_points_b,
1282 film_grain->num_y_points > 0);
1283 hantro_reg_write(vpu, &av1_num_cb_points_b,
1284 film_grain->num_cb_points > 0);
1285 hantro_reg_write(vpu, &av1_num_cr_points_b,
1286 film_grain->num_cr_points > 0);
1287 hantro_reg_write(vpu, &av1_scaling_shift,
1288 film_grain->grain_scaling_minus_8 + 8);
1289
1290 if (!scaling_from_luma) {
1291 hantro_reg_write(vpu, &av1_cb_mult, film_grain->cb_mult - 128);
1292 hantro_reg_write(vpu, &av1_cb_luma_mult, film_grain->cb_luma_mult - 128);
1293 hantro_reg_write(vpu, &av1_cb_offset, film_grain->cb_offset - 256);
1294 hantro_reg_write(vpu, &av1_cr_mult, film_grain->cr_mult - 128);
1295 hantro_reg_write(vpu, &av1_cr_luma_mult, film_grain->cr_luma_mult - 128);
1296 hantro_reg_write(vpu, &av1_cr_offset, film_grain->cr_offset - 256);
1297 } else {
1298 hantro_reg_write(vpu, &av1_cb_mult, 0);
1299 hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1300 hantro_reg_write(vpu, &av1_cb_offset, 0);
1301 hantro_reg_write(vpu, &av1_cr_mult, 0);
1302 hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1303 hantro_reg_write(vpu, &av1_cr_offset, 0);
1304 }
1305
1306 hantro_reg_write(vpu, &av1_overlap_flag,
1307 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP));
1308 hantro_reg_write(vpu, &av1_clip_to_restricted_range,
1309 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE));
1310 hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, scaling_from_luma);
1311 hantro_reg_write(vpu, &av1_random_seed, film_grain->grain_seed);
1312
1313 rockchip_vpu981_av1_dec_init_scaling_function(film_grain->point_y_value,
1314 film_grain->point_y_scaling,
1315 film_grain->num_y_points,
1316 fgmem->scaling_lut_y);
1317
1318 if (film_grain->flags &
1319 V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA) {
1320 memcpy(fgmem->scaling_lut_cb, fgmem->scaling_lut_y,
1321 sizeof(*fgmem->scaling_lut_y) * 256);
1322 memcpy(fgmem->scaling_lut_cr, fgmem->scaling_lut_y,
1323 sizeof(*fgmem->scaling_lut_y) * 256);
1324 } else {
1325 rockchip_vpu981_av1_dec_init_scaling_function
1326 (film_grain->point_cb_value, film_grain->point_cb_scaling,
1327 film_grain->num_cb_points, fgmem->scaling_lut_cb);
1328 rockchip_vpu981_av1_dec_init_scaling_function
1329 (film_grain->point_cr_value, film_grain->point_cr_scaling,
1330 film_grain->num_cr_points, fgmem->scaling_lut_cr);
1331 }
1332
1333 for (i = 0; i < V4L2_AV1_AR_COEFFS_SIZE; i++) {
1334 if (i < 24)
1335 (*ar_coeffs_y)[i] = film_grain->ar_coeffs_y_plus_128[i] - 128;
1336 (*ar_coeffs_cb)[i] = film_grain->ar_coeffs_cb_plus_128[i] - 128;
1337 (*ar_coeffs_cr)[i] = film_grain->ar_coeffs_cr_plus_128[i] - 128;
1338 }
1339
1340 ar_coeff_lag = film_grain->ar_coeff_lag;
1341 ar_coeff_shift = film_grain->ar_coeff_shift_minus_6 + 6;
1342 grain_scale_shift = film_grain->grain_scale_shift;
1343 bitdepth = ctx->bit_depth;
1344 grain_center = 128 << (bitdepth - 8);
1345 grain_min = 0 - grain_center;
1346 grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;
1347
1348 rockchip_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
1349 film_grain->num_y_points, grain_scale_shift,
1350 ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
1351 grain_min, grain_max, film_grain->grain_seed);
1352
1353 rockchip_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
1354 cr_grain_block, bitdepth,
1355 film_grain->num_y_points,
1356 film_grain->num_cb_points,
1357 film_grain->num_cr_points,
1358 grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
1359 ar_coeffs_cr, ar_coeff_shift, grain_min,
1360 grain_max,
1361 scaling_from_luma,
1362 film_grain->grain_seed);
1363
1364 for (i = 0; i < 64; i++) {
1365 for (j = 0; j < 64; j++)
1366 fgmem->cropped_luma_grain_block[i * 64 + j] =
1367 (*luma_grain_block)[i + 9][j + 9];
1368 }
1369
1370 for (i = 0; i < 32; i++) {
1371 for (j = 0; j < 32; j++) {
1372 fgmem->cropped_chroma_grain_block[i * 64 + 2 * j] =
1373 (*cb_grain_block)[i + 6][j + 6];
1374 fgmem->cropped_chroma_grain_block[i * 64 + 2 * j + 1] =
1375 (*cr_grain_block)[i + 6][j + 6];
1376 }
1377 }
1378
1379 hantro_write_addr(vpu, AV1_FILM_GRAIN, av1_dec->film_grain.dma);
1380
1381 alloc_fail:
1382 kfree(ar_coeffs_y);
1383 kfree(ar_coeffs_cb);
1384 kfree(ar_coeffs_cr);
1385 kfree(luma_grain_block);
1386 kfree(cb_grain_block);
1387 kfree(cr_grain_block);
1388 }
1389
rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx * ctx)1390 static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
1391 {
1392 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1393 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1394 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1395 const struct v4l2_av1_cdef *cdef = &frame->cdef;
1396 struct hantro_dev *vpu = ctx->dev;
1397 u32 luma_pri_strength = 0;
1398 u16 luma_sec_strength = 0;
1399 u32 chroma_pri_strength = 0;
1400 u16 chroma_sec_strength = 0;
1401 int i;
1402
1403 hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
1404 hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
1405
1406 for (i = 0; i < BIT(cdef->bits); i++) {
1407 luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
1408 if (cdef->y_sec_strength[i] == 4)
1409 luma_sec_strength |= 3 << (i * 2);
1410 else
1411 luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
1412
1413 chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
1414 if (cdef->uv_sec_strength[i] == 4)
1415 chroma_sec_strength |= 3 << (i * 2);
1416 else
1417 chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
1418 }
1419
1420 hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
1421 luma_pri_strength);
1422 hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
1423 luma_sec_strength);
1424 hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
1425 chroma_pri_strength);
1426 hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
1427 chroma_sec_strength);
1428
1429 hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
1430 }
1431
rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx * ctx)1432 static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
1433 {
1434 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1435 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1436 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1437 const struct v4l2_av1_loop_restoration *loop_restoration =
1438 &frame->loop_restoration;
1439 struct hantro_dev *vpu = ctx->dev;
1440 u16 lr_type = 0, lr_unit_size = 0;
1441 u8 restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
1442 int i;
1443
1444 if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
1445 restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
1446 restoration_unit_size[1] =
1447 1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1448 restoration_unit_size[2] =
1449 1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1450 }
1451
1452 for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1453 lr_type |=
1454 loop_restoration->frame_restoration_type[i] << (i * 2);
1455 lr_unit_size |= restoration_unit_size[i] << (i * 2);
1456 }
1457
1458 hantro_reg_write(vpu, &av1_lr_type, lr_type);
1459 hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
1460 hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
1461 }
1462
rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx * ctx)1463 static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
1464 {
1465 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1466 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1467 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1468 struct hantro_dev *vpu = ctx->dev;
1469 u8 superres_scale_denominator = SCALE_NUMERATOR;
1470 int superres_luma_step = RS_SCALE_SUBPEL_BITS;
1471 int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
1472 int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
1473 int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
1474 int superres_init_luma_subpel_x = 0;
1475 int superres_init_chroma_subpel_x = 0;
1476 int superres_is_scaled = 0;
1477 int min_w = min_t(uint32_t, 16, frame->upscaled_width);
1478 int upscaled_luma, downscaled_luma;
1479 int downscaled_chroma, upscaled_chroma;
1480 int step_luma, step_chroma;
1481 int err_luma, err_chroma;
1482 int initial_luma, initial_chroma;
1483 int width = 0;
1484
1485 if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
1486 superres_scale_denominator = frame->superres_denom;
1487
1488 if (superres_scale_denominator <= SCALE_NUMERATOR)
1489 goto set_regs;
1490
1491 width = (frame->upscaled_width * SCALE_NUMERATOR +
1492 (superres_scale_denominator / 2)) / superres_scale_denominator;
1493
1494 if (width < min_w)
1495 width = min_w;
1496
1497 if (width == frame->upscaled_width)
1498 goto set_regs;
1499
1500 superres_is_scaled = 1;
1501 upscaled_luma = frame->upscaled_width;
1502 downscaled_luma = width;
1503 downscaled_chroma = (downscaled_luma + 1) >> 1;
1504 upscaled_chroma = (upscaled_luma + 1) >> 1;
1505 step_luma =
1506 ((downscaled_luma << RS_SCALE_SUBPEL_BITS) +
1507 (upscaled_luma / 2)) / upscaled_luma;
1508 step_chroma =
1509 ((downscaled_chroma << RS_SCALE_SUBPEL_BITS) +
1510 (upscaled_chroma / 2)) / upscaled_chroma;
1511 err_luma =
1512 (upscaled_luma * step_luma)
1513 - (downscaled_luma << RS_SCALE_SUBPEL_BITS);
1514 err_chroma =
1515 (upscaled_chroma * step_chroma)
1516 - (downscaled_chroma << RS_SCALE_SUBPEL_BITS);
1517 initial_luma =
1518 ((-((upscaled_luma - downscaled_luma) << (RS_SCALE_SUBPEL_BITS - 1))
1519 + upscaled_luma / 2)
1520 / upscaled_luma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_luma / 2)
1521 & RS_SCALE_SUBPEL_MASK;
1522 initial_chroma =
1523 ((-((upscaled_chroma - downscaled_chroma) << (RS_SCALE_SUBPEL_BITS - 1))
1524 + upscaled_chroma / 2)
1525 / upscaled_chroma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_chroma / 2)
1526 & RS_SCALE_SUBPEL_MASK;
1527 superres_luma_step = step_luma;
1528 superres_chroma_step = step_chroma;
1529 superres_luma_step_invra =
1530 ((upscaled_luma << RS_SCALE_SUBPEL_BITS) + (downscaled_luma / 2))
1531 / downscaled_luma;
1532 superres_chroma_step_invra =
1533 ((upscaled_chroma << RS_SCALE_SUBPEL_BITS) + (downscaled_chroma / 2))
1534 / downscaled_chroma;
1535 superres_init_luma_subpel_x = initial_luma;
1536 superres_init_chroma_subpel_x = initial_chroma;
1537
1538 set_regs:
1539 hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);
1540
1541 if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
1542 hantro_reg_write(vpu, &av1_scale_denom_minus9,
1543 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
1544 else
1545 hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);
1546
1547 hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
1548 hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
1549 hantro_reg_write(vpu, &av1_superres_luma_step_invra,
1550 superres_luma_step_invra);
1551 hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
1552 superres_chroma_step_invra);
1553 hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
1554 superres_init_luma_subpel_x);
1555 hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
1556 superres_init_chroma_subpel_x);
1557 hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);
1558
1559 hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
1560 }
1561
rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx * ctx)1562 static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
1563 {
1564 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1565 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1566 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1567 struct hantro_dev *vpu = ctx->dev;
1568 int pic_width_in_cbs = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1569 int pic_height_in_cbs = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1570 int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
1571 - (frame->frame_width_minus_1 + 1);
1572 int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
1573 - (frame->frame_height_minus_1 + 1);
1574
1575 hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
1576 hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
1577 hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
1578 hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
1579
1580 rockchip_vpu981_av1_dec_set_superres_params(ctx);
1581 }
1582
rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx * ctx)1583 static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
1584 {
1585 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1586 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1587 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1588 struct hantro_dev *vpu = ctx->dev;
1589 bool use_ref_frame_mvs =
1590 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
1591 int cur_frame_offset = frame->order_hint;
1592 int alt_frame_offset = 0;
1593 int gld_frame_offset = 0;
1594 int bwd_frame_offset = 0;
1595 int alt2_frame_offset = 0;
1596 int refs_selected[3] = { 0, 0, 0 };
1597 int cur_mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1598 int cur_mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1599 int cur_offset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1600 int cur_roffset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1601 int mf_types[3] = { 0, 0, 0 };
1602 int ref_stamp = 2;
1603 int ref_ind = 0;
1604 int rf, idx;
1605
1606 alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
1607 gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
1608 bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
1609 alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
1610
1611 idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
1612 if (idx >= 0) {
1613 int alt_frame_offset_in_lst =
1614 av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
1615 bool is_lst_overlay =
1616 (alt_frame_offset_in_lst == gld_frame_offset);
1617
1618 if (!is_lst_overlay) {
1619 int lst_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1620 int lst_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1621 bool lst_intra_only =
1622 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1623
1624 if (lst_mi_cols == cur_mi_cols &&
1625 lst_mi_rows == cur_mi_rows && !lst_intra_only) {
1626 mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
1627 refs_selected[ref_ind++] = LST_BUF_IDX;
1628 }
1629 }
1630 ref_stamp--;
1631 }
1632
1633 idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
1634 if (rockchip_vpu981_av1_dec_get_dist(ctx, bwd_frame_offset, cur_frame_offset) > 0) {
1635 int bwd_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1636 int bwd_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1637 bool bwd_intra_only =
1638 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1639
1640 if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
1641 !bwd_intra_only) {
1642 mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
1643 refs_selected[ref_ind++] = BWD_BUF_IDX;
1644 ref_stamp--;
1645 }
1646 }
1647
1648 idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
1649 if (rockchip_vpu981_av1_dec_get_dist(ctx, alt2_frame_offset, cur_frame_offset) > 0) {
1650 int alt2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1651 int alt2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1652 bool alt2_intra_only =
1653 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1654
1655 if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows &&
1656 !alt2_intra_only) {
1657 mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
1658 refs_selected[ref_ind++] = ALT2_BUF_IDX;
1659 ref_stamp--;
1660 }
1661 }
1662
1663 idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
1664 if (rockchip_vpu981_av1_dec_get_dist(ctx, alt_frame_offset, cur_frame_offset) > 0 &&
1665 ref_stamp >= 0) {
1666 int alt_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1667 int alt_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1668 bool alt_intra_only =
1669 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1670
1671 if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
1672 !alt_intra_only) {
1673 mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
1674 refs_selected[ref_ind++] = ALT_BUF_IDX;
1675 ref_stamp--;
1676 }
1677 }
1678
1679 idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
1680 if (idx >= 0 && ref_stamp >= 0) {
1681 int lst2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1682 int lst2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1683 bool lst2_intra_only =
1684 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1685
1686 if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows &&
1687 !lst2_intra_only) {
1688 mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
1689 refs_selected[ref_ind++] = LST2_BUF_IDX;
1690 ref_stamp--;
1691 }
1692 }
1693
1694 for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
1695 idx = rockchip_vpu981_get_frame_index(ctx, rf);
1696 if (idx >= 0) {
1697 int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
1698
1699 cur_offset[rf] =
1700 rockchip_vpu981_av1_dec_get_dist(ctx, cur_frame_offset, rf_order_hint);
1701 cur_roffset[rf] =
1702 rockchip_vpu981_av1_dec_get_dist(ctx, rf_order_hint, cur_frame_offset);
1703 } else {
1704 cur_offset[rf] = 0;
1705 cur_roffset[rf] = 0;
1706 }
1707 }
1708
1709 hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
1710 hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
1711 hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
1712 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
1713
1714 hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
1715 hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
1716 hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
1717 hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
1718 hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
1719 hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
1720 hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
1721
1722 if (use_ref_frame_mvs && ref_ind > 0 &&
1723 cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1724 cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1725 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
1726 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
1727 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1728 int val;
1729
1730 hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
1731
1732 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1733 hantro_reg_write(vpu, &av1_mf1_last_offset, val);
1734
1735 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1736 hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
1737
1738 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1739 hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
1740
1741 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1742 hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
1743
1744 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1745 hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
1746
1747 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1748 hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
1749
1750 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1751 hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
1752 }
1753
1754 hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
1755 hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
1756 hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
1757 hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
1758 hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
1759 hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
1760 hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
1761
1762 if (use_ref_frame_mvs && ref_ind > 1 &&
1763 cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1764 cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1765 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
1766 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
1767 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1768 int val;
1769
1770 hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
1771
1772 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1773 hantro_reg_write(vpu, &av1_mf2_last_offset, val);
1774
1775 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1776 hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
1777
1778 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1779 hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
1780
1781 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1782 hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
1783
1784 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1785 hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
1786
1787 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1788 hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
1789
1790 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1791 hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
1792 }
1793
1794 hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
1795 hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
1796 hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
1797 hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
1798 hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
1799 hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
1800 hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
1801
1802 if (use_ref_frame_mvs && ref_ind > 2 &&
1803 cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1804 cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1805 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
1806 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
1807 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1808 int val;
1809
1810 hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
1811
1812 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1813 hantro_reg_write(vpu, &av1_mf3_last_offset, val);
1814
1815 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1816 hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
1817
1818 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1819 hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
1820
1821 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1822 hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
1823
1824 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1825 hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
1826
1827 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1828 hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
1829
1830 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1831 hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
1832 }
1833
1834 hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
1835 hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
1836 hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
1837 hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
1838 hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
1839 hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
1840 hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
1841
1842 hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
1843 hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
1844 hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
1845 hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
1846 hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
1847 hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
1848 hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
1849
1850 hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
1851 hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
1852 hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
1853 }
1854
rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx * ctx)1855 static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
1856 {
1857 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1858 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1859 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1860 int frame_type = frame->frame_type;
1861 bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
1862 int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
1863 struct hantro_dev *vpu = ctx->dev;
1864 int i, ref_frames = 0;
1865 bool scale_enable = false;
1866
1867 if (IS_INTRA(frame_type) && !allow_intrabc)
1868 return;
1869
1870 if (!allow_intrabc) {
1871 for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
1872 int idx = rockchip_vpu981_get_frame_index(ctx, i);
1873
1874 if (idx >= 0)
1875 ref_count[idx]++;
1876 }
1877
1878 for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
1879 if (ref_count[i])
1880 ref_frames++;
1881 }
1882 } else {
1883 ref_frames = 1;
1884 }
1885 hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
1886
1887 rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
1888
1889 for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
1890 u32 ref = i - 1;
1891 int idx = 0;
1892 int width, height;
1893
1894 if (allow_intrabc) {
1895 idx = av1_dec->current_frame_index;
1896 width = frame->frame_width_minus_1 + 1;
1897 height = frame->frame_height_minus_1 + 1;
1898 } else {
1899 if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
1900 idx = rockchip_vpu981_get_frame_index(ctx, ref);
1901 width = av1_dec->frame_refs[idx].width;
1902 height = av1_dec->frame_refs[idx].height;
1903 }
1904
1905 scale_enable |=
1906 rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
1907 height);
1908
1909 rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
1910 av1_dec->ref_frame_sign_bias[i]);
1911 }
1912 hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
1913
1914 hantro_reg_write(vpu, &av1_ref0_gm_mode,
1915 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
1916 hantro_reg_write(vpu, &av1_ref1_gm_mode,
1917 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
1918 hantro_reg_write(vpu, &av1_ref2_gm_mode,
1919 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
1920 hantro_reg_write(vpu, &av1_ref3_gm_mode,
1921 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
1922 hantro_reg_write(vpu, &av1_ref4_gm_mode,
1923 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
1924 hantro_reg_write(vpu, &av1_ref5_gm_mode,
1925 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
1926 hantro_reg_write(vpu, &av1_ref6_gm_mode,
1927 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
1928
1929 rockchip_vpu981_av1_dec_set_other_frames(ctx);
1930 }
1931
rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx * ctx)1932 static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
1933 {
1934 struct hantro_dev *vpu = ctx->dev;
1935 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1936 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1937
1938 hantro_reg_write(vpu, &av1_skip_mode,
1939 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
1940 hantro_reg_write(vpu, &av1_tempor_mvp_e,
1941 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
1942 hantro_reg_write(vpu, &av1_delta_lf_res_log,
1943 ctrls->frame->loop_filter.delta_lf_res);
1944 hantro_reg_write(vpu, &av1_delta_lf_multi,
1945 !!(ctrls->frame->loop_filter.flags
1946 & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
1947 hantro_reg_write(vpu, &av1_delta_lf_present,
1948 !!(ctrls->frame->loop_filter.flags
1949 & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
1950 hantro_reg_write(vpu, &av1_disable_cdf_update,
1951 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
1952 hantro_reg_write(vpu, &av1_allow_warp,
1953 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
1954 hantro_reg_write(vpu, &av1_show_frame,
1955 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
1956 hantro_reg_write(vpu, &av1_switchable_motion_mode,
1957 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
1958 hantro_reg_write(vpu, &av1_enable_cdef,
1959 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
1960 hantro_reg_write(vpu, &av1_allow_masked_compound,
1961 !!(ctrls->sequence->flags
1962 & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
1963 hantro_reg_write(vpu, &av1_allow_interintra,
1964 !!(ctrls->sequence->flags
1965 & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
1966 hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
1967 !!(ctrls->sequence->flags
1968 & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
1969 hantro_reg_write(vpu, &av1_allow_filter_intra,
1970 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
1971 hantro_reg_write(vpu, &av1_enable_jnt_comp,
1972 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
1973 hantro_reg_write(vpu, &av1_enable_dual_filter,
1974 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
1975 hantro_reg_write(vpu, &av1_reduced_tx_set_used,
1976 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
1977 hantro_reg_write(vpu, &av1_allow_screen_content_tools,
1978 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
1979 hantro_reg_write(vpu, &av1_allow_intrabc,
1980 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));
1981
1982 if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
1983 hantro_reg_write(vpu, &av1_force_interger_mv, 0);
1984 else
1985 hantro_reg_write(vpu, &av1_force_interger_mv,
1986 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));
1987
1988 hantro_reg_write(vpu, &av1_blackwhite_e, 0);
1989 hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
1990 hantro_reg_write(vpu, &av1_delta_q_present,
1991 !!(ctrls->frame->quantization.flags
1992 & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
1993
1994 hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
1995 hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
1996 hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
1997 hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);
1998
1999 hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
2000 hantro_reg_write(vpu, &av1_high_prec_mv_e,
2001 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
2002 hantro_reg_write(vpu, &av1_comp_pred_mode,
2003 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
2004 hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
2005 hantro_reg_write(vpu, &av1_max_cb_size,
2006 (ctrls->sequence->flags
2007 & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
2008 hantro_reg_write(vpu, &av1_min_cb_size, 3);
2009
2010 hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
2011 hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
2012 hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
2013 hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
2014 hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
2015 hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
2016 hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
2017 hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
2018 hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
2019 hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
2020 hantro_reg_write(vpu, &av1_filt_level_seg7, 0);
2021
2022 hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
2023 hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
2024 hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
2025 if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
2026 hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
2027 hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
2028 hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
2029 } else {
2030 hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
2031 hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
2032 hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
2033 }
2034
2035 hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
2036 hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
2037 hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);
2038
2039 hantro_reg_write(vpu, &av1_skip_ref0,
2040 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
2041 hantro_reg_write(vpu, &av1_skip_ref1,
2042 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);
2043
2044 hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
2045 hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
2046 }
2047
2048 static void
rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx * ctx,struct vb2_v4l2_buffer * vb2_src)2049 rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
2050 struct vb2_v4l2_buffer *vb2_src)
2051 {
2052 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2053 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
2054 const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
2055 ctrls->tile_group_entry;
2056 struct hantro_dev *vpu = ctx->dev;
2057 dma_addr_t src_dma;
2058 u32 src_len, src_buf_len;
2059 int start_bit, offset;
2060
2061 src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
2062 src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
2063 src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
2064
2065 start_bit = (group_entry[0].tile_offset & 0xf) * 8;
2066 offset = group_entry[0].tile_offset & ~0xf;
2067
2068 hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
2069 hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
2070 hantro_reg_write(vpu, &av1_stream_len, src_len);
2071 hantro_reg_write(vpu, &av1_strm_start_offset, 0);
2072 hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
2073 }
2074
2075 static void
rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx * ctx)2076 rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
2077 {
2078 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2079 struct hantro_dev *vpu = ctx->dev;
2080 struct hantro_decoded_buffer *dst;
2081 struct vb2_v4l2_buffer *vb2_dst;
2082 dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
2083 size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
2084 size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
2085
2086 vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
2087 dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
2088 luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
2089 chroma_addr = luma_addr + cr_offset;
2090 mv_addr = luma_addr + mv_offset;
2091
2092 hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
2093 hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
2094 hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
2095 }
2096
rockchip_vpu981_av1_dec_run(struct hantro_ctx * ctx)2097 int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
2098 {
2099 struct hantro_dev *vpu = ctx->dev;
2100 struct vb2_v4l2_buffer *vb2_src;
2101 int ret;
2102
2103 hantro_start_prepare_run(ctx);
2104
2105 ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
2106 if (ret)
2107 goto prepare_error;
2108
2109 vb2_src = hantro_get_src_buf(ctx);
2110 if (!vb2_src) {
2111 ret = -EINVAL;
2112 goto prepare_error;
2113 }
2114
2115 rockchip_vpu981_av1_dec_clean_refs(ctx);
2116 rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);
2117
2118 rockchip_vpu981_av1_dec_set_parameters(ctx);
2119 rockchip_vpu981_av1_dec_set_global_model(ctx);
2120 rockchip_vpu981_av1_dec_set_tile_info(ctx);
2121 rockchip_vpu981_av1_dec_set_reference_frames(ctx);
2122 rockchip_vpu981_av1_dec_set_segmentation(ctx);
2123 rockchip_vpu981_av1_dec_set_loopfilter(ctx);
2124 rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
2125 rockchip_vpu981_av1_dec_set_cdef(ctx);
2126 rockchip_vpu981_av1_dec_set_lr(ctx);
2127 rockchip_vpu981_av1_dec_set_fgs(ctx);
2128 rockchip_vpu981_av1_dec_set_prob(ctx);
2129
2130 hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
2131 hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
2132 hantro_reg_write(vpu, &av1_write_mvs_e, 1);
2133 hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
2134 hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);
2135
2136 hantro_reg_write(vpu, &av1_dec_abort_e, 0);
2137 hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);
2138
2139 hantro_reg_write(vpu, &av1_dec_alignment, 64);
2140 hantro_reg_write(vpu, &av1_apf_disable, 0);
2141 hantro_reg_write(vpu, &av1_apf_threshold, 8);
2142 hantro_reg_write(vpu, &av1_dec_buswidth, 2);
2143 hantro_reg_write(vpu, &av1_dec_max_burst, 16);
2144 hantro_reg_write(vpu, &av1_error_conceal_e, 0);
2145 hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
2146 hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);
2147
2148 hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
2149 hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
2150 hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
2151 hantro_reg_write(vpu, &av1_timeout_override_e, 1);
2152
2153 rockchip_vpu981_av1_dec_set_output_buffer(ctx);
2154 rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);
2155
2156 hantro_end_prepare_run(ctx);
2157
2158 hantro_reg_write(vpu, &av1_dec_e, 1);
2159
2160 return 0;
2161
2162 prepare_error:
2163 hantro_end_prepare_run(ctx);
2164 hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
2165 return ret;
2166 }
2167
rockchip_vpu981_postproc_enable(struct hantro_ctx * ctx)2168 static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
2169 {
2170 struct hantro_dev *vpu = ctx->dev;
2171 int width = ctx->dst_fmt.width;
2172 int height = ctx->dst_fmt.height;
2173 struct vb2_v4l2_buffer *vb2_dst;
2174 size_t chroma_offset;
2175 dma_addr_t dst_dma;
2176
2177 vb2_dst = hantro_get_dst_buf(ctx);
2178
2179 dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
2180 chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
2181 ctx->dst_fmt.height;
2182
2183 /* enable post processor */
2184 hantro_reg_write(vpu, &av1_pp_out_e, 1);
2185 hantro_reg_write(vpu, &av1_pp_in_format, 0);
2186 hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
2187 hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
2188
2189 hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
2190 hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
2191 hantro_reg_write(vpu, &av1_pp_out_height, height);
2192 hantro_reg_write(vpu, &av1_pp_out_width, width);
2193 hantro_reg_write(vpu, &av1_pp_out_y_stride,
2194 ctx->dst_fmt.plane_fmt[0].bytesperline);
2195 hantro_reg_write(vpu, &av1_pp_out_c_stride,
2196 ctx->dst_fmt.plane_fmt[0].bytesperline);
2197 switch (ctx->dst_fmt.pixelformat) {
2198 case V4L2_PIX_FMT_P010:
2199 hantro_reg_write(vpu, &av1_pp_out_format, 1);
2200 break;
2201 case V4L2_PIX_FMT_NV12:
2202 hantro_reg_write(vpu, &av1_pp_out_format, 3);
2203 break;
2204 default:
2205 hantro_reg_write(vpu, &av1_pp_out_format, 0);
2206 }
2207
2208 hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
2209 hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
2210 hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
2211 hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
2212 hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
2213 hantro_reg_write(vpu, &av1_pp_up_level, 0);
2214 hantro_reg_write(vpu, &av1_pp_down_level, 0);
2215 hantro_reg_write(vpu, &av1_pp_exist, 0);
2216
2217 hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
2218 hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
2219 }
2220
rockchip_vpu981_postproc_disable(struct hantro_ctx * ctx)2221 static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
2222 {
2223 struct hantro_dev *vpu = ctx->dev;
2224
2225 /* disable post processor */
2226 hantro_reg_write(vpu, &av1_pp_out_e, 0);
2227 }
2228
2229 const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
2230 .enable = rockchip_vpu981_postproc_enable,
2231 .disable = rockchip_vpu981_postproc_disable,
2232 };
2233