1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2023, Collabora
4 *
5 * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
6 */
7
8 #include <media/v4l2-mem2mem.h>
9 #include "hantro.h"
10 #include "hantro_v4l2.h"
11 #include "rockchip_vpu981_regs.h"
12
13 #define AV1_DEC_MODE 17
14 #define GM_GLOBAL_MODELS_PER_FRAME 7
15 #define GLOBAL_MODEL_TOTAL_SIZE (6 * 4 + 4 * 2)
16 #define GLOBAL_MODEL_SIZE ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
17 #define AV1_MAX_TILES 128
18 #define AV1_TILE_INFO_SIZE (AV1_MAX_TILES * 16)
19 #define AV1DEC_MAX_PIC_BUFFERS 24
20 #define AV1_REF_SCALE_SHIFT 14
21 #define AV1_INVALID_IDX -1
22 #define MAX_FRAME_DISTANCE 31
23 #define AV1_PRIMARY_REF_NONE 7
24 #define AV1_TILE_SIZE ALIGN(32 * 128, 4096)
/*
 * These 3 values aren't defined in enum v4l2_av1_segment_feature because
 * they are not part of the specification
 */
29 #define V4L2_AV1_SEG_LVL_ALT_LF_Y_H 2
30 #define V4L2_AV1_SEG_LVL_ALT_LF_U 3
31 #define V4L2_AV1_SEG_LVL_ALT_LF_V 4
32
33 #define SUPERRES_SCALE_BITS 3
34 #define SCALE_NUMERATOR 8
35 #define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
36
37 #define RS_SUBPEL_BITS 6
38 #define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
39 #define RS_SCALE_SUBPEL_BITS 14
40 #define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
41 #define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
42 #define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
43
44 #define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
45
46 #define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
47 #define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
48 #define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
49 #define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
50 #define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
51 #define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
52 #define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
53
54 #define DIV_LUT_PREC_BITS 14
55 #define DIV_LUT_BITS 8
56 #define DIV_LUT_NUM BIT(DIV_LUT_BITS)
57 #define WARP_PARAM_REDUCE_BITS 6
58 #define WARPEDMODEL_PREC_BITS 16
59
60 #define AV1_DIV_ROUND_UP_POW2(value, n) \
61 ({ \
62 typeof(n) _n = n; \
63 typeof(value) _value = value; \
64 (_value + (BIT(_n) >> 1)) >> _n; \
65 })
66
67 #define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n) \
68 ({ \
69 typeof(n) _n_ = n; \
70 typeof(value) _value_ = value; \
71 (((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_)) \
72 : AV1_DIV_ROUND_UP_POW2((_value_), (_n_))); \
73 })
74
/*
 * Film grain parameters in the layout consumed by the hardware.
 *
 * NOTE(review): field sizes presumably match the VPU981 film-grain buffer
 * format (the struct is copied into a 2048-aligned DMA buffer in
 * rockchip_vpu981_av1_dec_init()) - confirm against hardware documentation.
 */
struct rockchip_av1_film_grain {
	u8 scaling_lut_y[256];		/* luma grain scaling look-up table */
	u8 scaling_lut_cb[256];		/* Cb grain scaling look-up table */
	u8 scaling_lut_cr[256];		/* Cr grain scaling look-up table */
	s16 cropped_luma_grain_block[4096];
	s16 cropped_chroma_grain_block[1024 * 2];
};
82
/*
 * Precomputed reciprocal table used by
 * rockchip_vpu981_av1_dec_resolve_divisor_32(). Indexed by the top
 * DIV_LUT_BITS bits of the divisor mantissa; values run from
 * 1 << DIV_LUT_PREC_BITS (16384) down to 8192.
 */
static const short div_lut[DIV_LUT_NUM + 1] = {
	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
	8240,  8224,  8208,  8192,
};
109
rockchip_vpu981_get_frame_index(struct hantro_ctx * ctx,int ref)110 static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
111 {
112 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
113 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
114 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
115 u64 timestamp;
116 int i, idx = frame->ref_frame_idx[ref];
117
118 if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
119 return AV1_INVALID_IDX;
120
121 timestamp = frame->reference_frame_ts[idx];
122 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
123 if (!av1_dec->frame_refs[i].used)
124 continue;
125 if (av1_dec->frame_refs[i].timestamp == timestamp)
126 return i;
127 }
128
129 return AV1_INVALID_IDX;
130 }
131
rockchip_vpu981_get_order_hint(struct hantro_ctx * ctx,int ref)132 static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
133 {
134 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
135 int idx = rockchip_vpu981_get_frame_index(ctx, ref);
136
137 if (idx != AV1_INVALID_IDX)
138 return av1_dec->frame_refs[idx].order_hint;
139
140 return 0;
141 }
142
/*
 * Claim the first free slot in the frame_refs[] pool for the frame being
 * decoded and record its geometry, timestamp, type, order hints and
 * destination buffer. The claimed slot becomes current_frame_index.
 *
 * Returns the claimed slot index, or AV1_INVALID_IDX when all
 * AV1_MAX_FRAME_BUF_COUNT slots are already in use.
 */
static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
					     u64 timestamp)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	int i;

	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
		int j;

		if (av1_dec->frame_refs[i].used)
			continue;

		av1_dec->frame_refs[i].width = frame->frame_width_minus_1 + 1;
		av1_dec->frame_refs[i].height = frame->frame_height_minus_1 + 1;
		/* Frame size in units of 8 pixels, rounded up */
		av1_dec->frame_refs[i].mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
		av1_dec->frame_refs[i].mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
		av1_dec->frame_refs[i].timestamp = timestamp;
		av1_dec->frame_refs[i].frame_type = frame->frame_type;
		av1_dec->frame_refs[i].order_hint = frame->order_hint;
		av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);

		for (j = 0; j < V4L2_AV1_TOTAL_REFS_PER_FRAME; j++)
			av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
		av1_dec->frame_refs[i].used = true;
		av1_dec->current_frame_index = i;

		return i;
	}

	return AV1_INVALID_IDX;
}
176
rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx * ctx,int idx)177 static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
178 {
179 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
180
181 if (idx >= 0)
182 av1_dec->frame_refs[idx].used = false;
183 }
184
rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx * ctx)185 static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
186 {
187 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
188 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
189
190 int ref, idx;
191
192 for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
193 u64 timestamp = av1_dec->frame_refs[idx].timestamp;
194 bool used = false;
195
196 if (!av1_dec->frame_refs[idx].used)
197 continue;
198
199 for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
200 if (ctrls->frame->reference_frame_ts[ref] == timestamp)
201 used = true;
202 }
203
204 if (!used)
205 rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
206 }
207 }
208
/* Size in bytes of one decoded luma plane at the current bit depth. */
static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
{
	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
}
213
rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx * ctx)214 static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
215 {
216 size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
217
218 return ALIGN((cr_offset * 3) / 2, 64);
219 }
220
rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx * ctx)221 static void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
222 {
223 struct hantro_dev *vpu = ctx->dev;
224 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
225
226 if (av1_dec->db_data_col.cpu)
227 dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
228 av1_dec->db_data_col.cpu,
229 av1_dec->db_data_col.dma);
230 av1_dec->db_data_col.cpu = NULL;
231
232 if (av1_dec->db_ctrl_col.cpu)
233 dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
234 av1_dec->db_ctrl_col.cpu,
235 av1_dec->db_ctrl_col.dma);
236 av1_dec->db_ctrl_col.cpu = NULL;
237
238 if (av1_dec->cdef_col.cpu)
239 dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
240 av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
241 av1_dec->cdef_col.cpu = NULL;
242
243 if (av1_dec->sr_col.cpu)
244 dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
245 av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
246 av1_dec->sr_col.cpu = NULL;
247
248 if (av1_dec->lr_col.cpu)
249 dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
250 av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
251 av1_dec->lr_col.cpu = NULL;
252 }
253
/*
 * (Re)allocate the per-tile-column auxiliary buffers sized for the current
 * frame height and number of tile columns: deblocking data and control
 * columns, CDEF, super-resolution and loop-restoration columns.
 *
 * Returns 0 on success (including when the existing buffers are already
 * large enough) or -ENOMEM after freeing everything on failure.
 *
 * NOTE(review): only db_data_col.size is checked to decide whether the
 * whole set is large enough - this assumes all five buffers are always
 * allocated together, which the code below maintains.
 */
static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
	unsigned int height_in_sb = height / 64;	/* 64x64 superblock rows */
	unsigned int stripe_num = ((height + 8) + 63) / 64;
	size_t size;

	/* Current buffers already big enough for this frame/tile layout? */
	if (av1_dec->db_data_col.size >=
	    ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
		return 0;

	rockchip_vpu981_av1_dec_tiles_free(ctx);

	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
						      &av1_dec->db_data_col.dma,
						      GFP_KERNEL);
	if (!av1_dec->db_data_col.cpu)
		goto buffer_allocation_error;
	av1_dec->db_data_col.size = size;

	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
						      &av1_dec->db_ctrl_col.dma,
						      GFP_KERNEL);
	if (!av1_dec->db_ctrl_col.cpu)
		goto buffer_allocation_error;
	av1_dec->db_ctrl_col.size = size;

	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
						   &av1_dec->cdef_col.dma,
						   GFP_KERNEL);
	if (!av1_dec->cdef_col.cpu)
		goto buffer_allocation_error;
	av1_dec->cdef_col.size = size;

	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
						 &av1_dec->sr_col.dma,
						 GFP_KERNEL);
	if (!av1_dec->sr_col.cpu)
		goto buffer_allocation_error;
	av1_dec->sr_col.size = size;

	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
						 &av1_dec->lr_col.dma,
						 GFP_KERNEL);
	if (!av1_dec->lr_col.cpu)
		goto buffer_allocation_error;
	av1_dec->lr_col.size = size;

	av1_dec->num_tile_cols_allocated = num_tile_cols;
	return 0;

buffer_allocation_error:
	/* Free any partially-allocated set; all-or-nothing semantics. */
	rockchip_vpu981_av1_dec_tiles_free(ctx);
	return -ENOMEM;
}
318
/*
 * Free every DMA buffer owned by the AV1 decoder context. Each pointer is
 * NULLed after freeing, so the function is safe to call on a partially
 * initialized context and is idempotent.
 */
void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;

	if (av1_dec->global_model.cpu)
		dma_free_coherent(vpu->dev, av1_dec->global_model.size,
				  av1_dec->global_model.cpu,
				  av1_dec->global_model.dma);
	av1_dec->global_model.cpu = NULL;

	if (av1_dec->tile_info.cpu)
		dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
				  av1_dec->tile_info.cpu,
				  av1_dec->tile_info.dma);
	av1_dec->tile_info.cpu = NULL;

	if (av1_dec->film_grain.cpu)
		dma_free_coherent(vpu->dev, av1_dec->film_grain.size,
				  av1_dec->film_grain.cpu,
				  av1_dec->film_grain.dma);
	av1_dec->film_grain.cpu = NULL;

	if (av1_dec->prob_tbl.cpu)
		dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
				  av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
	av1_dec->prob_tbl.cpu = NULL;

	if (av1_dec->prob_tbl_out.cpu)
		dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
				  av1_dec->prob_tbl_out.cpu,
				  av1_dec->prob_tbl_out.dma);
	av1_dec->prob_tbl_out.cpu = NULL;

	if (av1_dec->tile_buf.cpu)
		dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
				  av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
	av1_dec->tile_buf.cpu = NULL;

	/* Per-tile-column buffers are released by their own helper. */
	rockchip_vpu981_av1_dec_tiles_free(ctx);
}
360
rockchip_vpu981_av1_dec_init(struct hantro_ctx * ctx)361 int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
362 {
363 struct hantro_dev *vpu = ctx->dev;
364 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
365
366 memset(av1_dec, 0, sizeof(*av1_dec));
367
368 av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
369 &av1_dec->global_model.dma,
370 GFP_KERNEL);
371 if (!av1_dec->global_model.cpu)
372 return -ENOMEM;
373 av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
374
375 av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
376 &av1_dec->tile_info.dma,
377 GFP_KERNEL);
378 if (!av1_dec->tile_info.cpu)
379 return -ENOMEM;
380 av1_dec->tile_info.size = AV1_MAX_TILES;
381
382 av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev,
383 ALIGN(sizeof(struct rockchip_av1_film_grain), 2048),
384 &av1_dec->film_grain.dma,
385 GFP_KERNEL);
386 if (!av1_dec->film_grain.cpu)
387 return -ENOMEM;
388 av1_dec->film_grain.size = ALIGN(sizeof(struct rockchip_av1_film_grain), 2048);
389
390 av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
391 ALIGN(sizeof(struct av1cdfs), 2048),
392 &av1_dec->prob_tbl.dma,
393 GFP_KERNEL);
394 if (!av1_dec->prob_tbl.cpu)
395 return -ENOMEM;
396 av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
397
398 av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
399 ALIGN(sizeof(struct av1cdfs), 2048),
400 &av1_dec->prob_tbl_out.dma,
401 GFP_KERNEL);
402 if (!av1_dec->prob_tbl_out.cpu)
403 return -ENOMEM;
404 av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
405 av1_dec->cdfs = &av1_dec->default_cdfs;
406 av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
407
408 rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
409
410 av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
411 AV1_TILE_SIZE,
412 &av1_dec->tile_buf.dma,
413 GFP_KERNEL);
414 if (!av1_dec->tile_buf.cpu)
415 return -ENOMEM;
416 av1_dec->tile_buf.size = AV1_TILE_SIZE;
417
418 return 0;
419 }
420
/*
 * Fetch the per-frame V4L2 stateless AV1 controls and make sure the
 * per-tile-column buffers fit the new frame geometry.
 *
 * Returns 0 on success, -EINVAL when a mandatory control is missing, or
 * -ENOMEM from the tile buffer reallocation.
 */
static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;

	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
	if (WARN_ON(!ctrls->sequence))
		return -EINVAL;

	ctrls->tile_group_entry =
	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
	if (WARN_ON(!ctrls->tile_group_entry))
		return -EINVAL;

	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
	if (WARN_ON(!ctrls->frame))
		return -EINVAL;

	/* Not checked: presumably the film grain control is optional - confirm. */
	ctrls->film_grain =
	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);

	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
}
444
/*
 * Index of the most-significant set bit of @n (0-based); returns 0 for
 * n == 0 so __builtin_clz() is never called with an undefined argument.
 */
static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
{
	return n ? 31 ^ __builtin_clz(n) : 0;
}
451
/*
 * Fixed-point reciprocal of @d: returns a multiplier M and sets *@shift so
 * that x / d can be computed as (x * M) >> *shift (AV1 resolve_divisor).
 * Returns -1 when the derived table index falls outside div_lut[].
 */
static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
{
	int f;
	u64 e;

	*shift = rockchip_vpu981_av1_dec_get_msb(d);
	/* e is obtained from D after resetting the most significant 1 bit. */
	e = d - ((u32)1 << *shift);
	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
	if (*shift > DIV_LUT_BITS)
		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
	else
		f = e << (DIV_LUT_BITS - *shift);
	if (f > DIV_LUT_NUM)
		return -1;
	*shift += DIV_LUT_PREC_BITS;
	/* Use f as lookup into the precomputed table of multipliers */
	return div_lut[f];
}
471
/*
 * Derive the shear parameters (alpha, beta, gamma, delta) of an affine
 * global-motion model from its six warp parameters, in WARPEDMODEL
 * fixed-point precision, then round them to WARP_PARAM_REDUCE_BITS
 * granularity. Outputs are left untouched when mat[2] <= 0.
 *
 * NOTE(review): @params is u32* but is deliberately read through an int*
 * so the warp parameters are treated as signed - confirm this matches the
 * control's encoding.
 */
static void
rockchip_vpu981_av1_dec_get_shear_params(const u32 *params, s64 *alpha,
					 s64 *beta, s64 *gamma, s64 *delta)
{
	const int *mat = params;
	short shift;
	short y;
	long long gv, dv;

	if (mat[2] <= 0)
		return;

	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);

	/* 1/mat[2] in fixed point, carrying the sign of mat[2] */
	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);

	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;

	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);

	dv = ((long long)mat[3] * mat[4]) * y;
	*delta = clamp_val(mat[5] -
		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
		S16_MIN, S16_MAX);

	/* Round all four to WARP_PARAM_REDUCE_BITS granularity */
	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
		* (1 << WARP_PARAM_REDUCE_BITS);
	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
}
507
/*
 * Serialize the global motion models of the 7 inter reference frames into
 * the global_model DMA buffer (per model: six s32 warp parameters followed
 * by four s16 shear parameters) and program its address into the hardware.
 */
static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
	u8 *dst = av1_dec->global_model.cpu;
	struct hantro_dev *vpu = ctx->dev;
	int ref_frame, i;

	memset(dst, 0, GLOBAL_MODEL_SIZE);
	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
		s64 alpha = 0, beta = 0, gamma = 0, delta = 0;

		/*
		 * Parameters 2 and 3 are written swapped; presumably the
		 * hardware expects that order - confirm against the VPU981
		 * documentation.
		 */
		for (i = 0; i < 6; ++i) {
			if (i == 2)
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
			else if (i == 3)
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
			else
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
			dst += 4;
		}

		/* Shear parameters only exist up to the affine model. */
		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
			rockchip_vpu981_av1_dec_get_shear_params(&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
								 &alpha, &beta, &gamma, &delta);

		*(s16 *)dst = alpha;
		dst += 2;
		*(s16 *)dst = beta;
		dst += 2;
		*(s16 *)dst = gamma;
		dst += 2;
		*(s16 *)dst = delta;
		dst += 2;
	}

	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
}
551
/*
 * Smallest k such that (1 << k) >= target, i.e. ceil(log2(target));
 * 0 for target <= 1.
 */
static int rockchip_vpu981_av1_tile_log2(int target)
{
	int k = 0;

	while ((1 << k) < target)
		k++;

	return k;
}
564
/*
 * Fill the tile_info DMA buffer with, for each tile (column-major order):
 * its width and height in superblock units and its byte range inside the
 * bitstream (relative to the first tile group entry), then program the
 * tile-related hardware registers.
 */
static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
	    ctrls->tile_group_entry;
	/*
	 * Convert the row-major context_update_tile_id into the column-major
	 * numbering used by the hardware (tiles are transposed below).
	 */
	int context_update_y =
	    tile_info->context_update_tile_id / tile_info->tile_cols;
	int context_update_x =
	    tile_info->context_update_tile_id % tile_info->tile_cols;
	int context_update_tile_id =
	    context_update_x * tile_info->tile_rows + context_update_y;
	u8 *dst = av1_dec->tile_info.cpu;
	struct hantro_dev *vpu = ctx->dev;
	int tile0, tile1;

	memset(dst, 0, av1_dec->tile_info.size);

	for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
		for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
			int tile_id = tile1 * tile_info->tile_cols + tile0;
			u32 start, end;
			u32 y0 =
			    tile_info->height_in_sbs_minus_1[tile1] + 1;
			u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;

			/* tile size in SB units (width,height) */
			*dst++ = x0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = y0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = 0;

			/* tile start position */
			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
			*dst++ = start & 255;
			*dst++ = (start >> 8) & 255;
			*dst++ = (start >> 16) & 255;
			*dst++ = (start >> 24) & 255;

			/* number of bytes in tile data */
			end = start + group_entry[tile_id].tile_size;
			*dst++ = end & 255;
			*dst++ = (end >> 8) & 255;
			*dst++ = (end >> 16) & 255;
			*dst++ = (end >> 24) & 255;
		}
	}

	hantro_reg_write(vpu, &av1_multicore_expect_context_update, !!(context_update_x == 0));
	hantro_reg_write(vpu, &av1_tile_enable,
			 !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1)));
	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
	hantro_reg_write(vpu, &av1_context_update_tile_id, context_update_tile_id);
	hantro_reg_write(vpu, &av1_tile_transpose, 1);
	/* With a single (uniform) tile, fall back to the 4-byte size field. */
	if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
	    rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
	else
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);

	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
}
633
rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx * ctx,int a,int b)634 static int rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx *ctx,
635 int a, int b)
636 {
637 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
638 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
639 int bits = ctrls->sequence->order_hint_bits - 1;
640 int diff, m;
641
642 if (!ctrls->sequence->order_hint_bits)
643 return 0;
644
645 diff = a - b;
646 m = 1 << bits;
647 diff = (diff & (m - 1)) - (diff & m);
648
649 return diff;
650 }
651
/*
 * Compute ref_frame_sign_bias[] for the current frame: 1 when the
 * reference lies after the current frame in order-hint space (backward
 * reference), 0 otherwise. All biases are cleared for intra frames or
 * when order hints are disabled.
 */
static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
	int i;

	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
		for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
			av1_dec->ref_frame_sign_bias[i] = 0;

		return;
	}
	// Identify the nearest forward and backward references.
	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; i++) {
		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
			int rel_off =
			    rockchip_vpu981_av1_dec_get_dist(ctx,
							     rockchip_vpu981_get_order_hint(ctx, i),
							     frame->order_hint);
			/* Slot i + 1: index 0 is the INTRA_FRAME slot. */
			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
		}
	}
}
677
678 static bool
rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx * ctx,int ref,int idx,int width,int height)679 rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
680 int width, int height)
681 {
682 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
683 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
684 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
685 struct hantro_dev *vpu = ctx->dev;
686 struct hantro_decoded_buffer *dst;
687 dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
688 size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
689 size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
690 int cur_width = frame->frame_width_minus_1 + 1;
691 int cur_height = frame->frame_height_minus_1 + 1;
692 int scale_width =
693 ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
694 int scale_height =
695 ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;
696
697 switch (ref) {
698 case 0:
699 hantro_reg_write(vpu, &av1_ref0_height, height);
700 hantro_reg_write(vpu, &av1_ref0_width, width);
701 hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
702 hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
703 break;
704 case 1:
705 hantro_reg_write(vpu, &av1_ref1_height, height);
706 hantro_reg_write(vpu, &av1_ref1_width, width);
707 hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
708 hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
709 break;
710 case 2:
711 hantro_reg_write(vpu, &av1_ref2_height, height);
712 hantro_reg_write(vpu, &av1_ref2_width, width);
713 hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
714 hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
715 break;
716 case 3:
717 hantro_reg_write(vpu, &av1_ref3_height, height);
718 hantro_reg_write(vpu, &av1_ref3_width, width);
719 hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
720 hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
721 break;
722 case 4:
723 hantro_reg_write(vpu, &av1_ref4_height, height);
724 hantro_reg_write(vpu, &av1_ref4_width, width);
725 hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
726 hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
727 break;
728 case 5:
729 hantro_reg_write(vpu, &av1_ref5_height, height);
730 hantro_reg_write(vpu, &av1_ref5_width, width);
731 hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
732 hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
733 break;
734 case 6:
735 hantro_reg_write(vpu, &av1_ref6_height, height);
736 hantro_reg_write(vpu, &av1_ref6_width, width);
737 hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
738 hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
739 break;
740 default:
741 pr_warn("AV1 invalid reference frame index\n");
742 }
743
744 dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
745 luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
746 chroma_addr = luma_addr + cr_offset;
747 mv_addr = luma_addr + mv_offset;
748
749 hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
750 hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
751 hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);
752
753 return (scale_width != (1 << AV1_REF_SCALE_SHIFT)) ||
754 (scale_height != (1 << AV1_REF_SCALE_SHIFT));
755 }
756
/*
 * Write the sign bias @val of reference @ref (0..6) to the corresponding
 * per-reference hardware register; out-of-range indices only log a warning.
 */
static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
						  int ref, int val)
{
	struct hantro_dev *vpu = ctx->dev;

	switch (ref) {
	case 0:
		hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
		break;
	case 1:
		hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
		break;
	case 2:
		hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
		break;
	case 3:
		hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
		break;
	case 4:
		hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
		break;
	case 5:
		hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
		break;
	case 6:
		hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
		break;
	default:
		pr_warn("AV1 invalid sign bias index\n");
		break;
	}
}
789
rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx * ctx)790 static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
791 {
792 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
793 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
794 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
795 const struct v4l2_av1_segmentation *seg = &frame->segmentation;
796 u32 segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
797 struct hantro_dev *vpu = ctx->dev;
798 u8 segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
799
800 if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) &&
801 frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME) {
802 int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
803
804 if (idx >= 0) {
805 dma_addr_t luma_addr, mv_addr = 0;
806 struct hantro_decoded_buffer *seg;
807 size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
808
809 seg = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
810 luma_addr = hantro_get_dec_buf_addr(ctx, &seg->base.vb.vb2_buf);
811 mv_addr = luma_addr + mv_offset;
812
813 hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
814 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
815 }
816 }
817
818 hantro_reg_write(vpu, &av1_segment_temp_upd_e,
819 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
820 hantro_reg_write(vpu, &av1_segment_upd_e,
821 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
822 hantro_reg_write(vpu, &av1_segment_e,
823 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
824
825 hantro_reg_write(vpu, &av1_error_resilient,
826 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
827
828 if (IS_INTRA(frame->frame_type) ||
829 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
830 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
831 }
832
833 if (seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) {
834 int s;
835
836 for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
837 if (seg->feature_enabled[s] &
838 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
839 segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
840 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]),
841 0, 255);
842 segsign |=
843 (seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
844 }
845
846 if (seg->feature_enabled[s] &
847 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
848 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
849 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]),
850 -63, 63);
851
852 if (seg->feature_enabled[s] &
853 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
854 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
855 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]),
856 -63, 63);
857
858 if (seg->feature_enabled[s] &
859 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
860 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
861 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]),
862 -63, 63);
863
864 if (seg->feature_enabled[s] &
865 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
866 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
867 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]),
868 -63, 63);
869
870 if (frame->frame_type && seg->feature_enabled[s] &
871 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
872 segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
873
874 if (seg->feature_enabled[s] &
875 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
876 segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
877
878 if (seg->feature_enabled[s] &
879 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
880 segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
881 }
882 }
883
884 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
885 for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
886 if (seg->feature_enabled[i]
887 & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
888 preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
889 last_active_seg = max(i, last_active_seg);
890 }
891 }
892 }
893
894 hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
895 hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
896
897 hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
898
899 /* Write QP, filter level, ref frame and skip for every segment */
900 hantro_reg_write(vpu, &av1_quant_seg0,
901 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
902 hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
903 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
904 hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
905 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
906 hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
907 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
908 hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
909 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
910 hantro_reg_write(vpu, &av1_refpic_seg0,
911 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
912 hantro_reg_write(vpu, &av1_skip_seg0,
913 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
914 hantro_reg_write(vpu, &av1_global_mv_seg0,
915 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
916
917 hantro_reg_write(vpu, &av1_quant_seg1,
918 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
919 hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
920 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
921 hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
922 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
923 hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
924 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
925 hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
926 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
927 hantro_reg_write(vpu, &av1_refpic_seg1,
928 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
929 hantro_reg_write(vpu, &av1_skip_seg1,
930 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
931 hantro_reg_write(vpu, &av1_global_mv_seg1,
932 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
933
934 hantro_reg_write(vpu, &av1_quant_seg2,
935 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
936 hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
937 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
938 hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
939 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
940 hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
941 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
942 hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
943 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
944 hantro_reg_write(vpu, &av1_refpic_seg2,
945 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
946 hantro_reg_write(vpu, &av1_skip_seg2,
947 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
948 hantro_reg_write(vpu, &av1_global_mv_seg2,
949 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
950
951 hantro_reg_write(vpu, &av1_quant_seg3,
952 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
953 hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
954 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
955 hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
956 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
957 hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
958 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
959 hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
960 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
961 hantro_reg_write(vpu, &av1_refpic_seg3,
962 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
963 hantro_reg_write(vpu, &av1_skip_seg3,
964 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
965 hantro_reg_write(vpu, &av1_global_mv_seg3,
966 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
967
968 hantro_reg_write(vpu, &av1_quant_seg4,
969 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
970 hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
971 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
972 hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
973 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
974 hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
975 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
976 hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
977 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
978 hantro_reg_write(vpu, &av1_refpic_seg4,
979 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
980 hantro_reg_write(vpu, &av1_skip_seg4,
981 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
982 hantro_reg_write(vpu, &av1_global_mv_seg4,
983 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
984
985 hantro_reg_write(vpu, &av1_quant_seg5,
986 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
987 hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
988 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
989 hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
990 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
991 hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
992 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
993 hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
994 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
995 hantro_reg_write(vpu, &av1_refpic_seg5,
996 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
997 hantro_reg_write(vpu, &av1_skip_seg5,
998 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
999 hantro_reg_write(vpu, &av1_global_mv_seg5,
1000 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1001
1002 hantro_reg_write(vpu, &av1_quant_seg6,
1003 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
1004 hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
1005 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1006 hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
1007 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1008 hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
1009 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1010 hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
1011 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1012 hantro_reg_write(vpu, &av1_refpic_seg6,
1013 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
1014 hantro_reg_write(vpu, &av1_skip_seg6,
1015 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
1016 hantro_reg_write(vpu, &av1_global_mv_seg6,
1017 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1018
1019 hantro_reg_write(vpu, &av1_quant_seg7,
1020 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
1021 hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
1022 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1023 hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
1024 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1025 hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
1026 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1027 hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
1028 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1029 hantro_reg_write(vpu, &av1_refpic_seg7,
1030 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
1031 hantro_reg_write(vpu, &av1_skip_seg7,
1032 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
1033 hantro_reg_write(vpu, &av1_global_mv_seg7,
1034 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1035 }
1036
rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx * ctx)1037 static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
1038 {
1039 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1040 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1041 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1042 const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
1043 const struct v4l2_av1_quantization *quantization = &frame->quantization;
1044 int i;
1045
1046 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1047 int qindex = quantization->base_q_idx;
1048
1049 if (segmentation->feature_enabled[i] &
1050 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
1051 qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
1052 }
1053 qindex = clamp(qindex, 0, 255);
1054
1055 if (qindex ||
1056 quantization->delta_q_y_dc ||
1057 quantization->delta_q_u_dc ||
1058 quantization->delta_q_u_ac ||
1059 quantization->delta_q_v_dc ||
1060 quantization->delta_q_v_ac)
1061 return false;
1062 }
1063 return true;
1064 }
1065
rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx * ctx)1066 static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
1067 {
1068 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1069 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1070 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1071 const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
1072 bool filtering_dis = (loop_filter->level[0] == 0) && (loop_filter->level[1] == 0);
1073 struct hantro_dev *vpu = ctx->dev;
1074
1075 hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
1076 hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
1077 hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
1078
1079 hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
1080 hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
1081 hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
1082 hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
1083
1084 if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED &&
1085 !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
1086 !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
1087 hantro_reg_write(vpu, &av1_filt_ref_adj_0,
1088 loop_filter->ref_deltas[0]);
1089 hantro_reg_write(vpu, &av1_filt_ref_adj_1,
1090 loop_filter->ref_deltas[1]);
1091 hantro_reg_write(vpu, &av1_filt_ref_adj_2,
1092 loop_filter->ref_deltas[2]);
1093 hantro_reg_write(vpu, &av1_filt_ref_adj_3,
1094 loop_filter->ref_deltas[3]);
1095 hantro_reg_write(vpu, &av1_filt_ref_adj_4,
1096 loop_filter->ref_deltas[4]);
1097 hantro_reg_write(vpu, &av1_filt_ref_adj_5,
1098 loop_filter->ref_deltas[5]);
1099 hantro_reg_write(vpu, &av1_filt_ref_adj_6,
1100 loop_filter->ref_deltas[6]);
1101 hantro_reg_write(vpu, &av1_filt_ref_adj_7,
1102 loop_filter->ref_deltas[7]);
1103 hantro_reg_write(vpu, &av1_filt_mb_adj_0,
1104 loop_filter->mode_deltas[0]);
1105 hantro_reg_write(vpu, &av1_filt_mb_adj_1,
1106 loop_filter->mode_deltas[1]);
1107 } else {
1108 hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
1109 hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
1110 hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
1111 hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
1112 hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
1113 hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
1114 hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
1115 hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
1116 hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
1117 hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
1118 }
1119
1120 hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
1121 hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
1122 }
1123
rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx * ctx)1124 static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
1125 {
1126 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1127 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1128 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1129 bool frame_is_intra = IS_INTRA(frame->frame_type);
1130 struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
1131 int i;
1132
1133 if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
1134 return;
1135
1136 for (i = 0; i < NUM_REF_FRAMES; i++) {
1137 if (frame->refresh_frame_flags & BIT(i)) {
1138 struct mvcdfs stored_mv_cdf;
1139
1140 rockchip_av1_get_cdfs(ctx, i);
1141 stored_mv_cdf = av1_dec->cdfs->mv_cdf;
1142 *av1_dec->cdfs = *out_cdfs;
1143 if (frame_is_intra) {
1144 av1_dec->cdfs->mv_cdf = stored_mv_cdf;
1145 *av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
1146 }
1147 rockchip_av1_store_cdfs(ctx,
1148 frame->refresh_frame_flags);
1149 break;
1150 }
1151 }
1152 }
1153
/*
 * Post-decode hook: propagate the CDF probabilities updated by the
 * hardware during this frame into the reference CDF storage.
 */
void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
{
	rockchip_vpu981_av1_dec_update_prob(ctx);
}
1158
rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx * ctx)1159 static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
1160 {
1161 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1162 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1163 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1164 const struct v4l2_av1_quantization *quantization = &frame->quantization;
1165 struct hantro_dev *vpu = ctx->dev;
1166 bool error_resilient_mode =
1167 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
1168 bool frame_is_intra = IS_INTRA(frame->frame_type);
1169
1170 if (error_resilient_mode || frame_is_intra ||
1171 frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
1172 av1_dec->cdfs = &av1_dec->default_cdfs;
1173 av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
1174 rockchip_av1_default_coeff_probs(quantization->base_q_idx,
1175 av1_dec->cdfs);
1176 } else {
1177 rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
1178 }
1179 rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
1180
1181 memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
1182
1183 if (frame_is_intra) {
1184 int mv_offset = offsetof(struct av1cdfs, mv_cdf);
1185 /* Overwrite MV context area with intrabc MV context */
1186 memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
1187 sizeof(struct mvcdfs));
1188 }
1189
1190 hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
1191 hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
1192 }
1193
1194 static void
rockchip_vpu981_av1_dec_init_scaling_function(const u8 * values,const u8 * scaling,u8 num_points,u8 * scaling_lut)1195 rockchip_vpu981_av1_dec_init_scaling_function(const u8 *values, const u8 *scaling,
1196 u8 num_points, u8 *scaling_lut)
1197 {
1198 int i, point;
1199
1200 if (num_points == 0) {
1201 memset(scaling_lut, 0, 256);
1202 return;
1203 }
1204
1205 for (point = 0; point < num_points - 1; point++) {
1206 int x;
1207 s32 delta_y = scaling[point + 1] - scaling[point];
1208 s32 delta_x = values[point + 1] - values[point];
1209 s64 delta =
1210 delta_x ? delta_y * ((65536 + (delta_x >> 1)) /
1211 delta_x) : 0;
1212
1213 for (x = 0; x < delta_x; x++) {
1214 scaling_lut[values[point] + x] =
1215 scaling[point] +
1216 (s32)((x * delta + 32768) >> 16);
1217 }
1218 }
1219
1220 for (i = values[num_points - 1]; i < 256; i++)
1221 scaling_lut[i] = scaling[num_points - 1];
1222 }
1223
rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx * ctx)1224 static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
1225 {
1226 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1227 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1228 const struct v4l2_ctrl_av1_film_grain *film_grain = ctrls->film_grain;
1229 struct rockchip_av1_film_grain *fgmem = av1_dec->film_grain.cpu;
1230 struct hantro_dev *vpu = ctx->dev;
1231 bool scaling_from_luma =
1232 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA);
1233 s32 (*ar_coeffs_y)[24];
1234 s32 (*ar_coeffs_cb)[25];
1235 s32 (*ar_coeffs_cr)[25];
1236 s32 (*luma_grain_block)[73][82];
1237 s32 (*cb_grain_block)[38][44];
1238 s32 (*cr_grain_block)[38][44];
1239 s32 ar_coeff_lag, ar_coeff_shift;
1240 s32 grain_scale_shift, bitdepth;
1241 s32 grain_center, grain_min, grain_max;
1242 int i, j;
1243
1244 hantro_reg_write(vpu, &av1_apply_grain, 0);
1245
1246 if (!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN)) {
1247 hantro_reg_write(vpu, &av1_num_y_points_b, 0);
1248 hantro_reg_write(vpu, &av1_num_cb_points_b, 0);
1249 hantro_reg_write(vpu, &av1_num_cr_points_b, 0);
1250 hantro_reg_write(vpu, &av1_scaling_shift, 0);
1251 hantro_reg_write(vpu, &av1_cb_mult, 0);
1252 hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1253 hantro_reg_write(vpu, &av1_cb_offset, 0);
1254 hantro_reg_write(vpu, &av1_cr_mult, 0);
1255 hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1256 hantro_reg_write(vpu, &av1_cr_offset, 0);
1257 hantro_reg_write(vpu, &av1_overlap_flag, 0);
1258 hantro_reg_write(vpu, &av1_clip_to_restricted_range, 0);
1259 hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, 0);
1260 hantro_reg_write(vpu, &av1_random_seed, 0);
1261 hantro_write_addr(vpu, AV1_FILM_GRAIN, 0);
1262 return;
1263 }
1264
1265 ar_coeffs_y = kzalloc(sizeof(int32_t) * 24, GFP_KERNEL);
1266 ar_coeffs_cb = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1267 ar_coeffs_cr = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1268 luma_grain_block = kzalloc(sizeof(int32_t) * 73 * 82, GFP_KERNEL);
1269 cb_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1270 cr_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1271
1272 if (!ar_coeffs_y || !ar_coeffs_cb || !ar_coeffs_cr ||
1273 !luma_grain_block || !cb_grain_block || !cr_grain_block) {
1274 pr_warn("Fail allocating memory for film grain parameters\n");
1275 goto alloc_fail;
1276 }
1277
1278 hantro_reg_write(vpu, &av1_apply_grain, 1);
1279
1280 hantro_reg_write(vpu, &av1_num_y_points_b,
1281 film_grain->num_y_points > 0);
1282 hantro_reg_write(vpu, &av1_num_cb_points_b,
1283 film_grain->num_cb_points > 0);
1284 hantro_reg_write(vpu, &av1_num_cr_points_b,
1285 film_grain->num_cr_points > 0);
1286 hantro_reg_write(vpu, &av1_scaling_shift,
1287 film_grain->grain_scaling_minus_8 + 8);
1288
1289 if (!scaling_from_luma) {
1290 hantro_reg_write(vpu, &av1_cb_mult, film_grain->cb_mult - 128);
1291 hantro_reg_write(vpu, &av1_cb_luma_mult, film_grain->cb_luma_mult - 128);
1292 hantro_reg_write(vpu, &av1_cb_offset, film_grain->cb_offset - 256);
1293 hantro_reg_write(vpu, &av1_cr_mult, film_grain->cr_mult - 128);
1294 hantro_reg_write(vpu, &av1_cr_luma_mult, film_grain->cr_luma_mult - 128);
1295 hantro_reg_write(vpu, &av1_cr_offset, film_grain->cr_offset - 256);
1296 } else {
1297 hantro_reg_write(vpu, &av1_cb_mult, 0);
1298 hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1299 hantro_reg_write(vpu, &av1_cb_offset, 0);
1300 hantro_reg_write(vpu, &av1_cr_mult, 0);
1301 hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1302 hantro_reg_write(vpu, &av1_cr_offset, 0);
1303 }
1304
1305 hantro_reg_write(vpu, &av1_overlap_flag,
1306 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP));
1307 hantro_reg_write(vpu, &av1_clip_to_restricted_range,
1308 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE));
1309 hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, scaling_from_luma);
1310 hantro_reg_write(vpu, &av1_random_seed, film_grain->grain_seed);
1311
1312 rockchip_vpu981_av1_dec_init_scaling_function(film_grain->point_y_value,
1313 film_grain->point_y_scaling,
1314 film_grain->num_y_points,
1315 fgmem->scaling_lut_y);
1316
1317 if (film_grain->flags &
1318 V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA) {
1319 memcpy(fgmem->scaling_lut_cb, fgmem->scaling_lut_y,
1320 sizeof(*fgmem->scaling_lut_y) * 256);
1321 memcpy(fgmem->scaling_lut_cr, fgmem->scaling_lut_y,
1322 sizeof(*fgmem->scaling_lut_y) * 256);
1323 } else {
1324 rockchip_vpu981_av1_dec_init_scaling_function
1325 (film_grain->point_cb_value, film_grain->point_cb_scaling,
1326 film_grain->num_cb_points, fgmem->scaling_lut_cb);
1327 rockchip_vpu981_av1_dec_init_scaling_function
1328 (film_grain->point_cr_value, film_grain->point_cr_scaling,
1329 film_grain->num_cr_points, fgmem->scaling_lut_cr);
1330 }
1331
1332 for (i = 0; i < V4L2_AV1_AR_COEFFS_SIZE; i++) {
1333 if (i < 24)
1334 (*ar_coeffs_y)[i] = film_grain->ar_coeffs_y_plus_128[i] - 128;
1335 (*ar_coeffs_cb)[i] = film_grain->ar_coeffs_cb_plus_128[i] - 128;
1336 (*ar_coeffs_cr)[i] = film_grain->ar_coeffs_cr_plus_128[i] - 128;
1337 }
1338
1339 ar_coeff_lag = film_grain->ar_coeff_lag;
1340 ar_coeff_shift = film_grain->ar_coeff_shift_minus_6 + 6;
1341 grain_scale_shift = film_grain->grain_scale_shift;
1342 bitdepth = ctx->bit_depth;
1343 grain_center = 128 << (bitdepth - 8);
1344 grain_min = 0 - grain_center;
1345 grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;
1346
1347 rockchip_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
1348 film_grain->num_y_points, grain_scale_shift,
1349 ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
1350 grain_min, grain_max, film_grain->grain_seed);
1351
1352 rockchip_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
1353 cr_grain_block, bitdepth,
1354 film_grain->num_y_points,
1355 film_grain->num_cb_points,
1356 film_grain->num_cr_points,
1357 grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
1358 ar_coeffs_cr, ar_coeff_shift, grain_min,
1359 grain_max,
1360 scaling_from_luma,
1361 film_grain->grain_seed);
1362
1363 for (i = 0; i < 64; i++) {
1364 for (j = 0; j < 64; j++)
1365 fgmem->cropped_luma_grain_block[i * 64 + j] =
1366 (*luma_grain_block)[i + 9][j + 9];
1367 }
1368
1369 for (i = 0; i < 32; i++) {
1370 for (j = 0; j < 32; j++) {
1371 fgmem->cropped_chroma_grain_block[i * 64 + 2 * j] =
1372 (*cb_grain_block)[i + 6][j + 6];
1373 fgmem->cropped_chroma_grain_block[i * 64 + 2 * j + 1] =
1374 (*cr_grain_block)[i + 6][j + 6];
1375 }
1376 }
1377
1378 hantro_write_addr(vpu, AV1_FILM_GRAIN, av1_dec->film_grain.dma);
1379
1380 alloc_fail:
1381 kfree(ar_coeffs_y);
1382 kfree(ar_coeffs_cb);
1383 kfree(ar_coeffs_cr);
1384 kfree(luma_grain_block);
1385 kfree(cb_grain_block);
1386 kfree(cr_grain_block);
1387 }
1388
rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx * ctx)1389 static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
1390 {
1391 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1392 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1393 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1394 const struct v4l2_av1_cdef *cdef = &frame->cdef;
1395 struct hantro_dev *vpu = ctx->dev;
1396 u32 luma_pri_strength = 0;
1397 u16 luma_sec_strength = 0;
1398 u32 chroma_pri_strength = 0;
1399 u16 chroma_sec_strength = 0;
1400 int i;
1401
1402 hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
1403 hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
1404
1405 for (i = 0; i < BIT(cdef->bits); i++) {
1406 luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
1407 if (cdef->y_sec_strength[i] == 4)
1408 luma_sec_strength |= 3 << (i * 2);
1409 else
1410 luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
1411
1412 chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
1413 if (cdef->uv_sec_strength[i] == 4)
1414 chroma_sec_strength |= 3 << (i * 2);
1415 else
1416 chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
1417 }
1418
1419 hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
1420 luma_pri_strength);
1421 hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
1422 luma_sec_strength);
1423 hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
1424 chroma_pri_strength);
1425 hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
1426 chroma_sec_strength);
1427
1428 hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
1429 }
1430
rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx * ctx)1431 static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
1432 {
1433 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1434 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1435 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1436 const struct v4l2_av1_loop_restoration *loop_restoration =
1437 &frame->loop_restoration;
1438 struct hantro_dev *vpu = ctx->dev;
1439 u16 lr_type = 0, lr_unit_size = 0;
1440 u8 restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
1441 int i;
1442
1443 if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
1444 restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
1445 restoration_unit_size[1] =
1446 1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1447 restoration_unit_size[2] =
1448 1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1449 }
1450
1451 for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1452 lr_type |=
1453 loop_restoration->frame_restoration_type[i] << (i * 2);
1454 lr_unit_size |= restoration_unit_size[i] << (i * 2);
1455 }
1456
1457 hantro_reg_write(vpu, &av1_lr_type, lr_type);
1458 hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
1459 hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
1460 }
1461
/*
 * Program the super-resolution upscaling parameters.
 *
 * If the frame uses superres with a denominator above SCALE_NUMERATOR
 * and the resulting downscaled width actually differs from the upscaled
 * width, derive the fixed-point horizontal step sizes (Q14), their
 * inverses and the initial sub-pel x offsets for luma and chroma;
 * otherwise program pass-through defaults with is_scaled = 0.
 */
static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	u8 superres_scale_denominator = SCALE_NUMERATOR;
	/*
	 * NOTE(review): the not-scaled defaults below are the bit count
	 * (14), not 1 << RS_SCALE_SUBPEL_BITS; presumably the hardware
	 * ignores the step registers when is_scaled == 0 — confirm against
	 * the VPU981 register documentation.
	 */
	int superres_luma_step = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
	int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_init_luma_subpel_x = 0;
	int superres_init_chroma_subpel_x = 0;
	int superres_is_scaled = 0;
	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
	int upscaled_luma, downscaled_luma;
	int downscaled_chroma, upscaled_chroma;
	int step_luma, step_chroma;
	int err_luma, err_chroma;
	int initial_luma, initial_chroma;
	int width = 0;

	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		superres_scale_denominator = frame->superres_denom;

	/* Denominator <= numerator means no downscaling at all. */
	if (superres_scale_denominator <= SCALE_NUMERATOR)
		goto set_regs;

	/* Downscaled width, rounded to nearest. */
	width = (frame->upscaled_width * SCALE_NUMERATOR +
		(superres_scale_denominator / 2)) / superres_scale_denominator;

	if (width < min_w)
		width = min_w;

	if (width == frame->upscaled_width)
		goto set_regs;

	superres_is_scaled = 1;
	upscaled_luma = frame->upscaled_width;
	downscaled_luma = width;
	/* Chroma planes are half-width (rounded up) of luma. */
	downscaled_chroma = (downscaled_luma + 1) >> 1;
	upscaled_chroma = (upscaled_luma + 1) >> 1;
	/* Horizontal step per output pixel, Q14, rounded to nearest. */
	step_luma =
		((downscaled_luma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_luma / 2)) / upscaled_luma;
	step_chroma =
		((downscaled_chroma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_chroma / 2)) / upscaled_chroma;
	/* Accumulated rounding error of the step over the full width. */
	err_luma =
		(upscaled_luma * step_luma)
		- (downscaled_luma << RS_SCALE_SUBPEL_BITS);
	err_chroma =
		(upscaled_chroma * step_chroma)
		- (downscaled_chroma << RS_SCALE_SUBPEL_BITS);
	/* Centering initial sub-pel position, compensated for the error. */
	initial_luma =
		((-((upscaled_luma - downscaled_luma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_luma / 2)
		 / upscaled_luma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_luma / 2)
		& RS_SCALE_SUBPEL_MASK;
	initial_chroma =
		((-((upscaled_chroma - downscaled_chroma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_chroma / 2)
		 / upscaled_chroma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_chroma / 2)
		& RS_SCALE_SUBPEL_MASK;
	superres_luma_step = step_luma;
	superres_chroma_step = step_chroma;
	/* Inverse steps (upscaled over downscaled), Q14, rounded. */
	superres_luma_step_invra =
		((upscaled_luma << RS_SCALE_SUBPEL_BITS) + (downscaled_luma / 2))
		/ downscaled_luma;
	superres_chroma_step_invra =
		((upscaled_chroma << RS_SCALE_SUBPEL_BITS) + (downscaled_chroma / 2))
		/ downscaled_chroma;
	superres_init_luma_subpel_x = initial_luma;
	superres_init_chroma_subpel_x = initial_chroma;

set_regs:
	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);

	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		hantro_reg_write(vpu, &av1_scale_denom_minus9,
				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
	else
		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);

	hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
	hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
	hantro_reg_write(vpu, &av1_superres_luma_step_invra,
			 superres_luma_step_invra);
	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
			 superres_chroma_step_invra);
	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
			 superres_init_luma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
			 superres_init_chroma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);

	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
}
1560
rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx * ctx)1561 static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
1562 {
1563 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1564 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1565 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1566 struct hantro_dev *vpu = ctx->dev;
1567 int pic_width_in_cbs = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1568 int pic_height_in_cbs = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1569 int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
1570 - (frame->frame_width_minus_1 + 1);
1571 int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
1572 - (frame->frame_height_minus_1 + 1);
1573
1574 hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
1575 hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
1576 hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
1577 hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
1578
1579 rockchip_vpu981_av1_dec_set_superres_params(ctx);
1580 }
1581
rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx * ctx)1582 static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
1583 {
1584 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1585 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1586 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1587 struct hantro_dev *vpu = ctx->dev;
1588 bool use_ref_frame_mvs =
1589 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
1590 int cur_frame_offset = frame->order_hint;
1591 int alt_frame_offset = 0;
1592 int gld_frame_offset = 0;
1593 int bwd_frame_offset = 0;
1594 int alt2_frame_offset = 0;
1595 int refs_selected[3] = { 0, 0, 0 };
1596 int cur_mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1597 int cur_mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1598 int cur_offset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1599 int cur_roffset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1600 int mf_types[3] = { 0, 0, 0 };
1601 int ref_stamp = 2;
1602 int ref_ind = 0;
1603 int rf, idx;
1604
1605 alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
1606 gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
1607 bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
1608 alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
1609
1610 idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
1611 if (idx >= 0) {
1612 int alt_frame_offset_in_lst =
1613 av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
1614 bool is_lst_overlay =
1615 (alt_frame_offset_in_lst == gld_frame_offset);
1616
1617 if (!is_lst_overlay) {
1618 int lst_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1619 int lst_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1620 bool lst_intra_only =
1621 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1622
1623 if (lst_mi_cols == cur_mi_cols &&
1624 lst_mi_rows == cur_mi_rows && !lst_intra_only) {
1625 mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
1626 refs_selected[ref_ind++] = LST_BUF_IDX;
1627 }
1628 }
1629 ref_stamp--;
1630 }
1631
1632 idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
1633 if (rockchip_vpu981_av1_dec_get_dist(ctx, bwd_frame_offset, cur_frame_offset) > 0) {
1634 int bwd_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1635 int bwd_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1636 bool bwd_intra_only =
1637 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1638
1639 if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
1640 !bwd_intra_only) {
1641 mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
1642 refs_selected[ref_ind++] = BWD_BUF_IDX;
1643 ref_stamp--;
1644 }
1645 }
1646
1647 idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
1648 if (rockchip_vpu981_av1_dec_get_dist(ctx, alt2_frame_offset, cur_frame_offset) > 0) {
1649 int alt2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1650 int alt2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1651 bool alt2_intra_only =
1652 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1653
1654 if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows &&
1655 !alt2_intra_only) {
1656 mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
1657 refs_selected[ref_ind++] = ALT2_BUF_IDX;
1658 ref_stamp--;
1659 }
1660 }
1661
1662 idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
1663 if (rockchip_vpu981_av1_dec_get_dist(ctx, alt_frame_offset, cur_frame_offset) > 0 &&
1664 ref_stamp >= 0) {
1665 int alt_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1666 int alt_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1667 bool alt_intra_only =
1668 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1669
1670 if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
1671 !alt_intra_only) {
1672 mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
1673 refs_selected[ref_ind++] = ALT_BUF_IDX;
1674 ref_stamp--;
1675 }
1676 }
1677
1678 idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
1679 if (idx >= 0 && ref_stamp >= 0) {
1680 int lst2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1681 int lst2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1682 bool lst2_intra_only =
1683 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1684
1685 if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows &&
1686 !lst2_intra_only) {
1687 mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
1688 refs_selected[ref_ind++] = LST2_BUF_IDX;
1689 ref_stamp--;
1690 }
1691 }
1692
1693 for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
1694 idx = rockchip_vpu981_get_frame_index(ctx, rf);
1695 if (idx >= 0) {
1696 int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
1697
1698 cur_offset[rf] =
1699 rockchip_vpu981_av1_dec_get_dist(ctx, cur_frame_offset, rf_order_hint);
1700 cur_roffset[rf] =
1701 rockchip_vpu981_av1_dec_get_dist(ctx, rf_order_hint, cur_frame_offset);
1702 } else {
1703 cur_offset[rf] = 0;
1704 cur_roffset[rf] = 0;
1705 }
1706 }
1707
1708 hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
1709 hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
1710 hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
1711 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
1712
1713 hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
1714 hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
1715 hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
1716 hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
1717 hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
1718 hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
1719 hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
1720
1721 if (use_ref_frame_mvs && ref_ind > 0 &&
1722 cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1723 cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1724 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
1725 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
1726 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1727 int val;
1728
1729 hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
1730
1731 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1732 hantro_reg_write(vpu, &av1_mf1_last_offset, val);
1733
1734 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1735 hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
1736
1737 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1738 hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
1739
1740 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1741 hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
1742
1743 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1744 hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
1745
1746 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1747 hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
1748
1749 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1750 hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
1751 }
1752
1753 hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
1754 hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
1755 hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
1756 hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
1757 hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
1758 hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
1759 hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
1760
1761 if (use_ref_frame_mvs && ref_ind > 1 &&
1762 cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1763 cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1764 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
1765 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
1766 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1767 int val;
1768
1769 hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
1770
1771 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1772 hantro_reg_write(vpu, &av1_mf2_last_offset, val);
1773
1774 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1775 hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
1776
1777 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1778 hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
1779
1780 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1781 hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
1782
1783 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1784 hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
1785
1786 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1787 hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
1788
1789 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1790 hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
1791 }
1792
1793 hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
1794 hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
1795 hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
1796 hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
1797 hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
1798 hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
1799 hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
1800
1801 if (use_ref_frame_mvs && ref_ind > 2 &&
1802 cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1803 cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1804 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
1805 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
1806 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1807 int val;
1808
1809 hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
1810
1811 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1812 hantro_reg_write(vpu, &av1_mf3_last_offset, val);
1813
1814 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1815 hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
1816
1817 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1818 hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
1819
1820 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1821 hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
1822
1823 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1824 hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
1825
1826 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1827 hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
1828
1829 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1830 hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
1831 }
1832
1833 hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
1834 hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
1835 hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
1836 hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
1837 hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
1838 hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
1839 hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
1840
1841 hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
1842 hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
1843 hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
1844 hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
1845 hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
1846 hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
1847 hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
1848
1849 hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
1850 hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
1851 hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
1852 }
1853
/*
 * Program the reference-frame registers for the frame being decoded:
 * the number of distinct reference buffers, per-reference dimensions
 * (via set_ref), sign bias, and per-reference global-motion model type.
 */
static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	int frame_type = frame->frame_type;
	bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
	int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
	struct hantro_dev *vpu = ctx->dev;
	int i, ref_frames = 0;
	bool scale_enable = false;

	/* Intra frames carry no references, unless intra block copy is used. */
	if (IS_INTRA(frame_type) && !allow_intrabc)
		return;

	if (!allow_intrabc) {
		/*
		 * Several of the seven reference slots may point at the same
		 * physical buffer; count distinct buffers only.
		 */
		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
			int idx = rockchip_vpu981_get_frame_index(ctx, i);

			if (idx >= 0)
				ref_count[idx]++;
		}

		for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
			if (ref_count[i])
				ref_frames++;
		}
	} else {
		/* With intrabc the current frame is its own sole reference. */
		ref_frames = 1;
	}
	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);

	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);

	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
		u32 ref = i - 1;
		int idx = 0;
		int width, height;

		if (allow_intrabc) {
			idx = av1_dec->current_frame_index;
			width = frame->frame_width_minus_1 + 1;
			height = frame->frame_height_minus_1 + 1;
		} else {
			/*
			 * NOTE(review): the "> 0" test looks like it should be
			 * ">= 0", but it is harmless: idx already defaults to
			 * 0, so a returned index of 0 and an invalid (negative)
			 * one both end up selecting slot 0.
			 */
			if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
				idx = rockchip_vpu981_get_frame_index(ctx, ref);
			width = av1_dec->frame_refs[idx].width;
			height = av1_dec->frame_refs[idx].height;
		}

		/*
		 * set_ref() returns whether this reference differs in size
		 * from the current frame and thus needs the scaler.
		 */
		scale_enable |=
			rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
							height);

		rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
						      av1_dec->ref_frame_sign_bias[i]);
	}
	hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);

	/* Global-motion model type per reference, straight from the uAPI. */
	hantro_reg_write(vpu, &av1_ref0_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
	hantro_reg_write(vpu, &av1_ref1_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
	hantro_reg_write(vpu, &av1_ref2_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
	hantro_reg_write(vpu, &av1_ref3_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
	hantro_reg_write(vpu, &av1_ref4_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
	hantro_reg_write(vpu, &av1_ref5_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
	hantro_reg_write(vpu, &av1_ref6_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);

	rockchip_vpu981_av1_dec_set_other_frames(ctx);
}
1930
rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx * ctx)1931 static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
1932 {
1933 struct hantro_dev *vpu = ctx->dev;
1934 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1935 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1936
1937 hantro_reg_write(vpu, &av1_skip_mode,
1938 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
1939 hantro_reg_write(vpu, &av1_tempor_mvp_e,
1940 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
1941 hantro_reg_write(vpu, &av1_delta_lf_res_log,
1942 ctrls->frame->loop_filter.delta_lf_res);
1943 hantro_reg_write(vpu, &av1_delta_lf_multi,
1944 !!(ctrls->frame->loop_filter.flags
1945 & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
1946 hantro_reg_write(vpu, &av1_delta_lf_present,
1947 !!(ctrls->frame->loop_filter.flags
1948 & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
1949 hantro_reg_write(vpu, &av1_disable_cdf_update,
1950 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
1951 hantro_reg_write(vpu, &av1_allow_warp,
1952 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
1953 hantro_reg_write(vpu, &av1_show_frame,
1954 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
1955 hantro_reg_write(vpu, &av1_switchable_motion_mode,
1956 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
1957 hantro_reg_write(vpu, &av1_enable_cdef,
1958 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
1959 hantro_reg_write(vpu, &av1_allow_masked_compound,
1960 !!(ctrls->sequence->flags
1961 & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
1962 hantro_reg_write(vpu, &av1_allow_interintra,
1963 !!(ctrls->sequence->flags
1964 & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
1965 hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
1966 !!(ctrls->sequence->flags
1967 & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
1968 hantro_reg_write(vpu, &av1_allow_filter_intra,
1969 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
1970 hantro_reg_write(vpu, &av1_enable_jnt_comp,
1971 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
1972 hantro_reg_write(vpu, &av1_enable_dual_filter,
1973 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
1974 hantro_reg_write(vpu, &av1_reduced_tx_set_used,
1975 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
1976 hantro_reg_write(vpu, &av1_allow_screen_content_tools,
1977 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
1978 hantro_reg_write(vpu, &av1_allow_intrabc,
1979 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));
1980
1981 if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
1982 hantro_reg_write(vpu, &av1_force_interger_mv, 0);
1983 else
1984 hantro_reg_write(vpu, &av1_force_interger_mv,
1985 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));
1986
1987 hantro_reg_write(vpu, &av1_blackwhite_e, 0);
1988 hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
1989 hantro_reg_write(vpu, &av1_delta_q_present,
1990 !!(ctrls->frame->quantization.flags
1991 & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
1992
1993 hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
1994 hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
1995 hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
1996 hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);
1997
1998 hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
1999 hantro_reg_write(vpu, &av1_high_prec_mv_e,
2000 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
2001 hantro_reg_write(vpu, &av1_comp_pred_mode,
2002 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
2003 hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
2004 hantro_reg_write(vpu, &av1_max_cb_size,
2005 (ctrls->sequence->flags
2006 & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
2007 hantro_reg_write(vpu, &av1_min_cb_size, 3);
2008
2009 hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
2010 hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
2011 hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
2012 hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
2013 hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
2014 hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
2015 hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
2016 hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
2017 hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
2018 hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
2019 hantro_reg_write(vpu, &av1_filt_level_seg7, 0);
2020
2021 hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
2022 hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
2023 hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
2024 if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
2025 hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
2026 hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
2027 hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
2028 } else {
2029 hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
2030 hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
2031 hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
2032 }
2033
2034 hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
2035 hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
2036 hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);
2037
2038 hantro_reg_write(vpu, &av1_skip_ref0,
2039 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
2040 hantro_reg_write(vpu, &av1_skip_ref1,
2041 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);
2042
2043 hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
2044 hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
2045 }
2046
2047 static void
rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx * ctx,struct vb2_v4l2_buffer * vb2_src)2048 rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
2049 struct vb2_v4l2_buffer *vb2_src)
2050 {
2051 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2052 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
2053 const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
2054 ctrls->tile_group_entry;
2055 struct hantro_dev *vpu = ctx->dev;
2056 dma_addr_t src_dma;
2057 u32 src_len, src_buf_len;
2058 int start_bit, offset;
2059
2060 src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
2061 src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
2062 src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
2063
2064 start_bit = (group_entry[0].tile_offset & 0xf) * 8;
2065 offset = group_entry[0].tile_offset & ~0xf;
2066
2067 hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
2068 hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
2069 hantro_reg_write(vpu, &av1_stream_len, src_len);
2070 hantro_reg_write(vpu, &av1_strm_start_offset, 0);
2071 hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
2072 }
2073
2074 static void
rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx * ctx)2075 rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
2076 {
2077 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2078 struct hantro_dev *vpu = ctx->dev;
2079 struct hantro_decoded_buffer *dst;
2080 struct vb2_v4l2_buffer *vb2_dst;
2081 dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
2082 size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
2083 size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
2084
2085 vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
2086 dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
2087 luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
2088 chroma_addr = luma_addr + cr_offset;
2089 mv_addr = luma_addr + mv_offset;
2090
2091 hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
2092 hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
2093 hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
2094 }
2095
/*
 * Top-level per-frame entry point: validate inputs, program every register
 * group for the current frame, then kick the decoder.  Returns 0 on success
 * or a negative errno; on failure the job is completed with an error state.
 */
int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct vb2_v4l2_buffer *vb2_src;
	int ret;

	hantro_start_prepare_run(ctx);

	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
	if (ret)
		goto prepare_error;

	vb2_src = hantro_get_src_buf(ctx);
	if (!vb2_src) {
		ret = -EINVAL;
		goto prepare_error;
	}

	/* Refresh the reference pool before looking up frame indices. */
	rockchip_vpu981_av1_dec_clean_refs(ctx);
	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);

	/* Program each register group for this frame. */
	rockchip_vpu981_av1_dec_set_parameters(ctx);
	rockchip_vpu981_av1_dec_set_global_model(ctx);
	rockchip_vpu981_av1_dec_set_tile_info(ctx);
	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
	rockchip_vpu981_av1_dec_set_segmentation(ctx);
	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
	rockchip_vpu981_av1_dec_set_cdef(ctx);
	rockchip_vpu981_av1_dec_set_lr(ctx);
	rockchip_vpu981_av1_dec_set_fgs(ctx);
	rockchip_vpu981_av1_dec_set_prob(ctx);

	/* Core decode configuration: AV1 mode, MV write-out, clock gating. */
	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);

	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);

	/* Bus/AXI tuning parameters (fixed values for this core). */
	hantro_reg_write(vpu, &av1_dec_alignment, 64);
	hantro_reg_write(vpu, &av1_apf_disable, 0);
	hantro_reg_write(vpu, &av1_apf_threshold, 8);
	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);

	/* Generous watchdog timeouts so large frames don't trip them. */
	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_timeout_override_e, 1);

	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
	rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);

	hantro_end_prepare_run(ctx);

	/* Start the decoder; completion is reported via interrupt. */
	hantro_reg_write(vpu, &av1_dec_e, 1);

	return 0;

prepare_error:
	hantro_end_prepare_run(ctx);
	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
	return ret;
}
2166
rockchip_vpu981_postproc_enable(struct hantro_ctx * ctx)2167 static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
2168 {
2169 struct hantro_dev *vpu = ctx->dev;
2170 int width = ctx->dst_fmt.width;
2171 int height = ctx->dst_fmt.height;
2172 struct vb2_v4l2_buffer *vb2_dst;
2173 size_t chroma_offset;
2174 dma_addr_t dst_dma;
2175
2176 vb2_dst = hantro_get_dst_buf(ctx);
2177
2178 dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
2179 chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
2180 ctx->dst_fmt.height;
2181
2182 /* enable post processor */
2183 hantro_reg_write(vpu, &av1_pp_out_e, 1);
2184 hantro_reg_write(vpu, &av1_pp_in_format, 0);
2185 hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
2186 hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
2187
2188 hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
2189 hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
2190 hantro_reg_write(vpu, &av1_pp_out_height, height);
2191 hantro_reg_write(vpu, &av1_pp_out_width, width);
2192 hantro_reg_write(vpu, &av1_pp_out_y_stride,
2193 ctx->dst_fmt.plane_fmt[0].bytesperline);
2194 hantro_reg_write(vpu, &av1_pp_out_c_stride,
2195 ctx->dst_fmt.plane_fmt[0].bytesperline);
2196 switch (ctx->dst_fmt.pixelformat) {
2197 case V4L2_PIX_FMT_P010:
2198 hantro_reg_write(vpu, &av1_pp_out_format, 1);
2199 break;
2200 case V4L2_PIX_FMT_NV12:
2201 hantro_reg_write(vpu, &av1_pp_out_format, 3);
2202 break;
2203 default:
2204 hantro_reg_write(vpu, &av1_pp_out_format, 0);
2205 }
2206
2207 hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
2208 hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
2209 hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
2210 hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
2211 hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
2212 hantro_reg_write(vpu, &av1_pp_up_level, 0);
2213 hantro_reg_write(vpu, &av1_pp_down_level, 0);
2214 hantro_reg_write(vpu, &av1_pp_exist, 0);
2215
2216 hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
2217 hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
2218 }
2219
/* Clear the post-processor output-enable bit for this context. */
static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;

	/* disable post processor */
	hantro_reg_write(vpu, &av1_pp_out_e, 0);
}
2227
/* Post-processor callbacks exported to the Hantro core for this variant. */
const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
	.enable = rockchip_vpu981_postproc_enable,
	.disable = rockchip_vpu981_postproc_disable,
};
2232