xref: /openbmc/linux/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c (revision d699090510c3223641a23834b4710e2d4309a6ad)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2021 MediaTek Inc.
4  * Author: George Sun <george.sun@mediatek.com>
5  */
6 
7 #include <linux/module.h>
8 #include <linux/slab.h>
9 #include <media/videobuf2-dma-contig.h>
10 #include <media/v4l2-vp9.h>
11 
12 #include "../mtk_vcodec_dec.h"
13 #include "../../common/mtk_vcodec_intr.h"
14 #include "../vdec_drv_base.h"
15 #include "../vdec_drv_if.h"
16 #include "../vdec_vpu_if.h"
17 
18 /* reset_frame_context defined in VP9 spec */
19 #define VP9_RESET_FRAME_CONTEXT_NONE0 0
20 #define VP9_RESET_FRAME_CONTEXT_NONE1 1
21 #define VP9_RESET_FRAME_CONTEXT_SPEC 2
22 #define VP9_RESET_FRAME_CONTEXT_ALL 3
23 
24 #define VP9_TILE_BUF_SIZE 4096
25 #define VP9_PROB_BUF_SIZE 2560
26 #define VP9_COUNTS_BUF_SIZE 16384
27 
28 #define HDR_FLAG(x) (!!((hdr)->flags & V4L2_VP9_FRAME_FLAG_##x))
29 #define LF_FLAG(x) (!!((lf)->flags & V4L2_VP9_LOOP_FILTER_FLAG_##x))
30 #define SEG_FLAG(x) (!!((seg)->flags & V4L2_VP9_SEGMENTATION_FLAG_##x))
31 #define VP9_BAND_6(band) ((band) == 0 ? 3 : 6)
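/*
 * The flag helpers above expand to the local variables named in them:
 * HDR_FLAG() expects a `hdr` pointer, LF_FLAG() an `lf` pointer and
 * SEG_FLAG() a `seg` pointer in the calling scope. For example,
 * HDR_FLAG(SHOW_FRAME) expands to
 * !!(hdr->flags & V4L2_VP9_FRAME_FLAG_SHOW_FRAME).
 */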
32 
33 /*
34  * struct vdec_vp9_slice_frame_ctx - vp9 prob tables footprint
35  */
36 struct vdec_vp9_slice_frame_ctx {
37 	struct {
38 		u8 probs[6][3];
39 		u8 padding[2];
40 	} coef_probs[4][2][2][6];
41 
42 	u8 y_mode_prob[4][16];
43 	u8 switch_interp_prob[4][16];
44 	u8 seg[32];  /* ignore */
45 	u8 comp_inter_prob[16];
46 	u8 comp_ref_prob[16];
47 	u8 single_ref_prob[5][2];
48 	u8 single_ref_prob_padding[6];
49 
50 	u8 joint[3];
51 	u8 joint_padding[13];
52 	struct {
53 		u8 sign;
54 		u8 classes[10];
55 		u8 padding[5];
56 	} sign_classes[2];
57 	struct {
58 		u8 class0[1];
59 		u8 bits[10];
60 		u8 padding[5];
61 	} class0_bits[2];
62 	struct {
63 		u8 class0_fp[2][3];
64 		u8 fp[3];
65 		u8 class0_hp;
66 		u8 hp;
67 		u8 padding[5];
68 	} class0_fp_hp[2];
69 
70 	u8 uv_mode_prob[10][16];
71 	u8 uv_mode_prob_padding[2][16];
72 
73 	u8 partition_prob[16][4];
74 
75 	u8 inter_mode_probs[7][4];
76 	u8 skip_probs[4];
77 
78 	u8 tx_p8x8[2][4];
79 	u8 tx_p16x16[2][4];
80 	u8 tx_p32x32[2][4];
81 	u8 intra_inter_prob[8];
82 };
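/*
 * The padding members above are not used by the driver; they presumably keep
 * the struct layout identical to the prob-table footprint expected by the
 * firmware and decoder hardware, so the struct should not be repacked or
 * reordered.
 */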
83 
84 /*
85  * struct vdec_vp9_slice_frame_counts - vp9 counts tables footprint
86  */
87 struct vdec_vp9_slice_frame_counts {
88 	union {
89 		struct {
90 			u32 band_0[3];
91 			u32 padding0[1];
92 			u32 band_1_5[5][6];
93 			u32 padding1[2];
94 		} eob_branch[4][2][2];
95 		u32 eob_branch_space[256 * 4];
96 	};
97 
98 	struct {
99 		u32 band_0[3][4];
100 		u32 band_1_5[5][6][4];
101 	} coef_probs[4][2][2];
102 
103 	u32 intra_inter[4][2];
104 	u32 comp_inter[5][2];
105 	u32 comp_inter_padding[2];
106 	u32 comp_ref[5][2];
107 	u32 comp_ref_padding[2];
108 	u32 single_ref[5][2][2];
109 	u32 inter_mode[7][4];
110 	u32 y_mode[4][12];
111 	u32 uv_mode[10][10];
112 	u32 partition[16][4];
113 	u32 switchable_interp[4][4];
114 
115 	u32 tx_p8x8[2][2];
116 	u32 tx_p16x16[2][4];
117 	u32 tx_p32x32[2][4];
118 
119 	u32 skip[3][4];
120 
121 	u32 joint[4];
122 
123 	struct {
124 		u32 sign[2];
125 		u32 class0[2];
126 		u32 classes[12];
127 		u32 bits[10][2];
128 		u32 padding[4];
129 		u32 class0_fp[2][4];
130 		u32 fp[4];
131 		u32 class0_hp[2];
132 		u32 hp[2];
133 	} mvcomp[2];
134 
135 	u32 reserved[126][4];
136 };
137 
138 /**
139  * struct vdec_vp9_slice_counts_map - vp9 counts tables to map
140  *                                    v4l2_vp9_frame_symbol_counts
141  * @skip:	skip counts.
142  * @y_mode:	Y prediction mode counts.
143  * @filter:	interpolation filter counts.
144  * @mv_joint:	motion vector joint counts.
145  * @sign:	motion vector sign counts.
146  * @classes:	motion vector class counts.
147  * @class0:	motion vector class0 bit counts.
148  * @bits:	motion vector bits counts.
149  * @class0_fp:	motion vector class0 fractional bit counts.
150  * @fp:	motion vector fractional bit counts.
151  * @class0_hp:	motion vector class0 high precision fractional bit counts.
152  * @hp:	motion vector high precision fractional bit counts.
153  */
154 struct vdec_vp9_slice_counts_map {
155 	u32 skip[3][2];
156 	u32 y_mode[4][10];
157 	u32 filter[4][3];
158 	u32 sign[2][2];
159 	u32 classes[2][11];
160 	u32 class0[2][2];
161 	u32 bits[2][10][2];
162 	u32 class0_fp[2][2][4];
163 	u32 fp[2][4];
164 	u32 class0_hp[2][2];
165 	u32 hp[2][2];
166 };
167 
168 /*
169  * struct vdec_vp9_slice_uncompressed_header - vp9 uncompressed header syntax
170  *                                             used for decoding
171  */
172 struct vdec_vp9_slice_uncompressed_header {
173 	u8 profile;
174 	u8 last_frame_type;
175 	u8 frame_type;
176 
177 	u8 last_show_frame;
178 	u8 show_frame;
179 	u8 error_resilient_mode;
180 
181 	u8 bit_depth;
182 	u8 padding0[1];
183 	u16 last_frame_width;
184 	u16 last_frame_height;
185 	u16 frame_width;
186 	u16 frame_height;
187 
188 	u8 intra_only;
189 	u8 reset_frame_context;
190 	u8 ref_frame_sign_bias[4];
191 	u8 allow_high_precision_mv;
192 	u8 interpolation_filter;
193 
194 	u8 refresh_frame_context;
195 	u8 frame_parallel_decoding_mode;
196 	u8 frame_context_idx;
197 
198 	/* loop_filter_params */
199 	u8 loop_filter_level;
200 	u8 loop_filter_sharpness;
201 	u8 loop_filter_delta_enabled;
202 	s8 loop_filter_ref_deltas[4];
203 	s8 loop_filter_mode_deltas[2];
204 
205 	/* quantization_params */
206 	u8 base_q_idx;
207 	s8 delta_q_y_dc;
208 	s8 delta_q_uv_dc;
209 	s8 delta_q_uv_ac;
210 
211 	/* segmentation_params */
212 	u8 segmentation_enabled;
213 	u8 segmentation_update_map;
214 	u8 segmentation_tree_probs[7];
215 	u8 padding1[1];
216 	u8 segmentation_temporal_update;
217 	u8 segmentation_pred_prob[3];
218 	u8 segmentation_update_data;
219 	u8 segmentation_abs_or_delta_update;
220 	u8 feature_enabled[8];
221 	s16 feature_value[8][4];
222 
223 	/* tile_info */
224 	u8 tile_cols_log2;
225 	u8 tile_rows_log2;
226 	u8 padding2[2];
227 
228 	u16 uncompressed_header_size;
229 	u16 header_size_in_bytes;
230 
231 	/* LAT OUT, CORE IN */
232 	u32 dequant[8][4];
233 };
234 
235 /*
236  * struct vdec_vp9_slice_compressed_header - vp9 compressed header syntax
237  *                                           used for decoding.
238  */
239 struct vdec_vp9_slice_compressed_header {
240 	u8 tx_mode;
241 	u8 ref_mode;
242 	u8 comp_fixed_ref;
243 	u8 comp_var_ref[2];
244 	u8 padding[3];
245 };
246 
247 /*
248  * struct vdec_vp9_slice_tiles - vp9 tile syntax
249  */
250 struct vdec_vp9_slice_tiles {
251 	u32 size[4][64];
252 	u32 mi_rows[4];
253 	u32 mi_cols[64];
254 	u8 actual_rows;
255 	u8 padding[7];
256 };
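/*
 * VP9 allows at most 4 tile rows (tile_rows_log2 <= 2) and 64 tile columns
 * (tile_cols_log2 <= 6), which bounds the arrays above; the same limits are
 * checked in vdec_vp9_slice_setup_tile() and
 * vdec_vp9_slice_setup_tile_buffer().
 */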
257 
258 /*
259  * struct vdec_vp9_slice_reference - vp9 reference frame information
260  */
261 struct vdec_vp9_slice_reference {
262 	u16 frame_width;
263 	u16 frame_height;
264 	u8 bit_depth;
265 	u8 subsampling_x;
266 	u8 subsampling_y;
267 	u8 padding;
268 };
269 
270 /*
271  * struct vdec_vp9_slice_frame - vp9 syntax used for decoding
272  */
273 struct vdec_vp9_slice_frame {
274 	struct vdec_vp9_slice_uncompressed_header uh;
275 	struct vdec_vp9_slice_compressed_header ch;
276 	struct vdec_vp9_slice_tiles tiles;
277 	struct vdec_vp9_slice_reference ref[3];
278 };
279 
280 /*
281  * struct vdec_vp9_slice_init_vsi - VSI used to initialize instance
282  */
283 struct vdec_vp9_slice_init_vsi {
284 	unsigned int architecture;
285 	unsigned int reserved;
286 	u64 core_vsi;
287 	/* default frame context's position in MicroP */
288 	u64 default_frame_ctx;
289 };
290 
291 /*
292  * struct vdec_vp9_slice_mem - memory address and size
293  */
294 struct vdec_vp9_slice_mem {
295 	union {
296 		u64 buf;
297 		dma_addr_t dma_addr;
298 	};
299 	union {
300 		size_t size;
301 		dma_addr_t dma_addr_end;
302 		u64 padding;
303 	};
304 };
305 
306 /*
307  * struct vdec_vp9_slice_bs - input buffer for decoding
308  */
309 struct vdec_vp9_slice_bs {
310 	struct vdec_vp9_slice_mem buf;
311 	struct vdec_vp9_slice_mem frame;
312 };
313 
314 /*
315  * struct vdec_vp9_slice_fb - frame buffer for decoding
316  */
317 struct vdec_vp9_slice_fb {
318 	struct vdec_vp9_slice_mem y;
319 	struct vdec_vp9_slice_mem c;
320 };
321 
322 /*
323  * struct vdec_vp9_slice_state - decoding state
324  */
325 struct vdec_vp9_slice_state {
326 	int err;
327 	unsigned int full;
328 	unsigned int timeout;
329 	unsigned int perf;
330 
331 	unsigned int crc[12];
332 };
333 
334 /**
335  * struct vdec_vp9_slice_vsi - exchange decoding information
336  *                             between Main CPU and MicroP
337  *
338  * @bs:	input buffer
339  * @fb:	output buffer
340  * @ref:	3 reference buffers
341  * @mv:	mv working buffer
342  * @seg:	segmentation working buffer
343  * @tile:	tile buffer
344  * @prob:	prob table buffer, used to set/update prob table
345  * @counts:	counts table buffer, used to update prob table
346  * @ube:	general buffer
347  * @trans:	trans buffer position in general buffer
348  * @err_map:	error buffer
349  * @row_info:	row info buffer
350  * @frame:	decoding syntax
351  * @state:	decoding state
352  */
353 struct vdec_vp9_slice_vsi {
354 	/* used in LAT stage */
355 	struct vdec_vp9_slice_bs bs;
356 	/* used in Core stage */
357 	struct vdec_vp9_slice_fb fb;
358 	struct vdec_vp9_slice_fb ref[3];
359 
360 	struct vdec_vp9_slice_mem mv[2];
361 	struct vdec_vp9_slice_mem seg[2];
362 	struct vdec_vp9_slice_mem tile;
363 	struct vdec_vp9_slice_mem prob;
364 	struct vdec_vp9_slice_mem counts;
365 
366 	/* LAT stage's output, Core stage's input */
367 	struct vdec_vp9_slice_mem ube;
368 	struct vdec_vp9_slice_mem trans;
369 	struct vdec_vp9_slice_mem err_map;
370 	struct vdec_vp9_slice_mem row_info;
371 
372 	/* decoding parameters */
373 	struct vdec_vp9_slice_frame frame;
374 
375 	struct vdec_vp9_slice_state state;
376 };
377 
378 /**
379  * struct vdec_vp9_slice_pfc - per-frame context that contains a local vsi.
380  *                             pass it from lat to core
381  *
382  * @vsi:	local vsi. copy to/from remote vsi before/after decoding
383  * @ref_idx:	reference buffer index
384  * @seq:	picture sequence
385  * @state:	decoding state
386  */
387 struct vdec_vp9_slice_pfc {
388 	struct vdec_vp9_slice_vsi vsi;
389 
390 	u64 ref_idx[3];
391 
392 	int seq;
393 
394 	/* LAT/Core CRC */
395 	struct vdec_vp9_slice_state state[2];
396 };
397 
398 /*
399  * enum vdec_vp9_slice_resolution_level
400  */
401 enum vdec_vp9_slice_resolution_level {
402 	VP9_RES_NONE,
403 	VP9_RES_FHD,
404 	VP9_RES_4K,
405 	VP9_RES_8K,
406 };
407 
408 /*
408  * struct vdec_vp9_slice_ref - picture's width & height should be kept
409  *                             for later decoding as a reference picture
411  */
412 struct vdec_vp9_slice_ref {
413 	unsigned int width;
414 	unsigned int height;
415 };
416 
417 /**
418  * struct vdec_vp9_slice_instance - represent one vp9 instance
419  *
420  * @ctx:		pointer to codec's context
421  * @vpu:		VPU instance
422  * @seq:		global picture sequence
423  * @level:		level of current resolution
424  * @width:		width of last picture
425  * @height:		height of last picture
426  * @frame_type:	frame_type of last picture
427  * @irq:		irq to Main CPU or MicroP
428  * @show_frame:	show_frame of last picture
429  * @dpb:		picture information (width/height) for reference
430  * @mv:		mv working buffer
431  * @seg:		segmentation working buffer
432  * @tile:		tile buffer
433  * @prob:		prob table buffer, used to set/update prob table
434  * @counts:		counts table buffer, used to update prob table
435  * @frame_ctx:		4 frame contexts as defined by the VP9 spec
436  * @frame_ctx_helper:	frame context in the v4l2-vp9 helper format
437  * @dirty:		state of each frame context
438  * @init_vsi:		vsi used to initialize the VP9 instance
439  * @vsi:		vsi used for decoding/flush ...
440  * @core_vsi:		vsi used for Core stage
441  *
442  * @sc_pfc:		per-frame context used for single-core decode
443  * @counts_map:	intermediate map used to fill @counts_helper
444  * @counts_helper:	counts table in the v4l2-vp9 helper format
445  */
446 struct vdec_vp9_slice_instance {
447 	struct mtk_vcodec_dec_ctx *ctx;
448 	struct vdec_vpu_inst vpu;
449 
450 	int seq;
451 
452 	enum vdec_vp9_slice_resolution_level level;
453 
454 	/* for resolution change and get_pic_info */
455 	unsigned int width;
456 	unsigned int height;
457 
458 	/* for last_frame_type */
459 	unsigned int frame_type;
460 	unsigned int irq;
461 
462 	unsigned int show_frame;
463 
464 	/* maintain vp9 reference frame state */
465 	struct vdec_vp9_slice_ref dpb[VB2_MAX_FRAME];
466 
467 	/*
468 	 * normal working buffers
469 	 * mv[0]/seg[0]/tile/prob/counts is used for LAT
470 	 * mv[1]/seg[1] is used for CORE
471 	 */
472 	struct mtk_vcodec_mem mv[2];
473 	struct mtk_vcodec_mem seg[2];
474 	struct mtk_vcodec_mem tile;
475 	struct mtk_vcodec_mem prob;
476 	struct mtk_vcodec_mem counts;
477 
478 	/* 4 prob tables */
479 	struct vdec_vp9_slice_frame_ctx frame_ctx[4];
480 	/* helper frame context in the v4l2-vp9 format */
481 	struct v4l2_vp9_frame_context frame_ctx_helper;
482 	unsigned char dirty[4];
483 
484 	/* MicroP vsi */
485 	union {
486 		struct vdec_vp9_slice_init_vsi *init_vsi;
487 		struct vdec_vp9_slice_vsi *vsi;
488 	};
489 	struct vdec_vp9_slice_vsi *core_vsi;
490 
491 	struct vdec_vp9_slice_pfc sc_pfc;
492 	struct vdec_vp9_slice_counts_map counts_map;
493 	struct v4l2_vp9_frame_symbol_counts counts_helper;
494 };
495 
496 /*
497  * All VP9 instances share this default frame context.
498  */
499 static struct vdec_vp9_slice_frame_ctx *vdec_vp9_slice_default_frame_ctx;
500 static DEFINE_MUTEX(vdec_vp9_slice_frame_ctx_lock);
501 
502 static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf);
503 
504 static int vdec_vp9_slice_init_default_frame_ctx(struct vdec_vp9_slice_instance *instance)
505 {
506 	struct vdec_vp9_slice_frame_ctx *remote_frame_ctx;
507 	struct vdec_vp9_slice_frame_ctx *frame_ctx;
508 	struct mtk_vcodec_dec_ctx *ctx;
509 	struct vdec_vp9_slice_init_vsi *vsi;
510 	int ret = 0;
511 
512 	ctx = instance->ctx;
513 	vsi = instance->vpu.vsi;
514 	if (!ctx || !vsi)
515 		return -EINVAL;
516 
517 	remote_frame_ctx = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
518 						     (u32)vsi->default_frame_ctx);
519 	if (!remote_frame_ctx) {
520 		mtk_vdec_err(ctx, "failed to map default frame ctx\n");
521 		return -EINVAL;
522 	}
523 
524 	mutex_lock(&vdec_vp9_slice_frame_ctx_lock);
525 	if (vdec_vp9_slice_default_frame_ctx)
526 		goto out;
527 
528 	frame_ctx = kmemdup(remote_frame_ctx, sizeof(*frame_ctx), GFP_KERNEL);
529 	if (!frame_ctx) {
530 		ret = -ENOMEM;
531 		goto out;
532 	}
533 
534 	vdec_vp9_slice_default_frame_ctx = frame_ctx;
535 
536 out:
537 	mutex_unlock(&vdec_vp9_slice_frame_ctx_lock);
538 
539 	return ret;
540 }
541 
542 static int vdec_vp9_slice_alloc_working_buffer(struct vdec_vp9_slice_instance *instance,
543 					       struct vdec_vp9_slice_vsi *vsi)
544 {
545 	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
546 	enum vdec_vp9_slice_resolution_level level;
547 	/* super blocks */
548 	unsigned int max_sb_w;
549 	unsigned int max_sb_h;
550 	unsigned int max_w;
551 	unsigned int max_h;
552 	unsigned int w;
553 	unsigned int h;
554 	size_t size;
555 	int ret;
556 	int i;
557 
558 	w = vsi->frame.uh.frame_width;
559 	h = vsi->frame.uh.frame_height;
560 
561 	if (w > VCODEC_DEC_4K_CODED_WIDTH ||
562 	    h > VCODEC_DEC_4K_CODED_HEIGHT) {
563 		return -EINVAL;
564 	} else if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) {
565 		/* 4K */
566 		level = VP9_RES_4K;
567 		max_w = VCODEC_DEC_4K_CODED_WIDTH;
568 		max_h = VCODEC_DEC_4K_CODED_HEIGHT;
569 	} else {
570 		/* FHD */
571 		level = VP9_RES_FHD;
572 		max_w = MTK_VDEC_MAX_W;
573 		max_h = MTK_VDEC_MAX_H;
574 	}
575 
576 	if (level == instance->level)
577 		return 0;
578 
579 	mtk_vdec_debug(ctx, "resolution level changed, from %u to %u, %ux%u",
580 		       instance->level, level, w, h);
581 
582 	max_sb_w = DIV_ROUND_UP(max_w, 64);
583 	max_sb_h = DIV_ROUND_UP(max_h, 64);
584 	ret = -ENOMEM;
585 
586 	/*
587 	 * A LAT flush must wait until the core is idle, otherwise the core
588 	 * would use buffers that have already been released.
589 	 */
590 
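	/*
	 * Collocated MV buffer: 576 bytes per 64x64 superblock plus two
	 * extra entries; the sizing is presumably dictated by the decoder
	 * hardware.
	 */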
591 	size = (max_sb_w * max_sb_h + 2) * 576;
592 	for (i = 0; i < 2; i++) {
593 		if (instance->mv[i].va)
594 			mtk_vcodec_mem_free(ctx, &instance->mv[i]);
595 		instance->mv[i].size = size;
596 		if (mtk_vcodec_mem_alloc(ctx, &instance->mv[i]))
597 			goto err;
598 	}
599 
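	/*
	 * Segmentation ID buffer: 32 bytes per 64x64 superblock plus 256
	 * bytes of padding; sizing presumably dictated by the decoder
	 * hardware.
	 */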
600 	size = (max_sb_w * max_sb_h * 32) + 256;
601 	for (i = 0; i < 2; i++) {
602 		if (instance->seg[i].va)
603 			mtk_vcodec_mem_free(ctx, &instance->seg[i]);
604 		instance->seg[i].size = size;
605 		if (mtk_vcodec_mem_alloc(ctx, &instance->seg[i]))
606 			goto err;
607 	}
608 
609 	if (!instance->tile.va) {
610 		instance->tile.size = VP9_TILE_BUF_SIZE;
611 		if (mtk_vcodec_mem_alloc(ctx, &instance->tile))
612 			goto err;
613 	}
614 
615 	if (!instance->prob.va) {
616 		instance->prob.size = VP9_PROB_BUF_SIZE;
617 		if (mtk_vcodec_mem_alloc(ctx, &instance->prob))
618 			goto err;
619 	}
620 
621 	if (!instance->counts.va) {
622 		instance->counts.size = VP9_COUNTS_BUF_SIZE;
623 		if (mtk_vcodec_mem_alloc(ctx, &instance->counts))
624 			goto err;
625 	}
626 
627 	instance->level = level;
628 	return 0;
629 
630 err:
631 	instance->level = VP9_RES_NONE;
632 	return ret;
633 }
634 
635 static void vdec_vp9_slice_free_working_buffer(struct vdec_vp9_slice_instance *instance)
636 {
637 	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
638 	int i;
639 
640 	for (i = 0; i < ARRAY_SIZE(instance->mv); i++) {
641 		if (instance->mv[i].va)
642 			mtk_vcodec_mem_free(ctx, &instance->mv[i]);
643 	}
644 	for (i = 0; i < ARRAY_SIZE(instance->seg); i++) {
645 		if (instance->seg[i].va)
646 			mtk_vcodec_mem_free(ctx, &instance->seg[i]);
647 	}
648 	if (instance->tile.va)
649 		mtk_vcodec_mem_free(ctx, &instance->tile);
650 	if (instance->prob.va)
651 		mtk_vcodec_mem_free(ctx, &instance->prob);
652 	if (instance->counts.va)
653 		mtk_vcodec_mem_free(ctx, &instance->counts);
654 
655 	instance->level = VP9_RES_NONE;
656 }
657 
658 static void vdec_vp9_slice_vsi_from_remote(struct vdec_vp9_slice_vsi *vsi,
659 					   struct vdec_vp9_slice_vsi *remote_vsi,
660 					   int skip)
661 {
662 	struct vdec_vp9_slice_frame *rf;
663 	struct vdec_vp9_slice_frame *f;
664 
665 	/*
666 	 * compressed header
667 	 * dequant
668 	 * buffer position
669 	 * decode state
670 	 */
671 	if (!skip) {
672 		rf = &remote_vsi->frame;
673 		f = &vsi->frame;
674 		memcpy(&f->ch, &rf->ch, sizeof(f->ch));
675 		memcpy(&f->uh.dequant, &rf->uh.dequant, sizeof(f->uh.dequant));
676 		memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans));
677 	}
678 
679 	memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state));
680 }
681 
682 static void vdec_vp9_slice_vsi_to_remote(struct vdec_vp9_slice_vsi *vsi,
683 					 struct vdec_vp9_slice_vsi *remote_vsi)
684 {
685 	memcpy(remote_vsi, vsi, sizeof(*vsi));
686 }
687 
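/*
 * Return the first mi (8x8 block) position covered by tile @idx: mi_num is
 * rounded up to whole 64-pixel superblocks, the superblocks are split evenly
 * by tile_log2, and the result is converted back to mi units and clamped.
 * For example, a 1080-line frame has mi_num = 135 and sbs = 17; with
 * tile_log2 = 1 the two tile rows start at mi 0 and mi 64, and idx = 2
 * yields min(136, 135) = 135 as the end offset.
 */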
688 static int vdec_vp9_slice_tile_offset(int idx, int mi_num, int tile_log2)
689 {
690 	int sbs = (mi_num + 7) >> 3;
691 	int offset = ((idx * sbs) >> tile_log2) << 3;
692 
693 	return min(offset, mi_num);
694 }
695 
696 static
697 int vdec_vp9_slice_setup_single_from_src_to_dst(struct vdec_vp9_slice_instance *instance)
698 {
699 	struct vb2_v4l2_buffer *src;
700 	struct vb2_v4l2_buffer *dst;
701 
702 	src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
703 	if (!src)
704 		return -EINVAL;
705 
706 	dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
707 	if (!dst)
708 		return -EINVAL;
709 
710 	v4l2_m2m_buf_copy_metadata(src, dst, true);
711 
712 	return 0;
713 }
714 
715 static int vdec_vp9_slice_setup_lat_from_src_buf(struct vdec_vp9_slice_instance *instance,
716 						 struct vdec_lat_buf *lat_buf)
717 {
718 	struct vb2_v4l2_buffer *src;
719 	struct vb2_v4l2_buffer *dst;
720 
721 	src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
722 	if (!src)
723 		return -EINVAL;
724 
725 	lat_buf->src_buf_req = src->vb2_buf.req_obj.req;
726 
727 	dst = &lat_buf->ts_info;
728 	v4l2_m2m_buf_copy_metadata(src, dst, true);
729 	return 0;
730 }
731 
732 static void vdec_vp9_slice_setup_hdr(struct vdec_vp9_slice_instance *instance,
733 				     struct vdec_vp9_slice_uncompressed_header *uh,
734 				     struct v4l2_ctrl_vp9_frame *hdr)
735 {
736 	int i;
737 
738 	uh->profile = hdr->profile;
739 	uh->last_frame_type = instance->frame_type;
740 	uh->frame_type = !HDR_FLAG(KEY_FRAME);
741 	uh->last_show_frame = instance->show_frame;
742 	uh->show_frame = HDR_FLAG(SHOW_FRAME);
743 	uh->error_resilient_mode = HDR_FLAG(ERROR_RESILIENT);
744 	uh->bit_depth = hdr->bit_depth;
745 	uh->last_frame_width = instance->width;
746 	uh->last_frame_height = instance->height;
747 	uh->frame_width = hdr->frame_width_minus_1 + 1;
748 	uh->frame_height = hdr->frame_height_minus_1 + 1;
749 	uh->intra_only = HDR_FLAG(INTRA_ONLY);
750 	/* map v4l2 enum to values defined in VP9 spec for firmware */
751 	switch (hdr->reset_frame_context) {
752 	case V4L2_VP9_RESET_FRAME_CTX_NONE:
753 		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0;
754 		break;
755 	case V4L2_VP9_RESET_FRAME_CTX_SPEC:
756 		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_SPEC;
757 		break;
758 	case V4L2_VP9_RESET_FRAME_CTX_ALL:
759 		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_ALL;
760 		break;
761 	default:
762 		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0;
763 		break;
764 	}
765 	/*
766 	 * ref_frame_sign_bias specifies the intended direction
767 	 * of the motion vector in time for each reference frame.
768 	 * - INTRA_FRAME = 0,
769 	 * - LAST_FRAME = 1,
770 	 * - GOLDEN_FRAME = 2,
771 	 * - ALTREF_FRAME = 3,
772 	 * ref_frame_sign_bias[INTRA_FRAME] is always 0
773 	 * and VDA only passes the other 3 directions
774 	 */
775 	uh->ref_frame_sign_bias[0] = 0;
776 	for (i = 0; i < 3; i++)
777 		uh->ref_frame_sign_bias[i + 1] =
778 			!!(hdr->ref_frame_sign_bias & (1 << i));
779 	uh->allow_high_precision_mv = HDR_FLAG(ALLOW_HIGH_PREC_MV);
780 	uh->interpolation_filter = hdr->interpolation_filter;
781 	uh->refresh_frame_context = HDR_FLAG(REFRESH_FRAME_CTX);
782 	uh->frame_parallel_decoding_mode = HDR_FLAG(PARALLEL_DEC_MODE);
783 	uh->frame_context_idx = hdr->frame_context_idx;
784 
785 	/* tile info */
786 	uh->tile_cols_log2 = hdr->tile_cols_log2;
787 	uh->tile_rows_log2 = hdr->tile_rows_log2;
788 
789 	uh->uncompressed_header_size = hdr->uncompressed_header_size;
790 	uh->header_size_in_bytes = hdr->compressed_header_size;
791 }
792 
793 static void vdec_vp9_slice_setup_frame_ctx(struct vdec_vp9_slice_instance *instance,
794 					   struct vdec_vp9_slice_uncompressed_header *uh,
795 					   struct v4l2_ctrl_vp9_frame *hdr)
796 {
797 	int error_resilient_mode;
798 	int reset_frame_context;
799 	int key_frame;
800 	int intra_only;
801 	int i;
802 
803 	key_frame = HDR_FLAG(KEY_FRAME);
804 	intra_only = HDR_FLAG(INTRA_ONLY);
805 	error_resilient_mode = HDR_FLAG(ERROR_RESILIENT);
806 	reset_frame_context = uh->reset_frame_context;
807 
808 	/*
809 	 * according to "6.2 Uncompressed header syntax" in
810 	 * "VP9 Bitstream & Decoding Process Specification",
811 	 * reset @frame_context_idx when (FrameIsIntra || error_resilient_mode)
812 	 */
813 	if (key_frame || intra_only || error_resilient_mode) {
814 		/*
815 		 * @reset_frame_context specifies
816 		 * whether the frame context should be
817 		 * reset to default values:
818 		 * 0 or 1 means do not reset any frame context
819 		 * 2 resets just the context specified in the frame header
820 		 * 3 resets all contexts
821 		 */
822 		if (key_frame || error_resilient_mode ||
823 		    reset_frame_context == 3) {
824 			/* use default table */
825 			for (i = 0; i < 4; i++)
826 				instance->dirty[i] = 0;
827 		} else if (reset_frame_context == 2) {
828 			instance->dirty[uh->frame_context_idx] = 0;
829 		}
830 		uh->frame_context_idx = 0;
831 	}
832 }
833 
834 static void vdec_vp9_slice_setup_loop_filter(struct vdec_vp9_slice_uncompressed_header *uh,
835 					     struct v4l2_vp9_loop_filter *lf)
836 {
837 	int i;
838 
839 	uh->loop_filter_level = lf->level;
840 	uh->loop_filter_sharpness = lf->sharpness;
841 	uh->loop_filter_delta_enabled = LF_FLAG(DELTA_ENABLED);
842 	for (i = 0; i < 4; i++)
843 		uh->loop_filter_ref_deltas[i] = lf->ref_deltas[i];
844 	for (i = 0; i < 2; i++)
845 		uh->loop_filter_mode_deltas[i] = lf->mode_deltas[i];
846 }
847 
848 static void vdec_vp9_slice_setup_quantization(struct vdec_vp9_slice_uncompressed_header *uh,
849 					      struct v4l2_vp9_quantization *quant)
850 {
851 	uh->base_q_idx = quant->base_q_idx;
852 	uh->delta_q_y_dc = quant->delta_q_y_dc;
853 	uh->delta_q_uv_dc = quant->delta_q_uv_dc;
854 	uh->delta_q_uv_ac = quant->delta_q_uv_ac;
855 }
856 
857 static void vdec_vp9_slice_setup_segmentation(struct vdec_vp9_slice_uncompressed_header *uh,
858 					      struct v4l2_vp9_segmentation *seg)
859 {
860 	int i;
861 	int j;
862 
863 	uh->segmentation_enabled = SEG_FLAG(ENABLED);
864 	uh->segmentation_update_map = SEG_FLAG(UPDATE_MAP);
865 	for (i = 0; i < 7; i++)
866 		uh->segmentation_tree_probs[i] = seg->tree_probs[i];
867 	uh->segmentation_temporal_update = SEG_FLAG(TEMPORAL_UPDATE);
868 	for (i = 0; i < 3; i++)
869 		uh->segmentation_pred_prob[i] = seg->pred_probs[i];
870 	uh->segmentation_update_data = SEG_FLAG(UPDATE_DATA);
871 	uh->segmentation_abs_or_delta_update = SEG_FLAG(ABS_OR_DELTA_UPDATE);
872 	for (i = 0; i < 8; i++) {
873 		uh->feature_enabled[i] = seg->feature_enabled[i];
874 		for (j = 0; j < 4; j++)
875 			uh->feature_value[i][j] = seg->feature_data[i][j];
876 	}
877 }
878 
879 static int vdec_vp9_slice_setup_tile(struct vdec_vp9_slice_vsi *vsi,
880 				     struct v4l2_ctrl_vp9_frame *hdr)
881 {
882 	unsigned int rows_log2;
883 	unsigned int cols_log2;
884 	unsigned int rows;
885 	unsigned int cols;
886 	unsigned int mi_rows;
887 	unsigned int mi_cols;
888 	struct vdec_vp9_slice_tiles *tiles;
889 	int offset;
890 	int start;
891 	int end;
892 	int i;
893 
894 	rows_log2 = hdr->tile_rows_log2;
895 	cols_log2 = hdr->tile_cols_log2;
896 	rows = 1 << rows_log2;
897 	cols = 1 << cols_log2;
898 	tiles = &vsi->frame.tiles;
899 	tiles->actual_rows = 0;
900 
901 	if (rows > 4 || cols > 64)
902 		return -EINVAL;
903 
904 	/* setup mi rows/cols information */
905 	mi_rows = (hdr->frame_height_minus_1 + 1 + 7) >> 3;
906 	mi_cols = (hdr->frame_width_minus_1 + 1 + 7) >> 3;
907 
908 	for (i = 0; i < rows; i++) {
909 		start = vdec_vp9_slice_tile_offset(i, mi_rows, rows_log2);
910 		end = vdec_vp9_slice_tile_offset(i + 1, mi_rows, rows_log2);
911 		offset = end - start;
912 		tiles->mi_rows[i] = (offset + 7) >> 3;
913 		if (tiles->mi_rows[i])
914 			tiles->actual_rows++;
915 	}
916 
917 	for (i = 0; i < cols; i++) {
918 		start = vdec_vp9_slice_tile_offset(i, mi_cols, cols_log2);
919 		end = vdec_vp9_slice_tile_offset(i + 1, mi_cols, cols_log2);
920 		offset = end - start;
921 		tiles->mi_cols[i] = (offset + 7) >> 3;
922 	}
923 
924 	return 0;
925 }
926 
927 static void vdec_vp9_slice_setup_state(struct vdec_vp9_slice_vsi *vsi)
928 {
929 	memset(&vsi->state, 0, sizeof(vsi->state));
930 }
931 
932 static void vdec_vp9_slice_setup_ref_idx(struct vdec_vp9_slice_pfc *pfc,
933 					 struct v4l2_ctrl_vp9_frame *hdr)
934 {
935 	pfc->ref_idx[0] = hdr->last_frame_ts;
936 	pfc->ref_idx[1] = hdr->golden_frame_ts;
937 	pfc->ref_idx[2] = hdr->alt_frame_ts;
938 }
939 
940 static int vdec_vp9_slice_setup_pfc(struct vdec_vp9_slice_instance *instance,
941 				    struct vdec_vp9_slice_pfc *pfc)
942 {
943 	struct v4l2_ctrl_vp9_frame *hdr;
944 	struct vdec_vp9_slice_uncompressed_header *uh;
945 	struct v4l2_ctrl *hdr_ctrl;
946 	struct vdec_vp9_slice_vsi *vsi;
947 	int ret;
948 
949 	/* frame header */
950 	hdr_ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_VP9_FRAME);
951 	if (!hdr_ctrl || !hdr_ctrl->p_cur.p)
952 		return -EINVAL;
953 
954 	hdr = hdr_ctrl->p_cur.p;
955 	vsi = &pfc->vsi;
956 	uh = &vsi->frame.uh;
957 
958 	/* setup vsi information */
959 	vdec_vp9_slice_setup_hdr(instance, uh, hdr);
960 	vdec_vp9_slice_setup_frame_ctx(instance, uh, hdr);
961 	vdec_vp9_slice_setup_loop_filter(uh, &hdr->lf);
962 	vdec_vp9_slice_setup_quantization(uh, &hdr->quant);
963 	vdec_vp9_slice_setup_segmentation(uh, &hdr->seg);
964 	ret = vdec_vp9_slice_setup_tile(vsi, hdr);
965 	if (ret)
966 		return ret;
967 	vdec_vp9_slice_setup_state(vsi);
968 
969 	/* core stage needs buffer index to get ref y/c ... */
970 	vdec_vp9_slice_setup_ref_idx(pfc, hdr);
971 
972 	pfc->seq = instance->seq;
973 	instance->seq++;
974 
975 	return 0;
976 }
977 
978 static int vdec_vp9_slice_setup_lat_buffer(struct vdec_vp9_slice_instance *instance,
979 					   struct vdec_vp9_slice_vsi *vsi,
980 					   struct mtk_vcodec_mem *bs,
981 					   struct vdec_lat_buf *lat_buf)
982 {
983 	int i;
984 
985 	vsi->bs.buf.dma_addr = bs->dma_addr;
986 	vsi->bs.buf.size = bs->size;
987 	vsi->bs.frame.dma_addr = bs->dma_addr;
988 	vsi->bs.frame.size = bs->size;
989 
990 	for (i = 0; i < 2; i++) {
991 		vsi->mv[i].dma_addr = instance->mv[i].dma_addr;
992 		vsi->mv[i].size = instance->mv[i].size;
993 	}
994 	for (i = 0; i < 2; i++) {
995 		vsi->seg[i].dma_addr = instance->seg[i].dma_addr;
996 		vsi->seg[i].size = instance->seg[i].size;
997 	}
998 	vsi->tile.dma_addr = instance->tile.dma_addr;
999 	vsi->tile.size = instance->tile.size;
1000 	vsi->prob.dma_addr = instance->prob.dma_addr;
1001 	vsi->prob.size = instance->prob.size;
1002 	vsi->counts.dma_addr = instance->counts.dma_addr;
1003 	vsi->counts.size = instance->counts.size;
1004 
1005 	vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
1006 	vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
1007 	vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr;
1008 	/* used to store trans end */
1009 	vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr;
1010 	vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
1011 	vsi->err_map.size = lat_buf->wdma_err_addr.size;
1012 
1013 	vsi->row_info.buf = 0;
1014 	vsi->row_info.size = 0;
1015 
1016 	return 0;
1017 }
1018 
1019 static int vdec_vp9_slice_setup_prob_buffer(struct vdec_vp9_slice_instance *instance,
1020 					    struct vdec_vp9_slice_vsi *vsi)
1021 {
1022 	struct vdec_vp9_slice_frame_ctx *frame_ctx;
1023 	struct vdec_vp9_slice_uncompressed_header *uh;
1024 
1025 	uh = &vsi->frame.uh;
1026 
1027 	mtk_vdec_debug(instance->ctx, "ctx dirty %u idx %d\n",
1028 		       instance->dirty[uh->frame_context_idx],
1029 		       uh->frame_context_idx);
1030 
1031 	if (instance->dirty[uh->frame_context_idx])
1032 		frame_ctx = &instance->frame_ctx[uh->frame_context_idx];
1033 	else
1034 		frame_ctx = vdec_vp9_slice_default_frame_ctx;
1035 	memcpy(instance->prob.va, frame_ctx, sizeof(*frame_ctx));
1036 
1037 	return 0;
1038 }
1039 
1040 static void vdec_vp9_slice_setup_seg_buffer(struct vdec_vp9_slice_instance *instance,
1041 					    struct vdec_vp9_slice_vsi *vsi,
1042 					    struct mtk_vcodec_mem *buf)
1043 {
1044 	struct vdec_vp9_slice_uncompressed_header *uh;
1045 
1046 	/* reset segment buffer */
1047 	uh = &vsi->frame.uh;
1048 	if (uh->frame_type == 0 ||
1049 	    uh->intra_only ||
1050 	    uh->error_resilient_mode ||
1051 	    uh->frame_width != instance->width ||
1052 	    uh->frame_height != instance->height) {
1053 		mtk_vdec_debug(instance->ctx, "reset seg\n");
1054 		memset(buf->va, 0, buf->size);
1055 	}
1056 }
1057 
1058 /*
1059  * parse tiles according to `6.4 Decode tiles syntax`
1060  * in "vp9-bitstream-specification"
1061  *
1062  * A frame contains the uncompressed header, the compressed header and
1063  * several tiles. This function parses each tile's position and size and
1064  * stores them in the tile buffer for decoding.
1065  */
1066 static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *instance,
1067 					    struct vdec_vp9_slice_vsi *vsi,
1068 					    struct mtk_vcodec_mem *bs)
1069 {
1070 	struct vdec_vp9_slice_uncompressed_header *uh;
1071 	unsigned int rows_log2;
1072 	unsigned int cols_log2;
1073 	unsigned int rows;
1074 	unsigned int cols;
1075 	unsigned int mi_row;
1076 	unsigned int mi_col;
1077 	unsigned int offset;
1078 	unsigned int pa;
1079 	unsigned int size;
1080 	struct vdec_vp9_slice_tiles *tiles;
1081 	unsigned char *pos;
1082 	unsigned char *end;
1083 	unsigned char *va;
1084 	unsigned int *tb;
1085 	int i;
1086 	int j;
1087 
1088 	uh = &vsi->frame.uh;
1089 	rows_log2 = uh->tile_rows_log2;
1090 	cols_log2 = uh->tile_cols_log2;
1091 	rows = 1 << rows_log2;
1092 	cols = 1 << cols_log2;
1093 
1094 	if (rows > 4 || cols > 64) {
1095 		mtk_vdec_err(instance->ctx, "tile_rows %u tile_cols %u\n", rows, cols);
1096 		return -EINVAL;
1097 	}
1098 
1099 	offset = uh->uncompressed_header_size +
1100 		uh->header_size_in_bytes;
1101 	if (bs->size <= offset) {
1102 		mtk_vdec_err(instance->ctx, "bs size %zu tile offset %u\n", bs->size, offset);
1103 		return -EINVAL;
1104 	}
1105 
1106 	tiles = &vsi->frame.tiles;
1107 	/* setup tile buffer */
1108 
1109 	va = (unsigned char *)bs->va;
1110 	pos = va + offset;
1111 	end = va + bs->size;
1112 	/* bitstream dma address truncated to 32 bits for the tile buffer entries */
1113 	pa = (unsigned int)bs->dma_addr + offset;
1114 	tb = instance->tile.va;
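	/*
	 * Each tile that belongs to a non-empty row gets a 4-word entry in
	 * the tile buffer (layout inferred from the packing below):
	 *   word0: tile size in bits plus the bit offset of the tile start
	 *          within its 16-byte-aligned address
	 *   word1: tile start dma address aligned down to 16 bytes
	 *   word2: bit offset of the tile start within that 16-byte window
	 *   word3: (mi_rows - 1) << 6 | (mi_cols - 1) of this tile
	 */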
1115 	for (i = 0; i < rows; i++) {
1116 		for (j = 0; j < cols; j++) {
1117 			if (i == rows - 1 &&
1118 			    j == cols - 1) {
1119 				size = (unsigned int)(end - pos);
1120 			} else {
1121 				if (end - pos < 4)
1122 					return -EINVAL;
1123 
1124 				size = (pos[0] << 24) | (pos[1] << 16) |
1125 					(pos[2] << 8) | pos[3];
1126 				pos += 4;
1127 				pa += 4;
1128 				offset += 4;
1129 				if (end - pos < size)
1130 					return -EINVAL;
1131 			}
1132 			tiles->size[i][j] = size;
1133 			if (tiles->mi_rows[i]) {
1134 				*tb++ = (size << 3) + ((offset << 3) & 0x7f);
1135 				*tb++ = pa & ~0xf;
1136 				*tb++ = (pa << 3) & 0x7f;
1137 				mi_row = (tiles->mi_rows[i] - 1) & 0x1ff;
1138 				mi_col = (tiles->mi_cols[j] - 1) & 0x3f;
1139 				*tb++ = (mi_row << 6) + mi_col;
1140 			}
1141 			pos += size;
1142 			pa += size;
1143 			offset += size;
1144 		}
1145 	}
1146 
1147 	return 0;
1148 }
1149 
1150 static int vdec_vp9_slice_setup_lat(struct vdec_vp9_slice_instance *instance,
1151 				    struct mtk_vcodec_mem *bs,
1152 				    struct vdec_lat_buf *lat_buf,
1153 				    struct vdec_vp9_slice_pfc *pfc)
1154 {
1155 	struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
1156 	int ret;
1157 
1158 	ret = vdec_vp9_slice_setup_lat_from_src_buf(instance, lat_buf);
1159 	if (ret)
1160 		goto err;
1161 
1162 	ret = vdec_vp9_slice_setup_pfc(instance, pfc);
1163 	if (ret)
1164 		goto err;
1165 
1166 	ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi);
1167 	if (ret)
1168 		goto err;
1169 
1170 	ret = vdec_vp9_slice_setup_lat_buffer(instance, vsi, bs, lat_buf);
1171 	if (ret)
1172 		goto err;
1173 
1174 	vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]);
1175 
1176 	/* setup prob/tile buffers for LAT */
1177 
1178 	ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi);
1179 	if (ret)
1180 		goto err;
1181 
1182 	ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs);
1183 	if (ret)
1184 		goto err;
1185 
1186 	return 0;
1187 
1188 err:
1189 	return ret;
1190 }
1191 
1192 /* clang stack usage explodes if this is inlined */
1193 static noinline_for_stack
1194 void vdec_vp9_slice_map_counts_eob_coef(unsigned int i, unsigned int j, unsigned int k,
1195 					struct vdec_vp9_slice_frame_counts *counts,
1196 					struct v4l2_vp9_frame_symbol_counts *counts_helper)
1197 {
1198 	u32 l = 0, m;
1199 
1200 	/*
1201 	 * helper e0 -> mtk e0
1202 	 * helper e1 -> mtk c3
1203 	 * helper c0 -> c0
1204 	 * helper c1 -> c1
1205 	 * helper c2 -> c2
1206 	 */
1207 	for (m = 0; m < 3; m++) {
1208 		counts_helper->coeff[i][j][k][l][m] =
1209 			(u32 (*)[3]) & counts->coef_probs[i][j][k].band_0[m];
1210 		counts_helper->eob[i][j][k][l][m][0] =
1211 			&counts->eob_branch[i][j][k].band_0[m];
1212 		counts_helper->eob[i][j][k][l][m][1] =
1213 			&counts->coef_probs[i][j][k].band_0[m][3];
1214 	}
1215 
1216 	for (l = 1; l < 6; l++) {
1217 		for (m = 0; m < 6; m++) {
1218 			counts_helper->coeff[i][j][k][l][m] =
1219 				(u32 (*)[3]) & counts->coef_probs[i][j][k].band_1_5[l - 1][m];
1220 			counts_helper->eob[i][j][k][l][m][0] =
1221 				&counts->eob_branch[i][j][k].band_1_5[l - 1][m];
1222 			counts_helper->eob[i][j][k][l][m][1] =
1223 				&counts->coef_probs[i][j][k].band_1_5[l - 1][m][3];
1224 		}
1225 	}
1226 }
1227 
1228 static void vdec_vp9_slice_counts_map_helper(struct vdec_vp9_slice_counts_map *counts_map,
1229 					     struct vdec_vp9_slice_frame_counts *counts,
1230 					     struct v4l2_vp9_frame_symbol_counts *counts_helper)
1231 {
1232 	int i, j, k;
1233 
1234 	counts_helper->partition = &counts->partition;
1235 	counts_helper->intra_inter = &counts->intra_inter;
1236 	counts_helper->tx32p = &counts->tx_p32x32;
1237 	counts_helper->tx16p = &counts->tx_p16x16;
1238 	counts_helper->tx8p = &counts->tx_p8x8;
1239 	counts_helper->uv_mode = &counts->uv_mode;
1240 
1241 	counts_helper->comp = &counts->comp_inter;
1242 	counts_helper->comp_ref = &counts->comp_ref;
1243 	counts_helper->single_ref = &counts->single_ref;
1244 	counts_helper->mv_mode = &counts->inter_mode;
1245 	counts_helper->mv_joint = &counts->joint;
1246 
1247 	for (i = 0; i < ARRAY_SIZE(counts_map->skip); i++)
1248 		memcpy(counts_map->skip[i], counts->skip[i],
1249 		       sizeof(counts_map->skip[0]));
1250 	counts_helper->skip = &counts_map->skip;
1251 
1252 	for (i = 0; i < ARRAY_SIZE(counts_map->y_mode); i++)
1253 		memcpy(counts_map->y_mode[i], counts->y_mode[i],
1254 		       sizeof(counts_map->y_mode[0]));
1255 	counts_helper->y_mode = &counts_map->y_mode;
1256 
1257 	for (i = 0; i < ARRAY_SIZE(counts_map->filter); i++)
1258 		memcpy(counts_map->filter[i], counts->switchable_interp[i],
1259 		       sizeof(counts_map->filter[0]));
1260 	counts_helper->filter = &counts_map->filter;
1261 
1262 	for (i = 0; i < ARRAY_SIZE(counts_map->sign); i++)
1263 		memcpy(counts_map->sign[i], counts->mvcomp[i].sign,
1264 		       sizeof(counts_map->sign[0]));
1265 	counts_helper->sign = &counts_map->sign;
1266 
1267 	for (i = 0; i < ARRAY_SIZE(counts_map->classes); i++)
1268 		memcpy(counts_map->classes[i], counts->mvcomp[i].classes,
1269 		       sizeof(counts_map->classes[0]));
1270 	counts_helper->classes = &counts_map->classes;
1271 
1272 	for (i = 0; i < ARRAY_SIZE(counts_map->class0); i++)
1273 		memcpy(counts_map->class0[i], counts->mvcomp[i].class0,
1274 		       sizeof(counts_map->class0[0]));
1275 	counts_helper->class0 = &counts_map->class0;
1276 
1277 	for (i = 0; i < ARRAY_SIZE(counts_map->bits); i++)
1278 		for (j = 0; j < ARRAY_SIZE(counts_map->bits[0]); j++)
1279 			memcpy(counts_map->bits[i][j], counts->mvcomp[i].bits[j],
1280 			       sizeof(counts_map->bits[0][0]));
1281 	counts_helper->bits = &counts_map->bits;
1282 
1283 	for (i = 0; i < ARRAY_SIZE(counts_map->class0_fp); i++)
1284 		for (j = 0; j < ARRAY_SIZE(counts_map->class0_fp[0]); j++)
1285 			memcpy(counts_map->class0_fp[i][j], counts->mvcomp[i].class0_fp[j],
1286 			       sizeof(counts_map->class0_fp[0][0]));
1287 	counts_helper->class0_fp = &counts_map->class0_fp;
1288 
1289 	for (i = 0; i < ARRAY_SIZE(counts_map->fp); i++)
1290 		memcpy(counts_map->fp[i], counts->mvcomp[i].fp,
1291 		       sizeof(counts_map->fp[0]));
1292 	counts_helper->fp = &counts_map->fp;
1293 
1294 	for (i = 0; i < ARRAY_SIZE(counts_map->class0_hp); i++)
1295 		memcpy(counts_map->class0_hp[i], counts->mvcomp[i].class0_hp,
1296 		       sizeof(counts_map->class0_hp[0]));
1297 	counts_helper->class0_hp = &counts_map->class0_hp;
1298 
1299 	for (i = 0; i < ARRAY_SIZE(counts_map->hp); i++)
1300 		memcpy(counts_map->hp[i], counts->mvcomp[i].hp, sizeof(counts_map->hp[0]));
1301 
1302 	counts_helper->hp = &counts_map->hp;
1303 
1304 	for (i = 0; i < 4; i++)
1305 		for (j = 0; j < 2; j++)
1306 			for (k = 0; k < 2; k++)
1307 				vdec_vp9_slice_map_counts_eob_coef(i, j, k, counts, counts_helper);
1308 }
1309 
1310 static void vdec_vp9_slice_map_to_coef(unsigned int i, unsigned int j, unsigned int k,
1311 				       struct vdec_vp9_slice_frame_ctx *frame_ctx,
1312 				       struct v4l2_vp9_frame_context *frame_ctx_helper)
1313 {
1314 	u32 l, m;
1315 
1316 	for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) {
1317 		for (m = 0; m < VP9_BAND_6(l); m++) {
1318 			memcpy(frame_ctx_helper->coef[i][j][k][l][m],
1319 			       frame_ctx->coef_probs[i][j][k][l].probs[m],
1320 			       sizeof(frame_ctx_helper->coef[i][j][k][l][0]));
1321 		}
1322 	}
1323 }
1324 
1325 static void vdec_vp9_slice_map_from_coef(unsigned int i, unsigned int j, unsigned int k,
1326 					 struct vdec_vp9_slice_frame_ctx *frame_ctx,
1327 					 struct v4l2_vp9_frame_context *frame_ctx_helper)
1328 {
1329 	u32 l, m;
1330 
1331 	for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) {
1332 		for (m = 0; m < VP9_BAND_6(l); m++) {
1333 			memcpy(frame_ctx->coef_probs[i][j][k][l].probs[m],
1334 			       frame_ctx_helper->coef[i][j][k][l][m],
1335 			       sizeof(frame_ctx_helper->coef[i][j][k][l][0]));
1336 		}
1337 	}
1338 }
1339 
1340 static
1341 void vdec_vp9_slice_framectx_map_helper(bool frame_is_intra,
1342 					struct vdec_vp9_slice_frame_ctx *pre_frame_ctx,
1343 					struct vdec_vp9_slice_frame_ctx *frame_ctx,
1344 					struct v4l2_vp9_frame_context *frame_ctx_helper)
1345 {
1346 	struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv;
1347 	u32 i, j, k;
1348 
1349 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++)
1350 		for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++)
1351 			for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++)
1352 				vdec_vp9_slice_map_to_coef(i, j, k, pre_frame_ctx,
1353 							   frame_ctx_helper);
1354 
1355 	/*
1356 	 * Use the previous probs when the frame is not intra; otherwise
1357 	 * use the probs already updated by the compressed header parse.
1358 	 */
1359 	if (!frame_is_intra)
1360 		frame_ctx = pre_frame_ctx;
1361 
1362 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++)
1363 		memcpy(frame_ctx_helper->tx8[i], frame_ctx->tx_p8x8[i],
1364 		       sizeof(frame_ctx_helper->tx8[0]));
1365 
1366 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++)
1367 		memcpy(frame_ctx_helper->tx16[i], frame_ctx->tx_p16x16[i],
1368 		       sizeof(frame_ctx_helper->tx16[0]));
1369 
1370 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++)
1371 		memcpy(frame_ctx_helper->tx32[i], frame_ctx->tx_p32x32[i],
1372 		       sizeof(frame_ctx_helper->tx32[0]));
1373 
1374 	memcpy(frame_ctx_helper->skip, frame_ctx->skip_probs, sizeof(frame_ctx_helper->skip));
1375 
1376 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++)
1377 		memcpy(frame_ctx_helper->inter_mode[i], frame_ctx->inter_mode_probs[i],
1378 		       sizeof(frame_ctx_helper->inter_mode[0]));
1379 
1380 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++)
1381 		memcpy(frame_ctx_helper->interp_filter[i], frame_ctx->switch_interp_prob[i],
1382 		       sizeof(frame_ctx_helper->interp_filter[0]));
1383 
1384 	memcpy(frame_ctx_helper->is_inter, frame_ctx->intra_inter_prob,
1385 	       sizeof(frame_ctx_helper->is_inter));
1386 
1387 	memcpy(frame_ctx_helper->comp_mode, frame_ctx->comp_inter_prob,
1388 	       sizeof(frame_ctx_helper->comp_mode));
1389 
1390 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++)
1391 		memcpy(frame_ctx_helper->single_ref[i], frame_ctx->single_ref_prob[i],
1392 		       sizeof(frame_ctx_helper->single_ref[0]));
1393 
1394 	memcpy(frame_ctx_helper->comp_ref, frame_ctx->comp_ref_prob,
1395 	       sizeof(frame_ctx_helper->comp_ref));
1396 
1397 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++)
1398 		memcpy(frame_ctx_helper->y_mode[i], frame_ctx->y_mode_prob[i],
1399 		       sizeof(frame_ctx_helper->y_mode[0]));
1400 
1401 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++)
1402 		memcpy(frame_ctx_helper->uv_mode[i], frame_ctx->uv_mode_prob[i],
1403 		       sizeof(frame_ctx_helper->uv_mode[0]));
1404 
1405 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++)
1406 		memcpy(frame_ctx_helper->partition[i], frame_ctx->partition_prob[i],
1407 		       sizeof(frame_ctx_helper->partition[0]));
1408 
1409 	memcpy(mv->joint, frame_ctx->joint, sizeof(mv->joint));
1410 
1411 	for (i = 0; i < ARRAY_SIZE(mv->sign); i++)
1412 		mv->sign[i] = frame_ctx->sign_classes[i].sign;
1413 
1414 	for (i = 0; i < ARRAY_SIZE(mv->classes); i++)
1415 		memcpy(mv->classes[i], frame_ctx->sign_classes[i].classes,
1416 		       sizeof(mv->classes[i]));
1417 
1418 	for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++)
1419 		mv->class0_bit[i] = frame_ctx->class0_bits[i].class0[0];
1420 
1421 	for (i = 0; i < ARRAY_SIZE(mv->bits); i++)
1422 		memcpy(mv->bits[i], frame_ctx->class0_bits[i].bits, sizeof(mv->bits[0]));
1423 
1424 	for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++)
1425 		for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++)
1426 			memcpy(mv->class0_fr[i][j], frame_ctx->class0_fp_hp[i].class0_fp[j],
1427 			       sizeof(mv->class0_fr[0][0]));
1428 
1429 	for (i = 0; i < ARRAY_SIZE(mv->fr); i++)
1430 		memcpy(mv->fr[i], frame_ctx->class0_fp_hp[i].fp, sizeof(mv->fr[0]));
1431 
1432 	for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++)
1433 		mv->class0_hp[i] = frame_ctx->class0_fp_hp[i].class0_hp;
1434 
1435 	for (i = 0; i < ARRAY_SIZE(mv->hp); i++)
1436 		mv->hp[i] = frame_ctx->class0_fp_hp[i].hp;
1437 }
1438 
1439 static void vdec_vp9_slice_helper_map_framectx(struct v4l2_vp9_frame_context *frame_ctx_helper,
1440 					       struct vdec_vp9_slice_frame_ctx *frame_ctx)
1441 {
1442 	struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv;
1443 	u32 i, j, k;
1444 
1445 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++)
1446 		memcpy(frame_ctx->tx_p8x8[i], frame_ctx_helper->tx8[i],
1447 		       sizeof(frame_ctx_helper->tx8[0]));
1448 
1449 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++)
1450 		memcpy(frame_ctx->tx_p16x16[i], frame_ctx_helper->tx16[i],
1451 		       sizeof(frame_ctx_helper->tx16[0]));
1452 
1453 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++)
1454 		memcpy(frame_ctx->tx_p32x32[i], frame_ctx_helper->tx32[i],
1455 		       sizeof(frame_ctx_helper->tx32[0]));
1456 
1457 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++)
1458 		for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++)
1459 			for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++)
1460 				vdec_vp9_slice_map_from_coef(i, j, k, frame_ctx,
1461 							     frame_ctx_helper);
1462 
1463 	memcpy(frame_ctx->skip_probs, frame_ctx_helper->skip, sizeof(frame_ctx_helper->skip));
1464 
1465 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++)
1466 		memcpy(frame_ctx->inter_mode_probs[i], frame_ctx_helper->inter_mode[i],
1467 		       sizeof(frame_ctx_helper->inter_mode[0]));
1468 
1469 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++)
1470 		memcpy(frame_ctx->switch_interp_prob[i], frame_ctx_helper->interp_filter[i],
1471 		       sizeof(frame_ctx_helper->interp_filter[0]));
1472 
1473 	memcpy(frame_ctx->intra_inter_prob, frame_ctx_helper->is_inter,
1474 	       sizeof(frame_ctx_helper->is_inter));
1475 
1476 	memcpy(frame_ctx->comp_inter_prob, frame_ctx_helper->comp_mode,
1477 	       sizeof(frame_ctx_helper->comp_mode));
1478 
1479 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++)
1480 		memcpy(frame_ctx->single_ref_prob[i], frame_ctx_helper->single_ref[i],
1481 		       sizeof(frame_ctx_helper->single_ref[0]));
1482 
1483 	memcpy(frame_ctx->comp_ref_prob, frame_ctx_helper->comp_ref,
1484 	       sizeof(frame_ctx_helper->comp_ref));
1485 
1486 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++)
1487 		memcpy(frame_ctx->y_mode_prob[i], frame_ctx_helper->y_mode[i],
1488 		       sizeof(frame_ctx_helper->y_mode[0]));
1489 
1490 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++)
1491 		memcpy(frame_ctx->uv_mode_prob[i], frame_ctx_helper->uv_mode[i],
1492 		       sizeof(frame_ctx_helper->uv_mode[0]));
1493 
1494 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++)
1495 		memcpy(frame_ctx->partition_prob[i], frame_ctx_helper->partition[i],
1496 		       sizeof(frame_ctx_helper->partition[0]));
1497 
1498 	memcpy(frame_ctx->joint, mv->joint, sizeof(mv->joint));
1499 
1500 	for (i = 0; i < ARRAY_SIZE(mv->sign); i++)
1501 		frame_ctx->sign_classes[i].sign = mv->sign[i];
1502 
1503 	for (i = 0; i < ARRAY_SIZE(mv->classes); i++)
1504 		memcpy(frame_ctx->sign_classes[i].classes, mv->classes[i],
1505 		       sizeof(mv->classes[i]));
1506 
1507 	for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++)
1508 		frame_ctx->class0_bits[i].class0[0] = mv->class0_bit[i];
1509 
1510 	for (i = 0; i < ARRAY_SIZE(mv->bits); i++)
1511 		memcpy(frame_ctx->class0_bits[i].bits, mv->bits[i], sizeof(mv->bits[0]));
1512 
1513 	for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++)
1514 		for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++)
1515 			memcpy(frame_ctx->class0_fp_hp[i].class0_fp[j], mv->class0_fr[i][j],
1516 			       sizeof(mv->class0_fr[0][0]));
1517 
1518 	for (i = 0; i < ARRAY_SIZE(mv->fr); i++)
1519 		memcpy(frame_ctx->class0_fp_hp[i].fp, mv->fr[i], sizeof(mv->fr[0]));
1520 
1521 	for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++)
1522 		frame_ctx->class0_fp_hp[i].class0_hp = mv->class0_hp[i];
1523 
1524 	for (i = 0; i < ARRAY_SIZE(mv->hp); i++)
1525 		frame_ctx->class0_fp_hp[i].hp = mv->hp[i];
1526 }
1527 
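/*
 * Backward adaptation of the frame context after a frame is decoded.
 * Nothing is stored when refresh_frame_context is not set. When
 * frame_parallel_decoding_mode is set, the prob table produced by the
 * decode (instance->prob) is copied back unchanged; otherwise the hardware
 * counts are converted to the v4l2-vp9 helper layout and
 * v4l2_vp9_adapt_coef_probs()/v4l2_vp9_adapt_noncoef_probs() perform the
 * adaptation before the result is mapped back to the MediaTek layout.
 */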
1528 static int vdec_vp9_slice_update_prob(struct vdec_vp9_slice_instance *instance,
1529 				      struct vdec_vp9_slice_vsi *vsi)
1530 {
1531 	struct vdec_vp9_slice_frame_ctx *pre_frame_ctx;
1532 	struct v4l2_vp9_frame_context *pre_frame_ctx_helper;
1533 	struct vdec_vp9_slice_frame_ctx *frame_ctx;
1534 	struct vdec_vp9_slice_frame_counts *counts;
1535 	struct v4l2_vp9_frame_symbol_counts *counts_helper;
1536 	struct vdec_vp9_slice_uncompressed_header *uh;
1537 	bool frame_is_intra;
1538 	bool use_128;
1539 
1540 	uh = &vsi->frame.uh;
1541 	pre_frame_ctx = &instance->frame_ctx[uh->frame_context_idx];
1542 	pre_frame_ctx_helper = &instance->frame_ctx_helper;
1543 	frame_ctx = (struct vdec_vp9_slice_frame_ctx *)instance->prob.va;
1544 	counts = (struct vdec_vp9_slice_frame_counts *)instance->counts.va;
1545 	counts_helper = &instance->counts_helper;
1546 
1547 	if (!uh->refresh_frame_context)
1548 		return 0;
1549 
1550 	if (!uh->frame_parallel_decoding_mode) {
1551 		vdec_vp9_slice_counts_map_helper(&instance->counts_map, counts, counts_helper);
1552 
1553 		frame_is_intra = !vsi->frame.uh.frame_type || vsi->frame.uh.intra_only;
1554 		/* check default prob */
1555 		if (!instance->dirty[uh->frame_context_idx])
1556 			vdec_vp9_slice_framectx_map_helper(frame_is_intra,
1557 							   vdec_vp9_slice_default_frame_ctx,
1558 							   frame_ctx,
1559 							   pre_frame_ctx_helper);
1560 		else
1561 			vdec_vp9_slice_framectx_map_helper(frame_is_intra,
1562 							   pre_frame_ctx,
1563 							   frame_ctx,
1564 							   pre_frame_ctx_helper);
1565 
1566 		use_128 = !frame_is_intra && !vsi->frame.uh.last_frame_type;
1567 		v4l2_vp9_adapt_coef_probs(pre_frame_ctx_helper,
1568 					  counts_helper,
1569 					  use_128,
1570 					  frame_is_intra);
1571 		if (!frame_is_intra)
1572 			v4l2_vp9_adapt_noncoef_probs(pre_frame_ctx_helper,
1573 						     counts_helper,
1574 						     V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE,
1575 						     vsi->frame.uh.interpolation_filter,
1576 						     vsi->frame.ch.tx_mode,
1577 						     vsi->frame.uh.allow_high_precision_mv ?
1578 						     V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV : 0);
1579 		vdec_vp9_slice_helper_map_framectx(pre_frame_ctx_helper, pre_frame_ctx);
1580 	} else {
1581 		memcpy(pre_frame_ctx, frame_ctx, sizeof(*frame_ctx));
1582 	}
1583 
1584 	instance->dirty[uh->frame_context_idx] = 1;
1585 
1586 	return 0;
1587 }
1588 
1589 static int vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance,
1590 					struct vdec_vp9_slice_pfc *pfc)
1591 {
1592 	struct vdec_vp9_slice_vsi *vsi;
1593 
1594 	vsi = &pfc->vsi;
1595 	memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
1596 
1597 	mtk_vdec_debug(instance->ctx, "Frame %u Y_CRC %08x %08x %08x %08x\n",
1598 		       pfc->seq, vsi->state.crc[0], vsi->state.crc[1],
1599 		       vsi->state.crc[2], vsi->state.crc[3]);
1600 	mtk_vdec_debug(instance->ctx, "Frame %u C_CRC %08x %08x %08x %08x\n",
1601 		       pfc->seq, vsi->state.crc[4], vsi->state.crc[5],
1602 		       vsi->state.crc[6], vsi->state.crc[7]);
1603 
1604 	vdec_vp9_slice_update_prob(instance, vsi);
1605 
1606 	instance->width = vsi->frame.uh.frame_width;
1607 	instance->height = vsi->frame.uh.frame_height;
1608 	instance->frame_type = vsi->frame.uh.frame_type;
1609 	instance->show_frame = vsi->frame.uh.show_frame;
1610 
1611 	return 0;
1612 }
1613 
1614 static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance,
1615 				     struct vdec_lat_buf *lat_buf,
1616 				     struct vdec_vp9_slice_pfc *pfc)
1617 {
1618 	struct vdec_vp9_slice_vsi *vsi;
1619 
1620 	vsi = &pfc->vsi;
1621 	memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
1622 
1623 	mtk_vdec_debug(instance->ctx, "Frame %u LAT CRC 0x%08x %lx %lx\n",
1624 		       pfc->seq, vsi->state.crc[0],
1625 		       (unsigned long)vsi->trans.dma_addr,
1626 		       (unsigned long)vsi->trans.dma_addr_end);
1627 
1628 	/* buffer full, need to re-decode */
1629 	if (vsi->state.full) {
1630 		/* the whole UBE was consumed: buffer too small for this frame */
1631 		if (vsi->trans.dma_addr_end - vsi->trans.dma_addr ==
1632 			vsi->ube.size)
1633 			return -ENOMEM;
1634 		return -EAGAIN;
1635 	}
1636 
1637 	vdec_vp9_slice_update_prob(instance, vsi);
1638 
1639 	instance->width = vsi->frame.uh.frame_width;
1640 	instance->height = vsi->frame.uh.frame_height;
1641 	instance->frame_type = vsi->frame.uh.frame_type;
1642 	instance->show_frame = vsi->frame.uh.show_frame;
1643 
1644 	return 0;
1645 }
1646 
1647 static int vdec_vp9_slice_setup_core_to_dst_buf(struct vdec_vp9_slice_instance *instance,
1648 						struct vdec_lat_buf *lat_buf)
1649 {
1650 	struct vb2_v4l2_buffer *dst;
1651 
1652 	dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
1653 	if (!dst)
1654 		return -EINVAL;
1655 
1656 	v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true);
1657 	return 0;
1658 }
1659 
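/*
 * Fill in the DMA addresses of the destination frame buffer and of the
 * three reference frames. For single-plane capture formats the chroma
 * plane follows the 64-aligned luma plane; otherwise the second plane
 * address is used directly.
 */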
1660 static int vdec_vp9_slice_setup_core_buffer(struct vdec_vp9_slice_instance *instance,
1661 					    struct vdec_vp9_slice_pfc *pfc,
1662 					    struct vdec_vp9_slice_vsi *vsi,
1663 					    struct vdec_fb *fb,
1664 					    struct vdec_lat_buf *lat_buf)
1665 {
1666 	struct vb2_buffer *vb;
1667 	struct vb2_queue *vq;
1668 	struct vdec_vp9_slice_reference *ref;
1669 	int plane;
1670 	int size;
1671 	int w;
1672 	int h;
1673 	int i;
1674 
1675 	plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes;
1676 	w = vsi->frame.uh.frame_width;
1677 	h = vsi->frame.uh.frame_height;
1678 	size = ALIGN(w, 64) * ALIGN(h, 64);
1679 
1680 	/* frame buffer */
1681 	vsi->fb.y.dma_addr = fb->base_y.dma_addr;
1682 	if (plane == 1)
1683 		vsi->fb.c.dma_addr = fb->base_y.dma_addr + size;
1684 	else
1685 		vsi->fb.c.dma_addr = fb->base_c.dma_addr;
1686 
1687 	/* reference buffers */
1688 	vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx,
1689 			     V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
1690 	if (!vq)
1691 		return -EINVAL;
1692 
1693 	/* get current output buffer */
1694 	vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf;
1695 	if (!vb)
1696 		return -EINVAL;
1697 
1698 	/* update internal buffer's width/height */
1699 	for (i = 0; i < vq->num_buffers; i++) {
1700 		if (vb == vq->bufs[i]) {
1701 			instance->dpb[i].width = w;
1702 			instance->dpb[i].height = h;
1703 			break;
1704 		}
1705 	}
1706 
1707 	/*
1708 	 * take each reference frame's width/height from the instance DPB
1709 	 * and its DMA addresses from the vb2 buffer
1710 	 */
1711 	for (i = 0; i < 3; i++) {
1712 		ref = &vsi->frame.ref[i];
1713 		vb = vb2_find_buffer(vq, pfc->ref_idx[i]);
1714 		if (!vb) {
1715 			ref->frame_width = w;
1716 			ref->frame_height = h;
1717 			memset(&vsi->ref[i], 0, sizeof(vsi->ref[i]));
1718 		} else {
1719 			int idx = vb->index;
1720 
1721 			ref->frame_width = instance->dpb[idx].width;
1722 			ref->frame_height = instance->dpb[idx].height;
1723 			vsi->ref[i].y.dma_addr =
1724 				vb2_dma_contig_plane_dma_addr(vb, 0);
1725 			if (plane == 1)
1726 				vsi->ref[i].c.dma_addr =
1727 					vsi->ref[i].y.dma_addr + size;
1728 			else
1729 				vsi->ref[i].c.dma_addr =
1730 					vb2_dma_contig_plane_dma_addr(vb, 1);
1731 		}
1732 	}
1733 
1734 	return 0;
1735 }
1736 
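/*
 * Single-core path: point the VSI at the bitstream and at the per-instance
 * working buffers (mv, seg, tile, prob, counts) before programming the
 * frame and reference buffers.
 */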
1737 static void vdec_vp9_slice_setup_single_buffer(struct vdec_vp9_slice_instance *instance,
1738 					       struct vdec_vp9_slice_pfc *pfc,
1739 					       struct vdec_vp9_slice_vsi *vsi,
1740 					       struct mtk_vcodec_mem *bs,
1741 					       struct vdec_fb *fb)
1742 {
1743 	int i;
1744 
1745 	vsi->bs.buf.dma_addr = bs->dma_addr;
1746 	vsi->bs.buf.size = bs->size;
1747 	vsi->bs.frame.dma_addr = bs->dma_addr;
1748 	vsi->bs.frame.size = bs->size;
1749 
1750 	for (i = 0; i < 2; i++) {
1751 		vsi->mv[i].dma_addr = instance->mv[i].dma_addr;
1752 		vsi->mv[i].size = instance->mv[i].size;
1753 	}
1754 	for (i = 0; i < 2; i++) {
1755 		vsi->seg[i].dma_addr = instance->seg[i].dma_addr;
1756 		vsi->seg[i].size = instance->seg[i].size;
1757 	}
1758 	vsi->tile.dma_addr = instance->tile.dma_addr;
1759 	vsi->tile.size = instance->tile.size;
1760 	vsi->prob.dma_addr = instance->prob.dma_addr;
1761 	vsi->prob.size = instance->prob.size;
1762 	vsi->counts.dma_addr = instance->counts.dma_addr;
1763 	vsi->counts.size = instance->counts.size;
1764 
1765 	vsi->row_info.buf = 0;
1766 	vsi->row_info.size = 0;
1767 
1768 	vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, NULL);
1769 }
1770 
1771 static int vdec_vp9_slice_setup_core(struct vdec_vp9_slice_instance *instance,
1772 				     struct vdec_fb *fb,
1773 				     struct vdec_lat_buf *lat_buf,
1774 				     struct vdec_vp9_slice_pfc *pfc)
1775 {
1776 	struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
1777 	int ret;
1778 
1779 	vdec_vp9_slice_setup_state(vsi);
1780 
1781 	ret = vdec_vp9_slice_setup_core_to_dst_buf(instance, lat_buf);
1782 	if (ret)
1783 		goto err;
1784 
1785 	ret = vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf);
1786 	if (ret)
1787 		goto err;
1788 
1789 	vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[1]);
1790 
1791 	return 0;
1792 
1793 err:
1794 	return ret;
1795 }
1796 
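/*
 * Prepare a complete VSI for the single-core decode path: resolve the
 * source/destination buffers, set up the per-frame context (pfc),
 * allocate working buffers, then fill the buffer, segmentation,
 * probability and tile sections of the VSI.
 */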
1797 static int vdec_vp9_slice_setup_single(struct vdec_vp9_slice_instance *instance,
1798 				       struct mtk_vcodec_mem *bs,
1799 				       struct vdec_fb *fb,
1800 				       struct vdec_vp9_slice_pfc *pfc)
1801 {
1802 	struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
1803 	int ret;
1804 
1805 	ret = vdec_vp9_slice_setup_single_from_src_to_dst(instance);
1806 	if (ret)
1807 		goto err;
1808 
1809 	ret = vdec_vp9_slice_setup_pfc(instance, pfc);
1810 	if (ret)
1811 		goto err;
1812 
1813 	ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi);
1814 	if (ret)
1815 		goto err;
1816 
1817 	vdec_vp9_slice_setup_single_buffer(instance, pfc, vsi, bs, fb);
1818 	vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]);
1819 
1820 	ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi);
1821 	if (ret)
1822 		goto err;
1823 
1824 	ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs);
1825 	if (ret)
1826 		goto err;
1827 
1828 	return 0;
1829 
1830 err:
1831 	return ret;
1832 }
1833 
1834 static int vdec_vp9_slice_update_core(struct vdec_vp9_slice_instance *instance,
1835 				      struct vdec_lat_buf *lat_buf,
1836 				      struct vdec_vp9_slice_pfc *pfc)
1837 {
1838 	struct vdec_vp9_slice_vsi *vsi;
1839 
1840 	vsi = &pfc->vsi;
1841 	memcpy(&pfc->state[1], &vsi->state, sizeof(vsi->state));
1842 
1843 	mtk_vdec_debug(instance->ctx, "Frame %u Y_CRC %08x %08x %08x %08x\n",
1844 		       pfc->seq, vsi->state.crc[0], vsi->state.crc[1],
1845 		       vsi->state.crc[2], vsi->state.crc[3]);
1846 	mtk_vdec_debug(instance->ctx, "Frame %u C_CRC %08x %08x %08x %08x\n",
1847 		       pfc->seq, vsi->state.crc[4], vsi->state.crc[5],
1848 		       vsi->state.crc[6], vsi->state.crc[7]);
1849 
1850 	return 0;
1851 }
1852 
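/*
 * Create a decoder instance: initialize the VPU interface (LAT and CORE
 * IPI ids), map the shared init/core VSIs provided by the firmware and
 * install the default VP9 frame contexts.
 */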
1853 static int vdec_vp9_slice_init(struct mtk_vcodec_dec_ctx *ctx)
1854 {
1855 	struct vdec_vp9_slice_instance *instance;
1856 	struct vdec_vp9_slice_init_vsi *vsi;
1857 	int ret;
1858 
1859 	instance = kzalloc(sizeof(*instance), GFP_KERNEL);
1860 	if (!instance)
1861 		return -ENOMEM;
1862 
1863 	instance->ctx = ctx;
1864 	instance->vpu.id = SCP_IPI_VDEC_LAT;
1865 	instance->vpu.core_id = SCP_IPI_VDEC_CORE;
1866 	instance->vpu.ctx = ctx;
1867 	instance->vpu.codec_type = ctx->current_codec;
1868 
1869 	ret = vpu_dec_init(&instance->vpu);
1870 	if (ret) {
1871 		mtk_vdec_err(ctx, "failed to init vpu dec, ret %d\n", ret);
1872 		goto error_vpu_init;
1873 	}
1874 
1875 	/* init vsi and global flags */
1876 
1877 	vsi = instance->vpu.vsi;
1878 	if (!vsi) {
1879 		mtk_vdec_err(ctx, "failed to get VP9 vsi\n");
1880 		ret = -EINVAL;
1881 		goto error_vsi;
1882 	}
1883 	instance->init_vsi = vsi;
1884 	instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
1885 						       (u32)vsi->core_vsi);
1886 	if (!instance->core_vsi) {
1887 		mtk_vdec_err(ctx, "failed to get VP9 core vsi\n");
1888 		ret = -EINVAL;
1889 		goto error_vsi;
1890 	}
1891 
1892 	instance->irq = 1;
1893 
1894 	ret = vdec_vp9_slice_init_default_frame_ctx(instance);
1895 	if (ret)
1896 		goto error_default_frame_ctx;
1897 
1898 	ctx->drv_handle = instance;
1899 
1900 	return 0;
1901 
1902 error_default_frame_ctx:
1903 error_vsi:
1904 	vpu_dec_deinit(&instance->vpu);
1905 error_vpu_init:
1906 	kfree(instance);
1907 	return ret;
1908 }
1909 
1910 static void vdec_vp9_slice_deinit(void *h_vdec)
1911 {
1912 	struct vdec_vp9_slice_instance *instance = h_vdec;
1913 
1914 	if (!instance)
1915 		return;
1916 
1917 	vpu_dec_deinit(&instance->vpu);
1918 	vdec_vp9_slice_free_working_buffer(instance);
1919 	vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx);
1920 	kfree(instance);
1921 }
1922 
1923 static int vdec_vp9_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs,
1924 				struct vdec_fb *fb, bool *res_chg)
1925 {
1926 	struct vdec_vp9_slice_instance *instance = h_vdec;
1927 
1928 	mtk_vdec_debug(instance->ctx, "flush ...\n");
1929 	if (instance->ctx->dev->vdec_pdata->hw_arch != MTK_VDEC_PURE_SINGLE_CORE)
1930 		vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue);
1931 	return vpu_dec_reset(&instance->vpu);
1932 }
1933 
1934 static void vdec_vp9_slice_get_pic_info(struct vdec_vp9_slice_instance *instance)
1935 {
1936 	struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
1937 	unsigned int data[3];
1938 
1939 	mtk_vdec_debug(instance->ctx, "w %u h %u\n", ctx->picinfo.pic_w, ctx->picinfo.pic_h);
1940 
1941 	data[0] = ctx->picinfo.pic_w;
1942 	data[1] = ctx->picinfo.pic_h;
1943 	data[2] = ctx->capture_fourcc;
1944 	vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO);
1945 
1946 	ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64);
1947 	ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64);
1948 	ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0];
1949 	ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1];
1950 }
1951 
1952 static void vdec_vp9_slice_get_dpb_size(struct vdec_vp9_slice_instance *instance,
1953 					unsigned int *dpb_sz)
1954 {
1955 	/* refer to the VP9 specification */
1956 	*dpb_sz = 9;
1957 }
1958 
1959 static int vdec_vp9_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out)
1960 {
1961 	struct vdec_vp9_slice_instance *instance = h_vdec;
1962 
1963 	switch (type) {
1964 	case GET_PARAM_PIC_INFO:
1965 		vdec_vp9_slice_get_pic_info(instance);
1966 		break;
1967 	case GET_PARAM_DPB_SIZE:
1968 		vdec_vp9_slice_get_dpb_size(instance, out);
1969 		break;
1970 	case GET_PARAM_CROP_INFO:
1971 		mtk_vdec_debug(instance->ctx, "No need to get vp9 crop information.");
1972 		break;
1973 	default:
1974 		mtk_vdec_err(instance->ctx, "invalid get parameter type=%d\n", type);
1975 		return -EINVAL;
1976 	}
1977 
1978 	return 0;
1979 }
1980 
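/*
 * Pure single-core decode: set up the VSI, copy it to the firmware, start
 * the decoder, wait for the interrupt (marking a timeout in the remote
 * VSI if it expires) and read the results back before updating the
 * instance state.
 */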
1981 static int vdec_vp9_slice_single_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
1982 					struct vdec_fb *fb, bool *res_chg)
1983 {
1984 	struct vdec_vp9_slice_instance *instance = h_vdec;
1985 	struct vdec_vp9_slice_pfc *pfc = &instance->sc_pfc;
1986 	struct vdec_vp9_slice_vsi *vsi;
1987 	struct mtk_vcodec_dec_ctx *ctx;
1988 	int ret;
1989 
1990 	if (!instance || !instance->ctx)
1991 		return -EINVAL;
1992 	ctx = instance->ctx;
1993 
1994 	/* bs NULL means flush decoder */
1995 	if (!bs)
1996 		return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg);
1997 
1998 	fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
1999 	if (!fb)
2000 		return -EBUSY;
2001 
2002 	vsi = &pfc->vsi;
2003 
2004 	ret = vdec_vp9_slice_setup_single(instance, bs, fb, pfc);
2005 	if (ret) {
2006 		mtk_vdec_err(ctx, "Failed to setup VP9 single ret %d\n", ret);
2007 		return ret;
2008 	}
2009 	vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi);
2010 
2011 	ret = vpu_dec_start(&instance->vpu, NULL, 0);
2012 	if (ret) {
2013 		mtk_vdec_err(ctx, "Failed to dec VP9 ret %d\n", ret);
2014 		return ret;
2015 	}
2016 
2017 	ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2018 					   WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
2019 	/* update remote vsi if decode timeout */
2020 	if (ret) {
2021 		mtk_vdec_err(ctx, "VP9 decode timeout %d\n", ret);
2022 		WRITE_ONCE(instance->vsi->state.timeout, 1);
2023 	}
2024 
2025 	vpu_dec_end(&instance->vpu);
2026 
2027 	vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
2028 	ret = vdec_vp9_slice_update_single(instance, pfc);
2029 	if (ret) {
2030 		mtk_vdec_err(ctx, "VP9 decode error: %d\n", ret);
2031 		return ret;
2032 	}
2033 
2034 	instance->ctx->decoded_frame_cnt++;
2035 	return 0;
2036 }
2037 
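/*
 * LAT (first) stage of the two-stage decode: dequeue a lat buffer, build
 * the VSI, run the LAT hardware, then advance the UBE write pointer and
 * queue the lat buffer to the core stage. On error the lat buffer is
 * returned to the lat queue.
 */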
2038 static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
2039 				     struct vdec_fb *fb, bool *res_chg)
2040 {
2041 	struct vdec_vp9_slice_instance *instance = h_vdec;
2042 	struct vdec_lat_buf *lat_buf;
2043 	struct vdec_vp9_slice_pfc *pfc;
2044 	struct vdec_vp9_slice_vsi *vsi;
2045 	struct mtk_vcodec_dec_ctx *ctx;
2046 	int ret;
2047 
2048 	if (!instance || !instance->ctx)
2049 		return -EINVAL;
2050 	ctx = instance->ctx;
2051 
2052 	/* init msgQ for the first time */
2053 	if (vdec_msg_queue_init(&ctx->msg_queue, ctx,
2054 				vdec_vp9_slice_core_decode,
2055 				sizeof(*pfc)))
2056 		return -ENOMEM;
2057 
2058 	/* bs NULL means flush decoder */
2059 	if (!bs)
2060 		return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg);
2061 
2062 	lat_buf = vdec_msg_queue_dqbuf(&instance->ctx->msg_queue.lat_ctx);
2063 	if (!lat_buf) {
2064 		mtk_vdec_debug(ctx, "Failed to get VP9 lat buf\n");
2065 		return -EAGAIN;
2066 	}
2067 	pfc = (struct vdec_vp9_slice_pfc *)lat_buf->private_data;
2068 	if (!pfc) {
2069 		ret = -EINVAL;
2070 		goto err_free_fb_out;
2071 	}
2072 	vsi = &pfc->vsi;
2073 
2074 	ret = vdec_vp9_slice_setup_lat(instance, bs, lat_buf, pfc);
2075 	if (ret) {
2076 		mtk_vdec_err(ctx, "Failed to setup VP9 lat ret %d\n", ret);
2077 		goto err_free_fb_out;
2078 	}
2079 	vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi);
2080 
2081 	ret = vpu_dec_start(&instance->vpu, NULL, 0);
2082 	if (ret) {
2083 		mtk_vdec_err(ctx, "Failed to dec VP9 ret %d\n", ret);
2084 		goto err_free_fb_out;
2085 	}
2086 
2087 	if (instance->irq) {
2088 		ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2089 						   WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0);
2090 		/* update remote vsi if decode timeout */
2091 		if (ret) {
2092 			mtk_vdec_err(ctx, "VP9 decode timeout %d pic %d\n", ret, pfc->seq);
2093 			WRITE_ONCE(instance->vsi->state.timeout, 1);
2094 		}
2095 		vpu_dec_end(&instance->vpu);
2096 	}
2097 
2098 	vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
2099 	ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc);
2100 
2101 	/* error cases: LAT trans buffer full, no UBE space left, or decode timeout */
2102 	if (ret) {
2103 		mtk_vdec_err(ctx, "VP9 decode error: %d\n", ret);
2104 		goto err_free_fb_out;
2105 	}
2106 
2107 	mtk_vdec_debug(ctx, "lat dma addr: 0x%lx 0x%lx\n",
2108 		       (unsigned long)pfc->vsi.trans.dma_addr,
2109 		       (unsigned long)pfc->vsi.trans.dma_addr_end);
2110 
2111 	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue,
2112 				       vsi->trans.dma_addr_end +
2113 				       ctx->msg_queue.wdma_addr.dma_addr);
2114 	vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf);
2115 
2116 	return 0;
2117 err_free_fb_out:
2118 	vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
2119 	return ret;
2120 }
2121 
2122 static int vdec_vp9_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
2123 				 struct vdec_fb *fb, bool *res_chg)
2124 {
2125 	struct vdec_vp9_slice_instance *instance = h_vdec;
2126 	int ret;
2127 
2128 	if (instance->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE)
2129 		ret = vdec_vp9_slice_single_decode(h_vdec, bs, fb, res_chg);
2130 	else
2131 		ret = vdec_vp9_slice_lat_decode(h_vdec, bs, fb, res_chg);
2132 
2133 	return ret;
2134 }
2135 
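/*
 * CORE (second) stage: consume one lat buffer produced by the LAT stage,
 * decode the frame into the capture buffer, then advance the UBE read
 * pointer and hand the capture buffer to display. The read pointer is
 * also advanced on error so the UBE space is always reclaimed.
 */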
2136 static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
2137 {
2138 	struct vdec_vp9_slice_instance *instance;
2139 	struct vdec_vp9_slice_pfc *pfc;
2140 	struct mtk_vcodec_dec_ctx *ctx = NULL;
2141 	struct vdec_fb *fb = NULL;
2142 	int ret = -EINVAL;
2143 
2144 	if (!lat_buf)
2145 		goto err;
2146 
2147 	pfc = lat_buf->private_data;
2148 	ctx = lat_buf->ctx;
2149 	if (!pfc || !ctx)
2150 		goto err;
2151 
2152 	instance = ctx->drv_handle;
2153 	if (!instance)
2154 		goto err;
2155 
2156 	fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
2157 	if (!fb) {
2158 		ret = -EBUSY;
2159 		goto err;
2160 	}
2161 
2162 	ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc);
2163 	if (ret) {
2164 		mtk_vdec_err(ctx, "vdec_vp9_slice_setup_core\n");
2165 		goto err;
2166 	}
2167 	vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
2168 
2169 	ret = vpu_dec_core(&instance->vpu);
2170 	if (ret) {
2171 		mtk_vdec_err(ctx, "vpu_dec_core\n");
2172 		goto err;
2173 	}
2174 
2175 	if (instance->irq) {
2176 		ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2177 						   WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
2178 		/* update remote vsi if decode timeout */
2179 		if (ret) {
2180 			mtk_vdec_err(ctx, "VP9 core timeout pic %d\n", pfc->seq);
2181 			WRITE_ONCE(instance->core_vsi->state.timeout, 1);
2182 		}
2183 		vpu_dec_core_end(&instance->vpu);
2184 	}
2185 
2186 	vdec_vp9_slice_vsi_from_remote(&pfc->vsi, instance->core_vsi, 1);
2187 	ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc);
2188 	if (ret) {
2189 		mtk_vdec_err(ctx, "vdec_vp9_slice_update_core\n");
2190 		goto err;
2191 	}
2192 
2193 	pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
2194 	mtk_vdec_debug(ctx, "core dma_addr_end 0x%lx\n",
2195 		       (unsigned long)pfc->vsi.trans.dma_addr_end);
2196 	vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
2197 	ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
2198 
2199 	return 0;
2200 
2201 err:
2202 	if (ctx && pfc) {
2203 		/* always update read pointer */
2204 		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
2205 
2206 		if (fb)
2207 			ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req);
2208 	}
2209 	return ret;
2210 }
2211 
2212 const struct vdec_common_if vdec_vp9_slice_lat_if = {
2213 	.init		= vdec_vp9_slice_init,
2214 	.decode		= vdec_vp9_slice_decode,
2215 	.get_param	= vdec_vp9_slice_get_param,
2216 	.deinit		= vdec_vp9_slice_deinit,
2217 };
2218