// SPDX-License-Identifier: GPL-2.0
/*
 * Rockchip Video Decoder VP9 backend
 *
 * Copyright (C) 2019 Collabora, Ltd.
 *	Boris Brezillon <boris.brezillon@collabora.com>
 * Copyright (C) 2021 Collabora, Ltd.
 *	Andrzej Pietrasiewicz <andrzej.p@collabora.com>
 *
 * Copyright (C) 2016 Rockchip Electronics Co., Ltd.
 *	Alpha Lin <Alpha.Lin@rock-chips.com>
 */

/*
 * To follow the VP9 spec, start reading this driver from rkvdec_vp9_run(),
 * then move on to rkvdec_vp9_done().
 */

#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <media/v4l2-mem2mem.h>
#include <media/v4l2-vp9.h>

#include "rkvdec.h"
#include "rkvdec-regs.h"

#define RKVDEC_VP9_PROBE_SIZE		4864
#define RKVDEC_VP9_COUNT_SIZE		13232
#define RKVDEC_VP9_MAX_SEGMAP_SIZE	73728

struct rkvdec_vp9_intra_mode_probs {
	u8 y_mode[105];
	u8 uv_mode[23];
};

struct rkvdec_vp9_intra_only_frame_probs {
	u8 coef_intra[4][2][128];
	struct rkvdec_vp9_intra_mode_probs intra_mode[10];
};

struct rkvdec_vp9_inter_frame_probs {
	u8 y_mode[4][9];
	u8 comp_mode[5];
	u8 comp_ref[5];
	u8 single_ref[5][2];
	u8 inter_mode[7][3];
	u8 interp_filter[4][2];
	u8 padding0[11];
	u8 coef[2][4][2][128];
	u8 uv_mode_0_2[3][9];
	u8 padding1[5];
	u8 uv_mode_3_5[3][9];
	u8 padding2[5];
	u8 uv_mode_6_8[3][9];
	u8 padding3[5];
	u8 uv_mode_9[9];
	u8 padding4[7];
	u8 padding5[16];
	struct {
		u8 joint[3];
		u8 sign[2];
		u8 classes[2][10];
		u8 class0_bit[2];
		u8 bits[2][10];
		u8 class0_fr[2][2][3];
		u8 fr[2][3];
		u8 class0_hp[2];
		u8 hp[2];
	} mv;
};

struct rkvdec_vp9_probs {
	u8 partition[16][3];
	u8 pred[3];
	u8 tree[7];
	u8 skip[3];
	u8 tx32[2][3];
	u8 tx16[2][2];
	u8 tx8[2][1];
	u8 is_inter[4];
	/* 128 bit alignment */
	u8 padding0[3];
	union {
		struct rkvdec_vp9_inter_frame_probs inter;
		struct rkvdec_vp9_intra_only_frame_probs intra_only;
	};
	/* 128 bit alignment */
	u8 padding1[11];
};

/* Data structure describing auxiliary buffer format. */
struct rkvdec_vp9_priv_tbl {
	struct rkvdec_vp9_probs probs;
	u8 segmap[2][RKVDEC_VP9_MAX_SEGMAP_SIZE];
};

struct rkvdec_vp9_refs_counts {
	u32 eob[2];
	u32 coeff[3];
};

struct rkvdec_vp9_inter_frame_symbol_counts {
	u32 partition[16][4];
	u32 skip[3][2];
	u32 inter[4][2];
	u32 tx32p[2][4];
	u32 tx16p[2][4];
	u32 tx8p[2][2];
	u32 y_mode[4][10];
	u32 uv_mode[10][10];
	u32 comp[5][2];
	u32 comp_ref[5][2];
	u32 single_ref[5][2][2];
	u32 mv_mode[7][4];
	u32 filter[4][3];
	u32 mv_joint[4];
	u32 sign[2][2];
	/* one extra element for alignment */
	u32 classes[2][11 + 1];
	u32 class0[2][2];
	u32 bits[2][10][2];
	u32 class0_fp[2][2][4];
	u32 fp[2][4];
	u32 class0_hp[2][2];
	u32 hp[2][2];
	struct rkvdec_vp9_refs_counts ref_cnt[2][4][2][6][6];
};

struct rkvdec_vp9_intra_frame_symbol_counts {
	u32 partition[4][4][4];
	u32 skip[3][2];
	u32 intra[4][2];
	u32 tx32p[2][4];
	u32 tx16p[2][4];
	u32 tx8p[2][2];
	struct rkvdec_vp9_refs_counts ref_cnt[2][4][2][6][6];
};

struct rkvdec_vp9_run {
	struct rkvdec_run base;
	const struct v4l2_ctrl_vp9_frame *decode_params;
};

struct rkvdec_vp9_frame_info {
	u32 valid : 1;
	u32 segmapid : 1;
	u32 frame_context_idx : 2;
	u32 reference_mode : 2;
	u32 tx_mode : 3;
	u32 interpolation_filter : 3;
	u32 flags;
	u64 timestamp;
	struct v4l2_vp9_segmentation seg;
	struct v4l2_vp9_loop_filter lf;
};

struct rkvdec_vp9_ctx {
	struct rkvdec_aux_buf priv_tbl;
	struct rkvdec_aux_buf count_tbl;
	struct v4l2_vp9_frame_symbol_counts inter_cnts;
	struct v4l2_vp9_frame_symbol_counts intra_cnts;
	struct v4l2_vp9_frame_context probability_tables;
	struct v4l2_vp9_frame_context frame_context[4];
	struct rkvdec_vp9_frame_info cur;
	struct rkvdec_vp9_frame_info last;
};

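/*
 * Pack a 6x6x3 coefficient probability plane into the layout the hardware
 * appears to expect: 27 probability bytes followed by 5 padding bytes,
 * i.e. a 32-byte (2 x 128-bit) stride per group.
 */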
static void write_coeff_plane(const u8 coef[6][6][3], u8 *coeff_plane)
{
	unsigned int idx = 0, byte_count = 0;
	int k, m, n;
	u8 p;

	for (k = 0; k < 6; k++) {
		for (m = 0; m < 6; m++) {
			for (n = 0; n < 3; n++) {
				p = coef[k][m][n];
				coeff_plane[idx++] = p;
				byte_count++;
				if (byte_count == 27) {
					idx += 5;
					byte_count = 0;
				}
			}
		}
	}
}

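/*
 * Fill the probability table for key/intra-only frames: the coefficient
 * probs come from the current frame context, while the mode probs come from
 * the fixed key-frame tables exported by the v4l2-vp9 library
 * (v4l2_vp9_kf_y_mode_prob / v4l2_vp9_kf_uv_mode_prob).
 */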
static void init_intra_only_probs(struct rkvdec_ctx *ctx,
				  const struct rkvdec_vp9_run *run)
{
	struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv;
	struct rkvdec_vp9_priv_tbl *tbl = vp9_ctx->priv_tbl.cpu;
	struct rkvdec_vp9_intra_only_frame_probs *rkprobs;
	const struct v4l2_vp9_frame_context *probs;
	unsigned int i, j, k;

	rkprobs = &tbl->probs.intra_only;
	probs = &vp9_ctx->probability_tables;

	/*
	 * intra only 149 x 128 bits, aligned to 152 x 128 bits
	 * coeff related prob 64 x 128 bits
	 */
	for (i = 0; i < ARRAY_SIZE(probs->coef); i++) {
		for (j = 0; j < ARRAY_SIZE(probs->coef[0]); j++)
			write_coeff_plane(probs->coef[i][j][0],
					  rkprobs->coef_intra[i][j]);
	}

	/* intra mode prob 80 x 128 bits */
	for (i = 0; i < ARRAY_SIZE(v4l2_vp9_kf_y_mode_prob); i++) {
		unsigned int byte_count = 0;
		int idx = 0;

		/* vp9_kf_y_mode_prob */
		for (j = 0; j < ARRAY_SIZE(v4l2_vp9_kf_y_mode_prob[0]); j++) {
			for (k = 0; k < ARRAY_SIZE(v4l2_vp9_kf_y_mode_prob[0][0]);
			     k++) {
				u8 val = v4l2_vp9_kf_y_mode_prob[i][j][k];

				rkprobs->intra_mode[i].y_mode[idx++] = val;
				byte_count++;
				if (byte_count == 27) {
					byte_count = 0;
					idx += 5;
				}
			}
		}
	}

	for (i = 0; i < sizeof(v4l2_vp9_kf_uv_mode_prob); ++i) {
		const u8 *ptr = (const u8 *)v4l2_vp9_kf_uv_mode_prob;

		rkprobs->intra_mode[i / 23].uv_mode[i % 23] = ptr[i];
	}
}

static void init_inter_probs(struct rkvdec_ctx *ctx,
			     const struct rkvdec_vp9_run *run)
{
	struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv;
	struct rkvdec_vp9_priv_tbl *tbl = vp9_ctx->priv_tbl.cpu;
	struct rkvdec_vp9_inter_frame_probs *rkprobs;
	const struct v4l2_vp9_frame_context *probs;
	unsigned int i, j, k;

	rkprobs = &tbl->probs.inter;
	probs = &vp9_ctx->probability_tables;

	/*
	 * inter probs
	 * 151 x 128 bits, aligned to 152 x 128 bits
	 * inter only
	 * intra_y_mode & inter_block info 6 x 128 bits
	 */

	memcpy(rkprobs->y_mode, probs->y_mode, sizeof(rkprobs->y_mode));
	memcpy(rkprobs->comp_mode, probs->comp_mode,
	       sizeof(rkprobs->comp_mode));
	memcpy(rkprobs->comp_ref, probs->comp_ref,
	       sizeof(rkprobs->comp_ref));
	memcpy(rkprobs->single_ref, probs->single_ref,
	       sizeof(rkprobs->single_ref));
	memcpy(rkprobs->inter_mode, probs->inter_mode,
	       sizeof(rkprobs->inter_mode));
	memcpy(rkprobs->interp_filter, probs->interp_filter,
	       sizeof(rkprobs->interp_filter));

	/* 128 x 128 bits coeff related */
	for (i = 0; i < ARRAY_SIZE(probs->coef); i++) {
		for (j = 0; j < ARRAY_SIZE(probs->coef[0]); j++) {
			for (k = 0; k < ARRAY_SIZE(probs->coef[0][0]); k++)
				write_coeff_plane(probs->coef[i][j][k],
						  rkprobs->coef[k][i][j]);
		}
	}

	/* intra uv mode 6 x 128 */
	memcpy(rkprobs->uv_mode_0_2, &probs->uv_mode[0],
	       sizeof(rkprobs->uv_mode_0_2));
	memcpy(rkprobs->uv_mode_3_5, &probs->uv_mode[3],
	       sizeof(rkprobs->uv_mode_3_5));
	memcpy(rkprobs->uv_mode_6_8, &probs->uv_mode[6],
	       sizeof(rkprobs->uv_mode_6_8));
	memcpy(rkprobs->uv_mode_9, &probs->uv_mode[9],
	       sizeof(rkprobs->uv_mode_9));

	/* mv related 6 x 128 */
	memcpy(rkprobs->mv.joint, probs->mv.joint,
	       sizeof(rkprobs->mv.joint));
	memcpy(rkprobs->mv.sign, probs->mv.sign,
	       sizeof(rkprobs->mv.sign));
	memcpy(rkprobs->mv.classes, probs->mv.classes,
	       sizeof(rkprobs->mv.classes));
	memcpy(rkprobs->mv.class0_bit, probs->mv.class0_bit,
	       sizeof(rkprobs->mv.class0_bit));
	memcpy(rkprobs->mv.bits, probs->mv.bits,
	       sizeof(rkprobs->mv.bits));
	memcpy(rkprobs->mv.class0_fr, probs->mv.class0_fr,
	       sizeof(rkprobs->mv.class0_fr));
	memcpy(rkprobs->mv.fr, probs->mv.fr,
	       sizeof(rkprobs->mv.fr));
	memcpy(rkprobs->mv.class0_hp, probs->mv.class0_hp,
	       sizeof(rkprobs->mv.class0_hp));
	memcpy(rkprobs->mv.hp, probs->mv.hp,
	       sizeof(rkprobs->mv.hp));
}

static void init_probs(struct rkvdec_ctx *ctx,
		       const struct rkvdec_vp9_run *run)
{
	const struct v4l2_ctrl_vp9_frame *dec_params;
	struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv;
	struct rkvdec_vp9_priv_tbl *tbl = vp9_ctx->priv_tbl.cpu;
	struct rkvdec_vp9_probs *rkprobs = &tbl->probs;
	const struct v4l2_vp9_segmentation *seg;
	const struct v4l2_vp9_frame_context *probs;
	bool intra_only;

	dec_params = run->decode_params;
	probs = &vp9_ctx->probability_tables;
	seg = &dec_params->seg;

	memset(rkprobs, 0, sizeof(*rkprobs));

	intra_only = !!(dec_params->flags &
			(V4L2_VP9_FRAME_FLAG_KEY_FRAME |
			 V4L2_VP9_FRAME_FLAG_INTRA_ONLY));

	/* sb info 5 x 128 bit */
	memcpy(rkprobs->partition,
	       intra_only ? v4l2_vp9_kf_partition_probs : probs->partition,
	       sizeof(rkprobs->partition));

	memcpy(rkprobs->pred, seg->pred_probs, sizeof(rkprobs->pred));
	memcpy(rkprobs->tree, seg->tree_probs, sizeof(rkprobs->tree));
	memcpy(rkprobs->skip, probs->skip, sizeof(rkprobs->skip));
	memcpy(rkprobs->tx32, probs->tx32, sizeof(rkprobs->tx32));
	memcpy(rkprobs->tx16, probs->tx16, sizeof(rkprobs->tx16));
	memcpy(rkprobs->tx8, probs->tx8, sizeof(rkprobs->tx8));
	memcpy(rkprobs->is_inter, probs->is_inter, sizeof(rkprobs->is_inter));

	if (intra_only)
		init_intra_only_probs(ctx, run);
	else
		init_inter_probs(ctx, run);
}

struct rkvdec_vp9_ref_reg {
	u32 reg_frm_size;
	u32 reg_hor_stride;
	u32 reg_y_stride;
	u32 reg_yuv_stride;
	u32 reg_ref_base;
};

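/*
 * Register sets for the three reference slots (LAST, GOLDEN, ALTREF).
 * Only the LAST slot has a dedicated YUV-stride register, hence the zero
 * reg_yuv_stride entries below.
 */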
static struct rkvdec_vp9_ref_reg ref_regs[] = {
	{
		.reg_frm_size = RKVDEC_REG_VP9_FRAME_SIZE(0),
		.reg_hor_stride = RKVDEC_VP9_HOR_VIRSTRIDE(0),
		.reg_y_stride = RKVDEC_VP9_LAST_FRAME_YSTRIDE,
		.reg_yuv_stride = RKVDEC_VP9_LAST_FRAME_YUVSTRIDE,
		.reg_ref_base = RKVDEC_REG_VP9_LAST_FRAME_BASE,
	},
	{
		.reg_frm_size = RKVDEC_REG_VP9_FRAME_SIZE(1),
		.reg_hor_stride = RKVDEC_VP9_HOR_VIRSTRIDE(1),
		.reg_y_stride = RKVDEC_VP9_GOLDEN_FRAME_YSTRIDE,
		.reg_yuv_stride = 0,
		.reg_ref_base = RKVDEC_REG_VP9_GOLDEN_FRAME_BASE,
	},
	{
		.reg_frm_size = RKVDEC_REG_VP9_FRAME_SIZE(2),
		.reg_hor_stride = RKVDEC_VP9_HOR_VIRSTRIDE(2),
		.reg_y_stride = RKVDEC_VP9_ALTREF_FRAME_YSTRIDE,
		.reg_yuv_stride = 0,
		.reg_ref_base = RKVDEC_REG_VP9_ALTREF_FRAME_BASE,
	}
};

static struct rkvdec_decoded_buffer *
get_ref_buf(struct rkvdec_ctx *ctx, struct vb2_v4l2_buffer *dst, u64 timestamp)
{
	struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
	struct vb2_queue *cap_q = &m2m_ctx->cap_q_ctx.q;
	struct vb2_buffer *buf;

	/*
	 * If a reference is unused or invalid, the address of the current
	 * destination buffer is returned instead.
	 */
	buf = vb2_find_buffer(cap_q, timestamp);
	if (!buf)
		buf = &dst->vb2_buf;

	return vb2_to_rkvdec_decoded_buf(buf);
}

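/*
 * Co-located motion vectors are stored right after the decoded YUV data,
 * hence the buffer base address offset by the aligned luma + chroma size.
 */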
static dma_addr_t get_mv_base_addr(struct rkvdec_decoded_buffer *buf)
{
	unsigned int aligned_pitch, aligned_height, yuv_len;

	aligned_height = round_up(buf->vp9.height, 64);
	aligned_pitch = round_up(buf->vp9.width * buf->vp9.bit_depth, 512) / 8;
	yuv_len = (aligned_height * aligned_pitch * 3) / 2;

	return vb2_dma_contig_plane_dma_addr(&buf->base.vb.vb2_buf, 0) +
	       yuv_len;
}

static void config_ref_registers(struct rkvdec_ctx *ctx,
				 const struct rkvdec_vp9_run *run,
				 struct rkvdec_decoded_buffer *ref_buf,
				 struct rkvdec_vp9_ref_reg *ref_reg)
{
	unsigned int aligned_pitch, aligned_height, y_len, yuv_len;
	struct rkvdec_dev *rkvdec = ctx->dev;

	aligned_height = round_up(ref_buf->vp9.height, 64);
	writel_relaxed(RKVDEC_VP9_FRAMEWIDTH(ref_buf->vp9.width) |
		       RKVDEC_VP9_FRAMEHEIGHT(ref_buf->vp9.height),
		       rkvdec->regs + ref_reg->reg_frm_size);

	writel_relaxed(vb2_dma_contig_plane_dma_addr(&ref_buf->base.vb.vb2_buf, 0),
		       rkvdec->regs + ref_reg->reg_ref_base);

	if (&ref_buf->base.vb == run->base.bufs.dst)
		return;

	aligned_pitch = round_up(ref_buf->vp9.width * ref_buf->vp9.bit_depth, 512) / 8;
	y_len = aligned_height * aligned_pitch;
	yuv_len = (y_len * 3) / 2;

	writel_relaxed(RKVDEC_HOR_Y_VIRSTRIDE(aligned_pitch / 16) |
		       RKVDEC_HOR_UV_VIRSTRIDE(aligned_pitch / 16),
		       rkvdec->regs + ref_reg->reg_hor_stride);
	writel_relaxed(RKVDEC_VP9_REF_YSTRIDE(y_len / 16),
		       rkvdec->regs + ref_reg->reg_y_stride);

	if (!ref_reg->reg_yuv_stride)
		return;

	writel_relaxed(RKVDEC_VP9_REF_YUVSTRIDE(yuv_len / 16),
		       rkvdec->regs + ref_reg->reg_yuv_stride);
}

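/*
 * Program the per-segment features (QP delta, loop-filter value, reference
 * info, skip) into the RKVDEC_VP9_SEGID_GRP registers. When the previous
 * frame's segmentation parameters are valid they are used instead of the
 * current ones (see the last/cur selection below).
 */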
static void config_seg_registers(struct rkvdec_ctx *ctx, unsigned int segid)
{
	struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv;
	const struct v4l2_vp9_segmentation *seg;
	struct rkvdec_dev *rkvdec = ctx->dev;
	s16 feature_val;
	int feature_id;
	u32 val = 0;

	seg = vp9_ctx->last.valid ? &vp9_ctx->last.seg : &vp9_ctx->cur.seg;
	feature_id = V4L2_VP9_SEG_LVL_ALT_Q;
	if (v4l2_vp9_seg_feat_enabled(seg->feature_enabled, feature_id, segid)) {
		feature_val = seg->feature_data[segid][feature_id];
		val |= RKVDEC_SEGID_FRAME_QP_DELTA_EN(1) |
		       RKVDEC_SEGID_FRAME_QP_DELTA(feature_val);
	}

	feature_id = V4L2_VP9_SEG_LVL_ALT_L;
	if (v4l2_vp9_seg_feat_enabled(seg->feature_enabled, feature_id, segid)) {
		feature_val = seg->feature_data[segid][feature_id];
		val |= RKVDEC_SEGID_FRAME_LOOPFILTER_VALUE_EN(1) |
		       RKVDEC_SEGID_FRAME_LOOPFILTER_VALUE(feature_val);
	}

	feature_id = V4L2_VP9_SEG_LVL_REF_FRAME;
	if (v4l2_vp9_seg_feat_enabled(seg->feature_enabled, feature_id, segid)) {
		feature_val = seg->feature_data[segid][feature_id];
		val |= RKVDEC_SEGID_REFERINFO_EN(1) |
		       RKVDEC_SEGID_REFERINFO(feature_val);
	}

	feature_id = V4L2_VP9_SEG_LVL_SKIP;
	if (v4l2_vp9_seg_feat_enabled(seg->feature_enabled, feature_id, segid))
		val |= RKVDEC_SEGID_FRAME_SKIP_EN(1);

	if (!segid &&
	    (seg->flags & V4L2_VP9_SEGMENTATION_FLAG_ABS_OR_DELTA_UPDATE))
		val |= RKVDEC_SEGID_ABS_DELTA(1);

	writel_relaxed(val, rkvdec->regs + RKVDEC_VP9_SEGID_GRP(segid));
}

static void update_dec_buf_info(struct rkvdec_decoded_buffer *buf,
				const struct v4l2_ctrl_vp9_frame *dec_params)
{
	buf->vp9.width = dec_params->frame_width_minus_1 + 1;
	buf->vp9.height = dec_params->frame_height_minus_1 + 1;
	buf->vp9.bit_depth = dec_params->bit_depth;
}

static void update_ctx_cur_info(struct rkvdec_vp9_ctx *vp9_ctx,
				struct rkvdec_decoded_buffer *buf,
				const struct v4l2_ctrl_vp9_frame *dec_params)
{
	vp9_ctx->cur.valid = true;
	vp9_ctx->cur.reference_mode = dec_params->reference_mode;
	vp9_ctx->cur.interpolation_filter = dec_params->interpolation_filter;
	vp9_ctx->cur.flags = dec_params->flags;
	vp9_ctx->cur.timestamp = buf->base.vb.vb2_buf.timestamp;
	vp9_ctx->cur.seg = dec_params->seg;
	vp9_ctx->cur.lf = dec_params->lf;
}

static void update_ctx_last_info(struct rkvdec_vp9_ctx *vp9_ctx)
{
	vp9_ctx->last = vp9_ctx->cur;
}

static void config_registers(struct rkvdec_ctx *ctx,
			     const struct rkvdec_vp9_run *run)
{
	unsigned int y_len, uv_len, yuv_len, bit_depth, aligned_height, aligned_pitch, stream_len;
	const struct v4l2_ctrl_vp9_frame *dec_params;
	struct rkvdec_decoded_buffer *ref_bufs[3];
	struct rkvdec_decoded_buffer *dst, *last, *mv_ref;
	struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv;
	u32 val, last_frame_info = 0;
	const struct v4l2_vp9_segmentation *seg;
	struct rkvdec_dev *rkvdec = ctx->dev;
	dma_addr_t addr;
	bool intra_only;
	unsigned int i;

	dec_params = run->decode_params;
	dst = vb2_to_rkvdec_decoded_buf(&run->base.bufs.dst->vb2_buf);
	ref_bufs[0] = get_ref_buf(ctx, &dst->base.vb, dec_params->last_frame_ts);
	ref_bufs[1] = get_ref_buf(ctx, &dst->base.vb, dec_params->golden_frame_ts);
	ref_bufs[2] = get_ref_buf(ctx, &dst->base.vb, dec_params->alt_frame_ts);

	if (vp9_ctx->last.valid)
		last = get_ref_buf(ctx, &dst->base.vb, vp9_ctx->last.timestamp);
	else
		last = dst;

	update_dec_buf_info(dst, dec_params);
	update_ctx_cur_info(vp9_ctx, dst, dec_params);
	seg = &dec_params->seg;

	intra_only = !!(dec_params->flags &
			(V4L2_VP9_FRAME_FLAG_KEY_FRAME |
			 V4L2_VP9_FRAME_FLAG_INTRA_ONLY));

	writel_relaxed(RKVDEC_MODE(RKVDEC_MODE_VP9),
		       rkvdec->regs + RKVDEC_REG_SYSCTRL);

	bit_depth = dec_params->bit_depth;
	aligned_height = round_up(ctx->decoded_fmt.fmt.pix_mp.height, 64);

	aligned_pitch = round_up(ctx->decoded_fmt.fmt.pix_mp.width *
				 bit_depth,
				 512) / 8;
	y_len = aligned_height * aligned_pitch;
	uv_len = y_len / 2;
	yuv_len = y_len + uv_len;

	writel_relaxed(RKVDEC_Y_HOR_VIRSTRIDE(aligned_pitch / 16) |
		       RKVDEC_UV_HOR_VIRSTRIDE(aligned_pitch / 16),
		       rkvdec->regs + RKVDEC_REG_PICPAR);
	writel_relaxed(RKVDEC_Y_VIRSTRIDE(y_len / 16),
		       rkvdec->regs + RKVDEC_REG_Y_VIRSTRIDE);
	writel_relaxed(RKVDEC_YUV_VIRSTRIDE(yuv_len / 16),
		       rkvdec->regs + RKVDEC_REG_YUV_VIRSTRIDE);

	stream_len = vb2_get_plane_payload(&run->base.bufs.src->vb2_buf, 0);
	writel_relaxed(RKVDEC_STRM_LEN(stream_len),
		       rkvdec->regs + RKVDEC_REG_STRM_LEN);

	/*
	 * Reset the count buffer: the decoder only outputs intra-related
	 * syntax counts when decoding an intra frame, but the entropy
	 * update needs to update all the probabilities.
	 */
	if (intra_only)
		memset(vp9_ctx->count_tbl.cpu, 0, vp9_ctx->count_tbl.size);

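	/*
	 * The segment map is double-buffered in priv_tbl: inherit the map id
	 * from the previous frame and flip it only when this frame may update
	 * the map (inter, not error-resilient, and segmentation either
	 * disabled or explicitly updating the map).
	 */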
	vp9_ctx->cur.segmapid = vp9_ctx->last.segmapid;
	if (!intra_only &&
	    !(dec_params->flags & V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT) &&
	    (!(seg->flags & V4L2_VP9_SEGMENTATION_FLAG_ENABLED) ||
	     (seg->flags & V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP)))
		vp9_ctx->cur.segmapid++;

	for (i = 0; i < ARRAY_SIZE(ref_bufs); i++)
		config_ref_registers(ctx, run, ref_bufs[i], &ref_regs[i]);

	for (i = 0; i < 8; i++)
		config_seg_registers(ctx, i);

	writel_relaxed(RKVDEC_VP9_TX_MODE(vp9_ctx->cur.tx_mode) |
		       RKVDEC_VP9_FRAME_REF_MODE(dec_params->reference_mode),
		       rkvdec->regs + RKVDEC_VP9_CPRHEADER_CONFIG);

	if (!intra_only) {
		const struct v4l2_vp9_loop_filter *lf;
		s8 delta;

		if (vp9_ctx->last.valid)
			lf = &vp9_ctx->last.lf;
		else
			lf = &vp9_ctx->cur.lf;

		val = 0;
		for (i = 0; i < ARRAY_SIZE(lf->ref_deltas); i++) {
			delta = lf->ref_deltas[i];
			val |= RKVDEC_REF_DELTAS_LASTFRAME(i, delta);
		}

		writel_relaxed(val,
			       rkvdec->regs + RKVDEC_VP9_REF_DELTAS_LASTFRAME);

		for (i = 0; i < ARRAY_SIZE(lf->mode_deltas); i++) {
			delta = lf->mode_deltas[i];
			last_frame_info |= RKVDEC_MODE_DELTAS_LASTFRAME(i,
									delta);
		}
	}

	if (vp9_ctx->last.valid && !intra_only &&
	    vp9_ctx->last.seg.flags & V4L2_VP9_SEGMENTATION_FLAG_ENABLED)
		last_frame_info |= RKVDEC_SEG_EN_LASTFRAME;

	if (vp9_ctx->last.valid &&
	    vp9_ctx->last.flags & V4L2_VP9_FRAME_FLAG_SHOW_FRAME)
		last_frame_info |= RKVDEC_LAST_SHOW_FRAME;

	if (vp9_ctx->last.valid &&
	    vp9_ctx->last.flags &
	    (V4L2_VP9_FRAME_FLAG_KEY_FRAME | V4L2_VP9_FRAME_FLAG_INTRA_ONLY))
		last_frame_info |= RKVDEC_LAST_INTRA_ONLY;

	if (vp9_ctx->last.valid &&
	    last->vp9.width == dst->vp9.width &&
	    last->vp9.height == dst->vp9.height)
		last_frame_info |= RKVDEC_LAST_WIDHHEIGHT_EQCUR;

	writel_relaxed(last_frame_info,
		       rkvdec->regs + RKVDEC_VP9_INFO_LASTFRAME);

	writel_relaxed(stream_len - dec_params->compressed_header_size -
		       dec_params->uncompressed_header_size,
		       rkvdec->regs + RKVDEC_VP9_LASTTILE_SIZE);

	for (i = 0; !intra_only && i < ARRAY_SIZE(ref_bufs); i++) {
		unsigned int refw = ref_bufs[i]->vp9.width;
		unsigned int refh = ref_bufs[i]->vp9.height;
		u32 hscale, vscale;

		hscale = (refw << 14) / dst->vp9.width;
		vscale = (refh << 14) / dst->vp9.height;
		writel_relaxed(RKVDEC_VP9_REF_HOR_SCALE(hscale) |
			       RKVDEC_VP9_REF_VER_SCALE(vscale),
			       rkvdec->regs + RKVDEC_VP9_REF_SCALE(i));
	}

	addr = vb2_dma_contig_plane_dma_addr(&dst->base.vb.vb2_buf, 0);
	writel_relaxed(addr, rkvdec->regs + RKVDEC_REG_DECOUT_BASE);
	addr = vb2_dma_contig_plane_dma_addr(&run->base.bufs.src->vb2_buf, 0);
	writel_relaxed(addr, rkvdec->regs + RKVDEC_REG_STRM_RLC_BASE);
	writel_relaxed(vp9_ctx->priv_tbl.dma +
		       offsetof(struct rkvdec_vp9_priv_tbl, probs),
		       rkvdec->regs + RKVDEC_REG_CABACTBL_PROB_BASE);
	writel_relaxed(vp9_ctx->count_tbl.dma,
		       rkvdec->regs + RKVDEC_REG_VP9COUNT_BASE);

	writel_relaxed(vp9_ctx->priv_tbl.dma +
		       offsetof(struct rkvdec_vp9_priv_tbl, segmap) +
		       (RKVDEC_VP9_MAX_SEGMAP_SIZE * vp9_ctx->cur.segmapid),
		       rkvdec->regs + RKVDEC_REG_VP9_SEGIDCUR_BASE);
	writel_relaxed(vp9_ctx->priv_tbl.dma +
		       offsetof(struct rkvdec_vp9_priv_tbl, segmap) +
		       (RKVDEC_VP9_MAX_SEGMAP_SIZE * (!vp9_ctx->cur.segmapid)),
		       rkvdec->regs + RKVDEC_REG_VP9_SEGIDLAST_BASE);

	if (!intra_only &&
	    !(dec_params->flags & V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT) &&
	    vp9_ctx->last.valid)
		mv_ref = last;
	else
		mv_ref = dst;

	writel_relaxed(get_mv_base_addr(mv_ref),
		       rkvdec->regs + RKVDEC_VP9_REF_COLMV_BASE);

	writel_relaxed(ctx->decoded_fmt.fmt.pix_mp.width |
		       (ctx->decoded_fmt.fmt.pix_mp.height << 16),
		       rkvdec->regs + RKVDEC_REG_PERFORMANCE_CYCLE);
}

static int validate_dec_params(struct rkvdec_ctx *ctx,
			       const struct v4l2_ctrl_vp9_frame *dec_params)
{
	unsigned int aligned_width, aligned_height;

	/* We only support profile 0. */
	if (dec_params->profile != 0) {
		dev_err(ctx->dev->dev, "unsupported profile %d\n",
			dec_params->profile);
		return -EINVAL;
	}

	aligned_width = round_up(dec_params->frame_width_minus_1 + 1, 64);
	aligned_height = round_up(dec_params->frame_height_minus_1 + 1, 64);

	/*
	 * Userspace should update the capture/decoded format when the
	 * resolution changes.
	 */
	if (aligned_width != ctx->decoded_fmt.fmt.pix_mp.width ||
	    aligned_height != ctx->decoded_fmt.fmt.pix_mp.height) {
		dev_err(ctx->dev->dev,
			"unexpected bitstream resolution %dx%d\n",
			dec_params->frame_width_minus_1 + 1,
			dec_params->frame_height_minus_1 + 1);
		return -EINVAL;
	}

	return 0;
}

static int rkvdec_vp9_run_preamble(struct rkvdec_ctx *ctx,
				   struct rkvdec_vp9_run *run)
{
	const struct v4l2_ctrl_vp9_frame *dec_params;
	const struct v4l2_ctrl_vp9_compressed_hdr *prob_updates;
	struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv;
	struct v4l2_ctrl *ctrl;
	unsigned int fctx_idx;
	int ret;

	/* v4l2-specific stuff */
	rkvdec_run_preamble(ctx, &run->base);

	ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl,
			      V4L2_CID_STATELESS_VP9_FRAME);
	if (WARN_ON(!ctrl))
		return -EINVAL;
	dec_params = ctrl->p_cur.p;

	ret = validate_dec_params(ctx, dec_params);
	if (ret)
		return ret;

	run->decode_params = dec_params;

	ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, V4L2_CID_STATELESS_VP9_COMPRESSED_HDR);
	if (WARN_ON(!ctrl))
		return -EINVAL;
	prob_updates = ctrl->p_cur.p;
	vp9_ctx->cur.tx_mode = prob_updates->tx_mode;

	/*
	 * vp9 stuff
	 *
	 * By this point userspace has done all parts of 6.2 uncompressed_header()
	 * except this fragment:
	 * if ( FrameIsIntra || error_resilient_mode ) {
	 *	setup_past_independence ( )
	 *	if ( frame_type == KEY_FRAME || error_resilient_mode == 1 ||
	 *	     reset_frame_context == 3 ) {
	 *		for ( i = 0; i < 4; i ++ ) {
	 *			save_probs( i )
	 *		}
	 *	} else if ( reset_frame_context == 2 ) {
	 *		save_probs( frame_context_idx )
	 *	}
	 *	frame_context_idx = 0
	 * }
	 */
	fctx_idx = v4l2_vp9_reset_frame_ctx(dec_params, vp9_ctx->frame_context);
	vp9_ctx->cur.frame_context_idx = fctx_idx;

	/* 6.1 frame(sz): load_probs() and load_probs2() */
	vp9_ctx->probability_tables = vp9_ctx->frame_context[fctx_idx];

	/*
	 * Userspace has also performed 6.3 compressed_header(), but handled the
	 * probs in a special way. All probs which need updating, except MV-related,
	 * have been read from the bitstream and translated through inv_map_table[],
	 * but no 6.3.6 inv_recenter_nonneg(v, m) has been performed. The values passed
	 * by userspace are either translated values (there are no 0 values in
	 * inv_map_table[]), or zero to indicate no update. All MV-related probs which need
	 * updating have been read from the bitstream and (mv_prob << 1) | 1 has been
	 * performed. The values passed by userspace are either new values
	 * to replace old ones (the above mentioned shift and bitwise or never result in
	 * a zero) or zero to indicate no update.
	 * fw_update_probs() performs the actual probs updates or leaves probs as-is
	 * for values for which a zero was passed from userspace.
	 */
	v4l2_vp9_fw_update_probs(&vp9_ctx->probability_tables, prob_updates, dec_params);

	return 0;
}

static int rkvdec_vp9_run(struct rkvdec_ctx *ctx)
{
	struct rkvdec_dev *rkvdec = ctx->dev;
	struct rkvdec_vp9_run run = { };
	int ret;

	ret = rkvdec_vp9_run_preamble(ctx, &run);
	if (ret) {
		rkvdec_run_postamble(ctx, &run.base);
		return ret;
	}

	/* Prepare probs. */
	init_probs(ctx, &run);

	/* Configure hardware registers. */
	config_registers(ctx, &run);

	rkvdec_run_postamble(ctx, &run.base);

	schedule_delayed_work(&rkvdec->watchdog_work, msecs_to_jiffies(2000));

	writel(1, rkvdec->regs + RKVDEC_REG_PREF_LUMA_CACHE_COMMAND);
	writel(1, rkvdec->regs + RKVDEC_REG_PREF_CHR_CACHE_COMMAND);

	writel(0xe, rkvdec->regs + RKVDEC_REG_STRMD_ERR_EN);
	/* Start decoding! */
	writel(RKVDEC_INTERRUPT_DEC_E | RKVDEC_CONFIG_DEC_CLK_GATE_E |
	       RKVDEC_TIMEOUT_E | RKVDEC_BUF_EMPTY_E,
	       rkvdec->regs + RKVDEC_REG_INTERRUPT);

	return 0;
}

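/*
 * Snapshot/restore the tx-size and skip probabilities; used by
 * rkvdec_vp9_done() to undo the effect of load_probs2() on intra frames.
 */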
#define copy_tx_and_skip(p1, p2)				\
do {								\
	memcpy((p1)->tx8, (p2)->tx8, sizeof((p1)->tx8));	\
	memcpy((p1)->tx16, (p2)->tx16, sizeof((p1)->tx16));	\
	memcpy((p1)->tx32, (p2)->tx32, sizeof((p1)->tx32));	\
	memcpy((p1)->skip, (p2)->skip, sizeof((p1)->skip));	\
} while (0)

static void rkvdec_vp9_done(struct rkvdec_ctx *ctx,
			    struct vb2_v4l2_buffer *src_buf,
			    struct vb2_v4l2_buffer *dst_buf,
			    enum vb2_buffer_state result)
{
	struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv;
	unsigned int fctx_idx;

	/* v4l2-specific stuff */
	if (result == VB2_BUF_STATE_ERROR)
		goto out_update_last;

	/*
	 * vp9 stuff
	 *
	 * 6.1.2 refresh_probs()
	 *
	 * In the spec a complementary condition goes last in 6.1.2 refresh_probs(),
	 * but it makes no sense to perform all the activities from the first "if"
	 * there if we actually are not refreshing the frame context. On top of that,
	 * because of 6.2 uncompressed_header() whenever error_resilient_mode == 1,
	 * refresh_frame_context == 0. Consequently, if we don't jump to out_update_last
	 * it means error_resilient_mode must be 0.
	 */
	if (!(vp9_ctx->cur.flags & V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX))
		goto out_update_last;

	fctx_idx = vp9_ctx->cur.frame_context_idx;

	if (!(vp9_ctx->cur.flags & V4L2_VP9_FRAME_FLAG_PARALLEL_DEC_MODE)) {
		/* error_resilient_mode == 0 && frame_parallel_decoding_mode == 0 */
		struct v4l2_vp9_frame_context *probs = &vp9_ctx->probability_tables;
		bool frame_is_intra = vp9_ctx->cur.flags &
		    (V4L2_VP9_FRAME_FLAG_KEY_FRAME | V4L2_VP9_FRAME_FLAG_INTRA_ONLY);
		struct tx_and_skip {
			u8 tx8[2][1];
			u8 tx16[2][2];
			u8 tx32[2][3];
			u8 skip[3];
		} _tx_skip, *tx_skip = &_tx_skip;
		struct v4l2_vp9_frame_symbol_counts *counts;

		/* buffer the forward-updated TX and skip probs */
		if (frame_is_intra)
			copy_tx_and_skip(tx_skip, probs);

		/* 6.1.2 refresh_probs(): load_probs() and load_probs2() */
		*probs = vp9_ctx->frame_context[fctx_idx];

		/* if FrameIsIntra then undo the effect of load_probs2() */
		if (frame_is_intra)
			copy_tx_and_skip(probs, tx_skip);

		counts = frame_is_intra ? &vp9_ctx->intra_cnts : &vp9_ctx->inter_cnts;
		v4l2_vp9_adapt_coef_probs(probs, counts,
					  !vp9_ctx->last.valid ||
					  vp9_ctx->last.flags & V4L2_VP9_FRAME_FLAG_KEY_FRAME,
					  frame_is_intra);
		if (!frame_is_intra) {
			const struct rkvdec_vp9_inter_frame_symbol_counts *inter_cnts;
			u32 classes[2][11];
			int i;

			inter_cnts = vp9_ctx->count_tbl.cpu;
			for (i = 0; i < ARRAY_SIZE(classes); ++i)
				memcpy(classes[i], inter_cnts->classes[i], sizeof(classes[0]));
			counts->classes = &classes;

			/* load_probs2() already done */
			v4l2_vp9_adapt_noncoef_probs(&vp9_ctx->probability_tables, counts,
						     vp9_ctx->cur.reference_mode,
						     vp9_ctx->cur.interpolation_filter,
						     vp9_ctx->cur.tx_mode, vp9_ctx->cur.flags);
		}
	}

	/* 6.1.2 refresh_probs(): save_probs(fctx_idx) */
	vp9_ctx->frame_context[fctx_idx] = vp9_ctx->probability_tables;

out_update_last:
	update_ctx_last_info(vp9_ctx);
}

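/*
 * Wire the v4l2-vp9 library's symbol-count pointers to the hardware count
 * buffer. The inter and intra layouts alias the same memory; both views are
 * set up once here and the right one is picked per-frame in rkvdec_vp9_done().
 */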
static void rkvdec_init_v4l2_vp9_count_tbl(struct rkvdec_ctx *ctx)
{
	struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv;
	struct rkvdec_vp9_intra_frame_symbol_counts *intra_cnts = vp9_ctx->count_tbl.cpu;
	struct rkvdec_vp9_inter_frame_symbol_counts *inter_cnts = vp9_ctx->count_tbl.cpu;
	int i, j, k, l, m;

	vp9_ctx->inter_cnts.partition = &inter_cnts->partition;
	vp9_ctx->inter_cnts.skip = &inter_cnts->skip;
	vp9_ctx->inter_cnts.intra_inter = &inter_cnts->inter;
	vp9_ctx->inter_cnts.tx32p = &inter_cnts->tx32p;
	vp9_ctx->inter_cnts.tx16p = &inter_cnts->tx16p;
	vp9_ctx->inter_cnts.tx8p = &inter_cnts->tx8p;

	vp9_ctx->intra_cnts.partition = (u32 (*)[16][4])(&intra_cnts->partition);
	vp9_ctx->intra_cnts.skip = &intra_cnts->skip;
	vp9_ctx->intra_cnts.intra_inter = &intra_cnts->intra;
	vp9_ctx->intra_cnts.tx32p = &intra_cnts->tx32p;
	vp9_ctx->intra_cnts.tx16p = &intra_cnts->tx16p;
	vp9_ctx->intra_cnts.tx8p = &intra_cnts->tx8p;

	vp9_ctx->inter_cnts.y_mode = &inter_cnts->y_mode;
	vp9_ctx->inter_cnts.uv_mode = &inter_cnts->uv_mode;
	vp9_ctx->inter_cnts.comp = &inter_cnts->comp;
	vp9_ctx->inter_cnts.comp_ref = &inter_cnts->comp_ref;
	vp9_ctx->inter_cnts.single_ref = &inter_cnts->single_ref;
	vp9_ctx->inter_cnts.mv_mode = &inter_cnts->mv_mode;
	vp9_ctx->inter_cnts.filter = &inter_cnts->filter;
	vp9_ctx->inter_cnts.mv_joint = &inter_cnts->mv_joint;
	vp9_ctx->inter_cnts.sign = &inter_cnts->sign;
	/*
	 * The rk hardware actually uses "u32 classes[2][11 + 1];" instead of
	 * "u32 classes[2][11];", so the counts must be explicitly copied into
	 * a properly sized array (see rkvdec_vp9_done()) before being passed
	 * to the vp9 library function.
	 */
	vp9_ctx->inter_cnts.class0 = &inter_cnts->class0;
	vp9_ctx->inter_cnts.bits = &inter_cnts->bits;
	vp9_ctx->inter_cnts.class0_fp = &inter_cnts->class0_fp;
	vp9_ctx->inter_cnts.fp = &inter_cnts->fp;
	vp9_ctx->inter_cnts.class0_hp = &inter_cnts->class0_hp;
	vp9_ctx->inter_cnts.hp = &inter_cnts->hp;

#define INNERMOST_LOOP \
	do {										\
		for (m = 0; m < ARRAY_SIZE(vp9_ctx->inter_cnts.coeff[0][0][0][0]); ++m) {\
			vp9_ctx->inter_cnts.coeff[i][j][k][l][m] =			\
				&inter_cnts->ref_cnt[k][i][j][l][m].coeff;		\
			vp9_ctx->inter_cnts.eob[i][j][k][l][m][0] =			\
				&inter_cnts->ref_cnt[k][i][j][l][m].eob[0];		\
			vp9_ctx->inter_cnts.eob[i][j][k][l][m][1] =			\
				&inter_cnts->ref_cnt[k][i][j][l][m].eob[1];		\
											\
			vp9_ctx->intra_cnts.coeff[i][j][k][l][m] =			\
				&intra_cnts->ref_cnt[k][i][j][l][m].coeff;		\
			vp9_ctx->intra_cnts.eob[i][j][k][l][m][0] =			\
				&intra_cnts->ref_cnt[k][i][j][l][m].eob[0];		\
			vp9_ctx->intra_cnts.eob[i][j][k][l][m][1] =			\
				&intra_cnts->ref_cnt[k][i][j][l][m].eob[1];		\
		}									\
	} while (0)

	for (i = 0; i < ARRAY_SIZE(vp9_ctx->inter_cnts.coeff); ++i)
		for (j = 0; j < ARRAY_SIZE(vp9_ctx->inter_cnts.coeff[0]); ++j)
			for (k = 0; k < ARRAY_SIZE(vp9_ctx->inter_cnts.coeff[0][0]); ++k)
				for (l = 0; l < ARRAY_SIZE(vp9_ctx->inter_cnts.coeff[0][0][0]); ++l)
					INNERMOST_LOOP;
#undef INNERMOST_LOOP
}

static int rkvdec_vp9_start(struct rkvdec_ctx *ctx)
{
	struct rkvdec_dev *rkvdec = ctx->dev;
	struct rkvdec_vp9_priv_tbl *priv_tbl;
	struct rkvdec_vp9_ctx *vp9_ctx;
	unsigned char *count_tbl;
	int ret;

	vp9_ctx = kzalloc(sizeof(*vp9_ctx), GFP_KERNEL);
	if (!vp9_ctx)
		return -ENOMEM;

	ctx->priv = vp9_ctx;

	BUILD_BUG_ON(sizeof(priv_tbl->probs) % 16); /* ensure probs size is 128-bit aligned */
	priv_tbl = dma_alloc_coherent(rkvdec->dev, sizeof(*priv_tbl),
				      &vp9_ctx->priv_tbl.dma, GFP_KERNEL);
	if (!priv_tbl) {
		ret = -ENOMEM;
		goto err_free_ctx;
	}

	vp9_ctx->priv_tbl.size = sizeof(*priv_tbl);
	vp9_ctx->priv_tbl.cpu = priv_tbl;

	count_tbl = dma_alloc_coherent(rkvdec->dev, RKVDEC_VP9_COUNT_SIZE,
				       &vp9_ctx->count_tbl.dma, GFP_KERNEL);
	if (!count_tbl) {
		ret = -ENOMEM;
		goto err_free_priv_tbl;
	}

	vp9_ctx->count_tbl.size = RKVDEC_VP9_COUNT_SIZE;
	vp9_ctx->count_tbl.cpu = count_tbl;
	rkvdec_init_v4l2_vp9_count_tbl(ctx);

	return 0;

err_free_priv_tbl:
	dma_free_coherent(rkvdec->dev, vp9_ctx->priv_tbl.size,
			  vp9_ctx->priv_tbl.cpu, vp9_ctx->priv_tbl.dma);

err_free_ctx:
	kfree(vp9_ctx);
	return ret;
}

static void rkvdec_vp9_stop(struct rkvdec_ctx *ctx)
{
	struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv;
	struct rkvdec_dev *rkvdec = ctx->dev;

	dma_free_coherent(rkvdec->dev, vp9_ctx->count_tbl.size,
			  vp9_ctx->count_tbl.cpu, vp9_ctx->count_tbl.dma);
	dma_free_coherent(rkvdec->dev, vp9_ctx->priv_tbl.size,
			  vp9_ctx->priv_tbl.cpu, vp9_ctx->priv_tbl.dma);
	kfree(vp9_ctx);
}

static int rkvdec_vp9_adjust_fmt(struct rkvdec_ctx *ctx,
				 struct v4l2_format *f)
{
	struct v4l2_pix_format_mplane *fmt = &f->fmt.pix_mp;

	fmt->num_planes = 1;
	if (!fmt->plane_fmt[0].sizeimage)
		fmt->plane_fmt[0].sizeimage = fmt->width * fmt->height * 2;
	return 0;
}

const struct rkvdec_coded_fmt_ops rkvdec_vp9_fmt_ops = {
	.adjust_fmt = rkvdec_vp9_adjust_fmt,
	.start = rkvdec_vp9_start,
	.stop = rkvdec_vp9_stop,
	.run = rkvdec_vp9_run,
	.done = rkvdec_vp9_done,
};