1*fbb6c848SEzequiel Garcia // SPDX-License-Identifier: GPL-2.0
2*fbb6c848SEzequiel Garcia /*
3*fbb6c848SEzequiel Garcia * Hantro VP9 codec driver
4*fbb6c848SEzequiel Garcia *
5*fbb6c848SEzequiel Garcia * Copyright (C) 2021 Collabora Ltd.
6*fbb6c848SEzequiel Garcia */
7*fbb6c848SEzequiel Garcia
8*fbb6c848SEzequiel Garcia #include <linux/types.h>
9*fbb6c848SEzequiel Garcia #include <media/v4l2-mem2mem.h>
10*fbb6c848SEzequiel Garcia
11*fbb6c848SEzequiel Garcia #include "hantro.h"
12*fbb6c848SEzequiel Garcia #include "hantro_hw.h"
13*fbb6c848SEzequiel Garcia #include "hantro_vp9.h"
14*fbb6c848SEzequiel Garcia
/* Integer power of two: evaluates to 2^x as an int. */
#define POW2(x)			(1 << (x))

/*
 * Tile-column bound, given as a log2 and expanded with POW2(); the tile
 * filter and bsd control areas are sized for (MAX_NUM_TILE_COLS - 1) columns.
 */
#define MAX_LOG2_TILE_COLUMNS	6
#define MAX_NUM_TILE_COLS	POW2(MAX_LOG2_TILE_COLUMNS)

/* Column/row counts used only when sizing the tile info area. */
#define MAX_TILE_COLS		20
#define MAX_TILE_ROWS		22
21*fbb6c848SEzequiel Garcia
hantro_vp9_tile_filter_size(unsigned int height)22*fbb6c848SEzequiel Garcia static size_t hantro_vp9_tile_filter_size(unsigned int height)
23*fbb6c848SEzequiel Garcia {
24*fbb6c848SEzequiel Garcia u32 h, height32, size;
25*fbb6c848SEzequiel Garcia
26*fbb6c848SEzequiel Garcia h = roundup(height, 8);
27*fbb6c848SEzequiel Garcia
28*fbb6c848SEzequiel Garcia height32 = roundup(h, 64);
29*fbb6c848SEzequiel Garcia size = 24 * height32 * (MAX_NUM_TILE_COLS - 1); /* luma: 8, chroma: 8 + 8 */
30*fbb6c848SEzequiel Garcia
31*fbb6c848SEzequiel Garcia return size;
32*fbb6c848SEzequiel Garcia }
33*fbb6c848SEzequiel Garcia
hantro_vp9_bsd_control_size(unsigned int height)34*fbb6c848SEzequiel Garcia static size_t hantro_vp9_bsd_control_size(unsigned int height)
35*fbb6c848SEzequiel Garcia {
36*fbb6c848SEzequiel Garcia u32 h, height32;
37*fbb6c848SEzequiel Garcia
38*fbb6c848SEzequiel Garcia h = roundup(height, 8);
39*fbb6c848SEzequiel Garcia height32 = roundup(h, 64);
40*fbb6c848SEzequiel Garcia
41*fbb6c848SEzequiel Garcia return 16 * (height32 / 4) * (MAX_NUM_TILE_COLS - 1);
42*fbb6c848SEzequiel Garcia }
43*fbb6c848SEzequiel Garcia
/*
 * Size in bytes of one segment map for a @width x @height picture.
 *
 * The picture is covered by 64x64 blocks (partial blocks count as whole
 * ones) and each block takes 32 bytes.
 *
 * Rounding a dimension up to a multiple of 8 before the ceiling division by
 * 64 cannot change the quotient, so only the division is performed. The
 * ceiling is computed as quotient + (remainder != 0) so the addition cannot
 * wrap for dimensions close to UINT_MAX, and the block count is kept in
 * size_t rather than a signed int so that the final multiplication is not
 * carried out in int.
 */
static size_t hantro_vp9_segment_map_size(unsigned int width, unsigned int height)
{
	size_t ctb_cols = width / 64 + (width % 64 ? 1 : 0);
	size_t ctb_rows = height / 64 + (height % 64 ? 1 : 0);

	return ctb_cols * ctb_rows * 32;
}
55*fbb6c848SEzequiel Garcia
hantro_vp9_prob_tab_size(void)56*fbb6c848SEzequiel Garcia static inline size_t hantro_vp9_prob_tab_size(void)
57*fbb6c848SEzequiel Garcia {
58*fbb6c848SEzequiel Garcia return roundup(sizeof(struct hantro_g2_all_probs), 16);
59*fbb6c848SEzequiel Garcia }
60*fbb6c848SEzequiel Garcia
hantro_vp9_count_tab_size(void)61*fbb6c848SEzequiel Garcia static inline size_t hantro_vp9_count_tab_size(void)
62*fbb6c848SEzequiel Garcia {
63*fbb6c848SEzequiel Garcia return roundup(sizeof(struct symbol_counts), 16);
64*fbb6c848SEzequiel Garcia }
65*fbb6c848SEzequiel Garcia
hantro_vp9_tile_info_size(void)66*fbb6c848SEzequiel Garcia static inline size_t hantro_vp9_tile_info_size(void)
67*fbb6c848SEzequiel Garcia {
68*fbb6c848SEzequiel Garcia return roundup((MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 15 + 16) & ~0xf, 16);
69*fbb6c848SEzequiel Garcia }
70*fbb6c848SEzequiel Garcia
get_coeffs_arr(struct symbol_counts * cnts,int i,int j,int k,int l,int m)71*fbb6c848SEzequiel Garcia static void *get_coeffs_arr(struct symbol_counts *cnts, int i, int j, int k, int l, int m)
72*fbb6c848SEzequiel Garcia {
73*fbb6c848SEzequiel Garcia if (i == 0)
74*fbb6c848SEzequiel Garcia return &cnts->count_coeffs[j][k][l][m];
75*fbb6c848SEzequiel Garcia
76*fbb6c848SEzequiel Garcia if (i == 1)
77*fbb6c848SEzequiel Garcia return &cnts->count_coeffs8x8[j][k][l][m];
78*fbb6c848SEzequiel Garcia
79*fbb6c848SEzequiel Garcia if (i == 2)
80*fbb6c848SEzequiel Garcia return &cnts->count_coeffs16x16[j][k][l][m];
81*fbb6c848SEzequiel Garcia
82*fbb6c848SEzequiel Garcia if (i == 3)
83*fbb6c848SEzequiel Garcia return &cnts->count_coeffs32x32[j][k][l][m];
84*fbb6c848SEzequiel Garcia
85*fbb6c848SEzequiel Garcia return NULL;
86*fbb6c848SEzequiel Garcia }
87*fbb6c848SEzequiel Garcia
get_eobs1(struct symbol_counts * cnts,int i,int j,int k,int l,int m)88*fbb6c848SEzequiel Garcia static void *get_eobs1(struct symbol_counts *cnts, int i, int j, int k, int l, int m)
89*fbb6c848SEzequiel Garcia {
90*fbb6c848SEzequiel Garcia if (i == 0)
91*fbb6c848SEzequiel Garcia return &cnts->count_coeffs[j][k][l][m][3];
92*fbb6c848SEzequiel Garcia
93*fbb6c848SEzequiel Garcia if (i == 1)
94*fbb6c848SEzequiel Garcia return &cnts->count_coeffs8x8[j][k][l][m][3];
95*fbb6c848SEzequiel Garcia
96*fbb6c848SEzequiel Garcia if (i == 2)
97*fbb6c848SEzequiel Garcia return &cnts->count_coeffs16x16[j][k][l][m][3];
98*fbb6c848SEzequiel Garcia
99*fbb6c848SEzequiel Garcia if (i == 3)
100*fbb6c848SEzequiel Garcia return &cnts->count_coeffs32x32[j][k][l][m][3];
101*fbb6c848SEzequiel Garcia
102*fbb6c848SEzequiel Garcia return NULL;
103*fbb6c848SEzequiel Garcia }
104*fbb6c848SEzequiel Garcia
105*fbb6c848SEzequiel Garcia #define INNER_LOOP \
106*fbb6c848SEzequiel Garcia do { \
107*fbb6c848SEzequiel Garcia for (m = 0; m < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0][0]); ++m) { \
108*fbb6c848SEzequiel Garcia vp9_ctx->cnts.coeff[i][j][k][l][m] = \
109*fbb6c848SEzequiel Garcia get_coeffs_arr(cnts, i, j, k, l, m); \
110*fbb6c848SEzequiel Garcia vp9_ctx->cnts.eob[i][j][k][l][m][0] = \
111*fbb6c848SEzequiel Garcia &cnts->count_eobs[i][j][k][l][m]; \
112*fbb6c848SEzequiel Garcia vp9_ctx->cnts.eob[i][j][k][l][m][1] = \
113*fbb6c848SEzequiel Garcia get_eobs1(cnts, i, j, k, l, m); \
114*fbb6c848SEzequiel Garcia } \
115*fbb6c848SEzequiel Garcia } while (0)
116*fbb6c848SEzequiel Garcia
init_v4l2_vp9_count_tbl(struct hantro_ctx * ctx)117*fbb6c848SEzequiel Garcia static void init_v4l2_vp9_count_tbl(struct hantro_ctx *ctx)
118*fbb6c848SEzequiel Garcia {
119*fbb6c848SEzequiel Garcia struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
120*fbb6c848SEzequiel Garcia struct symbol_counts *cnts = vp9_ctx->misc.cpu + vp9_ctx->ctx_counters_offset;
121*fbb6c848SEzequiel Garcia int i, j, k, l, m;
122*fbb6c848SEzequiel Garcia
123*fbb6c848SEzequiel Garcia vp9_ctx->cnts.partition = &cnts->partition_counts;
124*fbb6c848SEzequiel Garcia vp9_ctx->cnts.skip = &cnts->mbskip_count;
125*fbb6c848SEzequiel Garcia vp9_ctx->cnts.intra_inter = &cnts->intra_inter_count;
126*fbb6c848SEzequiel Garcia vp9_ctx->cnts.tx32p = &cnts->tx32x32_count;
127*fbb6c848SEzequiel Garcia /*
128*fbb6c848SEzequiel Garcia * g2 hardware uses tx16x16_count[2][3], while the api
129*fbb6c848SEzequiel Garcia * expects tx16p[2][4], so this must be explicitly copied
130*fbb6c848SEzequiel Garcia * into vp9_ctx->cnts.tx16p when passing the data to the
131*fbb6c848SEzequiel Garcia * vp9 library function
132*fbb6c848SEzequiel Garcia */
133*fbb6c848SEzequiel Garcia vp9_ctx->cnts.tx8p = &cnts->tx8x8_count;
134*fbb6c848SEzequiel Garcia
135*fbb6c848SEzequiel Garcia vp9_ctx->cnts.y_mode = &cnts->sb_ymode_counts;
136*fbb6c848SEzequiel Garcia vp9_ctx->cnts.uv_mode = &cnts->uv_mode_counts;
137*fbb6c848SEzequiel Garcia vp9_ctx->cnts.comp = &cnts->comp_inter_count;
138*fbb6c848SEzequiel Garcia vp9_ctx->cnts.comp_ref = &cnts->comp_ref_count;
139*fbb6c848SEzequiel Garcia vp9_ctx->cnts.single_ref = &cnts->single_ref_count;
140*fbb6c848SEzequiel Garcia vp9_ctx->cnts.filter = &cnts->switchable_interp_counts;
141*fbb6c848SEzequiel Garcia vp9_ctx->cnts.mv_joint = &cnts->mv_counts.joints;
142*fbb6c848SEzequiel Garcia vp9_ctx->cnts.sign = &cnts->mv_counts.sign;
143*fbb6c848SEzequiel Garcia vp9_ctx->cnts.classes = &cnts->mv_counts.classes;
144*fbb6c848SEzequiel Garcia vp9_ctx->cnts.class0 = &cnts->mv_counts.class0;
145*fbb6c848SEzequiel Garcia vp9_ctx->cnts.bits = &cnts->mv_counts.bits;
146*fbb6c848SEzequiel Garcia vp9_ctx->cnts.class0_fp = &cnts->mv_counts.class0_fp;
147*fbb6c848SEzequiel Garcia vp9_ctx->cnts.fp = &cnts->mv_counts.fp;
148*fbb6c848SEzequiel Garcia vp9_ctx->cnts.class0_hp = &cnts->mv_counts.class0_hp;
149*fbb6c848SEzequiel Garcia vp9_ctx->cnts.hp = &cnts->mv_counts.hp;
150*fbb6c848SEzequiel Garcia
151*fbb6c848SEzequiel Garcia for (i = 0; i < ARRAY_SIZE(vp9_ctx->cnts.coeff); ++i)
152*fbb6c848SEzequiel Garcia for (j = 0; j < ARRAY_SIZE(vp9_ctx->cnts.coeff[i]); ++j)
153*fbb6c848SEzequiel Garcia for (k = 0; k < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0]); ++k)
154*fbb6c848SEzequiel Garcia for (l = 0; l < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0]); ++l)
155*fbb6c848SEzequiel Garcia INNER_LOOP;
156*fbb6c848SEzequiel Garcia }
157*fbb6c848SEzequiel Garcia
hantro_vp9_dec_init(struct hantro_ctx * ctx)158*fbb6c848SEzequiel Garcia int hantro_vp9_dec_init(struct hantro_ctx *ctx)
159*fbb6c848SEzequiel Garcia {
160*fbb6c848SEzequiel Garcia struct hantro_dev *vpu = ctx->dev;
161*fbb6c848SEzequiel Garcia const struct hantro_variant *variant = vpu->variant;
162*fbb6c848SEzequiel Garcia struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec;
163*fbb6c848SEzequiel Garcia struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge;
164*fbb6c848SEzequiel Garcia struct hantro_aux_buf *segment_map = &vp9_dec->segment_map;
165*fbb6c848SEzequiel Garcia struct hantro_aux_buf *misc = &vp9_dec->misc;
166*fbb6c848SEzequiel Garcia u32 i, max_width, max_height, size;
167*fbb6c848SEzequiel Garcia
168*fbb6c848SEzequiel Garcia if (variant->num_dec_fmts < 1)
169*fbb6c848SEzequiel Garcia return -EINVAL;
170*fbb6c848SEzequiel Garcia
171*fbb6c848SEzequiel Garcia for (i = 0; i < variant->num_dec_fmts; ++i)
172*fbb6c848SEzequiel Garcia if (variant->dec_fmts[i].fourcc == V4L2_PIX_FMT_VP9_FRAME)
173*fbb6c848SEzequiel Garcia break;
174*fbb6c848SEzequiel Garcia
175*fbb6c848SEzequiel Garcia if (i == variant->num_dec_fmts)
176*fbb6c848SEzequiel Garcia return -EINVAL;
177*fbb6c848SEzequiel Garcia
178*fbb6c848SEzequiel Garcia max_width = vpu->variant->dec_fmts[i].frmsize.max_width;
179*fbb6c848SEzequiel Garcia max_height = vpu->variant->dec_fmts[i].frmsize.max_height;
180*fbb6c848SEzequiel Garcia
181*fbb6c848SEzequiel Garcia size = hantro_vp9_tile_filter_size(max_height);
182*fbb6c848SEzequiel Garcia vp9_dec->bsd_ctrl_offset = size;
183*fbb6c848SEzequiel Garcia size += hantro_vp9_bsd_control_size(max_height);
184*fbb6c848SEzequiel Garcia
185*fbb6c848SEzequiel Garcia tile_edge->cpu = dma_alloc_coherent(vpu->dev, size, &tile_edge->dma, GFP_KERNEL);
186*fbb6c848SEzequiel Garcia if (!tile_edge->cpu)
187*fbb6c848SEzequiel Garcia return -ENOMEM;
188*fbb6c848SEzequiel Garcia
189*fbb6c848SEzequiel Garcia tile_edge->size = size;
190*fbb6c848SEzequiel Garcia memset(tile_edge->cpu, 0, size);
191*fbb6c848SEzequiel Garcia
192*fbb6c848SEzequiel Garcia size = hantro_vp9_segment_map_size(max_width, max_height);
193*fbb6c848SEzequiel Garcia vp9_dec->segment_map_size = size;
194*fbb6c848SEzequiel Garcia size *= 2; /* we need two areas of this size, used alternately */
195*fbb6c848SEzequiel Garcia
196*fbb6c848SEzequiel Garcia segment_map->cpu = dma_alloc_coherent(vpu->dev, size, &segment_map->dma, GFP_KERNEL);
197*fbb6c848SEzequiel Garcia if (!segment_map->cpu)
198*fbb6c848SEzequiel Garcia goto err_segment_map;
199*fbb6c848SEzequiel Garcia
200*fbb6c848SEzequiel Garcia segment_map->size = size;
201*fbb6c848SEzequiel Garcia memset(segment_map->cpu, 0, size);
202*fbb6c848SEzequiel Garcia
203*fbb6c848SEzequiel Garcia size = hantro_vp9_prob_tab_size();
204*fbb6c848SEzequiel Garcia vp9_dec->ctx_counters_offset = size;
205*fbb6c848SEzequiel Garcia size += hantro_vp9_count_tab_size();
206*fbb6c848SEzequiel Garcia vp9_dec->tile_info_offset = size;
207*fbb6c848SEzequiel Garcia size += hantro_vp9_tile_info_size();
208*fbb6c848SEzequiel Garcia
209*fbb6c848SEzequiel Garcia misc->cpu = dma_alloc_coherent(vpu->dev, size, &misc->dma, GFP_KERNEL);
210*fbb6c848SEzequiel Garcia if (!misc->cpu)
211*fbb6c848SEzequiel Garcia goto err_misc;
212*fbb6c848SEzequiel Garcia
213*fbb6c848SEzequiel Garcia misc->size = size;
214*fbb6c848SEzequiel Garcia memset(misc->cpu, 0, size);
215*fbb6c848SEzequiel Garcia
216*fbb6c848SEzequiel Garcia init_v4l2_vp9_count_tbl(ctx);
217*fbb6c848SEzequiel Garcia
218*fbb6c848SEzequiel Garcia return 0;
219*fbb6c848SEzequiel Garcia
220*fbb6c848SEzequiel Garcia err_misc:
221*fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma);
222*fbb6c848SEzequiel Garcia
223*fbb6c848SEzequiel Garcia err_segment_map:
224*fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma);
225*fbb6c848SEzequiel Garcia
226*fbb6c848SEzequiel Garcia return -ENOMEM;
227*fbb6c848SEzequiel Garcia }
228*fbb6c848SEzequiel Garcia
hantro_vp9_dec_exit(struct hantro_ctx * ctx)229*fbb6c848SEzequiel Garcia void hantro_vp9_dec_exit(struct hantro_ctx *ctx)
230*fbb6c848SEzequiel Garcia {
231*fbb6c848SEzequiel Garcia struct hantro_dev *vpu = ctx->dev;
232*fbb6c848SEzequiel Garcia struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec;
233*fbb6c848SEzequiel Garcia struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge;
234*fbb6c848SEzequiel Garcia struct hantro_aux_buf *segment_map = &vp9_dec->segment_map;
235*fbb6c848SEzequiel Garcia struct hantro_aux_buf *misc = &vp9_dec->misc;
236*fbb6c848SEzequiel Garcia
237*fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, misc->size, misc->cpu, misc->dma);
238*fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma);
239*fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma);
240*fbb6c848SEzequiel Garcia }
241