1fbb6c848SEzequiel Garcia // SPDX-License-Identifier: GPL-2.0
2fbb6c848SEzequiel Garcia /*
3fbb6c848SEzequiel Garcia * Hantro VPU HEVC codec driver
4fbb6c848SEzequiel Garcia *
5fbb6c848SEzequiel Garcia * Copyright (C) 2020 Safran Passenger Innovations LLC
6fbb6c848SEzequiel Garcia */
7fbb6c848SEzequiel Garcia
8fbb6c848SEzequiel Garcia #include <linux/types.h>
9fbb6c848SEzequiel Garcia #include <media/v4l2-mem2mem.h>
10fbb6c848SEzequiel Garcia
11fbb6c848SEzequiel Garcia #include "hantro.h"
12fbb6c848SEzequiel Garcia #include "hantro_hw.h"
13fbb6c848SEzequiel Garcia
/*
 * Sizing constants for the tile-border scratch buffers.
 * tile_buffer_reallocate() multiplies each *_RAM_SIZE value by the
 * 64-aligned picture height and by (number of tile columns - 1); the
 * filter and SAO sizes are additionally scaled by ctx->bit_depth / 8.
 */
14fbb6c848SEzequiel Garcia #define VERT_FILTER_RAM_SIZE 8 /* bytes per pixel row */
15fbb6c848SEzequiel Garcia /*
16fbb6c848SEzequiel Garcia * BSD control data of current picture at tile border
17fbb6c848SEzequiel Garcia * 128 bits per 4x4 tile = 128/(8*4) bytes per row
18fbb6c848SEzequiel Garcia */
19fbb6c848SEzequiel Garcia #define BSD_CTRL_RAM_SIZE 4 /* bytes per pixel row */
20fbb6c848SEzequiel Garcia /* tile border coefficients of filter */
/*
 * NOTE(review): annotated "bytes per pixel" while the two constants above
 * say "bytes per pixel row"; all three are used the same way in
 * tile_buffer_reallocate() -- confirm the intended unit.
 */
21fbb6c848SEzequiel Garcia #define VERT_SAO_RAM_SIZE 48 /* bytes per pixel */
22fbb6c848SEzequiel Garcia
/* Size in bytes of the scaling_lists DMA buffer allocated at init time. */
23fbb6c848SEzequiel Garcia #define SCALING_LIST_SIZE (16 * 64)
24fbb6c848SEzequiel Garcia
/* Tile grid limits used to size the tile_sizes DMA buffer at init time. */
25fbb6c848SEzequiel Garcia #define MAX_TILE_COLS 20
26fbb6c848SEzequiel Garcia #define MAX_TILE_ROWS 22
27fbb6c848SEzequiel Garcia
hantro_hevc_ref_init(struct hantro_ctx * ctx)28fbb6c848SEzequiel Garcia void hantro_hevc_ref_init(struct hantro_ctx *ctx)
29fbb6c848SEzequiel Garcia {
30fbb6c848SEzequiel Garcia struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
31fbb6c848SEzequiel Garcia
32fbb6c848SEzequiel Garcia hevc_dec->ref_bufs_used = 0;
33fbb6c848SEzequiel Garcia }
34fbb6c848SEzequiel Garcia
hantro_hevc_get_ref_buf(struct hantro_ctx * ctx,s32 poc)35fbb6c848SEzequiel Garcia dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx,
36fbb6c848SEzequiel Garcia s32 poc)
37fbb6c848SEzequiel Garcia {
38fbb6c848SEzequiel Garcia struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
39fbb6c848SEzequiel Garcia int i;
40fbb6c848SEzequiel Garcia
41fbb6c848SEzequiel Garcia /* Find the reference buffer in already known ones */
42fbb6c848SEzequiel Garcia for (i = 0; i < NUM_REF_PICTURES; i++) {
43fbb6c848SEzequiel Garcia if (hevc_dec->ref_bufs_poc[i] == poc) {
44fbb6c848SEzequiel Garcia hevc_dec->ref_bufs_used |= 1 << i;
45fbb6c848SEzequiel Garcia return hevc_dec->ref_bufs[i].dma;
46fbb6c848SEzequiel Garcia }
47fbb6c848SEzequiel Garcia }
48fbb6c848SEzequiel Garcia
49fbb6c848SEzequiel Garcia return 0;
50fbb6c848SEzequiel Garcia }
51fbb6c848SEzequiel Garcia
/*
 * Record a new reference buffer in the first slot that is not flagged in
 * ref_bufs_used.
 *
 * The chosen slot is flagged as used and stores @poc together with the DMA
 * address @addr.
 *
 * Returns 0 on success, or -EINVAL when all NUM_REF_PICTURES slots are
 * already in use.
 */
int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr)
{
	struct hantro_hevc_dec_hw_ctx *hevc = &ctx->hevc_dec;
	int slot;

	for (slot = 0; slot < NUM_REF_PICTURES; slot++) {
		/* Skip slots that are already taken. */
		if (hevc->ref_bufs_used & (1 << slot))
			continue;

		hevc->ref_bufs_used |= 1 << slot;
		hevc->ref_bufs_poc[slot] = poc;
		hevc->ref_bufs[slot].dma = addr;
		return 0;
	}

	return -EINVAL;
}
69fbb6c848SEzequiel Garcia
tile_buffer_reallocate(struct hantro_ctx * ctx)70fbb6c848SEzequiel Garcia static int tile_buffer_reallocate(struct hantro_ctx *ctx)
71fbb6c848SEzequiel Garcia {
72fbb6c848SEzequiel Garcia struct hantro_dev *vpu = ctx->dev;
73fbb6c848SEzequiel Garcia struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
74fbb6c848SEzequiel Garcia const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
75fbb6c848SEzequiel Garcia const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
76fbb6c848SEzequiel Garcia const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
77fbb6c848SEzequiel Garcia unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1;
78fbb6c848SEzequiel Garcia unsigned int height64 = (sps->pic_height_in_luma_samples + 63) & ~63;
79fbb6c848SEzequiel Garcia unsigned int size;
80fbb6c848SEzequiel Garcia
81fbb6c848SEzequiel Garcia if (num_tile_cols <= 1 ||
82fbb6c848SEzequiel Garcia num_tile_cols <= hevc_dec->num_tile_cols_allocated)
83fbb6c848SEzequiel Garcia return 0;
84fbb6c848SEzequiel Garcia
85fbb6c848SEzequiel Garcia /* Need to reallocate due to tiles passed via PPS */
86fbb6c848SEzequiel Garcia if (hevc_dec->tile_filter.cpu) {
87fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
88fbb6c848SEzequiel Garcia hevc_dec->tile_filter.cpu,
89fbb6c848SEzequiel Garcia hevc_dec->tile_filter.dma);
90fbb6c848SEzequiel Garcia hevc_dec->tile_filter.cpu = NULL;
91fbb6c848SEzequiel Garcia }
92fbb6c848SEzequiel Garcia
93fbb6c848SEzequiel Garcia if (hevc_dec->tile_sao.cpu) {
94fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
95fbb6c848SEzequiel Garcia hevc_dec->tile_sao.cpu,
96fbb6c848SEzequiel Garcia hevc_dec->tile_sao.dma);
97fbb6c848SEzequiel Garcia hevc_dec->tile_sao.cpu = NULL;
98fbb6c848SEzequiel Garcia }
99fbb6c848SEzequiel Garcia
100fbb6c848SEzequiel Garcia if (hevc_dec->tile_bsd.cpu) {
101fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
102fbb6c848SEzequiel Garcia hevc_dec->tile_bsd.cpu,
103fbb6c848SEzequiel Garcia hevc_dec->tile_bsd.dma);
104fbb6c848SEzequiel Garcia hevc_dec->tile_bsd.cpu = NULL;
105fbb6c848SEzequiel Garcia }
106fbb6c848SEzequiel Garcia
1078a438580SBenjamin Gaignard size = (VERT_FILTER_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8;
108fbb6c848SEzequiel Garcia hevc_dec->tile_filter.cpu = dma_alloc_coherent(vpu->dev, size,
109fbb6c848SEzequiel Garcia &hevc_dec->tile_filter.dma,
110fbb6c848SEzequiel Garcia GFP_KERNEL);
111fbb6c848SEzequiel Garcia if (!hevc_dec->tile_filter.cpu)
112*59cea5a3SBenjamin Gaignard return -ENOMEM;
113fbb6c848SEzequiel Garcia hevc_dec->tile_filter.size = size;
114fbb6c848SEzequiel Garcia
1158a438580SBenjamin Gaignard size = (VERT_SAO_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8;
116fbb6c848SEzequiel Garcia hevc_dec->tile_sao.cpu = dma_alloc_coherent(vpu->dev, size,
117fbb6c848SEzequiel Garcia &hevc_dec->tile_sao.dma,
118fbb6c848SEzequiel Garcia GFP_KERNEL);
119fbb6c848SEzequiel Garcia if (!hevc_dec->tile_sao.cpu)
120fbb6c848SEzequiel Garcia goto err_free_tile_buffers;
121fbb6c848SEzequiel Garcia hevc_dec->tile_sao.size = size;
122fbb6c848SEzequiel Garcia
123fbb6c848SEzequiel Garcia size = BSD_CTRL_RAM_SIZE * height64 * (num_tile_cols - 1);
124fbb6c848SEzequiel Garcia hevc_dec->tile_bsd.cpu = dma_alloc_coherent(vpu->dev, size,
125fbb6c848SEzequiel Garcia &hevc_dec->tile_bsd.dma,
126fbb6c848SEzequiel Garcia GFP_KERNEL);
127fbb6c848SEzequiel Garcia if (!hevc_dec->tile_bsd.cpu)
128*59cea5a3SBenjamin Gaignard goto err_free_sao_buffers;
129fbb6c848SEzequiel Garcia hevc_dec->tile_bsd.size = size;
130fbb6c848SEzequiel Garcia
131fbb6c848SEzequiel Garcia hevc_dec->num_tile_cols_allocated = num_tile_cols;
132fbb6c848SEzequiel Garcia
133fbb6c848SEzequiel Garcia return 0;
134fbb6c848SEzequiel Garcia
135*59cea5a3SBenjamin Gaignard err_free_sao_buffers:
136*59cea5a3SBenjamin Gaignard if (hevc_dec->tile_sao.cpu)
137*59cea5a3SBenjamin Gaignard dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
138*59cea5a3SBenjamin Gaignard hevc_dec->tile_sao.cpu,
139*59cea5a3SBenjamin Gaignard hevc_dec->tile_sao.dma);
140*59cea5a3SBenjamin Gaignard hevc_dec->tile_sao.cpu = NULL;
141*59cea5a3SBenjamin Gaignard
142fbb6c848SEzequiel Garcia err_free_tile_buffers:
143fbb6c848SEzequiel Garcia if (hevc_dec->tile_filter.cpu)
144fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
145fbb6c848SEzequiel Garcia hevc_dec->tile_filter.cpu,
146fbb6c848SEzequiel Garcia hevc_dec->tile_filter.dma);
147fbb6c848SEzequiel Garcia hevc_dec->tile_filter.cpu = NULL;
148fbb6c848SEzequiel Garcia
149fbb6c848SEzequiel Garcia return -ENOMEM;
150fbb6c848SEzequiel Garcia }
151fbb6c848SEzequiel Garcia
hantro_hevc_validate_sps(struct hantro_ctx * ctx,const struct v4l2_ctrl_hevc_sps * sps)152fbb6c848SEzequiel Garcia static int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps)
153fbb6c848SEzequiel Garcia {
154fbb6c848SEzequiel Garcia /*
155fbb6c848SEzequiel Garcia * for tile pixel format check if the width and height match
156fbb6c848SEzequiel Garcia * hardware constraints
157fbb6c848SEzequiel Garcia */
158fbb6c848SEzequiel Garcia if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) {
159fbb6c848SEzequiel Garcia if (ctx->dst_fmt.width !=
160fbb6c848SEzequiel Garcia ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width))
161fbb6c848SEzequiel Garcia return -EINVAL;
162fbb6c848SEzequiel Garcia
163fbb6c848SEzequiel Garcia if (ctx->dst_fmt.height !=
164fbb6c848SEzequiel Garcia ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height))
165fbb6c848SEzequiel Garcia return -EINVAL;
166fbb6c848SEzequiel Garcia }
167fbb6c848SEzequiel Garcia
168fbb6c848SEzequiel Garcia return 0;
169fbb6c848SEzequiel Garcia }
170fbb6c848SEzequiel Garcia
hantro_hevc_dec_prepare_run(struct hantro_ctx * ctx)171fbb6c848SEzequiel Garcia int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
172fbb6c848SEzequiel Garcia {
173fbb6c848SEzequiel Garcia struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec;
174fbb6c848SEzequiel Garcia struct hantro_hevc_dec_ctrls *ctrls = &hevc_ctx->ctrls;
175fbb6c848SEzequiel Garcia int ret;
176fbb6c848SEzequiel Garcia
177fbb6c848SEzequiel Garcia hantro_start_prepare_run(ctx);
178fbb6c848SEzequiel Garcia
179fbb6c848SEzequiel Garcia ctrls->decode_params =
180fbb6c848SEzequiel Garcia hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_DECODE_PARAMS);
181fbb6c848SEzequiel Garcia if (WARN_ON(!ctrls->decode_params))
182fbb6c848SEzequiel Garcia return -EINVAL;
183fbb6c848SEzequiel Garcia
184fbb6c848SEzequiel Garcia ctrls->scaling =
185fbb6c848SEzequiel Garcia hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SCALING_MATRIX);
186fbb6c848SEzequiel Garcia if (WARN_ON(!ctrls->scaling))
187fbb6c848SEzequiel Garcia return -EINVAL;
188fbb6c848SEzequiel Garcia
189fbb6c848SEzequiel Garcia ctrls->sps =
190fbb6c848SEzequiel Garcia hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SPS);
191fbb6c848SEzequiel Garcia if (WARN_ON(!ctrls->sps))
192fbb6c848SEzequiel Garcia return -EINVAL;
193fbb6c848SEzequiel Garcia
194fbb6c848SEzequiel Garcia ret = hantro_hevc_validate_sps(ctx, ctrls->sps);
195fbb6c848SEzequiel Garcia if (ret)
196fbb6c848SEzequiel Garcia return ret;
197fbb6c848SEzequiel Garcia
198fbb6c848SEzequiel Garcia ctrls->pps =
199fbb6c848SEzequiel Garcia hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_PPS);
200fbb6c848SEzequiel Garcia if (WARN_ON(!ctrls->pps))
201fbb6c848SEzequiel Garcia return -EINVAL;
202fbb6c848SEzequiel Garcia
203fbb6c848SEzequiel Garcia ret = tile_buffer_reallocate(ctx);
204fbb6c848SEzequiel Garcia if (ret)
205fbb6c848SEzequiel Garcia return ret;
206fbb6c848SEzequiel Garcia
207fbb6c848SEzequiel Garcia return 0;
208fbb6c848SEzequiel Garcia }
209fbb6c848SEzequiel Garcia
hantro_hevc_dec_exit(struct hantro_ctx * ctx)210fbb6c848SEzequiel Garcia void hantro_hevc_dec_exit(struct hantro_ctx *ctx)
211fbb6c848SEzequiel Garcia {
212fbb6c848SEzequiel Garcia struct hantro_dev *vpu = ctx->dev;
213fbb6c848SEzequiel Garcia struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
214fbb6c848SEzequiel Garcia
215fbb6c848SEzequiel Garcia if (hevc_dec->tile_sizes.cpu)
216fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, hevc_dec->tile_sizes.size,
217fbb6c848SEzequiel Garcia hevc_dec->tile_sizes.cpu,
218fbb6c848SEzequiel Garcia hevc_dec->tile_sizes.dma);
219fbb6c848SEzequiel Garcia hevc_dec->tile_sizes.cpu = NULL;
220fbb6c848SEzequiel Garcia
221fbb6c848SEzequiel Garcia if (hevc_dec->scaling_lists.cpu)
222fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, hevc_dec->scaling_lists.size,
223fbb6c848SEzequiel Garcia hevc_dec->scaling_lists.cpu,
224fbb6c848SEzequiel Garcia hevc_dec->scaling_lists.dma);
225fbb6c848SEzequiel Garcia hevc_dec->scaling_lists.cpu = NULL;
226fbb6c848SEzequiel Garcia
227fbb6c848SEzequiel Garcia if (hevc_dec->tile_filter.cpu)
228fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
229fbb6c848SEzequiel Garcia hevc_dec->tile_filter.cpu,
230fbb6c848SEzequiel Garcia hevc_dec->tile_filter.dma);
231fbb6c848SEzequiel Garcia hevc_dec->tile_filter.cpu = NULL;
232fbb6c848SEzequiel Garcia
233fbb6c848SEzequiel Garcia if (hevc_dec->tile_sao.cpu)
234fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
235fbb6c848SEzequiel Garcia hevc_dec->tile_sao.cpu,
236fbb6c848SEzequiel Garcia hevc_dec->tile_sao.dma);
237fbb6c848SEzequiel Garcia hevc_dec->tile_sao.cpu = NULL;
238fbb6c848SEzequiel Garcia
239fbb6c848SEzequiel Garcia if (hevc_dec->tile_bsd.cpu)
240fbb6c848SEzequiel Garcia dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
241fbb6c848SEzequiel Garcia hevc_dec->tile_bsd.cpu,
242fbb6c848SEzequiel Garcia hevc_dec->tile_bsd.dma);
243fbb6c848SEzequiel Garcia hevc_dec->tile_bsd.cpu = NULL;
244fbb6c848SEzequiel Garcia }
245fbb6c848SEzequiel Garcia
hantro_hevc_dec_init(struct hantro_ctx * ctx)246fbb6c848SEzequiel Garcia int hantro_hevc_dec_init(struct hantro_ctx *ctx)
247fbb6c848SEzequiel Garcia {
248fbb6c848SEzequiel Garcia struct hantro_dev *vpu = ctx->dev;
249fbb6c848SEzequiel Garcia struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
250fbb6c848SEzequiel Garcia unsigned int size;
251fbb6c848SEzequiel Garcia
252fbb6c848SEzequiel Garcia memset(hevc_dec, 0, sizeof(*hevc_dec));
253fbb6c848SEzequiel Garcia
254fbb6c848SEzequiel Garcia /*
255fbb6c848SEzequiel Garcia * Maximum number of tiles times width and height (2 bytes each),
256fbb6c848SEzequiel Garcia * rounding up to next 16 bytes boundary + one extra 16 byte
257fbb6c848SEzequiel Garcia * chunk (HW guys wanted to have this).
258fbb6c848SEzequiel Garcia */
259fbb6c848SEzequiel Garcia size = round_up(MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 16, 16);
260fbb6c848SEzequiel Garcia hevc_dec->tile_sizes.cpu = dma_alloc_coherent(vpu->dev, size,
261fbb6c848SEzequiel Garcia &hevc_dec->tile_sizes.dma,
262fbb6c848SEzequiel Garcia GFP_KERNEL);
263fbb6c848SEzequiel Garcia if (!hevc_dec->tile_sizes.cpu)
264fbb6c848SEzequiel Garcia return -ENOMEM;
265fbb6c848SEzequiel Garcia
266fbb6c848SEzequiel Garcia hevc_dec->tile_sizes.size = size;
267fbb6c848SEzequiel Garcia
268fbb6c848SEzequiel Garcia hevc_dec->scaling_lists.cpu = dma_alloc_coherent(vpu->dev, SCALING_LIST_SIZE,
269fbb6c848SEzequiel Garcia &hevc_dec->scaling_lists.dma,
270fbb6c848SEzequiel Garcia GFP_KERNEL);
271fbb6c848SEzequiel Garcia if (!hevc_dec->scaling_lists.cpu)
272fbb6c848SEzequiel Garcia return -ENOMEM;
273fbb6c848SEzequiel Garcia
274fbb6c848SEzequiel Garcia hevc_dec->scaling_lists.size = SCALING_LIST_SIZE;
275fbb6c848SEzequiel Garcia
276fbb6c848SEzequiel Garcia hantro_hevc_ref_init(ctx);
277fbb6c848SEzequiel Garcia
278fbb6c848SEzequiel Garcia return 0;
279fbb6c848SEzequiel Garcia }
280