xref: /openbmc/linux/drivers/media/platform/verisilicon/hantro_hevc.c (revision 2612e3bbc0386368a850140a6c9b990cd496a5ec)
// SPDX-License-Identifier: GPL-2.0
/*
 * Hantro VPU HEVC codec driver
 *
 * Copyright (C) 2020 Safran Passenger Innovations LLC
 */
7fbb6c848SEzequiel Garcia 
8fbb6c848SEzequiel Garcia #include <linux/types.h>
9fbb6c848SEzequiel Garcia #include <media/v4l2-mem2mem.h>
10fbb6c848SEzequiel Garcia 
11fbb6c848SEzequiel Garcia #include "hantro.h"
12fbb6c848SEzequiel Garcia #include "hantro_hw.h"
13fbb6c848SEzequiel Garcia 
/*
 * Upper bounds on the tile grid; used to size the tile-sizes buffer
 * allocated at decoder init.
 */
#define MAX_TILE_COLS 20
#define MAX_TILE_ROWS 22

/* Size in bytes of the scaling-lists buffer allocated at decoder init */
#define SCALING_LIST_SIZE (16 * 64)

/*
 * Per-row factors used to size the per-tile-column buffers.
 * The filter and SAO factors are additionally scaled by bit depth / 8
 * where they are used.
 */
#define VERT_FILTER_RAM_SIZE 8 /* bytes per pixel row */
/* tile border coefficients of filter */
#define VERT_SAO_RAM_SIZE 48 /* bytes per pixel */
/*
 * BSD control data of current picture at tile border
 * 128 bits per 4x4 tile = 128/(8*4) bytes per row
 */
#define BSD_CTRL_RAM_SIZE 4 /* bytes per pixel row */

27fbb6c848SEzequiel Garcia 
hantro_hevc_ref_init(struct hantro_ctx * ctx)28fbb6c848SEzequiel Garcia void hantro_hevc_ref_init(struct hantro_ctx *ctx)
29fbb6c848SEzequiel Garcia {
30fbb6c848SEzequiel Garcia 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
31fbb6c848SEzequiel Garcia 
32fbb6c848SEzequiel Garcia 	hevc_dec->ref_bufs_used = 0;
33fbb6c848SEzequiel Garcia }
34fbb6c848SEzequiel Garcia 
hantro_hevc_get_ref_buf(struct hantro_ctx * ctx,s32 poc)35fbb6c848SEzequiel Garcia dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx,
36fbb6c848SEzequiel Garcia 				   s32 poc)
37fbb6c848SEzequiel Garcia {
38fbb6c848SEzequiel Garcia 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
39fbb6c848SEzequiel Garcia 	int i;
40fbb6c848SEzequiel Garcia 
41fbb6c848SEzequiel Garcia 	/* Find the reference buffer in already known ones */
42fbb6c848SEzequiel Garcia 	for (i = 0;  i < NUM_REF_PICTURES; i++) {
43fbb6c848SEzequiel Garcia 		if (hevc_dec->ref_bufs_poc[i] == poc) {
44fbb6c848SEzequiel Garcia 			hevc_dec->ref_bufs_used |= 1 << i;
45fbb6c848SEzequiel Garcia 			return hevc_dec->ref_bufs[i].dma;
46fbb6c848SEzequiel Garcia 		}
47fbb6c848SEzequiel Garcia 	}
48fbb6c848SEzequiel Garcia 
49fbb6c848SEzequiel Garcia 	return 0;
50fbb6c848SEzequiel Garcia }
51fbb6c848SEzequiel Garcia 
/*
 * Record a new reference buffer in the first slot not flagged as used.
 *
 * The chosen slot is flagged in the usage bitmap and stores @poc and
 * @addr. Returns 0 on success, or -EINVAL when all NUM_REF_PICTURES
 * slots are already in use.
 */
int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr)
{
	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
	int slot;

	for (slot = 0; slot < NUM_REF_PICTURES; slot++) {
		/* Skip slots that are already taken */
		if (hevc_dec->ref_bufs_used & (1 << slot))
			continue;

		hevc_dec->ref_bufs_used |= (1 << slot);
		hevc_dec->ref_bufs_poc[slot] = poc;
		hevc_dec->ref_bufs[slot].dma = addr;
		return 0;
	}

	return -EINVAL;
}
69fbb6c848SEzequiel Garcia 
tile_buffer_reallocate(struct hantro_ctx * ctx)70fbb6c848SEzequiel Garcia static int tile_buffer_reallocate(struct hantro_ctx *ctx)
71fbb6c848SEzequiel Garcia {
72fbb6c848SEzequiel Garcia 	struct hantro_dev *vpu = ctx->dev;
73fbb6c848SEzequiel Garcia 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
74fbb6c848SEzequiel Garcia 	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
75fbb6c848SEzequiel Garcia 	const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
76fbb6c848SEzequiel Garcia 	const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
77fbb6c848SEzequiel Garcia 	unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1;
78fbb6c848SEzequiel Garcia 	unsigned int height64 = (sps->pic_height_in_luma_samples + 63) & ~63;
79fbb6c848SEzequiel Garcia 	unsigned int size;
80fbb6c848SEzequiel Garcia 
81fbb6c848SEzequiel Garcia 	if (num_tile_cols <= 1 ||
82fbb6c848SEzequiel Garcia 	    num_tile_cols <= hevc_dec->num_tile_cols_allocated)
83fbb6c848SEzequiel Garcia 		return 0;
84fbb6c848SEzequiel Garcia 
85fbb6c848SEzequiel Garcia 	/* Need to reallocate due to tiles passed via PPS */
86fbb6c848SEzequiel Garcia 	if (hevc_dec->tile_filter.cpu) {
87fbb6c848SEzequiel Garcia 		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
88fbb6c848SEzequiel Garcia 				  hevc_dec->tile_filter.cpu,
89fbb6c848SEzequiel Garcia 				  hevc_dec->tile_filter.dma);
90fbb6c848SEzequiel Garcia 		hevc_dec->tile_filter.cpu = NULL;
91fbb6c848SEzequiel Garcia 	}
92fbb6c848SEzequiel Garcia 
93fbb6c848SEzequiel Garcia 	if (hevc_dec->tile_sao.cpu) {
94fbb6c848SEzequiel Garcia 		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
95fbb6c848SEzequiel Garcia 				  hevc_dec->tile_sao.cpu,
96fbb6c848SEzequiel Garcia 				  hevc_dec->tile_sao.dma);
97fbb6c848SEzequiel Garcia 		hevc_dec->tile_sao.cpu = NULL;
98fbb6c848SEzequiel Garcia 	}
99fbb6c848SEzequiel Garcia 
100fbb6c848SEzequiel Garcia 	if (hevc_dec->tile_bsd.cpu) {
101fbb6c848SEzequiel Garcia 		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
102fbb6c848SEzequiel Garcia 				  hevc_dec->tile_bsd.cpu,
103fbb6c848SEzequiel Garcia 				  hevc_dec->tile_bsd.dma);
104fbb6c848SEzequiel Garcia 		hevc_dec->tile_bsd.cpu = NULL;
105fbb6c848SEzequiel Garcia 	}
106fbb6c848SEzequiel Garcia 
1078a438580SBenjamin Gaignard 	size = (VERT_FILTER_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8;
108fbb6c848SEzequiel Garcia 	hevc_dec->tile_filter.cpu = dma_alloc_coherent(vpu->dev, size,
109fbb6c848SEzequiel Garcia 						       &hevc_dec->tile_filter.dma,
110fbb6c848SEzequiel Garcia 						       GFP_KERNEL);
111fbb6c848SEzequiel Garcia 	if (!hevc_dec->tile_filter.cpu)
112*59cea5a3SBenjamin Gaignard 		return -ENOMEM;
113fbb6c848SEzequiel Garcia 	hevc_dec->tile_filter.size = size;
114fbb6c848SEzequiel Garcia 
1158a438580SBenjamin Gaignard 	size = (VERT_SAO_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8;
116fbb6c848SEzequiel Garcia 	hevc_dec->tile_sao.cpu = dma_alloc_coherent(vpu->dev, size,
117fbb6c848SEzequiel Garcia 						    &hevc_dec->tile_sao.dma,
118fbb6c848SEzequiel Garcia 						    GFP_KERNEL);
119fbb6c848SEzequiel Garcia 	if (!hevc_dec->tile_sao.cpu)
120fbb6c848SEzequiel Garcia 		goto err_free_tile_buffers;
121fbb6c848SEzequiel Garcia 	hevc_dec->tile_sao.size = size;
122fbb6c848SEzequiel Garcia 
123fbb6c848SEzequiel Garcia 	size = BSD_CTRL_RAM_SIZE * height64 * (num_tile_cols - 1);
124fbb6c848SEzequiel Garcia 	hevc_dec->tile_bsd.cpu = dma_alloc_coherent(vpu->dev, size,
125fbb6c848SEzequiel Garcia 						    &hevc_dec->tile_bsd.dma,
126fbb6c848SEzequiel Garcia 						    GFP_KERNEL);
127fbb6c848SEzequiel Garcia 	if (!hevc_dec->tile_bsd.cpu)
128*59cea5a3SBenjamin Gaignard 		goto err_free_sao_buffers;
129fbb6c848SEzequiel Garcia 	hevc_dec->tile_bsd.size = size;
130fbb6c848SEzequiel Garcia 
131fbb6c848SEzequiel Garcia 	hevc_dec->num_tile_cols_allocated = num_tile_cols;
132fbb6c848SEzequiel Garcia 
133fbb6c848SEzequiel Garcia 	return 0;
134fbb6c848SEzequiel Garcia 
135*59cea5a3SBenjamin Gaignard err_free_sao_buffers:
136*59cea5a3SBenjamin Gaignard 	if (hevc_dec->tile_sao.cpu)
137*59cea5a3SBenjamin Gaignard 		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
138*59cea5a3SBenjamin Gaignard 				  hevc_dec->tile_sao.cpu,
139*59cea5a3SBenjamin Gaignard 				  hevc_dec->tile_sao.dma);
140*59cea5a3SBenjamin Gaignard 	hevc_dec->tile_sao.cpu = NULL;
141*59cea5a3SBenjamin Gaignard 
142fbb6c848SEzequiel Garcia err_free_tile_buffers:
143fbb6c848SEzequiel Garcia 	if (hevc_dec->tile_filter.cpu)
144fbb6c848SEzequiel Garcia 		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
145fbb6c848SEzequiel Garcia 				  hevc_dec->tile_filter.cpu,
146fbb6c848SEzequiel Garcia 				  hevc_dec->tile_filter.dma);
147fbb6c848SEzequiel Garcia 	hevc_dec->tile_filter.cpu = NULL;
148fbb6c848SEzequiel Garcia 
149fbb6c848SEzequiel Garcia 	return -ENOMEM;
150fbb6c848SEzequiel Garcia }
151fbb6c848SEzequiel Garcia 
hantro_hevc_validate_sps(struct hantro_ctx * ctx,const struct v4l2_ctrl_hevc_sps * sps)152fbb6c848SEzequiel Garcia static int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps)
153fbb6c848SEzequiel Garcia {
154fbb6c848SEzequiel Garcia 	/*
155fbb6c848SEzequiel Garcia 	 * for tile pixel format check if the width and height match
156fbb6c848SEzequiel Garcia 	 * hardware constraints
157fbb6c848SEzequiel Garcia 	 */
158fbb6c848SEzequiel Garcia 	if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) {
159fbb6c848SEzequiel Garcia 		if (ctx->dst_fmt.width !=
160fbb6c848SEzequiel Garcia 		    ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width))
161fbb6c848SEzequiel Garcia 			return -EINVAL;
162fbb6c848SEzequiel Garcia 
163fbb6c848SEzequiel Garcia 		if (ctx->dst_fmt.height !=
164fbb6c848SEzequiel Garcia 		    ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height))
165fbb6c848SEzequiel Garcia 			return -EINVAL;
166fbb6c848SEzequiel Garcia 	}
167fbb6c848SEzequiel Garcia 
168fbb6c848SEzequiel Garcia 	return 0;
169fbb6c848SEzequiel Garcia }
170fbb6c848SEzequiel Garcia 
hantro_hevc_dec_prepare_run(struct hantro_ctx * ctx)171fbb6c848SEzequiel Garcia int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
172fbb6c848SEzequiel Garcia {
173fbb6c848SEzequiel Garcia 	struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec;
174fbb6c848SEzequiel Garcia 	struct hantro_hevc_dec_ctrls *ctrls = &hevc_ctx->ctrls;
175fbb6c848SEzequiel Garcia 	int ret;
176fbb6c848SEzequiel Garcia 
177fbb6c848SEzequiel Garcia 	hantro_start_prepare_run(ctx);
178fbb6c848SEzequiel Garcia 
179fbb6c848SEzequiel Garcia 	ctrls->decode_params =
180fbb6c848SEzequiel Garcia 		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_DECODE_PARAMS);
181fbb6c848SEzequiel Garcia 	if (WARN_ON(!ctrls->decode_params))
182fbb6c848SEzequiel Garcia 		return -EINVAL;
183fbb6c848SEzequiel Garcia 
184fbb6c848SEzequiel Garcia 	ctrls->scaling =
185fbb6c848SEzequiel Garcia 		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SCALING_MATRIX);
186fbb6c848SEzequiel Garcia 	if (WARN_ON(!ctrls->scaling))
187fbb6c848SEzequiel Garcia 		return -EINVAL;
188fbb6c848SEzequiel Garcia 
189fbb6c848SEzequiel Garcia 	ctrls->sps =
190fbb6c848SEzequiel Garcia 		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SPS);
191fbb6c848SEzequiel Garcia 	if (WARN_ON(!ctrls->sps))
192fbb6c848SEzequiel Garcia 		return -EINVAL;
193fbb6c848SEzequiel Garcia 
194fbb6c848SEzequiel Garcia 	ret = hantro_hevc_validate_sps(ctx, ctrls->sps);
195fbb6c848SEzequiel Garcia 	if (ret)
196fbb6c848SEzequiel Garcia 		return ret;
197fbb6c848SEzequiel Garcia 
198fbb6c848SEzequiel Garcia 	ctrls->pps =
199fbb6c848SEzequiel Garcia 		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_PPS);
200fbb6c848SEzequiel Garcia 	if (WARN_ON(!ctrls->pps))
201fbb6c848SEzequiel Garcia 		return -EINVAL;
202fbb6c848SEzequiel Garcia 
203fbb6c848SEzequiel Garcia 	ret = tile_buffer_reallocate(ctx);
204fbb6c848SEzequiel Garcia 	if (ret)
205fbb6c848SEzequiel Garcia 		return ret;
206fbb6c848SEzequiel Garcia 
207fbb6c848SEzequiel Garcia 	return 0;
208fbb6c848SEzequiel Garcia }
209fbb6c848SEzequiel Garcia 
hantro_hevc_dec_exit(struct hantro_ctx * ctx)210fbb6c848SEzequiel Garcia void hantro_hevc_dec_exit(struct hantro_ctx *ctx)
211fbb6c848SEzequiel Garcia {
212fbb6c848SEzequiel Garcia 	struct hantro_dev *vpu = ctx->dev;
213fbb6c848SEzequiel Garcia 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
214fbb6c848SEzequiel Garcia 
215fbb6c848SEzequiel Garcia 	if (hevc_dec->tile_sizes.cpu)
216fbb6c848SEzequiel Garcia 		dma_free_coherent(vpu->dev, hevc_dec->tile_sizes.size,
217fbb6c848SEzequiel Garcia 				  hevc_dec->tile_sizes.cpu,
218fbb6c848SEzequiel Garcia 				  hevc_dec->tile_sizes.dma);
219fbb6c848SEzequiel Garcia 	hevc_dec->tile_sizes.cpu = NULL;
220fbb6c848SEzequiel Garcia 
221fbb6c848SEzequiel Garcia 	if (hevc_dec->scaling_lists.cpu)
222fbb6c848SEzequiel Garcia 		dma_free_coherent(vpu->dev, hevc_dec->scaling_lists.size,
223fbb6c848SEzequiel Garcia 				  hevc_dec->scaling_lists.cpu,
224fbb6c848SEzequiel Garcia 				  hevc_dec->scaling_lists.dma);
225fbb6c848SEzequiel Garcia 	hevc_dec->scaling_lists.cpu = NULL;
226fbb6c848SEzequiel Garcia 
227fbb6c848SEzequiel Garcia 	if (hevc_dec->tile_filter.cpu)
228fbb6c848SEzequiel Garcia 		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
229fbb6c848SEzequiel Garcia 				  hevc_dec->tile_filter.cpu,
230fbb6c848SEzequiel Garcia 				  hevc_dec->tile_filter.dma);
231fbb6c848SEzequiel Garcia 	hevc_dec->tile_filter.cpu = NULL;
232fbb6c848SEzequiel Garcia 
233fbb6c848SEzequiel Garcia 	if (hevc_dec->tile_sao.cpu)
234fbb6c848SEzequiel Garcia 		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
235fbb6c848SEzequiel Garcia 				  hevc_dec->tile_sao.cpu,
236fbb6c848SEzequiel Garcia 				  hevc_dec->tile_sao.dma);
237fbb6c848SEzequiel Garcia 	hevc_dec->tile_sao.cpu = NULL;
238fbb6c848SEzequiel Garcia 
239fbb6c848SEzequiel Garcia 	if (hevc_dec->tile_bsd.cpu)
240fbb6c848SEzequiel Garcia 		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
241fbb6c848SEzequiel Garcia 				  hevc_dec->tile_bsd.cpu,
242fbb6c848SEzequiel Garcia 				  hevc_dec->tile_bsd.dma);
243fbb6c848SEzequiel Garcia 	hevc_dec->tile_bsd.cpu = NULL;
244fbb6c848SEzequiel Garcia }
245fbb6c848SEzequiel Garcia 
hantro_hevc_dec_init(struct hantro_ctx * ctx)246fbb6c848SEzequiel Garcia int hantro_hevc_dec_init(struct hantro_ctx *ctx)
247fbb6c848SEzequiel Garcia {
248fbb6c848SEzequiel Garcia 	struct hantro_dev *vpu = ctx->dev;
249fbb6c848SEzequiel Garcia 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
250fbb6c848SEzequiel Garcia 	unsigned int size;
251fbb6c848SEzequiel Garcia 
252fbb6c848SEzequiel Garcia 	memset(hevc_dec, 0, sizeof(*hevc_dec));
253fbb6c848SEzequiel Garcia 
254fbb6c848SEzequiel Garcia 	/*
255fbb6c848SEzequiel Garcia 	 * Maximum number of tiles times width and height (2 bytes each),
256fbb6c848SEzequiel Garcia 	 * rounding up to next 16 bytes boundary + one extra 16 byte
257fbb6c848SEzequiel Garcia 	 * chunk (HW guys wanted to have this).
258fbb6c848SEzequiel Garcia 	 */
259fbb6c848SEzequiel Garcia 	size = round_up(MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 16, 16);
260fbb6c848SEzequiel Garcia 	hevc_dec->tile_sizes.cpu = dma_alloc_coherent(vpu->dev, size,
261fbb6c848SEzequiel Garcia 						      &hevc_dec->tile_sizes.dma,
262fbb6c848SEzequiel Garcia 						      GFP_KERNEL);
263fbb6c848SEzequiel Garcia 	if (!hevc_dec->tile_sizes.cpu)
264fbb6c848SEzequiel Garcia 		return -ENOMEM;
265fbb6c848SEzequiel Garcia 
266fbb6c848SEzequiel Garcia 	hevc_dec->tile_sizes.size = size;
267fbb6c848SEzequiel Garcia 
268fbb6c848SEzequiel Garcia 	hevc_dec->scaling_lists.cpu = dma_alloc_coherent(vpu->dev, SCALING_LIST_SIZE,
269fbb6c848SEzequiel Garcia 							 &hevc_dec->scaling_lists.dma,
270fbb6c848SEzequiel Garcia 							 GFP_KERNEL);
271fbb6c848SEzequiel Garcia 	if (!hevc_dec->scaling_lists.cpu)
272fbb6c848SEzequiel Garcia 		return -ENOMEM;
273fbb6c848SEzequiel Garcia 
274fbb6c848SEzequiel Garcia 	hevc_dec->scaling_lists.size = SCALING_LIST_SIZE;
275fbb6c848SEzequiel Garcia 
276fbb6c848SEzequiel Garcia 	hantro_hevc_ref_init(ctx);
277fbb6c848SEzequiel Garcia 
278fbb6c848SEzequiel Garcia 	return 0;
279fbb6c848SEzequiel Garcia }
280