1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Hantro G1 post-processor support
4  *
5  * Copyright (C) 2019 Collabora, Ltd.
6  */
7 
8 #include <linux/dma-mapping.h>
9 #include <linux/types.h>
10 
11 #include "hantro.h"
12 #include "hantro_hw.h"
13 #include "hantro_g1_regs.h"
14 #include "hantro_g2_regs.h"
15 #include "hantro_v4l2.h"
16 
/*
 * Write @val to the G1 post-processor register field @reg_name through
 * hantro_reg_write().
 *
 * @reg_name is a member name of hantro_g1_postproc_regs, not an expression.
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays safe inside unbraced if/else bodies.
 */
#define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
do { \
	hantro_reg_write((vpu), \
			 &hantro_g1_postproc_regs.reg_name, \
			 (val)); \
} while (0)

/* Same as HANTRO_PP_REG_WRITE(), but goes through hantro_reg_write_s(). */
#define HANTRO_PP_REG_WRITE_S(vpu, reg_name, val) \
do { \
	hantro_reg_write_s((vpu), \
			   &hantro_g1_postproc_regs.reg_name, \
			   (val)); \
} while (0)

/*
 * Post-processor input format codes; VPU_PP_IN_NV12 is the one written to
 * the input_fmt register field in this file.
 *
 * NOTE(review): "YUV240" looks like a typo for "YUV420"; the name is kept
 * as-is because it may be referenced elsewhere.
 */
#define VPU_PP_IN_YUYV			0x0
#define VPU_PP_IN_NV12			0x1
#define VPU_PP_IN_YUV420		0x2
#define VPU_PP_IN_YUV240_TILED		0x5

/*
 * Post-processor output format codes; VPU_PP_OUT_YUYV is the one written to
 * the output_fmt register field in this file.
 */
#define VPU_PP_OUT_RGB			0x0
#define VPU_PP_OUT_YUYV			0x3
37 
38 static const struct hantro_postproc_regs hantro_g1_postproc_regs = {
39 	.pipeline_en = {G1_REG_PP_INTERRUPT, 1, 0x1},
40 	.max_burst = {G1_REG_PP_DEV_CONFIG, 0, 0x1f},
41 	.clk_gate = {G1_REG_PP_DEV_CONFIG, 1, 0x1},
42 	.out_swap32 = {G1_REG_PP_DEV_CONFIG, 5, 0x1},
43 	.out_endian = {G1_REG_PP_DEV_CONFIG, 6, 0x1},
44 	.out_luma_base = {G1_REG_PP_OUT_LUMA_BASE, 0, 0xffffffff},
45 	.input_width = {G1_REG_PP_INPUT_SIZE, 0, 0x1ff},
46 	.input_height = {G1_REG_PP_INPUT_SIZE, 9, 0x1ff},
47 	.output_width = {G1_REG_PP_CONTROL, 4, 0x7ff},
48 	.output_height = {G1_REG_PP_CONTROL, 15, 0x7ff},
49 	.input_fmt = {G1_REG_PP_CONTROL, 29, 0x7},
50 	.output_fmt = {G1_REG_PP_CONTROL, 26, 0x7},
51 	.orig_width = {G1_REG_PP_MASK1_ORIG_WIDTH, 23, 0x1ff},
52 	.display_width = {G1_REG_PP_DISPLAY_WIDTH, 0, 0xfff},
53 };
54 
55 bool hantro_needs_postproc(const struct hantro_ctx *ctx,
56 			   const struct hantro_fmt *fmt)
57 {
58 	if (ctx->is_encoder)
59 		return false;
60 	return fmt->postprocessed;
61 }
62 
63 static void hantro_postproc_g1_enable(struct hantro_ctx *ctx)
64 {
65 	struct hantro_dev *vpu = ctx->dev;
66 	struct vb2_v4l2_buffer *dst_buf;
67 	u32 src_pp_fmt, dst_pp_fmt;
68 	dma_addr_t dst_dma;
69 
70 	/* Turn on pipeline mode. Must be done first. */
71 	HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x1);
72 
73 	src_pp_fmt = VPU_PP_IN_NV12;
74 
75 	switch (ctx->vpu_dst_fmt->fourcc) {
76 	case V4L2_PIX_FMT_YUYV:
77 		dst_pp_fmt = VPU_PP_OUT_YUYV;
78 		break;
79 	default:
80 		WARN(1, "output format %d not supported by the post-processor, this wasn't expected.",
81 		     ctx->vpu_dst_fmt->fourcc);
82 		dst_pp_fmt = 0;
83 		break;
84 	}
85 
86 	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
87 	dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
88 
89 	HANTRO_PP_REG_WRITE(vpu, clk_gate, 0x1);
90 	HANTRO_PP_REG_WRITE(vpu, out_endian, 0x1);
91 	HANTRO_PP_REG_WRITE(vpu, out_swap32, 0x1);
92 	HANTRO_PP_REG_WRITE(vpu, max_burst, 16);
93 	HANTRO_PP_REG_WRITE(vpu, out_luma_base, dst_dma);
94 	HANTRO_PP_REG_WRITE(vpu, input_width, MB_WIDTH(ctx->dst_fmt.width));
95 	HANTRO_PP_REG_WRITE(vpu, input_height, MB_HEIGHT(ctx->dst_fmt.height));
96 	HANTRO_PP_REG_WRITE(vpu, input_fmt, src_pp_fmt);
97 	HANTRO_PP_REG_WRITE(vpu, output_fmt, dst_pp_fmt);
98 	HANTRO_PP_REG_WRITE(vpu, output_width, ctx->dst_fmt.width);
99 	HANTRO_PP_REG_WRITE(vpu, output_height, ctx->dst_fmt.height);
100 	HANTRO_PP_REG_WRITE(vpu, orig_width, MB_WIDTH(ctx->dst_fmt.width));
101 	HANTRO_PP_REG_WRITE(vpu, display_width, ctx->dst_fmt.width);
102 }
103 
104 static int down_scale_factor(struct hantro_ctx *ctx)
105 {
106 	if (ctx->src_fmt.width == ctx->dst_fmt.width)
107 		return 0;
108 
109 	return DIV_ROUND_CLOSEST(ctx->src_fmt.width, ctx->dst_fmt.width);
110 }
111 
112 static void hantro_postproc_g2_enable(struct hantro_ctx *ctx)
113 {
114 	struct hantro_dev *vpu = ctx->dev;
115 	struct vb2_v4l2_buffer *dst_buf;
116 	int down_scale = down_scale_factor(ctx);
117 	size_t chroma_offset;
118 	dma_addr_t dst_dma;
119 
120 	dst_buf = hantro_get_dst_buf(ctx);
121 	dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
122 	chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
123 			ctx->dst_fmt.height;
124 
125 	if (down_scale) {
126 		hantro_reg_write(vpu, &g2_down_scale_e, 1);
127 		hantro_reg_write(vpu, &g2_down_scale_y, down_scale >> 2);
128 		hantro_reg_write(vpu, &g2_down_scale_x, down_scale >> 2);
129 		hantro_write_addr(vpu, G2_DS_DST, dst_dma);
130 		hantro_write_addr(vpu, G2_DS_DST_CHR, dst_dma + (chroma_offset >> down_scale));
131 	} else {
132 		hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, dst_dma);
133 		hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, dst_dma + chroma_offset);
134 	}
135 	if (ctx->dev->variant->legacy_regs) {
136 		int out_depth = hantro_get_format_depth(ctx->dst_fmt.pixelformat);
137 		u8 pp_shift = 0;
138 
139 		if (out_depth > 8)
140 			pp_shift = 16 - out_depth;
141 
142 		hantro_reg_write(ctx->dev, &g2_rs_out_bit_depth, out_depth);
143 		hantro_reg_write(ctx->dev, &g2_pp_pix_shift, pp_shift);
144 	}
145 	hantro_reg_write(vpu, &g2_out_rs_e, 1);
146 }
147 
148 static int hantro_postproc_g2_enum_framesizes(struct hantro_ctx *ctx,
149 					      struct v4l2_frmsizeenum *fsize)
150 {
151 	/**
152 	 * G2 scaler can scale down by 0, 2, 4 or 8
153 	 * use fsize->index has power of 2 diviser
154 	 **/
155 	if (fsize->index > 3)
156 		return -EINVAL;
157 
158 	if (!ctx->src_fmt.width || !ctx->src_fmt.height)
159 		return -EINVAL;
160 
161 	fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE;
162 	fsize->discrete.width = ctx->src_fmt.width >> fsize->index;
163 	fsize->discrete.height = ctx->src_fmt.height >> fsize->index;
164 
165 	return 0;
166 }
167 
168 void hantro_postproc_free(struct hantro_ctx *ctx)
169 {
170 	struct hantro_dev *vpu = ctx->dev;
171 	unsigned int i;
172 
173 	for (i = 0; i < VB2_MAX_FRAME; ++i) {
174 		struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
175 
176 		if (priv->cpu) {
177 			dma_free_attrs(vpu->dev, priv->size, priv->cpu,
178 				       priv->dma, priv->attrs);
179 			priv->cpu = NULL;
180 		}
181 	}
182 }
183 
184 int hantro_postproc_alloc(struct hantro_ctx *ctx)
185 {
186 	struct hantro_dev *vpu = ctx->dev;
187 	struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
188 	struct vb2_queue *cap_queue = &m2m_ctx->cap_q_ctx.q;
189 	unsigned int num_buffers = cap_queue->num_buffers;
190 	struct v4l2_pix_format_mplane pix_mp;
191 	const struct hantro_fmt *fmt;
192 	unsigned int i, buf_size;
193 
194 	/* this should always pick native format */
195 	fmt = hantro_get_default_fmt(ctx, false);
196 	if (!fmt)
197 		return -EINVAL;
198 	v4l2_fill_pixfmt_mp(&pix_mp, fmt->fourcc, ctx->src_fmt.width,
199 			    ctx->src_fmt.height);
200 
201 	buf_size = pix_mp.plane_fmt[0].sizeimage;
202 	if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
203 		buf_size += hantro_h264_mv_size(pix_mp.width,
204 						pix_mp.height);
205 	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
206 		buf_size += hantro_vp9_mv_size(pix_mp.width,
207 					       pix_mp.height);
208 	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE)
209 		buf_size += hantro_hevc_mv_size(pix_mp.width,
210 						pix_mp.height);
211 
212 	for (i = 0; i < num_buffers; ++i) {
213 		struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
214 
215 		/*
216 		 * The buffers on this queue are meant as intermediate
217 		 * buffers for the decoder, so no mapping is needed.
218 		 */
219 		priv->attrs = DMA_ATTR_NO_KERNEL_MAPPING;
220 		priv->cpu = dma_alloc_attrs(vpu->dev, buf_size, &priv->dma,
221 					    GFP_KERNEL, priv->attrs);
222 		if (!priv->cpu)
223 			return -ENOMEM;
224 		priv->size = buf_size;
225 	}
226 	return 0;
227 }
228 
229 static void hantro_postproc_g1_disable(struct hantro_ctx *ctx)
230 {
231 	struct hantro_dev *vpu = ctx->dev;
232 
233 	HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x0);
234 }
235 
236 static void hantro_postproc_g2_disable(struct hantro_ctx *ctx)
237 {
238 	struct hantro_dev *vpu = ctx->dev;
239 
240 	hantro_reg_write(vpu, &g2_out_rs_e, 0);
241 }
242 
243 void hantro_postproc_disable(struct hantro_ctx *ctx)
244 {
245 	struct hantro_dev *vpu = ctx->dev;
246 
247 	if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->disable)
248 		vpu->variant->postproc_ops->disable(ctx);
249 }
250 
251 void hantro_postproc_enable(struct hantro_ctx *ctx)
252 {
253 	struct hantro_dev *vpu = ctx->dev;
254 
255 	if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->enable)
256 		vpu->variant->postproc_ops->enable(ctx);
257 }
258 
259 int hanto_postproc_enum_framesizes(struct hantro_ctx *ctx,
260 				   struct v4l2_frmsizeenum *fsize)
261 {
262 	struct hantro_dev *vpu = ctx->dev;
263 
264 	if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->enum_framesizes)
265 		return vpu->variant->postproc_ops->enum_framesizes(ctx, fsize);
266 
267 	return -EINVAL;
268 }
269 
270 const struct hantro_postproc_ops hantro_g1_postproc_ops = {
271 	.enable = hantro_postproc_g1_enable,
272 	.disable = hantro_postproc_g1_disable,
273 };
274 
275 const struct hantro_postproc_ops hantro_g2_postproc_ops = {
276 	.enable = hantro_postproc_g2_enable,
277 	.disable = hantro_postproc_g2_disable,
278 	.enum_framesizes = hantro_postproc_g2_enum_framesizes,
279 };
280