1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2016 MediaTek Inc.
4  * Author: Daniel Hsiao <daniel.hsiao@mediatek.com>
5  *         PoChun Lin <pochun.lin@mediatek.com>
6  */
7 
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/slab.h>

#include "../mtk_vcodec_enc_drv.h"
#include "../../common/mtk_vcodec_intr.h"
#include "../mtk_vcodec_enc.h"
#include "../mtk_vcodec_enc_pm.h"
#include "../venc_drv_base.h"
#include "../venc_ipi_msg.h"
#include "../venc_vpu_if.h"
19 
20 #define VENC_BITSTREAM_FRAME_SIZE 0x0098
21 #define VENC_BITSTREAM_HEADER_LEN 0x00e8
22 
23 /* This ac_tag is vp8 frame tag. */
24 #define MAX_AC_TAG_SIZE 10
25 
/*
 * enum venc_vp8_vpu_work_buf - vp8 encoder buffer index
 *
 * The same index is used for the work_bufs[] array in struct venc_vp8_vsi
 * (sizes/addresses shared with the VPU) and for the work_bufs[] array in
 * struct venc_vp8_inst (the AP side allocations), so the order of the
 * enumerators must not be changed independently of the VPU side.
 */
enum venc_vp8_vpu_work_buf {
	VENC_VP8_VPU_WORK_BUF_LUMA,
	VENC_VP8_VPU_WORK_BUF_LUMA2,
	VENC_VP8_VPU_WORK_BUF_LUMA3,
	VENC_VP8_VPU_WORK_BUF_CHROMA,
	VENC_VP8_VPU_WORK_BUF_CHROMA2,
	VENC_VP8_VPU_WORK_BUF_CHROMA3,
	VENC_VP8_VPU_WORK_BUF_MV_INFO,
	/* Source of the bitstream header copied by vp8_enc_compose_one_frame() */
	VENC_VP8_VPU_WORK_BUF_BS_HEADER,
	VENC_VP8_VPU_WORK_BUF_PROB_BUF,
	VENC_VP8_VPU_WORK_BUF_RC_INFO,
	/*
	 * The three RC_CODEx buffers are filled from VPU memory by
	 * vp8_enc_alloc_work_buf() right after they are allocated.
	 */
	VENC_VP8_VPU_WORK_BUF_RC_CODE,
	VENC_VP8_VPU_WORK_BUF_RC_CODE2,
	VENC_VP8_VPU_WORK_BUF_RC_CODE3,
	/* Number of work buffers; used as the array size, not a buffer */
	VENC_VP8_VPU_WORK_BUF_MAX,
};
45 
/*
 * struct venc_vp8_vpu_config - Structure for vp8 encoder configuration
 *                              AP-W/R : AP is writer/reader on this item
 *                              VPU-W/R: VPU is writer/reader on this item
 *
 * Every field is written by the AP in vp8_enc_set_param() when handling
 * VENC_SET_PARAM_ENC; pic_w and pic_h are also read back by the AP when the
 * key frame tag is built in vp8_enc_compose_one_frame().
 *
 * @input_fourcc: input fourcc
 * @bitrate: target bitrate (in bps)
 * @pic_w: picture width. Picture size is visible stream resolution, in pixels,
 *         to be used for display purposes; must be smaller or equal to buffer
 *         size.
 * @pic_h: picture height
 * @buf_w: buffer width (with 16 alignment). Buffer size is stream resolution
 *         in pixels aligned to hardware requirements.
 * @buf_h: buffer height (with 16 alignment)
 * @gop_size: group of picture size (key frame)
 * @framerate: frame rate in fps
 * @ts_mode: temporal scalability mode (0: disable, 1: enable)
 *	     support three temporal layers - 0: 7.5fps 1: 7.5fps 2: 15fps.
 */
struct venc_vp8_vpu_config {
	u32 input_fourcc;
	u32 bitrate;
	u32 pic_w;
	u32 pic_h;
	u32 buf_w;
	u32 buf_h;
	u32 gop_size;
	u32 framerate;
	u32 ts_mode;
};
75 
/*
 * struct venc_vp8_vpu_buf - Structure for buffer information
 *                           AP-W/R : AP is writer/reader on this item
 *                           VPU-W/R: VPU is writer/reader on this item
 * @iova: IO virtual address; written by the AP in vp8_enc_alloc_work_buf()
 *        from the dma_addr of the buffer it allocated.
 *        NOTE(review): this field is 32 bits wide while dma_addr may be
 *        wider on some configurations - confirm the address always fits.
 * @vpua: VPU side memory addr which is used by RC_CODE; read by the AP to
 *        locate the data copied into the RC_CODEx buffers
 * @size: buffer size (in bytes); read by the AP, a size of 0 means the buffer
 *        is skipped at allocation time
 */
struct venc_vp8_vpu_buf {
	u32 iova;
	u32 vpua;
	u32 size;
};
89 
/*
 * struct venc_vp8_vsi - Structure for VPU driver control and info share
 *                       AP-W/R : AP is writer/reader on this item
 *                       VPU-W/R: VPU is writer/reader on this item
 * This structure is allocated in VPU side and shared to AP side.
 * @config: vp8 encoder configuration
 * @work_bufs: working buffer information in VPU side
 * The work_bufs here is for storing the 'size' info shared to AP side.
 * The similar item in struct venc_vp8_inst is for memory allocation
 * in AP side. The AP driver will copy the 'size' from here to the one in
 * struct mtk_vcodec_mem, then invoke mtk_vcodec_mem_alloc to allocate
 * the buffer. After that, it passes the resulting 'dma_addr' back through the
 * 'iova' field here for register setting in VPU side.
 *
 * The field order and sizes form the layout shared with the VPU and must stay
 * in sync with it.
 */
struct venc_vp8_vsi {
	struct venc_vp8_vpu_config config;
	struct venc_vp8_vpu_buf work_bufs[VENC_VP8_VPU_WORK_BUF_MAX];
};
108 
/*
 * struct venc_vp8_inst - vp8 encoder AP driver instance
 *
 * Allocated (zeroed) by vp8_enc_init() and released by vp8_enc_deinit().
 *
 * @hw_base: vp8 encoder hardware register base, looked up with
 *	     mtk_vcodec_get_reg_addr() for VENC_LT_SYS in vp8_enc_init()
 * @work_bufs: working buffer, indexed by enum venc_vp8_vpu_work_buf
 * @work_buf_allocated: working buffer allocated flag; set after
 *	     vp8_enc_alloc_work_buf() succeeds and cleared when the buffers
 *	     are freed for re-allocation in vp8_enc_set_param()
 * @frm_cnt: encoded frame count, it's used for I-frame judgement and
 *	     reset when force intra cmd received.
 *	     NOTE(review): in this file the counter is only incremented after
 *	     each successfully composed frame and printed in debug logs; the
 *	     reset described above is not visible here - confirm.
 * @ts_mode: temporal scalability mode (0: disable, 1: enable)
 *	     support three temporal layers - 0: 7.5fps 1: 7.5fps 2: 15fps.
 *	     Set to 1 by VENC_SET_PARAM_TS_MODE and copied into the shared
 *	     configuration by VENC_SET_PARAM_ENC.
 * @vpu_inst: VPU instance to exchange information between AP and VPU
 * @vsi: driver structure allocated by VPU side and shared to AP side for
 *	 control and info share
 * @ctx: context for v4l2 layer integration
 */
struct venc_vp8_inst {
	void __iomem *hw_base;
	struct mtk_vcodec_mem work_bufs[VENC_VP8_VPU_WORK_BUF_MAX];
	bool work_buf_allocated;
	unsigned int frm_cnt;
	unsigned int ts_mode;
	struct venc_vpu_inst vpu_inst;
	struct venc_vp8_vsi *vsi;
	struct mtk_vcodec_enc_ctx *ctx;
};
133 
/*
 * vp8_enc_read_reg - read one 32-bit encoder register
 * @inst: encoder instance holding the mapped register base
 * @addr: byte offset of the register relative to @inst->hw_base
 *
 * Return: the value obtained from readl() at that location.
 */
static inline u32 vp8_enc_read_reg(struct venc_vp8_inst *inst, u32 addr)
{
	void __iomem *reg = inst->hw_base + addr;

	return readl(reg);
}
138 
vp8_enc_free_work_buf(struct venc_vp8_inst * inst)139 static void vp8_enc_free_work_buf(struct venc_vp8_inst *inst)
140 {
141 	int i;
142 
143 	/* Buffers need to be freed by AP. */
144 	for (i = 0; i < VENC_VP8_VPU_WORK_BUF_MAX; i++) {
145 		if (inst->work_bufs[i].size == 0)
146 			continue;
147 		mtk_vcodec_mem_free(inst->ctx, &inst->work_bufs[i]);
148 	}
149 }
150 
vp8_enc_alloc_work_buf(struct venc_vp8_inst * inst)151 static int vp8_enc_alloc_work_buf(struct venc_vp8_inst *inst)
152 {
153 	int i;
154 	int ret = 0;
155 	struct venc_vp8_vpu_buf *wb = inst->vsi->work_bufs;
156 
157 	for (i = 0; i < VENC_VP8_VPU_WORK_BUF_MAX; i++) {
158 		if (wb[i].size == 0)
159 			continue;
160 		/*
161 		 * This 'wb' structure is set by VPU side and shared to AP for
162 		 * buffer allocation and IO virtual addr mapping. For most of
163 		 * the buffers, AP will allocate the buffer according to 'size'
164 		 * field and store the IO virtual addr in 'iova' field. For the
165 		 * RC_CODEx buffers, they are pre-allocated in the VPU side
166 		 * because they are inside VPU SRAM, and save the VPU addr in
167 		 * the 'vpua' field. The AP will translate the VPU addr to the
168 		 * corresponding IO virtual addr and store in 'iova' field.
169 		 */
170 		inst->work_bufs[i].size = wb[i].size;
171 		ret = mtk_vcodec_mem_alloc(inst->ctx, &inst->work_bufs[i]);
172 		if (ret) {
173 			mtk_venc_err(inst->ctx, "cannot alloc work_bufs[%d]", i);
174 			goto err_alloc;
175 		}
176 		/*
177 		 * This RC_CODEx is pre-allocated by VPU and saved in VPU addr.
178 		 * So we need use memcpy to copy RC_CODEx from VPU addr into IO
179 		 * virtual addr in 'iova' field for reg setting in VPU side.
180 		 */
181 		if (i == VENC_VP8_VPU_WORK_BUF_RC_CODE ||
182 		    i == VENC_VP8_VPU_WORK_BUF_RC_CODE2 ||
183 		    i == VENC_VP8_VPU_WORK_BUF_RC_CODE3) {
184 			struct mtk_vcodec_fw *handler;
185 			void *tmp_va;
186 
187 			handler = inst->vpu_inst.ctx->dev->fw_handler;
188 			tmp_va = mtk_vcodec_fw_map_dm_addr(handler,
189 							   wb[i].vpua);
190 			memcpy(inst->work_bufs[i].va, tmp_va, wb[i].size);
191 		}
192 		wb[i].iova = inst->work_bufs[i].dma_addr;
193 
194 		mtk_venc_debug(inst->ctx, "work_bufs[%d] va=0x%p,iova=%pad,size=%zu",
195 			       i, inst->work_bufs[i].va,
196 			       &inst->work_bufs[i].dma_addr,
197 			       inst->work_bufs[i].size);
198 	}
199 
200 	return ret;
201 
202 err_alloc:
203 	vp8_enc_free_work_buf(inst);
204 
205 	return ret;
206 }
207 
vp8_enc_wait_venc_done(struct venc_vp8_inst * inst)208 static unsigned int vp8_enc_wait_venc_done(struct venc_vp8_inst *inst)
209 {
210 	unsigned int irq_status = 0;
211 	struct mtk_vcodec_enc_ctx *ctx = (struct mtk_vcodec_enc_ctx *)inst->ctx;
212 
213 	if (!mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
214 					  WAIT_INTR_TIMEOUT_MS, 0)) {
215 		irq_status = ctx->irq_status;
216 		mtk_venc_debug(ctx, "isr return %x", irq_status);
217 	}
218 	return irq_status;
219 }
220 
221 /*
222  * Compose ac_tag, bitstream header and bitstream payload into
223  * one bitstream buffer.
224  */
vp8_enc_compose_one_frame(struct venc_vp8_inst * inst,struct mtk_vcodec_mem * bs_buf,unsigned int * bs_size)225 static int vp8_enc_compose_one_frame(struct venc_vp8_inst *inst,
226 				     struct mtk_vcodec_mem *bs_buf,
227 				     unsigned int *bs_size)
228 {
229 	unsigned int not_key;
230 	u32 bs_frm_size;
231 	u32 bs_hdr_len;
232 	unsigned int ac_tag_size;
233 	u8 ac_tag[MAX_AC_TAG_SIZE];
234 	u32 tag;
235 
236 	bs_frm_size = vp8_enc_read_reg(inst, VENC_BITSTREAM_FRAME_SIZE);
237 	bs_hdr_len = vp8_enc_read_reg(inst, VENC_BITSTREAM_HEADER_LEN);
238 
239 	/* if a frame is key frame, not_key is 0 */
240 	not_key = !inst->vpu_inst.is_key_frm;
241 	tag = (bs_hdr_len << 5) | 0x10 | not_key;
242 	ac_tag[0] = tag & 0xff;
243 	ac_tag[1] = (tag >> 8) & 0xff;
244 	ac_tag[2] = (tag >> 16) & 0xff;
245 
246 	/* key frame */
247 	if (not_key == 0) {
248 		ac_tag_size = MAX_AC_TAG_SIZE;
249 		ac_tag[3] = 0x9d;
250 		ac_tag[4] = 0x01;
251 		ac_tag[5] = 0x2a;
252 		ac_tag[6] = inst->vsi->config.pic_w;
253 		ac_tag[7] = inst->vsi->config.pic_w >> 8;
254 		ac_tag[8] = inst->vsi->config.pic_h;
255 		ac_tag[9] = inst->vsi->config.pic_h >> 8;
256 	} else {
257 		ac_tag_size = 3;
258 	}
259 
260 	if (bs_buf->size < bs_hdr_len + bs_frm_size + ac_tag_size) {
261 		mtk_venc_err(inst->ctx, "bitstream buf size is too small(%zu)", bs_buf->size);
262 		return -EINVAL;
263 	}
264 
265 	/*
266 	* (1) The vp8 bitstream header and body are generated by the HW vp8
267 	* encoder separately at the same time. We cannot know the bitstream
268 	* header length in advance.
269 	* (2) From the vp8 spec, there is no stuffing byte allowed between the
270 	* ac tag, bitstream header and bitstream body.
271 	*/
272 	memmove(bs_buf->va + bs_hdr_len + ac_tag_size,
273 		bs_buf->va, bs_frm_size);
274 	memcpy(bs_buf->va + ac_tag_size,
275 	       inst->work_bufs[VENC_VP8_VPU_WORK_BUF_BS_HEADER].va,
276 	       bs_hdr_len);
277 	memcpy(bs_buf->va, ac_tag, ac_tag_size);
278 	*bs_size = bs_frm_size + bs_hdr_len + ac_tag_size;
279 
280 	return 0;
281 }
282 
vp8_enc_encode_frame(struct venc_vp8_inst * inst,struct venc_frm_buf * frm_buf,struct mtk_vcodec_mem * bs_buf,unsigned int * bs_size)283 static int vp8_enc_encode_frame(struct venc_vp8_inst *inst,
284 				struct venc_frm_buf *frm_buf,
285 				struct mtk_vcodec_mem *bs_buf,
286 				unsigned int *bs_size)
287 {
288 	int ret = 0;
289 	unsigned int irq_status;
290 
291 	mtk_venc_debug(inst->ctx, "->frm_cnt=%d", inst->frm_cnt);
292 
293 	ret = vpu_enc_encode(&inst->vpu_inst, 0, frm_buf, bs_buf, NULL);
294 	if (ret)
295 		return ret;
296 
297 	irq_status = vp8_enc_wait_venc_done(inst);
298 	if (irq_status != MTK_VENC_IRQ_STATUS_FRM) {
299 		mtk_venc_err(inst->ctx, "irq_status=%d failed", irq_status);
300 		return -EIO;
301 	}
302 
303 	if (vp8_enc_compose_one_frame(inst, bs_buf, bs_size)) {
304 		mtk_venc_err(inst->ctx, "vp8_enc_compose_one_frame failed");
305 		return -EINVAL;
306 	}
307 
308 	inst->frm_cnt++;
309 	mtk_venc_debug(inst->ctx, "<-size=%d key_frm=%d", *bs_size, inst->vpu_inst.is_key_frm);
310 
311 	return ret;
312 }
313 
vp8_enc_init(struct mtk_vcodec_enc_ctx * ctx)314 static int vp8_enc_init(struct mtk_vcodec_enc_ctx *ctx)
315 {
316 	int ret = 0;
317 	struct venc_vp8_inst *inst;
318 
319 	inst = kzalloc(sizeof(*inst), GFP_KERNEL);
320 	if (!inst)
321 		return -ENOMEM;
322 
323 	inst->ctx = ctx;
324 	inst->vpu_inst.ctx = ctx;
325 	inst->vpu_inst.id = IPI_VENC_VP8;
326 	inst->hw_base = mtk_vcodec_get_reg_addr(inst->ctx->dev->reg_base, VENC_LT_SYS);
327 
328 	ret = vpu_enc_init(&inst->vpu_inst);
329 
330 	inst->vsi = (struct venc_vp8_vsi *)inst->vpu_inst.vsi;
331 
332 	if (ret)
333 		kfree(inst);
334 	else
335 		ctx->drv_handle = inst;
336 
337 	return ret;
338 }
339 
vp8_enc_encode(void * handle,enum venc_start_opt opt,struct venc_frm_buf * frm_buf,struct mtk_vcodec_mem * bs_buf,struct venc_done_result * result)340 static int vp8_enc_encode(void *handle,
341 			  enum venc_start_opt opt,
342 			  struct venc_frm_buf *frm_buf,
343 			  struct mtk_vcodec_mem *bs_buf,
344 			  struct venc_done_result *result)
345 {
346 	int ret = 0;
347 	struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle;
348 	struct mtk_vcodec_enc_ctx *ctx = inst->ctx;
349 
350 	enable_irq(ctx->dev->enc_irq);
351 
352 	switch (opt) {
353 	case VENC_START_OPT_ENCODE_FRAME:
354 		ret = vp8_enc_encode_frame(inst, frm_buf, bs_buf,
355 					   &result->bs_size);
356 		if (ret)
357 			goto encode_err;
358 		result->is_key_frm = inst->vpu_inst.is_key_frm;
359 		break;
360 
361 	default:
362 		mtk_venc_err(ctx, "opt not support:%d", opt);
363 		ret = -EINVAL;
364 		break;
365 	}
366 
367 encode_err:
368 
369 	disable_irq(ctx->dev->enc_irq);
370 	return ret;
371 }
372 
vp8_enc_set_param(void * handle,enum venc_set_param_type type,struct venc_enc_param * enc_prm)373 static int vp8_enc_set_param(void *handle,
374 			     enum venc_set_param_type type,
375 			     struct venc_enc_param *enc_prm)
376 {
377 	int ret = 0;
378 	struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle;
379 
380 	mtk_venc_debug(inst->ctx, "->type=%d", type);
381 
382 	switch (type) {
383 	case VENC_SET_PARAM_ENC:
384 		inst->vsi->config.input_fourcc = enc_prm->input_yuv_fmt;
385 		inst->vsi->config.bitrate = enc_prm->bitrate;
386 		inst->vsi->config.pic_w = enc_prm->width;
387 		inst->vsi->config.pic_h = enc_prm->height;
388 		inst->vsi->config.buf_w = enc_prm->buf_width;
389 		inst->vsi->config.buf_h = enc_prm->buf_height;
390 		inst->vsi->config.gop_size = enc_prm->gop_size;
391 		inst->vsi->config.framerate = enc_prm->frm_rate;
392 		inst->vsi->config.ts_mode = inst->ts_mode;
393 		ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm);
394 		if (ret)
395 			break;
396 		if (inst->work_buf_allocated) {
397 			vp8_enc_free_work_buf(inst);
398 			inst->work_buf_allocated = false;
399 		}
400 		ret = vp8_enc_alloc_work_buf(inst);
401 		if (ret)
402 			break;
403 		inst->work_buf_allocated = true;
404 		break;
405 
406 	/*
407 	 * VENC_SET_PARAM_TS_MODE must be called before VENC_SET_PARAM_ENC
408 	 */
409 	case VENC_SET_PARAM_TS_MODE:
410 		inst->ts_mode = 1;
411 		mtk_venc_debug(inst->ctx, "set ts_mode");
412 		break;
413 
414 	default:
415 		ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm);
416 		break;
417 	}
418 
419 	return ret;
420 }
421 
vp8_enc_deinit(void * handle)422 static int vp8_enc_deinit(void *handle)
423 {
424 	int ret = 0;
425 	struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle;
426 
427 	ret = vpu_enc_deinit(&inst->vpu_inst);
428 
429 	if (inst->work_buf_allocated)
430 		vp8_enc_free_work_buf(inst);
431 
432 	kfree(inst);
433 	return ret;
434 }
435 
/*
 * vp8 encoder operations exported to the common encoder layer; each callback
 * is implemented by the static function of the same purpose in this file.
 */
const struct venc_common_if venc_vp8_if = {
	.init = vp8_enc_init,
	.encode = vp8_enc_encode,
	.set_param = vp8_enc_set_param,
	.deinit = vp8_enc_deinit,
};
442