1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Hantro VPU codec driver
4  *
5  * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
6  */
7 
8 #include <asm/unaligned.h>
9 #include <media/v4l2-mem2mem.h>
10 #include "hantro_jpeg.h"
11 #include "hantro.h"
12 #include "hantro_v4l2.h"
13 #include "hantro_hw.h"
14 #include "hantro_h1_regs.h"
15 
16 #define H1_JPEG_QUANT_TABLE_COUNT 16
17 
hantro_h1_set_src_img_ctrl(struct hantro_dev * vpu,struct hantro_ctx * ctx)18 static void hantro_h1_set_src_img_ctrl(struct hantro_dev *vpu,
19 				       struct hantro_ctx *ctx)
20 {
21 	u32 overfill_r, overfill_b;
22 	u32 reg;
23 
24 	/*
25 	 * The format width and height are already macroblock aligned
26 	 * by .vidioc_s_fmt_vid_cap_mplane() callback. Destination
27 	 * format width and height can be further modified by
28 	 * .vidioc_s_selection(), and the width is 4-aligned.
29 	 */
30 	overfill_r = ctx->src_fmt.width - ctx->dst_fmt.width;
31 	overfill_b = ctx->src_fmt.height - ctx->dst_fmt.height;
32 
33 	reg = H1_REG_IN_IMG_CTRL_ROW_LEN(ctx->src_fmt.width)
34 		| H1_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r / 4)
35 		| H1_REG_IN_IMG_CTRL_OVRFLB(overfill_b)
36 		| H1_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt);
37 	vepu_write_relaxed(vpu, reg, H1_REG_IN_IMG_CTRL);
38 }
39 
hantro_h1_jpeg_enc_set_buffers(struct hantro_dev * vpu,struct hantro_ctx * ctx,struct vb2_buffer * src_buf,struct vb2_buffer * dst_buf)40 static void hantro_h1_jpeg_enc_set_buffers(struct hantro_dev *vpu,
41 					   struct hantro_ctx *ctx,
42 					   struct vb2_buffer *src_buf,
43 					   struct vb2_buffer *dst_buf)
44 {
45 	struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt;
46 	dma_addr_t src[3];
47 	u32 size_left;
48 
49 	size_left = vb2_plane_size(dst_buf, 0) - ctx->vpu_dst_fmt->header_size;
50 	if (WARN_ON(vb2_plane_size(dst_buf, 0) < ctx->vpu_dst_fmt->header_size))
51 		size_left = 0;
52 
53 	WARN_ON(pix_fmt->num_planes > 3);
54 
55 	vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(dst_buf, 0) +
56 				ctx->vpu_dst_fmt->header_size,
57 			   H1_REG_ADDR_OUTPUT_STREAM);
58 	vepu_write_relaxed(vpu, size_left, H1_REG_STR_BUF_LIMIT);
59 
60 	if (pix_fmt->num_planes == 1) {
61 		src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
62 		/* single plane formats we supported are all interlaced */
63 		vepu_write_relaxed(vpu, src[0], H1_REG_ADDR_IN_PLANE_0);
64 	} else if (pix_fmt->num_planes == 2) {
65 		src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
66 		src[1] = vb2_dma_contig_plane_dma_addr(src_buf, 1);
67 		vepu_write_relaxed(vpu, src[0], H1_REG_ADDR_IN_PLANE_0);
68 		vepu_write_relaxed(vpu, src[1], H1_REG_ADDR_IN_PLANE_1);
69 	} else {
70 		src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
71 		src[1] = vb2_dma_contig_plane_dma_addr(src_buf, 1);
72 		src[2] = vb2_dma_contig_plane_dma_addr(src_buf, 2);
73 		vepu_write_relaxed(vpu, src[0], H1_REG_ADDR_IN_PLANE_0);
74 		vepu_write_relaxed(vpu, src[1], H1_REG_ADDR_IN_PLANE_1);
75 		vepu_write_relaxed(vpu, src[2], H1_REG_ADDR_IN_PLANE_2);
76 	}
77 }
78 
79 static void
hantro_h1_jpeg_enc_set_qtable(struct hantro_dev * vpu,unsigned char * luma_qtable,unsigned char * chroma_qtable)80 hantro_h1_jpeg_enc_set_qtable(struct hantro_dev *vpu,
81 			      unsigned char *luma_qtable,
82 			      unsigned char *chroma_qtable)
83 {
84 	u32 reg, i;
85 	__be32 *luma_qtable_p;
86 	__be32 *chroma_qtable_p;
87 
88 	luma_qtable_p = (__be32 *)luma_qtable;
89 	chroma_qtable_p = (__be32 *)chroma_qtable;
90 
91 	/*
92 	 * Quantization table registers must be written in contiguous blocks.
93 	 * DO NOT collapse the below two "for" loops into one.
94 	 */
95 	for (i = 0; i < H1_JPEG_QUANT_TABLE_COUNT; i++) {
96 		reg = get_unaligned_be32(&luma_qtable_p[i]);
97 		vepu_write_relaxed(vpu, reg, H1_REG_JPEG_LUMA_QUAT(i));
98 	}
99 
100 	for (i = 0; i < H1_JPEG_QUANT_TABLE_COUNT; i++) {
101 		reg = get_unaligned_be32(&chroma_qtable_p[i]);
102 		vepu_write_relaxed(vpu, reg, H1_REG_JPEG_CHROMA_QUAT(i));
103 	}
104 }
105 
hantro_h1_jpeg_enc_run(struct hantro_ctx * ctx)106 int hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx)
107 {
108 	struct hantro_dev *vpu = ctx->dev;
109 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
110 	struct hantro_jpeg_ctx jpeg_ctx;
111 	u32 reg;
112 
113 	src_buf = hantro_get_src_buf(ctx);
114 	dst_buf = hantro_get_dst_buf(ctx);
115 
116 	hantro_start_prepare_run(ctx);
117 
118 	memset(&jpeg_ctx, 0, sizeof(jpeg_ctx));
119 	jpeg_ctx.buffer = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
120 	jpeg_ctx.width = ctx->dst_fmt.width;
121 	jpeg_ctx.height = ctx->dst_fmt.height;
122 	jpeg_ctx.quality = ctx->jpeg_quality;
123 	hantro_jpeg_header_assemble(&jpeg_ctx);
124 
125 	/* Switch to JPEG encoder mode before writing registers */
126 	vepu_write_relaxed(vpu, H1_REG_ENC_CTRL_ENC_MODE_JPEG,
127 			   H1_REG_ENC_CTRL);
128 
129 	hantro_h1_set_src_img_ctrl(vpu, ctx);
130 	hantro_h1_jpeg_enc_set_buffers(vpu, ctx, &src_buf->vb2_buf,
131 				       &dst_buf->vb2_buf);
132 	hantro_h1_jpeg_enc_set_qtable(vpu, jpeg_ctx.hw_luma_qtable,
133 				      jpeg_ctx.hw_chroma_qtable);
134 
135 	reg = H1_REG_AXI_CTRL_OUTPUT_SWAP16
136 		| H1_REG_AXI_CTRL_INPUT_SWAP16
137 		| H1_REG_AXI_CTRL_BURST_LEN(16)
138 		| H1_REG_AXI_CTRL_OUTPUT_SWAP32
139 		| H1_REG_AXI_CTRL_INPUT_SWAP32
140 		| H1_REG_AXI_CTRL_OUTPUT_SWAP8
141 		| H1_REG_AXI_CTRL_INPUT_SWAP8;
142 	/* Make sure that all registers are written at this point. */
143 	vepu_write(vpu, reg, H1_REG_AXI_CTRL);
144 
145 	reg = H1_REG_ENC_CTRL_WIDTH(MB_WIDTH(ctx->src_fmt.width))
146 		| H1_REG_ENC_CTRL_HEIGHT(MB_HEIGHT(ctx->src_fmt.height))
147 		| H1_REG_ENC_CTRL_ENC_MODE_JPEG
148 		| H1_REG_ENC_PIC_INTRA
149 		| H1_REG_ENC_CTRL_EN_BIT;
150 
151 	hantro_end_prepare_run(ctx);
152 
153 	vepu_write(vpu, reg, H1_REG_ENC_CTRL);
154 
155 	return 0;
156 }
157 
hantro_h1_jpeg_enc_done(struct hantro_ctx * ctx)158 void hantro_h1_jpeg_enc_done(struct hantro_ctx *ctx)
159 {
160 	struct hantro_dev *vpu = ctx->dev;
161 	u32 bytesused = vepu_read(vpu, H1_REG_STR_BUF_LIMIT) / 8;
162 	struct vb2_v4l2_buffer *dst_buf = hantro_get_dst_buf(ctx);
163 
164 	vb2_set_plane_payload(&dst_buf->vb2_buf, 0,
165 			      ctx->vpu_dst_fmt->header_size + bytesused);
166 }
167