1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Cedrus VPU driver
4  *
5  * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com>
6  * Copyright (c) 2018 Bootlin
7  */
8 
9 #include <linux/delay.h>
10 #include <linux/types.h>
11 
12 #include <media/videobuf2-dma-contig.h>
13 
14 #include "cedrus.h"
15 #include "cedrus_hw.h"
16 #include "cedrus_regs.h"
17 
/*
 * Start offsets of the H.264 tables inside the VE SRAM, as passed to
 * cedrus_h264_write_sram(). The value is shifted left by two before being
 * written to VE_AVC_SRAM_PORT_OFFSET, so these are presumably 32-bit word
 * indices rather than byte addresses (TODO confirm against hardware docs).
 */
enum cedrus_h264_sram_off {
	/* Filled by cedrus_write_pred_weight_table() */
	CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE	= 0x000,
	/* Array of struct cedrus_h264_sram_ref_pic, see cedrus_write_frame_list() */
	CEDRUS_SRAM_H264_FRAMEBUFFER_LIST	= 0x100,
	/* Reference picture lists, one byte per reference */
	CEDRUS_SRAM_H264_REF_LIST_0		= 0x190,
	CEDRUS_SRAM_H264_REF_LIST_1		= 0x199,
	/* Scaling matrices, see cedrus_write_scaling_lists() */
	CEDRUS_SRAM_H264_SCALING_LIST_8x8_0	= 0x200,
	CEDRUS_SRAM_H264_SCALING_LIST_8x8_1	= 0x210,
	CEDRUS_SRAM_H264_SCALING_LIST_4x4	= 0x220,
};
27 
/*
 * One entry of the frame buffer list written to
 * CEDRUS_SRAM_H264_FRAMEBUFFER_LIST. Every field is a little-endian 32-bit
 * word and the struct is packed, so an entry is exactly eight words.
 * Entries are filled by cedrus_fill_ref_pic().
 */
struct cedrus_h264_sram_ref_pic {
	__le32	top_field_order_cnt;
	__le32	bottom_field_order_cnt;
	/* Holds the buffer's pic_type shifted left by 8 */
	__le32	frame_info;
	/* DMA addresses of plane 0 and plane 1 of the capture buffer */
	__le32	luma_ptr;
	__le32	chroma_ptr;
	/* DMA addresses inside the motion vector colocated buffer */
	__le32	mv_col_top_ptr;
	__le32	mv_col_bot_ptr;
	/* Never written explicitly in this file; stays zero from the memset */
	__le32	reserved;
} __packed;
38 
/*
 * Number of entries in the frame buffer list written to the VE SRAM; also
 * scales the picture info and motion vector buffers in cedrus_h264_start().
 */
#define CEDRUS_H264_FRAME_NUM		18

/* Size of the buffer programmed into VE_H264_EXTRA_BUFFER2 */
#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE	(32 * SZ_1K)
/* Lower bound for the buffer programmed into VE_H264_EXTRA_BUFFER1 */
#define CEDRUS_MIN_PIC_INFO_BUF_SIZE       (130 * SZ_1K)
43 
44 static void cedrus_h264_write_sram(struct cedrus_dev *dev,
45 				   enum cedrus_h264_sram_off off,
46 				   const void *data, size_t len)
47 {
48 	const u32 *buffer = data;
49 	size_t count = DIV_ROUND_UP(len, 4);
50 
51 	cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2);
52 
53 	while (count--)
54 		cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++);
55 }
56 
57 static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_ctx *ctx,
58 					      unsigned int position,
59 					      unsigned int field)
60 {
61 	dma_addr_t addr = ctx->codec.h264.mv_col_buf_dma;
62 
63 	/* Adjust for the position */
64 	addr += position * ctx->codec.h264.mv_col_buf_field_size * 2;
65 
66 	/* Adjust for the field */
67 	addr += field * ctx->codec.h264.mv_col_buf_field_size;
68 
69 	return addr;
70 }
71 
72 static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx,
73 				struct cedrus_buffer *buf,
74 				unsigned int top_field_order_cnt,
75 				unsigned int bottom_field_order_cnt,
76 				struct cedrus_h264_sram_ref_pic *pic)
77 {
78 	struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf;
79 	unsigned int position = buf->codec.h264.position;
80 
81 	pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt);
82 	pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt);
83 	pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8);
84 
85 	pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0));
86 	pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1));
87 	pic->mv_col_top_ptr =
88 		cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 0));
89 	pic->mv_col_bot_ptr =
90 		cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 1));
91 }
92 
93 static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
94 				    struct cedrus_run *run)
95 {
96 	struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM];
97 	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
98 	const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
99 	struct vb2_queue *cap_q;
100 	struct cedrus_buffer *output_buf;
101 	struct cedrus_dev *dev = ctx->dev;
102 	unsigned long used_dpbs = 0;
103 	unsigned int position;
104 	int output = -1;
105 	unsigned int i;
106 
107 	cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
108 
109 	memset(pic_list, 0, sizeof(pic_list));
110 
111 	for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) {
112 		const struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i];
113 		struct cedrus_buffer *cedrus_buf;
114 		struct vb2_buffer *buf;
115 
116 		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
117 			continue;
118 
119 		buf = vb2_find_buffer(cap_q, dpb->reference_ts);
120 		if (!buf)
121 			continue;
122 
123 		cedrus_buf = vb2_to_cedrus_buffer(buf);
124 		position = cedrus_buf->codec.h264.position;
125 		used_dpbs |= BIT(position);
126 
127 		if (run->dst->vb2_buf.timestamp == dpb->reference_ts) {
128 			output = position;
129 			continue;
130 		}
131 
132 		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
133 			continue;
134 
135 		cedrus_fill_ref_pic(ctx, cedrus_buf,
136 				    dpb->top_field_order_cnt,
137 				    dpb->bottom_field_order_cnt,
138 				    &pic_list[position]);
139 	}
140 
141 	if (output >= 0)
142 		position = output;
143 	else
144 		position = find_first_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM);
145 
146 	output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf);
147 	output_buf->codec.h264.position = position;
148 
149 	if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
150 		output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD;
151 	else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
152 		output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_MBAFF;
153 	else
154 		output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FRAME;
155 
156 	cedrus_fill_ref_pic(ctx, output_buf,
157 			    decode->top_field_order_cnt,
158 			    decode->bottom_field_order_cnt,
159 			    &pic_list[position]);
160 
161 	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST,
162 			       pic_list, sizeof(pic_list));
163 
164 	cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position);
165 }
166 
167 #define CEDRUS_MAX_REF_IDX	32
168 
169 static void _cedrus_write_ref_list(struct cedrus_ctx *ctx,
170 				   struct cedrus_run *run,
171 				   const struct v4l2_h264_reference *ref_list,
172 				   u8 num_ref, enum cedrus_h264_sram_off sram)
173 {
174 	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
175 	struct vb2_queue *cap_q;
176 	struct cedrus_dev *dev = ctx->dev;
177 	u8 sram_array[CEDRUS_MAX_REF_IDX];
178 	unsigned int i;
179 	size_t size;
180 
181 	cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
182 
183 	memset(sram_array, 0, sizeof(sram_array));
184 
185 	for (i = 0; i < num_ref; i++) {
186 		const struct v4l2_h264_dpb_entry *dpb;
187 		const struct cedrus_buffer *cedrus_buf;
188 		unsigned int position;
189 		struct vb2_buffer *buf;
190 		u8 dpb_idx;
191 
192 		dpb_idx = ref_list[i].index;
193 		dpb = &decode->dpb[dpb_idx];
194 
195 		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
196 			continue;
197 
198 		buf = vb2_find_buffer(cap_q, dpb->reference_ts);
199 		if (!buf)
200 			continue;
201 
202 		cedrus_buf = vb2_to_cedrus_buffer(buf);
203 		position = cedrus_buf->codec.h264.position;
204 
205 		sram_array[i] |= position << 1;
206 		if (ref_list[i].fields == V4L2_H264_BOTTOM_FIELD_REF)
207 			sram_array[i] |= BIT(0);
208 	}
209 
210 	size = min_t(size_t, ALIGN(num_ref, 4), sizeof(sram_array));
211 	cedrus_h264_write_sram(dev, sram, &sram_array, size);
212 }
213 
214 static void cedrus_write_ref_list0(struct cedrus_ctx *ctx,
215 				   struct cedrus_run *run)
216 {
217 	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
218 
219 	_cedrus_write_ref_list(ctx, run,
220 			       slice->ref_pic_list0,
221 			       slice->num_ref_idx_l0_active_minus1 + 1,
222 			       CEDRUS_SRAM_H264_REF_LIST_0);
223 }
224 
225 static void cedrus_write_ref_list1(struct cedrus_ctx *ctx,
226 				   struct cedrus_run *run)
227 {
228 	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
229 
230 	_cedrus_write_ref_list(ctx, run,
231 			       slice->ref_pic_list1,
232 			       slice->num_ref_idx_l1_active_minus1 + 1,
233 			       CEDRUS_SRAM_H264_REF_LIST_1);
234 }
235 
236 static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx,
237 				       struct cedrus_run *run)
238 {
239 	const struct v4l2_ctrl_h264_scaling_matrix *scaling =
240 		run->h264.scaling_matrix;
241 	const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
242 	struct cedrus_dev *dev = ctx->dev;
243 
244 	if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
245 		return;
246 
247 	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0,
248 			       scaling->scaling_list_8x8[0],
249 			       sizeof(scaling->scaling_list_8x8[0]));
250 
251 	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1,
252 			       scaling->scaling_list_8x8[1],
253 			       sizeof(scaling->scaling_list_8x8[1]));
254 
255 	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4,
256 			       scaling->scaling_list_4x4,
257 			       sizeof(scaling->scaling_list_4x4));
258 }
259 
260 static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx,
261 					   struct cedrus_run *run)
262 {
263 	const struct v4l2_ctrl_h264_pred_weights *pred_weight =
264 		run->h264.pred_weights;
265 	struct cedrus_dev *dev = ctx->dev;
266 	int i, j, k;
267 
268 	cedrus_write(dev, VE_H264_SHS_WP,
269 		     ((pred_weight->chroma_log2_weight_denom & 0x7) << 4) |
270 		     ((pred_weight->luma_log2_weight_denom & 0x7) << 0));
271 
272 	cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET,
273 		     CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2);
274 
275 	for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) {
276 		const struct v4l2_h264_weight_factors *factors =
277 			&pred_weight->weight_factors[i];
278 
279 		for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) {
280 			u32 val;
281 
282 			val = (((u32)factors->luma_offset[j] & 0x1ff) << 16) |
283 				(factors->luma_weight[j] & 0x1ff);
284 			cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
285 		}
286 
287 		for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j++) {
288 			for (k = 0; k < ARRAY_SIZE(factors->chroma_weight[0]); k++) {
289 				u32 val;
290 
291 				val = (((u32)factors->chroma_offset[j][k] & 0x1ff) << 16) |
292 					(factors->chroma_weight[j][k] & 0x1ff);
293 				cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
294 			}
295 		}
296 	}
297 }
298 
299 /*
300  * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In
301  * rare cases frame is not decoded correctly. However, setting offset to 0 and
302  * skipping appropriate amount of bits with flush bits trigger always works.
303  */
304 static void cedrus_skip_bits(struct cedrus_dev *dev, int num)
305 {
306 	int count = 0;
307 
308 	while (count < num) {
309 		int tmp = min(num - count, 32);
310 
311 		cedrus_write(dev, VE_H264_TRIGGER_TYPE,
312 			     VE_H264_TRIGGER_TYPE_FLUSH_BITS |
313 			     VE_H264_TRIGGER_TYPE_N_BITS(tmp));
314 		while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY)
315 			udelay(1);
316 
317 		count += tmp;
318 	}
319 }
320 
/*
 * Program the per-slice state of the H.264 engine for @run.
 *
 * In order: bitstream location and length, the internal-SRAM versus DRAM
 * choice for the deblocking/intra prediction buffers, software decoder
 * init and slice header skip, prediction weights and reference lists when
 * the slice needs them, the PPS, SPS and slice header registers, and
 * finally status clearing and interrupt enabling. The register write order
 * is kept exactly as is; do not reorder without hardware testing.
 */
static void cedrus_set_params(struct cedrus_ctx *ctx,
			      struct cedrus_run *run)
{
	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
	const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
	const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
	struct vb2_buffer *src_buf = &run->src->vb2_buf;
	struct cedrus_dev *dev = ctx->dev;
	dma_addr_t src_buf_addr;
	size_t slice_bytes = vb2_get_plane_payload(src_buf, 0);
	unsigned int pic_width_in_mbs;
	bool mbaff_pic;
	u32 reg;

	/* The length register is given the payload size in bits. */
	cedrus_write(dev, VE_H264_VLD_LEN, slice_bytes * 8);
	/* Offset stays at 0; header bits are skipped by cedrus_skip_bits(). */
	cedrus_write(dev, VE_H264_VLD_OFFSET, 0);

	src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0);
	cedrus_write(dev, VE_H264_VLD_END, src_buf_addr + slice_bytes);
	cedrus_write(dev, VE_H264_VLD_ADDR,
		     VE_H264_VLD_ADDR_VAL(src_buf_addr) |
		     VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID |
		     VE_H264_VLD_ADDR_LAST);

	/*
	 * Streams wider than 2048 pixels use the DRAM buffers allocated in
	 * cedrus_h264_start(); narrower ones use the internal SRAM.
	 */
	if (ctx->src_fmt.width > 2048) {
		cedrus_write(dev, VE_BUF_CTRL,
			     VE_BUF_CTRL_INTRAPRED_MIXED_RAM |
			     VE_BUF_CTRL_DBLK_MIXED_RAM);
		cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR,
			     ctx->codec.h264.deblk_buf_dma);
		cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR,
			     ctx->codec.h264.intra_pred_buf_dma);
	} else {
		cedrus_write(dev, VE_BUF_CTRL,
			     VE_BUF_CTRL_INTRAPRED_INT_SRAM |
			     VE_BUF_CTRL_DBLK_INT_SRAM);
	}

	/*
	 * FIXME: Since the bitstream parsing is done in software, and
	 * in userspace, this shouldn't be needed anymore. But it
	 * turns out that removing it breaks the decoding process,
	 * without any clear indication why.
	 */
	cedrus_write(dev, VE_H264_TRIGGER_TYPE,
		     VE_H264_TRIGGER_TYPE_INIT_SWDEC);

	/* Step over the slice header, whose size in bits userspace provides. */
	cedrus_skip_bits(dev, slice->header_bit_size);

	if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(pps, slice))
		cedrus_write_pred_weight_table(ctx, run);

	/* List 0 is needed for P, SP and B slices; list 1 only for B slices. */
	if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) ||
	    (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) ||
	    (slice->slice_type == V4L2_H264_SLICE_TYPE_B))
		cedrus_write_ref_list0(ctx, run);

	if (slice->slice_type == V4L2_H264_SLICE_TYPE_B)
		cedrus_write_ref_list1(ctx, run);

	// picture parameters
	reg = 0;
	/*
	 * FIXME: the kernel headers are allowing the default value to
	 * be passed, but the libva doesn't give us that.
	 */
	reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10;
	reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5;
	reg |= (pps->weighted_bipred_idc & 0x3) << 2;
	if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
		reg |= VE_H264_PPS_ENTROPY_CODING_MODE;
	if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
		reg |= VE_H264_PPS_WEIGHTED_PRED;
	if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
		reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED;
	if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE)
		reg |= VE_H264_PPS_TRANSFORM_8X8_MODE;
	cedrus_write(dev, VE_H264_PPS, reg);

	// sequence parameters
	reg = 0;
	reg |= (sps->chroma_format_idc & 0x7) << 19;
	reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8;
	reg |= sps->pic_height_in_map_units_minus1 & 0xff;
	if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)
		reg |= VE_H264_SPS_MBS_ONLY;
	if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
		reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD;
	if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
		reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE;
	cedrus_write(dev, VE_H264_SPS, reg);

	/* MBAFF applies only to frame (non-field) pictures of an MBAFF stream. */
	mbaff_pic = !(decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) &&
		    (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD);
	pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1;

	// slice parameters
	reg = 0;
	/*
	 * Split first_mb_in_slice into a macroblock column (bits 31:24) and
	 * row (bits 23:16). The row is doubled for MBAFF pictures, presumably
	 * because macroblocks are addressed in vertical pairs there.
	 */
	reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24;
	reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) *
		 (mbaff_pic + 1)) & 0xff) << 16;
	reg |= decode->nal_ref_idc ? BIT(12) : 0;
	reg |= (slice->slice_type & 0xf) << 8;
	reg |= slice->cabac_init_idc & 0x3;
	if (ctx->fh.m2m_ctx->new_frame)
		reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC;
	if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
		reg |= VE_H264_SHS_FIELD_PIC;
	if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)
		reg |= VE_H264_SHS_BOTTOM_FIELD;
	if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED)
		reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED;
	cedrus_write(dev, VE_H264_SHS, reg);

	/* Second slice header word: reference counts and deblocking settings. */
	reg = 0;
	reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD;
	reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24;
	reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16;
	reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8;
	reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4;
	reg |= slice->slice_beta_offset_div2 & 0xf;
	cedrus_write(dev, VE_H264_SHS2, reg);

	/* Quantization: chroma QP offsets and the slice QP (init + 26 + delta). */
	reg = 0;
	reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16;
	reg |= (pps->chroma_qp_index_offset & 0x3f) << 8;
	reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f;
	/* Without a scaling matrix in the PPS, select the default one. */
	if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
		reg |= VE_H264_SHS_QP_SCALING_MATRIX_DEFAULT;
	cedrus_write(dev, VE_H264_SHS_QP, reg);

	// clear status flags
	/* Writes back what was read; presumably the bits are write-one-to-clear. */
	cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev, VE_H264_STATUS));

	// enable int
	cedrus_write(dev, VE_H264_CTRL,
		     VE_H264_CTRL_SLICE_DECODE_INT |
		     VE_H264_CTRL_DECODE_ERR_INT |
		     VE_H264_CTRL_VLD_DATA_REQ_INT);
}
462 
463 static enum cedrus_irq_status
464 cedrus_h264_irq_status(struct cedrus_ctx *ctx)
465 {
466 	struct cedrus_dev *dev = ctx->dev;
467 	u32 reg = cedrus_read(dev, VE_H264_STATUS);
468 
469 	if (reg & (VE_H264_STATUS_DECODE_ERR_INT |
470 		   VE_H264_STATUS_VLD_DATA_REQ_INT))
471 		return CEDRUS_IRQ_ERROR;
472 
473 	if (reg & VE_H264_CTRL_SLICE_DECODE_INT)
474 		return CEDRUS_IRQ_OK;
475 
476 	return CEDRUS_IRQ_NONE;
477 }
478 
479 static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx)
480 {
481 	struct cedrus_dev *dev = ctx->dev;
482 
483 	cedrus_write(dev, VE_H264_STATUS,
484 		     VE_H264_STATUS_INT_MASK);
485 }
486 
487 static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx)
488 {
489 	struct cedrus_dev *dev = ctx->dev;
490 	u32 reg = cedrus_read(dev, VE_H264_CTRL);
491 
492 	cedrus_write(dev, VE_H264_CTRL,
493 		     reg & ~VE_H264_CTRL_INT_MASK);
494 }
495 
496 static int cedrus_h264_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
497 {
498 	struct cedrus_dev *dev = ctx->dev;
499 
500 	cedrus_engine_enable(ctx, CEDRUS_CODEC_H264);
501 
502 	cedrus_write(dev, VE_H264_SDROT_CTRL, 0);
503 	cedrus_write(dev, VE_H264_EXTRA_BUFFER1,
504 		     ctx->codec.h264.pic_info_buf_dma);
505 	cedrus_write(dev, VE_H264_EXTRA_BUFFER2,
506 		     ctx->codec.h264.neighbor_info_buf_dma);
507 
508 	cedrus_write_scaling_lists(ctx, run);
509 	cedrus_write_frame_list(ctx, run);
510 
511 	cedrus_set_params(ctx, run);
512 
513 	return 0;
514 }
515 
516 static int cedrus_h264_start(struct cedrus_ctx *ctx)
517 {
518 	struct cedrus_dev *dev = ctx->dev;
519 	unsigned int pic_info_size;
520 	unsigned int field_size;
521 	unsigned int mv_col_size;
522 	int ret;
523 
524 	/*
525 	 * NOTE: All buffers allocated here are only used by HW, so we
526 	 * can add DMA_ATTR_NO_KERNEL_MAPPING flag when allocating them.
527 	 */
528 
529 	/* Formula for picture buffer size is taken from CedarX source. */
530 
531 	if (ctx->src_fmt.width > 2048)
532 		pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000;
533 	else
534 		pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000;
535 
536 	/*
537 	 * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set,
538 	 * there is no need to multiply by 2.
539 	 */
540 	pic_info_size += ctx->src_fmt.height * 2 * 64;
541 
542 	if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE)
543 		pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE;
544 
545 	ctx->codec.h264.pic_info_buf_size = pic_info_size;
546 	ctx->codec.h264.pic_info_buf =
547 		dma_alloc_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size,
548 				&ctx->codec.h264.pic_info_buf_dma,
549 				GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
550 	if (!ctx->codec.h264.pic_info_buf)
551 		return -ENOMEM;
552 
553 	/*
554 	 * That buffer is supposed to be 16kiB in size, and be aligned
555 	 * on 16kiB as well. However, dma_alloc_attrs provides the
556 	 * guarantee that we'll have a DMA address aligned on the
557 	 * smallest page order that is greater to the requested size,
558 	 * so we don't have to overallocate.
559 	 */
560 	ctx->codec.h264.neighbor_info_buf =
561 		dma_alloc_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
562 				&ctx->codec.h264.neighbor_info_buf_dma,
563 				GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
564 	if (!ctx->codec.h264.neighbor_info_buf) {
565 		ret = -ENOMEM;
566 		goto err_pic_buf;
567 	}
568 
569 	field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
570 		DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;
571 
572 	/*
573 	 * FIXME: This is actually conditional to
574 	 * V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE not being set, we
575 	 * might have to rework this if memory efficiency ever is
576 	 * something we need to work on.
577 	 */
578 	field_size = field_size * 2;
579 
580 	/*
581 	 * FIXME: This is actually conditional to
582 	 * V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY not being set, we might
583 	 * have to rework this if memory efficiency ever is something
584 	 * we need to work on.
585 	 */
586 	field_size = field_size * 2;
587 	ctx->codec.h264.mv_col_buf_field_size = field_size;
588 
589 	mv_col_size = field_size * 2 * CEDRUS_H264_FRAME_NUM;
590 	ctx->codec.h264.mv_col_buf_size = mv_col_size;
591 	ctx->codec.h264.mv_col_buf =
592 		dma_alloc_attrs(dev->dev,
593 				ctx->codec.h264.mv_col_buf_size,
594 				&ctx->codec.h264.mv_col_buf_dma,
595 				GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
596 	if (!ctx->codec.h264.mv_col_buf) {
597 		ret = -ENOMEM;
598 		goto err_neighbor_buf;
599 	}
600 
601 	if (ctx->src_fmt.width > 2048) {
602 		/*
603 		 * Formulas for deblock and intra prediction buffer sizes
604 		 * are taken from CedarX source.
605 		 */
606 
607 		ctx->codec.h264.deblk_buf_size =
608 			ALIGN(ctx->src_fmt.width, 32) * 12;
609 		ctx->codec.h264.deblk_buf =
610 			dma_alloc_attrs(dev->dev,
611 					ctx->codec.h264.deblk_buf_size,
612 					&ctx->codec.h264.deblk_buf_dma,
613 					GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
614 		if (!ctx->codec.h264.deblk_buf) {
615 			ret = -ENOMEM;
616 			goto err_mv_col_buf;
617 		}
618 
619 		/*
620 		 * NOTE: Multiplying by two deviates from CedarX logic, but it
621 		 * is for some unknown reason needed for H264 4K decoding on H6.
622 		 */
623 		ctx->codec.h264.intra_pred_buf_size =
624 			ALIGN(ctx->src_fmt.width, 64) * 5 * 2;
625 		ctx->codec.h264.intra_pred_buf =
626 			dma_alloc_attrs(dev->dev,
627 					ctx->codec.h264.intra_pred_buf_size,
628 					&ctx->codec.h264.intra_pred_buf_dma,
629 					GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
630 		if (!ctx->codec.h264.intra_pred_buf) {
631 			ret = -ENOMEM;
632 			goto err_deblk_buf;
633 		}
634 	}
635 
636 	return 0;
637 
638 err_deblk_buf:
639 	dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size,
640 		       ctx->codec.h264.deblk_buf,
641 		       ctx->codec.h264.deblk_buf_dma,
642 		       DMA_ATTR_NO_KERNEL_MAPPING);
643 
644 err_mv_col_buf:
645 	dma_free_attrs(dev->dev, ctx->codec.h264.mv_col_buf_size,
646 		       ctx->codec.h264.mv_col_buf,
647 		       ctx->codec.h264.mv_col_buf_dma,
648 		       DMA_ATTR_NO_KERNEL_MAPPING);
649 
650 err_neighbor_buf:
651 	dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
652 		       ctx->codec.h264.neighbor_info_buf,
653 		       ctx->codec.h264.neighbor_info_buf_dma,
654 		       DMA_ATTR_NO_KERNEL_MAPPING);
655 
656 err_pic_buf:
657 	dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size,
658 		       ctx->codec.h264.pic_info_buf,
659 		       ctx->codec.h264.pic_info_buf_dma,
660 		       DMA_ATTR_NO_KERNEL_MAPPING);
661 	return ret;
662 }
663 
664 static void cedrus_h264_stop(struct cedrus_ctx *ctx)
665 {
666 	struct cedrus_dev *dev = ctx->dev;
667 
668 	dma_free_attrs(dev->dev, ctx->codec.h264.mv_col_buf_size,
669 		       ctx->codec.h264.mv_col_buf,
670 		       ctx->codec.h264.mv_col_buf_dma,
671 		       DMA_ATTR_NO_KERNEL_MAPPING);
672 	dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
673 		       ctx->codec.h264.neighbor_info_buf,
674 		       ctx->codec.h264.neighbor_info_buf_dma,
675 		       DMA_ATTR_NO_KERNEL_MAPPING);
676 	dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size,
677 		       ctx->codec.h264.pic_info_buf,
678 		       ctx->codec.h264.pic_info_buf_dma,
679 		       DMA_ATTR_NO_KERNEL_MAPPING);
680 	if (ctx->codec.h264.deblk_buf_size)
681 		dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size,
682 			       ctx->codec.h264.deblk_buf,
683 			       ctx->codec.h264.deblk_buf_dma,
684 			       DMA_ATTR_NO_KERNEL_MAPPING);
685 	if (ctx->codec.h264.intra_pred_buf_size)
686 		dma_free_attrs(dev->dev, ctx->codec.h264.intra_pred_buf_size,
687 			       ctx->codec.h264.intra_pred_buf,
688 			       ctx->codec.h264.intra_pred_buf_dma,
689 			       DMA_ATTR_NO_KERNEL_MAPPING);
690 }
691 
692 static void cedrus_h264_trigger(struct cedrus_ctx *ctx)
693 {
694 	struct cedrus_dev *dev = ctx->dev;
695 
696 	cedrus_write(dev, VE_H264_TRIGGER_TYPE,
697 		     VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE);
698 }
699 
/*
 * H.264 implementation of the decoder callbacks, wiring each hook to the
 * corresponding function defined in this file.
 */
struct cedrus_dec_ops cedrus_dec_ops_h264 = {
	.irq_clear	= cedrus_h264_irq_clear,
	.irq_disable	= cedrus_h264_irq_disable,
	.irq_status	= cedrus_h264_irq_status,
	.setup		= cedrus_h264_setup,
	.start		= cedrus_h264_start,
	.stop		= cedrus_h264_stop,
	.trigger	= cedrus_h264_trigger,
};
709