1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Cedrus VPU driver
4  *
5  * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com>
6  * Copyright (c) 2018 Bootlin
7  */
8 
9 #include <linux/delay.h>
10 #include <linux/types.h>
11 
12 #include <media/videobuf2-dma-contig.h>
13 
14 #include "cedrus.h"
15 #include "cedrus_hw.h"
16 #include "cedrus_regs.h"
17 
/*
 * Offsets of the H.264 tables inside the decoder SRAM.
 *
 * These values are handed to cedrus_h264_write_sram(), which shifts them
 * left by two bits before programming VE_AVC_SRAM_PORT_OFFSET.
 */
enum cedrus_h264_sram_off {
	CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE	= 0x000,
	CEDRUS_SRAM_H264_FRAMEBUFFER_LIST	= 0x100,
	CEDRUS_SRAM_H264_REF_LIST_0		= 0x190,
	CEDRUS_SRAM_H264_REF_LIST_1		= 0x199,
	CEDRUS_SRAM_H264_SCALING_LIST_8x8_0	= 0x200,
	CEDRUS_SRAM_H264_SCALING_LIST_8x8_1	= 0x210,
	CEDRUS_SRAM_H264_SCALING_LIST_4x4	= 0x220,
};
27 
/*
 * One entry of the frame buffer list written to SRAM at
 * CEDRUS_SRAM_H264_FRAMEBUFFER_LIST.
 *
 * Every field is stored little-endian. cedrus_fill_ref_pic() sets all of
 * them except @reserved, which keeps the zero value from the memset() done
 * by cedrus_write_frame_list().
 */
struct cedrus_h264_sram_ref_pic {
	__le32	top_field_order_cnt;
	__le32	bottom_field_order_cnt;
	__le32	frame_info;		/* pic_type shifted left by 8 */
	__le32	luma_ptr;
	__le32	chroma_ptr;
	__le32	mv_col_top_ptr;
	__le32	mv_col_bot_ptr;
	__le32	reserved;
} __packed;
38 
/*
 * Number of entries in the frame buffer list written to SRAM; also used to
 * size the motion vector colocated buffer in cedrus_h264_start().
 */
#define CEDRUS_H264_FRAME_NUM		18

/* Sizes of the auxiliary DMA buffers allocated in cedrus_h264_start(). */
#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE	(16 * SZ_1K)
#define CEDRUS_PIC_INFO_BUF_SIZE	(128 * SZ_1K)
43 
44 static void cedrus_h264_write_sram(struct cedrus_dev *dev,
45 				   enum cedrus_h264_sram_off off,
46 				   const void *data, size_t len)
47 {
48 	const u32 *buffer = data;
49 	size_t count = DIV_ROUND_UP(len, 4);
50 
51 	cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2);
52 
53 	while (count--)
54 		cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++);
55 }
56 
57 static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_ctx *ctx,
58 					      unsigned int position,
59 					      unsigned int field)
60 {
61 	dma_addr_t addr = ctx->codec.h264.mv_col_buf_dma;
62 
63 	/* Adjust for the position */
64 	addr += position * ctx->codec.h264.mv_col_buf_field_size * 2;
65 
66 	/* Adjust for the field */
67 	addr += field * ctx->codec.h264.mv_col_buf_field_size;
68 
69 	return addr;
70 }
71 
72 static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx,
73 				struct cedrus_buffer *buf,
74 				unsigned int top_field_order_cnt,
75 				unsigned int bottom_field_order_cnt,
76 				struct cedrus_h264_sram_ref_pic *pic)
77 {
78 	struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf;
79 	unsigned int position = buf->codec.h264.position;
80 
81 	pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt);
82 	pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt);
83 	pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8);
84 
85 	pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0));
86 	pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1));
87 	pic->mv_col_top_ptr =
88 		cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 0));
89 	pic->mv_col_bot_ptr =
90 		cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 1));
91 }
92 
93 static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
94 				    struct cedrus_run *run)
95 {
96 	struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM];
97 	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
98 	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
99 	const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
100 	struct vb2_queue *cap_q;
101 	struct cedrus_buffer *output_buf;
102 	struct cedrus_dev *dev = ctx->dev;
103 	unsigned long used_dpbs = 0;
104 	unsigned int position;
105 	unsigned int output = 0;
106 	unsigned int i;
107 
108 	cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
109 
110 	memset(pic_list, 0, sizeof(pic_list));
111 
112 	for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) {
113 		const struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i];
114 		struct cedrus_buffer *cedrus_buf;
115 		int buf_idx;
116 
117 		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
118 			continue;
119 
120 		buf_idx = vb2_find_timestamp(cap_q, dpb->reference_ts, 0);
121 		if (buf_idx < 0)
122 			continue;
123 
124 		cedrus_buf = vb2_to_cedrus_buffer(cap_q->bufs[buf_idx]);
125 		position = cedrus_buf->codec.h264.position;
126 		used_dpbs |= BIT(position);
127 
128 		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
129 			continue;
130 
131 		cedrus_fill_ref_pic(ctx, cedrus_buf,
132 				    dpb->top_field_order_cnt,
133 				    dpb->bottom_field_order_cnt,
134 				    &pic_list[position]);
135 
136 		output = max(position, output);
137 	}
138 
139 	position = find_next_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM,
140 				      output);
141 	if (position >= CEDRUS_H264_FRAME_NUM)
142 		position = find_first_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM);
143 
144 	output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf);
145 	output_buf->codec.h264.position = position;
146 
147 	if (slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)
148 		output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD;
149 	else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
150 		output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_MBAFF;
151 	else
152 		output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FRAME;
153 
154 	cedrus_fill_ref_pic(ctx, output_buf,
155 			    decode->top_field_order_cnt,
156 			    decode->bottom_field_order_cnt,
157 			    &pic_list[position]);
158 
159 	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST,
160 			       pic_list, sizeof(pic_list));
161 
162 	cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position);
163 }
164 
165 #define CEDRUS_MAX_REF_IDX	32
166 
167 static void _cedrus_write_ref_list(struct cedrus_ctx *ctx,
168 				   struct cedrus_run *run,
169 				   const u8 *ref_list, u8 num_ref,
170 				   enum cedrus_h264_sram_off sram)
171 {
172 	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
173 	struct vb2_queue *cap_q;
174 	struct cedrus_dev *dev = ctx->dev;
175 	u8 sram_array[CEDRUS_MAX_REF_IDX];
176 	unsigned int i;
177 	size_t size;
178 
179 	cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
180 
181 	memset(sram_array, 0, sizeof(sram_array));
182 
183 	for (i = 0; i < num_ref; i++) {
184 		const struct v4l2_h264_dpb_entry *dpb;
185 		const struct cedrus_buffer *cedrus_buf;
186 		const struct vb2_v4l2_buffer *ref_buf;
187 		unsigned int position;
188 		int buf_idx;
189 		u8 dpb_idx;
190 
191 		dpb_idx = ref_list[i];
192 		dpb = &decode->dpb[dpb_idx];
193 
194 		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
195 			continue;
196 
197 		buf_idx = vb2_find_timestamp(cap_q, dpb->reference_ts, 0);
198 		if (buf_idx < 0)
199 			continue;
200 
201 		ref_buf = to_vb2_v4l2_buffer(cap_q->bufs[buf_idx]);
202 		cedrus_buf = vb2_v4l2_to_cedrus_buffer(ref_buf);
203 		position = cedrus_buf->codec.h264.position;
204 
205 		sram_array[i] |= position << 1;
206 		if (ref_buf->field == V4L2_FIELD_BOTTOM)
207 			sram_array[i] |= BIT(0);
208 	}
209 
210 	size = min_t(size_t, ALIGN(num_ref, 4), sizeof(sram_array));
211 	cedrus_h264_write_sram(dev, sram, &sram_array, size);
212 }
213 
214 static void cedrus_write_ref_list0(struct cedrus_ctx *ctx,
215 				   struct cedrus_run *run)
216 {
217 	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
218 
219 	_cedrus_write_ref_list(ctx, run,
220 			       slice->ref_pic_list0,
221 			       slice->num_ref_idx_l0_active_minus1 + 1,
222 			       CEDRUS_SRAM_H264_REF_LIST_0);
223 }
224 
225 static void cedrus_write_ref_list1(struct cedrus_ctx *ctx,
226 				   struct cedrus_run *run)
227 {
228 	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
229 
230 	_cedrus_write_ref_list(ctx, run,
231 			       slice->ref_pic_list1,
232 			       slice->num_ref_idx_l1_active_minus1 + 1,
233 			       CEDRUS_SRAM_H264_REF_LIST_1);
234 }
235 
236 static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx,
237 				       struct cedrus_run *run)
238 {
239 	const struct v4l2_ctrl_h264_scaling_matrix *scaling =
240 		run->h264.scaling_matrix;
241 	struct cedrus_dev *dev = ctx->dev;
242 
243 	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0,
244 			       scaling->scaling_list_8x8[0],
245 			       sizeof(scaling->scaling_list_8x8[0]));
246 
247 	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1,
248 			       scaling->scaling_list_8x8[1],
249 			       sizeof(scaling->scaling_list_8x8[1]));
250 
251 	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4,
252 			       scaling->scaling_list_4x4,
253 			       sizeof(scaling->scaling_list_4x4));
254 }
255 
256 static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx,
257 					   struct cedrus_run *run)
258 {
259 	const struct v4l2_ctrl_h264_slice_params *slice =
260 		run->h264.slice_params;
261 	const struct v4l2_h264_pred_weight_table *pred_weight =
262 		&slice->pred_weight_table;
263 	struct cedrus_dev *dev = ctx->dev;
264 	int i, j, k;
265 
266 	cedrus_write(dev, VE_H264_SHS_WP,
267 		     ((pred_weight->chroma_log2_weight_denom & 0x7) << 4) |
268 		     ((pred_weight->luma_log2_weight_denom & 0x7) << 0));
269 
270 	cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET,
271 		     CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2);
272 
273 	for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) {
274 		const struct v4l2_h264_weight_factors *factors =
275 			&pred_weight->weight_factors[i];
276 
277 		for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) {
278 			u32 val;
279 
280 			val = (((u32)factors->luma_offset[j] & 0x1ff) << 16) |
281 				(factors->luma_weight[j] & 0x1ff);
282 			cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
283 		}
284 
285 		for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j++) {
286 			for (k = 0; k < ARRAY_SIZE(factors->chroma_weight[0]); k++) {
287 				u32 val;
288 
289 				val = (((u32)factors->chroma_offset[j][k] & 0x1ff) << 16) |
290 					(factors->chroma_weight[j][k] & 0x1ff);
291 				cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val);
292 			}
293 		}
294 	}
295 }
296 
297 /*
298  * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In
299  * rare cases frame is not decoded correctly. However, setting offset to 0 and
300  * skipping appropriate amount of bits with flush bits trigger always works.
301  */
302 static void cedrus_skip_bits(struct cedrus_dev *dev, int num)
303 {
304 	int count = 0;
305 
306 	while (count < num) {
307 		int tmp = min(num - count, 32);
308 
309 		cedrus_write(dev, VE_H264_TRIGGER_TYPE,
310 			     VE_H264_TRIGGER_TYPE_FLUSH_BITS |
311 			     VE_H264_TRIGGER_TYPE_N_BITS(tmp));
312 		while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY)
313 			udelay(1);
314 
315 		count += tmp;
316 	}
317 }
318 
/*
 * Program the per-slice decoding parameters.
 *
 * This points the hardware at the source buffer, skips the slice header
 * bits, writes the prediction weight table and reference lists when the
 * slice type calls for them, fills the PPS, SPS and slice header registers
 * from the corresponding controls, and finally clears the pending status
 * bits and enables the decoder interrupts.
 */
static void cedrus_set_params(struct cedrus_ctx *ctx,
			      struct cedrus_run *run)
{
	const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params;
	const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params;
	const struct v4l2_ctrl_h264_pps *pps = run->h264.pps;
	const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
	struct vb2_buffer *src_buf = &run->src->vb2_buf;
	struct cedrus_dev *dev = ctx->dev;
	dma_addr_t src_buf_addr;
	/* NOTE(review): presumably converts a size in bytes to bits - confirm */
	u32 len = slice->size * 8;
	unsigned int pic_width_in_mbs;
	bool mbaff_pic;
	u32 reg;

	/* The offset stays at 0; header bits are skipped by cedrus_skip_bits() */
	cedrus_write(dev, VE_H264_VLD_LEN, len);
	cedrus_write(dev, VE_H264_VLD_OFFSET, 0);

	/* Describe the source buffer: end address, then start address + flags */
	src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0);
	cedrus_write(dev, VE_H264_VLD_END,
		     src_buf_addr + vb2_get_plane_payload(src_buf, 0));
	cedrus_write(dev, VE_H264_VLD_ADDR,
		     VE_H264_VLD_ADDR_VAL(src_buf_addr) |
		     VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID |
		     VE_H264_VLD_ADDR_LAST);

	/*
	 * FIXME: Since the bitstream parsing is done in software, and
	 * in userspace, this shouldn't be needed anymore. But it
	 * turns out that removing it breaks the decoding process,
	 * without any clear indication why.
	 */
	cedrus_write(dev, VE_H264_TRIGGER_TYPE,
		     VE_H264_TRIGGER_TYPE_INIT_SWDEC);

	cedrus_skip_bits(dev, slice->header_bit_size);

	/*
	 * The prediction weight table is only written for P/SP slices with
	 * weighted prediction enabled, or for B slices with
	 * weighted_bipred_idc equal to 1.
	 */
	if (((pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) &&
	     (slice->slice_type == V4L2_H264_SLICE_TYPE_P ||
	      slice->slice_type == V4L2_H264_SLICE_TYPE_SP)) ||
	    (pps->weighted_bipred_idc == 1 &&
	     slice->slice_type == V4L2_H264_SLICE_TYPE_B))
		cedrus_write_pred_weight_table(ctx, run);

	/* List 0 is written for P, SP and B slices, list 1 for B slices only */
	if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) ||
	    (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) ||
	    (slice->slice_type == V4L2_H264_SLICE_TYPE_B))
		cedrus_write_ref_list0(ctx, run);

	if (slice->slice_type == V4L2_H264_SLICE_TYPE_B)
		cedrus_write_ref_list1(ctx, run);

	// picture parameters
	reg = 0;
	/*
	 * FIXME: the kernel headers are allowing the default value to
	 * be passed, but the libva doesn't give us that.
	 */
	reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10;
	reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5;
	reg |= (pps->weighted_bipred_idc & 0x3) << 2;
	if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
		reg |= VE_H264_PPS_ENTROPY_CODING_MODE;
	if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
		reg |= VE_H264_PPS_WEIGHTED_PRED;
	if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
		reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED;
	if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE)
		reg |= VE_H264_PPS_TRANSFORM_8X8_MODE;
	cedrus_write(dev, VE_H264_PPS, reg);

	// sequence parameters
	reg = 0;
	reg |= (sps->chroma_format_idc & 0x7) << 19;
	reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8;
	reg |= sps->pic_height_in_map_units_minus1 & 0xff;
	if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)
		reg |= VE_H264_SPS_MBS_ONLY;
	if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
		reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD;
	if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
		reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE;
	cedrus_write(dev, VE_H264_SPS, reg);

	/* MBAFF applies when the SPS allows it and this is not a field picture */
	mbaff_pic = !(slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC) &&
		    (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD);
	pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1;

	// slice parameters
	reg = 0;
	/*
	 * first_mb_in_slice is split into its remainder and quotient by the
	 * picture width in macroblocks; the quotient is doubled for MBAFF
	 * pictures.
	 */
	reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24;
	reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) *
		 (mbaff_pic + 1)) & 0xff) << 16;
	reg |= decode->nal_ref_idc ? BIT(12) : 0;
	reg |= (slice->slice_type & 0xf) << 8;
	reg |= slice->cabac_init_idc & 0x3;
	if (ctx->fh.m2m_ctx->new_frame)
		reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC;
	if (slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)
		reg |= VE_H264_SHS_FIELD_PIC;
	if (slice->flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)
		reg |= VE_H264_SHS_BOTTOM_FIELD;
	if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED)
		reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED;
	cedrus_write(dev, VE_H264_SHS, reg);

	/* Reference counts (override always set) and deblocking parameters */
	reg = 0;
	reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD;
	reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24;
	reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16;
	reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8;
	reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4;
	reg |= slice->slice_beta_offset_div2 & 0xf;
	cedrus_write(dev, VE_H264_SHS2, reg);

	/* Chroma QP offsets and the slice QP (pic_init_qp + slice_qp_delta) */
	reg = 0;
	reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16;
	reg |= (pps->chroma_qp_index_offset & 0x3f) << 8;
	reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f;
	cedrus_write(dev, VE_H264_SHS_QP, reg);

	// clear status flags
	cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev, VE_H264_STATUS));

	// enable int
	cedrus_write(dev, VE_H264_CTRL,
		     VE_H264_CTRL_SLICE_DECODE_INT |
		     VE_H264_CTRL_DECODE_ERR_INT |
		     VE_H264_CTRL_VLD_DATA_REQ_INT);
}
449 
450 static enum cedrus_irq_status
451 cedrus_h264_irq_status(struct cedrus_ctx *ctx)
452 {
453 	struct cedrus_dev *dev = ctx->dev;
454 	u32 reg = cedrus_read(dev, VE_H264_STATUS);
455 
456 	if (reg & (VE_H264_STATUS_DECODE_ERR_INT |
457 		   VE_H264_STATUS_VLD_DATA_REQ_INT))
458 		return CEDRUS_IRQ_ERROR;
459 
460 	if (reg & VE_H264_CTRL_SLICE_DECODE_INT)
461 		return CEDRUS_IRQ_OK;
462 
463 	return CEDRUS_IRQ_NONE;
464 }
465 
466 static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx)
467 {
468 	struct cedrus_dev *dev = ctx->dev;
469 
470 	cedrus_write(dev, VE_H264_STATUS,
471 		     VE_H264_STATUS_INT_MASK);
472 }
473 
474 static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx)
475 {
476 	struct cedrus_dev *dev = ctx->dev;
477 	u32 reg = cedrus_read(dev, VE_H264_CTRL);
478 
479 	cedrus_write(dev, VE_H264_CTRL,
480 		     reg & ~VE_H264_CTRL_INT_MASK);
481 }
482 
483 static void cedrus_h264_setup(struct cedrus_ctx *ctx,
484 			      struct cedrus_run *run)
485 {
486 	struct cedrus_dev *dev = ctx->dev;
487 
488 	cedrus_engine_enable(dev, CEDRUS_CODEC_H264);
489 
490 	cedrus_write(dev, VE_H264_SDROT_CTRL, 0);
491 	cedrus_write(dev, VE_H264_EXTRA_BUFFER1,
492 		     ctx->codec.h264.pic_info_buf_dma);
493 	cedrus_write(dev, VE_H264_EXTRA_BUFFER2,
494 		     ctx->codec.h264.neighbor_info_buf_dma);
495 
496 	cedrus_write_scaling_lists(ctx, run);
497 	cedrus_write_frame_list(ctx, run);
498 
499 	cedrus_set_params(ctx, run);
500 }
501 
502 static int cedrus_h264_start(struct cedrus_ctx *ctx)
503 {
504 	struct cedrus_dev *dev = ctx->dev;
505 	unsigned int field_size;
506 	unsigned int mv_col_size;
507 	int ret;
508 
509 	/*
510 	 * FIXME: It seems that the H6 cedarX code is using a formula
511 	 * here based on the size of the frame, while all the older
512 	 * code is using a fixed size, so that might need to be
513 	 * changed at some point.
514 	 */
515 	ctx->codec.h264.pic_info_buf =
516 		dma_alloc_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE,
517 				   &ctx->codec.h264.pic_info_buf_dma,
518 				   GFP_KERNEL);
519 	if (!ctx->codec.h264.pic_info_buf)
520 		return -ENOMEM;
521 
522 	/*
523 	 * That buffer is supposed to be 16kiB in size, and be aligned
524 	 * on 16kiB as well. However, dma_alloc_coherent provides the
525 	 * guarantee that we'll have a CPU and DMA address aligned on
526 	 * the smallest page order that is greater to the requested
527 	 * size, so we don't have to overallocate.
528 	 */
529 	ctx->codec.h264.neighbor_info_buf =
530 		dma_alloc_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
531 				   &ctx->codec.h264.neighbor_info_buf_dma,
532 				   GFP_KERNEL);
533 	if (!ctx->codec.h264.neighbor_info_buf) {
534 		ret = -ENOMEM;
535 		goto err_pic_buf;
536 	}
537 
538 	field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
539 		DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;
540 
541 	/*
542 	 * FIXME: This is actually conditional to
543 	 * V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE not being set, we
544 	 * might have to rework this if memory efficiency ever is
545 	 * something we need to work on.
546 	 */
547 	field_size = field_size * 2;
548 
549 	/*
550 	 * FIXME: This is actually conditional to
551 	 * V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY not being set, we might
552 	 * have to rework this if memory efficiency ever is something
553 	 * we need to work on.
554 	 */
555 	field_size = field_size * 2;
556 	ctx->codec.h264.mv_col_buf_field_size = field_size;
557 
558 	mv_col_size = field_size * 2 * CEDRUS_H264_FRAME_NUM;
559 	ctx->codec.h264.mv_col_buf_size = mv_col_size;
560 	ctx->codec.h264.mv_col_buf = dma_alloc_coherent(dev->dev,
561 							ctx->codec.h264.mv_col_buf_size,
562 							&ctx->codec.h264.mv_col_buf_dma,
563 							GFP_KERNEL);
564 	if (!ctx->codec.h264.mv_col_buf) {
565 		ret = -ENOMEM;
566 		goto err_neighbor_buf;
567 	}
568 
569 	return 0;
570 
571 err_neighbor_buf:
572 	dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
573 			  ctx->codec.h264.neighbor_info_buf,
574 			  ctx->codec.h264.neighbor_info_buf_dma);
575 
576 err_pic_buf:
577 	dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE,
578 			  ctx->codec.h264.pic_info_buf,
579 			  ctx->codec.h264.pic_info_buf_dma);
580 	return ret;
581 }
582 
583 static void cedrus_h264_stop(struct cedrus_ctx *ctx)
584 {
585 	struct cedrus_dev *dev = ctx->dev;
586 
587 	dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size,
588 			  ctx->codec.h264.mv_col_buf,
589 			  ctx->codec.h264.mv_col_buf_dma);
590 	dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
591 			  ctx->codec.h264.neighbor_info_buf,
592 			  ctx->codec.h264.neighbor_info_buf_dma);
593 	dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE,
594 			  ctx->codec.h264.pic_info_buf,
595 			  ctx->codec.h264.pic_info_buf_dma);
596 }
597 
598 static void cedrus_h264_trigger(struct cedrus_ctx *ctx)
599 {
600 	struct cedrus_dev *dev = ctx->dev;
601 
602 	cedrus_write(dev, VE_H264_TRIGGER_TYPE,
603 		     VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE);
604 }
605 
/*
 * H.264 decoder operations: binds the interrupt, setup, start/stop and
 * trigger handlers defined in this file.
 */
struct cedrus_dec_ops cedrus_dec_ops_h264 = {
	.irq_clear	= cedrus_h264_irq_clear,
	.irq_disable	= cedrus_h264_irq_disable,
	.irq_status	= cedrus_h264_irq_status,
	.setup		= cedrus_h264_setup,
	.start		= cedrus_h264_start,
	.stop		= cedrus_h264_stop,
	.trigger	= cedrus_h264_trigger,
};
615