1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Coda multi-standard codec IP - BIT processor functions 4 * 5 * Copyright (C) 2012 Vista Silicon S.L. 6 * Javier Martin, <javier.martin@vista-silicon.com> 7 * Xavier Duret 8 * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix 9 */ 10 11 #include <linux/clk.h> 12 #include <linux/irqreturn.h> 13 #include <linux/kernel.h> 14 #include <linux/log2.h> 15 #include <linux/platform_device.h> 16 #include <linux/ratelimit.h> 17 #include <linux/reset.h> 18 #include <linux/slab.h> 19 #include <linux/videodev2.h> 20 21 #include <media/v4l2-common.h> 22 #include <media/v4l2-ctrls.h> 23 #include <media/v4l2-fh.h> 24 #include <media/v4l2-mem2mem.h> 25 #include <media/videobuf2-v4l2.h> 26 #include <media/videobuf2-dma-contig.h> 27 #include <media/videobuf2-vmalloc.h> 28 29 #include "coda.h" 30 #include "imx-vdoa.h" 31 #define CREATE_TRACE_POINTS 32 #include "trace.h" 33 34 #define CODA_PARA_BUF_SIZE (10 * 1024) 35 #define CODA7_PS_BUF_SIZE 0x28000 36 #define CODA9_PS_SAVE_SIZE (512 * 1024) 37 38 #define CODA_DEFAULT_GAMMA 4096 39 #define CODA9_DEFAULT_GAMMA 24576 /* 0.75 * 32768 */ 40 41 static void coda_free_bitstream_buffer(struct coda_ctx *ctx); 42 43 static inline int coda_is_initialized(struct coda_dev *dev) 44 { 45 return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0; 46 } 47 48 static inline unsigned long coda_isbusy(struct coda_dev *dev) 49 { 50 return coda_read(dev, CODA_REG_BIT_BUSY); 51 } 52 53 static int coda_wait_timeout(struct coda_dev *dev) 54 { 55 unsigned long timeout = jiffies + msecs_to_jiffies(1000); 56 57 while (coda_isbusy(dev)) { 58 if (time_after(jiffies, timeout)) 59 return -ETIMEDOUT; 60 } 61 return 0; 62 } 63 64 static void coda_command_async(struct coda_ctx *ctx, int cmd) 65 { 66 struct coda_dev *dev = ctx->dev; 67 68 if (dev->devtype->product == CODA_HX4 || 69 dev->devtype->product == CODA_7541 || 70 dev->devtype->product == CODA_960) { 71 /* Restore context related registers to CODA */ 72 coda_write(dev, ctx->bit_stream_param, 73 CODA_REG_BIT_BIT_STREAM_PARAM); 74 coda_write(dev, ctx->frm_dis_flg, 75 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); 76 coda_write(dev, ctx->frame_mem_ctrl, 77 CODA_REG_BIT_FRAME_MEM_CTRL); 78 coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR); 79 } 80 81 if (dev->devtype->product == CODA_960) { 82 coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR); 83 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN); 84 } 85 86 coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY); 87 88 coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX); 89 coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD); 90 coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD); 91 92 trace_coda_bit_run(ctx, cmd); 93 94 coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND); 95 } 96 97 static int coda_command_sync(struct coda_ctx *ctx, int cmd) 98 { 99 struct coda_dev *dev = ctx->dev; 100 int ret; 101 102 lockdep_assert_held(&dev->coda_mutex); 103 104 coda_command_async(ctx, cmd); 105 ret = coda_wait_timeout(dev); 106 trace_coda_bit_done(ctx); 107 108 return ret; 109 } 110 111 int coda_hw_reset(struct coda_ctx *ctx) 112 { 113 struct coda_dev *dev = ctx->dev; 114 unsigned long timeout; 115 unsigned int idx; 116 int ret; 117 118 lockdep_assert_held(&dev->coda_mutex); 119 120 if (!dev->rstc) 121 return -ENOENT; 122 123 idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX); 124 125 if (dev->devtype->product == CODA_960) { 126 timeout = jiffies + msecs_to_jiffies(100); 127 coda_write(dev, 0x11, CODA9_GDI_BUS_CTRL); 128 while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) { 129 if (time_after(jiffies, timeout)) 130 return -ETIME; 131 cpu_relax(); 132 } 133 } 134 135 ret = reset_control_reset(dev->rstc); 136 if (ret < 0) 137 return ret; 138 139 if (dev->devtype->product == CODA_960) 140 coda_write(dev, 0x00, CODA9_GDI_BUS_CTRL); 141 coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY); 142 coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN); 143 ret = coda_wait_timeout(dev); 144 coda_write(dev, idx, CODA_REG_BIT_RUN_INDEX); 145 146 return ret; 147 } 148 149 static void coda_kfifo_sync_from_device(struct coda_ctx *ctx) 150 { 151 struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; 152 struct coda_dev *dev = ctx->dev; 153 u32 rd_ptr; 154 155 rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); 156 kfifo->out = (kfifo->in & ~kfifo->mask) | 157 (rd_ptr - ctx->bitstream.paddr); 158 if (kfifo->out > kfifo->in) 159 kfifo->out -= kfifo->mask + 1; 160 } 161 162 static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx) 163 { 164 struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; 165 struct coda_dev *dev = ctx->dev; 166 u32 rd_ptr, wr_ptr; 167 168 rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask); 169 coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); 170 wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask); 171 coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); 172 } 173 174 static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx) 175 { 176 struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; 177 struct coda_dev *dev = ctx->dev; 178 u32 wr_ptr; 179 180 wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask); 181 coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); 182 } 183 184 static int coda_h264_bitstream_pad(struct coda_ctx *ctx, u32 size) 185 { 186 unsigned char *buf; 187 u32 n; 188 189 if (size < 6) 190 size = 6; 191 192 buf = kmalloc(size, GFP_KERNEL); 193 if (!buf) 194 return -ENOMEM; 195 196 coda_h264_filler_nal(size, buf); 197 n = kfifo_in(&ctx->bitstream_fifo, buf, size); 198 kfree(buf); 199 200 return (n < size) ? -ENOSPC : 0; 201 } 202 203 int coda_bitstream_flush(struct coda_ctx *ctx) 204 { 205 int ret; 206 207 if (ctx->inst_type != CODA_INST_DECODER || !ctx->use_bit) 208 return 0; 209 210 ret = coda_command_sync(ctx, CODA_COMMAND_DEC_BUF_FLUSH); 211 if (ret < 0) { 212 v4l2_err(&ctx->dev->v4l2_dev, "failed to flush bitstream\n"); 213 return ret; 214 } 215 216 kfifo_init(&ctx->bitstream_fifo, ctx->bitstream.vaddr, 217 ctx->bitstream.size); 218 coda_kfifo_sync_to_device_full(ctx); 219 220 return 0; 221 } 222 223 static int coda_bitstream_queue(struct coda_ctx *ctx, const u8 *buf, u32 size) 224 { 225 u32 n = kfifo_in(&ctx->bitstream_fifo, buf, size); 226 227 return (n < size) ? -ENOSPC : 0; 228 } 229 230 static u32 coda_buffer_parse_headers(struct coda_ctx *ctx, 231 struct vb2_v4l2_buffer *src_buf, 232 u32 payload) 233 { 234 u8 *vaddr = vb2_plane_vaddr(&src_buf->vb2_buf, 0); 235 u32 size = 0; 236 237 switch (ctx->codec->src_fourcc) { 238 case V4L2_PIX_FMT_MPEG2: 239 size = coda_mpeg2_parse_headers(ctx, vaddr, payload); 240 break; 241 case V4L2_PIX_FMT_MPEG4: 242 size = coda_mpeg4_parse_headers(ctx, vaddr, payload); 243 break; 244 default: 245 break; 246 } 247 248 return size; 249 } 250 251 static bool coda_bitstream_try_queue(struct coda_ctx *ctx, 252 struct vb2_v4l2_buffer *src_buf) 253 { 254 unsigned long payload = vb2_get_plane_payload(&src_buf->vb2_buf, 0); 255 u8 *vaddr = vb2_plane_vaddr(&src_buf->vb2_buf, 0); 256 int ret; 257 int i; 258 259 if (coda_get_bitstream_payload(ctx) + payload + 512 >= 260 ctx->bitstream.size) 261 return false; 262 263 if (!vaddr) { 264 v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n"); 265 return true; 266 } 267 268 if (ctx->qsequence == 0 && payload < 512) { 269 /* 270 * Add padding after the first buffer, if it is too small to be 271 * fetched by the CODA, by repeating the headers. Without 272 * repeated headers, or the first frame already queued, decoder 273 * sequence initialization fails with error code 0x2000 on i.MX6 274 * or error code 0x1 on i.MX51. 275 */ 276 u32 header_size = coda_buffer_parse_headers(ctx, src_buf, 277 payload); 278 279 if (header_size) { 280 coda_dbg(1, ctx, "pad with %u-byte header\n", 281 header_size); 282 for (i = payload; i < 512; i += header_size) { 283 ret = coda_bitstream_queue(ctx, vaddr, 284 header_size); 285 if (ret < 0) { 286 v4l2_err(&ctx->dev->v4l2_dev, 287 "bitstream buffer overflow\n"); 288 return false; 289 } 290 if (ctx->dev->devtype->product == CODA_960) 291 break; 292 } 293 } else { 294 coda_dbg(1, ctx, 295 "could not parse header, sequence initialization might fail\n"); 296 } 297 298 /* Add padding before the first buffer, if it is too small */ 299 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264) 300 coda_h264_bitstream_pad(ctx, 512 - payload); 301 } 302 303 ret = coda_bitstream_queue(ctx, vaddr, payload); 304 if (ret < 0) { 305 v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n"); 306 return false; 307 } 308 309 src_buf->sequence = ctx->qsequence++; 310 311 /* Sync read pointer to device */ 312 if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev)) 313 coda_kfifo_sync_to_device_write(ctx); 314 315 /* Set the stream-end flag after the last buffer is queued */ 316 if (src_buf->flags & V4L2_BUF_FLAG_LAST) 317 coda_bit_stream_end_flag(ctx); 318 ctx->hold = false; 319 320 return true; 321 } 322 323 void coda_fill_bitstream(struct coda_ctx *ctx, struct list_head *buffer_list) 324 { 325 struct vb2_v4l2_buffer *src_buf; 326 struct coda_buffer_meta *meta; 327 u32 start; 328 329 if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) 330 return; 331 332 while (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) > 0) { 333 /* 334 * Only queue two JPEGs into the bitstream buffer to keep 335 * latency low. We need at least one complete buffer and the 336 * header of another buffer (for prescan) in the bitstream. 337 */ 338 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG && 339 ctx->num_metas > 1) 340 break; 341 342 if (ctx->num_internal_frames && 343 ctx->num_metas >= ctx->num_internal_frames) { 344 meta = list_first_entry(&ctx->buffer_meta_list, 345 struct coda_buffer_meta, list); 346 347 /* 348 * If we managed to fill in at least a full reorder 349 * window of buffers (num_internal_frames is a 350 * conservative estimate for this) and the bitstream 351 * prefetcher has at least 2 256 bytes periods beyond 352 * the first buffer to fetch, we can safely stop queuing 353 * in order to limit the decoder drain latency. 354 */ 355 if (coda_bitstream_can_fetch_past(ctx, meta->end)) 356 break; 357 } 358 359 src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); 360 361 /* Drop frames that do not start/end with a SOI/EOI markers */ 362 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG && 363 !coda_jpeg_check_buffer(ctx, &src_buf->vb2_buf)) { 364 v4l2_err(&ctx->dev->v4l2_dev, 365 "dropping invalid JPEG frame %d\n", 366 ctx->qsequence); 367 src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); 368 if (buffer_list) { 369 struct v4l2_m2m_buffer *m2m_buf; 370 371 m2m_buf = container_of(src_buf, 372 struct v4l2_m2m_buffer, 373 vb); 374 list_add_tail(&m2m_buf->list, buffer_list); 375 } else { 376 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); 377 } 378 continue; 379 } 380 381 /* Dump empty buffers */ 382 if (!vb2_get_plane_payload(&src_buf->vb2_buf, 0)) { 383 src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); 384 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); 385 continue; 386 } 387 388 /* Buffer start position */ 389 start = ctx->bitstream_fifo.kfifo.in; 390 391 if (coda_bitstream_try_queue(ctx, src_buf)) { 392 /* 393 * Source buffer is queued in the bitstream ringbuffer; 394 * queue the timestamp and mark source buffer as done 395 */ 396 src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); 397 398 meta = kmalloc(sizeof(*meta), GFP_KERNEL); 399 if (meta) { 400 meta->sequence = src_buf->sequence; 401 meta->timecode = src_buf->timecode; 402 meta->timestamp = src_buf->vb2_buf.timestamp; 403 meta->start = start; 404 meta->end = ctx->bitstream_fifo.kfifo.in; 405 meta->last = src_buf->flags & V4L2_BUF_FLAG_LAST; 406 if (meta->last) 407 coda_dbg(1, ctx, "marking last meta"); 408 spin_lock(&ctx->buffer_meta_lock); 409 list_add_tail(&meta->list, 410 &ctx->buffer_meta_list); 411 ctx->num_metas++; 412 spin_unlock(&ctx->buffer_meta_lock); 413 414 trace_coda_bit_queue(ctx, src_buf, meta); 415 } 416 417 if (buffer_list) { 418 struct v4l2_m2m_buffer *m2m_buf; 419 420 m2m_buf = container_of(src_buf, 421 struct v4l2_m2m_buffer, 422 vb); 423 list_add_tail(&m2m_buf->list, buffer_list); 424 } else { 425 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); 426 } 427 } else { 428 break; 429 } 430 } 431 } 432 433 void coda_bit_stream_end_flag(struct coda_ctx *ctx) 434 { 435 struct coda_dev *dev = ctx->dev; 436 437 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG; 438 439 /* If this context is currently running, update the hardware flag */ 440 if ((dev->devtype->product == CODA_960) && 441 coda_isbusy(dev) && 442 (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) { 443 coda_write(dev, ctx->bit_stream_param, 444 CODA_REG_BIT_BIT_STREAM_PARAM); 445 } 446 } 447 448 static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value) 449 { 450 struct coda_dev *dev = ctx->dev; 451 u32 *p = ctx->parabuf.vaddr; 452 453 if (dev->devtype->product == CODA_DX6) 454 p[index] = value; 455 else 456 p[index ^ 1] = value; 457 } 458 459 static inline int coda_alloc_context_buf(struct coda_ctx *ctx, 460 struct coda_aux_buf *buf, size_t size, 461 const char *name) 462 { 463 return coda_alloc_aux_buf(ctx->dev, buf, size, name, ctx->debugfs_entry); 464 } 465 466 467 static void coda_free_framebuffers(struct coda_ctx *ctx) 468 { 469 int i; 470 471 for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++) 472 coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i].buf); 473 } 474 475 static int coda_alloc_framebuffers(struct coda_ctx *ctx, 476 struct coda_q_data *q_data, u32 fourcc) 477 { 478 struct coda_dev *dev = ctx->dev; 479 unsigned int ysize, ycbcr_size; 480 int ret; 481 int i; 482 483 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 || 484 ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 || 485 ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 || 486 ctx->codec->dst_fourcc == V4L2_PIX_FMT_MPEG4) 487 ysize = round_up(q_data->rect.width, 16) * 488 round_up(q_data->rect.height, 16); 489 else 490 ysize = round_up(q_data->rect.width, 8) * q_data->rect.height; 491 492 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) 493 ycbcr_size = round_up(ysize, 4096) + ysize / 2; 494 else 495 ycbcr_size = ysize + ysize / 2; 496 497 /* Allocate frame buffers */ 498 for (i = 0; i < ctx->num_internal_frames; i++) { 499 size_t size = ycbcr_size; 500 char *name; 501 502 /* Add space for mvcol buffers */ 503 if (dev->devtype->product != CODA_DX6 && 504 (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 || 505 (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0))) 506 size += ysize / 4; 507 name = kasprintf(GFP_KERNEL, "fb%d", i); 508 if (!name) { 509 coda_free_framebuffers(ctx); 510 return -ENOMEM; 511 } 512 ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i].buf, 513 size, name); 514 kfree(name); 515 if (ret < 0) { 516 coda_free_framebuffers(ctx); 517 return ret; 518 } 519 } 520 521 /* Register frame buffers in the parameter buffer */ 522 for (i = 0; i < ctx->num_internal_frames; i++) { 523 u32 y, cb, cr, mvcol; 524 525 /* Start addresses of Y, Cb, Cr planes */ 526 y = ctx->internal_frames[i].buf.paddr; 527 cb = y + ysize; 528 cr = y + ysize + ysize/4; 529 mvcol = y + ysize + ysize/4 + ysize/4; 530 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) { 531 cb = round_up(cb, 4096); 532 mvcol = cb + ysize/2; 533 cr = 0; 534 /* Packed 20-bit MSB of base addresses */ 535 /* YYYYYCCC, CCyyyyyc, cccc.... */ 536 y = (y & 0xfffff000) | cb >> 20; 537 cb = (cb & 0x000ff000) << 12; 538 } 539 coda_parabuf_write(ctx, i * 3 + 0, y); 540 coda_parabuf_write(ctx, i * 3 + 1, cb); 541 coda_parabuf_write(ctx, i * 3 + 2, cr); 542 543 if (dev->devtype->product == CODA_DX6) 544 continue; 545 546 /* mvcol buffer for h.264 and mpeg4 */ 547 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264) 548 coda_parabuf_write(ctx, 96 + i, mvcol); 549 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0) 550 coda_parabuf_write(ctx, 97, mvcol); 551 } 552 553 return 0; 554 } 555 556 static void coda_free_context_buffers(struct coda_ctx *ctx) 557 { 558 struct coda_dev *dev = ctx->dev; 559 560 coda_free_aux_buf(dev, &ctx->slicebuf); 561 coda_free_aux_buf(dev, &ctx->psbuf); 562 if (dev->devtype->product != CODA_DX6) 563 coda_free_aux_buf(dev, &ctx->workbuf); 564 coda_free_aux_buf(dev, &ctx->parabuf); 565 } 566 567 static int coda_alloc_context_buffers(struct coda_ctx *ctx, 568 struct coda_q_data *q_data) 569 { 570 struct coda_dev *dev = ctx->dev; 571 size_t size; 572 int ret; 573 574 if (!ctx->parabuf.vaddr) { 575 ret = coda_alloc_context_buf(ctx, &ctx->parabuf, 576 CODA_PARA_BUF_SIZE, "parabuf"); 577 if (ret < 0) 578 return ret; 579 } 580 581 if (dev->devtype->product == CODA_DX6) 582 return 0; 583 584 if (!ctx->slicebuf.vaddr && q_data->fourcc == V4L2_PIX_FMT_H264) { 585 /* worst case slice size */ 586 size = (DIV_ROUND_UP(q_data->rect.width, 16) * 587 DIV_ROUND_UP(q_data->rect.height, 16)) * 3200 / 8 + 512; 588 ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size, 589 "slicebuf"); 590 if (ret < 0) 591 goto err; 592 } 593 594 if (!ctx->psbuf.vaddr && (dev->devtype->product == CODA_HX4 || 595 dev->devtype->product == CODA_7541)) { 596 ret = coda_alloc_context_buf(ctx, &ctx->psbuf, 597 CODA7_PS_BUF_SIZE, "psbuf"); 598 if (ret < 0) 599 goto err; 600 } 601 602 if (!ctx->workbuf.vaddr) { 603 size = dev->devtype->workbuf_size; 604 if (dev->devtype->product == CODA_960 && 605 q_data->fourcc == V4L2_PIX_FMT_H264) 606 size += CODA9_PS_SAVE_SIZE; 607 ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size, 608 "workbuf"); 609 if (ret < 0) 610 goto err; 611 } 612 613 return 0; 614 615 err: 616 coda_free_context_buffers(ctx); 617 return ret; 618 } 619 620 static int coda_encode_header(struct coda_ctx *ctx, struct vb2_v4l2_buffer *buf, 621 int header_code, u8 *header, int *size) 622 { 623 struct vb2_buffer *vb = &buf->vb2_buf; 624 struct coda_dev *dev = ctx->dev; 625 struct coda_q_data *q_data_src; 626 struct v4l2_rect *r; 627 size_t bufsize; 628 int ret; 629 int i; 630 631 if (dev->devtype->product == CODA_960) 632 memset(vb2_plane_vaddr(vb, 0), 0, 64); 633 634 coda_write(dev, vb2_dma_contig_plane_dma_addr(vb, 0), 635 CODA_CMD_ENC_HEADER_BB_START); 636 bufsize = vb2_plane_size(vb, 0); 637 if (dev->devtype->product == CODA_960) 638 bufsize /= 1024; 639 coda_write(dev, bufsize, CODA_CMD_ENC_HEADER_BB_SIZE); 640 if (dev->devtype->product == CODA_960 && 641 ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 && 642 header_code == CODA_HEADER_H264_SPS) { 643 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 644 r = &q_data_src->rect; 645 646 if (r->width % 16 || r->height % 16) { 647 u32 crop_right = round_up(r->width, 16) - r->width; 648 u32 crop_bottom = round_up(r->height, 16) - r->height; 649 650 coda_write(dev, crop_right, 651 CODA9_CMD_ENC_HEADER_FRAME_CROP_H); 652 coda_write(dev, crop_bottom, 653 CODA9_CMD_ENC_HEADER_FRAME_CROP_V); 654 header_code |= CODA9_HEADER_FRAME_CROP; 655 } 656 } 657 coda_write(dev, header_code, CODA_CMD_ENC_HEADER_CODE); 658 ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER); 659 if (ret < 0) { 660 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n"); 661 return ret; 662 } 663 664 if (dev->devtype->product == CODA_960) { 665 for (i = 63; i > 0; i--) 666 if (((char *)vb2_plane_vaddr(vb, 0))[i] != 0) 667 break; 668 *size = i + 1; 669 } else { 670 *size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) - 671 coda_read(dev, CODA_CMD_ENC_HEADER_BB_START); 672 } 673 memcpy(header, vb2_plane_vaddr(vb, 0), *size); 674 675 return 0; 676 } 677 678 static u32 coda_slice_mode(struct coda_ctx *ctx) 679 { 680 int size, unit; 681 682 switch (ctx->params.slice_mode) { 683 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE: 684 default: 685 return 0; 686 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_MAX_MB: 687 size = ctx->params.slice_max_mb; 688 unit = 1; 689 break; 690 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_MAX_BYTES: 691 size = ctx->params.slice_max_bits; 692 unit = 0; 693 break; 694 } 695 696 return ((size & CODA_SLICING_SIZE_MASK) << CODA_SLICING_SIZE_OFFSET) | 697 ((unit & CODA_SLICING_UNIT_MASK) << CODA_SLICING_UNIT_OFFSET) | 698 ((1 & CODA_SLICING_MODE_MASK) << CODA_SLICING_MODE_OFFSET); 699 } 700 701 static int coda_enc_param_change(struct coda_ctx *ctx) 702 { 703 struct coda_dev *dev = ctx->dev; 704 u32 change_enable = 0; 705 u32 success; 706 int ret; 707 708 if (ctx->params.gop_size_changed) { 709 change_enable |= CODA_PARAM_CHANGE_RC_GOP; 710 coda_write(dev, ctx->params.gop_size, 711 CODA_CMD_ENC_PARAM_RC_GOP); 712 ctx->gopcounter = ctx->params.gop_size - 1; 713 ctx->params.gop_size_changed = false; 714 } 715 if (ctx->params.h264_intra_qp_changed) { 716 coda_dbg(1, ctx, "parameter change: intra Qp %u\n", 717 ctx->params.h264_intra_qp); 718 719 if (ctx->params.bitrate) { 720 change_enable |= CODA_PARAM_CHANGE_RC_INTRA_QP; 721 coda_write(dev, ctx->params.h264_intra_qp, 722 CODA_CMD_ENC_PARAM_RC_INTRA_QP); 723 } 724 ctx->params.h264_intra_qp_changed = false; 725 } 726 if (ctx->params.bitrate_changed) { 727 coda_dbg(1, ctx, "parameter change: bitrate %u kbit/s\n", 728 ctx->params.bitrate); 729 change_enable |= CODA_PARAM_CHANGE_RC_BITRATE; 730 coda_write(dev, ctx->params.bitrate, 731 CODA_CMD_ENC_PARAM_RC_BITRATE); 732 ctx->params.bitrate_changed = false; 733 } 734 if (ctx->params.framerate_changed) { 735 coda_dbg(1, ctx, "parameter change: frame rate %u/%u Hz\n", 736 ctx->params.framerate & 0xffff, 737 (ctx->params.framerate >> 16) + 1); 738 change_enable |= CODA_PARAM_CHANGE_RC_FRAME_RATE; 739 coda_write(dev, ctx->params.framerate, 740 CODA_CMD_ENC_PARAM_RC_FRAME_RATE); 741 ctx->params.framerate_changed = false; 742 } 743 if (ctx->params.intra_refresh_changed) { 744 coda_dbg(1, ctx, "parameter change: intra refresh MBs %u\n", 745 ctx->params.intra_refresh); 746 change_enable |= CODA_PARAM_CHANGE_INTRA_MB_NUM; 747 coda_write(dev, ctx->params.intra_refresh, 748 CODA_CMD_ENC_PARAM_INTRA_MB_NUM); 749 ctx->params.intra_refresh_changed = false; 750 } 751 if (ctx->params.slice_mode_changed) { 752 change_enable |= CODA_PARAM_CHANGE_SLICE_MODE; 753 coda_write(dev, coda_slice_mode(ctx), 754 CODA_CMD_ENC_PARAM_SLICE_MODE); 755 ctx->params.slice_mode_changed = false; 756 } 757 758 if (!change_enable) 759 return 0; 760 761 coda_write(dev, change_enable, CODA_CMD_ENC_PARAM_CHANGE_ENABLE); 762 763 ret = coda_command_sync(ctx, CODA_COMMAND_RC_CHANGE_PARAMETER); 764 if (ret < 0) 765 return ret; 766 767 success = coda_read(dev, CODA_RET_ENC_PARAM_CHANGE_SUCCESS); 768 if (success != 1) 769 coda_dbg(1, ctx, "parameter change failed: %u\n", success); 770 771 return 0; 772 } 773 774 static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size) 775 { 776 phys_addr_t ret; 777 778 size = round_up(size, 1024); 779 if (size > iram->remaining) 780 return 0; 781 iram->remaining -= size; 782 783 ret = iram->next_paddr; 784 iram->next_paddr += size; 785 786 return ret; 787 } 788 789 static void coda_setup_iram(struct coda_ctx *ctx) 790 { 791 struct coda_iram_info *iram_info = &ctx->iram_info; 792 struct coda_dev *dev = ctx->dev; 793 int w64, w128; 794 int mb_width; 795 int dbk_bits; 796 int bit_bits; 797 int ip_bits; 798 int me_bits; 799 800 memset(iram_info, 0, sizeof(*iram_info)); 801 iram_info->next_paddr = dev->iram.paddr; 802 iram_info->remaining = dev->iram.size; 803 804 if (!dev->iram.vaddr) 805 return; 806 807 switch (dev->devtype->product) { 808 case CODA_HX4: 809 dbk_bits = CODA7_USE_HOST_DBK_ENABLE; 810 bit_bits = CODA7_USE_HOST_BIT_ENABLE; 811 ip_bits = CODA7_USE_HOST_IP_ENABLE; 812 me_bits = CODA7_USE_HOST_ME_ENABLE; 813 break; 814 case CODA_7541: 815 dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE; 816 bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE; 817 ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE; 818 me_bits = CODA7_USE_HOST_ME_ENABLE | CODA7_USE_ME_ENABLE; 819 break; 820 case CODA_960: 821 dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE; 822 bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE; 823 ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE; 824 me_bits = 0; 825 break; 826 default: /* CODA_DX6 */ 827 return; 828 } 829 830 if (ctx->inst_type == CODA_INST_ENCODER) { 831 struct coda_q_data *q_data_src; 832 833 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 834 mb_width = DIV_ROUND_UP(q_data_src->rect.width, 16); 835 w128 = mb_width * 128; 836 w64 = mb_width * 64; 837 838 /* Prioritize in case IRAM is too small for everything */ 839 if (dev->devtype->product == CODA_HX4 || 840 dev->devtype->product == CODA_7541) { 841 iram_info->search_ram_size = round_up(mb_width * 16 * 842 36 + 2048, 1024); 843 iram_info->search_ram_paddr = coda_iram_alloc(iram_info, 844 iram_info->search_ram_size); 845 if (!iram_info->search_ram_paddr) { 846 pr_err("IRAM is smaller than the search ram size\n"); 847 goto out; 848 } 849 iram_info->axi_sram_use |= me_bits; 850 } 851 852 /* Only H.264BP and H.263P3 are considered */ 853 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64); 854 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64); 855 if (!iram_info->buf_dbk_c_use) 856 goto out; 857 iram_info->axi_sram_use |= dbk_bits; 858 859 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128); 860 if (!iram_info->buf_bit_use) 861 goto out; 862 iram_info->axi_sram_use |= bit_bits; 863 864 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128); 865 if (!iram_info->buf_ip_ac_dc_use) 866 goto out; 867 iram_info->axi_sram_use |= ip_bits; 868 869 /* OVL and BTP disabled for encoder */ 870 } else if (ctx->inst_type == CODA_INST_DECODER) { 871 struct coda_q_data *q_data_dst; 872 873 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 874 mb_width = DIV_ROUND_UP(q_data_dst->width, 16); 875 w128 = mb_width * 128; 876 877 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128); 878 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128); 879 if (!iram_info->buf_dbk_c_use) 880 goto out; 881 iram_info->axi_sram_use |= dbk_bits; 882 883 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128); 884 if (!iram_info->buf_bit_use) 885 goto out; 886 iram_info->axi_sram_use |= bit_bits; 887 888 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128); 889 if (!iram_info->buf_ip_ac_dc_use) 890 goto out; 891 iram_info->axi_sram_use |= ip_bits; 892 893 /* OVL and BTP unused as there is no VC1 support yet */ 894 } 895 896 out: 897 if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE)) 898 coda_dbg(1, ctx, "IRAM smaller than needed\n"); 899 900 if (dev->devtype->product == CODA_HX4 || 901 dev->devtype->product == CODA_7541) { 902 /* TODO - Enabling these causes picture errors on CODA7541 */ 903 if (ctx->inst_type == CODA_INST_DECODER) { 904 /* fw 1.4.50 */ 905 iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE | 906 CODA7_USE_IP_ENABLE); 907 } else { 908 /* fw 13.4.29 */ 909 iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE | 910 CODA7_USE_HOST_DBK_ENABLE | 911 CODA7_USE_IP_ENABLE | 912 CODA7_USE_DBK_ENABLE); 913 } 914 } 915 } 916 917 static u32 coda_supported_firmwares[] = { 918 CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5), 919 CODA_FIRMWARE_VERNUM(CODA_HX4, 1, 4, 50), 920 CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50), 921 CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5), 922 CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 9), 923 CODA_FIRMWARE_VERNUM(CODA_960, 2, 3, 10), 924 CODA_FIRMWARE_VERNUM(CODA_960, 3, 1, 1), 925 }; 926 927 static bool coda_firmware_supported(u32 vernum) 928 { 929 int i; 930 931 for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++) 932 if (vernum == coda_supported_firmwares[i]) 933 return true; 934 return false; 935 } 936 937 int coda_check_firmware(struct coda_dev *dev) 938 { 939 u16 product, major, minor, release; 940 u32 data; 941 int ret; 942 943 ret = clk_prepare_enable(dev->clk_per); 944 if (ret) 945 goto err_clk_per; 946 947 ret = clk_prepare_enable(dev->clk_ahb); 948 if (ret) 949 goto err_clk_ahb; 950 951 coda_write(dev, 0, CODA_CMD_FIRMWARE_VERNUM); 952 coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY); 953 coda_write(dev, 0, CODA_REG_BIT_RUN_INDEX); 954 coda_write(dev, 0, CODA_REG_BIT_RUN_COD_STD); 955 coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND); 956 if (coda_wait_timeout(dev)) { 957 v4l2_err(&dev->v4l2_dev, "firmware get command error\n"); 958 ret = -EIO; 959 goto err_run_cmd; 960 } 961 962 if (dev->devtype->product == CODA_960) { 963 data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV); 964 v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n", 965 data); 966 } 967 968 /* Check we are compatible with the loaded firmware */ 969 data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM); 970 product = CODA_FIRMWARE_PRODUCT(data); 971 major = CODA_FIRMWARE_MAJOR(data); 972 minor = CODA_FIRMWARE_MINOR(data); 973 release = CODA_FIRMWARE_RELEASE(data); 974 975 clk_disable_unprepare(dev->clk_per); 976 clk_disable_unprepare(dev->clk_ahb); 977 978 if (product != dev->devtype->product) { 979 v4l2_err(&dev->v4l2_dev, 980 "Wrong firmware. Hw: %s, Fw: %s, Version: %u.%u.%u\n", 981 coda_product_name(dev->devtype->product), 982 coda_product_name(product), major, minor, release); 983 return -EINVAL; 984 } 985 986 v4l2_info(&dev->v4l2_dev, "Initialized %s.\n", 987 coda_product_name(product)); 988 989 if (coda_firmware_supported(data)) { 990 v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n", 991 major, minor, release); 992 } else { 993 v4l2_warn(&dev->v4l2_dev, 994 "Unsupported firmware version: %u.%u.%u\n", 995 major, minor, release); 996 } 997 998 return 0; 999 1000 err_run_cmd: 1001 clk_disable_unprepare(dev->clk_ahb); 1002 err_clk_ahb: 1003 clk_disable_unprepare(dev->clk_per); 1004 err_clk_per: 1005 return ret; 1006 } 1007 1008 static void coda9_set_frame_cache(struct coda_ctx *ctx, u32 fourcc) 1009 { 1010 u32 cache_size, cache_config; 1011 1012 if (ctx->tiled_map_type == GDI_LINEAR_FRAME_MAP) { 1013 /* Luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size */ 1014 cache_size = 0x20262024; 1015 cache_config = 2 << CODA9_CACHE_PAGEMERGE_OFFSET; 1016 } else { 1017 /* Luma 0x2 page, 4x4 cache, chroma 0x2 page, 4x3 cache size */ 1018 cache_size = 0x02440243; 1019 cache_config = 1 << CODA9_CACHE_PAGEMERGE_OFFSET; 1020 } 1021 coda_write(ctx->dev, cache_size, CODA9_CMD_SET_FRAME_CACHE_SIZE); 1022 if (fourcc == V4L2_PIX_FMT_NV12 || fourcc == V4L2_PIX_FMT_YUYV) { 1023 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET | 1024 16 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET | 1025 0 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET; 1026 } else { 1027 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET | 1028 8 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET | 1029 8 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET; 1030 } 1031 coda_write(ctx->dev, cache_config, CODA9_CMD_SET_FRAME_CACHE_CONFIG); 1032 } 1033 1034 /* 1035 * Encoder context operations 1036 */ 1037 1038 static int coda_encoder_reqbufs(struct coda_ctx *ctx, 1039 struct v4l2_requestbuffers *rb) 1040 { 1041 struct coda_q_data *q_data_src; 1042 int ret; 1043 1044 if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) 1045 return 0; 1046 1047 if (rb->count) { 1048 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 1049 ret = coda_alloc_context_buffers(ctx, q_data_src); 1050 if (ret < 0) 1051 return ret; 1052 } else { 1053 coda_free_context_buffers(ctx); 1054 } 1055 1056 return 0; 1057 } 1058 1059 static int coda_start_encoding(struct coda_ctx *ctx) 1060 { 1061 struct coda_dev *dev = ctx->dev; 1062 struct v4l2_device *v4l2_dev = &dev->v4l2_dev; 1063 struct coda_q_data *q_data_src, *q_data_dst; 1064 u32 bitstream_buf, bitstream_size; 1065 struct vb2_v4l2_buffer *buf; 1066 int gamma, ret, value; 1067 u32 dst_fourcc; 1068 int num_fb; 1069 u32 stride; 1070 1071 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 1072 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 1073 dst_fourcc = q_data_dst->fourcc; 1074 1075 buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); 1076 bitstream_buf = vb2_dma_contig_plane_dma_addr(&buf->vb2_buf, 0); 1077 bitstream_size = q_data_dst->sizeimage; 1078 1079 if (!coda_is_initialized(dev)) { 1080 v4l2_err(v4l2_dev, "coda is not initialized.\n"); 1081 return -EFAULT; 1082 } 1083 1084 if (dst_fourcc == V4L2_PIX_FMT_JPEG) { 1085 if (!ctx->params.jpeg_qmat_tab[0]) 1086 ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL); 1087 if (!ctx->params.jpeg_qmat_tab[1]) 1088 ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL); 1089 coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality); 1090 } 1091 1092 mutex_lock(&dev->coda_mutex); 1093 1094 coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR); 1095 coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); 1096 coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); 1097 switch (dev->devtype->product) { 1098 case CODA_DX6: 1099 coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN | 1100 CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL); 1101 break; 1102 case CODA_960: 1103 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN); 1104 fallthrough; 1105 case CODA_HX4: 1106 case CODA_7541: 1107 coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN | 1108 CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL); 1109 break; 1110 } 1111 1112 ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) | 1113 CODA9_FRAME_TILED2LINEAR); 1114 if (q_data_src->fourcc == V4L2_PIX_FMT_NV12) 1115 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE; 1116 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) 1117 ctx->frame_mem_ctrl |= (0x3 << 9) | CODA9_FRAME_TILED2LINEAR; 1118 coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL); 1119 1120 if (dev->devtype->product == CODA_DX6) { 1121 /* Configure the coda */ 1122 coda_write(dev, dev->iram.paddr, 1123 CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR); 1124 } 1125 1126 /* Could set rotation here if needed */ 1127 value = 0; 1128 switch (dev->devtype->product) { 1129 case CODA_DX6: 1130 value = (q_data_src->rect.width & CODADX6_PICWIDTH_MASK) 1131 << CODADX6_PICWIDTH_OFFSET; 1132 value |= (q_data_src->rect.height & CODADX6_PICHEIGHT_MASK) 1133 << CODA_PICHEIGHT_OFFSET; 1134 break; 1135 case CODA_HX4: 1136 case CODA_7541: 1137 if (dst_fourcc == V4L2_PIX_FMT_H264) { 1138 value = (round_up(q_data_src->rect.width, 16) & 1139 CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET; 1140 value |= (round_up(q_data_src->rect.height, 16) & 1141 CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET; 1142 break; 1143 } 1144 fallthrough; 1145 case CODA_960: 1146 value = (q_data_src->rect.width & CODA7_PICWIDTH_MASK) 1147 << CODA7_PICWIDTH_OFFSET; 1148 value |= (q_data_src->rect.height & CODA7_PICHEIGHT_MASK) 1149 << CODA_PICHEIGHT_OFFSET; 1150 } 1151 coda_write(dev, value, CODA_CMD_ENC_SEQ_SRC_SIZE); 1152 if (dst_fourcc == V4L2_PIX_FMT_JPEG) 1153 ctx->params.framerate = 0; 1154 coda_write(dev, ctx->params.framerate, 1155 CODA_CMD_ENC_SEQ_SRC_F_RATE); 1156 1157 ctx->params.codec_mode = ctx->codec->mode; 1158 switch (dst_fourcc) { 1159 case V4L2_PIX_FMT_MPEG4: 1160 if (dev->devtype->product == CODA_960) 1161 coda_write(dev, CODA9_STD_MPEG4, 1162 CODA_CMD_ENC_SEQ_COD_STD); 1163 else 1164 coda_write(dev, CODA_STD_MPEG4, 1165 CODA_CMD_ENC_SEQ_COD_STD); 1166 coda_write(dev, 0, CODA_CMD_ENC_SEQ_MP4_PARA); 1167 break; 1168 case V4L2_PIX_FMT_H264: 1169 if (dev->devtype->product == CODA_960) 1170 coda_write(dev, CODA9_STD_H264, 1171 CODA_CMD_ENC_SEQ_COD_STD); 1172 else 1173 coda_write(dev, CODA_STD_H264, 1174 CODA_CMD_ENC_SEQ_COD_STD); 1175 value = ((ctx->params.h264_disable_deblocking_filter_idc & 1176 CODA_264PARAM_DISABLEDEBLK_MASK) << 1177 CODA_264PARAM_DISABLEDEBLK_OFFSET) | 1178 ((ctx->params.h264_slice_alpha_c0_offset_div2 & 1179 CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) << 1180 CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) | 1181 ((ctx->params.h264_slice_beta_offset_div2 & 1182 CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) << 1183 CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET) | 1184 (ctx->params.h264_constrained_intra_pred_flag << 1185 CODA_264PARAM_CONSTRAINEDINTRAPREDFLAG_OFFSET) | 1186 (ctx->params.h264_chroma_qp_index_offset & 1187 CODA_264PARAM_CHROMAQPOFFSET_MASK); 1188 coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA); 1189 break; 1190 case V4L2_PIX_FMT_JPEG: 1191 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_PARA); 1192 coda_write(dev, ctx->params.jpeg_restart_interval, 1193 CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL); 1194 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN); 1195 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE); 1196 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET); 1197 1198 coda_jpeg_write_tables(ctx); 1199 break; 1200 default: 1201 v4l2_err(v4l2_dev, 1202 "dst format (0x%08x) invalid.\n", dst_fourcc); 1203 ret = -EINVAL; 1204 goto out; 1205 } 1206 1207 /* 1208 * slice mode and GOP size registers are used for thumb size/offset 1209 * in JPEG mode 1210 */ 1211 if (dst_fourcc != V4L2_PIX_FMT_JPEG) { 1212 value = coda_slice_mode(ctx); 1213 coda_write(dev, value, CODA_CMD_ENC_SEQ_SLICE_MODE); 1214 value = ctx->params.gop_size; 1215 coda_write(dev, value, CODA_CMD_ENC_SEQ_GOP_SIZE); 1216 } 1217 1218 if (ctx->params.bitrate && (ctx->params.frame_rc_enable || 1219 ctx->params.mb_rc_enable)) { 1220 ctx->params.bitrate_changed = false; 1221 ctx->params.h264_intra_qp_changed = false; 1222 1223 /* Rate control enabled */ 1224 value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK) 1225 << CODA_RATECONTROL_BITRATE_OFFSET; 1226 value |= 1 & CODA_RATECONTROL_ENABLE_MASK; 1227 value |= (ctx->params.vbv_delay & 1228 CODA_RATECONTROL_INITIALDELAY_MASK) 1229 << CODA_RATECONTROL_INITIALDELAY_OFFSET; 1230 if (dev->devtype->product == CODA_960) 1231 value |= BIT(31); /* disable autoskip */ 1232 } else { 1233 value = 0; 1234 } 1235 coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_PARA); 1236 1237 coda_write(dev, ctx->params.vbv_size, CODA_CMD_ENC_SEQ_RC_BUF_SIZE); 1238 coda_write(dev, ctx->params.intra_refresh, 1239 CODA_CMD_ENC_SEQ_INTRA_REFRESH); 1240 1241 coda_write(dev, bitstream_buf, CODA_CMD_ENC_SEQ_BB_START); 1242 coda_write(dev, bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE); 1243 1244 1245 value = 0; 1246 if (dev->devtype->product == CODA_960) 1247 gamma = CODA9_DEFAULT_GAMMA; 1248 else 1249 gamma = CODA_DEFAULT_GAMMA; 1250 if (gamma > 0) { 1251 coda_write(dev, (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET, 1252 CODA_CMD_ENC_SEQ_RC_GAMMA); 1253 } 1254 1255 if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) { 1256 coda_write(dev, 1257 ctx->params.h264_min_qp << CODA_QPMIN_OFFSET | 1258 ctx->params.h264_max_qp << CODA_QPMAX_OFFSET, 1259 CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX); 1260 } 1261 if (dev->devtype->product == CODA_960) { 1262 if (ctx->params.h264_max_qp) 1263 value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET; 1264 if (CODA_DEFAULT_GAMMA > 0) 1265 value |= 1 << CODA9_OPTION_GAMMA_OFFSET; 1266 } else { 1267 if (CODA_DEFAULT_GAMMA > 0) { 1268 if (dev->devtype->product == CODA_DX6) 1269 value |= 1 << CODADX6_OPTION_GAMMA_OFFSET; 1270 else 1271 value |= 1 << CODA7_OPTION_GAMMA_OFFSET; 1272 } 1273 if (ctx->params.h264_min_qp) 1274 value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET; 1275 if (ctx->params.h264_max_qp) 1276 value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET; 1277 } 1278 coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION); 1279 1280 if (ctx->params.frame_rc_enable && !ctx->params.mb_rc_enable) 1281 value = 1; 1282 else 1283 value = 0; 1284 coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE); 1285 1286 coda_setup_iram(ctx); 1287 1288 if (dst_fourcc == V4L2_PIX_FMT_H264) { 1289 switch (dev->devtype->product) { 1290 case CODA_DX6: 1291 value = FMO_SLICE_SAVE_BUF_SIZE << 7; 1292 coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO); 1293 break; 1294 case CODA_HX4: 1295 case CODA_7541: 1296 coda_write(dev, ctx->iram_info.search_ram_paddr, 1297 CODA7_CMD_ENC_SEQ_SEARCH_BASE); 1298 coda_write(dev, ctx->iram_info.search_ram_size, 1299 CODA7_CMD_ENC_SEQ_SEARCH_SIZE); 1300 break; 1301 case CODA_960: 1302 coda_write(dev, 0, CODA9_CMD_ENC_SEQ_ME_OPTION); 1303 coda_write(dev, 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT); 1304 } 1305 } 1306 1307 ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT); 1308 if (ret < 0) { 1309 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n"); 1310 goto out; 1311 } 1312 1313 if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) { 1314 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n"); 1315 ret = -EFAULT; 1316 goto out; 1317 } 1318 ctx->initialized = 1; 1319 1320 if (dst_fourcc != V4L2_PIX_FMT_JPEG) { 1321 if (dev->devtype->product == CODA_960) 1322 ctx->num_internal_frames = 4; 1323 else 1324 ctx->num_internal_frames = 2; 1325 ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc); 1326 if (ret < 0) { 1327 v4l2_err(v4l2_dev, "failed to allocate framebuffers\n"); 1328 goto out; 1329 } 1330 num_fb = 2; 1331 stride = q_data_src->bytesperline; 1332 } else { 1333 ctx->num_internal_frames = 0; 1334 num_fb = 0; 1335 stride = 0; 1336 } 1337 coda_write(dev, num_fb, CODA_CMD_SET_FRAME_BUF_NUM); 1338 coda_write(dev, stride, CODA_CMD_SET_FRAME_BUF_STRIDE); 1339 1340 if (dev->devtype->product == CODA_HX4 || 1341 dev->devtype->product == CODA_7541) { 1342 coda_write(dev, q_data_src->bytesperline, 1343 CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE); 1344 } 1345 if (dev->devtype->product != CODA_DX6) { 1346 coda_write(dev, ctx->iram_info.buf_bit_use, 1347 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR); 1348 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use, 1349 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR); 1350 coda_write(dev, ctx->iram_info.buf_dbk_y_use, 1351 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR); 1352 coda_write(dev, ctx->iram_info.buf_dbk_c_use, 1353 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR); 1354 coda_write(dev, ctx->iram_info.buf_ovl_use, 1355 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR); 1356 if (dev->devtype->product == CODA_960) { 1357 coda_write(dev, ctx->iram_info.buf_btp_use, 1358 CODA9_CMD_SET_FRAME_AXI_BTP_ADDR); 1359 1360 coda9_set_frame_cache(ctx, q_data_src->fourcc); 1361 1362 /* FIXME */ 1363 coda_write(dev, ctx->internal_frames[2].buf.paddr, 1364 CODA9_CMD_SET_FRAME_SUBSAMP_A); 1365 coda_write(dev, ctx->internal_frames[3].buf.paddr, 1366 CODA9_CMD_SET_FRAME_SUBSAMP_B); 1367 } 1368 } 1369 1370 ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF); 1371 if (ret < 0) { 1372 v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n"); 1373 goto out; 1374 } 1375 1376 coda_dbg(1, ctx, "start encoding %dx%d %4.4s->%4.4s @ %d/%d Hz\n", 1377 q_data_src->rect.width, q_data_src->rect.height, 1378 (char *)&ctx->codec->src_fourcc, (char *)&dst_fourcc, 1379 ctx->params.framerate & 0xffff, 1380 (ctx->params.framerate >> 16) + 1); 1381 1382 /* Save stream headers */ 1383 buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); 1384 switch (dst_fourcc) { 1385 case V4L2_PIX_FMT_H264: 1386 /* 1387 * Get SPS in the first frame and copy it to an 1388 * intermediate buffer. 1389 */ 1390 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS, 1391 &ctx->vpu_header[0][0], 1392 &ctx->vpu_header_size[0]); 1393 if (ret < 0) 1394 goto out; 1395 1396 /* 1397 * If visible width or height are not aligned to macroblock 1398 * size, the crop_right and crop_bottom SPS fields must be set 1399 * to the difference between visible and coded size. This is 1400 * only supported by CODA960 firmware. All others do not allow 1401 * writing frame cropping parameters, so we have to manually 1402 * fix up the SPS RBSP (Sequence Parameter Set Raw Byte 1403 * Sequence Payload) ourselves. 1404 */ 1405 if (ctx->dev->devtype->product != CODA_960 && 1406 ((q_data_src->rect.width % 16) || 1407 (q_data_src->rect.height % 16))) { 1408 ret = coda_h264_sps_fixup(ctx, q_data_src->rect.width, 1409 q_data_src->rect.height, 1410 &ctx->vpu_header[0][0], 1411 &ctx->vpu_header_size[0], 1412 sizeof(ctx->vpu_header[0])); 1413 if (ret < 0) 1414 goto out; 1415 } 1416 1417 /* 1418 * Get PPS in the first frame and copy it to an 1419 * intermediate buffer. 1420 */ 1421 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS, 1422 &ctx->vpu_header[1][0], 1423 &ctx->vpu_header_size[1]); 1424 if (ret < 0) 1425 goto out; 1426 1427 /* 1428 * Length of H.264 headers is variable and thus it might not be 1429 * aligned for the coda to append the encoded frame. In that is 1430 * the case a filler NAL must be added to header 2. 1431 */ 1432 ctx->vpu_header_size[2] = coda_h264_padding( 1433 (ctx->vpu_header_size[0] + 1434 ctx->vpu_header_size[1]), 1435 ctx->vpu_header[2]); 1436 break; 1437 case V4L2_PIX_FMT_MPEG4: 1438 /* 1439 * Get VOS in the first frame and copy it to an 1440 * intermediate buffer 1441 */ 1442 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS, 1443 &ctx->vpu_header[0][0], 1444 &ctx->vpu_header_size[0]); 1445 if (ret < 0) 1446 goto out; 1447 1448 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS, 1449 &ctx->vpu_header[1][0], 1450 &ctx->vpu_header_size[1]); 1451 if (ret < 0) 1452 goto out; 1453 1454 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL, 1455 &ctx->vpu_header[2][0], 1456 &ctx->vpu_header_size[2]); 1457 if (ret < 0) 1458 goto out; 1459 break; 1460 default: 1461 /* No more formats need to save headers at the moment */ 1462 break; 1463 } 1464 1465 out: 1466 mutex_unlock(&dev->coda_mutex); 1467 return ret; 1468 } 1469 1470 static int coda_prepare_encode(struct coda_ctx *ctx) 1471 { 1472 struct coda_q_data *q_data_src, *q_data_dst; 1473 struct vb2_v4l2_buffer *src_buf, *dst_buf; 1474 struct coda_dev *dev = ctx->dev; 1475 int force_ipicture; 1476 int quant_param = 0; 1477 u32 pic_stream_buffer_addr, pic_stream_buffer_size; 1478 u32 rot_mode = 0; 1479 u32 dst_fourcc; 1480 u32 reg; 1481 int ret; 1482 1483 ret = coda_enc_param_change(ctx); 1484 if (ret < 0) { 1485 v4l2_warn(&ctx->dev->v4l2_dev, "parameter change failed: %d\n", 1486 ret); 1487 } 1488 1489 src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); 1490 dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); 1491 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 1492 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 1493 dst_fourcc = q_data_dst->fourcc; 1494 1495 src_buf->sequence = ctx->osequence; 1496 dst_buf->sequence = ctx->osequence; 1497 ctx->osequence++; 1498 1499 force_ipicture = ctx->params.force_ipicture; 1500 if (force_ipicture) 1501 ctx->params.force_ipicture = false; 1502 else if (ctx->params.gop_size != 0 && 1503 (src_buf->sequence % ctx->params.gop_size) == 0) 1504 force_ipicture = 1; 1505 1506 /* 1507 * Workaround coda firmware BUG that only marks the first 1508 * frame as IDR. This is a problem for some decoders that can't 1509 * recover when a frame is lost. 1510 */ 1511 if (!force_ipicture) { 1512 src_buf->flags |= V4L2_BUF_FLAG_PFRAME; 1513 src_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME; 1514 } else { 1515 src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME; 1516 src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME; 1517 } 1518 1519 if (dev->devtype->product == CODA_960) 1520 coda_set_gdi_regs(ctx); 1521 1522 /* 1523 * Copy headers in front of the first frame and forced I frames for 1524 * H.264 only. In MPEG4 they are already copied by the CODA. 1525 */ 1526 if (src_buf->sequence == 0 || force_ipicture) { 1527 pic_stream_buffer_addr = 1528 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0) + 1529 ctx->vpu_header_size[0] + 1530 ctx->vpu_header_size[1] + 1531 ctx->vpu_header_size[2]; 1532 pic_stream_buffer_size = q_data_dst->sizeimage - 1533 ctx->vpu_header_size[0] - 1534 ctx->vpu_header_size[1] - 1535 ctx->vpu_header_size[2]; 1536 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0), 1537 &ctx->vpu_header[0][0], ctx->vpu_header_size[0]); 1538 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0) 1539 + ctx->vpu_header_size[0], &ctx->vpu_header[1][0], 1540 ctx->vpu_header_size[1]); 1541 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0) 1542 + ctx->vpu_header_size[0] + ctx->vpu_header_size[1], 1543 &ctx->vpu_header[2][0], ctx->vpu_header_size[2]); 1544 } else { 1545 pic_stream_buffer_addr = 1546 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); 1547 pic_stream_buffer_size = q_data_dst->sizeimage; 1548 } 1549 1550 if (force_ipicture) { 1551 switch (dst_fourcc) { 1552 case V4L2_PIX_FMT_H264: 1553 quant_param = ctx->params.h264_intra_qp; 1554 break; 1555 case V4L2_PIX_FMT_MPEG4: 1556 quant_param = ctx->params.mpeg4_intra_qp; 1557 break; 1558 case V4L2_PIX_FMT_JPEG: 1559 quant_param = 30; 1560 break; 1561 default: 1562 v4l2_warn(&ctx->dev->v4l2_dev, 1563 "cannot set intra qp, fmt not supported\n"); 1564 break; 1565 } 1566 } else { 1567 switch (dst_fourcc) { 1568 case V4L2_PIX_FMT_H264: 1569 quant_param = ctx->params.h264_inter_qp; 1570 break; 1571 case V4L2_PIX_FMT_MPEG4: 1572 quant_param = ctx->params.mpeg4_inter_qp; 1573 break; 1574 default: 1575 v4l2_warn(&ctx->dev->v4l2_dev, 1576 "cannot set inter qp, fmt not supported\n"); 1577 break; 1578 } 1579 } 1580 1581 /* submit */ 1582 if (ctx->params.rot_mode) 1583 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode; 1584 coda_write(dev, rot_mode, CODA_CMD_ENC_PIC_ROT_MODE); 1585 coda_write(dev, quant_param, CODA_CMD_ENC_PIC_QS); 1586 1587 if (dev->devtype->product == CODA_960) { 1588 coda_write(dev, 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX); 1589 coda_write(dev, q_data_src->bytesperline, 1590 CODA9_CMD_ENC_PIC_SRC_STRIDE); 1591 coda_write(dev, 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC); 1592 1593 reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y; 1594 } else { 1595 reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y; 1596 } 1597 coda_write_base(ctx, q_data_src, src_buf, reg); 1598 1599 coda_write(dev, force_ipicture << 1 & 0x2, 1600 CODA_CMD_ENC_PIC_OPTION); 1601 1602 coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START); 1603 coda_write(dev, pic_stream_buffer_size / 1024, 1604 CODA_CMD_ENC_PIC_BB_SIZE); 1605 1606 if (!ctx->streamon_out) { 1607 /* After streamoff on the output side, set stream end flag */ 1608 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG; 1609 coda_write(dev, ctx->bit_stream_param, 1610 CODA_REG_BIT_BIT_STREAM_PARAM); 1611 } 1612 1613 if (dev->devtype->product != CODA_DX6) 1614 coda_write(dev, ctx->iram_info.axi_sram_use, 1615 CODA7_REG_BIT_AXI_SRAM_USE); 1616 1617 trace_coda_enc_pic_run(ctx, src_buf); 1618 1619 coda_command_async(ctx, CODA_COMMAND_PIC_RUN); 1620 1621 return 0; 1622 } 1623 1624 static char coda_frame_type_char(u32 flags) 1625 { 1626 return (flags & V4L2_BUF_FLAG_KEYFRAME) ? 'I' : 1627 (flags & V4L2_BUF_FLAG_PFRAME) ? 'P' : 1628 (flags & V4L2_BUF_FLAG_BFRAME) ? 'B' : '?'; 1629 } 1630 1631 static void coda_finish_encode(struct coda_ctx *ctx) 1632 { 1633 struct vb2_v4l2_buffer *src_buf, *dst_buf; 1634 struct coda_dev *dev = ctx->dev; 1635 u32 wr_ptr, start_ptr; 1636 1637 if (ctx->aborting) 1638 return; 1639 1640 /* 1641 * Lock to make sure that an encoder stop command running in parallel 1642 * will either already have marked src_buf as last, or it will wake up 1643 * the capture queue after the buffers are returned. 1644 */ 1645 mutex_lock(&ctx->wakeup_mutex); 1646 src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); 1647 dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); 1648 1649 trace_coda_enc_pic_done(ctx, dst_buf); 1650 1651 /* Get results from the coda */ 1652 start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START); 1653 wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); 1654 1655 /* Calculate bytesused field */ 1656 if (dst_buf->sequence == 0 || 1657 src_buf->flags & V4L2_BUF_FLAG_KEYFRAME) { 1658 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr + 1659 ctx->vpu_header_size[0] + 1660 ctx->vpu_header_size[1] + 1661 ctx->vpu_header_size[2]); 1662 } else { 1663 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr); 1664 } 1665 1666 coda_dbg(1, ctx, "frame size = %u\n", wr_ptr - start_ptr); 1667 1668 coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM); 1669 coda_read(dev, CODA_RET_ENC_PIC_FLAG); 1670 1671 dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME | 1672 V4L2_BUF_FLAG_PFRAME | 1673 V4L2_BUF_FLAG_LAST); 1674 if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) 1675 dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME; 1676 else 1677 dst_buf->flags |= V4L2_BUF_FLAG_PFRAME; 1678 dst_buf->flags |= src_buf->flags & V4L2_BUF_FLAG_LAST; 1679 1680 v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, false); 1681 1682 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); 1683 1684 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); 1685 coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE); 1686 mutex_unlock(&ctx->wakeup_mutex); 1687 1688 ctx->gopcounter--; 1689 if (ctx->gopcounter < 0) 1690 ctx->gopcounter = ctx->params.gop_size - 1; 1691 1692 coda_dbg(1, ctx, "job finished: encoded %c frame (%d)%s\n", 1693 coda_frame_type_char(dst_buf->flags), dst_buf->sequence, 1694 (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? " (last)" : ""); 1695 } 1696 1697 static void coda_seq_end_work(struct work_struct *work) 1698 { 1699 struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work); 1700 struct coda_dev *dev = ctx->dev; 1701 1702 mutex_lock(&ctx->buffer_mutex); 1703 mutex_lock(&dev->coda_mutex); 1704 1705 if (ctx->initialized == 0) 1706 goto out; 1707 1708 coda_dbg(1, ctx, "%s: sent command 'SEQ_END' to coda\n", __func__); 1709 if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) { 1710 v4l2_err(&dev->v4l2_dev, 1711 "CODA_COMMAND_SEQ_END failed\n"); 1712 } 1713 1714 /* 1715 * FIXME: Sometimes h.264 encoding fails with 8-byte sequences missing 1716 * from the output stream after the h.264 decoder has run. Resetting the 1717 * hardware after the decoder has finished seems to help. 1718 */ 1719 if (dev->devtype->product == CODA_960) 1720 coda_hw_reset(ctx); 1721 1722 kfifo_init(&ctx->bitstream_fifo, 1723 ctx->bitstream.vaddr, ctx->bitstream.size); 1724 1725 coda_free_framebuffers(ctx); 1726 1727 ctx->initialized = 0; 1728 1729 out: 1730 mutex_unlock(&dev->coda_mutex); 1731 mutex_unlock(&ctx->buffer_mutex); 1732 } 1733 1734 static void coda_bit_release(struct coda_ctx *ctx) 1735 { 1736 mutex_lock(&ctx->buffer_mutex); 1737 coda_free_framebuffers(ctx); 1738 coda_free_context_buffers(ctx); 1739 coda_free_bitstream_buffer(ctx); 1740 mutex_unlock(&ctx->buffer_mutex); 1741 } 1742 1743 const struct coda_context_ops coda_bit_encode_ops = { 1744 .queue_init = coda_encoder_queue_init, 1745 .reqbufs = coda_encoder_reqbufs, 1746 .start_streaming = coda_start_encoding, 1747 .prepare_run = coda_prepare_encode, 1748 .finish_run = coda_finish_encode, 1749 .seq_end_work = coda_seq_end_work, 1750 .release = coda_bit_release, 1751 }; 1752 1753 /* 1754 * Decoder context operations 1755 */ 1756 1757 static int coda_alloc_bitstream_buffer(struct coda_ctx *ctx, 1758 struct coda_q_data *q_data) 1759 { 1760 if (ctx->bitstream.vaddr) 1761 return 0; 1762 1763 ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2); 1764 ctx->bitstream.vaddr = dma_alloc_wc(ctx->dev->dev, ctx->bitstream.size, 1765 &ctx->bitstream.paddr, GFP_KERNEL); 1766 if (!ctx->bitstream.vaddr) { 1767 v4l2_err(&ctx->dev->v4l2_dev, 1768 "failed to allocate bitstream ringbuffer"); 1769 return -ENOMEM; 1770 } 1771 kfifo_init(&ctx->bitstream_fifo, 1772 ctx->bitstream.vaddr, ctx->bitstream.size); 1773 1774 return 0; 1775 } 1776 1777 static void coda_free_bitstream_buffer(struct coda_ctx *ctx) 1778 { 1779 if (ctx->bitstream.vaddr == NULL) 1780 return; 1781 1782 dma_free_wc(ctx->dev->dev, ctx->bitstream.size, ctx->bitstream.vaddr, 1783 ctx->bitstream.paddr); 1784 ctx->bitstream.vaddr = NULL; 1785 kfifo_init(&ctx->bitstream_fifo, NULL, 0); 1786 } 1787 1788 static int coda_decoder_reqbufs(struct coda_ctx *ctx, 1789 struct v4l2_requestbuffers *rb) 1790 { 1791 struct coda_q_data *q_data_src; 1792 int ret; 1793 1794 if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) 1795 return 0; 1796 1797 if (rb->count) { 1798 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 1799 ret = coda_alloc_context_buffers(ctx, q_data_src); 1800 if (ret < 0) 1801 return ret; 1802 ret = coda_alloc_bitstream_buffer(ctx, q_data_src); 1803 if (ret < 0) { 1804 coda_free_context_buffers(ctx); 1805 return ret; 1806 } 1807 } else { 1808 coda_free_bitstream_buffer(ctx); 1809 coda_free_context_buffers(ctx); 1810 } 1811 1812 return 0; 1813 } 1814 1815 static bool coda_reorder_enable(struct coda_ctx *ctx) 1816 { 1817 struct coda_dev *dev = ctx->dev; 1818 int profile; 1819 1820 if (dev->devtype->product != CODA_HX4 && 1821 dev->devtype->product != CODA_7541 && 1822 dev->devtype->product != CODA_960) 1823 return false; 1824 1825 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) 1826 return false; 1827 1828 if (ctx->codec->src_fourcc != V4L2_PIX_FMT_H264) 1829 return true; 1830 1831 profile = coda_h264_profile(ctx->params.h264_profile_idc); 1832 if (profile < 0) 1833 v4l2_warn(&dev->v4l2_dev, "Unknown H264 Profile: %u\n", 1834 ctx->params.h264_profile_idc); 1835 1836 /* Baseline profile does not support reordering */ 1837 return profile > V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE; 1838 } 1839 1840 static void coda_decoder_drop_used_metas(struct coda_ctx *ctx) 1841 { 1842 struct coda_buffer_meta *meta, *tmp; 1843 1844 /* 1845 * All metas that end at or before the RD pointer (fifo out), 1846 * are now consumed by the VPU and should be released. 1847 */ 1848 spin_lock(&ctx->buffer_meta_lock); 1849 list_for_each_entry_safe(meta, tmp, &ctx->buffer_meta_list, list) { 1850 if (ctx->bitstream_fifo.kfifo.out >= meta->end) { 1851 coda_dbg(2, ctx, "releasing meta: seq=%d start=%d end=%d\n", 1852 meta->sequence, meta->start, meta->end); 1853 1854 list_del(&meta->list); 1855 ctx->num_metas--; 1856 ctx->first_frame_sequence++; 1857 kfree(meta); 1858 } 1859 } 1860 spin_unlock(&ctx->buffer_meta_lock); 1861 } 1862 1863 static int __coda_decoder_seq_init(struct coda_ctx *ctx) 1864 { 1865 struct coda_q_data *q_data_src, *q_data_dst; 1866 u32 bitstream_buf, bitstream_size; 1867 struct coda_dev *dev = ctx->dev; 1868 int width, height; 1869 u32 src_fourcc, dst_fourcc; 1870 u32 val; 1871 int ret; 1872 1873 lockdep_assert_held(&dev->coda_mutex); 1874 1875 coda_dbg(1, ctx, "Video Data Order Adapter: %s\n", 1876 ctx->use_vdoa ? "Enabled" : "Disabled"); 1877 1878 /* Start decoding */ 1879 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 1880 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 1881 bitstream_buf = ctx->bitstream.paddr; 1882 bitstream_size = ctx->bitstream.size; 1883 src_fourcc = q_data_src->fourcc; 1884 dst_fourcc = q_data_dst->fourcc; 1885 1886 /* Update coda bitstream read and write pointers from kfifo */ 1887 coda_kfifo_sync_to_device_full(ctx); 1888 1889 ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) | 1890 CODA9_FRAME_TILED2LINEAR); 1891 if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV) 1892 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE; 1893 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) 1894 ctx->frame_mem_ctrl |= (0x3 << 9) | 1895 ((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR); 1896 coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL); 1897 1898 ctx->display_idx = -1; 1899 ctx->frm_dis_flg = 0; 1900 coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); 1901 1902 coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START); 1903 coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE); 1904 val = 0; 1905 if (coda_reorder_enable(ctx)) 1906 val |= CODA_REORDER_ENABLE; 1907 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) 1908 val |= CODA_NO_INT_ENABLE; 1909 coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION); 1910 1911 ctx->params.codec_mode = ctx->codec->mode; 1912 if (dev->devtype->product == CODA_960 && 1913 src_fourcc == V4L2_PIX_FMT_MPEG4) 1914 ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4; 1915 else 1916 ctx->params.codec_mode_aux = 0; 1917 if (src_fourcc == V4L2_PIX_FMT_MPEG4) { 1918 coda_write(dev, CODA_MP4_CLASS_MPEG4, 1919 CODA_CMD_DEC_SEQ_MP4_ASP_CLASS); 1920 } 1921 if (src_fourcc == V4L2_PIX_FMT_H264) { 1922 if (dev->devtype->product == CODA_HX4 || 1923 dev->devtype->product == CODA_7541) { 1924 coda_write(dev, ctx->psbuf.paddr, 1925 CODA_CMD_DEC_SEQ_PS_BB_START); 1926 coda_write(dev, (CODA7_PS_BUF_SIZE / 1024), 1927 CODA_CMD_DEC_SEQ_PS_BB_SIZE); 1928 } 1929 if (dev->devtype->product == CODA_960) { 1930 coda_write(dev, 0, CODA_CMD_DEC_SEQ_X264_MV_EN); 1931 coda_write(dev, 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE); 1932 } 1933 } 1934 if (src_fourcc == V4L2_PIX_FMT_JPEG) 1935 coda_write(dev, 0, CODA_CMD_DEC_SEQ_JPG_THUMB_EN); 1936 if (dev->devtype->product != CODA_960) 1937 coda_write(dev, 0, CODA_CMD_DEC_SEQ_SRC_SIZE); 1938 1939 ctx->bit_stream_param = CODA_BIT_DEC_SEQ_INIT_ESCAPE; 1940 ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT); 1941 ctx->bit_stream_param = 0; 1942 if (ret) { 1943 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n"); 1944 return ret; 1945 } 1946 ctx->sequence_offset = ~0U; 1947 ctx->initialized = 1; 1948 ctx->first_frame_sequence = 0; 1949 1950 /* Update kfifo out pointer from coda bitstream read pointer */ 1951 coda_kfifo_sync_from_device(ctx); 1952 1953 /* 1954 * After updating the read pointer, we need to check if 1955 * any metas are consumed and should be released. 1956 */ 1957 coda_decoder_drop_used_metas(ctx); 1958 1959 if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) { 1960 v4l2_err(&dev->v4l2_dev, 1961 "CODA_COMMAND_SEQ_INIT failed, error code = 0x%x\n", 1962 coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON)); 1963 return -EAGAIN; 1964 } 1965 1966 val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE); 1967 if (dev->devtype->product == CODA_DX6) { 1968 width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK; 1969 height = val & CODADX6_PICHEIGHT_MASK; 1970 } else { 1971 width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK; 1972 height = val & CODA7_PICHEIGHT_MASK; 1973 } 1974 1975 if (width > q_data_dst->bytesperline || height > q_data_dst->height) { 1976 v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n", 1977 width, height, q_data_dst->bytesperline, 1978 q_data_dst->height); 1979 return -EINVAL; 1980 } 1981 1982 width = round_up(width, 16); 1983 height = round_up(height, 16); 1984 1985 coda_dbg(1, ctx, "start decoding: %dx%d\n", width, height); 1986 1987 ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED); 1988 /* 1989 * If the VDOA is used, the decoder needs one additional frame, 1990 * because the frames are freed when the next frame is decoded. 1991 * Otherwise there are visible errors in the decoded frames (green 1992 * regions in displayed frames) and a broken order of frames (earlier 1993 * frames are sporadically displayed after later frames). 1994 */ 1995 if (ctx->use_vdoa) 1996 ctx->num_internal_frames += 1; 1997 if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) { 1998 v4l2_err(&dev->v4l2_dev, 1999 "not enough framebuffers to decode (%d < %d)\n", 2000 CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames); 2001 return -EINVAL; 2002 } 2003 2004 if (src_fourcc == V4L2_PIX_FMT_H264) { 2005 u32 left_right; 2006 u32 top_bottom; 2007 2008 left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT); 2009 top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM); 2010 2011 q_data_dst->rect.left = (left_right >> 10) & 0x3ff; 2012 q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff; 2013 q_data_dst->rect.width = width - q_data_dst->rect.left - 2014 (left_right & 0x3ff); 2015 q_data_dst->rect.height = height - q_data_dst->rect.top - 2016 (top_bottom & 0x3ff); 2017 } 2018 2019 if (dev->devtype->product != CODA_DX6) { 2020 u8 profile, level; 2021 2022 val = coda_read(dev, CODA7_RET_DEC_SEQ_HEADER_REPORT); 2023 profile = val & 0xff; 2024 level = (val >> 8) & 0x7f; 2025 2026 if (profile || level) 2027 coda_update_profile_level_ctrls(ctx, profile, level); 2028 } 2029 2030 return 0; 2031 } 2032 2033 static void coda_dec_seq_init_work(struct work_struct *work) 2034 { 2035 struct coda_ctx *ctx = container_of(work, 2036 struct coda_ctx, seq_init_work); 2037 struct coda_dev *dev = ctx->dev; 2038 2039 mutex_lock(&ctx->buffer_mutex); 2040 mutex_lock(&dev->coda_mutex); 2041 2042 if (!ctx->initialized) 2043 __coda_decoder_seq_init(ctx); 2044 2045 mutex_unlock(&dev->coda_mutex); 2046 mutex_unlock(&ctx->buffer_mutex); 2047 } 2048 2049 static int __coda_start_decoding(struct coda_ctx *ctx) 2050 { 2051 struct coda_q_data *q_data_src, *q_data_dst; 2052 struct coda_dev *dev = ctx->dev; 2053 u32 src_fourcc, dst_fourcc; 2054 int ret; 2055 2056 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 2057 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 2058 src_fourcc = q_data_src->fourcc; 2059 dst_fourcc = q_data_dst->fourcc; 2060 2061 if (!ctx->initialized) { 2062 ret = __coda_decoder_seq_init(ctx); 2063 if (ret < 0) 2064 return ret; 2065 } else { 2066 ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) | 2067 CODA9_FRAME_TILED2LINEAR); 2068 if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV) 2069 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE; 2070 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) 2071 ctx->frame_mem_ctrl |= (0x3 << 9) | 2072 ((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR); 2073 } 2074 2075 coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR); 2076 2077 ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc); 2078 if (ret < 0) { 2079 v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n"); 2080 return ret; 2081 } 2082 2083 /* Tell the decoder how many frame buffers we allocated. */ 2084 coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM); 2085 coda_write(dev, round_up(q_data_dst->rect.width, 16), 2086 CODA_CMD_SET_FRAME_BUF_STRIDE); 2087 2088 if (dev->devtype->product != CODA_DX6) { 2089 /* Set secondary AXI IRAM */ 2090 coda_setup_iram(ctx); 2091 2092 coda_write(dev, ctx->iram_info.buf_bit_use, 2093 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR); 2094 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use, 2095 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR); 2096 coda_write(dev, ctx->iram_info.buf_dbk_y_use, 2097 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR); 2098 coda_write(dev, ctx->iram_info.buf_dbk_c_use, 2099 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR); 2100 coda_write(dev, ctx->iram_info.buf_ovl_use, 2101 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR); 2102 if (dev->devtype->product == CODA_960) { 2103 coda_write(dev, ctx->iram_info.buf_btp_use, 2104 CODA9_CMD_SET_FRAME_AXI_BTP_ADDR); 2105 2106 coda_write(dev, -1, CODA9_CMD_SET_FRAME_DELAY); 2107 coda9_set_frame_cache(ctx, dst_fourcc); 2108 } 2109 } 2110 2111 if (src_fourcc == V4L2_PIX_FMT_H264) { 2112 coda_write(dev, ctx->slicebuf.paddr, 2113 CODA_CMD_SET_FRAME_SLICE_BB_START); 2114 coda_write(dev, ctx->slicebuf.size / 1024, 2115 CODA_CMD_SET_FRAME_SLICE_BB_SIZE); 2116 } 2117 2118 if (dev->devtype->product == CODA_HX4 || 2119 dev->devtype->product == CODA_7541) { 2120 int max_mb_x = 1920 / 16; 2121 int max_mb_y = 1088 / 16; 2122 int max_mb_num = max_mb_x * max_mb_y; 2123 2124 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y, 2125 CODA7_CMD_SET_FRAME_MAX_DEC_SIZE); 2126 } else if (dev->devtype->product == CODA_960) { 2127 int max_mb_x = 1920 / 16; 2128 int max_mb_y = 1088 / 16; 2129 int max_mb_num = max_mb_x * max_mb_y; 2130 2131 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y, 2132 CODA9_CMD_SET_FRAME_MAX_DEC_SIZE); 2133 } 2134 2135 if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) { 2136 v4l2_err(&ctx->dev->v4l2_dev, 2137 "CODA_COMMAND_SET_FRAME_BUF timeout\n"); 2138 return -ETIMEDOUT; 2139 } 2140 2141 return 0; 2142 } 2143 2144 static int coda_start_decoding(struct coda_ctx *ctx) 2145 { 2146 struct coda_dev *dev = ctx->dev; 2147 int ret; 2148 2149 mutex_lock(&dev->coda_mutex); 2150 ret = __coda_start_decoding(ctx); 2151 mutex_unlock(&dev->coda_mutex); 2152 2153 return ret; 2154 } 2155 2156 static int coda_prepare_decode(struct coda_ctx *ctx) 2157 { 2158 struct vb2_v4l2_buffer *dst_buf; 2159 struct coda_dev *dev = ctx->dev; 2160 struct coda_q_data *q_data_dst; 2161 struct coda_buffer_meta *meta; 2162 u32 rot_mode = 0; 2163 u32 reg_addr, reg_stride; 2164 2165 dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); 2166 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 2167 2168 /* Try to copy source buffer contents into the bitstream ringbuffer */ 2169 mutex_lock(&ctx->bitstream_mutex); 2170 coda_fill_bitstream(ctx, NULL); 2171 mutex_unlock(&ctx->bitstream_mutex); 2172 2173 if (coda_get_bitstream_payload(ctx) < 512 && 2174 (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) { 2175 coda_dbg(1, ctx, "bitstream payload: %d, skipping\n", 2176 coda_get_bitstream_payload(ctx)); 2177 v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx); 2178 return -EAGAIN; 2179 } 2180 2181 /* Run coda_start_decoding (again) if not yet initialized */ 2182 if (!ctx->initialized) { 2183 int ret = __coda_start_decoding(ctx); 2184 2185 if (ret < 0) { 2186 v4l2_err(&dev->v4l2_dev, "failed to start decoding\n"); 2187 v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx); 2188 return -EAGAIN; 2189 } else { 2190 ctx->initialized = 1; 2191 } 2192 } 2193 2194 if (dev->devtype->product == CODA_960) 2195 coda_set_gdi_regs(ctx); 2196 2197 if (ctx->use_vdoa && 2198 ctx->display_idx >= 0 && 2199 ctx->display_idx < ctx->num_internal_frames) { 2200 vdoa_device_run(ctx->vdoa, 2201 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0), 2202 ctx->internal_frames[ctx->display_idx].buf.paddr); 2203 } else { 2204 if (dev->devtype->product == CODA_960) { 2205 /* 2206 * It was previously assumed that the CODA960 has an 2207 * internal list of 64 buffer entries that contains 2208 * both the registered internal frame buffers as well 2209 * as the rotator buffer output, and that the ROT_INDEX 2210 * register must be set to a value between the last 2211 * internal frame buffers' index and 64. 2212 * At least on firmware version 3.1.1 it turns out that 2213 * setting ROT_INDEX to any value >= 32 causes CODA 2214 * hangups that it can not recover from with the SRC VPU 2215 * reset. 2216 * It does appear to work however, to just set it to a 2217 * fixed value in the [ctx->num_internal_frames, 31] 2218 * range, for example CODA_MAX_FRAMEBUFFERS. 2219 */ 2220 coda_write(dev, CODA_MAX_FRAMEBUFFERS, 2221 CODA9_CMD_DEC_PIC_ROT_INDEX); 2222 2223 reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y; 2224 reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE; 2225 } else { 2226 reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y; 2227 reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE; 2228 } 2229 coda_write_base(ctx, q_data_dst, dst_buf, reg_addr); 2230 coda_write(dev, q_data_dst->bytesperline, reg_stride); 2231 2232 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode; 2233 } 2234 2235 coda_write(dev, rot_mode, CODA_CMD_DEC_PIC_ROT_MODE); 2236 2237 switch (dev->devtype->product) { 2238 case CODA_DX6: 2239 /* TBD */ 2240 case CODA_HX4: 2241 case CODA_7541: 2242 coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION); 2243 break; 2244 case CODA_960: 2245 /* 'hardcode to use interrupt disable mode'? */ 2246 coda_write(dev, (1 << 10), CODA_CMD_DEC_PIC_OPTION); 2247 break; 2248 } 2249 2250 coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM); 2251 2252 coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START); 2253 coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE); 2254 2255 if (dev->devtype->product != CODA_DX6) 2256 coda_write(dev, ctx->iram_info.axi_sram_use, 2257 CODA7_REG_BIT_AXI_SRAM_USE); 2258 2259 spin_lock(&ctx->buffer_meta_lock); 2260 meta = list_first_entry_or_null(&ctx->buffer_meta_list, 2261 struct coda_buffer_meta, list); 2262 2263 if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) { 2264 2265 /* If this is the last buffer in the bitstream, add padding */ 2266 if (meta->end == ctx->bitstream_fifo.kfifo.in) { 2267 static unsigned char buf[512]; 2268 unsigned int pad; 2269 2270 /* Pad to multiple of 256 and then add 256 more */ 2271 pad = ((0 - meta->end) & 0xff) + 256; 2272 2273 memset(buf, 0xff, sizeof(buf)); 2274 2275 kfifo_in(&ctx->bitstream_fifo, buf, pad); 2276 } 2277 } 2278 spin_unlock(&ctx->buffer_meta_lock); 2279 2280 coda_kfifo_sync_to_device_full(ctx); 2281 2282 /* Clear decode success flag */ 2283 coda_write(dev, 0, CODA_RET_DEC_PIC_SUCCESS); 2284 2285 /* Clear error return value */ 2286 coda_write(dev, 0, CODA_RET_DEC_PIC_ERR_MB); 2287 2288 trace_coda_dec_pic_run(ctx, meta); 2289 2290 coda_command_async(ctx, CODA_COMMAND_PIC_RUN); 2291 2292 return 0; 2293 } 2294 2295 static void coda_finish_decode(struct coda_ctx *ctx) 2296 { 2297 struct coda_dev *dev = ctx->dev; 2298 struct coda_q_data *q_data_src; 2299 struct coda_q_data *q_data_dst; 2300 struct vb2_v4l2_buffer *dst_buf; 2301 struct coda_buffer_meta *meta; 2302 int width, height; 2303 int decoded_idx; 2304 int display_idx; 2305 struct coda_internal_frame *decoded_frame = NULL; 2306 u32 src_fourcc; 2307 int success; 2308 u32 err_mb; 2309 int err_vdoa = 0; 2310 u32 val; 2311 2312 if (ctx->aborting) 2313 return; 2314 2315 /* Update kfifo out pointer from coda bitstream read pointer */ 2316 coda_kfifo_sync_from_device(ctx); 2317 2318 /* 2319 * in stream-end mode, the read pointer can overshoot the write pointer 2320 * by up to 512 bytes 2321 */ 2322 if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) { 2323 if (coda_get_bitstream_payload(ctx) >= ctx->bitstream.size - 512) 2324 kfifo_init(&ctx->bitstream_fifo, 2325 ctx->bitstream.vaddr, ctx->bitstream.size); 2326 } 2327 2328 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 2329 src_fourcc = q_data_src->fourcc; 2330 2331 val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS); 2332 if (val != 1) 2333 pr_err("DEC_PIC_SUCCESS = %d\n", val); 2334 2335 success = val & 0x1; 2336 if (!success) 2337 v4l2_err(&dev->v4l2_dev, "decode failed\n"); 2338 2339 if (src_fourcc == V4L2_PIX_FMT_H264) { 2340 if (val & (1 << 3)) 2341 v4l2_err(&dev->v4l2_dev, 2342 "insufficient PS buffer space (%d bytes)\n", 2343 ctx->psbuf.size); 2344 if (val & (1 << 2)) 2345 v4l2_err(&dev->v4l2_dev, 2346 "insufficient slice buffer space (%d bytes)\n", 2347 ctx->slicebuf.size); 2348 } 2349 2350 val = coda_read(dev, CODA_RET_DEC_PIC_SIZE); 2351 width = (val >> 16) & 0xffff; 2352 height = val & 0xffff; 2353 2354 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 2355 2356 /* frame crop information */ 2357 if (src_fourcc == V4L2_PIX_FMT_H264) { 2358 u32 left_right; 2359 u32 top_bottom; 2360 2361 left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT); 2362 top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM); 2363 2364 if (left_right == 0xffffffff && top_bottom == 0xffffffff) { 2365 /* Keep current crop information */ 2366 } else { 2367 struct v4l2_rect *rect = &q_data_dst->rect; 2368 2369 rect->left = left_right >> 16 & 0xffff; 2370 rect->top = top_bottom >> 16 & 0xffff; 2371 rect->width = width - rect->left - 2372 (left_right & 0xffff); 2373 rect->height = height - rect->top - 2374 (top_bottom & 0xffff); 2375 } 2376 } else { 2377 /* no cropping */ 2378 } 2379 2380 err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB); 2381 if (err_mb > 0) { 2382 if (__ratelimit(&dev->mb_err_rs)) 2383 coda_dbg(1, ctx, "errors in %d macroblocks\n", err_mb); 2384 v4l2_ctrl_s_ctrl(ctx->mb_err_cnt_ctrl, 2385 v4l2_ctrl_g_ctrl(ctx->mb_err_cnt_ctrl) + err_mb); 2386 } 2387 2388 if (dev->devtype->product == CODA_HX4 || 2389 dev->devtype->product == CODA_7541) { 2390 val = coda_read(dev, CODA_RET_DEC_PIC_OPTION); 2391 if (val == 0) { 2392 /* not enough bitstream data */ 2393 coda_dbg(1, ctx, "prescan failed: %d\n", val); 2394 ctx->hold = true; 2395 return; 2396 } 2397 } 2398 2399 /* Wait until the VDOA finished writing the previous display frame */ 2400 if (ctx->use_vdoa && 2401 ctx->display_idx >= 0 && 2402 ctx->display_idx < ctx->num_internal_frames) { 2403 err_vdoa = vdoa_wait_for_completion(ctx->vdoa); 2404 } 2405 2406 ctx->frm_dis_flg = coda_read(dev, 2407 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); 2408 2409 /* The previous display frame was copied out and can be overwritten */ 2410 if (ctx->display_idx >= 0 && 2411 ctx->display_idx < ctx->num_internal_frames) { 2412 ctx->frm_dis_flg &= ~(1 << ctx->display_idx); 2413 coda_write(dev, ctx->frm_dis_flg, 2414 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); 2415 } 2416 2417 /* 2418 * The index of the last decoded frame, not necessarily in 2419 * display order, and the index of the next display frame. 2420 * The latter could have been decoded in a previous run. 2421 */ 2422 decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX); 2423 display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX); 2424 2425 if (decoded_idx == -1) { 2426 /* no frame was decoded, but we might have a display frame */ 2427 if (display_idx >= 0 && display_idx < ctx->num_internal_frames) 2428 ctx->sequence_offset++; 2429 else if (ctx->display_idx < 0) 2430 ctx->hold = true; 2431 } else if (decoded_idx == -2) { 2432 if (ctx->display_idx >= 0 && 2433 ctx->display_idx < ctx->num_internal_frames) 2434 ctx->sequence_offset++; 2435 /* no frame was decoded, we still return remaining buffers */ 2436 } else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) { 2437 v4l2_err(&dev->v4l2_dev, 2438 "decoded frame index out of range: %d\n", decoded_idx); 2439 } else { 2440 int sequence; 2441 2442 decoded_frame = &ctx->internal_frames[decoded_idx]; 2443 2444 val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM); 2445 if (ctx->sequence_offset == -1) 2446 ctx->sequence_offset = val; 2447 2448 sequence = val + ctx->first_frame_sequence 2449 - ctx->sequence_offset; 2450 spin_lock(&ctx->buffer_meta_lock); 2451 if (!list_empty(&ctx->buffer_meta_list)) { 2452 meta = list_first_entry(&ctx->buffer_meta_list, 2453 struct coda_buffer_meta, list); 2454 list_del(&meta->list); 2455 ctx->num_metas--; 2456 spin_unlock(&ctx->buffer_meta_lock); 2457 /* 2458 * Clamp counters to 16 bits for comparison, as the HW 2459 * counter rolls over at this point for h.264. This 2460 * may be different for other formats, but using 16 bits 2461 * should be enough to detect most errors and saves us 2462 * from doing different things based on the format. 2463 */ 2464 if ((sequence & 0xffff) != (meta->sequence & 0xffff)) { 2465 v4l2_err(&dev->v4l2_dev, 2466 "sequence number mismatch (%d(%d) != %d)\n", 2467 sequence, ctx->sequence_offset, 2468 meta->sequence); 2469 } 2470 decoded_frame->meta = *meta; 2471 kfree(meta); 2472 } else { 2473 spin_unlock(&ctx->buffer_meta_lock); 2474 v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n"); 2475 memset(&decoded_frame->meta, 0, 2476 sizeof(struct coda_buffer_meta)); 2477 decoded_frame->meta.sequence = sequence; 2478 decoded_frame->meta.last = false; 2479 ctx->sequence_offset++; 2480 } 2481 2482 trace_coda_dec_pic_done(ctx, &decoded_frame->meta); 2483 2484 val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7; 2485 decoded_frame->type = (val == 0) ? V4L2_BUF_FLAG_KEYFRAME : 2486 (val == 1) ? V4L2_BUF_FLAG_PFRAME : 2487 V4L2_BUF_FLAG_BFRAME; 2488 2489 decoded_frame->error = err_mb; 2490 } 2491 2492 if (display_idx == -1) { 2493 /* 2494 * no more frames to be decoded, but there could still 2495 * be rotator output to dequeue 2496 */ 2497 ctx->hold = true; 2498 } else if (display_idx == -3) { 2499 /* possibly prescan failure */ 2500 } else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) { 2501 v4l2_err(&dev->v4l2_dev, 2502 "presentation frame index out of range: %d\n", 2503 display_idx); 2504 } 2505 2506 /* If a frame was copied out, return it */ 2507 if (ctx->display_idx >= 0 && 2508 ctx->display_idx < ctx->num_internal_frames) { 2509 struct coda_internal_frame *ready_frame; 2510 2511 ready_frame = &ctx->internal_frames[ctx->display_idx]; 2512 2513 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); 2514 dst_buf->sequence = ctx->osequence++; 2515 2516 dst_buf->field = V4L2_FIELD_NONE; 2517 dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME | 2518 V4L2_BUF_FLAG_PFRAME | 2519 V4L2_BUF_FLAG_BFRAME); 2520 dst_buf->flags |= ready_frame->type; 2521 meta = &ready_frame->meta; 2522 if (meta->last && !coda_reorder_enable(ctx)) { 2523 /* 2524 * If this was the last decoded frame, and reordering 2525 * is disabled, this will be the last display frame. 2526 */ 2527 coda_dbg(1, ctx, "last meta, marking as last frame\n"); 2528 dst_buf->flags |= V4L2_BUF_FLAG_LAST; 2529 } else if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG && 2530 display_idx == -1) { 2531 /* 2532 * If there is no designated presentation frame anymore, 2533 * this frame has to be the last one. 2534 */ 2535 coda_dbg(1, ctx, 2536 "no more frames to return, marking as last frame\n"); 2537 dst_buf->flags |= V4L2_BUF_FLAG_LAST; 2538 } 2539 dst_buf->timecode = meta->timecode; 2540 dst_buf->vb2_buf.timestamp = meta->timestamp; 2541 2542 trace_coda_dec_rot_done(ctx, dst_buf, meta); 2543 2544 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, 2545 q_data_dst->sizeimage); 2546 2547 if (ready_frame->error || err_vdoa) 2548 coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR); 2549 else 2550 coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE); 2551 2552 if (decoded_frame) { 2553 coda_dbg(1, ctx, "job finished: decoded %c frame %u, returned %c frame %u (%u/%u)%s\n", 2554 coda_frame_type_char(decoded_frame->type), 2555 decoded_frame->meta.sequence, 2556 coda_frame_type_char(dst_buf->flags), 2557 ready_frame->meta.sequence, 2558 dst_buf->sequence, ctx->qsequence, 2559 (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? 2560 " (last)" : ""); 2561 } else { 2562 coda_dbg(1, ctx, "job finished: no frame decoded (%d), returned %c frame %u (%u/%u)%s\n", 2563 decoded_idx, 2564 coda_frame_type_char(dst_buf->flags), 2565 ready_frame->meta.sequence, 2566 dst_buf->sequence, ctx->qsequence, 2567 (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? 2568 " (last)" : ""); 2569 } 2570 } else { 2571 if (decoded_frame) { 2572 coda_dbg(1, ctx, "job finished: decoded %c frame %u, no frame returned (%d)\n", 2573 coda_frame_type_char(decoded_frame->type), 2574 decoded_frame->meta.sequence, 2575 ctx->display_idx); 2576 } else { 2577 coda_dbg(1, ctx, "job finished: no frame decoded (%d) or returned (%d)\n", 2578 decoded_idx, ctx->display_idx); 2579 } 2580 } 2581 2582 /* The rotator will copy the current display frame next time */ 2583 ctx->display_idx = display_idx; 2584 2585 /* 2586 * The current decode run might have brought the bitstream fill level 2587 * below the size where we can start the next decode run. As userspace 2588 * might have filled the output queue completely and might thus be 2589 * blocked, we can't rely on the next qbuf to trigger the bitstream 2590 * refill. Check if we have data to refill the bitstream now. 2591 */ 2592 mutex_lock(&ctx->bitstream_mutex); 2593 coda_fill_bitstream(ctx, NULL); 2594 mutex_unlock(&ctx->bitstream_mutex); 2595 } 2596 2597 static void coda_decode_timeout(struct coda_ctx *ctx) 2598 { 2599 struct vb2_v4l2_buffer *dst_buf; 2600 2601 /* 2602 * For now this only handles the case where we would deadlock with 2603 * userspace, i.e. userspace issued DEC_CMD_STOP and waits for EOS, 2604 * but after a failed decode run we would hold the context and wait for 2605 * userspace to queue more buffers. 2606 */ 2607 if (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG)) 2608 return; 2609 2610 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); 2611 dst_buf->sequence = ctx->qsequence - 1; 2612 2613 coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR); 2614 } 2615 2616 const struct coda_context_ops coda_bit_decode_ops = { 2617 .queue_init = coda_decoder_queue_init, 2618 .reqbufs = coda_decoder_reqbufs, 2619 .start_streaming = coda_start_decoding, 2620 .prepare_run = coda_prepare_decode, 2621 .finish_run = coda_finish_decode, 2622 .run_timeout = coda_decode_timeout, 2623 .seq_init_work = coda_dec_seq_init_work, 2624 .seq_end_work = coda_seq_end_work, 2625 .release = coda_bit_release, 2626 }; 2627 2628 irqreturn_t coda_irq_handler(int irq, void *data) 2629 { 2630 struct coda_dev *dev = data; 2631 struct coda_ctx *ctx; 2632 2633 /* read status register to attend the IRQ */ 2634 coda_read(dev, CODA_REG_BIT_INT_STATUS); 2635 coda_write(dev, 0, CODA_REG_BIT_INT_REASON); 2636 coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET, 2637 CODA_REG_BIT_INT_CLEAR); 2638 2639 ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev); 2640 if (ctx == NULL) { 2641 v4l2_err(&dev->v4l2_dev, 2642 "Instance released before the end of transaction\n"); 2643 return IRQ_HANDLED; 2644 } 2645 2646 trace_coda_bit_done(ctx); 2647 2648 if (ctx->aborting) { 2649 coda_dbg(1, ctx, "task has been aborted\n"); 2650 } 2651 2652 if (coda_isbusy(ctx->dev)) { 2653 coda_dbg(1, ctx, "coda is still busy!!!!\n"); 2654 return IRQ_NONE; 2655 } 2656 2657 complete(&ctx->completion); 2658 2659 return IRQ_HANDLED; 2660 } 2661