1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Coda multi-standard codec IP - BIT processor functions 4 * 5 * Copyright (C) 2012 Vista Silicon S.L. 6 * Javier Martin, <javier.martin@vista-silicon.com> 7 * Xavier Duret 8 * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix 9 */ 10 11 #include <linux/clk.h> 12 #include <linux/irqreturn.h> 13 #include <linux/kernel.h> 14 #include <linux/log2.h> 15 #include <linux/platform_device.h> 16 #include <linux/ratelimit.h> 17 #include <linux/reset.h> 18 #include <linux/slab.h> 19 #include <linux/videodev2.h> 20 21 #include <media/v4l2-common.h> 22 #include <media/v4l2-ctrls.h> 23 #include <media/v4l2-fh.h> 24 #include <media/v4l2-mem2mem.h> 25 #include <media/videobuf2-v4l2.h> 26 #include <media/videobuf2-dma-contig.h> 27 #include <media/videobuf2-vmalloc.h> 28 29 #include "coda.h" 30 #include "imx-vdoa.h" 31 #define CREATE_TRACE_POINTS 32 #include "trace.h" 33 34 #define CODA_PARA_BUF_SIZE (10 * 1024) 35 #define CODA7_PS_BUF_SIZE 0x28000 36 #define CODA9_PS_SAVE_SIZE (512 * 1024) 37 38 #define CODA_DEFAULT_GAMMA 4096 39 #define CODA9_DEFAULT_GAMMA 24576 /* 0.75 * 32768 */ 40 41 static void coda_free_bitstream_buffer(struct coda_ctx *ctx); 42 43 static inline int coda_is_initialized(struct coda_dev *dev) 44 { 45 return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0; 46 } 47 48 static inline unsigned long coda_isbusy(struct coda_dev *dev) 49 { 50 return coda_read(dev, CODA_REG_BIT_BUSY); 51 } 52 53 static int coda_wait_timeout(struct coda_dev *dev) 54 { 55 unsigned long timeout = jiffies + msecs_to_jiffies(1000); 56 57 while (coda_isbusy(dev)) { 58 if (time_after(jiffies, timeout)) 59 return -ETIMEDOUT; 60 } 61 return 0; 62 } 63 64 static void coda_command_async(struct coda_ctx *ctx, int cmd) 65 { 66 struct coda_dev *dev = ctx->dev; 67 68 if (dev->devtype->product == CODA_HX4 || 69 dev->devtype->product == CODA_7541 || 70 dev->devtype->product == CODA_960) { 71 /* Restore context related registers to CODA */ 72 
coda_write(dev, ctx->bit_stream_param, 73 CODA_REG_BIT_BIT_STREAM_PARAM); 74 coda_write(dev, ctx->frm_dis_flg, 75 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); 76 coda_write(dev, ctx->frame_mem_ctrl, 77 CODA_REG_BIT_FRAME_MEM_CTRL); 78 coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR); 79 } 80 81 if (dev->devtype->product == CODA_960) { 82 coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR); 83 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN); 84 } 85 86 coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY); 87 88 coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX); 89 coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD); 90 coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD); 91 92 trace_coda_bit_run(ctx, cmd); 93 94 coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND); 95 } 96 97 static int coda_command_sync(struct coda_ctx *ctx, int cmd) 98 { 99 struct coda_dev *dev = ctx->dev; 100 int ret; 101 102 lockdep_assert_held(&dev->coda_mutex); 103 104 coda_command_async(ctx, cmd); 105 ret = coda_wait_timeout(dev); 106 trace_coda_bit_done(ctx); 107 108 return ret; 109 } 110 111 int coda_hw_reset(struct coda_ctx *ctx) 112 { 113 struct coda_dev *dev = ctx->dev; 114 unsigned long timeout; 115 unsigned int idx; 116 int ret; 117 118 lockdep_assert_held(&dev->coda_mutex); 119 120 if (!dev->rstc) 121 return -ENOENT; 122 123 idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX); 124 125 if (dev->devtype->product == CODA_960) { 126 timeout = jiffies + msecs_to_jiffies(100); 127 coda_write(dev, 0x11, CODA9_GDI_BUS_CTRL); 128 while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) { 129 if (time_after(jiffies, timeout)) 130 return -ETIME; 131 cpu_relax(); 132 } 133 } 134 135 ret = reset_control_reset(dev->rstc); 136 if (ret < 0) 137 return ret; 138 139 if (dev->devtype->product == CODA_960) 140 coda_write(dev, 0x00, CODA9_GDI_BUS_CTRL); 141 coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY); 142 coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN); 143 
ret = coda_wait_timeout(dev); 144 coda_write(dev, idx, CODA_REG_BIT_RUN_INDEX); 145 146 return ret; 147 } 148 149 static void coda_kfifo_sync_from_device(struct coda_ctx *ctx) 150 { 151 struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; 152 struct coda_dev *dev = ctx->dev; 153 u32 rd_ptr; 154 155 rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); 156 kfifo->out = (kfifo->in & ~kfifo->mask) | 157 (rd_ptr - ctx->bitstream.paddr); 158 if (kfifo->out > kfifo->in) 159 kfifo->out -= kfifo->mask + 1; 160 } 161 162 static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx) 163 { 164 struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; 165 struct coda_dev *dev = ctx->dev; 166 u32 rd_ptr, wr_ptr; 167 168 rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask); 169 coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx)); 170 wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask); 171 coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); 172 } 173 174 static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx) 175 { 176 struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo; 177 struct coda_dev *dev = ctx->dev; 178 u32 wr_ptr; 179 180 wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask); 181 coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); 182 } 183 184 static int coda_h264_bitstream_pad(struct coda_ctx *ctx, u32 size) 185 { 186 unsigned char *buf; 187 u32 n; 188 189 if (size < 6) 190 size = 6; 191 192 buf = kmalloc(size, GFP_KERNEL); 193 if (!buf) 194 return -ENOMEM; 195 196 coda_h264_filler_nal(size, buf); 197 n = kfifo_in(&ctx->bitstream_fifo, buf, size); 198 kfree(buf); 199 200 return (n < size) ? 
-ENOSPC : 0; 201 } 202 203 int coda_bitstream_flush(struct coda_ctx *ctx) 204 { 205 int ret; 206 207 if (ctx->inst_type != CODA_INST_DECODER || !ctx->use_bit) 208 return 0; 209 210 ret = coda_command_sync(ctx, CODA_COMMAND_DEC_BUF_FLUSH); 211 if (ret < 0) { 212 v4l2_err(&ctx->dev->v4l2_dev, "failed to flush bitstream\n"); 213 return ret; 214 } 215 216 kfifo_init(&ctx->bitstream_fifo, ctx->bitstream.vaddr, 217 ctx->bitstream.size); 218 coda_kfifo_sync_to_device_full(ctx); 219 220 return 0; 221 } 222 223 static int coda_bitstream_queue(struct coda_ctx *ctx, const u8 *buf, u32 size) 224 { 225 u32 n = kfifo_in(&ctx->bitstream_fifo, buf, size); 226 227 return (n < size) ? -ENOSPC : 0; 228 } 229 230 static u32 coda_buffer_parse_headers(struct coda_ctx *ctx, 231 struct vb2_v4l2_buffer *src_buf, 232 u32 payload) 233 { 234 u8 *vaddr = vb2_plane_vaddr(&src_buf->vb2_buf, 0); 235 u32 size = 0; 236 237 switch (ctx->codec->src_fourcc) { 238 case V4L2_PIX_FMT_MPEG2: 239 size = coda_mpeg2_parse_headers(ctx, vaddr, payload); 240 break; 241 case V4L2_PIX_FMT_MPEG4: 242 size = coda_mpeg4_parse_headers(ctx, vaddr, payload); 243 break; 244 default: 245 break; 246 } 247 248 return size; 249 } 250 251 static bool coda_bitstream_try_queue(struct coda_ctx *ctx, 252 struct vb2_v4l2_buffer *src_buf) 253 { 254 unsigned long payload = vb2_get_plane_payload(&src_buf->vb2_buf, 0); 255 u8 *vaddr = vb2_plane_vaddr(&src_buf->vb2_buf, 0); 256 int ret; 257 int i; 258 259 if (coda_get_bitstream_payload(ctx) + payload + 512 >= 260 ctx->bitstream.size) 261 return false; 262 263 if (!vaddr) { 264 v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n"); 265 return true; 266 } 267 268 if (ctx->qsequence == 0 && payload < 512) { 269 /* 270 * Add padding after the first buffer, if it is too small to be 271 * fetched by the CODA, by repeating the headers. 
Without 272 * repeated headers, or the first frame already queued, decoder 273 * sequence initialization fails with error code 0x2000 on i.MX6 274 * or error code 0x1 on i.MX51. 275 */ 276 u32 header_size = coda_buffer_parse_headers(ctx, src_buf, 277 payload); 278 279 if (header_size) { 280 coda_dbg(1, ctx, "pad with %u-byte header\n", 281 header_size); 282 for (i = payload; i < 512; i += header_size) { 283 ret = coda_bitstream_queue(ctx, vaddr, 284 header_size); 285 if (ret < 0) { 286 v4l2_err(&ctx->dev->v4l2_dev, 287 "bitstream buffer overflow\n"); 288 return false; 289 } 290 if (ctx->dev->devtype->product == CODA_960) 291 break; 292 } 293 } else { 294 coda_dbg(1, ctx, 295 "could not parse header, sequence initialization might fail\n"); 296 } 297 298 /* Add padding before the first buffer, if it is too small */ 299 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264) 300 coda_h264_bitstream_pad(ctx, 512 - payload); 301 } 302 303 ret = coda_bitstream_queue(ctx, vaddr, payload); 304 if (ret < 0) { 305 v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n"); 306 return false; 307 } 308 309 src_buf->sequence = ctx->qsequence++; 310 311 /* Sync read pointer to device */ 312 if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev)) 313 coda_kfifo_sync_to_device_write(ctx); 314 315 /* Set the stream-end flag after the last buffer is queued */ 316 if (src_buf->flags & V4L2_BUF_FLAG_LAST) 317 coda_bit_stream_end_flag(ctx); 318 ctx->hold = false; 319 320 return true; 321 } 322 323 void coda_fill_bitstream(struct coda_ctx *ctx, struct list_head *buffer_list) 324 { 325 struct vb2_v4l2_buffer *src_buf; 326 struct coda_buffer_meta *meta; 327 u32 start; 328 329 lockdep_assert_held(&ctx->bitstream_mutex); 330 331 if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) 332 return; 333 334 while (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) > 0) { 335 /* 336 * Only queue two JPEGs into the bitstream buffer to keep 337 * latency low. 
We need at least one complete buffer and the 338 * header of another buffer (for prescan) in the bitstream. 339 */ 340 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG && 341 ctx->num_metas > 1) 342 break; 343 344 if (ctx->num_internal_frames && 345 ctx->num_metas >= ctx->num_internal_frames) { 346 meta = list_first_entry(&ctx->buffer_meta_list, 347 struct coda_buffer_meta, list); 348 349 /* 350 * If we managed to fill in at least a full reorder 351 * window of buffers (num_internal_frames is a 352 * conservative estimate for this) and the bitstream 353 * prefetcher has at least 2 256 bytes periods beyond 354 * the first buffer to fetch, we can safely stop queuing 355 * in order to limit the decoder drain latency. 356 */ 357 if (coda_bitstream_can_fetch_past(ctx, meta->end)) 358 break; 359 } 360 361 src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); 362 363 /* Drop frames that do not start/end with a SOI/EOI markers */ 364 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG && 365 !coda_jpeg_check_buffer(ctx, &src_buf->vb2_buf)) { 366 v4l2_err(&ctx->dev->v4l2_dev, 367 "dropping invalid JPEG frame %d\n", 368 ctx->qsequence); 369 src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); 370 if (buffer_list) { 371 struct v4l2_m2m_buffer *m2m_buf; 372 373 m2m_buf = container_of(src_buf, 374 struct v4l2_m2m_buffer, 375 vb); 376 list_add_tail(&m2m_buf->list, buffer_list); 377 } else { 378 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); 379 } 380 continue; 381 } 382 383 /* Dump empty buffers */ 384 if (!vb2_get_plane_payload(&src_buf->vb2_buf, 0)) { 385 src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); 386 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); 387 continue; 388 } 389 390 /* Buffer start position */ 391 start = ctx->bitstream_fifo.kfifo.in; 392 393 if (coda_bitstream_try_queue(ctx, src_buf)) { 394 /* 395 * Source buffer is queued in the bitstream ringbuffer; 396 * queue the timestamp and mark source buffer as done 397 */ 398 src_buf = 
v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); 399 400 meta = kmalloc(sizeof(*meta), GFP_KERNEL); 401 if (meta) { 402 meta->sequence = src_buf->sequence; 403 meta->timecode = src_buf->timecode; 404 meta->timestamp = src_buf->vb2_buf.timestamp; 405 meta->start = start; 406 meta->end = ctx->bitstream_fifo.kfifo.in; 407 meta->last = src_buf->flags & V4L2_BUF_FLAG_LAST; 408 if (meta->last) 409 coda_dbg(1, ctx, "marking last meta"); 410 spin_lock(&ctx->buffer_meta_lock); 411 list_add_tail(&meta->list, 412 &ctx->buffer_meta_list); 413 ctx->num_metas++; 414 spin_unlock(&ctx->buffer_meta_lock); 415 416 trace_coda_bit_queue(ctx, src_buf, meta); 417 } 418 419 if (buffer_list) { 420 struct v4l2_m2m_buffer *m2m_buf; 421 422 m2m_buf = container_of(src_buf, 423 struct v4l2_m2m_buffer, 424 vb); 425 list_add_tail(&m2m_buf->list, buffer_list); 426 } else { 427 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); 428 } 429 } else { 430 break; 431 } 432 } 433 } 434 435 void coda_bit_stream_end_flag(struct coda_ctx *ctx) 436 { 437 struct coda_dev *dev = ctx->dev; 438 439 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG; 440 441 /* If this context is currently running, update the hardware flag */ 442 if ((dev->devtype->product == CODA_960) && 443 coda_isbusy(dev) && 444 (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) { 445 coda_write(dev, ctx->bit_stream_param, 446 CODA_REG_BIT_BIT_STREAM_PARAM); 447 } 448 } 449 450 static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value) 451 { 452 struct coda_dev *dev = ctx->dev; 453 u32 *p = ctx->parabuf.vaddr; 454 455 if (dev->devtype->product == CODA_DX6) 456 p[index] = value; 457 else 458 p[index ^ 1] = value; 459 } 460 461 static inline int coda_alloc_context_buf(struct coda_ctx *ctx, 462 struct coda_aux_buf *buf, size_t size, 463 const char *name) 464 { 465 return coda_alloc_aux_buf(ctx->dev, buf, size, name, ctx->debugfs_entry); 466 } 467 468 469 static void coda_free_framebuffers(struct coda_ctx *ctx) 470 { 471 int i; 472 473 
for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++) 474 coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i].buf); 475 } 476 477 static int coda_alloc_framebuffers(struct coda_ctx *ctx, 478 struct coda_q_data *q_data, u32 fourcc) 479 { 480 struct coda_dev *dev = ctx->dev; 481 unsigned int ysize, ycbcr_size; 482 int ret; 483 int i; 484 485 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 || 486 ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 || 487 ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 || 488 ctx->codec->dst_fourcc == V4L2_PIX_FMT_MPEG4) 489 ysize = round_up(q_data->rect.width, 16) * 490 round_up(q_data->rect.height, 16); 491 else 492 ysize = round_up(q_data->rect.width, 8) * q_data->rect.height; 493 494 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) 495 ycbcr_size = round_up(ysize, 4096) + ysize / 2; 496 else 497 ycbcr_size = ysize + ysize / 2; 498 499 /* Allocate frame buffers */ 500 for (i = 0; i < ctx->num_internal_frames; i++) { 501 size_t size = ycbcr_size; 502 char *name; 503 504 /* Add space for mvcol buffers */ 505 if (dev->devtype->product != CODA_DX6 && 506 (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 || 507 (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0))) 508 size += ysize / 4; 509 name = kasprintf(GFP_KERNEL, "fb%d", i); 510 if (!name) { 511 coda_free_framebuffers(ctx); 512 return -ENOMEM; 513 } 514 ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i].buf, 515 size, name); 516 kfree(name); 517 if (ret < 0) { 518 coda_free_framebuffers(ctx); 519 return ret; 520 } 521 } 522 523 /* Register frame buffers in the parameter buffer */ 524 for (i = 0; i < ctx->num_internal_frames; i++) { 525 u32 y, cb, cr, mvcol; 526 527 /* Start addresses of Y, Cb, Cr planes */ 528 y = ctx->internal_frames[i].buf.paddr; 529 cb = y + ysize; 530 cr = y + ysize + ysize/4; 531 mvcol = y + ysize + ysize/4 + ysize/4; 532 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) { 533 cb = round_up(cb, 4096); 534 mvcol = cb + ysize/2; 535 cr = 0; 536 /* Packed 
20-bit MSB of base addresses */ 537 /* YYYYYCCC, CCyyyyyc, cccc.... */ 538 y = (y & 0xfffff000) | cb >> 20; 539 cb = (cb & 0x000ff000) << 12; 540 } 541 coda_parabuf_write(ctx, i * 3 + 0, y); 542 coda_parabuf_write(ctx, i * 3 + 1, cb); 543 coda_parabuf_write(ctx, i * 3 + 2, cr); 544 545 if (dev->devtype->product == CODA_DX6) 546 continue; 547 548 /* mvcol buffer for h.264 and mpeg4 */ 549 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264) 550 coda_parabuf_write(ctx, 96 + i, mvcol); 551 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0) 552 coda_parabuf_write(ctx, 97, mvcol); 553 } 554 555 return 0; 556 } 557 558 static void coda_free_context_buffers(struct coda_ctx *ctx) 559 { 560 struct coda_dev *dev = ctx->dev; 561 562 coda_free_aux_buf(dev, &ctx->slicebuf); 563 coda_free_aux_buf(dev, &ctx->psbuf); 564 if (dev->devtype->product != CODA_DX6) 565 coda_free_aux_buf(dev, &ctx->workbuf); 566 coda_free_aux_buf(dev, &ctx->parabuf); 567 } 568 569 static int coda_alloc_context_buffers(struct coda_ctx *ctx, 570 struct coda_q_data *q_data) 571 { 572 struct coda_dev *dev = ctx->dev; 573 size_t size; 574 int ret; 575 576 if (!ctx->parabuf.vaddr) { 577 ret = coda_alloc_context_buf(ctx, &ctx->parabuf, 578 CODA_PARA_BUF_SIZE, "parabuf"); 579 if (ret < 0) 580 return ret; 581 } 582 583 if (dev->devtype->product == CODA_DX6) 584 return 0; 585 586 if (!ctx->slicebuf.vaddr && q_data->fourcc == V4L2_PIX_FMT_H264) { 587 /* worst case slice size */ 588 size = (DIV_ROUND_UP(q_data->rect.width, 16) * 589 DIV_ROUND_UP(q_data->rect.height, 16)) * 3200 / 8 + 512; 590 ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size, 591 "slicebuf"); 592 if (ret < 0) 593 goto err; 594 } 595 596 if (!ctx->psbuf.vaddr && (dev->devtype->product == CODA_HX4 || 597 dev->devtype->product == CODA_7541)) { 598 ret = coda_alloc_context_buf(ctx, &ctx->psbuf, 599 CODA7_PS_BUF_SIZE, "psbuf"); 600 if (ret < 0) 601 goto err; 602 } 603 604 if (!ctx->workbuf.vaddr) { 605 size = dev->devtype->workbuf_size; 606 if 
(dev->devtype->product == CODA_960 && 607 q_data->fourcc == V4L2_PIX_FMT_H264) 608 size += CODA9_PS_SAVE_SIZE; 609 ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size, 610 "workbuf"); 611 if (ret < 0) 612 goto err; 613 } 614 615 return 0; 616 617 err: 618 coda_free_context_buffers(ctx); 619 return ret; 620 } 621 622 static int coda_encode_header(struct coda_ctx *ctx, struct vb2_v4l2_buffer *buf, 623 int header_code, u8 *header, int *size) 624 { 625 struct vb2_buffer *vb = &buf->vb2_buf; 626 struct coda_dev *dev = ctx->dev; 627 struct coda_q_data *q_data_src; 628 struct v4l2_rect *r; 629 size_t bufsize; 630 int ret; 631 int i; 632 633 if (dev->devtype->product == CODA_960) 634 memset(vb2_plane_vaddr(vb, 0), 0, 64); 635 636 coda_write(dev, vb2_dma_contig_plane_dma_addr(vb, 0), 637 CODA_CMD_ENC_HEADER_BB_START); 638 bufsize = vb2_plane_size(vb, 0); 639 if (dev->devtype->product == CODA_960) 640 bufsize /= 1024; 641 coda_write(dev, bufsize, CODA_CMD_ENC_HEADER_BB_SIZE); 642 if (dev->devtype->product == CODA_960 && 643 ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 && 644 header_code == CODA_HEADER_H264_SPS) { 645 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 646 r = &q_data_src->rect; 647 648 if (r->width % 16 || r->height % 16) { 649 u32 crop_right = round_up(r->width, 16) - r->width; 650 u32 crop_bottom = round_up(r->height, 16) - r->height; 651 652 coda_write(dev, crop_right, 653 CODA9_CMD_ENC_HEADER_FRAME_CROP_H); 654 coda_write(dev, crop_bottom, 655 CODA9_CMD_ENC_HEADER_FRAME_CROP_V); 656 header_code |= CODA9_HEADER_FRAME_CROP; 657 } 658 } 659 coda_write(dev, header_code, CODA_CMD_ENC_HEADER_CODE); 660 ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER); 661 if (ret < 0) { 662 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n"); 663 return ret; 664 } 665 666 if (dev->devtype->product == CODA_960) { 667 for (i = 63; i > 0; i--) 668 if (((char *)vb2_plane_vaddr(vb, 0))[i] != 0) 669 break; 670 *size = i + 1; 671 } else { 672 *size = 
coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) - 673 coda_read(dev, CODA_CMD_ENC_HEADER_BB_START); 674 } 675 memcpy(header, vb2_plane_vaddr(vb, 0), *size); 676 677 return 0; 678 } 679 680 static u32 coda_slice_mode(struct coda_ctx *ctx) 681 { 682 int size, unit; 683 684 switch (ctx->params.slice_mode) { 685 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE: 686 default: 687 return 0; 688 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_MAX_MB: 689 size = ctx->params.slice_max_mb; 690 unit = 1; 691 break; 692 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_MAX_BYTES: 693 size = ctx->params.slice_max_bits; 694 unit = 0; 695 break; 696 } 697 698 return ((size & CODA_SLICING_SIZE_MASK) << CODA_SLICING_SIZE_OFFSET) | 699 ((unit & CODA_SLICING_UNIT_MASK) << CODA_SLICING_UNIT_OFFSET) | 700 ((1 & CODA_SLICING_MODE_MASK) << CODA_SLICING_MODE_OFFSET); 701 } 702 703 static int coda_enc_param_change(struct coda_ctx *ctx) 704 { 705 struct coda_dev *dev = ctx->dev; 706 u32 change_enable = 0; 707 u32 success; 708 int ret; 709 710 if (ctx->params.gop_size_changed) { 711 change_enable |= CODA_PARAM_CHANGE_RC_GOP; 712 coda_write(dev, ctx->params.gop_size, 713 CODA_CMD_ENC_PARAM_RC_GOP); 714 ctx->gopcounter = ctx->params.gop_size - 1; 715 ctx->params.gop_size_changed = false; 716 } 717 if (ctx->params.h264_intra_qp_changed) { 718 coda_dbg(1, ctx, "parameter change: intra Qp %u\n", 719 ctx->params.h264_intra_qp); 720 721 if (ctx->params.bitrate) { 722 change_enable |= CODA_PARAM_CHANGE_RC_INTRA_QP; 723 coda_write(dev, ctx->params.h264_intra_qp, 724 CODA_CMD_ENC_PARAM_RC_INTRA_QP); 725 } 726 ctx->params.h264_intra_qp_changed = false; 727 } 728 if (ctx->params.bitrate_changed) { 729 coda_dbg(1, ctx, "parameter change: bitrate %u kbit/s\n", 730 ctx->params.bitrate); 731 change_enable |= CODA_PARAM_CHANGE_RC_BITRATE; 732 coda_write(dev, ctx->params.bitrate, 733 CODA_CMD_ENC_PARAM_RC_BITRATE); 734 ctx->params.bitrate_changed = false; 735 } 736 if (ctx->params.framerate_changed) { 737 coda_dbg(1, ctx, "parameter 
change: frame rate %u/%u Hz\n", 738 ctx->params.framerate & 0xffff, 739 (ctx->params.framerate >> 16) + 1); 740 change_enable |= CODA_PARAM_CHANGE_RC_FRAME_RATE; 741 coda_write(dev, ctx->params.framerate, 742 CODA_CMD_ENC_PARAM_RC_FRAME_RATE); 743 ctx->params.framerate_changed = false; 744 } 745 if (ctx->params.intra_refresh_changed) { 746 coda_dbg(1, ctx, "parameter change: intra refresh MBs %u\n", 747 ctx->params.intra_refresh); 748 change_enable |= CODA_PARAM_CHANGE_INTRA_MB_NUM; 749 coda_write(dev, ctx->params.intra_refresh, 750 CODA_CMD_ENC_PARAM_INTRA_MB_NUM); 751 ctx->params.intra_refresh_changed = false; 752 } 753 if (ctx->params.slice_mode_changed) { 754 change_enable |= CODA_PARAM_CHANGE_SLICE_MODE; 755 coda_write(dev, coda_slice_mode(ctx), 756 CODA_CMD_ENC_PARAM_SLICE_MODE); 757 ctx->params.slice_mode_changed = false; 758 } 759 760 if (!change_enable) 761 return 0; 762 763 coda_write(dev, change_enable, CODA_CMD_ENC_PARAM_CHANGE_ENABLE); 764 765 ret = coda_command_sync(ctx, CODA_COMMAND_RC_CHANGE_PARAMETER); 766 if (ret < 0) 767 return ret; 768 769 success = coda_read(dev, CODA_RET_ENC_PARAM_CHANGE_SUCCESS); 770 if (success != 1) 771 coda_dbg(1, ctx, "parameter change failed: %u\n", success); 772 773 return 0; 774 } 775 776 static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size) 777 { 778 phys_addr_t ret; 779 780 size = round_up(size, 1024); 781 if (size > iram->remaining) 782 return 0; 783 iram->remaining -= size; 784 785 ret = iram->next_paddr; 786 iram->next_paddr += size; 787 788 return ret; 789 } 790 791 static void coda_setup_iram(struct coda_ctx *ctx) 792 { 793 struct coda_iram_info *iram_info = &ctx->iram_info; 794 struct coda_dev *dev = ctx->dev; 795 int w64, w128; 796 int mb_width; 797 int dbk_bits; 798 int bit_bits; 799 int ip_bits; 800 int me_bits; 801 802 memset(iram_info, 0, sizeof(*iram_info)); 803 iram_info->next_paddr = dev->iram.paddr; 804 iram_info->remaining = dev->iram.size; 805 806 if (!dev->iram.vaddr) 807 
return; 808 809 switch (dev->devtype->product) { 810 case CODA_HX4: 811 dbk_bits = CODA7_USE_HOST_DBK_ENABLE; 812 bit_bits = CODA7_USE_HOST_BIT_ENABLE; 813 ip_bits = CODA7_USE_HOST_IP_ENABLE; 814 me_bits = CODA7_USE_HOST_ME_ENABLE; 815 break; 816 case CODA_7541: 817 dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE; 818 bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE; 819 ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE; 820 me_bits = CODA7_USE_HOST_ME_ENABLE | CODA7_USE_ME_ENABLE; 821 break; 822 case CODA_960: 823 dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE; 824 bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE; 825 ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE; 826 me_bits = 0; 827 break; 828 default: /* CODA_DX6 */ 829 return; 830 } 831 832 if (ctx->inst_type == CODA_INST_ENCODER) { 833 struct coda_q_data *q_data_src; 834 835 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 836 mb_width = DIV_ROUND_UP(q_data_src->rect.width, 16); 837 w128 = mb_width * 128; 838 w64 = mb_width * 64; 839 840 /* Prioritize in case IRAM is too small for everything */ 841 if (dev->devtype->product == CODA_HX4 || 842 dev->devtype->product == CODA_7541) { 843 iram_info->search_ram_size = round_up(mb_width * 16 * 844 36 + 2048, 1024); 845 iram_info->search_ram_paddr = coda_iram_alloc(iram_info, 846 iram_info->search_ram_size); 847 if (!iram_info->search_ram_paddr) { 848 pr_err("IRAM is smaller than the search ram size\n"); 849 goto out; 850 } 851 iram_info->axi_sram_use |= me_bits; 852 } 853 854 /* Only H.264BP and H.263P3 are considered */ 855 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64); 856 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64); 857 if (!iram_info->buf_dbk_y_use || !iram_info->buf_dbk_c_use) 858 goto out; 859 iram_info->axi_sram_use |= dbk_bits; 860 861 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128); 862 if (!iram_info->buf_bit_use) 863 goto out; 864 
iram_info->axi_sram_use |= bit_bits; 865 866 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128); 867 if (!iram_info->buf_ip_ac_dc_use) 868 goto out; 869 iram_info->axi_sram_use |= ip_bits; 870 871 /* OVL and BTP disabled for encoder */ 872 } else if (ctx->inst_type == CODA_INST_DECODER) { 873 struct coda_q_data *q_data_dst; 874 875 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 876 mb_width = DIV_ROUND_UP(q_data_dst->width, 16); 877 w128 = mb_width * 128; 878 879 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128); 880 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128); 881 if (!iram_info->buf_dbk_y_use || !iram_info->buf_dbk_c_use) 882 goto out; 883 iram_info->axi_sram_use |= dbk_bits; 884 885 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128); 886 if (!iram_info->buf_bit_use) 887 goto out; 888 iram_info->axi_sram_use |= bit_bits; 889 890 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128); 891 if (!iram_info->buf_ip_ac_dc_use) 892 goto out; 893 iram_info->axi_sram_use |= ip_bits; 894 895 /* OVL and BTP unused as there is no VC1 support yet */ 896 } 897 898 out: 899 if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE)) 900 coda_dbg(1, ctx, "IRAM smaller than needed\n"); 901 902 if (dev->devtype->product == CODA_HX4 || 903 dev->devtype->product == CODA_7541) { 904 /* TODO - Enabling these causes picture errors on CODA7541 */ 905 if (ctx->inst_type == CODA_INST_DECODER) { 906 /* fw 1.4.50 */ 907 iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE | 908 CODA7_USE_IP_ENABLE); 909 } else { 910 /* fw 13.4.29 */ 911 iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE | 912 CODA7_USE_HOST_DBK_ENABLE | 913 CODA7_USE_IP_ENABLE | 914 CODA7_USE_DBK_ENABLE); 915 } 916 } 917 } 918 919 static u32 coda_supported_firmwares[] = { 920 CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5), 921 CODA_FIRMWARE_VERNUM(CODA_HX4, 1, 4, 50), 922 CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50), 923 CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5), 924 
CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 9), 925 CODA_FIRMWARE_VERNUM(CODA_960, 2, 3, 10), 926 CODA_FIRMWARE_VERNUM(CODA_960, 3, 1, 1), 927 }; 928 929 static bool coda_firmware_supported(u32 vernum) 930 { 931 int i; 932 933 for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++) 934 if (vernum == coda_supported_firmwares[i]) 935 return true; 936 return false; 937 } 938 939 int coda_check_firmware(struct coda_dev *dev) 940 { 941 u16 product, major, minor, release; 942 u32 data; 943 int ret; 944 945 ret = clk_prepare_enable(dev->clk_per); 946 if (ret) 947 goto err_clk_per; 948 949 ret = clk_prepare_enable(dev->clk_ahb); 950 if (ret) 951 goto err_clk_ahb; 952 953 coda_write(dev, 0, CODA_CMD_FIRMWARE_VERNUM); 954 coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY); 955 coda_write(dev, 0, CODA_REG_BIT_RUN_INDEX); 956 coda_write(dev, 0, CODA_REG_BIT_RUN_COD_STD); 957 coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND); 958 if (coda_wait_timeout(dev)) { 959 v4l2_err(&dev->v4l2_dev, "firmware get command error\n"); 960 ret = -EIO; 961 goto err_run_cmd; 962 } 963 964 if (dev->devtype->product == CODA_960) { 965 data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV); 966 v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n", 967 data); 968 } 969 970 /* Check we are compatible with the loaded firmware */ 971 data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM); 972 product = CODA_FIRMWARE_PRODUCT(data); 973 major = CODA_FIRMWARE_MAJOR(data); 974 minor = CODA_FIRMWARE_MINOR(data); 975 release = CODA_FIRMWARE_RELEASE(data); 976 977 clk_disable_unprepare(dev->clk_per); 978 clk_disable_unprepare(dev->clk_ahb); 979 980 if (product != dev->devtype->product) { 981 v4l2_err(&dev->v4l2_dev, 982 "Wrong firmware. 
Hw: %s, Fw: %s, Version: %u.%u.%u\n", 983 coda_product_name(dev->devtype->product), 984 coda_product_name(product), major, minor, release); 985 return -EINVAL; 986 } 987 988 v4l2_info(&dev->v4l2_dev, "Initialized %s.\n", 989 coda_product_name(product)); 990 991 if (coda_firmware_supported(data)) { 992 v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n", 993 major, minor, release); 994 } else { 995 v4l2_warn(&dev->v4l2_dev, 996 "Unsupported firmware version: %u.%u.%u\n", 997 major, minor, release); 998 } 999 1000 return 0; 1001 1002 err_run_cmd: 1003 clk_disable_unprepare(dev->clk_ahb); 1004 err_clk_ahb: 1005 clk_disable_unprepare(dev->clk_per); 1006 err_clk_per: 1007 return ret; 1008 } 1009 1010 static void coda9_set_frame_cache(struct coda_ctx *ctx, u32 fourcc) 1011 { 1012 u32 cache_size, cache_config; 1013 1014 if (ctx->tiled_map_type == GDI_LINEAR_FRAME_MAP) { 1015 /* Luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size */ 1016 cache_size = 0x20262024; 1017 cache_config = 2 << CODA9_CACHE_PAGEMERGE_OFFSET; 1018 } else { 1019 /* Luma 0x2 page, 4x4 cache, chroma 0x2 page, 4x3 cache size */ 1020 cache_size = 0x02440243; 1021 cache_config = 1 << CODA9_CACHE_PAGEMERGE_OFFSET; 1022 } 1023 coda_write(ctx->dev, cache_size, CODA9_CMD_SET_FRAME_CACHE_SIZE); 1024 if (fourcc == V4L2_PIX_FMT_NV12 || fourcc == V4L2_PIX_FMT_YUYV) { 1025 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET | 1026 16 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET | 1027 0 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET; 1028 } else { 1029 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET | 1030 8 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET | 1031 8 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET; 1032 } 1033 coda_write(ctx->dev, cache_config, CODA9_CMD_SET_FRAME_CACHE_CONFIG); 1034 } 1035 1036 /* 1037 * Encoder context operations 1038 */ 1039 1040 static int coda_encoder_reqbufs(struct coda_ctx *ctx, 1041 struct v4l2_requestbuffers *rb) 1042 { 1043 struct coda_q_data *q_data_src; 1044 int ret; 1045 1046 if 
/*
 * Start an encoding session on the BIT processor.
 *
 * Programs the sequence parameters for the capture (coded) format, issues
 * SEQ_INIT, allocates the internal frame buffers for non-JPEG formats,
 * issues SET_FRAME_BUF and finally has the firmware generate the stream
 * headers (H.264 SPS/PPS or MPEG-4 VOS/VIS/VOL) into ctx->vpu_header[].
 *
 * dev->coda_mutex is taken after the JPEG quantization tables have been
 * allocated and is held until the function returns through the "out" label.
 *
 * Returns 0 on success or a negative errno.
 */
static int coda_start_encoding(struct coda_ctx *ctx)
{
	struct coda_dev *dev = ctx->dev;
	struct v4l2_device *v4l2_dev = &dev->v4l2_dev;
	struct coda_q_data *q_data_src, *q_data_dst;
	u32 bitstream_buf, bitstream_size;
	struct vb2_v4l2_buffer *buf;
	int gamma, ret, value;
	u32 dst_fourcc;
	int num_fb;
	u32 stride;

	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
	dst_fourcc = q_data_dst->fourcc;

	/* The next capture buffer doubles as the bitstream buffer */
	buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
	bitstream_buf = vb2_dma_contig_plane_dma_addr(&buf->vb2_buf, 0);
	bitstream_size = q_data_dst->sizeimage;

	if (!coda_is_initialized(dev)) {
		v4l2_err(v4l2_dev, "coda is not initialized.\n");
		return -EFAULT;
	}

	if (dst_fourcc == V4L2_PIX_FMT_JPEG) {
		/* Allocate the two 64-byte quantization tables on first use */
		if (!ctx->params.jpeg_qmat_tab[0]) {
			ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
			if (!ctx->params.jpeg_qmat_tab[0])
				return -ENOMEM;
		}
		if (!ctx->params.jpeg_qmat_tab[1]) {
			ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
			if (!ctx->params.jpeg_qmat_tab[1])
				return -ENOMEM;
		}
		coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
	}

	mutex_lock(&dev->coda_mutex);

	/* Read and write pointers both start at the bitstream buffer base */
	coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
	coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
	coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
	switch (dev->devtype->product) {
	case CODA_DX6:
		coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN |
			CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
		break;
	case CODA_960:
		coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
		fallthrough;
	case CODA_HX4:
	case CODA_7541:
		coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN |
			CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
		break;
	}

	/* Clear the format dependent bits, then set them for this session */
	ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
				 CODA9_FRAME_TILED2LINEAR);
	if (q_data_src->fourcc == V4L2_PIX_FMT_NV12)
		ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
	if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
		ctx->frame_mem_ctrl |= (0x3 << 9) | CODA9_FRAME_TILED2LINEAR;
	coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);

	if (dev->devtype->product == CODA_DX6) {
		/* Configure the coda */
		coda_write(dev, dev->iram.paddr,
			   CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR);
	}

	/* Could set rotation here if needed */
	value = 0;
	switch (dev->devtype->product) {
	case CODA_DX6:
		value = (q_data_src->rect.width & CODADX6_PICWIDTH_MASK)
			<< CODADX6_PICWIDTH_OFFSET;
		value |= (q_data_src->rect.height & CODADX6_PICHEIGHT_MASK)
			 << CODA_PICHEIGHT_OFFSET;
		break;
	case CODA_HX4:
	case CODA_7541:
		/* For H.264 the source size is rounded up to a multiple of 16 */
		if (dst_fourcc == V4L2_PIX_FMT_H264) {
			value = (round_up(q_data_src->rect.width, 16) &
				 CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET;
			value |= (round_up(q_data_src->rect.height, 16) &
				 CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET;
			break;
		}
		fallthrough;
	case CODA_960:
		value = (q_data_src->rect.width & CODA7_PICWIDTH_MASK)
			<< CODA7_PICWIDTH_OFFSET;
		value |= (q_data_src->rect.height & CODA7_PICHEIGHT_MASK)
			 << CODA_PICHEIGHT_OFFSET;
	}
	coda_write(dev, value, CODA_CMD_ENC_SEQ_SRC_SIZE);
	/* JPEG encoding resets the stored frame rate parameter to zero */
	if (dst_fourcc == V4L2_PIX_FMT_JPEG)
		ctx->params.framerate = 0;
	coda_write(dev, ctx->params.framerate,
		   CODA_CMD_ENC_SEQ_SRC_F_RATE);

	/* Codec standard and the codec specific sequence parameters */
	ctx->params.codec_mode = ctx->codec->mode;
	switch (dst_fourcc) {
	case V4L2_PIX_FMT_MPEG4:
		if (dev->devtype->product == CODA_960)
			coda_write(dev, CODA9_STD_MPEG4,
				   CODA_CMD_ENC_SEQ_COD_STD);
		else
			coda_write(dev, CODA_STD_MPEG4,
				   CODA_CMD_ENC_SEQ_COD_STD);
		coda_write(dev, 0, CODA_CMD_ENC_SEQ_MP4_PARA);
		break;
	case V4L2_PIX_FMT_H264:
		if (dev->devtype->product == CODA_960)
			coda_write(dev, CODA9_STD_H264,
				   CODA_CMD_ENC_SEQ_COD_STD);
		else
			coda_write(dev, CODA_STD_H264,
				   CODA_CMD_ENC_SEQ_COD_STD);
		value = ((ctx->params.h264_disable_deblocking_filter_idc &
			  CODA_264PARAM_DISABLEDEBLK_MASK) <<
			 CODA_264PARAM_DISABLEDEBLK_OFFSET) |
			((ctx->params.h264_slice_alpha_c0_offset_div2 &
			  CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) <<
			 CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) |
			((ctx->params.h264_slice_beta_offset_div2 &
			  CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) <<
			 CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET) |
			(ctx->params.h264_constrained_intra_pred_flag <<
			 CODA_264PARAM_CONSTRAINEDINTRAPREDFLAG_OFFSET) |
			(ctx->params.h264_chroma_qp_index_offset &
			 CODA_264PARAM_CHROMAQPOFFSET_MASK);
		coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA);
		break;
	case V4L2_PIX_FMT_JPEG:
		coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_PARA);
		coda_write(dev, ctx->params.jpeg_restart_interval,
				CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL);
		coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN);
		coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE);
		coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET);

		coda_jpeg_write_tables(ctx);
		break;
	default:
		v4l2_err(v4l2_dev,
			 "dst format (0x%08x) invalid.\n", dst_fourcc);
		ret = -EINVAL;
		goto out;
	}

	/*
	 * slice mode and GOP size registers are used for thumb size/offset
	 * in JPEG mode
	 */
	if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
		value = coda_slice_mode(ctx);
		coda_write(dev, value, CODA_CMD_ENC_SEQ_SLICE_MODE);
		value = ctx->params.gop_size;
		coda_write(dev, value, CODA_CMD_ENC_SEQ_GOP_SIZE);
	}

	/* Rate control needs a bitrate and frame or macroblock level RC */
	if (ctx->params.bitrate && (ctx->params.frame_rc_enable ||
				    ctx->params.mb_rc_enable)) {
		/* The current values are programmed here, clear pending changes */
		ctx->params.bitrate_changed = false;
		ctx->params.h264_intra_qp_changed = false;

		/* Rate control enabled */
		value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK)
			<< CODA_RATECONTROL_BITRATE_OFFSET;
		value |= 1 & CODA_RATECONTROL_ENABLE_MASK;
		value |= (ctx->params.vbv_delay &
			  CODA_RATECONTROL_INITIALDELAY_MASK)
			 << CODA_RATECONTROL_INITIALDELAY_OFFSET;
		if (dev->devtype->product == CODA_960)
			value |= BIT(31); /* disable autoskip */
	} else {
		value = 0;
	}
	coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_PARA);

	coda_write(dev, ctx->params.vbv_size, CODA_CMD_ENC_SEQ_RC_BUF_SIZE);
	coda_write(dev, ctx->params.intra_refresh,
		   CODA_CMD_ENC_SEQ_INTRA_REFRESH);

	/* The bitstream buffer size is programmed in units of 1024 bytes */
	coda_write(dev, bitstream_buf, CODA_CMD_ENC_SEQ_BB_START);
	coda_write(dev, bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE);


	/* From here on, value accumulates the CODA_CMD_ENC_SEQ_OPTION bits */
	value = 0;
	if (dev->devtype->product == CODA_960)
		gamma = CODA9_DEFAULT_GAMMA;
	else
		gamma = CODA_DEFAULT_GAMMA;
	if (gamma > 0) {
		coda_write(dev, (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET,
			   CODA_CMD_ENC_SEQ_RC_GAMMA);
	}

	if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) {
		coda_write(dev,
			   ctx->params.h264_min_qp << CODA_QPMIN_OFFSET |
			   ctx->params.h264_max_qp << CODA_QPMAX_OFFSET,
			   CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX);
	}
	/*
	 * NOTE(review): the gamma option checks below test the
	 * CODA_DEFAULT_GAMMA constant rather than the gamma value selected
	 * above. Both defaults are positive, so the outcome is the same.
	 */
	if (dev->devtype->product == CODA_960) {
		if (ctx->params.h264_max_qp)
			value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET;
		if (CODA_DEFAULT_GAMMA > 0)
			value |= 1 << CODA9_OPTION_GAMMA_OFFSET;
	} else {
		if (CODA_DEFAULT_GAMMA > 0) {
			if (dev->devtype->product == CODA_DX6)
				value |= 1 << CODADX6_OPTION_GAMMA_OFFSET;
			else
				value |= 1 << CODA7_OPTION_GAMMA_OFFSET;
		}
		if (ctx->params.h264_min_qp)
			value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET;
		if (ctx->params.h264_max_qp)
			value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET;
	}
	coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION);

	/* Interval mode 1 only with frame level but no macroblock level RC */
	if (ctx->params.frame_rc_enable && !ctx->params.mb_rc_enable)
		value = 1;
	else
		value = 0;
	coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE);

	coda_setup_iram(ctx);

	if (dst_fourcc == V4L2_PIX_FMT_H264) {
		switch (dev->devtype->product) {
		case CODA_DX6:
			value = FMO_SLICE_SAVE_BUF_SIZE << 7;
			coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO);
			break;
		case CODA_HX4:
		case CODA_7541:
			coda_write(dev, ctx->iram_info.search_ram_paddr,
					CODA7_CMD_ENC_SEQ_SEARCH_BASE);
			coda_write(dev, ctx->iram_info.search_ram_size,
					CODA7_CMD_ENC_SEQ_SEARCH_SIZE);
			break;
		case CODA_960:
			coda_write(dev, 0, CODA9_CMD_ENC_SEQ_ME_OPTION);
			coda_write(dev, 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT);
		}
	}

	ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
	if (ret < 0) {
		v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
		goto out;
	}

	/* The command completed, now check the result it reported */
	if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) {
		v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n");
		ret = -EFAULT;
		goto out;
	}
	ctx->initialized = 1;

	if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
		/*
		 * CODA960 allocates four internal frames, the others two;
		 * the frame buffer count written to the device below is 2
		 * in both cases.
		 */
		if (dev->devtype->product == CODA_960)
			ctx->num_internal_frames = 4;
		else
			ctx->num_internal_frames = 2;
		ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc);
		if (ret < 0) {
			v4l2_err(v4l2_dev, "failed to allocate framebuffers\n");
			goto out;
		}
		num_fb = 2;
		stride = q_data_src->bytesperline;
	} else {
		/* No internal frame buffers are set up for JPEG */
		ctx->num_internal_frames = 0;
		num_fb = 0;
		stride = 0;
	}
	coda_write(dev, num_fb, CODA_CMD_SET_FRAME_BUF_NUM);
	coda_write(dev, stride, CODA_CMD_SET_FRAME_BUF_STRIDE);

	if (dev->devtype->product == CODA_HX4 ||
	    dev->devtype->product == CODA_7541) {
		coda_write(dev, q_data_src->bytesperline,
				CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
	}
	if (dev->devtype->product != CODA_DX6) {
		/* Pass on the IRAM buffer assignment from ctx->iram_info */
		coda_write(dev, ctx->iram_info.buf_bit_use,
				CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
		coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
				CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
		coda_write(dev, ctx->iram_info.buf_dbk_y_use,
				CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
		coda_write(dev, ctx->iram_info.buf_dbk_c_use,
				CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
		coda_write(dev, ctx->iram_info.buf_ovl_use,
				CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
		if (dev->devtype->product == CODA_960) {
			coda_write(dev, ctx->iram_info.buf_btp_use,
					CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);

			coda9_set_frame_cache(ctx, q_data_src->fourcc);

			/* FIXME */
			coda_write(dev, ctx->internal_frames[2].buf.paddr,
					CODA9_CMD_SET_FRAME_SUBSAMP_A);
			coda_write(dev, ctx->internal_frames[3].buf.paddr,
					CODA9_CMD_SET_FRAME_SUBSAMP_B);
		}
	}

	ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF);
	if (ret < 0) {
		v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n");
		goto out;
	}

	coda_dbg(1, ctx, "start encoding %dx%d %4.4s->%4.4s @ %d/%d Hz\n",
		 q_data_src->rect.width, q_data_src->rect.height,
		 (char *)&ctx->codec->src_fourcc, (char *)&dst_fourcc,
		 ctx->params.framerate & 0xffff,
		 (ctx->params.framerate >> 16) + 1);

	/* Save stream headers */
	buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
	switch (dst_fourcc) {
	case V4L2_PIX_FMT_H264:
		/*
		 * Get SPS in the first frame and copy it to an
		 * intermediate buffer.
		 */
		ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS,
					 &ctx->vpu_header[0][0],
					 &ctx->vpu_header_size[0]);
		if (ret < 0)
			goto out;

		/*
		 * If visible width or height are not aligned to macroblock
		 * size, the crop_right and crop_bottom SPS fields must be set
		 * to the difference between visible and coded size.  This is
		 * only supported by CODA960 firmware. All others do not allow
		 * writing frame cropping parameters, so we have to manually
		 * fix up the SPS RBSP (Sequence Parameter Set Raw Byte
		 * Sequence Payload) ourselves.
		 */
		if (ctx->dev->devtype->product != CODA_960 &&
		    ((q_data_src->rect.width % 16) ||
		     (q_data_src->rect.height % 16))) {
			ret = coda_h264_sps_fixup(ctx, q_data_src->rect.width,
						  q_data_src->rect.height,
						  &ctx->vpu_header[0][0],
						  &ctx->vpu_header_size[0],
						  sizeof(ctx->vpu_header[0]));
			if (ret < 0)
				goto out;
		}

		/*
		 * Get PPS in the first frame and copy it to an
		 * intermediate buffer.
		 */
		ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS,
					 &ctx->vpu_header[1][0],
					 &ctx->vpu_header_size[1]);
		if (ret < 0)
			goto out;

		/*
		 * Length of H.264 headers is variable and thus it might not be
		 * aligned for the coda to append the encoded frame. If that is
		 * the case, a filler NAL must be added to header 2.
		 */
		ctx->vpu_header_size[2] = coda_h264_padding(
					(ctx->vpu_header_size[0] +
					 ctx->vpu_header_size[1]),
					 ctx->vpu_header[2]);
		break;
	case V4L2_PIX_FMT_MPEG4:
		/*
		 * Get VOS in the first frame and copy it to an
		 * intermediate buffer
		 */
		ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS,
					 &ctx->vpu_header[0][0],
					 &ctx->vpu_header_size[0]);
		if (ret < 0)
			goto out;

		ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS,
					 &ctx->vpu_header[1][0],
					 &ctx->vpu_header_size[1]);
		if (ret < 0)
			goto out;

		ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL,
					 &ctx->vpu_header[2][0],
					 &ctx->vpu_header_size[2]);
		if (ret < 0)
			goto out;
		break;
	default:
		/* No more formats need to save headers at the moment */
		break;
	}

out:
	/* Common exit: ret holds 0 or the error of the step that failed */
	mutex_unlock(&dev->coda_mutex);
	return ret;
}
else if (ctx->params.gop_size != 0 && 1511 (src_buf->sequence % ctx->params.gop_size) == 0) 1512 force_ipicture = 1; 1513 1514 /* 1515 * Workaround coda firmware BUG that only marks the first 1516 * frame as IDR. This is a problem for some decoders that can't 1517 * recover when a frame is lost. 1518 */ 1519 if (!force_ipicture) { 1520 src_buf->flags |= V4L2_BUF_FLAG_PFRAME; 1521 src_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME; 1522 } else { 1523 src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME; 1524 src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME; 1525 } 1526 1527 if (dev->devtype->product == CODA_960) 1528 coda_set_gdi_regs(ctx); 1529 1530 /* 1531 * Copy headers in front of the first frame and forced I frames for 1532 * H.264 only. In MPEG4 they are already copied by the CODA. 1533 */ 1534 if (src_buf->sequence == 0 || force_ipicture) { 1535 pic_stream_buffer_addr = 1536 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0) + 1537 ctx->vpu_header_size[0] + 1538 ctx->vpu_header_size[1] + 1539 ctx->vpu_header_size[2]; 1540 pic_stream_buffer_size = q_data_dst->sizeimage - 1541 ctx->vpu_header_size[0] - 1542 ctx->vpu_header_size[1] - 1543 ctx->vpu_header_size[2]; 1544 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0), 1545 &ctx->vpu_header[0][0], ctx->vpu_header_size[0]); 1546 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0) 1547 + ctx->vpu_header_size[0], &ctx->vpu_header[1][0], 1548 ctx->vpu_header_size[1]); 1549 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0) 1550 + ctx->vpu_header_size[0] + ctx->vpu_header_size[1], 1551 &ctx->vpu_header[2][0], ctx->vpu_header_size[2]); 1552 } else { 1553 pic_stream_buffer_addr = 1554 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); 1555 pic_stream_buffer_size = q_data_dst->sizeimage; 1556 } 1557 1558 if (force_ipicture) { 1559 switch (dst_fourcc) { 1560 case V4L2_PIX_FMT_H264: 1561 quant_param = ctx->params.h264_intra_qp; 1562 break; 1563 case V4L2_PIX_FMT_MPEG4: 1564 quant_param = ctx->params.mpeg4_intra_qp; 1565 break; 1566 case V4L2_PIX_FMT_JPEG: 1567 
quant_param = 30; 1568 break; 1569 default: 1570 v4l2_warn(&ctx->dev->v4l2_dev, 1571 "cannot set intra qp, fmt not supported\n"); 1572 break; 1573 } 1574 } else { 1575 switch (dst_fourcc) { 1576 case V4L2_PIX_FMT_H264: 1577 quant_param = ctx->params.h264_inter_qp; 1578 break; 1579 case V4L2_PIX_FMT_MPEG4: 1580 quant_param = ctx->params.mpeg4_inter_qp; 1581 break; 1582 default: 1583 v4l2_warn(&ctx->dev->v4l2_dev, 1584 "cannot set inter qp, fmt not supported\n"); 1585 break; 1586 } 1587 } 1588 1589 /* submit */ 1590 if (ctx->params.rot_mode) 1591 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode; 1592 coda_write(dev, rot_mode, CODA_CMD_ENC_PIC_ROT_MODE); 1593 coda_write(dev, quant_param, CODA_CMD_ENC_PIC_QS); 1594 1595 if (dev->devtype->product == CODA_960) { 1596 coda_write(dev, 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX); 1597 coda_write(dev, q_data_src->bytesperline, 1598 CODA9_CMD_ENC_PIC_SRC_STRIDE); 1599 coda_write(dev, 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC); 1600 1601 reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y; 1602 } else { 1603 reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y; 1604 } 1605 coda_write_base(ctx, q_data_src, src_buf, reg); 1606 1607 coda_write(dev, force_ipicture << 1 & 0x2, 1608 CODA_CMD_ENC_PIC_OPTION); 1609 1610 coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START); 1611 coda_write(dev, pic_stream_buffer_size / 1024, 1612 CODA_CMD_ENC_PIC_BB_SIZE); 1613 1614 if (!ctx->streamon_out) { 1615 /* After streamoff on the output side, set stream end flag */ 1616 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG; 1617 coda_write(dev, ctx->bit_stream_param, 1618 CODA_REG_BIT_BIT_STREAM_PARAM); 1619 } 1620 1621 if (dev->devtype->product != CODA_DX6) 1622 coda_write(dev, ctx->iram_info.axi_sram_use, 1623 CODA7_REG_BIT_AXI_SRAM_USE); 1624 1625 trace_coda_enc_pic_run(ctx, src_buf); 1626 1627 coda_command_async(ctx, CODA_COMMAND_PIC_RUN); 1628 1629 return 0; 1630 } 1631 1632 static char coda_frame_type_char(u32 flags) 1633 { 1634 return (flags & 
V4L2_BUF_FLAG_KEYFRAME) ? 'I' : 1635 (flags & V4L2_BUF_FLAG_PFRAME) ? 'P' : 1636 (flags & V4L2_BUF_FLAG_BFRAME) ? 'B' : '?'; 1637 } 1638 1639 static void coda_finish_encode(struct coda_ctx *ctx) 1640 { 1641 struct vb2_v4l2_buffer *src_buf, *dst_buf; 1642 struct coda_dev *dev = ctx->dev; 1643 u32 wr_ptr, start_ptr; 1644 1645 if (ctx->aborting) 1646 return; 1647 1648 /* 1649 * Lock to make sure that an encoder stop command running in parallel 1650 * will either already have marked src_buf as last, or it will wake up 1651 * the capture queue after the buffers are returned. 1652 */ 1653 mutex_lock(&ctx->wakeup_mutex); 1654 src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); 1655 dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); 1656 1657 trace_coda_enc_pic_done(ctx, dst_buf); 1658 1659 /* Get results from the coda */ 1660 start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START); 1661 wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)); 1662 1663 /* Calculate bytesused field */ 1664 if (dst_buf->sequence == 0 || 1665 src_buf->flags & V4L2_BUF_FLAG_KEYFRAME) { 1666 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr + 1667 ctx->vpu_header_size[0] + 1668 ctx->vpu_header_size[1] + 1669 ctx->vpu_header_size[2]); 1670 } else { 1671 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr); 1672 } 1673 1674 coda_dbg(1, ctx, "frame size = %u\n", wr_ptr - start_ptr); 1675 1676 coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM); 1677 coda_read(dev, CODA_RET_ENC_PIC_FLAG); 1678 1679 dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME | 1680 V4L2_BUF_FLAG_PFRAME | 1681 V4L2_BUF_FLAG_LAST); 1682 if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) 1683 dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME; 1684 else 1685 dst_buf->flags |= V4L2_BUF_FLAG_PFRAME; 1686 dst_buf->flags |= src_buf->flags & V4L2_BUF_FLAG_LAST; 1687 1688 v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, false); 1689 1690 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); 1691 1692 dst_buf = 
v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); 1693 coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE); 1694 mutex_unlock(&ctx->wakeup_mutex); 1695 1696 ctx->gopcounter--; 1697 if (ctx->gopcounter < 0) 1698 ctx->gopcounter = ctx->params.gop_size - 1; 1699 1700 coda_dbg(1, ctx, "job finished: encoded %c frame (%d)%s\n", 1701 coda_frame_type_char(dst_buf->flags), dst_buf->sequence, 1702 (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? " (last)" : ""); 1703 } 1704 1705 static void coda_seq_end_work(struct work_struct *work) 1706 { 1707 struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work); 1708 struct coda_dev *dev = ctx->dev; 1709 1710 mutex_lock(&ctx->buffer_mutex); 1711 mutex_lock(&dev->coda_mutex); 1712 1713 if (ctx->initialized == 0) 1714 goto out; 1715 1716 coda_dbg(1, ctx, "%s: sent command 'SEQ_END' to coda\n", __func__); 1717 if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) { 1718 v4l2_err(&dev->v4l2_dev, 1719 "CODA_COMMAND_SEQ_END failed\n"); 1720 } 1721 1722 /* 1723 * FIXME: Sometimes h.264 encoding fails with 8-byte sequences missing 1724 * from the output stream after the h.264 decoder has run. Resetting the 1725 * hardware after the decoder has finished seems to help. 
1726 */ 1727 if (dev->devtype->product == CODA_960) 1728 coda_hw_reset(ctx); 1729 1730 kfifo_init(&ctx->bitstream_fifo, 1731 ctx->bitstream.vaddr, ctx->bitstream.size); 1732 1733 coda_free_framebuffers(ctx); 1734 1735 ctx->initialized = 0; 1736 1737 out: 1738 mutex_unlock(&dev->coda_mutex); 1739 mutex_unlock(&ctx->buffer_mutex); 1740 } 1741 1742 static void coda_bit_release(struct coda_ctx *ctx) 1743 { 1744 mutex_lock(&ctx->buffer_mutex); 1745 coda_free_framebuffers(ctx); 1746 coda_free_context_buffers(ctx); 1747 coda_free_bitstream_buffer(ctx); 1748 mutex_unlock(&ctx->buffer_mutex); 1749 } 1750 1751 const struct coda_context_ops coda_bit_encode_ops = { 1752 .queue_init = coda_encoder_queue_init, 1753 .reqbufs = coda_encoder_reqbufs, 1754 .start_streaming = coda_start_encoding, 1755 .prepare_run = coda_prepare_encode, 1756 .finish_run = coda_finish_encode, 1757 .seq_end_work = coda_seq_end_work, 1758 .release = coda_bit_release, 1759 }; 1760 1761 /* 1762 * Decoder context operations 1763 */ 1764 1765 static int coda_alloc_bitstream_buffer(struct coda_ctx *ctx, 1766 struct coda_q_data *q_data) 1767 { 1768 if (ctx->bitstream.vaddr) 1769 return 0; 1770 1771 ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2); 1772 ctx->bitstream.vaddr = dma_alloc_wc(ctx->dev->dev, ctx->bitstream.size, 1773 &ctx->bitstream.paddr, GFP_KERNEL); 1774 if (!ctx->bitstream.vaddr) { 1775 v4l2_err(&ctx->dev->v4l2_dev, 1776 "failed to allocate bitstream ringbuffer"); 1777 return -ENOMEM; 1778 } 1779 kfifo_init(&ctx->bitstream_fifo, 1780 ctx->bitstream.vaddr, ctx->bitstream.size); 1781 1782 return 0; 1783 } 1784 1785 static void coda_free_bitstream_buffer(struct coda_ctx *ctx) 1786 { 1787 if (ctx->bitstream.vaddr == NULL) 1788 return; 1789 1790 dma_free_wc(ctx->dev->dev, ctx->bitstream.size, ctx->bitstream.vaddr, 1791 ctx->bitstream.paddr); 1792 ctx->bitstream.vaddr = NULL; 1793 kfifo_init(&ctx->bitstream_fifo, NULL, 0); 1794 } 1795 1796 static int coda_decoder_reqbufs(struct 
coda_ctx *ctx, 1797 struct v4l2_requestbuffers *rb) 1798 { 1799 struct coda_q_data *q_data_src; 1800 int ret; 1801 1802 if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) 1803 return 0; 1804 1805 if (rb->count) { 1806 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 1807 ret = coda_alloc_context_buffers(ctx, q_data_src); 1808 if (ret < 0) 1809 return ret; 1810 ret = coda_alloc_bitstream_buffer(ctx, q_data_src); 1811 if (ret < 0) { 1812 coda_free_context_buffers(ctx); 1813 return ret; 1814 } 1815 } else { 1816 coda_free_bitstream_buffer(ctx); 1817 coda_free_context_buffers(ctx); 1818 } 1819 1820 return 0; 1821 } 1822 1823 static bool coda_reorder_enable(struct coda_ctx *ctx) 1824 { 1825 struct coda_dev *dev = ctx->dev; 1826 int profile; 1827 1828 if (dev->devtype->product != CODA_HX4 && 1829 dev->devtype->product != CODA_7541 && 1830 dev->devtype->product != CODA_960) 1831 return false; 1832 1833 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) 1834 return false; 1835 1836 if (ctx->codec->src_fourcc != V4L2_PIX_FMT_H264) 1837 return true; 1838 1839 profile = coda_h264_profile(ctx->params.h264_profile_idc); 1840 if (profile < 0) 1841 v4l2_warn(&dev->v4l2_dev, "Unknown H264 Profile: %u\n", 1842 ctx->params.h264_profile_idc); 1843 1844 /* Baseline profile does not support reordering */ 1845 return profile > V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE; 1846 } 1847 1848 static void coda_decoder_drop_used_metas(struct coda_ctx *ctx) 1849 { 1850 struct coda_buffer_meta *meta, *tmp; 1851 1852 /* 1853 * All metas that end at or before the RD pointer (fifo out), 1854 * are now consumed by the VPU and should be released. 
/*
 * Run the decoder SEQ_INIT command and evaluate the stream parameters the
 * firmware reports.
 *
 * The caller must hold dev->coda_mutex (asserted via lockdep).
 *
 * On success the number of required internal frames is stored in
 * ctx->num_internal_frames and, for H.264, the capture rectangle is updated
 * from the reported cropping values.
 *
 * Returns 0 on success, the error of coda_command_sync() if the command
 * timed out, -EAGAIN if the firmware reported failure, or -EINVAL if the
 * stream does not fit the capture format or needs more than
 * CODA_MAX_FRAMEBUFFERS internal frames.
 */
static int __coda_decoder_seq_init(struct coda_ctx *ctx)
{
	struct coda_q_data *q_data_src, *q_data_dst;
	u32 bitstream_buf, bitstream_size;
	struct coda_dev *dev = ctx->dev;
	int width, height;
	u32 src_fourcc, dst_fourcc;
	u32 val;
	int ret;

	lockdep_assert_held(&dev->coda_mutex);

	coda_dbg(1, ctx, "Video Data Order Adapter: %s\n",
		 ctx->use_vdoa ? "Enabled" : "Disabled");

	/* Start decoding */
	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
	bitstream_buf = ctx->bitstream.paddr;
	bitstream_size = ctx->bitstream.size;
	src_fourcc = q_data_src->fourcc;
	dst_fourcc = q_data_dst->fourcc;

	/* Update coda bitstream read and write pointers from kfifo */
	coda_kfifo_sync_to_device_full(ctx);

	/* Clear the format dependent bits, then set them for this session */
	ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
				 CODA9_FRAME_TILED2LINEAR);
	if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV)
		ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
	/* With the VDOA in use, the tiled-to-linear bit is left cleared */
	if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
		ctx->frame_mem_ctrl |= (0x3 << 9) |
			((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR);
	coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);

	/* Start without a display frame and with all display flags cleared */
	ctx->display_idx = -1;
	ctx->frm_dis_flg = 0;
	coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));

	/* The bitstream buffer size is programmed in units of 1024 bytes */
	coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
	coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
	val = 0;
	if (coda_reorder_enable(ctx))
		val |= CODA_REORDER_ENABLE;
	if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
		val |= CODA_NO_INT_ENABLE;
	coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);

	ctx->params.codec_mode = ctx->codec->mode;
	if (dev->devtype->product == CODA_960 &&
	    src_fourcc == V4L2_PIX_FMT_MPEG4)
		ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4;
	else
		ctx->params.codec_mode_aux = 0;
	if (src_fourcc == V4L2_PIX_FMT_MPEG4) {
		coda_write(dev, CODA_MP4_CLASS_MPEG4,
			   CODA_CMD_DEC_SEQ_MP4_ASP_CLASS);
	}
	if (src_fourcc == V4L2_PIX_FMT_H264) {
		if (dev->devtype->product == CODA_HX4 ||
		    dev->devtype->product == CODA_7541) {
			coda_write(dev, ctx->psbuf.paddr,
					CODA_CMD_DEC_SEQ_PS_BB_START);
			coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
					CODA_CMD_DEC_SEQ_PS_BB_SIZE);
		}
		if (dev->devtype->product == CODA_960) {
			coda_write(dev, 0, CODA_CMD_DEC_SEQ_X264_MV_EN);
			coda_write(dev, 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE);
		}
	}
	if (src_fourcc == V4L2_PIX_FMT_JPEG)
		coda_write(dev, 0, CODA_CMD_DEC_SEQ_JPG_THUMB_EN);
	if (dev->devtype->product != CODA_960)
		coda_write(dev, 0, CODA_CMD_DEC_SEQ_SRC_SIZE);

	/*
	 * The escape flag is only set in bit_stream_param for the duration
	 * of the SEQ_INIT command and cleared again right afterwards.
	 */
	ctx->bit_stream_param = CODA_BIT_DEC_SEQ_INIT_ESCAPE;
	ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
	ctx->bit_stream_param = 0;
	if (ret) {
		v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
		return ret;
	}
	/*
	 * Note: the context is marked initialized as soon as the command has
	 * completed, before the reported success flag is checked below.
	 */
	ctx->sequence_offset = ~0U;
	ctx->initialized = 1;
	ctx->first_frame_sequence = 0;

	/* Update kfifo out pointer from coda bitstream read pointer */
	coda_kfifo_sync_from_device(ctx);

	/*
	 * After updating the read pointer, we need to check if
	 * any metas are consumed and should be released.
	 */
	coda_decoder_drop_used_metas(ctx);

	if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
		v4l2_err(&dev->v4l2_dev,
			"CODA_COMMAND_SEQ_INIT failed, error code = 0x%x\n",
			coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
		return -EAGAIN;
	}

	/* Width and height are packed into one register, layout per product */
	val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
	if (dev->devtype->product == CODA_DX6) {
		width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK;
		height = val & CODADX6_PICHEIGHT_MASK;
	} else {
		width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK;
		height = val & CODA7_PICHEIGHT_MASK;
	}

	/* The reported width is compared against bytesperline, not width */
	if (width > q_data_dst->bytesperline || height > q_data_dst->height) {
		v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
			 width, height, q_data_dst->bytesperline,
			 q_data_dst->height);
		return -EINVAL;
	}

	width = round_up(width, 16);
	height = round_up(height, 16);

	coda_dbg(1, ctx, "start decoding: %dx%d\n", width, height);

	ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED);
	/*
	 * If the VDOA is used, the decoder needs one additional frame,
	 * because the frames are freed when the next frame is decoded.
	 * Otherwise there are visible errors in the decoded frames (green
	 * regions in displayed frames) and a broken order of frames (earlier
	 * frames are sporadically displayed after later frames).
	 */
	if (ctx->use_vdoa)
		ctx->num_internal_frames += 1;
	if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
		v4l2_err(&dev->v4l2_dev,
			 "not enough framebuffers to decode (%d < %d)\n",
			 CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
		return -EINVAL;
	}

	if (src_fourcc == V4L2_PIX_FMT_H264) {
		u32 left_right;
		u32 top_bottom;

		left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT);
		top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM);

		/*
		 * Each register packs two 10-bit crop values: left/top in
		 * bits 19:10, right/bottom in bits 9:0. The visible rectangle
		 * is the rounded-up size minus the crop on both sides.
		 */
		q_data_dst->rect.left = (left_right >> 10) & 0x3ff;
		q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff;
		q_data_dst->rect.width = width - q_data_dst->rect.left -
					 (left_right & 0x3ff);
		q_data_dst->rect.height = height - q_data_dst->rect.top -
					  (top_bottom & 0x3ff);
	}

	if (dev->devtype->product != CODA_DX6) {
		u8 profile, level;

		/* Profile in bits 7:0, level in bits 14:8 of the report */
		val = coda_read(dev, CODA7_RET_DEC_SEQ_HEADER_REPORT);
		profile = val & 0xff;
		level = (val >> 8) & 0x7f;

		if (profile || level)
			coda_update_profile_level_ctrls(ctx, profile, level);
	}

	return 0;
}

/*
 * Work item that runs the decoder sequence initialization if the context
 * is not initialized yet. Takes ctx->buffer_mutex and dev->coda_mutex around
 * the call; the result of the initialization is not evaluated here.
 */
static void coda_dec_seq_init_work(struct work_struct *work)
{
	struct coda_ctx *ctx = container_of(work,
					    struct coda_ctx, seq_init_work);
	struct coda_dev *dev = ctx->dev;

	mutex_lock(&ctx->buffer_mutex);
	mutex_lock(&dev->coda_mutex);

	if (!ctx->initialized)
		__coda_decoder_seq_init(ctx);

	mutex_unlock(&dev->coda_mutex);
	mutex_unlock(&ctx->buffer_mutex);
}
q_data_dst->fourcc; 2068 2069 if (!ctx->initialized) { 2070 ret = __coda_decoder_seq_init(ctx); 2071 if (ret < 0) 2072 return ret; 2073 } else { 2074 ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) | 2075 CODA9_FRAME_TILED2LINEAR); 2076 if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV) 2077 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE; 2078 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) 2079 ctx->frame_mem_ctrl |= (0x3 << 9) | 2080 ((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR); 2081 } 2082 2083 coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR); 2084 2085 ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc); 2086 if (ret < 0) { 2087 v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n"); 2088 return ret; 2089 } 2090 2091 /* Tell the decoder how many frame buffers we allocated. */ 2092 coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM); 2093 coda_write(dev, round_up(q_data_dst->rect.width, 16), 2094 CODA_CMD_SET_FRAME_BUF_STRIDE); 2095 2096 if (dev->devtype->product != CODA_DX6) { 2097 /* Set secondary AXI IRAM */ 2098 coda_setup_iram(ctx); 2099 2100 coda_write(dev, ctx->iram_info.buf_bit_use, 2101 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR); 2102 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use, 2103 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR); 2104 coda_write(dev, ctx->iram_info.buf_dbk_y_use, 2105 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR); 2106 coda_write(dev, ctx->iram_info.buf_dbk_c_use, 2107 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR); 2108 coda_write(dev, ctx->iram_info.buf_ovl_use, 2109 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR); 2110 if (dev->devtype->product == CODA_960) { 2111 coda_write(dev, ctx->iram_info.buf_btp_use, 2112 CODA9_CMD_SET_FRAME_AXI_BTP_ADDR); 2113 2114 coda_write(dev, -1, CODA9_CMD_SET_FRAME_DELAY); 2115 coda9_set_frame_cache(ctx, dst_fourcc); 2116 } 2117 } 2118 2119 if (src_fourcc == V4L2_PIX_FMT_H264) { 2120 coda_write(dev, ctx->slicebuf.paddr, 2121 
CODA_CMD_SET_FRAME_SLICE_BB_START); 2122 coda_write(dev, ctx->slicebuf.size / 1024, 2123 CODA_CMD_SET_FRAME_SLICE_BB_SIZE); 2124 } 2125 2126 if (dev->devtype->product == CODA_HX4 || 2127 dev->devtype->product == CODA_7541) { 2128 int max_mb_x = 1920 / 16; 2129 int max_mb_y = 1088 / 16; 2130 int max_mb_num = max_mb_x * max_mb_y; 2131 2132 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y, 2133 CODA7_CMD_SET_FRAME_MAX_DEC_SIZE); 2134 } else if (dev->devtype->product == CODA_960) { 2135 int max_mb_x = 1920 / 16; 2136 int max_mb_y = 1088 / 16; 2137 int max_mb_num = max_mb_x * max_mb_y; 2138 2139 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y, 2140 CODA9_CMD_SET_FRAME_MAX_DEC_SIZE); 2141 } 2142 2143 if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) { 2144 v4l2_err(&ctx->dev->v4l2_dev, 2145 "CODA_COMMAND_SET_FRAME_BUF timeout\n"); 2146 return -ETIMEDOUT; 2147 } 2148 2149 return 0; 2150 } 2151 2152 static int coda_start_decoding(struct coda_ctx *ctx) 2153 { 2154 struct coda_dev *dev = ctx->dev; 2155 int ret; 2156 2157 mutex_lock(&dev->coda_mutex); 2158 ret = __coda_start_decoding(ctx); 2159 mutex_unlock(&dev->coda_mutex); 2160 2161 return ret; 2162 } 2163 2164 static int coda_prepare_decode(struct coda_ctx *ctx) 2165 { 2166 struct vb2_v4l2_buffer *dst_buf; 2167 struct coda_dev *dev = ctx->dev; 2168 struct coda_q_data *q_data_dst; 2169 struct coda_buffer_meta *meta; 2170 u32 rot_mode = 0; 2171 u32 reg_addr, reg_stride; 2172 2173 dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); 2174 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); 2175 2176 /* Try to copy source buffer contents into the bitstream ringbuffer */ 2177 mutex_lock(&ctx->bitstream_mutex); 2178 coda_fill_bitstream(ctx, NULL); 2179 mutex_unlock(&ctx->bitstream_mutex); 2180 2181 if (coda_get_bitstream_payload(ctx) < 512 && 2182 (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) { 2183 coda_dbg(1, ctx, "bitstream payload: %d, skipping\n", 2184 
coda_get_bitstream_payload(ctx)); 2185 return -EAGAIN; 2186 } 2187 2188 /* Run coda_start_decoding (again) if not yet initialized */ 2189 if (!ctx->initialized) { 2190 int ret = __coda_start_decoding(ctx); 2191 2192 if (ret < 0) { 2193 v4l2_err(&dev->v4l2_dev, "failed to start decoding\n"); 2194 return -EAGAIN; 2195 } else { 2196 ctx->initialized = 1; 2197 } 2198 } 2199 2200 if (dev->devtype->product == CODA_960) 2201 coda_set_gdi_regs(ctx); 2202 2203 if (ctx->use_vdoa && 2204 ctx->display_idx >= 0 && 2205 ctx->display_idx < ctx->num_internal_frames) { 2206 vdoa_device_run(ctx->vdoa, 2207 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0), 2208 ctx->internal_frames[ctx->display_idx].buf.paddr); 2209 } else { 2210 if (dev->devtype->product == CODA_960) { 2211 /* 2212 * It was previously assumed that the CODA960 has an 2213 * internal list of 64 buffer entries that contains 2214 * both the registered internal frame buffers as well 2215 * as the rotator buffer output, and that the ROT_INDEX 2216 * register must be set to a value between the last 2217 * internal frame buffers' index and 64. 2218 * At least on firmware version 3.1.1 it turns out that 2219 * setting ROT_INDEX to any value >= 32 causes CODA 2220 * hangups that it can not recover from with the SRC VPU 2221 * reset. 2222 * It does appear to work however, to just set it to a 2223 * fixed value in the [ctx->num_internal_frames, 31] 2224 * range, for example CODA_MAX_FRAMEBUFFERS. 
2225 */ 2226 coda_write(dev, CODA_MAX_FRAMEBUFFERS, 2227 CODA9_CMD_DEC_PIC_ROT_INDEX); 2228 2229 reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y; 2230 reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE; 2231 } else { 2232 reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y; 2233 reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE; 2234 } 2235 coda_write_base(ctx, q_data_dst, dst_buf, reg_addr); 2236 coda_write(dev, q_data_dst->bytesperline, reg_stride); 2237 2238 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode; 2239 } 2240 2241 coda_write(dev, rot_mode, CODA_CMD_DEC_PIC_ROT_MODE); 2242 2243 switch (dev->devtype->product) { 2244 case CODA_DX6: 2245 /* TBD */ 2246 case CODA_HX4: 2247 case CODA_7541: 2248 coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION); 2249 break; 2250 case CODA_960: 2251 /* 'hardcode to use interrupt disable mode'? */ 2252 coda_write(dev, (1 << 10), CODA_CMD_DEC_PIC_OPTION); 2253 break; 2254 } 2255 2256 coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM); 2257 2258 coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START); 2259 coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE); 2260 2261 if (dev->devtype->product != CODA_DX6) 2262 coda_write(dev, ctx->iram_info.axi_sram_use, 2263 CODA7_REG_BIT_AXI_SRAM_USE); 2264 2265 spin_lock(&ctx->buffer_meta_lock); 2266 meta = list_first_entry_or_null(&ctx->buffer_meta_list, 2267 struct coda_buffer_meta, list); 2268 2269 if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) { 2270 2271 /* If this is the last buffer in the bitstream, add padding */ 2272 if (meta->end == ctx->bitstream_fifo.kfifo.in) { 2273 static unsigned char buf[512]; 2274 unsigned int pad; 2275 2276 /* Pad to multiple of 256 and then add 256 more */ 2277 pad = ((0 - meta->end) & 0xff) + 256; 2278 2279 memset(buf, 0xff, sizeof(buf)); 2280 2281 kfifo_in(&ctx->bitstream_fifo, buf, pad); 2282 } 2283 } 2284 spin_unlock(&ctx->buffer_meta_lock); 2285 2286 coda_kfifo_sync_to_device_full(ctx); 2287 2288 /* Clear decode success flag */ 2289 coda_write(dev, 0, CODA_RET_DEC_PIC_SUCCESS); 2290 
2291 /* Clear error return value */ 2292 coda_write(dev, 0, CODA_RET_DEC_PIC_ERR_MB); 2293 2294 trace_coda_dec_pic_run(ctx, meta); 2295 2296 coda_command_async(ctx, CODA_COMMAND_PIC_RUN); 2297 2298 return 0; 2299 } 2300 2301 static void coda_finish_decode(struct coda_ctx *ctx) 2302 { 2303 struct coda_dev *dev = ctx->dev; 2304 struct coda_q_data *q_data_src; 2305 struct coda_q_data *q_data_dst; 2306 struct vb2_v4l2_buffer *dst_buf; 2307 struct coda_buffer_meta *meta; 2308 int width, height; 2309 int decoded_idx; 2310 int display_idx; 2311 struct coda_internal_frame *decoded_frame = NULL; 2312 u32 src_fourcc; 2313 int success; 2314 u32 err_mb; 2315 int err_vdoa = 0; 2316 u32 val; 2317 2318 if (ctx->aborting) 2319 return; 2320 2321 /* Update kfifo out pointer from coda bitstream read pointer */ 2322 coda_kfifo_sync_from_device(ctx); 2323 2324 /* 2325 * in stream-end mode, the read pointer can overshoot the write pointer 2326 * by up to 512 bytes 2327 */ 2328 if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) { 2329 if (coda_get_bitstream_payload(ctx) >= ctx->bitstream.size - 512) 2330 kfifo_init(&ctx->bitstream_fifo, 2331 ctx->bitstream.vaddr, ctx->bitstream.size); 2332 } 2333 2334 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); 2335 src_fourcc = q_data_src->fourcc; 2336 2337 val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS); 2338 if (val != 1) 2339 pr_err("DEC_PIC_SUCCESS = %d\n", val); 2340 2341 success = val & 0x1; 2342 if (!success) 2343 v4l2_err(&dev->v4l2_dev, "decode failed\n"); 2344 2345 if (src_fourcc == V4L2_PIX_FMT_H264) { 2346 if (val & (1 << 3)) 2347 v4l2_err(&dev->v4l2_dev, 2348 "insufficient PS buffer space (%d bytes)\n", 2349 ctx->psbuf.size); 2350 if (val & (1 << 2)) 2351 v4l2_err(&dev->v4l2_dev, 2352 "insufficient slice buffer space (%d bytes)\n", 2353 ctx->slicebuf.size); 2354 } 2355 2356 val = coda_read(dev, CODA_RET_DEC_PIC_SIZE); 2357 width = (val >> 16) & 0xffff; 2358 height = val & 0xffff; 2359 2360 q_data_dst = get_q_data(ctx, 
V4L2_BUF_TYPE_VIDEO_CAPTURE); 2361 2362 /* frame crop information */ 2363 if (src_fourcc == V4L2_PIX_FMT_H264) { 2364 u32 left_right; 2365 u32 top_bottom; 2366 2367 left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT); 2368 top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM); 2369 2370 if (left_right == 0xffffffff && top_bottom == 0xffffffff) { 2371 /* Keep current crop information */ 2372 } else { 2373 struct v4l2_rect *rect = &q_data_dst->rect; 2374 2375 rect->left = left_right >> 16 & 0xffff; 2376 rect->top = top_bottom >> 16 & 0xffff; 2377 rect->width = width - rect->left - 2378 (left_right & 0xffff); 2379 rect->height = height - rect->top - 2380 (top_bottom & 0xffff); 2381 } 2382 } else { 2383 /* no cropping */ 2384 } 2385 2386 err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB); 2387 if (err_mb > 0) { 2388 if (__ratelimit(&dev->mb_err_rs)) 2389 coda_dbg(1, ctx, "errors in %d macroblocks\n", err_mb); 2390 v4l2_ctrl_s_ctrl(ctx->mb_err_cnt_ctrl, 2391 v4l2_ctrl_g_ctrl(ctx->mb_err_cnt_ctrl) + err_mb); 2392 } 2393 2394 if (dev->devtype->product == CODA_HX4 || 2395 dev->devtype->product == CODA_7541) { 2396 val = coda_read(dev, CODA_RET_DEC_PIC_OPTION); 2397 if (val == 0) { 2398 /* not enough bitstream data */ 2399 coda_dbg(1, ctx, "prescan failed: %d\n", val); 2400 ctx->hold = true; 2401 return; 2402 } 2403 } 2404 2405 /* Wait until the VDOA finished writing the previous display frame */ 2406 if (ctx->use_vdoa && 2407 ctx->display_idx >= 0 && 2408 ctx->display_idx < ctx->num_internal_frames) { 2409 err_vdoa = vdoa_wait_for_completion(ctx->vdoa); 2410 } 2411 2412 ctx->frm_dis_flg = coda_read(dev, 2413 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); 2414 2415 /* The previous display frame was copied out and can be overwritten */ 2416 if (ctx->display_idx >= 0 && 2417 ctx->display_idx < ctx->num_internal_frames) { 2418 ctx->frm_dis_flg &= ~(1 << ctx->display_idx); 2419 coda_write(dev, ctx->frm_dis_flg, 2420 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx)); 2421 } 2422 
2423 /* 2424 * The index of the last decoded frame, not necessarily in 2425 * display order, and the index of the next display frame. 2426 * The latter could have been decoded in a previous run. 2427 */ 2428 decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX); 2429 display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX); 2430 2431 if (decoded_idx == -1) { 2432 /* no frame was decoded, but we might have a display frame */ 2433 if (display_idx >= 0 && display_idx < ctx->num_internal_frames) 2434 ctx->sequence_offset++; 2435 else if (ctx->display_idx < 0) 2436 ctx->hold = true; 2437 } else if (decoded_idx == -2) { 2438 if (ctx->display_idx >= 0 && 2439 ctx->display_idx < ctx->num_internal_frames) 2440 ctx->sequence_offset++; 2441 /* no frame was decoded, we still return remaining buffers */ 2442 } else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) { 2443 v4l2_err(&dev->v4l2_dev, 2444 "decoded frame index out of range: %d\n", decoded_idx); 2445 } else { 2446 int sequence; 2447 2448 decoded_frame = &ctx->internal_frames[decoded_idx]; 2449 2450 val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM); 2451 if (ctx->sequence_offset == -1) 2452 ctx->sequence_offset = val; 2453 2454 sequence = val + ctx->first_frame_sequence 2455 - ctx->sequence_offset; 2456 spin_lock(&ctx->buffer_meta_lock); 2457 if (!list_empty(&ctx->buffer_meta_list)) { 2458 meta = list_first_entry(&ctx->buffer_meta_list, 2459 struct coda_buffer_meta, list); 2460 list_del(&meta->list); 2461 ctx->num_metas--; 2462 spin_unlock(&ctx->buffer_meta_lock); 2463 /* 2464 * Clamp counters to 16 bits for comparison, as the HW 2465 * counter rolls over at this point for h.264. This 2466 * may be different for other formats, but using 16 bits 2467 * should be enough to detect most errors and saves us 2468 * from doing different things based on the format. 
2469 */ 2470 if ((sequence & 0xffff) != (meta->sequence & 0xffff)) { 2471 v4l2_err(&dev->v4l2_dev, 2472 "sequence number mismatch (%d(%d) != %d)\n", 2473 sequence, ctx->sequence_offset, 2474 meta->sequence); 2475 } 2476 decoded_frame->meta = *meta; 2477 kfree(meta); 2478 } else { 2479 spin_unlock(&ctx->buffer_meta_lock); 2480 v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n"); 2481 memset(&decoded_frame->meta, 0, 2482 sizeof(struct coda_buffer_meta)); 2483 decoded_frame->meta.sequence = sequence; 2484 decoded_frame->meta.last = false; 2485 ctx->sequence_offset++; 2486 } 2487 2488 trace_coda_dec_pic_done(ctx, &decoded_frame->meta); 2489 2490 val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7; 2491 decoded_frame->type = (val == 0) ? V4L2_BUF_FLAG_KEYFRAME : 2492 (val == 1) ? V4L2_BUF_FLAG_PFRAME : 2493 V4L2_BUF_FLAG_BFRAME; 2494 2495 decoded_frame->error = err_mb; 2496 } 2497 2498 if (display_idx == -1) { 2499 /* 2500 * no more frames to be decoded, but there could still 2501 * be rotator output to dequeue 2502 */ 2503 ctx->hold = true; 2504 } else if (display_idx == -3) { 2505 /* possibly prescan failure */ 2506 } else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) { 2507 v4l2_err(&dev->v4l2_dev, 2508 "presentation frame index out of range: %d\n", 2509 display_idx); 2510 } 2511 2512 /* If a frame was copied out, return it */ 2513 if (ctx->display_idx >= 0 && 2514 ctx->display_idx < ctx->num_internal_frames) { 2515 struct coda_internal_frame *ready_frame; 2516 2517 ready_frame = &ctx->internal_frames[ctx->display_idx]; 2518 2519 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); 2520 dst_buf->sequence = ctx->osequence++; 2521 2522 dst_buf->field = V4L2_FIELD_NONE; 2523 dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME | 2524 V4L2_BUF_FLAG_PFRAME | 2525 V4L2_BUF_FLAG_BFRAME); 2526 dst_buf->flags |= ready_frame->type; 2527 meta = &ready_frame->meta; 2528 if (meta->last && !coda_reorder_enable(ctx)) { 2529 /* 2530 * If this was the last decoded frame, and 
reordering 2531 * is disabled, this will be the last display frame. 2532 */ 2533 coda_dbg(1, ctx, "last meta, marking as last frame\n"); 2534 dst_buf->flags |= V4L2_BUF_FLAG_LAST; 2535 } else if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG && 2536 display_idx == -1) { 2537 /* 2538 * If there is no designated presentation frame anymore, 2539 * this frame has to be the last one. 2540 */ 2541 coda_dbg(1, ctx, 2542 "no more frames to return, marking as last frame\n"); 2543 dst_buf->flags |= V4L2_BUF_FLAG_LAST; 2544 } 2545 dst_buf->timecode = meta->timecode; 2546 dst_buf->vb2_buf.timestamp = meta->timestamp; 2547 2548 trace_coda_dec_rot_done(ctx, dst_buf, meta); 2549 2550 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, 2551 q_data_dst->sizeimage); 2552 2553 if (ready_frame->error || err_vdoa) 2554 coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR); 2555 else 2556 coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE); 2557 2558 if (decoded_frame) { 2559 coda_dbg(1, ctx, "job finished: decoded %c frame %u, returned %c frame %u (%u/%u)%s\n", 2560 coda_frame_type_char(decoded_frame->type), 2561 decoded_frame->meta.sequence, 2562 coda_frame_type_char(dst_buf->flags), 2563 ready_frame->meta.sequence, 2564 dst_buf->sequence, ctx->qsequence, 2565 (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? 2566 " (last)" : ""); 2567 } else { 2568 coda_dbg(1, ctx, "job finished: no frame decoded (%d), returned %c frame %u (%u/%u)%s\n", 2569 decoded_idx, 2570 coda_frame_type_char(dst_buf->flags), 2571 ready_frame->meta.sequence, 2572 dst_buf->sequence, ctx->qsequence, 2573 (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? 
2574 " (last)" : ""); 2575 } 2576 } else { 2577 if (decoded_frame) { 2578 coda_dbg(1, ctx, "job finished: decoded %c frame %u, no frame returned (%d)\n", 2579 coda_frame_type_char(decoded_frame->type), 2580 decoded_frame->meta.sequence, 2581 ctx->display_idx); 2582 } else { 2583 coda_dbg(1, ctx, "job finished: no frame decoded (%d) or returned (%d)\n", 2584 decoded_idx, ctx->display_idx); 2585 } 2586 } 2587 2588 /* The rotator will copy the current display frame next time */ 2589 ctx->display_idx = display_idx; 2590 2591 /* 2592 * The current decode run might have brought the bitstream fill level 2593 * below the size where we can start the next decode run. As userspace 2594 * might have filled the output queue completely and might thus be 2595 * blocked, we can't rely on the next qbuf to trigger the bitstream 2596 * refill. Check if we have data to refill the bitstream now. 2597 */ 2598 mutex_lock(&ctx->bitstream_mutex); 2599 coda_fill_bitstream(ctx, NULL); 2600 mutex_unlock(&ctx->bitstream_mutex); 2601 } 2602 2603 static void coda_decode_timeout(struct coda_ctx *ctx) 2604 { 2605 struct vb2_v4l2_buffer *dst_buf; 2606 2607 /* 2608 * For now this only handles the case where we would deadlock with 2609 * userspace, i.e. userspace issued DEC_CMD_STOP and waits for EOS, 2610 * but after a failed decode run we would hold the context and wait for 2611 * userspace to queue more buffers. 
2612 */ 2613 if (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG)) 2614 return; 2615 2616 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); 2617 dst_buf->sequence = ctx->qsequence - 1; 2618 2619 coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR); 2620 } 2621 2622 const struct coda_context_ops coda_bit_decode_ops = { 2623 .queue_init = coda_decoder_queue_init, 2624 .reqbufs = coda_decoder_reqbufs, 2625 .start_streaming = coda_start_decoding, 2626 .prepare_run = coda_prepare_decode, 2627 .finish_run = coda_finish_decode, 2628 .run_timeout = coda_decode_timeout, 2629 .seq_init_work = coda_dec_seq_init_work, 2630 .seq_end_work = coda_seq_end_work, 2631 .release = coda_bit_release, 2632 }; 2633 2634 irqreturn_t coda_irq_handler(int irq, void *data) 2635 { 2636 struct coda_dev *dev = data; 2637 struct coda_ctx *ctx; 2638 2639 /* read status register to attend the IRQ */ 2640 coda_read(dev, CODA_REG_BIT_INT_STATUS); 2641 coda_write(dev, 0, CODA_REG_BIT_INT_REASON); 2642 coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET, 2643 CODA_REG_BIT_INT_CLEAR); 2644 2645 ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev); 2646 if (ctx == NULL) { 2647 v4l2_err(&dev->v4l2_dev, 2648 "Instance released before the end of transaction\n"); 2649 return IRQ_HANDLED; 2650 } 2651 2652 trace_coda_bit_done(ctx); 2653 2654 if (ctx->aborting) { 2655 coda_dbg(1, ctx, "task has been aborted\n"); 2656 } 2657 2658 if (coda_isbusy(ctx->dev)) { 2659 coda_dbg(1, ctx, "coda is still busy!!!!\n"); 2660 return IRQ_NONE; 2661 } 2662 2663 complete(&ctx->completion); 2664 2665 return IRQ_HANDLED; 2666 } 2667