1 2Upstream-Status: Inappropriate 3 4RPI-Distro repo clones original ffmpeg and applies patches to enable 5Raspberry Pi support. 6 7--- a/configure 8+++ b/configure 9@@ -205,6 +205,7 @@ External library support: 10 --disable-bzlib disable bzlib [autodetect] 11 --disable-coreimage disable Apple CoreImage framework [autodetect] 12 --enable-chromaprint enable audio fingerprinting with chromaprint [no] 13+ --disable-epoxy disable epoxy [autodetect] 14 --enable-frei0r enable frei0r video filtering [no] 15 --enable-gcrypt enable gcrypt, needed for rtmp(t)e support 16 if openssl, librtmp or gmp is not used [no] 17@@ -281,6 +282,7 @@ External library support: 18 if openssl, gnutls or mbedtls is not used [no] 19 --enable-libtwolame enable MP2 encoding via libtwolame [no] 20 --enable-libuavs3d enable AVS3 decoding via libuavs3d [no] 21+ --disable-libudev disable libudev [autodetect] 22 --enable-libv4l2 enable libv4l2/v4l-utils [no] 23 --enable-libvidstab enable video stabilization using vid.stab [no] 24 --enable-libvmaf enable vmaf filter via libvmaf [no] 25@@ -343,12 +345,16 @@ External library support: 26 --enable-libmfx enable Intel MediaSDK (AKA Quick Sync Video) code via libmfx [no] 27 --enable-libnpp enable Nvidia Performance Primitives-based code [no] 28 --enable-mmal enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no] 29+ --enable-sand enable sand video formats [rpi] 30+ --enable-vout-drm enable the vout_drm module - for internal testing only [no] 31+ --enable-vout-egl enable the vout_egl module - for internal testing only [no] 32 --disable-nvdec disable Nvidia video decoding acceleration (via hwaccel) [autodetect] 33 --disable-nvenc disable Nvidia video encoding code [autodetect] 34 --enable-omx enable OpenMAX IL code [no] 35 --enable-omx-rpi enable OpenMAX IL code for Raspberry Pi [no] 36 --enable-rkmpp enable Rockchip Media Process Platform code [no] 37 --disable-v4l2-m2m disable V4L2 mem2mem code [autodetect] 38+ --enable-v4l2-request 
enable V4L2 request API code [no] 39 --disable-vaapi disable Video Acceleration API (mainly Unix/Intel) code [autodetect] 40 --disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect] 41 --disable-videotoolbox disable VideoToolbox code [autodetect] 42@@ -1754,7 +1760,9 @@ EXTERNAL_AUTODETECT_LIBRARY_LIST=" 43 avfoundation 44 bzlib 45 coreimage 46+ epoxy 47 iconv 48+ libudev 49 libxcb 50 libxcb_shm 51 libxcb_shape 52@@ -1924,6 +1932,7 @@ HWACCEL_LIBRARY_LIST=" 53 mmal 54 omx 55 opencl 56+ v4l2_request 57 " 58 59 DOCUMENT_LIST=" 60@@ -1941,10 +1950,14 @@ FEATURE_LIST=" 61 omx_rpi 62 runtime_cpudetect 63 safe_bitstream_reader 64+ sand 65 shared 66 small 67 static 68 swscale_alpha 69+ vout_drm 70+ vout_egl 71+ v4l2_req_hevc_vx 72 " 73 74 # this list should be kept in linking order 75@@ -2501,6 +2514,7 @@ CONFIG_EXTRA=" 76 rtpdec 77 rtpenc_chain 78 rv34dsp 79+ sand 80 scene_sad 81 sinewin 82 snappy 83@@ -3011,6 +3025,7 @@ d3d11va_deps="dxva_h ID3D11VideoDecoder 84 dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32" 85 ffnvcodec_deps_any="libdl LoadLibrary" 86 nvdec_deps="ffnvcodec" 87+v4l2_request_deps="linux_videodev2_h linux_media_h v4l2_timeval_to_ns libdrm libudev" 88 vaapi_x11_deps="xlib_x11" 89 videotoolbox_hwaccel_deps="videotoolbox pthreads" 90 videotoolbox_hwaccel_extralibs="-framework QuartzCore" 91@@ -3054,6 +3069,8 @@ hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicP 92 hevc_dxva2_hwaccel_select="hevc_decoder" 93 hevc_nvdec_hwaccel_deps="nvdec" 94 hevc_nvdec_hwaccel_select="hevc_decoder" 95+hevc_v4l2request_hwaccel_deps="v4l2_request" 96+hevc_v4l2request_hwaccel_select="hevc_decoder" 97 hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC" 98 hevc_vaapi_hwaccel_select="hevc_decoder" 99 hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC" 100@@ -3539,8 +3556,11 @@ sndio_indev_deps="sndio" 101 sndio_outdev_deps="sndio" 102 v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h" 103 v4l2_indev_suggest="libv4l2" 
104+v4l2_outdev_deps="libdrm" 105 v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h" 106 v4l2_outdev_suggest="libv4l2" 107+vout_drm_outdev_deps="libdrm" 108+vout_egl_outdev_deps="xlib epoxy" 109 vfwcap_indev_deps="vfw32 vfwcap_defines" 110 xcbgrab_indev_deps="libxcb" 111 xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes" 112@@ -3745,6 +3765,7 @@ tonemap_opencl_filter_deps="opencl const 113 transpose_opencl_filter_deps="opencl" 114 transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags" 115 transpose_vulkan_filter_deps="vulkan spirv_compiler" 116+unsand_filter_select="sand" 117 unsharp_opencl_filter_deps="opencl" 118 uspp_filter_deps="gpl avcodec" 119 vaguedenoiser_filter_deps="gpl" 120@@ -6296,6 +6317,12 @@ if enabled xlib; then 121 disable xlib 122 fi 123 124+enabled libudev && 125+ check_pkg_config libudev libudev libudev.h udev_new 126+ 127+enabled epoxy && 128+ check_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version 129+ 130 check_headers direct.h 131 check_headers dirent.h 132 check_headers dxgidebug.h 133@@ -6735,8 +6762,16 @@ enabled rkmpp && { require_p 134 { enabled libdrm || 135 die "ERROR: rkmpp requires --enable-libdrm"; } 136 } 137+enabled v4l2_request && { enabled libdrm || 138+ die "ERROR: v4l2-request requires --enable-libdrm"; } && 139+ { enabled libudev || 140+ die "ERROR: v4l2-request requires libudev"; } 141 enabled vapoursynth && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init 142 143+enabled vout_drm && { enabled libdrm || die "ERROR: vout_drm requires --enable-libdrm"; } 144+ 145+enabled vout_egl && { enabled epoxy || die "ERROR: vout_egl requires epoxy"; } && 146+ { enabled xlib || die "ERROR: vout_egl requires xlib"; } 147 148 if enabled gcrypt; then 149 GCRYPT_CONFIG="${cross_prefix}libgcrypt-config" 150@@ -6817,6 +6852,10 @@ if enabled v4l2_m2m; then 151 check_cc vp9_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP9;" 152 fi 153 154+check_func_headers "linux/media.h 
linux/videodev2.h" v4l2_timeval_to_ns 155+check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC_SLICE;" 156+disable v4l2_req_hevc_vx 157+ 158 check_headers sys/videoio.h 159 test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete 160 161@@ -7305,6 +7344,9 @@ check_deps $CONFIG_LIST \ 162 163 enabled threads && ! enabled pthreads && ! enabled atomics_native && die "non pthread threading without atomics not supported, try adding --enable-pthreads or --cpu=i486 or higher if you are on x86" 164 165+# Sub-feature of hevc_v4l2request_hwaccel - can only be set once deps are done 166+enabled hevc_v4l2request_hwaccel && disabled hevc_v4l2_request && enable v4l2_req_hevc_vx 167+ 168 case $target_os in 169 haiku) 170 disable memalign 171--- a/fftools/ffmpeg.c 172+++ b/fftools/ffmpeg.c 173@@ -1953,8 +1953,8 @@ static int ifilter_send_frame(InputFilte 174 av_channel_layout_compare(&ifilter->ch_layout, &frame->ch_layout); 175 break; 176 case AVMEDIA_TYPE_VIDEO: 177- need_reinit |= ifilter->width != frame->width || 178- ifilter->height != frame->height; 179+ need_reinit |= ifilter->width != av_frame_cropped_width(frame) || 180+ ifilter->height != av_frame_cropped_height(frame); 181 break; 182 } 183 184@@ -1965,6 +1965,9 @@ static int ifilter_send_frame(InputFilte 185 (ifilter->hw_frames_ctx && ifilter->hw_frames_ctx->data != frame->hw_frames_ctx->data)) 186 need_reinit = 1; 187 188+ if (no_cvt_hw && fg->graph) 189+ need_reinit = 0; 190+ 191 if (sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX)) { 192 if (!ifilter->displaymatrix || memcmp(sd->data, ifilter->displaymatrix, sizeof(int32_t) * 9)) 193 need_reinit = 1; 194@@ -2220,8 +2223,7 @@ static int decode_video(InputStream *ist 195 decoded_frame->top_field_first = ist->top_field_first; 196 197 ist->frames_decoded++; 198- 199- if (ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) { 200+ if 
(!no_cvt_hw && ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) { 201 err = ist->hwaccel_retrieve_data(ist->dec_ctx, decoded_frame); 202 if (err < 0) 203 goto fail; 204@@ -2418,7 +2420,12 @@ static int process_input_packet(InputStr 205 case AVMEDIA_TYPE_VIDEO: 206 ret = decode_video (ist, repeating ? NULL : avpkt, &got_output, &duration_pts, !pkt, 207 &decode_failed); 208- if (!repeating || !pkt || got_output) { 209+ // Pi: Do not inc dts if no_cvt_hw set 210+ // V4L2 H264 decode has long latency and sometimes spits out a long 211+ // stream of output without input. In this case incrementing DTS is wrong. 212+ // There may be cases where the condition as written is correct so only 213+ // "fix" in the cases which cause problems 214+ if (!repeating || !pkt || (got_output && !no_cvt_hw)) { 215 if (pkt && pkt->duration) { 216 duration_dts = av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q); 217 } else if(ist->dec_ctx->framerate.num != 0 && ist->dec_ctx->framerate.den != 0) { 218@@ -2564,12 +2571,15 @@ static enum AVPixelFormat get_format(AVC 219 break; 220 221 if (ist->hwaccel_id == HWACCEL_GENERIC || 222- ist->hwaccel_id == HWACCEL_AUTO) { 223+ ist->hwaccel_id == HWACCEL_AUTO || 224+ no_cvt_hw) { 225 for (i = 0;; i++) { 226 config = avcodec_get_hw_config(s->codec, i); 227 if (!config) 228 break; 229- if (!(config->methods & 230+ if (no_cvt_hw && (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL)) 231+ av_log(s, AV_LOG_DEBUG, "no_cvt_hw so trying pix_fmt %d with codec internal hwaccel\n", *p); 232+ else if (!(config->methods & 233 AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) 234 continue; 235 if (config->pix_fmt == *p) 236--- a/fftools/ffmpeg.h 237+++ b/fftools/ffmpeg.h 238@@ -626,6 +626,7 @@ extern enum VideoSyncMethod video_sync_m 239 extern float frame_drop_threshold; 240 extern int do_benchmark; 241 extern int do_benchmark_all; 242+extern int no_cvt_hw; 243 extern int do_deinterlace; 244 extern int do_hex_dump; 245 extern 
int do_pkt_dump; 246--- a/fftools/ffmpeg_filter.c 247+++ b/fftools/ffmpeg_filter.c 248@@ -1175,8 +1175,8 @@ int ifilter_parameters_from_frame(InputF 249 250 ifilter->format = frame->format; 251 252- ifilter->width = frame->width; 253- ifilter->height = frame->height; 254+ ifilter->width = av_frame_cropped_width(frame); 255+ ifilter->height = av_frame_cropped_height(frame); 256 ifilter->sample_aspect_ratio = frame->sample_aspect_ratio; 257 258 ifilter->sample_rate = frame->sample_rate; 259--- a/fftools/ffmpeg_hw.c 260+++ b/fftools/ffmpeg_hw.c 261@@ -75,6 +75,8 @@ static char *hw_device_default_name(enum 262 char *name; 263 size_t index_pos; 264 int index, index_limit = 1000; 265+ if (!type_name) 266+ return NULL; 267 index_pos = strlen(type_name); 268 name = av_malloc(index_pos + 4); 269 if (!name) 270--- a/fftools/ffmpeg_opt.c 271+++ b/fftools/ffmpeg_opt.c 272@@ -162,6 +162,7 @@ enum VideoSyncMethod video_sync_method = 273 float frame_drop_threshold = 0; 274 int do_benchmark = 0; 275 int do_benchmark_all = 0; 276+int no_cvt_hw = 0; 277 int do_hex_dump = 0; 278 int do_pkt_dump = 0; 279 int copy_ts = 0; 280@@ -3724,6 +3725,8 @@ const OptionDef options[] = { 281 "add timings for benchmarking" }, 282 { "benchmark_all", OPT_BOOL | OPT_EXPERT, { &do_benchmark_all }, 283 "add timings for each task" }, 284+ { "no_cvt_hw", OPT_BOOL | OPT_EXPERT, { &no_cvt_hw }, 285+ "do not auto-convert hw frames to sw" }, 286 { "progress", HAS_ARG | OPT_EXPERT, { .func_arg = opt_progress }, 287 "write program-readable progress information", "url" }, 288 { "stdin", OPT_BOOL | OPT_EXPERT, { &stdin_interaction }, 289--- a/libavcodec/Makefile 290+++ b/libavcodec/Makefile 291@@ -161,7 +161,10 @@ OBJS-$(CONFIG_VIDEODSP) + 292 OBJS-$(CONFIG_VP3DSP) += vp3dsp.o 293 OBJS-$(CONFIG_VP56DSP) += vp56dsp.o 294 OBJS-$(CONFIG_VP8DSP) += vp8dsp.o 295-OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o 296+OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o 
v4l2_fmt.o\ 297+ weak_link.o v4l2_req_dmabufs.o 298+OBJS-$(CONFIG_V4L2_REQUEST) += v4l2_req_media.o v4l2_req_pollqueue.o v4l2_req_dmabufs.o\ 299+ v4l2_req_devscan.o weak_link.o 300 OBJS-$(CONFIG_WMA_FREQS) += wma_freqs.o 301 OBJS-$(CONFIG_WMV2DSP) += wmv2dsp.o 302 303@@ -972,6 +975,8 @@ OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) 304 OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o 305 OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o 306 OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o 307+OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o v4l2_req_decode_q.o v4l2_req_hevc_v4.o 308+OBJS-$(CONFIG_V4L2_REQ_HEVC_VX) += v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o 309 OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o 310 OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o 311 OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o 312--- a/libavcodec/avcodec.h 313+++ b/libavcodec/avcodec.h 314@@ -2212,6 +2212,17 @@ typedef struct AVHWAccel { 315 * that avctx->hwaccel_priv_data is invalid. 316 */ 317 int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); 318+ 319+ /** 320+ * Called if parsing fails 321+ * 322+ * An error has occurred, end_frame will not be called 323+ * start_frame & decode_slice may or may not have been called 324+ * Optional 325+ * 326+ * @param avctx the codec context 327+ */ 328+ void (*abort_frame)(AVCodecContext *avctx); 329 } AVHWAccel; 330 331 /** 332--- /dev/null 333+++ b/libavcodec/hevc-ctrls-v1.h 334@@ -0,0 +1,229 @@ 335+/* SPDX-License-Identifier: GPL-2.0 */ 336+/* 337+ * These are the HEVC state controls for use with stateless HEVC 338+ * codec drivers. 339+ * 340+ * It turns out that these structs are not stable yet and will undergo 341+ * more changes. So keep them private until they are stable and ready to 342+ * become part of the official public API. 
343+ */ 344+ 345+#ifndef _HEVC_CTRLS_H_ 346+#define _HEVC_CTRLS_H_ 347+ 348+#include <linux/videodev2.h> 349+ 350+/* The pixel format isn't stable at the moment and will likely be renamed. */ 351+#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ 352+ 353+#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_MPEG_BASE + 1008) 354+#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_MPEG_BASE + 1009) 355+#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_MPEG_BASE + 1010) 356+#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_MPEG_BASE + 1011) 357+#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_MPEG_BASE + 1015) 358+#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_MPEG_BASE + 1016) 359+ 360+/* enum v4l2_ctrl_type type values */ 361+#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120 362+#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121 363+#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122 364+#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123 365+ 366+enum v4l2_mpeg_video_hevc_decode_mode { 367+ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, 368+ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED, 369+}; 370+ 371+enum v4l2_mpeg_video_hevc_start_code { 372+ V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, 373+ V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B, 374+}; 375+ 376+#define V4L2_HEVC_SLICE_TYPE_B 0 377+#define V4L2_HEVC_SLICE_TYPE_P 1 378+#define V4L2_HEVC_SLICE_TYPE_I 2 379+ 380+#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) 381+#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) 382+#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) 383+#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) 384+#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) 385+#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) 386+#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) 387+#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) 388+#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) 389+ 390+/* 
The controls are not stable at the moment and will likely be reworked. */ 391+struct v4l2_ctrl_hevc_sps { 392+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ 393+ __u16 pic_width_in_luma_samples; 394+ __u16 pic_height_in_luma_samples; 395+ __u8 bit_depth_luma_minus8; 396+ __u8 bit_depth_chroma_minus8; 397+ __u8 log2_max_pic_order_cnt_lsb_minus4; 398+ __u8 sps_max_dec_pic_buffering_minus1; 399+ __u8 sps_max_num_reorder_pics; 400+ __u8 sps_max_latency_increase_plus1; 401+ __u8 log2_min_luma_coding_block_size_minus3; 402+ __u8 log2_diff_max_min_luma_coding_block_size; 403+ __u8 log2_min_luma_transform_block_size_minus2; 404+ __u8 log2_diff_max_min_luma_transform_block_size; 405+ __u8 max_transform_hierarchy_depth_inter; 406+ __u8 max_transform_hierarchy_depth_intra; 407+ __u8 pcm_sample_bit_depth_luma_minus1; 408+ __u8 pcm_sample_bit_depth_chroma_minus1; 409+ __u8 log2_min_pcm_luma_coding_block_size_minus3; 410+ __u8 log2_diff_max_min_pcm_luma_coding_block_size; 411+ __u8 num_short_term_ref_pic_sets; 412+ __u8 num_long_term_ref_pics_sps; 413+ __u8 chroma_format_idc; 414+ __u8 sps_max_sub_layers_minus1; 415+ 416+ __u64 flags; 417+}; 418+ 419+#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 0) 420+#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) 421+#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) 422+#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) 423+#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) 424+#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) 425+#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) 426+#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) 427+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) 428+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) 429+#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) 430+#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) 431+#define 
V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) 432+#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) 433+#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) 434+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) 435+#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) 436+#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) 437+#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) 438+ 439+struct v4l2_ctrl_hevc_pps { 440+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ 441+ __u8 num_extra_slice_header_bits; 442+ __s8 init_qp_minus26; 443+ __u8 diff_cu_qp_delta_depth; 444+ __s8 pps_cb_qp_offset; 445+ __s8 pps_cr_qp_offset; 446+ __u8 num_tile_columns_minus1; 447+ __u8 num_tile_rows_minus1; 448+ __u8 column_width_minus1[20]; 449+ __u8 row_height_minus1[22]; 450+ __s8 pps_beta_offset_div2; 451+ __s8 pps_tc_offset_div2; 452+ __u8 log2_parallel_merge_level_minus2; 453+ 454+ __u8 padding[4]; 455+ __u64 flags; 456+}; 457+ 458+#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE 0x01 459+#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER 0x02 460+#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR 0x03 461+ 462+#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 463+ 464+struct v4l2_hevc_dpb_entry { 465+ __u64 timestamp; 466+ __u8 rps; 467+ __u8 field_pic; 468+ __u16 pic_order_cnt[2]; 469+ __u8 padding[2]; 470+}; 471+ 472+struct v4l2_hevc_pred_weight_table { 473+ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 474+ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 475+ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 476+ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 477+ 478+ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 479+ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 480+ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 481+ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 
482+ 483+ __u8 padding[6]; 484+ 485+ __u8 luma_log2_weight_denom; 486+ __s8 delta_chroma_log2_weight_denom; 487+}; 488+ 489+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) 490+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) 491+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) 492+#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) 493+#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) 494+#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) 495+#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) 496+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) 497+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) 498+#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) 499+ 500+struct v4l2_ctrl_hevc_slice_params { 501+ __u32 bit_size; 502+ __u32 data_bit_offset; 503+ 504+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ 505+ __u32 slice_segment_addr; 506+ __u32 num_entry_point_offsets; 507+ 508+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ 509+ __u8 nal_unit_type; 510+ __u8 nuh_temporal_id_plus1; 511+ 512+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ 513+ __u8 slice_type; 514+ __u8 colour_plane_id; 515+ __u16 slice_pic_order_cnt; 516+ __u8 num_ref_idx_l0_active_minus1; 517+ __u8 num_ref_idx_l1_active_minus1; 518+ __u8 collocated_ref_idx; 519+ __u8 five_minus_max_num_merge_cand; 520+ __s8 slice_qp_delta; 521+ __s8 slice_cb_qp_offset; 522+ __s8 slice_cr_qp_offset; 523+ __s8 slice_act_y_qp_offset; 524+ __s8 slice_act_cb_qp_offset; 525+ __s8 slice_act_cr_qp_offset; 526+ __s8 slice_beta_offset_div2; 527+ __s8 slice_tc_offset_div2; 528+ 529+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ 530+ __u8 pic_struct; 531+ 532+ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: General slice segment header */ 533+ __u8 num_active_dpb_entries; 534+ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 535+ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 536+ 537+ __u8 num_rps_poc_st_curr_before; 538+ __u8 num_rps_poc_st_curr_after; 539+ __u8 num_rps_poc_lt_curr; 540+ 541+ __u8 padding; 542+ 543+ __u32 entry_point_offset_minus1[256]; 544+ 545+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ 546+ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 547+ 548+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ 549+ struct v4l2_hevc_pred_weight_table pred_weight_table; 550+ 551+ __u64 flags; 552+}; 553+ 554+struct v4l2_ctrl_hevc_scaling_matrix { 555+ __u8 scaling_list_4x4[6][16]; 556+ __u8 scaling_list_8x8[6][64]; 557+ __u8 scaling_list_16x16[6][64]; 558+ __u8 scaling_list_32x32[2][64]; 559+ __u8 scaling_list_dc_coef_16x16[6]; 560+ __u8 scaling_list_dc_coef_32x32[2]; 561+}; 562+ 563+#endif 564--- /dev/null 565+++ b/libavcodec/hevc-ctrls-v2.h 566@@ -0,0 +1,257 @@ 567+/* SPDX-License-Identifier: GPL-2.0 */ 568+/* 569+ * These are the HEVC state controls for use with stateless HEVC 570+ * codec drivers. 571+ * 572+ * It turns out that these structs are not stable yet and will undergo 573+ * more changes. So keep them private until they are stable and ready to 574+ * become part of the official public API. 575+ */ 576+ 577+#ifndef _HEVC_CTRLS_H_ 578+#define _HEVC_CTRLS_H_ 579+ 580+#include <linux/videodev2.h> 581+ 582+/* The pixel format isn't stable at the moment and will likely be renamed. 
*/ 583+#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ 584+ 585+#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_CODEC_BASE + 1008) 586+#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_CODEC_BASE + 1009) 587+#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_BASE + 1010) 588+#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_BASE + 1011) 589+#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_BASE + 1012) 590+#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_CODEC_BASE + 1015) 591+#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_CODEC_BASE + 1016) 592+ 593+/* enum v4l2_ctrl_type type values */ 594+#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120 595+#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121 596+#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122 597+#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123 598+#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124 599+ 600+enum v4l2_mpeg_video_hevc_decode_mode { 601+ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, 602+ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED, 603+}; 604+ 605+enum v4l2_mpeg_video_hevc_start_code { 606+ V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, 607+ V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B, 608+}; 609+ 610+#define V4L2_HEVC_SLICE_TYPE_B 0 611+#define V4L2_HEVC_SLICE_TYPE_P 1 612+#define V4L2_HEVC_SLICE_TYPE_I 2 613+ 614+#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) 615+#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) 616+#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) 617+#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) 618+#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) 619+#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) 620+#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) 621+#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) 622+#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) 623+ 624+/* The controls are not stable at the moment and will 
likely be reworked. */ 625+struct v4l2_ctrl_hevc_sps { 626+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ 627+ __u16 pic_width_in_luma_samples; 628+ __u16 pic_height_in_luma_samples; 629+ __u8 bit_depth_luma_minus8; 630+ __u8 bit_depth_chroma_minus8; 631+ __u8 log2_max_pic_order_cnt_lsb_minus4; 632+ __u8 sps_max_dec_pic_buffering_minus1; 633+ __u8 sps_max_num_reorder_pics; 634+ __u8 sps_max_latency_increase_plus1; 635+ __u8 log2_min_luma_coding_block_size_minus3; 636+ __u8 log2_diff_max_min_luma_coding_block_size; 637+ __u8 log2_min_luma_transform_block_size_minus2; 638+ __u8 log2_diff_max_min_luma_transform_block_size; 639+ __u8 max_transform_hierarchy_depth_inter; 640+ __u8 max_transform_hierarchy_depth_intra; 641+ __u8 pcm_sample_bit_depth_luma_minus1; 642+ __u8 pcm_sample_bit_depth_chroma_minus1; 643+ __u8 log2_min_pcm_luma_coding_block_size_minus3; 644+ __u8 log2_diff_max_min_pcm_luma_coding_block_size; 645+ __u8 num_short_term_ref_pic_sets; 646+ __u8 num_long_term_ref_pics_sps; 647+ __u8 chroma_format_idc; 648+ __u8 sps_max_sub_layers_minus1; 649+ 650+ __u64 flags; 651+}; 652+ 653+#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) 654+#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) 655+#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) 656+#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) 657+#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) 658+#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) 659+#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) 660+#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) 661+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) 662+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) 663+#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) 664+#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) 665+#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) 666+#define 
V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) 667+#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) 668+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) 669+#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) 670+#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) 671+#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) 672+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) 673+#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) 674+ 675+struct v4l2_ctrl_hevc_pps { 676+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ 677+ __u8 num_extra_slice_header_bits; 678+ __u8 num_ref_idx_l0_default_active_minus1; 679+ __u8 num_ref_idx_l1_default_active_minus1; 680+ __s8 init_qp_minus26; 681+ __u8 diff_cu_qp_delta_depth; 682+ __s8 pps_cb_qp_offset; 683+ __s8 pps_cr_qp_offset; 684+ __u8 num_tile_columns_minus1; 685+ __u8 num_tile_rows_minus1; 686+ __u8 column_width_minus1[20]; 687+ __u8 row_height_minus1[22]; 688+ __s8 pps_beta_offset_div2; 689+ __s8 pps_tc_offset_div2; 690+ __u8 log2_parallel_merge_level_minus2; 691+ 692+ __u8 padding[4]; 693+ __u64 flags; 694+}; 695+ 696+#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE 0x01 697+#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER 0x02 698+#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR 0x03 699+ 700+#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 701+ 702+struct v4l2_hevc_dpb_entry { 703+ __u64 timestamp; 704+ __u8 rps; 705+ __u8 field_pic; 706+ __u16 pic_order_cnt[2]; 707+ __u8 padding[2]; 708+}; 709+ 710+struct v4l2_hevc_pred_weight_table { 711+ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 712+ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 713+ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 714+ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 715+ 716+ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 717+ __s8 
luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 718+ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 719+ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 720+ 721+ __u8 padding[6]; 722+ 723+ __u8 luma_log2_weight_denom; 724+ __s8 delta_chroma_log2_weight_denom; 725+}; 726+ 727+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) 728+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) 729+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) 730+#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) 731+#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) 732+#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) 733+#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) 734+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) 735+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) 736+#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) 737+ 738+struct v4l2_ctrl_hevc_slice_params { 739+ __u32 bit_size; 740+ __u32 data_bit_offset; 741+ 742+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ 743+ __u32 slice_segment_addr; 744+ __u32 num_entry_point_offsets; 745+ 746+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ 747+ __u8 nal_unit_type; 748+ __u8 nuh_temporal_id_plus1; 749+ 750+ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: General slice segment header */ 751+ __u8 slice_type; 752+ __u8 colour_plane_id; 753+ __u16 slice_pic_order_cnt; 754+ __u8 num_ref_idx_l0_active_minus1; 755+ __u8 num_ref_idx_l1_active_minus1; 756+ __u8 collocated_ref_idx; 757+ __u8 five_minus_max_num_merge_cand; 758+ __s8 slice_qp_delta; 759+ __s8 slice_cb_qp_offset; 760+ __s8 slice_cr_qp_offset; 761+ __s8 slice_act_y_qp_offset; 762+ __s8 slice_act_cb_qp_offset; 763+ __s8 slice_act_cr_qp_offset; 764+ __s8 slice_beta_offset_div2; 765+ __s8 slice_tc_offset_div2; 766+ 767+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ 768+ __u8 pic_struct; 769+ 770+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ 771+ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 772+ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 773+ 774+ __u8 padding[5]; 775+ 776+ __u32 entry_point_offset_minus1[256]; 777+ 778+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ 779+ struct v4l2_hevc_pred_weight_table pred_weight_table; 780+ 781+ __u64 flags; 782+}; 783+ 784+#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 785+#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 786+#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 787+ 788+struct v4l2_ctrl_hevc_decode_params { 789+ __s32 pic_order_cnt_val; 790+ __u8 num_active_dpb_entries; 791+ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 792+ __u8 num_poc_st_curr_before; 793+ __u8 num_poc_st_curr_after; 794+ __u8 num_poc_lt_curr; 795+ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 796+ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 797+ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 798+ __u64 flags; 799+}; 800+ 801+/* MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */ 802+#define V4L2_CID_CODEC_HANTRO_BASE (V4L2_CTRL_CLASS_CODEC | 0x1200) 803+/* 804+ * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP - 805+ * the number of data (in bits) to skip in the 806+ * slice segment header. 
807+ * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag" 808+ * to before syntax element "slice_temporal_mvp_enabled_flag". 809+ * If IDR, the skipped bits are just "pic_output_flag" 810+ * (separate_colour_plane_flag is not supported). 811+ */ 812+#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (V4L2_CID_CODEC_HANTRO_BASE + 0) 813+ 814+struct v4l2_ctrl_hevc_scaling_matrix { 815+ __u8 scaling_list_4x4[6][16]; 816+ __u8 scaling_list_8x8[6][64]; 817+ __u8 scaling_list_16x16[6][64]; 818+ __u8 scaling_list_32x32[2][64]; 819+ __u8 scaling_list_dc_coef_16x16[6]; 820+ __u8 scaling_list_dc_coef_32x32[2]; 821+}; 822+ 823+#endif 824--- /dev/null 825+++ b/libavcodec/hevc-ctrls-v3.h 826@@ -0,0 +1,255 @@ 827+/* SPDX-License-Identifier: GPL-2.0 */ 828+/* 829+ * These are the HEVC state controls for use with stateless HEVC 830+ * codec drivers. 831+ * 832+ * It turns out that these structs are not stable yet and will undergo 833+ * more changes. So keep them private until they are stable and ready to 834+ * become part of the official public API. 835+ */ 836+ 837+#ifndef _HEVC_CTRLS_H_ 838+#define _HEVC_CTRLS_H_ 839+ 840+#include <linux/videodev2.h> 841+ 842+/* The pixel format isn't stable at the moment and will likely be renamed. 
*/ 843+#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ 844+ 845+#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_CODEC_BASE + 1008) 846+#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_CODEC_BASE + 1009) 847+#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_BASE + 1010) 848+#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_BASE + 1011) 849+#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_BASE + 1012) 850+#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_CODEC_BASE + 1015) 851+#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_CODEC_BASE + 1016) 852+ 853+/* enum v4l2_ctrl_type type values */ 854+#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120 855+#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121 856+#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122 857+#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123 858+#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124 859+ 860+enum v4l2_mpeg_video_hevc_decode_mode { 861+ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, 862+ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED, 863+}; 864+ 865+enum v4l2_mpeg_video_hevc_start_code { 866+ V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, 867+ V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B, 868+}; 869+ 870+#define V4L2_HEVC_SLICE_TYPE_B 0 871+#define V4L2_HEVC_SLICE_TYPE_P 1 872+#define V4L2_HEVC_SLICE_TYPE_I 2 873+ 874+#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) 875+#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) 876+#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) 877+#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) 878+#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) 879+#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) 880+#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) 881+#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) 882+#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) 883+ 884+/* The controls are not stable at the moment and will 
likely be reworked. */ 885+struct v4l2_ctrl_hevc_sps { 886+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ 887+ __u16 pic_width_in_luma_samples; 888+ __u16 pic_height_in_luma_samples; 889+ __u8 bit_depth_luma_minus8; 890+ __u8 bit_depth_chroma_minus8; 891+ __u8 log2_max_pic_order_cnt_lsb_minus4; 892+ __u8 sps_max_dec_pic_buffering_minus1; 893+ __u8 sps_max_num_reorder_pics; 894+ __u8 sps_max_latency_increase_plus1; 895+ __u8 log2_min_luma_coding_block_size_minus3; 896+ __u8 log2_diff_max_min_luma_coding_block_size; 897+ __u8 log2_min_luma_transform_block_size_minus2; 898+ __u8 log2_diff_max_min_luma_transform_block_size; 899+ __u8 max_transform_hierarchy_depth_inter; 900+ __u8 max_transform_hierarchy_depth_intra; 901+ __u8 pcm_sample_bit_depth_luma_minus1; 902+ __u8 pcm_sample_bit_depth_chroma_minus1; 903+ __u8 log2_min_pcm_luma_coding_block_size_minus3; 904+ __u8 log2_diff_max_min_pcm_luma_coding_block_size; 905+ __u8 num_short_term_ref_pic_sets; 906+ __u8 num_long_term_ref_pics_sps; 907+ __u8 chroma_format_idc; 908+ __u8 sps_max_sub_layers_minus1; 909+ 910+ __u64 flags; 911+}; 912+ 913+#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) 914+#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) 915+#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) 916+#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) 917+#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) 918+#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) 919+#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) 920+#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) 921+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) 922+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) 923+#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) 924+#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) 925+#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) 926+#define 
V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) 927+#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) 928+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) 929+#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) 930+#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) 931+#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) 932+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) 933+#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) 934+ 935+struct v4l2_ctrl_hevc_pps { 936+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ 937+ __u8 num_extra_slice_header_bits; 938+ __u8 num_ref_idx_l0_default_active_minus1; 939+ __u8 num_ref_idx_l1_default_active_minus1; 940+ __s8 init_qp_minus26; 941+ __u8 diff_cu_qp_delta_depth; 942+ __s8 pps_cb_qp_offset; 943+ __s8 pps_cr_qp_offset; 944+ __u8 num_tile_columns_minus1; 945+ __u8 num_tile_rows_minus1; 946+ __u8 column_width_minus1[20]; 947+ __u8 row_height_minus1[22]; 948+ __s8 pps_beta_offset_div2; 949+ __s8 pps_tc_offset_div2; 950+ __u8 log2_parallel_merge_level_minus2; 951+ 952+ __u8 padding[4]; 953+ __u64 flags; 954+}; 955+ 956+#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01 957+ 958+#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 959+ 960+struct v4l2_hevc_dpb_entry { 961+ __u64 timestamp; 962+ __u8 flags; 963+ __u8 field_pic; 964+ __u16 pic_order_cnt[2]; 965+ __u8 padding[2]; 966+}; 967+ 968+struct v4l2_hevc_pred_weight_table { 969+ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 970+ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 971+ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 972+ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 973+ 974+ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 975+ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 976+ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 
977+ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 978+ 979+ __u8 padding[6]; 980+ 981+ __u8 luma_log2_weight_denom; 982+ __s8 delta_chroma_log2_weight_denom; 983+}; 984+ 985+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) 986+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) 987+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) 988+#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) 989+#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) 990+#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) 991+#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) 992+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) 993+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) 994+#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) 995+ 996+struct v4l2_ctrl_hevc_slice_params { 997+ __u32 bit_size; 998+ __u32 data_bit_offset; 999+ 1000+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ 1001+ __u32 slice_segment_addr; 1002+ __u32 num_entry_point_offsets; 1003+ 1004+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ 1005+ __u8 nal_unit_type; 1006+ __u8 nuh_temporal_id_plus1; 1007+ 1008+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ 1009+ __u8 slice_type; 1010+ __u8 colour_plane_id; 1011+ __u16 slice_pic_order_cnt; 1012+ __u8 num_ref_idx_l0_active_minus1; 1013+ __u8 num_ref_idx_l1_active_minus1; 1014+ __u8 collocated_ref_idx; 1015+ __u8 five_minus_max_num_merge_cand; 1016+ __s8 slice_qp_delta; 1017+ __s8 slice_cb_qp_offset; 1018+ __s8 slice_cr_qp_offset; 1019+ __s8 slice_act_y_qp_offset; 1020+ __s8 slice_act_cb_qp_offset; 1021+ __s8 slice_act_cr_qp_offset; 1022+ __s8 slice_beta_offset_div2; 1023+ __s8 slice_tc_offset_div2; 1024+ 1025+ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: Picture timing SEI message */ 1026+ __u8 pic_struct; 1027+ 1028+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ 1029+ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1030+ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1031+ 1032+ __u8 padding[5]; 1033+ 1034+ __u32 entry_point_offset_minus1[256]; 1035+ 1036+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ 1037+ struct v4l2_hevc_pred_weight_table pred_weight_table; 1038+ 1039+ __u64 flags; 1040+}; 1041+ 1042+#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 1043+#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 1044+#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 1045+ 1046+struct v4l2_ctrl_hevc_decode_params { 1047+ __s32 pic_order_cnt_val; 1048+ __u8 num_active_dpb_entries; 1049+ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1050+ __u8 num_poc_st_curr_before; 1051+ __u8 num_poc_st_curr_after; 1052+ __u8 num_poc_lt_curr; 1053+ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1054+ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1055+ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1056+ __u64 flags; 1057+}; 1058+ 1059+struct v4l2_ctrl_hevc_scaling_matrix { 1060+ __u8 scaling_list_4x4[6][16]; 1061+ __u8 scaling_list_8x8[6][64]; 1062+ __u8 scaling_list_16x16[6][64]; 1063+ __u8 scaling_list_32x32[2][64]; 1064+ __u8 scaling_list_dc_coef_16x16[6]; 1065+ __u8 scaling_list_dc_coef_32x32[2]; 1066+}; 1067+ 1068+/* MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */ 1069+#define V4L2_CID_CODEC_HANTRO_BASE (V4L2_CTRL_CLASS_CODEC | 0x1200) 1070+/* 1071+ * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP - 1072+ * the number of data (in bits) to skip in the 1073+ * slice segment header. 1074+ * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag" 1075+ * to before syntax element "slice_temporal_mvp_enabled_flag". 
1076+ * If IDR, the skipped bits are just "pic_output_flag" 1077+ * (separate_colour_plane_flag is not supported). 1078+ */ 1079+#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (V4L2_CID_CODEC_HANTRO_BASE + 0) 1080+ 1081+#endif 1082--- /dev/null 1083+++ b/libavcodec/hevc-ctrls-v4.h 1084@@ -0,0 +1,524 @@ 1085+/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */ 1086+/* 1087+ * Video for Linux Two controls header file 1088+ * 1089+ * Copyright (C) 1999-2012 the contributors 1090+ * 1091+ * This program is free software; you can redistribute it and/or modify 1092+ * it under the terms of the GNU General Public License as published by 1093+ * the Free Software Foundation; either version 2 of the License, or 1094+ * (at your option) any later version. 1095+ * 1096+ * This program is distributed in the hope that it will be useful, 1097+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 1098+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1099+ * GNU General Public License for more details. 1100+ * 1101+ * Alternatively you can redistribute this file under the terms of the 1102+ * BSD license as stated below: 1103+ * 1104+ * Redistribution and use in source and binary forms, with or without 1105+ * modification, are permitted provided that the following conditions 1106+ * are met: 1107+ * 1. Redistributions of source code must retain the above copyright 1108+ * notice, this list of conditions and the following disclaimer. 1109+ * 2. Redistributions in binary form must reproduce the above copyright 1110+ * notice, this list of conditions and the following disclaimer in 1111+ * the documentation and/or other materials provided with the 1112+ * distribution. 1113+ * 3. The names of its contributors may not be used to endorse or promote 1114+ * products derived from this software without specific prior written 1115+ * permission. 
1116+ * 1117+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1118+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1119+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1120+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1121+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1122+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 1123+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 1124+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 1125+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 1126+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 1127+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1128+ * 1129+ * The contents of this header was split off from videodev2.h. All control 1130+ * definitions should be added to this header, which is included by 1131+ * videodev2.h. 
1132+ */ 1133+ 1134+#ifndef AVCODEC_HEVC_CTRLS_V4_H 1135+#define AVCODEC_HEVC_CTRLS_V4_H 1136+ 1137+#include <linux/const.h> 1138+#include <linux/types.h> 1139+ 1140+#ifndef V4L2_CTRL_CLASS_CODEC_STATELESS 1141+#define V4L2_CTRL_CLASS_CODEC_STATELESS 0x00a40000 /* Stateless codecs controls */ 1142+#endif 1143+#ifndef V4L2_CID_CODEC_STATELESS_BASE 1144+#define V4L2_CID_CODEC_STATELESS_BASE (V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900) 1145+#endif 1146+ 1147+#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ 1148+ 1149+#define V4L2_CID_STATELESS_HEVC_SPS (V4L2_CID_CODEC_STATELESS_BASE + 400) 1150+#define V4L2_CID_STATELESS_HEVC_PPS (V4L2_CID_CODEC_STATELESS_BASE + 401) 1151+#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 402) 1152+#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_STATELESS_BASE + 403) 1153+#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 404) 1154+#define V4L2_CID_STATELESS_HEVC_DECODE_MODE (V4L2_CID_CODEC_STATELESS_BASE + 405) 1155+#define V4L2_CID_STATELESS_HEVC_START_CODE (V4L2_CID_CODEC_STATELESS_BASE + 406) 1156+#define V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS (V4L2_CID_CODEC_STATELESS_BASE + 407) 1157+ 1158+enum v4l2_stateless_hevc_decode_mode { 1159+ V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED, 1160+ V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, 1161+}; 1162+ 1163+enum v4l2_stateless_hevc_start_code { 1164+ V4L2_STATELESS_HEVC_START_CODE_NONE, 1165+ V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, 1166+}; 1167+ 1168+#define V4L2_HEVC_SLICE_TYPE_B 0 1169+#define V4L2_HEVC_SLICE_TYPE_P 1 1170+#define V4L2_HEVC_SLICE_TYPE_I 2 1171+ 1172+#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) 1173+#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) 1174+#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) 1175+#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) 1176+#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) 1177+#define 
V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) 1178+#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) 1179+#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) 1180+#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) 1181+ 1182+/** 1183+ * struct v4l2_ctrl_hevc_sps - ITU-T Rec. H.265: Sequence parameter set 1184+ * 1185+ * @video_parameter_set_id: specifies the value of the 1186+ * vps_video_parameter_set_id of the active VPS 1187+ * @seq_parameter_set_id: provides an identifier for the SPS for 1188+ * reference by other syntax elements 1189+ * @pic_width_in_luma_samples: specifies the width of each decoded picture 1190+ * in units of luma samples 1191+ * @pic_height_in_luma_samples: specifies the height of each decoded picture 1192+ * in units of luma samples 1193+ * @bit_depth_luma_minus8: this value plus 8 specifies the bit depth of the 1194+ * samples of the luma array 1195+ * @bit_depth_chroma_minus8: this value plus 8 specifies the bit depth of the 1196+ * samples of the chroma arrays 1197+ * @log2_max_pic_order_cnt_lsb_minus4: this value plus 4 specifies the value of 1198+ * the variable MaxPicOrderCntLsb 1199+ * @sps_max_dec_pic_buffering_minus1: this value plus 1 specifies the maximum 1200+ * required size of the decoded picture 1201+ * buffer for the codec video sequence 1202+ * @sps_max_num_reorder_pics: indicates the maximum allowed number of pictures 1203+ * @sps_max_latency_increase_plus1: not equal to 0 is used to compute the 1204+ * value of SpsMaxLatencyPictures array 1205+ * @log2_min_luma_coding_block_size_minus3: this value plus 3 specifies the minimum 1206+ * luma coding block size 1207+ * @log2_diff_max_min_luma_coding_block_size: specifies the difference between 1208+ * the maximum and minimum luma 1209+ * coding block size 1210+ * @log2_min_luma_transform_block_size_minus2: this value plus 2 specifies the minimum luma 1211+ * transform block size 1212+ * @log2_diff_max_min_luma_transform_block_size: specifies
the difference between 1213+ * the maximum and minimum luma 1214+ * transform block size 1215+ * @max_transform_hierarchy_depth_inter: specifies the maximum hierarchy 1216+ * depth for transform units of 1217+ * coding units coded in inter 1218+ * prediction mode 1219+ * @max_transform_hierarchy_depth_intra: specifies the maximum hierarchy 1220+ * depth for transform units of 1221+ * coding units coded in intra 1222+ * prediction mode 1223+ * @pcm_sample_bit_depth_luma_minus1: this value plus 1 specifies the number of 1224+ * bits used to represent each of PCM sample 1225+ * values of the luma component 1226+ * @pcm_sample_bit_depth_chroma_minus1: this value plus 1 specifies the number 1227+ * of bits used to represent each of PCM 1228+ * sample values of the chroma components 1229+ * @log2_min_pcm_luma_coding_block_size_minus3: this value plus 3 specifies the 1230+ * minimum size of coding blocks 1231+ * @log2_diff_max_min_pcm_luma_coding_block_size: specifies the difference between 1232+ * the maximum and minimum size of 1233+ * coding blocks 1234+ * @num_short_term_ref_pic_sets: specifies the number of st_ref_pic_set() 1235+ * syntax structures included in the SPS 1236+ * @num_long_term_ref_pics_sps: specifies the number of candidate long-term 1237+ * reference pictures that are specified in the SPS 1238+ * @chroma_format_idc: specifies the chroma sampling 1239+ * @sps_max_sub_layers_minus1: this value plus 1 specifies the maximum number 1240+ * of temporal sub-layers 1241+ * @reserved: padding field. Should be zeroed by applications. 
1242+ * @flags: see V4L2_HEVC_SPS_FLAG_{} 1243+ */ 1244+struct v4l2_ctrl_hevc_sps { 1245+ __u8 video_parameter_set_id; 1246+ __u8 seq_parameter_set_id; 1247+ __u16 pic_width_in_luma_samples; 1248+ __u16 pic_height_in_luma_samples; 1249+ __u8 bit_depth_luma_minus8; 1250+ __u8 bit_depth_chroma_minus8; 1251+ __u8 log2_max_pic_order_cnt_lsb_minus4; 1252+ __u8 sps_max_dec_pic_buffering_minus1; 1253+ __u8 sps_max_num_reorder_pics; 1254+ __u8 sps_max_latency_increase_plus1; 1255+ __u8 log2_min_luma_coding_block_size_minus3; 1256+ __u8 log2_diff_max_min_luma_coding_block_size; 1257+ __u8 log2_min_luma_transform_block_size_minus2; 1258+ __u8 log2_diff_max_min_luma_transform_block_size; 1259+ __u8 max_transform_hierarchy_depth_inter; 1260+ __u8 max_transform_hierarchy_depth_intra; 1261+ __u8 pcm_sample_bit_depth_luma_minus1; 1262+ __u8 pcm_sample_bit_depth_chroma_minus1; 1263+ __u8 log2_min_pcm_luma_coding_block_size_minus3; 1264+ __u8 log2_diff_max_min_pcm_luma_coding_block_size; 1265+ __u8 num_short_term_ref_pic_sets; 1266+ __u8 num_long_term_ref_pics_sps; 1267+ __u8 chroma_format_idc; 1268+ __u8 sps_max_sub_layers_minus1; 1269+ 1270+ __u8 reserved[6]; 1271+ __u64 flags; 1272+}; 1273+ 1274+#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) 1275+#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) 1276+#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) 1277+#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) 1278+#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) 1279+#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) 1280+#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) 1281+#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) 1282+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) 1283+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) 1284+#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) 1285+#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) 
1286+#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) 1287+#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) 1288+#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) 1289+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) 1290+#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) 1291+#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) 1292+#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) 1293+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) 1294+#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) 1295+ 1296+/** 1297+ * struct v4l2_ctrl_hevc_pps - ITU-T Rec. H.265: Picture parameter set 1298+ * 1299+ * @pic_parameter_set_id: identifies the PPS for reference by other 1300+ * syntax elements 1301+ * @num_extra_slice_header_bits: specifies the number of extra slice header 1302+ * bits that are present in the slice header RBSP 1303+ * for coded pictures referring to the PPS. 
1304+ * @num_ref_idx_l0_default_active_minus1: this value plus 1 specifies the 1305+ * inferred value of num_ref_idx_l0_active_minus1 1306+ * @num_ref_idx_l1_default_active_minus1: this value plus 1 specifies the 1307+ * inferred value of num_ref_idx_l1_active_minus1 1308+ * @init_qp_minus26: this value plus 26 specifies the initial value of SliceQp Y for 1309+ * each slice referring to the PPS 1310+ * @diff_cu_qp_delta_depth: specifies the difference between the luma coding 1311+ * tree block size and the minimum luma coding block 1312+ * size of coding units that convey cu_qp_delta_abs 1313+ * and cu_qp_delta_sign_flag 1314+ * @pps_cb_qp_offset: specifies the offset to the luma quantization parameter Cb 1315+ * @pps_cr_qp_offset: specifies the offset to the luma quantization parameter Cr 1316+ * @num_tile_columns_minus1: this value plus 1 specifies the number of tile columns 1317+ * partitioning the picture 1318+ * @num_tile_rows_minus1: this value plus 1 specifies the number of tile rows partitioning 1319+ * the picture 1320+ * @column_width_minus1: this value plus 1 specifies the width of each tile column in 1321+ * units of coding tree blocks 1322+ * @row_height_minus1: this value plus 1 specifies the height of each tile row in 1323+ * units of coding tree blocks 1324+ * @pps_beta_offset_div2: specifies the default deblocking parameter offset for 1325+ * beta divided by 2 1326+ * @pps_tc_offset_div2: specifies the default deblocking parameter offset for tC 1327+ * divided by 2 1328+ * @log2_parallel_merge_level_minus2: this value plus 2 specifies the value of 1329+ * the variable Log2ParMrgLevel 1330+ * @reserved: padding field. Should be zeroed by applications.
1331+ * @flags: see V4L2_HEVC_PPS_FLAG_{} 1332+ */ 1333+struct v4l2_ctrl_hevc_pps { 1334+ __u8 pic_parameter_set_id; 1335+ __u8 num_extra_slice_header_bits; 1336+ __u8 num_ref_idx_l0_default_active_minus1; 1337+ __u8 num_ref_idx_l1_default_active_minus1; 1338+ __s8 init_qp_minus26; 1339+ __u8 diff_cu_qp_delta_depth; 1340+ __s8 pps_cb_qp_offset; 1341+ __s8 pps_cr_qp_offset; 1342+ __u8 num_tile_columns_minus1; 1343+ __u8 num_tile_rows_minus1; 1344+ __u8 column_width_minus1[20]; 1345+ __u8 row_height_minus1[22]; 1346+ __s8 pps_beta_offset_div2; 1347+ __s8 pps_tc_offset_div2; 1348+ __u8 log2_parallel_merge_level_minus2; 1349+ __u8 reserved; 1350+ __u64 flags; 1351+}; 1352+ 1353+#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01 1354+ 1355+#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME 0 1356+#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_FIELD 1 1357+#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_FIELD 2 1358+#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM 3 1359+#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP 4 1360+#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM_TOP 5 1361+#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM 6 1362+#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING 7 1363+#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING 8 1364+#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_PREVIOUS_BOTTOM 9 1365+#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_PREVIOUS_TOP 10 1366+#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_NEXT_BOTTOM 11 1367+#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_NEXT_TOP 12 1368+ 1369+#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 1370+ 1371+/** 1372+ * struct v4l2_hevc_dpb_entry - HEVC decoded picture buffer entry 1373+ * 1374+ * @timestamp: timestamp of the V4L2 capture buffer to use as reference. 1375+ * @flags: long term flag for the reference frame 1376+ * @field_pic: whether the reference is a field picture or a frame. 1377+ * @reserved: padding field. Should be zeroed by applications. 1378+ * @pic_order_cnt_val: the picture order count of the current picture. 
1379+ */ 1380+struct v4l2_hevc_dpb_entry { 1381+ __u64 timestamp; 1382+ __u8 flags; 1383+ __u8 field_pic; 1384+ __u16 reserved; 1385+ __s32 pic_order_cnt_val; 1386+}; 1387+ 1388+/** 1389+ * struct v4l2_hevc_pred_weight_table - HEVC weighted prediction parameters 1390+ * 1391+ * @delta_luma_weight_l0: the difference of the weighting factor applied 1392+ * to the luma prediction value for list 0 1393+ * @luma_offset_l0: the additive offset applied to the luma prediction value 1394+ * for list 0 1395+ * @delta_chroma_weight_l0: the difference of the weighting factor applied 1396+ * to the chroma prediction values for list 0 1397+ * @chroma_offset_l0: the difference of the additive offset applied to 1398+ * the chroma prediction values for list 0 1399+ * @delta_luma_weight_l1: the difference of the weighting factor applied 1400+ * to the luma prediction value for list 1 1401+ * @luma_offset_l1: the additive offset applied to the luma prediction value 1402+ * for list 1 1403+ * @delta_chroma_weight_l1: the difference of the weighting factor applied 1404+ * to the chroma prediction values for list 1 1405+ * @chroma_offset_l1: the difference of the additive offset applied to 1406+ * the chroma prediction values for list 1 1407+ * @luma_log2_weight_denom: the base 2 logarithm of the denominator for 1408+ * all luma weighting factors 1409+ * @delta_chroma_log2_weight_denom: the difference of the base 2 logarithm 1410+ * of the denominator for all chroma 1411+ * weighting factors 1412+ */ 1413+struct v4l2_hevc_pred_weight_table { 1414+ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1415+ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1416+ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 1417+ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 1418+ 1419+ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1420+ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1421+ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 
1422+ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; 1423+ 1424+ __u8 luma_log2_weight_denom; 1425+ __s8 delta_chroma_log2_weight_denom; 1426+}; 1427+ 1428+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) 1429+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) 1430+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) 1431+#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) 1432+#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) 1433+#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) 1434+#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) 1435+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) 1436+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) 1437+#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) 1438+ 1439+/** 1440+ * struct v4l2_ctrl_hevc_slice_params - HEVC slice parameters 1441+ * 1442+ * This control is a dynamically sized 1-dimensional array, 1443+ * V4L2_CTRL_FLAG_DYNAMIC_ARRAY flag must be set when using it. 1444+ * 1445+ * @bit_size: size (in bits) of the current slice data 1446+ * @data_byte_offset: offset (in bytes) to the video data in the current slice data 1447+ * @num_entry_point_offsets: specifies the number of entry point offset syntax 1448+ * elements in the slice header. 
1449+ * @nal_unit_type: specifies the NAL unit type, as carried in the NAL unit header 1450+ * @nuh_temporal_id_plus1: this value minus 1 specifies a temporal identifier for the NAL unit 1451+ * @slice_type: see V4L2_HEVC_SLICE_TYPE_{} 1452+ * @colour_plane_id: specifies the colour plane associated with the current slice 1453+ * @slice_pic_order_cnt: specifies the picture order count 1454+ * @num_ref_idx_l0_active_minus1: this value plus 1 specifies the maximum 1455+ * reference index for reference picture list 0 1456+ * that may be used to decode the slice 1457+ * @num_ref_idx_l1_active_minus1: this value plus 1 specifies the maximum 1458+ * reference index for reference picture list 1 1459+ * that may be used to decode the slice 1460+ * @collocated_ref_idx: specifies the reference index of the collocated picture used 1461+ * for temporal motion vector prediction 1462+ * @five_minus_max_num_merge_cand: specifies the maximum number of merging 1463+ * motion vector prediction candidates supported in 1464+ * the slice subtracted from 5 1465+ * @slice_qp_delta: specifies the initial value of QpY to be used for the coding 1466+ * blocks in the slice 1467+ * @slice_cb_qp_offset: specifies a difference to be added to the value of pps_cb_qp_offset 1468+ * @slice_cr_qp_offset: specifies a difference to be added to the value of pps_cr_qp_offset 1469+ * @slice_act_y_qp_offset: screen content extension parameters 1470+ * @slice_act_cb_qp_offset: screen content extension parameters 1471+ * @slice_act_cr_qp_offset: screen content extension parameters 1472+ * @slice_beta_offset_div2: specifies the deblocking parameter offset for beta divided by 2 1473+ * @slice_tc_offset_div2: specifies the deblocking parameter offset for tC divided by 2 1474+ * @pic_struct: indicates whether a picture should be displayed as a frame or as one or 1475+ * more fields 1476+ * @reserved0: padding field. Should be zeroed by applications.
1477+ * @slice_segment_addr: specifies the address of the first coding tree block in 1478+ * the slice segment 1479+ * @ref_idx_l0: the list of L0 reference elements as indices in the DPB 1480+ * @ref_idx_l1: the list of L1 reference elements as indices in the DPB 1481+ * @short_term_ref_pic_set_size: specifies the size of short-term reference 1482+ * pictures set included in the SPS 1483+ * @long_term_ref_pic_set_size: specifies the size of long-term reference 1484+ * pictures set include in the SPS 1485+ * @pred_weight_table: the prediction weight coefficients for inter-picture 1486+ * prediction 1487+ * @reserved1: padding field. Should be zeroed by applications. 1488+ * @flags: see V4L2_HEVC_SLICE_PARAMS_FLAG_{} 1489+ */ 1490+struct v4l2_ctrl_hevc_slice_params { 1491+ __u32 bit_size; 1492+ __u32 data_byte_offset; 1493+ __u32 num_entry_point_offsets; 1494+ 1495+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ 1496+ __u8 nal_unit_type; 1497+ __u8 nuh_temporal_id_plus1; 1498+ 1499+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ 1500+ __u8 slice_type; 1501+ __u8 colour_plane_id; 1502+ __s32 slice_pic_order_cnt; 1503+ __u8 num_ref_idx_l0_active_minus1; 1504+ __u8 num_ref_idx_l1_active_minus1; 1505+ __u8 collocated_ref_idx; 1506+ __u8 five_minus_max_num_merge_cand; 1507+ __s8 slice_qp_delta; 1508+ __s8 slice_cb_qp_offset; 1509+ __s8 slice_cr_qp_offset; 1510+ __s8 slice_act_y_qp_offset; 1511+ __s8 slice_act_cb_qp_offset; 1512+ __s8 slice_act_cr_qp_offset; 1513+ __s8 slice_beta_offset_div2; 1514+ __s8 slice_tc_offset_div2; 1515+ 1516+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ 1517+ __u8 pic_struct; 1518+ 1519+ __u8 reserved0[3]; 1520+ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: General slice segment header */ 1521+ __u32 slice_segment_addr; 1522+ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1523+ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1524+ __u16 short_term_ref_pic_set_size; 1525+ __u16 long_term_ref_pic_set_size; 1526+ 1527+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ 1528+ struct v4l2_hevc_pred_weight_table pred_weight_table; 1529+ 1530+ __u8 reserved1[2]; 1531+ __u64 flags; 1532+}; 1533+ 1534+#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 1535+#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 1536+#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 1537+ 1538+/** 1539+ * struct v4l2_ctrl_hevc_decode_params - HEVC decode parameters 1540+ * 1541+ * @pic_order_cnt_val: picture order count 1542+ * @short_term_ref_pic_set_size: specifies the size of short-term reference 1543+ * pictures set included in the SPS of the first slice 1544+ * @long_term_ref_pic_set_size: specifies the size of long-term reference 1545+ * pictures set include in the SPS of the first slice 1546+ * @num_active_dpb_entries: the number of entries in dpb 1547+ * @num_poc_st_curr_before: the number of reference pictures in the short-term 1548+ * set that come before the current frame 1549+ * @num_poc_st_curr_after: the number of reference pictures in the short-term 1550+ * set that come after the current frame 1551+ * @num_poc_lt_curr: the number of reference pictures in the long-term set 1552+ * @poc_st_curr_before: provides the index of the short term before references 1553+ * in DPB array 1554+ * @poc_st_curr_after: provides the index of the short term after references 1555+ * in DPB array 1556+ * @poc_lt_curr: provides the index of the long term references in DPB array 1557+ * @reserved: padding field. Should be zeroed by applications. 
1558+ * @dpb: the decoded picture buffer, for meta-data about reference frames 1559+ * @flags: see V4L2_HEVC_DECODE_PARAM_FLAG_{} 1560+ */ 1561+struct v4l2_ctrl_hevc_decode_params { 1562+ __s32 pic_order_cnt_val; 1563+ __u16 short_term_ref_pic_set_size; 1564+ __u16 long_term_ref_pic_set_size; 1565+ __u8 num_active_dpb_entries; 1566+ __u8 num_poc_st_curr_before; 1567+ __u8 num_poc_st_curr_after; 1568+ __u8 num_poc_lt_curr; 1569+ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1570+ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1571+ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1572+ __u8 reserved[4]; 1573+ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; 1574+ __u64 flags; 1575+}; 1576+ 1577+/** 1578+ * struct v4l2_ctrl_hevc_scaling_matrix - HEVC scaling lists parameters 1579+ * 1580+ * @scaling_list_4x4: scaling list is used for the scaling process for 1581+ * transform coefficients. The values on each scaling 1582+ * list are expected in raster scan order 1583+ * @scaling_list_8x8: scaling list is used for the scaling process for 1584+ * transform coefficients. The values on each scaling 1585+ * list are expected in raster scan order 1586+ * @scaling_list_16x16: scaling list is used for the scaling process for 1587+ * transform coefficients. The values on each scaling 1588+ * list are expected in raster scan order 1589+ * @scaling_list_32x32: scaling list is used for the scaling process for 1590+ * transform coefficients. The values on each scaling 1591+ * list are expected in raster scan order 1592+ * @scaling_list_dc_coef_16x16: scaling list is used for the scaling process 1593+ * for transform coefficients. The values on each 1594+ * scaling list are expected in raster scan order. 1595+ * @scaling_list_dc_coef_32x32: scaling list is used for the scaling process 1596+ * for transform coefficients. The values on each 1597+ * scaling list are expected in raster scan order. 
1598+ */ 1599+struct v4l2_ctrl_hevc_scaling_matrix { 1600+ __u8 scaling_list_4x4[6][16]; 1601+ __u8 scaling_list_8x8[6][64]; 1602+ __u8 scaling_list_16x16[6][64]; 1603+ __u8 scaling_list_32x32[2][64]; 1604+ __u8 scaling_list_dc_coef_16x16[6]; 1605+ __u8 scaling_list_dc_coef_32x32[2]; 1606+}; 1607+ 1608+#endif 1609--- a/libavcodec/hevc_parser.c 1610+++ b/libavcodec/hevc_parser.c 1611@@ -97,6 +97,19 @@ static int hevc_parse_slice_header(AVCod 1612 avctx->profile = ps->sps->ptl.general_ptl.profile_idc; 1613 avctx->level = ps->sps->ptl.general_ptl.level_idc; 1614 1615+ if (ps->sps->chroma_format_idc == 1) { 1616+ avctx->chroma_sample_location = ps->sps->vui.chroma_loc_info_present_flag ? 1617+ ps->sps->vui.chroma_sample_loc_type_top_field + 1 : 1618+ AVCHROMA_LOC_LEFT; 1619+ } 1620+ else if (ps->sps->chroma_format_idc == 2 || 1621+ ps->sps->chroma_format_idc == 3) { 1622+ avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;; 1623+ } 1624+ else { 1625+ avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED; 1626+ } 1627+ 1628 if (ps->vps->vps_timing_info_present_flag) { 1629 num = ps->vps->vps_num_units_in_tick; 1630 den = ps->vps->vps_time_scale; 1631--- a/libavcodec/hevc_refs.c 1632+++ b/libavcodec/hevc_refs.c 1633@@ -98,18 +98,22 @@ static HEVCFrame *alloc_frame(HEVCContex 1634 if (!frame->rpl_buf) 1635 goto fail; 1636 1637- frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); 1638- if (!frame->tab_mvf_buf) 1639- goto fail; 1640- frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; 1641+ if (s->tab_mvf_pool) { 1642+ frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); 1643+ if (!frame->tab_mvf_buf) 1644+ goto fail; 1645+ frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; 1646+ } 1647 1648- frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); 1649- if (!frame->rpl_tab_buf) 1650- goto fail; 1651- frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; 1652- frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; 1653- for (j = 0; j < 
frame->ctb_count; j++) 1654- frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; 1655+ if (s->rpl_tab_pool) { 1656+ frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); 1657+ if (!frame->rpl_tab_buf) 1658+ goto fail; 1659+ frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; 1660+ frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; 1661+ for (j = 0; j < frame->ctb_count; j++) 1662+ frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; 1663+ } 1664 1665 frame->frame->top_field_first = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD; 1666 frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD); 1667@@ -284,14 +288,17 @@ static int init_slice_rpl(HEVCContext *s 1668 int ctb_count = frame->ctb_count; 1669 int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr]; 1670 int i; 1671+ RefPicListTab * const tab = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; 1672 1673 if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab)) 1674 return AVERROR_INVALIDDATA; 1675 1676- for (i = ctb_addr_ts; i < ctb_count; i++) 1677- frame->rpl_tab[i] = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; 1678+ if (frame->rpl_tab) { 1679+ for (i = ctb_addr_ts; i < ctb_count; i++) 1680+ frame->rpl_tab[i] = tab; 1681+ } 1682 1683- frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts]; 1684+ frame->refPicList = tab->refPicList; 1685 1686 return 0; 1687 } 1688--- a/libavcodec/hevcdec.c 1689+++ b/libavcodec/hevcdec.c 1690@@ -340,6 +340,19 @@ static void export_stream_params(HEVCCon 1691 1692 ff_set_sar(avctx, sps->vui.sar); 1693 1694+ if (sps->chroma_format_idc == 1) { 1695+ avctx->chroma_sample_location = sps->vui.chroma_loc_info_present_flag ? 
1696+ sps->vui.chroma_sample_loc_type_top_field + 1 : 1697+ AVCHROMA_LOC_LEFT; 1698+ } 1699+ else if (sps->chroma_format_idc == 2 || 1700+ sps->chroma_format_idc == 3) { 1701+ avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;; 1702+ } 1703+ else { 1704+ avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED; 1705+ } 1706+ 1707 if (sps->vui.video_signal_type_present_flag) 1708 avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG 1709 : AVCOL_RANGE_MPEG; 1710@@ -402,6 +415,7 @@ static enum AVPixelFormat get_format(HEV 1711 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \ 1712 CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \ 1713 CONFIG_HEVC_NVDEC_HWACCEL + \ 1714+ CONFIG_HEVC_V4L2REQUEST_HWACCEL + \ 1715 CONFIG_HEVC_VAAPI_HWACCEL + \ 1716 CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \ 1717 CONFIG_HEVC_VDPAU_HWACCEL) 1718@@ -429,6 +443,9 @@ static enum AVPixelFormat get_format(HEV 1719 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 1720 *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; 1721 #endif 1722+#if CONFIG_HEVC_V4L2REQUEST_HWACCEL 1723+ *fmt++ = AV_PIX_FMT_DRM_PRIME; 1724+#endif 1725 break; 1726 case AV_PIX_FMT_YUV420P10: 1727 #if CONFIG_HEVC_DXVA2_HWACCEL 1728@@ -450,6 +467,9 @@ static enum AVPixelFormat get_format(HEV 1729 #if CONFIG_HEVC_NVDEC_HWACCEL 1730 *fmt++ = AV_PIX_FMT_CUDA; 1731 #endif 1732+#if CONFIG_HEVC_V4L2REQUEST_HWACCEL 1733+ *fmt++ = AV_PIX_FMT_DRM_PRIME; 1734+#endif 1735 break; 1736 case AV_PIX_FMT_YUV444P: 1737 #if CONFIG_HEVC_VDPAU_HWACCEL 1738@@ -504,6 +524,16 @@ static int set_sps(HEVCContext *s, const 1739 if (!sps) 1740 return 0; 1741 1742+ // If hwaccel then we don't need all the s/w decode helper arrays 1743+ if (s->avctx->hwaccel) { 1744+ export_stream_params(s, sps); 1745+ 1746+ s->avctx->pix_fmt = pix_fmt; 1747+ s->ps.sps = sps; 1748+ s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data; 1749+ return 0; 1750+ } 1751+ 1752 ret = pic_arrays_init(s, sps); 1753 if (ret < 0) 1754 goto fail; 1755@@ -3011,11 +3041,13 @@ static int 
hevc_frame_start(HEVCContext 1756 ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1); 1757 int ret; 1758 1759- memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); 1760- memset(s->vertical_bs, 0, s->bs_width * s->bs_height); 1761- memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); 1762- memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); 1763- memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); 1764+ if (s->horizontal_bs) { 1765+ memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); 1766+ memset(s->vertical_bs, 0, s->bs_width * s->bs_height); 1767+ memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); 1768+ memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); 1769+ memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); 1770+ } 1771 1772 s->is_decoded = 0; 1773 s->first_nal_type = s->nal_unit_type; 1774@@ -3507,8 +3539,13 @@ static int hevc_decode_frame(AVCodecCont 1775 1776 s->ref = NULL; 1777 ret = decode_nal_units(s, avpkt->data, avpkt->size); 1778- if (ret < 0) 1779+ if (ret < 0) { 1780+ // Ensure that hwaccel knows this frame is over 1781+ if (s->avctx->hwaccel && s->avctx->hwaccel->abort_frame) 1782+ s->avctx->hwaccel->abort_frame(s->avctx); 1783+ 1784 return ret; 1785+ } 1786 1787 if (avctx->hwaccel) { 1788 if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) { 1789@@ -3558,15 +3595,19 @@ static int hevc_ref_frame(HEVCContext *s 1790 dst->needs_fg = 1; 1791 } 1792 1793- dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); 1794- if (!dst->tab_mvf_buf) 1795- goto fail; 1796- dst->tab_mvf = src->tab_mvf; 1797+ if (src->tab_mvf_buf) { 1798+ dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); 1799+ if (!dst->tab_mvf_buf) 1800+ goto fail; 1801+ dst->tab_mvf = src->tab_mvf; 1802+ } 1803 1804- dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf); 1805- if (!dst->rpl_tab_buf) 1806- 
goto fail; 1807- dst->rpl_tab = src->rpl_tab; 1808+ if (src->rpl_tab_buf) { 1809+ dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf); 1810+ if (!dst->rpl_tab_buf) 1811+ goto fail; 1812+ dst->rpl_tab = src->rpl_tab; 1813+ } 1814 1815 dst->rpl_buf = av_buffer_ref(src->rpl_buf); 1816 if (!dst->rpl_buf) 1817@@ -3900,6 +3941,9 @@ const FFCodec ff_hevc_decoder = { 1818 #if CONFIG_HEVC_NVDEC_HWACCEL 1819 HWACCEL_NVDEC(hevc), 1820 #endif 1821+#if CONFIG_HEVC_V4L2REQUEST_HWACCEL 1822+ HWACCEL_V4L2REQUEST(hevc), 1823+#endif 1824 #if CONFIG_HEVC_VAAPI_HWACCEL 1825 HWACCEL_VAAPI(hevc), 1826 #endif 1827--- a/libavcodec/hwaccels.h 1828+++ b/libavcodec/hwaccels.h 1829@@ -40,6 +40,7 @@ extern const AVHWAccel ff_hevc_d3d11va_h 1830 extern const AVHWAccel ff_hevc_d3d11va2_hwaccel; 1831 extern const AVHWAccel ff_hevc_dxva2_hwaccel; 1832 extern const AVHWAccel ff_hevc_nvdec_hwaccel; 1833+extern const AVHWAccel ff_hevc_v4l2request_hwaccel; 1834 extern const AVHWAccel ff_hevc_vaapi_hwaccel; 1835 extern const AVHWAccel ff_hevc_vdpau_hwaccel; 1836 extern const AVHWAccel ff_hevc_videotoolbox_hwaccel; 1837--- a/libavcodec/hwconfig.h 1838+++ b/libavcodec/hwconfig.h 1839@@ -24,6 +24,7 @@ 1840 1841 1842 #define HWACCEL_CAP_ASYNC_SAFE (1 << 0) 1843+#define HWACCEL_CAP_MT_SAFE (1 << 1) 1844 1845 1846 typedef struct AVCodecHWConfigInternal { 1847@@ -70,6 +71,8 @@ typedef struct AVCodecHWConfigInternal { 1848 HW_CONFIG_HWACCEL(1, 1, 0, D3D11, D3D11VA, ff_ ## codec ## _d3d11va2_hwaccel) 1849 #define HWACCEL_NVDEC(codec) \ 1850 HW_CONFIG_HWACCEL(1, 1, 0, CUDA, CUDA, ff_ ## codec ## _nvdec_hwaccel) 1851+#define HWACCEL_V4L2REQUEST(codec) \ 1852+ HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME, DRM, ff_ ## codec ## _v4l2request_hwaccel) 1853 #define HWACCEL_VAAPI(codec) \ 1854 HW_CONFIG_HWACCEL(1, 1, 1, VAAPI, VAAPI, ff_ ## codec ## _vaapi_hwaccel) 1855 #define HWACCEL_VDPAU(codec) \ 1856--- a/libavcodec/mmaldec.c 1857+++ b/libavcodec/mmaldec.c 1858@@ -24,6 +24,9 @@ 1859 * MMAL Video Decoder 1860 */ 1861 
1862+#pragma GCC diagnostic push 1863+// Many many redundant decls in the header files 1864+#pragma GCC diagnostic ignored "-Wredundant-decls" 1865 #include <bcm_host.h> 1866 #include <interface/mmal/mmal.h> 1867 #include <interface/mmal/mmal_parameters_video.h> 1868@@ -31,6 +34,7 @@ 1869 #include <interface/mmal/util/mmal_util_params.h> 1870 #include <interface/mmal/util/mmal_default_components.h> 1871 #include <interface/mmal/vc/mmal_vc_api.h> 1872+#pragma GCC diagnostic pop 1873 #include <stdatomic.h> 1874 1875 #include "avcodec.h" 1876--- a/libavcodec/pthread_frame.c 1877+++ b/libavcodec/pthread_frame.c 1878@@ -217,7 +217,8 @@ FF_ENABLE_DEPRECATION_WARNINGS 1879 1880 /* if the previous thread uses hwaccel then we take the lock to ensure 1881 * the threads don't run concurrently */ 1882- if (avctx->hwaccel) { 1883+ if (avctx->hwaccel && 1884+ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { 1885 pthread_mutex_lock(&p->parent->hwaccel_mutex); 1886 p->hwaccel_serializing = 1; 1887 } 1888@@ -243,7 +244,7 @@ FF_ENABLE_DEPRECATION_WARNINGS 1889 p->hwaccel_serializing = 0; 1890 pthread_mutex_unlock(&p->parent->hwaccel_mutex); 1891 } 1892- av_assert0(!avctx->hwaccel); 1893+ av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); 1894 1895 if (p->async_serializing) { 1896 p->async_serializing = 0; 1897@@ -331,6 +332,12 @@ FF_ENABLE_DEPRECATION_WARNINGS 1898 } 1899 1900 dst->hwaccel_flags = src->hwaccel_flags; 1901+ if (src->hwaccel && 1902+ (src->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { 1903+ dst->hwaccel = src->hwaccel; 1904+ dst->hwaccel_context = src->hwaccel_context; 1905+ dst->internal->hwaccel_priv_data = src->internal->hwaccel_priv_data; 1906+ } 1907 1908 err = av_buffer_replace(&dst->internal->pool, src->internal->pool); 1909 if (err < 0) 1910@@ -461,10 +468,13 @@ static int submit_packet(PerThreadContex 1911 } 1912 1913 /* transfer the stashed hwaccel state, if any */ 1914- av_assert0(!p->avctx->hwaccel); 1915- 
FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); 1916- FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); 1917- FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); 1918+ av_assert0(!p->avctx->hwaccel || (p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); 1919+ if (!p->avctx->hwaccel || 1920+ !(p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { /* thread holds no hwaccel of its own: take over the stashed state */ 1921+ FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); 1922+ FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); 1923+ FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); 1924+ } 1925 1926 av_packet_unref(p->avpkt); 1927 ret = av_packet_ref(p->avpkt, avpkt); 1928@@ -656,7 +666,9 @@ void ff_thread_finish_setup(AVCodecConte 1929 1930 if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return; 1931 1932- if (avctx->hwaccel && !p->hwaccel_serializing) { 1933+ if (avctx->hwaccel && 1934+ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) && 1935+ !p->hwaccel_serializing) { 1936 pthread_mutex_lock(&p->parent->hwaccel_mutex); 1937 p->hwaccel_serializing = 1; 1938 } 1939@@ -673,9 +685,12 @@ void ff_thread_finish_setup(AVCodecConte 1940 * this is done here so that this worker thread can wipe its own hwaccel 1941 * state after decoding, without requiring synchronization */ 1942 av_assert0(!p->parent->stash_hwaccel); 1943- p->parent->stash_hwaccel = avctx->hwaccel; 1944- p->parent->stash_hwaccel_context = avctx->hwaccel_context; 1945- p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; 1946+ if (avctx->hwaccel && 1947+ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { 1948+ p->parent->stash_hwaccel = avctx->hwaccel; 1949+ p->parent->stash_hwaccel_context = avctx->hwaccel_context; 1950+ p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; 1951+ } 1952 1953 pthread_mutex_lock(&p->progress_mutex); 1954 if(atomic_load(&p->state) == STATE_SETUP_FINISHED){ 
1955@@ -730,6 +745,15 @@ void ff_frame_thread_free(AVCodecContext 1956 1957 park_frame_worker_threads(fctx, thread_count); 1958 1959+ if (fctx->prev_thread && 1960+ avctx->hwaccel && (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) && 1961+ avctx->internal->hwaccel_priv_data != 1962+ fctx->prev_thread->avctx->internal->hwaccel_priv_data) { 1963+ if (update_context_from_thread(avctx, fctx->prev_thread->avctx, 1) < 0) { 1964+ av_log(avctx, AV_LOG_ERROR, "Failed to update user thread.\n"); 1965+ } 1966+ } 1967+ 1968 for (i = 0; i < thread_count; i++) { 1969 PerThreadContext *p = &fctx->threads[i]; 1970 AVCodecContext *ctx = p->avctx; 1971@@ -778,10 +802,13 @@ void ff_frame_thread_free(AVCodecContext 1972 1973 /* if we have stashed hwaccel state, move it to the user-facing context, 1974 * so it will be freed in avcodec_close() */ 1975- av_assert0(!avctx->hwaccel); 1976- FFSWAP(const AVHWAccel*, avctx->hwaccel, fctx->stash_hwaccel); 1977- FFSWAP(void*, avctx->hwaccel_context, fctx->stash_hwaccel_context); 1978- FFSWAP(void*, avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); 1979+ av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); 1980+ if (!avctx->hwaccel || 1981+ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { /* user context holds no hwaccel: adopt the stash so it gets freed */ 1982+ FFSWAP(const AVHWAccel*, avctx->hwaccel, fctx->stash_hwaccel); 1983+ FFSWAP(void*, avctx->hwaccel_context, fctx->stash_hwaccel_context); 1984+ FFSWAP(void*, avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); 1985+ } 1986 1987 av_freep(&avctx->internal->thread_ctx); 1988 } 1989--- a/libavcodec/raw.c 1990+++ b/libavcodec/raw.c 1991@@ -294,6 +294,12 @@ static const PixelFormatTag raw_pix_fmt_ 1992 { AV_PIX_FMT_RGB565LE,MKTAG( 3 , 0 , 0 , 0 ) }, /* flipped RGB565LE */ 1993 { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */ 1994 1995+ /* RPI (Might as well define for everything) */ 1996+ { AV_PIX_FMT_SAND128, MKTAG('S', 'A', 'N', 'D') }, 1997+ { AV_PIX_FMT_RPI4_8, 
MKTAG('S', 'A', 'N', 'D') }, 1998+ { AV_PIX_FMT_SAND64_10, MKTAG('S', 'N', 'D', 'A') }, 1999+ { AV_PIX_FMT_RPI4_10, MKTAG('S', 'N', 'D', 'B') }, 2000+ 2001 { AV_PIX_FMT_NONE, 0 }, 2002 }; 2003 2004--- a/libavcodec/rawenc.c 2005+++ b/libavcodec/rawenc.c 2006@@ -24,6 +24,7 @@ 2007 * Raw Video Encoder 2008 */ 2009 2010+#include "config.h" 2011 #include "avcodec.h" 2012 #include "codec_internal.h" 2013 #include "encode.h" 2014@@ -33,6 +34,10 @@ 2015 #include "libavutil/intreadwrite.h" 2016 #include "libavutil/imgutils.h" 2017 #include "libavutil/internal.h" 2018+#include "libavutil/avassert.h" 2019+#if CONFIG_SAND 2020+#include "libavutil/rpi_sand_fns.h" 2021+#endif 2022 2023 static av_cold int raw_encode_init(AVCodecContext *avctx) 2024 { 2025@@ -46,22 +51,114 @@ static av_cold int raw_encode_init(AVCod 2026 return 0; 2027 } 2028 2029+#if CONFIG_SAND 2030+static int raw_sand8_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, 2031+ const AVFrame *frame) 2032+{ 2033+ const int width = av_frame_cropped_width(frame); 2034+ const int height = av_frame_cropped_height(frame); 2035+ const int x0 = frame->crop_left; 2036+ const int y0 = frame->crop_top; 2037+ const int size = width * height * 3 / 2; 2038+ uint8_t * dst; 2039+ int ret; 2040+ 2041+ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) 2042+ return ret; 2043+ 2044+ dst = pkt->data; 2045+ 2046+ av_rpi_sand_to_planar_y8(dst, width, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height); 2047+ dst += width * height; 2048+ av_rpi_sand_to_planar_c8(dst, width / 2, dst + width * height / 4, width / 2, 2049+ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0 / 2, y0 / 2, width / 2, height / 2); 2050+ return 0; 2051+} 2052+ 2053+static int raw_sand16_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, 2054+ const AVFrame *frame) 2055+{ 2056+ const int width = av_frame_cropped_width(frame); 2057+ const int height = av_frame_cropped_height(frame); 2058+ const int x0 = 
frame->crop_left; 2059+ const int y0 = frame->crop_top; 2060+ const int size = width * height * 3; 2061+ uint8_t * dst; 2062+ int ret; 2063+ 2064+ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) 2065+ return ret; 2066+ 2067+ dst = pkt->data; 2068+ 2069+ av_rpi_sand_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0 * 2, y0, width * 2, height); 2070+ dst += width * height * 2; 2071+ av_rpi_sand_to_planar_c16(dst, width, dst + width * height / 2, width, 2072+ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0, y0 / 2, width, height / 2); 2073+ return 0; 2074+} 2075+ 2076+static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, 2077+ const AVFrame *frame) 2078+{ 2079+ const int width = av_frame_cropped_width(frame); 2080+ const int height = av_frame_cropped_height(frame); 2081+ const int x0 = frame->crop_left; 2082+ const int y0 = frame->crop_top; 2083+ const int size = width * height * 3; 2084+ uint8_t * dst; 2085+ int ret; 2086+ 2087+ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) 2088+ return ret; 2089+ 2090+ dst = pkt->data; 2091+ 2092+ av_rpi_sand30_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height); 2093+ dst += width * height * 2; 2094+ av_rpi_sand30_to_planar_c16(dst, width, dst + width * height / 2, width, 2095+ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0/2, y0 / 2, width/2, height / 2); 2096+ return 0; 2097+} 2098+#endif 2099+ 2100+ 2101 static int raw_encode(AVCodecContext *avctx, AVPacket *pkt, 2102- const AVFrame *frame, int *got_packet) 2103+ const AVFrame *src_frame, int *got_packet) 2104 { 2105- int ret = av_image_get_buffer_size(frame->format, 2106- frame->width, frame->height, 1); 2107+ int ret; 2108+ AVFrame * frame = NULL; 2109 2110- if (ret < 0) 2111+#if CONFIG_SAND 2112+ if (av_rpi_is_sand_frame(src_frame)) { 2113+ ret = av_rpi_is_sand8_frame(src_frame) ? 
raw_sand8_as_yuv420(avctx, pkt, src_frame) : 2114+ av_rpi_is_sand16_frame(src_frame) ? raw_sand16_as_yuv420(avctx, pkt, src_frame) : 2115+ av_rpi_is_sand30_frame(src_frame) ? raw_sand30_as_yuv420(avctx, pkt, src_frame) : -1; 2116+ *got_packet = (ret == 0); 2117 return ret; 2118+ } 2119+#endif 2120+ 2121+ if ((frame = av_frame_clone(src_frame)) == NULL) { 2122+ ret = AVERROR(ENOMEM); 2123+ goto fail; 2124+ } 2125+ 2126+ if ((ret = av_frame_apply_cropping(frame, AV_FRAME_CROP_UNALIGNED)) < 0) 2127+ goto fail; 2128+ 2129+ ret = av_image_get_buffer_size(frame->format, 2130+ frame->width, frame->height, 1); 2131+ if (ret < 0) 2132+ goto fail; 2133 2134 if ((ret = ff_get_encode_buffer(avctx, pkt, ret, 0)) < 0) 2135- return ret; 2136+ goto fail; 2137 if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size, 2138 (const uint8_t **)frame->data, frame->linesize, 2139 frame->format, 2140 frame->width, frame->height, 1)) < 0) 2141- return ret; 2142+ goto fail; 2143 2144 if(avctx->codec_tag == AV_RL32("yuv2") && ret > 0 && 2145 frame->format == AV_PIX_FMT_YUYV422) { 2146@@ -77,8 +174,15 @@ static int raw_encode(AVCodecContext *av 2147 AV_WB64(&pkt->data[8 * x], v << 48 | v >> 16); 2148 } 2149 } 2150+ pkt->flags |= AV_PKT_FLAG_KEY; 2151+ av_frame_free(&frame); 2152 *got_packet = 1; 2153 return 0; 2154+ 2155+fail: 2156+ av_frame_free(&frame); 2157+ *got_packet = 0; 2158+ return ret; 2159 } 2160 2161 const FFCodec ff_rawvideo_encoder = { 2162--- a/libavcodec/v4l2_buffers.c 2163+++ b/libavcodec/v4l2_buffers.c 2164@@ -21,6 +21,7 @@ 2165 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 2166 */ 2167 2168+#include <drm_fourcc.h> 2169 #include <linux/videodev2.h> 2170 #include <sys/ioctl.h> 2171 #include <sys/mman.h> 2172@@ -28,57 +29,89 @@ 2173 #include <fcntl.h> 2174 #include <poll.h> 2175 #include "libavcodec/avcodec.h" 2176+#include "libavcodec/internal.h" 2177+#include "libavutil/avassert.h" 2178 #include "libavutil/pixdesc.h" 2179+#include 
"libavutil/hwcontext.h" 2180 #include "v4l2_context.h" 2181 #include "v4l2_buffers.h" 2182 #include "v4l2_m2m.h" 2183+#include "v4l2_req_dmabufs.h" 2184+#include "weak_link.h" 2185 2186 #define USEC_PER_SEC 1000000 2187-static AVRational v4l2_timebase = { 1, USEC_PER_SEC }; 2188+static const AVRational v4l2_timebase = { 1, USEC_PER_SEC }; 2189 2190-static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf) 2191+static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) 2192 { 2193- return V4L2_TYPE_IS_OUTPUT(buf->context->type) ? 2194- container_of(buf->context, V4L2m2mContext, output) : 2195- container_of(buf->context, V4L2m2mContext, capture); 2196+ return V4L2_TYPE_IS_OUTPUT(ctx->type) ? 2197+ container_of(ctx, V4L2m2mContext, output) : 2198+ container_of(ctx, V4L2m2mContext, capture); 2199 } 2200 2201-static inline AVCodecContext *logger(V4L2Buffer *buf) 2202+static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf) 2203 { 2204- return buf_to_m2mctx(buf)->avctx; 2205+ return ctx_to_m2mctx(buf->context); 2206 } 2207 2208-static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf) 2209+static inline AVCodecContext *logger(const V4L2Buffer * const buf) 2210 { 2211- V4L2m2mContext *s = buf_to_m2mctx(avbuf); 2212+ return buf_to_m2mctx(buf)->avctx; 2213+} 2214 2215- if (s->avctx->pkt_timebase.num) 2216- return s->avctx->pkt_timebase; 2217- return s->avctx->time_base; 2218+static inline AVRational v4l2_get_timebase(const V4L2Buffer * const avbuf) 2219+{ 2220+ const V4L2m2mContext *s = buf_to_m2mctx(avbuf); 2221+ const AVRational tb = s->avctx->pkt_timebase.num ? 2222+ s->avctx->pkt_timebase : 2223+ s->avctx->time_base; 2224+ return tb.num && tb.den ? 
tb : v4l2_timebase; 2225 } 2226 2227-static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts) 2228+static inline struct timeval tv_from_int(const int64_t t) 2229 { 2230- int64_t v4l2_pts; 2231+ return (struct timeval){ 2232+ .tv_usec = t % USEC_PER_SEC, 2233+ .tv_sec = t / USEC_PER_SEC 2234+ }; 2235+} 2236 2237- if (pts == AV_NOPTS_VALUE) 2238- pts = 0; 2239+static inline int64_t int_from_tv(const struct timeval t) 2240+{ 2241+ return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec; 2242+} 2243 2244+static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts) 2245+{ 2246 /* convert pts to v4l2 timebase */ 2247- v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); 2248- out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; 2249- out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; 2250+ const int64_t v4l2_pts = 2251+ pts == AV_NOPTS_VALUE ? 0 : 2252+ av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); 2253+ out->buf.timestamp = tv_from_int(v4l2_pts); 2254 } 2255 2256-static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf) 2257+static inline int64_t v4l2_get_pts(const V4L2Buffer * const avbuf) 2258 { 2259- int64_t v4l2_pts; 2260- 2261+ const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp); 2262+ return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE; 2263+#if 0 2264 /* convert pts back to encoder timebase */ 2265- v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + 2266- avbuf->buf.timestamp.tv_usec; 2267+ return 2268+ avbuf->context->no_pts_rescale ? v4l2_pts : 2269+ v4l2_pts == 0 ? 
AV_NOPTS_VALUE : 2270+ av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); 2271+#endif 2272+} 2273 2274- return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); 2275+static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length) 2276+{ 2277+ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { 2278+ out->planes[plane].bytesused = bytesused; 2279+ out->planes[plane].length = length; 2280+ } else { 2281+ out->buf.bytesused = bytesused; 2282+ out->buf.length = length; 2283+ } 2284 } 2285 2286 static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) 2287@@ -115,6 +148,105 @@ static enum AVColorPrimaries v4l2_get_co 2288 return AVCOL_PRI_UNSPECIFIED; 2289 } 2290 2291+static void v4l2_set_color(V4L2Buffer *buf, 2292+ const enum AVColorPrimaries avcp, 2293+ const enum AVColorSpace avcs, 2294+ const enum AVColorTransferCharacteristic avxc) 2295+{ 2296+ enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; 2297+ enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; 2298+ enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; 2299+ 2300+ switch (avcp) { 2301+ case AVCOL_PRI_BT709: 2302+ cs = V4L2_COLORSPACE_REC709; 2303+ ycbcr = V4L2_YCBCR_ENC_709; 2304+ break; 2305+ case AVCOL_PRI_BT470M: 2306+ cs = V4L2_COLORSPACE_470_SYSTEM_M; 2307+ ycbcr = V4L2_YCBCR_ENC_601; 2308+ break; 2309+ case AVCOL_PRI_BT470BG: 2310+ cs = V4L2_COLORSPACE_470_SYSTEM_BG; 2311+ break; 2312+ case AVCOL_PRI_SMPTE170M: 2313+ cs = V4L2_COLORSPACE_SMPTE170M; 2314+ break; 2315+ case AVCOL_PRI_SMPTE240M: 2316+ cs = V4L2_COLORSPACE_SMPTE240M; 2317+ break; 2318+ case AVCOL_PRI_BT2020: 2319+ cs = V4L2_COLORSPACE_BT2020; 2320+ break; 2321+ case AVCOL_PRI_SMPTE428: 2322+ case AVCOL_PRI_SMPTE431: 2323+ case AVCOL_PRI_SMPTE432: 2324+ case AVCOL_PRI_EBU3213: 2325+ case AVCOL_PRI_RESERVED: 2326+ case AVCOL_PRI_FILM: 2327+ case AVCOL_PRI_UNSPECIFIED: 2328+ default: 2329+ break; 2330+ } 2331+ 2332+ switch (avcs) { 2333+ case AVCOL_SPC_RGB: 2334+ cs 
= V4L2_COLORSPACE_SRGB; 2335+ break; 2336+ case AVCOL_SPC_BT709: 2337+ cs = V4L2_COLORSPACE_REC709; 2338+ break; 2339+ case AVCOL_SPC_FCC: 2340+ cs = V4L2_COLORSPACE_470_SYSTEM_M; 2341+ break; 2342+ case AVCOL_SPC_BT470BG: 2343+ cs = V4L2_COLORSPACE_470_SYSTEM_BG; 2344+ break; 2345+ case AVCOL_SPC_SMPTE170M: 2346+ cs = V4L2_COLORSPACE_SMPTE170M; 2347+ break; 2348+ case AVCOL_SPC_SMPTE240M: 2349+ cs = V4L2_COLORSPACE_SMPTE240M; 2350+ break; 2351+ case AVCOL_SPC_BT2020_CL: 2352+ cs = V4L2_COLORSPACE_BT2020; 2353+ ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM; 2354+ break; 2355+ case AVCOL_SPC_BT2020_NCL: 2356+ cs = V4L2_COLORSPACE_BT2020; 2357+ break; 2358+ default: 2359+ break; 2360+ } 2361+ 2362+ switch (avxc) { /* map the caller's AV transfer characteristic onto a V4L2 xfer_func */ 2363+ case AVCOL_TRC_BT709: 2364+ xfer = V4L2_XFER_FUNC_709; 2365+ break; 2366+ case AVCOL_TRC_IEC61966_2_1: 2367+ xfer = V4L2_XFER_FUNC_SRGB; 2368+ break; 2369+ case AVCOL_TRC_SMPTE240M: 2370+ xfer = V4L2_XFER_FUNC_SMPTE240M; 2371+ break; 2372+ case AVCOL_TRC_SMPTE2084: 2373+ xfer = V4L2_XFER_FUNC_SMPTE2084; 2374+ break; 2375+ default: 2376+ break; 2377+ } 2378+ 2379+ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { 2380+ buf->context->format.fmt.pix_mp.colorspace = cs; 2381+ buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr; 2382+ buf->context->format.fmt.pix_mp.xfer_func = xfer; 2383+ } else { 2384+ buf->context->format.fmt.pix.colorspace = cs; 2385+ buf->context->format.fmt.pix.ycbcr_enc = ycbcr; 2386+ buf->context->format.fmt.pix.xfer_func = xfer; 2387+ } 2388+} 2389+ 2390 static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) 2391 { 2392 enum v4l2_quantization qt; 2393@@ -133,6 +265,20 @@ static enum AVColorRange v4l2_get_color_ 2394 return AVCOL_RANGE_UNSPECIFIED; 2395 } 2396 2397+static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr) 2398+{ 2399+ const enum v4l2_quantization q = 2400+ avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE : 2401+ avcr == AVCOL_RANGE_JPEG ? 
V4L2_QUANTIZATION_FULL_RANGE : 2402+ V4L2_QUANTIZATION_DEFAULT; 2403+ 2404+ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { 2405+ buf->context->format.fmt.pix_mp.quantization = q; 2406+ } else { 2407+ buf->context->format.fmt.pix.quantization = q; 2408+ } 2409+} 2410+ 2411 static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf) 2412 { 2413 enum v4l2_ycbcr_encoding ycbcr; 2414@@ -209,73 +355,218 @@ static enum AVColorTransferCharacteristi 2415 return AVCOL_TRC_UNSPECIFIED; 2416 } 2417 2418-static void v4l2_free_buffer(void *opaque, uint8_t *unused) 2419+static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf) 2420 { 2421- V4L2Buffer* avbuf = opaque; 2422- V4L2m2mContext *s = buf_to_m2mctx(avbuf); 2423+ return V4L2_FIELD_IS_INTERLACED(buf->buf.field); 2424+} 2425 2426- if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) { 2427- atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel); 2428+static int v4l2_buf_is_top_first(const V4L2Buffer * const buf) 2429+{ 2430+ return buf->buf.field == V4L2_FIELD_INTERLACED_TB; 2431+} 2432 2433- if (s->reinit) { 2434- if (!atomic_load(&s->refcount)) 2435- sem_post(&s->refsync); 2436- } else { 2437- if (s->draining && V4L2_TYPE_IS_OUTPUT(avbuf->context->type)) { 2438- /* no need to queue more buffers to the driver */ 2439- avbuf->status = V4L2BUF_AVAILABLE; 2440- } 2441- else if (avbuf->context->streamon) 2442- ff_v4l2_buffer_enqueue(avbuf); 2443- } 2444+static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff) 2445+{ 2446+ buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE : 2447+ is_tff ? 
V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT; 2448+} 2449 2450- av_buffer_unref(&avbuf->context_ref); 2451+static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) 2452+{ 2453+ AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; 2454+ AVDRMLayerDescriptor *layer; 2455+ 2456+ /* fill the DRM frame descriptor */ 2457+ drm_desc->nb_objects = avbuf->num_planes; 2458+ drm_desc->nb_layers = 1; 2459+ 2460+ layer = &drm_desc->layers[0]; 2461+ layer->nb_planes = avbuf->num_planes; 2462+ 2463+ for (int i = 0; i < avbuf->num_planes; i++) { 2464+ layer->planes[i].object_index = i; 2465+ layer->planes[i].offset = avbuf->plane_info[i].offset; 2466+ layer->planes[i].pitch = avbuf->plane_info[i].bytesperline; 2467+ } 2468+ 2469+ switch (avbuf->context->av_pix_fmt) { 2470+ case AV_PIX_FMT_0BGR: 2471+ layer->format = DRM_FORMAT_RGBX8888; 2472+ break; 2473+ case AV_PIX_FMT_RGB0: 2474+ layer->format = DRM_FORMAT_XBGR8888; 2475+ break; 2476+ case AV_PIX_FMT_0RGB: 2477+ layer->format = DRM_FORMAT_BGRX8888; 2478+ break; 2479+ case AV_PIX_FMT_BGR0: 2480+ layer->format = DRM_FORMAT_XRGB8888; 2481+ break; 2482+ 2483+ case AV_PIX_FMT_ABGR: 2484+ layer->format = DRM_FORMAT_RGBA8888; 2485+ break; 2486+ case AV_PIX_FMT_RGBA: 2487+ layer->format = DRM_FORMAT_ABGR8888; 2488+ break; 2489+ case AV_PIX_FMT_ARGB: 2490+ layer->format = DRM_FORMAT_BGRA8888; 2491+ break; 2492+ case AV_PIX_FMT_BGRA: 2493+ layer->format = DRM_FORMAT_ARGB8888; 2494+ break; 2495+ 2496+ case AV_PIX_FMT_BGR24: 2497+ layer->format = DRM_FORMAT_BGR888; 2498+ break; 2499+ case AV_PIX_FMT_RGB24: 2500+ layer->format = DRM_FORMAT_RGB888; 2501+ break; 2502+ 2503+ case AV_PIX_FMT_YUYV422: 2504+ 2505+ layer->format = DRM_FORMAT_YUYV; 2506+ layer->nb_planes = 1; 2507+ 2508+ break; 2509+ 2510+ case AV_PIX_FMT_NV12: 2511+ case AV_PIX_FMT_NV21: 2512+ 2513+ layer->format = avbuf->context->av_pix_fmt == AV_PIX_FMT_NV12 ? 
2514+ DRM_FORMAT_NV12 : DRM_FORMAT_NV21; 2515+ 2516+ if (avbuf->num_planes > 1) 2517+ break; 2518+ 2519+ layer->nb_planes = 2; 2520+ 2521+ layer->planes[1].object_index = 0; 2522+ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * 2523+ avbuf->context->format.fmt.pix.height; 2524+ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline; 2525+ break; 2526+ 2527+ case AV_PIX_FMT_YUV420P: 2528+ 2529+ layer->format = DRM_FORMAT_YUV420; 2530+ 2531+ if (avbuf->num_planes > 1) 2532+ break; 2533+ 2534+ layer->nb_planes = 3; 2535+ 2536+ layer->planes[1].object_index = 0; 2537+ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * 2538+ avbuf->context->format.fmt.pix.height; 2539+ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1; 2540+ 2541+ layer->planes[2].object_index = 0; 2542+ layer->planes[2].offset = layer->planes[1].offset + 2543+ ((avbuf->plane_info[0].bytesperline * 2544+ avbuf->context->format.fmt.pix.height) >> 2); 2545+ layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1; 2546+ break; 2547+ 2548+ default: 2549+ drm_desc->nb_layers = 0; 2550+ break; 2551 } 2552+ 2553+ return (uint8_t *) drm_desc; 2554 } 2555 2556-static int v4l2_buf_increase_ref(V4L2Buffer *in) 2557+static void v4l2_free_bufref(void *opaque, uint8_t *data) 2558 { 2559- V4L2m2mContext *s = buf_to_m2mctx(in); 2560+ AVBufferRef * bufref = (AVBufferRef *)data; 2561+ V4L2Buffer *avbuf = (V4L2Buffer *)bufref->data; 2562+ struct V4L2Context *ctx = ff_weak_link_lock(&avbuf->context_wl); 2563 2564- if (in->context_ref) 2565- atomic_fetch_add(&in->context_refcount, 1); 2566- else { 2567- in->context_ref = av_buffer_ref(s->self_ref); 2568- if (!in->context_ref) 2569- return AVERROR(ENOMEM); 2570+ if (ctx != NULL) { 2571+ // Buffer still attached to context 2572+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); 2573+ 2574+ if (!s->output_drm && avbuf->dmabuf[0] != NULL) { 2575+ for (unsigned int i = 0; i != avbuf->num_planes; ++i) 2576+ 
dmabuf_read_end(avbuf->dmabuf[i]); 2577+ } 2578+ 2579+ ff_mutex_lock(&ctx->lock); 2580+ 2581+ ff_v4l2_buffer_set_avail(avbuf); 2582+ avbuf->buf.timestamp.tv_sec = 0; 2583+ avbuf->buf.timestamp.tv_usec = 0; 2584+ 2585+ if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { 2586+ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name); 2587+ } 2588+ else if (ctx->streamon) { 2589+ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name); 2590+ ff_v4l2_buffer_enqueue(avbuf); // will set to IN_DRIVER 2591+ } 2592+ else { 2593+ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer freed but streamoff\n", ctx->name); 2594+ } 2595 2596- in->context_refcount = 1; 2597+ ff_mutex_unlock(&ctx->lock); 2598 } 2599 2600- in->status = V4L2BUF_RET_USER; 2601- atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed); 2602+ ff_weak_link_unlock(avbuf->context_wl); 2603+ av_buffer_unref(&bufref); 2604+} 2605 2606- return 0; 2607+static inline uint32_t ff_v4l2_buf_len(const struct v4l2_buffer * b, unsigned int i) 2608+{ 2609+ return V4L2_TYPE_IS_MULTIPLANAR(b->type) ? 
b->m.planes[i].length : b->length; 2610 } 2611 2612-static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf) 2613+static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) 2614 { 2615- int ret; 2616+ int i, ret; 2617+ const V4L2m2mContext * const s = buf_to_m2mctx(avbuf); 2618 2619- if (plane >= in->num_planes) 2620- return AVERROR(EINVAL); 2621+ for (i = 0; i < avbuf->num_planes; i++) { 2622+ int dma_fd = -1; 2623+ const uint32_t blen = ff_v4l2_buf_len(&avbuf->buf, i); 2624 2625- /* even though most encoders return 0 in data_offset encoding vp8 does require this value */ 2626- *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset, 2627- in->plane_info[plane].length, v4l2_free_buffer, in, 0); 2628- if (!*buf) 2629- return AVERROR(ENOMEM); 2630+ if (s->db_ctl != NULL) { 2631+ if ((avbuf->dmabuf[i] = dmabuf_alloc(s->db_ctl, blen)) == NULL) 2632+ return AVERROR(ENOMEM); 2633+ dma_fd = dmabuf_fd(avbuf->dmabuf[i]); 2634+ if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type)) 2635+ avbuf->buf.m.planes[i].m.fd = dma_fd; 2636+ else 2637+ avbuf->buf.m.fd = dma_fd; 2638 2639- ret = v4l2_buf_increase_ref(in); 2640- if (ret) 2641- av_buffer_unref(buf); 2642+ if (!s->output_drm) 2643+ avbuf->plane_info[i].mm_addr = dmabuf_map(avbuf->dmabuf[i]); 2644+ } 2645+ else { 2646+ struct v4l2_exportbuffer expbuf; 2647+ memset(&expbuf, 0, sizeof(expbuf)); 2648+ 2649+ expbuf.index = avbuf->buf.index; 2650+ expbuf.type = avbuf->buf.type; 2651+ expbuf.plane = i; 2652+ 2653+ ret = ioctl(s->fd, VIDIOC_EXPBUF, &expbuf); 2654+ if (ret < 0) 2655+ return AVERROR(errno); 2656+ dma_fd = expbuf.fd; 2657+ } 2658 2659- return ret; 2660+ avbuf->drm_frame.objects[i].size = blen; 2661+ avbuf->drm_frame.objects[i].fd = dma_fd; 2662+ avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR; 2663+ } 2664+ 2665+ return 0; 2666 } 2667 2668 static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset) 2669 { 2670 
unsigned int bytesused, length; 2671+ int rv = 0; 2672 2673 if (plane >= out->num_planes) 2674 return AVERROR(EINVAL); 2675@@ -283,32 +574,61 @@ static int v4l2_bufref_to_buf(V4L2Buffer 2676 length = out->plane_info[plane].length; 2677 bytesused = FFMIN(size+offset, length); 2678 2679- memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset)); 2680- 2681- if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { 2682- out->planes[plane].bytesused = bytesused; 2683- out->planes[plane].length = length; 2684- } else { 2685- out->buf.bytesused = bytesused; 2686- out->buf.length = length; 2687+ if (size > length - offset) { 2688+ size = length - offset; 2689+ rv = AVERROR(ENOMEM); 2690 } 2691 2692- return 0; 2693+ memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, size); 2694+ 2695+ set_buf_length(out, plane, bytesused, length); 2696+ 2697+ return rv; 2698+} 2699+ 2700+static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf) 2701+{ 2702+ AVBufferRef * bufref = av_buffer_ref(avbuf->context->bufrefs[avbuf->buf.index]); 2703+ AVBufferRef * newbuf; 2704+ 2705+ if (!bufref) 2706+ return NULL; 2707+ 2708+ newbuf = av_buffer_create((uint8_t *)bufref, sizeof(*bufref), v4l2_free_bufref, NULL, 0); 2709+ if (newbuf == NULL) 2710+ av_buffer_unref(&bufref); 2711+ else /* only mark the buffer as handed out when a wrapper exists to release it again */ 2712+ avbuf->status = V4L2BUF_RET_USER; 2713+ return newbuf; 2714 } 2715 2716 static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) 2717 { 2718- int i, ret; 2719+ int i; 2720 2721 frame->format = avbuf->context->av_pix_fmt; 2722 2723- for (i = 0; i < avbuf->num_planes; i++) { 2724- ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]); 2725- if (ret) 2726- return ret; 2727+ frame->buf[0] = wrap_avbuf(avbuf); 2728+ if (frame->buf[0] == NULL) 2729+ return AVERROR(ENOMEM); 2730+ 2731+ if (buf_to_m2mctx(avbuf)->output_drm) { 2732+ /* 1.
get references to the actual data */ 2733+ const int rv = ff_v4l2_context_frames_set(avbuf->context); 2734+ if (rv != 0) 2735+ return rv; 2736+ 2737+ frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf); 2738+ frame->format = AV_PIX_FMT_DRM_PRIME; 2739+ frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref); 2740+ return 0; 2741+ } 2742+ 2743 2744+ /* 1. get references to the actual data */ 2745+ for (i = 0; i < avbuf->num_planes; i++) { 2746+ frame->data[i] = (uint8_t *)avbuf->plane_info[i].mm_addr + avbuf->planes[i].data_offset; 2747 frame->linesize[i] = avbuf->plane_info[i].bytesperline; 2748- frame->data[i] = frame->buf[i]->data; 2749 } 2750 2751 /* fixup special cases */ 2752@@ -317,88 +637,152 @@ static int v4l2_buffer_buf_to_swframe(AV 2753 case AV_PIX_FMT_NV21: 2754 if (avbuf->num_planes > 1) 2755 break; 2756- frame->linesize[1] = avbuf->plane_info[0].bytesperline; 2757- frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; 2758+ frame->linesize[1] = frame->linesize[0]; 2759+ frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); 2760 break; 2761 2762 case AV_PIX_FMT_YUV420P: 2763 if (avbuf->num_planes > 1) 2764 break; 2765- frame->linesize[1] = avbuf->plane_info[0].bytesperline >> 1; 2766- frame->linesize[2] = avbuf->plane_info[0].bytesperline >> 1; 2767- frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; 2768- frame->data[2] = frame->data[1] + ((avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height) >> 2); 2769+ frame->linesize[1] = frame->linesize[0] / 2; 2770+ frame->linesize[2] = frame->linesize[1]; 2771+ frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); 2772+ frame->data[2] = frame->data[1] + frame->linesize[1] * ff_v4l2_get_format_height(&avbuf->context->format) / 2; 2773 break; 
2774 2775 default: 2776 break; 2777 } 2778 2779+ if (avbuf->dmabuf[0] != NULL) { 2780+ for (unsigned int i = 0; i != avbuf->num_planes; ++i) 2781+ dmabuf_read_start(avbuf->dmabuf[i]); 2782+ } 2783+ 2784+ return 0; 2785+} 2786+ 2787+static void cpy_2d(uint8_t * dst, int dst_stride, const uint8_t * src, int src_stride, int w, int h) 2788+{ 2789+ if (dst_stride == src_stride && w + 32 >= dst_stride) { 2790+ memcpy(dst, src, dst_stride * h); 2791+ } 2792+ else { 2793+ while (--h >= 0) { 2794+ memcpy(dst, src, w); 2795+ dst += dst_stride; 2796+ src += src_stride; 2797+ } 2798+ } 2799+} 2800+ 2801+static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes) 2802+{ 2803+ return i != 0 && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA)); 2804+} 2805+ 2806+static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out) 2807+{ 2808+ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; 2809+ 2810+ if (frame->format != AV_PIX_FMT_DRM_PRIME || !src) 2811+ return AVERROR(EINVAL); 2812+ 2813+ av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF); 2814+ 2815+ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { 2816+ // Only currently cope with single buffer types 2817+ if (out->buf.length != 1) 2818+ return AVERROR_PATCHWELCOME; 2819+ if (src->nb_objects != 1) 2820+ return AVERROR(EINVAL); 2821+ 2822+ out->planes[0].m.fd = src->objects[0].fd; 2823+ } 2824+ else { 2825+ if (src->nb_objects != 1) 2826+ return AVERROR(EINVAL); 2827+ 2828+ out->buf.m.fd = src->objects[0].fd; 2829+ } 2830+ 2831+ // No need to copy src AVDescriptor and if we did then we may confuse 2832+ // fd close on free 2833+ out->ref_buf = av_buffer_ref(frame->buf[0]); 2834+ 2835 return 0; 2836 } 2837 2838 static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) 2839 { 2840- int i, ret; 2841- struct v4l2_format fmt = out->context->format; 2842- int pixel_format = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? 
2843- fmt.fmt.pix_mp.pixelformat : fmt.fmt.pix.pixelformat; 2844- int height = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? 2845- fmt.fmt.pix_mp.height : fmt.fmt.pix.height; 2846- int is_planar_format = 0; 2847- 2848- switch (pixel_format) { 2849- case V4L2_PIX_FMT_YUV420M: 2850- case V4L2_PIX_FMT_YVU420M: 2851-#ifdef V4L2_PIX_FMT_YUV422M 2852- case V4L2_PIX_FMT_YUV422M: 2853-#endif 2854-#ifdef V4L2_PIX_FMT_YVU422M 2855- case V4L2_PIX_FMT_YVU422M: 2856-#endif 2857-#ifdef V4L2_PIX_FMT_YUV444M 2858- case V4L2_PIX_FMT_YUV444M: 2859-#endif 2860-#ifdef V4L2_PIX_FMT_YVU444M 2861- case V4L2_PIX_FMT_YVU444M: 2862-#endif 2863- case V4L2_PIX_FMT_NV12M: 2864- case V4L2_PIX_FMT_NV21M: 2865- case V4L2_PIX_FMT_NV12MT_16X16: 2866- case V4L2_PIX_FMT_NV12MT: 2867- case V4L2_PIX_FMT_NV16M: 2868- case V4L2_PIX_FMT_NV61M: 2869- is_planar_format = 1; 2870- } 2871- 2872- if (!is_planar_format) { 2873- const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); 2874- int planes_nb = 0; 2875- int offset = 0; 2876- 2877- for (i = 0; i < desc->nb_components; i++) 2878- planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); 2879- 2880- for (i = 0; i < planes_nb; i++) { 2881- int size, h = height; 2882- if (i == 1 || i == 2) { 2883+ int i; 2884+ int num_planes = 0; 2885+ int pel_strides[4] = {0}; 2886+ 2887+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); 2888+ 2889+ if ((desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0) { 2890+ av_log(NULL, AV_LOG_ERROR, "%s: HWACCEL cannot be copied\n", __func__); 2891+ return -1; 2892+ } 2893+ 2894+ for (i = 0; i != desc->nb_components; ++i) { 2895+ if (desc->comp[i].plane >= num_planes) 2896+ num_planes = desc->comp[i].plane + 1; 2897+ pel_strides[desc->comp[i].plane] = desc->comp[i].step; 2898+ } 2899+ 2900+ if (out->num_planes > 1) { 2901+ if (num_planes != out->num_planes) { 2902+ av_log(NULL, AV_LOG_ERROR, "%s: Num planes mismatch: %d != %d\n", __func__, num_planes, out->num_planes); 2903+ return -1; 2904+ } 2905+ for (i = 0; i != 
num_planes; ++i) { 2906+ int w = frame->width; 2907+ int h = frame->height; 2908+ if (is_chroma(desc, i, num_planes)) { 2909+ w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); 2910 h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); 2911 } 2912- size = frame->linesize[i] * h; 2913- ret = v4l2_bufref_to_buf(out, 0, frame->data[i], size, offset); 2914- if (ret) 2915- return ret; 2916- offset += size; 2917+ 2918+ cpy_2d(out->plane_info[i].mm_addr, out->plane_info[i].bytesperline, 2919+ frame->data[i], frame->linesize[i], 2920+ w * pel_strides[i], h); 2921+ set_buf_length(out, i, out->plane_info[i].bytesperline * h, out->plane_info[i].length); 2922 } 2923- return 0; 2924 } 2925+ else 2926+ { 2927+ unsigned int offset = 0; 2928+ 2929+ for (i = 0; i != num_planes; ++i) { 2930+ int w = frame->width; 2931+ int h = frame->height; 2932+ int dst_stride = out->plane_info[0].bytesperline; 2933+ uint8_t * const dst = (uint8_t *)out->plane_info[0].mm_addr + offset; 2934+ 2935+ if (is_chroma(desc, i, num_planes)) { 2936+ // Is chroma 2937+ dst_stride >>= desc->log2_chroma_w; 2938+ offset += dst_stride * (out->context->height >> desc->log2_chroma_h); 2939+ w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); 2940+ h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); 2941+ } 2942+ else { 2943+ // Is luma or alpha 2944+ offset += dst_stride * out->context->height; 2945+ } 2946+ if (offset > out->plane_info[0].length) { 2947+ av_log(NULL, AV_LOG_ERROR, "%s: Plane total %u > buffer size %zu\n", __func__, offset, out->plane_info[0].length); 2948+ return -1; 2949+ } 2950 2951- for (i = 0; i < out->num_planes; i++) { 2952- ret = v4l2_bufref_to_buf(out, i, frame->buf[i]->data, frame->buf[i]->size, 0); 2953- if (ret) 2954- return ret; 2955+ cpy_2d(dst, dst_stride, 2956+ frame->data[i], frame->linesize[i], 2957+ w * pel_strides[i], h); 2958+ } 2959+ set_buf_length(out, 0, offset, out->plane_info[0].length); 2960 } 2961- 2962 return 0; 2963 } 2964 2965@@ -408,16 +792,31 @@ static int v4l2_buffer_swframe_to_buf(co 2966 * 
2967 ******************************************************************************/ 2968 2969-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) 2970+int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts) 2971 { 2972- v4l2_set_pts(out, frame->pts); 2973- 2974- return v4l2_buffer_swframe_to_buf(frame, out); 2975+ out->buf.flags = frame->key_frame ? 2976+ (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : 2977+ (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); 2978+ // Beware that colour info is held in format rather than the actual 2979+ // v4l2 buffer struct so this may not be as useful as you might hope 2980+ v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc); 2981+ v4l2_set_color_range(out, frame->color_range); 2982+ // PTS & interlace are buffer vars 2983+ if (track_ts) 2984+ out->buf.timestamp = tv_from_int(track_ts); 2985+ else 2986+ v4l2_set_pts(out, frame->pts); 2987+ v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first); 2988+ 2989+ return frame->format == AV_PIX_FMT_DRM_PRIME ? 2990+ v4l2_buffer_primeframe_to_buf(frame, out) : 2991+ v4l2_buffer_swframe_to_buf(frame, out); 2992 } 2993 2994 int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) 2995 { 2996 int ret; 2997+ V4L2Context * const ctx = avbuf->context; 2998 2999 av_frame_unref(frame); 3000 3001@@ -428,17 +827,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFram 3002 3003 /* 2. get frame information */ 3004 frame->key_frame = !!(avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME); 3005+ frame->pict_type = frame->key_frame ? AV_PICTURE_TYPE_I : 3006+ (avbuf->buf.flags & V4L2_BUF_FLAG_PFRAME) != 0 ? AV_PICTURE_TYPE_P : 3007+ (avbuf->buf.flags & V4L2_BUF_FLAG_BFRAME) != 0 ? 
AV_PICTURE_TYPE_B : 3008+ AV_PICTURE_TYPE_NONE; 3009 frame->color_primaries = v4l2_get_color_primaries(avbuf); 3010 frame->colorspace = v4l2_get_color_space(avbuf); 3011 frame->color_range = v4l2_get_color_range(avbuf); 3012 frame->color_trc = v4l2_get_color_trc(avbuf); 3013 frame->pts = v4l2_get_pts(avbuf); 3014 frame->pkt_dts = AV_NOPTS_VALUE; 3015+ frame->interlaced_frame = v4l2_buf_is_interlaced(avbuf); 3016+ frame->top_field_first = v4l2_buf_is_top_first(avbuf); 3017 3018 /* these values are updated also during re-init in v4l2_process_driver_event */ 3019- frame->height = avbuf->context->height; 3020- frame->width = avbuf->context->width; 3021- frame->sample_aspect_ratio = avbuf->context->sample_aspect_ratio; 3022+ frame->height = ctx->height; 3023+ frame->width = ctx->width; 3024+ frame->sample_aspect_ratio = ctx->sample_aspect_ratio; 3025+ 3026+ if (ctx->selection.height && ctx->selection.width) { 3027+ frame->crop_left = ctx->selection.left < frame->width ? ctx->selection.left : 0; 3028+ frame->crop_top = ctx->selection.top < frame->height ? ctx->selection.top : 0; 3029+ frame->crop_right = ctx->selection.left + ctx->selection.width < frame->width ? 3030+ frame->width - (ctx->selection.left + ctx->selection.width) : 0; 3031+ frame->crop_bottom = ctx->selection.top + ctx->selection.height < frame->height ? 3032+ frame->height - (ctx->selection.top + ctx->selection.height) : 0; 3033+ } 3034 3035 /* 3. report errors upstream */ 3036 if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) { 3037@@ -451,15 +865,15 @@ int ff_v4l2_buffer_buf_to_avframe(AVFram 3038 3039 int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) 3040 { 3041- int ret; 3042- 3043 av_packet_unref(pkt); 3044- ret = v4l2_buf_to_bufref(avbuf, 0, &pkt->buf); 3045- if (ret) 3046- return ret; 3047+ 3048+ pkt->buf = wrap_avbuf(avbuf); 3049+ if (pkt->buf == NULL) 3050+ return AVERROR(ENOMEM); 3051 3052 pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? 
avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused; 3053- pkt->data = pkt->buf->data; 3054+ pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset; 3055+ pkt->flags = 0; 3056 3057 if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) 3058 pkt->flags |= AV_PKT_FLAG_KEY; 3059@@ -474,39 +888,108 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket 3060 return 0; 3061 } 3062 3063-int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) 3064+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, 3065+ const void *extdata, size_t extlen, 3066+ const int64_t timestamp) 3067 { 3068 int ret; 3069 3070- ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, 0); 3071- if (ret) 3072+ if (extlen) { 3073+ ret = v4l2_bufref_to_buf(out, 0, extdata, extlen, 0); 3074+ if (ret) 3075+ return ret; 3076+ } 3077+ 3078+ ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen); 3079+ if (ret && ret != AVERROR(ENOMEM)) 3080 return ret; 3081 3082- v4l2_set_pts(out, pkt->pts); 3083+ if (timestamp) 3084+ out->buf.timestamp = tv_from_int(timestamp); 3085+ else 3086+ v4l2_set_pts(out, pkt->pts); 3087+ 3088+ out->buf.flags = (pkt->flags & AV_PKT_FLAG_KEY) != 0 ? 
3089+ (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : 3090+ (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); 3091 3092- if (pkt->flags & AV_PKT_FLAG_KEY) 3093- out->flags = V4L2_BUF_FLAG_KEYFRAME; 3094+ return ret; 3095+} 3096 3097- return 0; 3098+int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) 3099+{ 3100+ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0); 3101+} 3102+ 3103+ 3104+static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data) 3105+{ 3106+ V4L2Buffer * const avbuf = (V4L2Buffer *)data; 3107+ int i; 3108+ 3109+ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->plane_info); ++i) { 3110+ struct V4L2Plane_info *p = avbuf->plane_info + i; 3111+ if (p->mm_addr != NULL) 3112+ munmap(p->mm_addr, p->length); 3113+ } 3114+ 3115+ if (avbuf->dmabuf[0] == NULL) { 3116+ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { 3117+ if (avbuf->drm_frame.objects[i].fd != -1) 3118+ close(avbuf->drm_frame.objects[i].fd); 3119+ } 3120+ } 3121+ else { 3122+ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->dmabuf); ++i) { 3123+ dmabuf_free(avbuf->dmabuf[i]); 3124+ } 3125+ } 3126+ 3127+ av_buffer_unref(&avbuf->ref_buf); 3128+ 3129+ ff_weak_link_unref(&avbuf->context_wl); 3130+ 3131+ av_free(avbuf); 3132 } 3133 3134-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) 3135+ 3136+int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem) 3137 { 3138- V4L2Context *ctx = avbuf->context; 3139 int ret, i; 3140+ V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf)); 3141+ AVBufferRef * bufref; 3142+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); 3143+ int want_mmap; 3144 3145- avbuf->buf.memory = V4L2_MEMORY_MMAP; 3146+ *pbufref = NULL; 3147+ if (avbuf == NULL) 3148+ return AVERROR(ENOMEM); 3149+ 3150+ bufref = av_buffer_create((uint8_t*)avbuf, sizeof(*avbuf), v4l2_buffer_buffer_free, NULL, 0); 3151+ if (bufref == NULL) { 3152+ av_free(avbuf); 3153+ return AVERROR(ENOMEM); 3154+ } 3155+ 3156+ avbuf->context = 
ctx; 3157+ avbuf->buf.memory = mem; 3158 avbuf->buf.type = ctx->type; 3159 avbuf->buf.index = index; 3160 3161+ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { 3162+ avbuf->drm_frame.objects[i].fd = -1; 3163+ } 3164+ 3165+ avbuf->context_wl = ff_weak_link_ref(ctx->wl_master); 3166+ 3167 if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { 3168 avbuf->buf.length = VIDEO_MAX_PLANES; 3169 avbuf->buf.m.planes = avbuf->planes; 3170 } 3171 3172- ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf); 3173+ ret = ioctl(s->fd, VIDIOC_QUERYBUF, &avbuf->buf) < 0 ? AVERROR(errno) : 0; /* capture errno now so the fail path returns a real AVERROR code, not -1 */ 3174 if (ret < 0) 3175- return AVERROR(errno); 3176+ goto fail; 3177 3178 if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { 3179 avbuf->num_planes = 0; 3180@@ -518,33 +1001,41 @@ int ff_v4l2_buffer_initialize(V4L2Buffer 3181 } else 3182 avbuf->num_planes = 1; 3183 3184- for (i = 0; i < avbuf->num_planes; i++) { 3185+ want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP && 3186+ (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm); 3187 3188+ for (i = 0; i < avbuf->num_planes; i++) { 3189 avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
3190 ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline : 3191 ctx->format.fmt.pix.bytesperline; 3192 3193 if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { 3194 avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length; 3195- avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, 3196- PROT_READ | PROT_WRITE, MAP_SHARED, 3197- buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); 3198+ avbuf->plane_info[i].offset = avbuf->buf.m.planes[i].data_offset; 3199+ 3200+ if (want_mmap) 3201+ avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, 3202+ PROT_READ | PROT_WRITE, MAP_SHARED, 3203+ buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); 3204 } else { 3205 avbuf->plane_info[i].length = avbuf->buf.length; 3206- avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, 3207- PROT_READ | PROT_WRITE, MAP_SHARED, 3208- buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); 3209+ avbuf->plane_info[i].offset = 0; 3210+ 3211+ if (want_mmap) 3212+ avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, 3213+ PROT_READ | PROT_WRITE, MAP_SHARED, 3214+ buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); 3215 } 3216 3217- if (avbuf->plane_info[i].mm_addr == MAP_FAILED) 3218- return AVERROR(ENOMEM); 3219+ if (avbuf->plane_info[i].mm_addr == MAP_FAILED) { 3220+ avbuf->plane_info[i].mm_addr = NULL; 3221+ ret = AVERROR(ENOMEM); 3222+ goto fail; 3223+ } 3224 } 3225 3226 avbuf->status = V4L2BUF_AVAILABLE; 3227 3228- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) 3229- return 0; 3230- 3231 if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { 3232 avbuf->buf.m.planes = avbuf->planes; 3233 avbuf->buf.length = avbuf->num_planes; 3234@@ -554,20 +1045,52 @@ int ff_v4l2_buffer_initialize(V4L2Buffer 3235 avbuf->buf.length = avbuf->planes[0].length; 3236 } 3237 3238- return ff_v4l2_buffer_enqueue(avbuf); 3239+ if (V4L2_TYPE_IS_CAPTURE(ctx->type) && !want_mmap) { 3240+ // export_drm does dmabuf alloc if we aren't using v4l2 alloc 3241+ ret = 
v4l2_buffer_export_drm(avbuf); 3242+ if (ret) { 3243+ av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n"); 3244+ goto fail; 3245+ } 3246+ } 3247+ 3248+ *pbufref = bufref; 3249+ return 0; 3250+ 3251+fail: 3252+ av_buffer_unref(&bufref); 3253+ return ret; 3254 } 3255 3256 int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) 3257 { 3258 int ret; 3259+ int qc; 3260 3261- avbuf->buf.flags = avbuf->flags; 3262+ if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) { 3263+ av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", 3264+ avbuf->context->name, avbuf->buf.index, 3265+ avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, 3266+ avbuf->context->q_count); 3267+ } 3268 3269 ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QBUF, &avbuf->buf); 3270- if (ret < 0) 3271- return AVERROR(errno); 3272+ if (ret < 0) { 3273+ int err = errno; 3274+ av_log(logger(avbuf), AV_LOG_ERROR, "--- %s VIDIOC_QBUF: index %d FAIL err %d (%s)\n", 3275+ avbuf->context->name, avbuf->buf.index, 3276+ err, strerror(err)); 3277+ return AVERROR(err); 3278+ } 3279 3280+ // Lock not wanted - if called from buffer free then lock already obtained 3281+ qc = atomic_fetch_add(&avbuf->context->q_count, 1) + 1; 3282 avbuf->status = V4L2BUF_IN_DRIVER; 3283+ pthread_cond_broadcast(&avbuf->context->cond); 3284+ 3285+ av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", 3286+ avbuf->context->name, avbuf->buf.index, 3287+ avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, qc); 3288 3289 return 0; 3290 } 3291--- a/libavcodec/v4l2_buffers.h 3292+++ b/libavcodec/v4l2_buffers.h 3293@@ -28,31 +28,47 @@ 3294 #include <stddef.h> 3295 #include <linux/videodev2.h> 3296 3297+#include "avcodec.h" 3298 #include "libavutil/buffer.h" 3299 #include "libavutil/frame.h" 3300+#include "libavutil/hwcontext_drm.h" 3301 #include "packet.h" 3302 3303 enum V4L2Buffer_status { 3304 V4L2BUF_AVAILABLE, 3305 
V4L2BUF_IN_DRIVER, 3306+ V4L2BUF_IN_USE, 3307 V4L2BUF_RET_USER, 3308 }; 3309 3310 /** 3311 * V4L2Buffer (wrapper for v4l2_buffer management) 3312 */ 3313+struct V4L2Context; 3314+struct ff_weak_link_client; 3315+struct dmabuf_h; 3316+ 3317 typedef struct V4L2Buffer { 3318- /* each buffer needs to have a reference to its context */ 3319+ /* each buffer needs to have a reference to its context 3320+ * The pointer is good enough for most operation but once the buffer has 3321+ * been passed to the user the buffer may become orphaned so for free ops 3322+ * the weak link must be used to ensure that the context is actually 3323+ * there 3324+ */ 3325 struct V4L2Context *context; 3326+ struct ff_weak_link_client *context_wl; 3327 3328- /* This object is refcounted per-plane, so we need to keep track 3329- * of how many context-refs we are holding. */ 3330- AVBufferRef *context_ref; 3331- atomic_uint context_refcount; 3332+ /* DRM descriptor */ 3333+ AVDRMFrameDescriptor drm_frame; 3334+ /* For DRM_PRIME encode - need to keep a ref to the source buffer till we 3335+ * are done 3336+ */ 3337+ AVBufferRef * ref_buf; 3338 3339 /* keep track of the mmap address and mmap length */ 3340 struct V4L2Plane_info { 3341- int bytesperline; 3342+ size_t bytesperline; 3343+ size_t offset; 3344 void * mm_addr; 3345 size_t length; 3346 } plane_info[VIDEO_MAX_PLANES]; 3347@@ -63,9 +79,9 @@ typedef struct V4L2Buffer { 3348 struct v4l2_buffer buf; 3349 struct v4l2_plane planes[VIDEO_MAX_PLANES]; 3350 3351- int flags; 3352 enum V4L2Buffer_status status; 3353 3354+ struct dmabuf_h * dmabuf[VIDEO_MAX_PLANES]; // If externally alloced dmabufs - stash other info here 3355 } V4L2Buffer; 3356 3357 /** 3358@@ -101,6 +117,10 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket 3359 */ 3360 int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); 3361 3362+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, 3363+ const void *extdata, size_t extlen, 3364+ const 
int64_t timestamp); 3365+ 3366 /** 3367 * Extracts the data from an AVFrame to a V4L2Buffer 3368 * 3369@@ -109,7 +129,7 @@ int ff_v4l2_buffer_avpkt_to_buf(const AV 3370 * 3371 * @returns 0 in case of success, a negative AVERROR code otherwise 3372 */ 3373-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); 3374+int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts); 3375 3376 /** 3377 * Initializes a V4L2Buffer 3378@@ -119,7 +139,7 @@ int ff_v4l2_buffer_avframe_to_buf(const 3379 * 3380 * @returns 0 in case of success, a negative AVERROR code otherwise 3381 */ 3382-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); 3383+int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem); 3384 3385 /** 3386 * Enqueues a V4L2Buffer 3387@@ -130,5 +150,12 @@ int ff_v4l2_buffer_initialize(V4L2Buffer 3388 */ 3389 int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf); 3390 3391+static inline void 3392+ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf) 3393+{ 3394+ avbuf->status = V4L2BUF_AVAILABLE; 3395+ av_buffer_unref(&avbuf->ref_buf); 3396+} 3397+ 3398 3399 #endif // AVCODEC_V4L2_BUFFERS_H 3400--- a/libavcodec/v4l2_context.c 3401+++ b/libavcodec/v4l2_context.c 3402@@ -27,11 +27,14 @@ 3403 #include <unistd.h> 3404 #include <fcntl.h> 3405 #include <poll.h> 3406+#include "libavutil/avassert.h" 3407+#include "libavutil/pixdesc.h" 3408 #include "libavcodec/avcodec.h" 3409 #include "libavcodec/internal.h" 3410 #include "v4l2_buffers.h" 3411 #include "v4l2_fmt.h" 3412 #include "v4l2_m2m.h" 3413+#include "weak_link.h" 3414 3415 struct v4l2_format_update { 3416 uint32_t v4l2_fmt; 3417@@ -41,26 +44,168 @@ struct v4l2_format_update { 3418 int update_avfmt; 3419 }; 3420 3421-static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx) 3422+ 3423+static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) 3424 { 3425- return V4L2_TYPE_IS_OUTPUT(ctx->type) ? 
3426- container_of(ctx, V4L2m2mContext, output) : 3427- container_of(ctx, V4L2m2mContext, capture); 3428+ return (int64_t)n; 3429 } 3430 3431-static inline AVCodecContext *logger(V4L2Context *ctx) 3432+static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) 3433 { 3434- return ctx_to_m2mctx(ctx)->avctx; 3435+ return (unsigned int)pts; 3436+} 3437+ 3438+// FFmpeg requires us to propagate a number of vars from the coded pkt into 3439+// the decoded frame. The only thing that tracks like that in V4L2 stateful 3440+// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no 3441+// guarantees about PTS being unique or specified for every frame so replace 3442+// the supplied PTS with a simple incrementing number and keep a circular 3443+// buffer of all the things we want preserved (including the original PTS) 3444+// indexed by the tracking no. 3445+static int64_t 3446+xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt) 3447+{ 3448+ int64_t track_pts; 3449+ 3450+ // Avoid 0 3451+ if (++x->track_no == 0) 3452+ x->track_no = 1; 3453+ 3454+ track_pts = track_to_pts(avctx, x->track_no); 3455+ 3456+ av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); 3457+ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ 3458+ .discard = 0, 3459+ .pending = 1, 3460+ .pkt_size = avpkt->size, 3461+ .pts = avpkt->pts, 3462+ .dts = avpkt->dts, 3463+ .reordered_opaque = avctx->reordered_opaque, 3464+ .pkt_pos = avpkt->pos, 3465+ .pkt_duration = avpkt->duration, 3466+ .track_pts = track_pts 3467+ }; 3468+ return track_pts; 3469+} 3470+ 3471+static int64_t 3472+xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame) 3473+{ 3474+ int64_t track_pts; 3475+ 3476+ // Avoid 0 3477+ if (++x->track_no == 0) 3478+ x->track_no = 1; 3479+ 3480+ track_pts = 
track_to_pts(avctx, x->track_no); 3481+ 3482+ av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no); 3483+ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ 3484+ .discard = 0, 3485+ .pending = 1, 3486+ .pkt_size = 0, 3487+ .pts = frame->pts, 3488+ .dts = AV_NOPTS_VALUE, 3489+ .reordered_opaque = frame->reordered_opaque, 3490+ .pkt_pos = frame->pkt_pos, 3491+ .pkt_duration = frame->pkt_duration, 3492+ .track_pts = track_pts 3493+ }; 3494+ return track_pts; 3495+} 3496+ 3497+ 3498+// Returns -1 if we should discard the frame 3499+static int 3500+xlat_pts_frame_out(AVCodecContext *const avctx, 3501+ xlat_track_t * const x, 3502+ AVFrame *const frame) 3503+{ 3504+ unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; 3505+ V4L2m2mTrackEl *const t = x->track_els + n; 3506+ if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) 3507+ { 3508+ av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, 3509+ "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); 3510+ frame->pts = AV_NOPTS_VALUE; 3511+ frame->pkt_dts = AV_NOPTS_VALUE; 3512+ frame->reordered_opaque = x->last_opaque; 3513+ frame->pkt_pos = -1; 3514+ frame->pkt_duration = 0; 3515+ frame->pkt_size = -1; 3516+ } 3517+ else if (!t->discard) 3518+ { 3519+ frame->pts = t->pending ? 
t->pts : AV_NOPTS_VALUE; 3520+ frame->pkt_dts = t->dts; 3521+ frame->reordered_opaque = t->reordered_opaque; 3522+ frame->pkt_pos = t->pkt_pos; 3523+ frame->pkt_duration = t->pkt_duration; 3524+ frame->pkt_size = t->pkt_size; 3525+ 3526+ x->last_opaque = x->track_els[n].reordered_opaque; 3527+ if (frame->pts != AV_NOPTS_VALUE) 3528+ x->last_pts = frame->pts; 3529+ t->pending = 0; 3530+ } 3531+ else 3532+ { 3533+ av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); 3534+ return -1; 3535+ } 3536+ 3537+ av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n", 3538+ frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n); 3539+ return 0; 3540+} 3541+ 3542+// Returns -1 if we should discard the frame 3543+static int 3544+xlat_pts_pkt_out(AVCodecContext *const avctx, 3545+ xlat_track_t * const x, 3546+ AVPacket *const pkt) 3547+{ 3548+ unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE; 3549+ V4L2m2mTrackEl *const t = x->track_els + n; 3550+ if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts) 3551+ { 3552+ av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, 3553+ "Pkt tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); 3554+ pkt->pts = AV_NOPTS_VALUE; 3555+ } 3556+ else if (!t->discard) 3557+ { 3558+ pkt->pts = t->pending ? t->pts : AV_NOPTS_VALUE; 3559+ 3560+ x->last_opaque = x->track_els[n].reordered_opaque; 3561+ if (pkt->pts != AV_NOPTS_VALUE) 3562+ x->last_pts = pkt->pts; 3563+ t->pending = 0; 3564+ } 3565+ else 3566+ { 3567+ av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); 3568+ return -1; 3569+ } 3570+ 3571+ // * Would like something much better than this...xlat(offset + out_count)? 
3572+ pkt->dts = pkt->pts; 3573+ av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%d\n", 3574+ pkt->pts, t->track_pts, n); 3575+ return 0; 3576 } 3577 3578-static inline unsigned int v4l2_get_width(struct v4l2_format *fmt) 3579+ 3580+static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) 3581 { 3582- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; 3583+ return V4L2_TYPE_IS_OUTPUT(ctx->type) ? 3584+ container_of(ctx, V4L2m2mContext, output) : 3585+ container_of(ctx, V4L2m2mContext, capture); 3586 } 3587 3588-static inline unsigned int v4l2_get_height(struct v4l2_format *fmt) 3589+static inline AVCodecContext *logger(const V4L2Context *ctx) 3590 { 3591- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; 3592+ return ctx_to_m2mctx(ctx)->avctx; 3593 } 3594 3595 static AVRational v4l2_get_sar(V4L2Context *ctx) 3596@@ -81,21 +226,29 @@ static AVRational v4l2_get_sar(V4L2Conte 3597 return sar; 3598 } 3599 3600-static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2) 3601+static inline int ctx_buffers_alloced(const V4L2Context * const ctx) 3602+{ 3603+ return ctx->bufrefs != NULL; 3604+} 3605+ 3606+// Width/Height changed or we don't have an alloc in the first place? 3607+static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2) 3608 { 3609- struct v4l2_format *fmt1 = &ctx->format; 3610- int ret = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? 3611- fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || 3612- fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height 3613- : 3614- fmt1->fmt.pix.width != fmt2->fmt.pix.width || 3615- fmt1->fmt.pix.height != fmt2->fmt.pix.height; 3616+ const struct v4l2_format *fmt1 = &ctx->format; 3617+ int ret = !ctx_buffers_alloced(ctx) || 3618+ (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? 
3619+ fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || 3620+ fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height 3621+ : 3622+ fmt1->fmt.pix.width != fmt2->fmt.pix.width || 3623+ fmt1->fmt.pix.height != fmt2->fmt.pix.height); 3624 3625 if (ret) 3626- av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n", 3627+ av_log(logger(ctx), AV_LOG_DEBUG, "V4L2 %s changed: alloc=%d (%dx%d) -> (%dx%d)\n", 3628 ctx->name, 3629- v4l2_get_width(fmt1), v4l2_get_height(fmt1), 3630- v4l2_get_width(fmt2), v4l2_get_height(fmt2)); 3631+ ctx_buffers_alloced(ctx), 3632+ ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1), 3633+ ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2)); 3634 3635 return ret; 3636 } 3637@@ -153,76 +306,100 @@ static inline void v4l2_save_to_context( 3638 } 3639 } 3640 3641-static int v4l2_start_decode(V4L2Context *ctx) 3642+static int get_default_selection(V4L2Context * const ctx, struct v4l2_rect *r) 3643 { 3644- struct v4l2_decoder_cmd cmd = { 3645- .cmd = V4L2_DEC_CMD_START, 3646- .flags = 0, 3647+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); 3648+ struct v4l2_selection selection = { 3649+ .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, 3650+ .target = V4L2_SEL_TGT_COMPOSE 3651 }; 3652- int ret; 3653 3654- ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DECODER_CMD, &cmd); 3655- if (ret) 3656+ memset(r, 0, sizeof(*r)); 3657+ if (ioctl(s->fd, VIDIOC_G_SELECTION, &selection)) 3658 return AVERROR(errno); 3659 3660+ *r = selection.r; 3661 return 0; 3662 } 3663 3664-/** 3665- * handle resolution change event and end of stream event 3666- * returns 1 if reinit was successful, negative if it failed 3667- * returns 0 if reinit was not executed 3668- */ 3669-static int v4l2_handle_event(V4L2Context *ctx) 3670+static int do_source_change(V4L2m2mContext * const s) 3671 { 3672- V4L2m2mContext *s = ctx_to_m2mctx(ctx); 3673- struct v4l2_format cap_fmt = s->capture.format; 3674- struct v4l2_event evt = { 0 }; 3675+ AVCodecContext *const avctx = 
s->avctx; 3676+ 3677 int ret; 3678+ int reinit; 3679+ struct v4l2_format cap_fmt = s->capture.format; 3680 3681- ret = ioctl(s->fd, VIDIOC_DQEVENT, &evt); 3682- if (ret < 0) { 3683- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_DQEVENT\n", ctx->name); 3684- return 0; 3685- } 3686+ s->capture.done = 0; 3687 3688- if (evt.type == V4L2_EVENT_EOS) { 3689- ctx->done = 1; 3690+ ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); 3691+ if (ret) { 3692+ av_log(avctx, AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", s->capture.name); 3693 return 0; 3694 } 3695 3696- if (evt.type != V4L2_EVENT_SOURCE_CHANGE) 3697- return 0; 3698+ get_default_selection(&s->capture, &s->capture.selection); 3699 3700- ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); 3701- if (ret) { 3702- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT\n", s->capture.name); 3703- return 0; 3704+ reinit = ctx_resolution_changed(&s->capture, &cap_fmt); 3705+ if ((s->quirks & FF_V4L2_QUIRK_REINIT_ALWAYS) != 0) 3706+ reinit = 1; 3707+ 3708+ s->capture.format = cap_fmt; 3709+ if (reinit) { 3710+ s->capture.height = ff_v4l2_get_format_height(&cap_fmt); 3711+ s->capture.width = ff_v4l2_get_format_width(&cap_fmt); 3712 } 3713 3714- if (v4l2_resolution_changed(&s->capture, &cap_fmt)) { 3715- s->capture.height = v4l2_get_height(&cap_fmt); 3716- s->capture.width = v4l2_get_width(&cap_fmt); 3717- s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); 3718- } else { 3719- v4l2_start_decode(ctx); 3720- return 0; 3721+ // If we don't support selection (or it is bust) and we obviously have HD then kludge 3722+ if ((s->capture.selection.width == 0 || s->capture.selection.height == 0) && 3723+ (s->capture.height == 1088 && s->capture.width == 1920)) { 3724+ s->capture.selection = (struct v4l2_rect){.width = 1920, .height = 1080}; 3725 } 3726 3727- s->reinit = 1; 3728+ s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); 3729 3730- if (s->avctx) 3731- ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height); 3732- if (ret 
< 0) 3733- av_log(logger(ctx), AV_LOG_WARNING, "update avcodec height and width\n"); 3734+ av_log(avctx, AV_LOG_DEBUG, "Source change: Fmt: %s, SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n", 3735+ av_fourcc2str(ff_v4l2_get_format_pixelformat(&cap_fmt)), 3736+ s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den, 3737+ s->capture.width, s->capture.height, 3738+ s->capture.selection.width, s->capture.selection.height, 3739+ s->capture.selection.left, s->capture.selection.top, reinit); 3740 3741- ret = ff_v4l2_m2m_codec_reinit(s); 3742- if (ret) { 3743- av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_reinit\n"); 3744- return AVERROR(EINVAL); 3745+ ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); 3746+ if (ret) 3747+ av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n"); 3748+ s->draining = 0; 3749+ 3750+ if (!reinit) { 3751+ /* Buffers are OK so just stream off to ack */ 3752+ av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__); 3753+ } 3754+ else { 3755+ if (avctx) 3756+ ret = ff_set_dimensions(s->avctx, 3757+ s->capture.selection.width != 0 ? s->capture.selection.width : s->capture.width, 3758+ s->capture.selection.height != 0 ? 
s->capture.selection.height : s->capture.height); 3759+ if (ret < 0) 3760+ av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n"); 3761+ 3762+ ff_v4l2_context_release(&s->capture); 3763+ 3764+ if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) || 3765+ s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) { 3766+ av_log(avctx, AV_LOG_ERROR, "Format post reinit too small: wanted %dx%d > got %dx%d\n", 3767+ s->capture.width, s->capture.height, 3768+ ff_v4l2_get_format_width(&s->capture.format), ff_v4l2_get_format_height(&s->capture.format)); 3769+ return AVERROR(EINVAL); 3770+ } 3771+ 3772+ // Update pixel format - should only actually do something on initial change 3773+ s->capture.av_pix_fmt = 3774+ ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO); 3775+ avctx->pix_fmt = s->output_drm ? AV_PIX_FMT_DRM_PRIME : s->capture.av_pix_fmt; 3776+ avctx->sw_pix_fmt = s->capture.av_pix_fmt; 3777 } 3778 3779- /* reinit executed */ 3780+ ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON); 3781 return 1; 3782 } 3783 3784@@ -266,171 +443,293 @@ static int v4l2_stop_encode(V4L2Context 3785 return 0; 3786 } 3787 3788-static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout) 3789-{ 3790- struct v4l2_plane planes[VIDEO_MAX_PLANES]; 3791- struct v4l2_buffer buf = { 0 }; 3792- V4L2Buffer *avbuf; 3793- struct pollfd pfd = { 3794- .events = POLLIN | POLLRDNORM | POLLPRI | POLLOUT | POLLWRNORM, /* default blocking capture */ 3795- .fd = ctx_to_m2mctx(ctx)->fd, 3796+// DQ a buffer 3797+// Amalgamates all the various ways there are of signalling EOS/Event to 3798+// generate a consistent EPIPE. 3799+// 3800+// Sets ctx->flag_last if next dq would produce EPIPE (i.e. stream has stopped) 3801+// 3802+// Returns: 3803+// 0 Success 3804+// AVERROR(EPIPE) Nothing more to read 3805+// AVERROR(ENOSPC) No buffers in Q to put result in 3806+// * AVERROR(..)
3807+ 3808+ static int 3809+dq_buf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf) 3810+{ 3811+ V4L2m2mContext * const m = ctx_to_m2mctx(ctx); 3812+ AVCodecContext * const avctx = m->avctx; 3813+ V4L2Buffer * avbuf; 3814+ const int is_mp = V4L2_TYPE_IS_MULTIPLANAR(ctx->type); 3815+ 3816+ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; 3817+ 3818+ struct v4l2_buffer buf = { 3819+ .type = ctx->type, 3820+ .memory = V4L2_MEMORY_MMAP, 3821 }; 3822- int i, ret; 3823 3824- if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx->buffers) { 3825- for (i = 0; i < ctx->num_buffers; i++) { 3826- if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) 3827- break; 3828- } 3829- if (i == ctx->num_buffers) 3830- av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers returned to " 3831- "userspace. Increase num_capture_buffers " 3832- "to prevent device deadlock or dropped " 3833- "packets/frames.\n"); 3834+ *ppavbuf = NULL; 3835+ 3836+ if (ctx->flag_last) 3837+ return AVERROR(EPIPE); 3838+ 3839+ if (is_mp) { 3840+ buf.length = VIDEO_MAX_PLANES; 3841+ buf.m.planes = planes; 3842 } 3843 3844- /* if we are draining and there are no more capture buffers queued in the driver we are done */ 3845- if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx_to_m2mctx(ctx)->draining) { 3846- for (i = 0; i < ctx->num_buffers; i++) { 3847- /* capture buffer initialization happens during decode hence 3848- * detection happens at runtime 3849- */ 3850- if (!ctx->buffers) 3851- break; 3852+ while (ioctl(m->fd, VIDIOC_DQBUF, &buf) != 0) { 3853+ const int err = errno; 3854+ av_assert0(AVERROR(err) < 0); 3855+ if (err != EINTR) { 3856+ av_log(avctx, AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", 3857+ ctx->name, av_err2str(AVERROR(err))); 3858+ 3859+ if (err == EPIPE) 3860+ ctx->flag_last = 1; 3861 3862- if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) 3863- goto start; 3864+ return AVERROR(err); 3865 } 3866- ctx->done = 1; 3867- return NULL; 3868 } 3869+ atomic_fetch_sub(&ctx->q_count, 1); 3870 3871-start: 3872- if 
(V4L2_TYPE_IS_OUTPUT(ctx->type)) 3873- pfd.events = POLLOUT | POLLWRNORM; 3874- else { 3875- /* no need to listen to requests for more input while draining */ 3876- if (ctx_to_m2mctx(ctx)->draining) 3877- pfd.events = POLLIN | POLLRDNORM | POLLPRI; 3878+ avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data; 3879+ ff_v4l2_buffer_set_avail(avbuf); 3880+ avbuf->buf = buf; 3881+ if (is_mp) { 3882+ memcpy(avbuf->planes, planes, sizeof(planes)); 3883+ avbuf->buf.m.planes = avbuf->planes; 3884+ } 3885+ // Done with any attached buffer 3886+ av_buffer_unref(&avbuf->ref_buf); 3887+ 3888+ if (V4L2_TYPE_IS_CAPTURE(ctx->type)) { 3889+ // Zero length cap buffer return == EOS 3890+ if ((is_mp ? buf.m.planes[0].bytesused : buf.bytesused) == 0) { 3891+ av_log(avctx, AV_LOG_DEBUG, "Buffer empty - reQ\n"); 3892+ 3893+ // Must reQ so we don't leak 3894+ // May not matter if the next thing we do is release all the 3895+ // buffers but better to be tidy. 3896+ ff_v4l2_buffer_enqueue(avbuf); 3897+ 3898+ ctx->flag_last = 1; 3899+ return AVERROR(EPIPE); 3900+ } 3901+ 3902+#ifdef V4L2_BUF_FLAG_LAST 3903+ // If flag_last set then this contains data but is the last frame 3904+ // so remember that but return OK 3905+ if ((buf.flags & V4L2_BUF_FLAG_LAST) != 0) 3906+ ctx->flag_last = 1; 3907+#endif 3908 } 3909 3910- for (;;) { 3911- ret = poll(&pfd, 1, timeout); 3912- if (ret > 0) 3913- break; 3914- if (errno == EINTR) 3915+ *ppavbuf = avbuf; 3916+ return 0; 3917+} 3918+ 3919+/** 3920+ * handle resolution change event and end of stream event 3921+ * Expects to be called after the stream has stopped 3922+ * 3923+ * returns 1 if reinit was successful, negative if it failed 3924+ * returns 0 if reinit was not executed 3925+ */ 3926+static int 3927+get_event(V4L2m2mContext * const m) 3928+{ 3929+ AVCodecContext * const avctx = m->avctx; 3930+ struct v4l2_event evt = { 0 }; 3931+ 3932+ while (ioctl(m->fd, VIDIOC_DQEVENT, &evt) != 0) { 3933+ const int rv = AVERROR(errno); 3934+ if (rv == 
AVERROR(EINTR)) 3935 continue; 3936- return NULL; 3937+ if (rv == AVERROR(EAGAIN)) { 3938+ av_log(avctx, AV_LOG_WARNING, "V4L2 failed to get expected event - assume EOS\n"); 3939+ return AVERROR_EOF; 3940+ } 3941+ av_log(avctx, AV_LOG_ERROR, "V4L2 VIDIOC_DQEVENT: %s\n", av_err2str(rv)); 3942+ return rv; 3943 } 3944 3945- /* 0. handle errors */ 3946- if (pfd.revents & POLLERR) { 3947- /* if we are trying to get free buffers but none have been queued yet 3948- no need to raise a warning */ 3949- if (timeout == 0) { 3950- for (i = 0; i < ctx->num_buffers; i++) { 3951- if (ctx->buffers[i].status != V4L2BUF_AVAILABLE) 3952- av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); 3953- } 3954- } 3955- else 3956- av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); 3957+ av_log(avctx, AV_LOG_DEBUG, "Dq event %d\n", evt.type); 3958 3959- return NULL; 3960+ if (evt.type == V4L2_EVENT_EOS) { 3961+ av_log(avctx, AV_LOG_TRACE, "V4L2 VIDIOC_EVENT_EOS\n"); 3962+ return AVERROR_EOF; 3963 } 3964 3965- /* 1. handle resolution changes */ 3966- if (pfd.revents & POLLPRI) { 3967- ret = v4l2_handle_event(ctx); 3968- if (ret < 0) { 3969- /* if re-init failed, abort */ 3970- ctx->done = 1; 3971- return NULL; 3972+ if (evt.type == V4L2_EVENT_SOURCE_CHANGE) 3973+ return do_source_change(m); 3974+ 3975+ return 0; 3976+} 3977+ 3978+static inline int 3979+dq_ok(const V4L2Context * const c) 3980+{ 3981+ return c->streamon && atomic_load(&c->q_count) != 0; 3982+} 3983+ 3984+// Get a buffer 3985+// If output then just gets the buffer in the expected way 3986+// If capture then runs the capture state m/c to deal with res change etc. 
3987+// If return value == 0 then *ppavbuf != NULL 3988+ 3989+static int 3990+get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout) 3991+{ 3992+ V4L2m2mContext * const m = ctx_to_m2mctx(ctx); 3993+ AVCodecContext * const avctx = m->avctx; 3994+ const int is_cap = V4L2_TYPE_IS_CAPTURE(ctx->type); 3995+ 3996+ const unsigned int poll_cap = (POLLIN | POLLRDNORM); 3997+ const unsigned int poll_out = (POLLOUT | POLLWRNORM); 3998+ const unsigned int poll_event = POLLPRI; 3999+ 4000+ *ppavbuf = NULL; 4001+ 4002+ for (;;) { 4003+ struct pollfd pfd = { 4004+ .fd = m->fd, 4005+ // If capture && stream not started then assume we are waiting for the initial event 4006+ .events = !is_cap ? poll_out : 4007+ !ff_v4l2_ctx_eos(ctx) && ctx->streamon ? poll_cap : 4008+ poll_event, 4009+ }; 4010+ int ret; 4011+ 4012+ if (ctx->done) { 4013+ av_log(avctx, AV_LOG_TRACE, "V4L2 %s already done\n", ctx->name); 4014+ return AVERROR_EOF; 4015 } 4016- if (ret) { 4017- /* if re-init was successful drop the buffer (if there was one) 4018- * since we had to reconfigure capture (unmap all buffers) 4019- */ 4020- return NULL; 4021+ 4022+ // If capture && timeout == -1 then also wait for rx buffer free 4023+ if (is_cap && timeout == -1 && dq_ok(&m->output) && !m->draining) 4024+ pfd.events |= poll_out; 4025+ 4026+ // If nothing Qed all we will get is POLLERR - avoid that 4027+ if ((pfd.events == poll_out && !dq_ok(&m->output)) || 4028+ (pfd.events == poll_cap && !dq_ok(&m->capture)) || 4029+ (pfd.events == (poll_cap | poll_out) && !dq_ok(&m->capture) && !dq_ok(&m->output))) { 4030+ av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name); 4031+ return AVERROR(ENOSPC); 4032 } 4033- } 4034 4035- /* 2. dequeue the buffer */ 4036- if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) { 4037+ // Timeout kludged s.t. 
"forever" eventually gives up & produces logging 4038+ // If waiting for an event when we have seen a last_frame then we expect 4039+ // it to be ready already so force a short timeout 4040+ ret = poll(&pfd, 1, 4041+ ff_v4l2_ctx_eos(ctx) ? 10 : 4042+ timeout == -1 ? 3000 : timeout); 4043+ if (ret < 0) { 4044+ ret = AVERROR(errno); // Remember errno before logging etc. 4045+ av_assert0(ret < 0); 4046+ } 4047+ 4048+ av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s ret=%d, timeout=%d, events=%#x, revents=%#x\n", 4049+ ctx->name, ret, timeout, pfd.events, pfd.revents); 4050 4051- if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) { 4052- /* there is a capture buffer ready */ 4053- if (pfd.revents & (POLLIN | POLLRDNORM)) 4054- goto dequeue; 4055+ if (ret < 0) { 4056+ if (ret == AVERROR(EINTR)) 4057+ continue; 4058+ av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll error %d (%s)\n", ctx->name, AVUNERROR(ret), av_err2str(ret)); 4059+ return ret; 4060+ } 4061 4062- /* the driver is ready to accept more input; instead of waiting for the capture 4063- * buffer to complete we return NULL so input can proceed (we are single threaded) 4064- */ 4065- if (pfd.revents & (POLLOUT | POLLWRNORM)) 4066- return NULL; 4067+ if (ret == 0) { 4068+ if (timeout == -1) 4069+ av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll unexpected timeout: events=%#x\n", ctx->name, pfd.events); 4070+ if (ff_v4l2_ctx_eos(ctx)) { 4071+ av_log(avctx, AV_LOG_WARNING, "V4L2 %s poll event timeout\n", ctx->name); 4072+ ret = get_event(m); 4073+ if (ret < 0) { 4074+ ctx->done = 1; 4075+ return ret; 4076+ } 4077+ } 4078+ return AVERROR(EAGAIN); 4079 } 4080 4081-dequeue: 4082- memset(&buf, 0, sizeof(buf)); 4083- buf.memory = V4L2_MEMORY_MMAP; 4084- buf.type = ctx->type; 4085- if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { 4086- memset(planes, 0, sizeof(planes)); 4087- buf.length = VIDEO_MAX_PLANES; 4088- buf.m.planes = planes; 4089+ if ((pfd.revents & POLLERR) != 0) { 4090+ av_log(avctx, AV_LOG_WARNING, "V4L2 %s POLLERR\n", ctx->name); 4091+ return 
AVERROR_UNKNOWN; 4092 } 4093 4094- ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf); 4095- if (ret) { 4096- if (errno != EAGAIN) { 4097+ if ((pfd.revents & poll_event) != 0) { 4098+ ret = get_event(m); 4099+ if (ret < 0) { 4100 ctx->done = 1; 4101- if (errno != EPIPE) 4102- av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", 4103- ctx->name, av_err2str(AVERROR(errno))); 4104+ return ret; 4105 } 4106- return NULL; 4107+ continue; 4108 } 4109 4110- if (ctx_to_m2mctx(ctx)->draining && !V4L2_TYPE_IS_OUTPUT(ctx->type)) { 4111- int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ? 4112- buf.m.planes[0].bytesused : buf.bytesused; 4113- if (bytesused == 0) { 4114- ctx->done = 1; 4115- return NULL; 4116- } 4117-#ifdef V4L2_BUF_FLAG_LAST 4118- if (buf.flags & V4L2_BUF_FLAG_LAST) 4119- ctx->done = 1; 4120-#endif 4121+ if ((pfd.revents & poll_cap) != 0) { 4122+ ret = dq_buf(ctx, ppavbuf); 4123+ if (ret == AVERROR(EPIPE)) 4124+ continue; 4125+ return ret; 4126 } 4127 4128- avbuf = &ctx->buffers[buf.index]; 4129- avbuf->status = V4L2BUF_AVAILABLE; 4130- avbuf->buf = buf; 4131- if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { 4132- memcpy(avbuf->planes, planes, sizeof(planes)); 4133- avbuf->buf.m.planes = avbuf->planes; 4134+ if ((pfd.revents & poll_out) != 0) { 4135+ if (is_cap) 4136+ return AVERROR(EAGAIN); 4137+ return dq_buf(ctx, ppavbuf); 4138 } 4139- return avbuf; 4140+ 4141+ av_log(avctx, AV_LOG_ERROR, "V4L2 poll unexpected events=%#x, revents=%#x\n", pfd.events, pfd.revents); 4142+ return AVERROR_UNKNOWN; 4143 } 4144+} 4145 4146- return NULL; 4147+// Clear out flags and timestamps that should be set by the user 4148+// Returns the passed avbuf 4149+static V4L2Buffer * 4150+clean_v4l2_buffer(V4L2Buffer * const avbuf) 4151+{ 4152+ struct v4l2_buffer *const buf = &avbuf->buf; 4153+ 4154+ buf->flags = 0; 4155+ buf->field = V4L2_FIELD_ANY; 4156+ buf->timestamp = (struct timeval){0}; 4157+ buf->timecode = (struct v4l2_timecode){0}; 4158+ buf->sequence = 0;
4159+ 4160+ return avbuf; 4161+} 4162+ 4163+int 4164+ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1) 4165+{ 4166+ V4L2Buffer * avbuf; 4167+ if (timeout1 != 0) { 4168+ int rv = get_qbuf(ctx, &avbuf, timeout1); 4169+ if (rv != 0) 4170+ return rv; 4171+ } 4172+ do { 4173+ get_qbuf(ctx, &avbuf, 0); 4174+ } while (avbuf); 4175+ return 0; 4176 } 4177 4178 static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) 4179 { 4180- int timeout = 0; /* return when no more buffers to dequeue */ 4181 int i; 4182 4183 /* get back as many output buffers as possible */ 4184- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { 4185- do { 4186- } while (v4l2_dequeue_v4l2buf(ctx, timeout)); 4187- } 4188+ if (V4L2_TYPE_IS_OUTPUT(ctx->type)) 4189+ ff_v4l2_dq_all(ctx, 0); 4190 4191 for (i = 0; i < ctx->num_buffers; i++) { 4192- if (ctx->buffers[i].status == V4L2BUF_AVAILABLE) 4193- return &ctx->buffers[i]; 4194+ V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; 4195+ if (avbuf->status == V4L2BUF_AVAILABLE) 4196+ return clean_v4l2_buffer(avbuf); 4197 } 4198 4199 return NULL; 4200@@ -438,25 +737,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf( 4201 4202 static int v4l2_release_buffers(V4L2Context* ctx) 4203 { 4204- struct v4l2_requestbuffers req = { 4205- .memory = V4L2_MEMORY_MMAP, 4206- .type = ctx->type, 4207- .count = 0, /* 0 -> unmaps buffers from the driver */ 4208- }; 4209- int i, j; 4210+ int i; 4211+ int ret = 0; 4212+ const int fd = ctx_to_m2mctx(ctx)->fd; 4213 4214- for (i = 0; i < ctx->num_buffers; i++) { 4215- V4L2Buffer *buffer = &ctx->buffers[i]; 4216+ // Orphan any buffers in the wild 4217+ ff_weak_link_break(&ctx->wl_master); 4218 4219- for (j = 0; j < buffer->num_planes; j++) { 4220- struct V4L2Plane_info *p = &buffer->plane_info[j]; 4221- if (p->mm_addr && p->length) 4222- if (munmap(p->mm_addr, p->length) < 0) 4223- av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno))); 4224+ if (ctx->bufrefs) { 4225+ for (i = 0; i < 
ctx->num_buffers; i++) 4226+ av_buffer_unref(ctx->bufrefs + i); 4227+ } 4228+ 4229+ if (fd != -1) { 4230+ struct v4l2_requestbuffers req = { 4231+ .memory = V4L2_MEMORY_MMAP, 4232+ .type = ctx->type, 4233+ .count = 0, /* 0 -> unmap all buffers from the driver */ 4234+ }; 4235+ 4236+ while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1 && errno == EINTR) 4237+ continue; /* retry only when the ioctl was interrupted */ 4238+ if (ret == -1) { /* any other failure is reported once and returned, not retried forever */ 4239+ 4240+ ret = AVERROR(errno); 4241+ 4242+ av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n", 4243+ ctx->name, av_err2str(AVERROR(errno))); 4244+ 4245+ if (ctx_to_m2mctx(ctx)->output_drm) 4246+ av_log(logger(ctx), AV_LOG_ERROR, 4247+ "Make sure the DRM client releases all FB/GEM objects before closing the codec (ie):\n" 4248+ "for all buffers: \n" 4249+ " 1. drmModeRmFB(..)\n" 4250+ " 2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... )\n"); 4251 } 4252 } 4253+ atomic_store(&ctx->q_count, 0); 4254 4255- return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req); 4256+ return ret; 4257 } 4258 4259 static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt) 4260@@ -485,6 +804,8 @@ static inline int v4l2_try_raw_format(V4 4261 4262 static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) 4263 { 4264+ V4L2m2mContext* s = ctx_to_m2mctx(ctx); 4265+ V4L2m2mPriv *priv = s->avctx->priv_data; 4266 enum AVPixelFormat pixfmt = ctx->av_pix_fmt; 4267 struct v4l2_fmtdesc fdesc; 4268 int ret; 4269@@ -498,21 +819,22 @@ static int v4l2_get_raw_format(V4L2Conte 4270 return 0; 4271 } 4272 4273- for (;;) { 4274+ for (;; ++fdesc.index) { 4275 ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc); 4276 if (ret) 4277 return AVERROR(EINVAL); 4278 4279+ if (priv->pix_fmt != AV_PIX_FMT_NONE) { 4280+ if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt)) 4281+ continue; 4282+ } 4283+ 4284 pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO); 4285 ret = v4l2_try_raw_format(ctx, pixfmt); 4286- if (ret){ 4287- fdesc.index++;
4288- continue; 4289+ if (ret == 0) { 4290+ *p = pixfmt; 4291+ return 0; 4292 } 4293- 4294- *p = pixfmt; 4295- 4296- return 0; 4297 } 4298 4299 return AVERROR(EINVAL); 4300@@ -555,30 +877,131 @@ static int v4l2_get_coded_format(V4L2Con 4301 * 4302 *****************************************************************************/ 4303 4304-int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) 4305+ 4306+static void flush_all_buffers_status(V4L2Context* const ctx) 4307+{ 4308+ int i; 4309+ 4310+ if (!ctx->bufrefs) 4311+ return; 4312+ 4313+ for (i = 0; i < ctx->num_buffers; ++i) { 4314+ struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; 4315+ if (buf->status == V4L2BUF_IN_DRIVER) 4316+ ff_v4l2_buffer_set_avail(buf); 4317+ } 4318+ atomic_store(&ctx->q_count, 0); 4319+} 4320+ 4321+static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx) 4322+{ 4323+ int i; 4324+ int rv; 4325+ 4326+ if (!ctx->bufrefs) { 4327+ rv = ff_v4l2_context_init(ctx); 4328+ if (rv) { 4329+ av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); 4330+ return rv; 4331+ } 4332+ } 4333+ 4334+ ff_mutex_lock(&ctx->lock); 4335+ for (i = 0; i < ctx->num_buffers; ++i) { 4336+ struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; 4337+ if (buf->status == V4L2BUF_AVAILABLE) { 4338+ rv = ff_v4l2_buffer_enqueue(buf); 4339+ if (rv < 0) 4340+ break; 4341+ } 4342+ } 4343+ ff_mutex_unlock(&ctx->lock); 4344+ return rv; 4345+} 4346+ 4347+static int set_streamon(AVCodecContext * const avctx, V4L2Context*const ctx) 4348 { 4349 int type = ctx->type; 4350- int ret; 4351+ int ret = 0; 4352 4353- ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type); 4354- if (ret < 0) 4355- return AVERROR(errno); 4356+ if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) 4357+ stuff_all_buffers(avctx, ctx); 4358 4359- ctx->streamon = (cmd == VIDIOC_STREAMON); 4360+ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMON, &type) < 0) { 4361+ ret = AVERROR(errno); 4362+ av_log(avctx, 
AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name, 4363+ av_err2str(ret)); 4364+ return ret; 4365+ } 4366 4367- return 0; 4368+ ctx->first_buf = 1; 4369+ ctx->streamon = 1; 4370+ ctx->flag_last = 0; 4371+ av_log(avctx, AV_LOG_DEBUG, "%s set status ON OK\n", ctx->name); 4372+ return ret; 4373+} 4374+ 4375+static int set_streamoff(AVCodecContext * const avctx, V4L2Context*const ctx) 4376+{ 4377+ int type = ctx->type; 4378+ int ret = 0; 4379+ const int has_bufs = ctx_buffers_alloced(ctx); 4380+ 4381+ // Avoid doing anything if there is nothing we can do 4382+ if (!has_bufs && !ctx->streamon) 4383+ return 0; 4384+ 4385+ if (has_bufs) 4386+ ff_mutex_lock(&ctx->lock); 4387+ 4388+ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMOFF, &type) < 0) { 4389+ ret = AVERROR(errno); 4390+ av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name, 4391+ av_err2str(ret)); 4392+ } 4393+ else { 4394+ flush_all_buffers_status(ctx); 4395+ 4396+ ctx->streamon = 0; 4397+ ctx->flag_last = 0; 4398+ 4399+ av_log(avctx, AV_LOG_DEBUG, "%s set status OFF OK\n", ctx->name); 4400+ } 4401+ 4402+ if (has_bufs) 4403+ ff_mutex_unlock(&ctx->lock); 4404+ return ret; 4405+} 4406+ 4407+ 4408+int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) 4409+{ 4410+ AVCodecContext * const avctx = logger(ctx); 4411+ 4412+ switch (cmd) { 4413+ case VIDIOC_STREAMOFF: 4414+ return set_streamoff(avctx, ctx); 4415+ case VIDIOC_STREAMON: 4416+ return set_streamon(avctx, ctx); 4417+ default: 4418+ av_log(avctx, AV_LOG_ERROR, "%s: Unexpected cmd: %d\n", __func__, cmd); 4419+ break; 4420+ } 4421+ return AVERROR_BUG; 4422 } 4423 4424 int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) 4425 { 4426- V4L2m2mContext *s = ctx_to_m2mctx(ctx); 4427+ V4L2m2mContext *const s = ctx_to_m2mctx(ctx); 4428+ AVCodecContext *const avctx = s->avctx; 4429+ int64_t track_ts; 4430 V4L2Buffer* avbuf; 4431 int ret; 4432 4433 if (!frame) { 4434 ret = v4l2_stop_encode(ctx); 4435 if (ret) 
4436- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name); 4437+ av_log(avctx, AV_LOG_ERROR, "%s stop_encode\n", ctx->name); 4438 s->draining= 1; 4439 return 0; 4440 } 4441@@ -587,23 +1010,29 @@ int ff_v4l2_context_enqueue_frame(V4L2Co 4442 if (!avbuf) 4443 return AVERROR(EAGAIN); 4444 4445- ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf); 4446+ track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame); 4447+ 4448+ ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts); 4449 if (ret) 4450 return ret; 4451 4452 return ff_v4l2_buffer_enqueue(avbuf); 4453 } 4454 4455-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) 4456+int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, 4457+ const void * extdata, size_t extlen) 4458 { 4459 V4L2m2mContext *s = ctx_to_m2mctx(ctx); 4460+ AVCodecContext *const avctx = s->avctx; 4461 V4L2Buffer* avbuf; 4462 int ret; 4463+ int64_t track_ts; 4464 4465 if (!pkt->size) { 4466 ret = v4l2_stop_decode(ctx); 4467+ // Log but otherwise ignore stop failure 4468 if (ret) 4469- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name); 4470+ av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret); 4471 s->draining = 1; 4472 return 0; 4473 } 4474@@ -612,8 +1041,13 @@ int ff_v4l2_context_enqueue_packet(V4L2C 4475 if (!avbuf) 4476 return AVERROR(EAGAIN); 4477 4478- ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf); 4479- if (ret) 4480+ track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt); 4481+ 4482+ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts); 4483+ if (ret == AVERROR(ENOMEM)) 4484+ av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n", 4485+ __func__, pkt->size, avbuf->planes[0].length); 4486+ else if (ret) 4487 return ret; 4488 4489 return ff_v4l2_buffer_enqueue(avbuf); 4490@@ -621,42 +1055,77 @@ int ff_v4l2_context_enqueue_packet(V4L2C 4491 4492 int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, 
AVFrame* frame, int timeout) 4493 { 4494+ V4L2m2mContext *s = ctx_to_m2mctx(ctx); 4495+ AVCodecContext *const avctx = s->avctx; 4496 V4L2Buffer *avbuf; 4497+ int rv; 4498 4499- /* 4500- * timeout=-1 blocks until: 4501- * 1. decoded frame available 4502- * 2. an input buffer is ready to be dequeued 4503- */ 4504- avbuf = v4l2_dequeue_v4l2buf(ctx, timeout); 4505- if (!avbuf) { 4506- if (ctx->done) 4507- return AVERROR_EOF; 4508- 4509- return AVERROR(EAGAIN); 4510- } 4511+ do { 4512+ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) 4513+ return rv; 4514+ if ((rv = ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0) 4515+ return rv; 4516+ } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0); 4517 4518- return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); 4519+ return 0; 4520 } 4521 4522-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) 4523+int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout) 4524 { 4525+ V4L2m2mContext *s = ctx_to_m2mctx(ctx); 4526+ AVCodecContext *const avctx = s->avctx; 4527 V4L2Buffer *avbuf; 4528+ int rv; 4529 4530- /* 4531- * blocks until: 4532- * 1. encoded packet available 4533- * 2. an input buffer ready to be dequeued 4534- */ 4535- avbuf = v4l2_dequeue_v4l2buf(ctx, -1); 4536- if (!avbuf) { 4537- if (ctx->done) 4538- return AVERROR_EOF; 4539+ do { 4540+ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) 4541+ return rv == AVERROR(ENOSPC) ? 
AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC 4542+ if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0) 4543+ return rv; 4544+ } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0); 4545 4546- return AVERROR(EAGAIN); 4547+ return 0; 4548+} 4549+ 4550+// Return 0 terminated list of drm fourcc video formats for this context 4551+// NULL if none found or error 4552+// Returned list is malloced so must be freed 4553+uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN) 4554+{ 4555+ unsigned int i; 4556+ unsigned int n = 0; 4557+ unsigned int size = 0; 4558+ uint32_t * e = NULL; 4559+ *pN = 0; 4560+ 4561+ for (i = 0; i < 1024; ++i) { 4562+ struct v4l2_fmtdesc fdesc = { 4563+ .index = i, 4564+ .type = ctx->type 4565+ }; 4566+ 4567+ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc)) 4568+ return e; 4569+ 4570+ if (n + 1 >= size) { 4571+ unsigned int newsize = (size == 0) ? 16 : size * 2; 4572+ uint32_t * t = av_realloc(e, newsize * sizeof(*t)); 4573+ if (!t) 4574+ return e; 4575+ e = t; 4576+ size = newsize; 4577+ } 4578+ 4579+ e[n] = fdesc.pixelformat; 4580+ e[++n] = 0; 4581+ if (pN) 4582+ *pN = n; 4583 } 4584 4585- return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf); 4586+ // If we've looped 1024 times we are clearly confused 4587+ *pN = 0; 4588+ av_free(e); 4589+ return NULL; 4590 } 4591 4592 int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) 4593@@ -688,78 +1157,194 @@ int ff_v4l2_context_get_format(V4L2Conte 4594 4595 int ff_v4l2_context_set_format(V4L2Context* ctx) 4596 { 4597- return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); 4598+ int ret; 4599+ 4600+ ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); 4601+ if (ret != 0) 4602+ return ret; 4603+ 4604+ // Check returned size against min size and if smaller have another go 4605+ // Only worry about plane[0] as this is meant to enforce limits for 4606+ // encoded streams where we might know a bit more about the shape 4607+ // 
than the driver 4608+ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type)) { 4609+ if (ctx->min_buf_size <= ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage) 4610+ return 0; 4611+ ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage = ctx->min_buf_size; 4612+ } 4613+ else { 4614+ if (ctx->min_buf_size <= ctx->format.fmt.pix.sizeimage) 4615+ return 0; 4616+ ctx->format.fmt.pix.sizeimage = ctx->min_buf_size; 4617+ } 4618+ 4619+ ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); 4620+ return ret; 4621 } 4622 4623 void ff_v4l2_context_release(V4L2Context* ctx) 4624 { 4625 int ret; 4626 4627- if (!ctx->buffers) 4628+ if (!ctx->bufrefs) 4629 return; 4630 4631 ret = v4l2_release_buffers(ctx); 4632 if (ret) 4633 av_log(logger(ctx), AV_LOG_WARNING, "V4L2 failed to unmap the %s buffers\n", ctx->name); 4634 4635- av_freep(&ctx->buffers); 4636+ av_freep(&ctx->bufrefs); 4637+ av_buffer_unref(&ctx->frames_ref); 4638+ 4639+ ff_mutex_destroy(&ctx->lock); 4640+ pthread_cond_destroy(&ctx->cond); 4641 } 4642 4643-int ff_v4l2_context_init(V4L2Context* ctx) 4644+ 4645+static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem) 4646 { 4647- V4L2m2mContext *s = ctx_to_m2mctx(ctx); 4648+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); 4649 struct v4l2_requestbuffers req; 4650- int ret, i; 4651- 4652- if (!v4l2_type_supported(ctx)) { 4653- av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); 4654- return AVERROR_PATCHWELCOME; 4655- } 4656+ int ret; 4657+ int i; 4658 4659- ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); 4660- if (ret) 4661- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", ctx->name); 4662+ av_assert0(ctx->bufrefs == NULL); 4663 4664 memset(&req, 0, sizeof(req)); 4665- req.count = ctx->num_buffers; 4666- req.memory = V4L2_MEMORY_MMAP; 4667+ req.count = req_buffers; 4668+ req.memory = mem; 4669 req.type = ctx->type; 4670- ret = ioctl(s->fd, VIDIOC_REQBUFS, &req); 4671- if (ret < 0) { 4672- 
av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, strerror(errno)); 4673- return AVERROR(errno); 4674+ while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) { 4675+ if (errno != EINTR) { 4676+ ret = AVERROR(errno); 4677+ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, av_err2str(ret)); 4678+ return ret; 4679+ } 4680 } 4681 4682 ctx->num_buffers = req.count; 4683- ctx->buffers = av_mallocz(ctx->num_buffers * sizeof(V4L2Buffer)); 4684- if (!ctx->buffers) { 4685+ ctx->bufrefs = av_mallocz(ctx->num_buffers * sizeof(*ctx->bufrefs)); 4686+ if (!ctx->bufrefs) { 4687 av_log(logger(ctx), AV_LOG_ERROR, "%s malloc enomem\n", ctx->name); 4688- return AVERROR(ENOMEM); 4689+ goto fail_release; 4690 } 4691 4692- for (i = 0; i < req.count; i++) { 4693- ctx->buffers[i].context = ctx; 4694- ret = ff_v4l2_buffer_initialize(&ctx->buffers[i], i); 4695- if (ret < 0) { 4696+ ctx->wl_master = ff_weak_link_new(ctx); 4697+ if (!ctx->wl_master) { 4698+ ret = AVERROR(ENOMEM); 4699+ goto fail_release; 4700+ } 4701+ 4702+ for (i = 0; i < ctx->num_buffers; i++) { 4703+ ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem); 4704+ if (ret) { 4705 av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret)); 4706- goto error; 4707+ goto fail_release; 4708 } 4709 } 4710 4711 av_log(logger(ctx), AV_LOG_DEBUG, "%s: %s %02d buffers initialized: %04ux%04u, sizeimage %08u, bytesperline %08u\n", ctx->name, 4712 V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? av_fourcc2str(ctx->format.fmt.pix_mp.pixelformat) : av_fourcc2str(ctx->format.fmt.pix.pixelformat), 4713 req.count, 4714- v4l2_get_width(&ctx->format), 4715- v4l2_get_height(&ctx->format), 4716+ ff_v4l2_get_format_width(&ctx->format), 4717+ ff_v4l2_get_format_height(&ctx->format), 4718 V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage : ctx->format.fmt.pix.sizeimage, 4719 V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? 
ctx->format.fmt.pix_mp.plane_fmt[0].bytesperline : ctx->format.fmt.pix.bytesperline); 4720 4721 return 0; 4722 4723-error: 4724+fail_release: 4725 v4l2_release_buffers(ctx); 4726+ av_freep(&ctx->bufrefs); 4727+ return ret; 4728+} 4729+ 4730+int ff_v4l2_context_frames_set(V4L2Context *const ctx) 4731+{ 4732+ AVHWFramesContext *hwframes; 4733+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); 4734+ const int w = ctx->width != 0 ? ctx->width : s->avctx->width; 4735+ const int h = ctx->height != 0 ? ctx->height : s->avctx->height; 4736+ int ret; 4737+ 4738+ if (ctx->frames_ref != NULL) { 4739+ const AVHWFramesContext * const hwf = (AVHWFramesContext*)ctx->frames_ref->data; 4740+ if (hwf->sw_format == ctx->av_pix_fmt && hwf->width == w && hwf->height == h) 4741+ return 0; 4742+ av_buffer_unref(&ctx->frames_ref); 4743+ } 4744+ 4745+ ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref); 4746+ if (!ctx->frames_ref) 4747+ return AVERROR(ENOMEM); 4748+ 4749+ hwframes = (AVHWFramesContext*)ctx->frames_ref->data; 4750+ hwframes->format = AV_PIX_FMT_DRM_PRIME; 4751+ hwframes->sw_format = ctx->av_pix_fmt; 4752+ hwframes->width = w; 4753+ hwframes->height = h; 4754+ ret = av_hwframe_ctx_init(ctx->frames_ref); 4755+ if (ret < 0) { 4756+ av_log(s->avctx, AV_LOG_ERROR, "Failed to create hwframes context: %s\n", av_err2str(ret)); 4757+ av_buffer_unref(&ctx->frames_ref); 4758+ return ret; 4759+ } 4760+ 4761+ av_log(s->avctx, AV_LOG_DEBUG, "%s: HWFramesContext set to %s, %dx%d\n", __func__, 4762+ av_get_pix_fmt_name(ctx->av_pix_fmt), w, h); 4763+ return 0; 4764+} 4765+ 4766+int ff_v4l2_context_init(V4L2Context* ctx) 4767+{ 4768+ struct v4l2_queryctrl qctrl; 4769+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); 4770+ int ret; 4771+ 4772+ // It is not valid to reinit a context without a previous release 4773+ av_assert0(ctx->bufrefs == NULL); 4774+ 4775+ if (!v4l2_type_supported(ctx)) { 4776+ av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); 4777+ return 
AVERROR_PATCHWELCOME; 4778+ } 4779+ 4780+ ff_mutex_init(&ctx->lock, NULL); 4781+ pthread_cond_init(&ctx->cond, NULL); 4782+ atomic_init(&ctx->q_count, 0); 4783+ 4784+ ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); 4785+ if (ret) { 4786+ ret = AVERROR(errno); 4787+ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret)); 4788+ goto fail_unlock; 4789+ } 4790+ 4791+ memset(&qctrl, 0, sizeof(qctrl)); 4792+ qctrl.id = V4L2_CID_MIN_BUFFERS_FOR_OUTPUT; 4793+ if (ioctl(s->fd, VIDIOC_QUERYCTRL, &qctrl) != 0) { 4794+ ret = AVERROR(errno); 4795+ if (ret != AVERROR(EINVAL)) { 4796+ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERCTRL failed: %s\n", ctx->name, av_err2str(ret)); 4797+ goto fail_unlock; 4798+ } 4799+ // Control unsupported - set default if wanted 4800+ if (ctx->num_buffers < 2) 4801+ ctx->num_buffers = 4; 4802+ } 4803+ else { 4804+ if (ctx->num_buffers < 2) 4805+ ctx->num_buffers = qctrl.minimum + 2; 4806+ ctx->num_buffers = av_clip(ctx->num_buffers, qctrl.minimum, qctrl.maximum); 4807+ } 4808 4809- av_freep(&ctx->buffers); 4810+ ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem); 4811+ if (ret < 0) 4812+ goto fail_unlock; 4813+ 4814+ return 0; 4815 4816+fail_unlock: 4817+ ff_mutex_destroy(&ctx->lock); 4818 return ret; 4819 } 4820--- a/libavcodec/v4l2_context.h 4821+++ b/libavcodec/v4l2_context.h 4822@@ -32,6 +32,8 @@ 4823 #include "libavutil/rational.h" 4824 #include "codec_id.h" 4825 #include "packet.h" 4826+#include "libavutil/buffer.h" 4827+#include "libavutil/thread.h" 4828 #include "v4l2_buffers.h" 4829 4830 typedef struct V4L2Context { 4831@@ -71,11 +73,18 @@ typedef struct V4L2Context { 4832 */ 4833 int width, height; 4834 AVRational sample_aspect_ratio; 4835+ struct v4l2_rect selection; 4836 4837 /** 4838- * Indexed array of V4L2Buffers 4839+ * If the default size of buffer is less than this then try to 4840+ * set to this. 
4841 */ 4842- V4L2Buffer *buffers; 4843+ uint32_t min_buf_size; 4844+ 4845+ /** 4846+ * Indexed array of pointers to V4L2Buffers 4847+ */ 4848+ AVBufferRef **bufrefs; 4849 4850 /** 4851 * Readonly after init. 4852@@ -83,16 +92,38 @@ typedef struct V4L2Context { 4853 int num_buffers; 4854 4855 /** 4856+ * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF 4857+ */ 4858+ enum v4l2_memory buf_mem; 4859+ 4860+ /** 4861 * Whether the stream has been started (VIDIOC_STREAMON has been sent). 4862 */ 4863 int streamon; 4864 4865+ /* 1st buffer after stream on */ 4866+ int first_buf; 4867+ 4868 /** 4869 * Either no more buffers available or an unrecoverable error was notified 4870 * by the V4L2 kernel driver: once set the context has to be exited. 4871 */ 4872 int done; 4873 4874+ int flag_last; 4875+ 4876+ /** 4877+ * If NZ then when Qing frame/pkt use this rather than the 4878+ * "real" PTS 4879+ */ 4880+ uint64_t track_ts; 4881+ 4882+ AVBufferRef *frames_ref; 4883+ atomic_int q_count; 4884+ struct ff_weak_link_master *wl_master; 4885+ 4886+ AVMutex lock; 4887+ pthread_cond_t cond; 4888 } V4L2Context; 4889 4890 /** 4891@@ -104,6 +135,14 @@ typedef struct V4L2Context { 4892 int ff_v4l2_context_init(V4L2Context* ctx); 4893 4894 /** 4895+ * (re)set the hwframecontext from the current v4l2 context 4896+ * 4897+ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context description for required variables. 4898+ * @return 0 in case of success, a negative value representing the error otherwise. 4899+ */ 4900+int ff_v4l2_context_frames_set(V4L2Context *const ctx); 4901+ 4902+/** 4903 * Sets the V4L2Context format in the v4l2 driver. 4904 * 4905 * @param[in] ctx A pointer to a V4L2Context. See V4L2Context description for required variables. 
4906@@ -121,6 +160,19 @@ int ff_v4l2_context_set_format(V4L2Conte 4907 int ff_v4l2_context_get_format(V4L2Context* ctx, int probe); 4908 4909 /** 4910+ * Get the list of drm fourcc pixel formats for this context 4911+ * 4912+ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context 4913+ * description for required variables. 4914+ * @param[in] pN A pointer to receive the number of formats 4915+ * found. May be NULL if not wanted. 4916+ * @return Pointer to malloced list of zero terminated formats, 4917+ * NULL if none or error. As list is malloced it must be 4918+ * freed. 4919+ */ 4920+uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN); 4921+ 4922+/** 4923 * Releases a V4L2Context. 4924 * 4925 * @param[in] ctx A pointer to a V4L2Context. 4926@@ -148,7 +200,7 @@ int ff_v4l2_context_set_status(V4L2Conte 4927 * @param[inout] pkt The AVPacket to dequeue to. 4928 * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. 4929 */ 4930-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); 4931+int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout); 4932 4933 /** 4934 * Dequeues a buffer from a V4L2Context to an AVFrame. 4935@@ -157,7 +209,10 @@ int ff_v4l2_context_dequeue_packet(V4L2C 4936 * @param[in] ctx The V4L2Context to dequeue from. 4937 * @param[inout] f The AVFrame to dequeue to. 4938 * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds) 4939+ * 4940 * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. 4941+ * AVERROR(ENOSPC) if no buffer availible to put 4942+ * the frame in 4943 */ 4944 int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); 4945 4946@@ -171,7 +226,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Co 4947 * @param[in] pkt A pointer to an AVPacket. 
4948 * @return 0 in case of success, a negative error otherwise. 4949 */ 4950-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt); 4951+int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size); 4952 4953 /** 4954 * Enqueues a buffer to a V4L2Context from an AVFrame 4955@@ -184,4 +239,28 @@ int ff_v4l2_context_enqueue_packet(V4L2C 4956 */ 4957 int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f); 4958 4959+/** 4960+ * Dequeue all buffers on this queue 4961+ * 4962+ * Used to recycle output buffers 4963+ * 4964+ * @param[in] ctx The V4L2Context to dequeue from. 4965+ * @param[in] timeout1 A timeout on dequeuing the 1st buffer, 4966+ * all others have a timeout of zero 4967+ * @return AVERROR(EAGAIN) if timeout1 non-zero then the return 4968+ * of the first dequeue operation, 0 otherwise. 4969+ */ 4970+int ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1); 4971+ 4972+/** 4973+ * Returns the number of buffers currently queued 4974+ * 4975+ * @param[in] ctx The V4L2Context to evaluate 4976+ */ 4977+static inline int 4978+ff_v4l2_context_q_count(const V4L2Context* const ctx) 4979+{ 4980+ return atomic_load(&ctx->q_count); 4981+} 4982+ 4983 #endif // AVCODEC_V4L2_CONTEXT_H 4984--- a/libavcodec/v4l2_fmt.c 4985+++ b/libavcodec/v4l2_fmt.c 4986@@ -42,6 +42,14 @@ static const struct fmt_conversion { 4987 { AV_FMT(RGB24), AV_CODEC(RAWVIDEO), V4L2_FMT(RGB24) }, 4988 { AV_FMT(BGR0), AV_CODEC(RAWVIDEO), V4L2_FMT(BGR32) }, 4989 { AV_FMT(0RGB), AV_CODEC(RAWVIDEO), V4L2_FMT(RGB32) }, 4990+ { AV_FMT(BGR0), AV_CODEC(RAWVIDEO), V4L2_FMT(BGRX32) }, 4991+ { AV_FMT(RGB0), AV_CODEC(RAWVIDEO), V4L2_FMT(RGBX32) }, 4992+ { AV_FMT(0BGR), AV_CODEC(RAWVIDEO), V4L2_FMT(XBGR32) }, 4993+ { AV_FMT(0RGB), AV_CODEC(RAWVIDEO), V4L2_FMT(XRGB32) }, 4994+ { AV_FMT(BGRA), AV_CODEC(RAWVIDEO), V4L2_FMT(BGRA32) }, 4995+ { AV_FMT(RGBA), AV_CODEC(RAWVIDEO), V4L2_FMT(RGBA32) }, 4996+ { AV_FMT(ABGR), 
AV_CODEC(RAWVIDEO), V4L2_FMT(ABGR32) }, 4997+ { AV_FMT(ARGB), AV_CODEC(RAWVIDEO), V4L2_FMT(ARGB32) }, 4998 { AV_FMT(GRAY8), AV_CODEC(RAWVIDEO), V4L2_FMT(GREY) }, 4999 { AV_FMT(YUV420P), AV_CODEC(RAWVIDEO), V4L2_FMT(YUV420) }, 5000 { AV_FMT(YUYV422), AV_CODEC(RAWVIDEO), V4L2_FMT(YUYV) }, 5001--- a/libavcodec/v4l2_m2m.c 5002+++ b/libavcodec/v4l2_m2m.c 5003@@ -34,6 +34,15 @@ 5004 #include "v4l2_context.h" 5005 #include "v4l2_fmt.h" 5006 #include "v4l2_m2m.h" 5007+#include "v4l2_req_dmabufs.h" 5008+ 5009+static void 5010+xlat_init(xlat_track_t * const x) 5011+{ 5012+ memset(x, 0, sizeof(*x)); 5013+ x->last_pts = AV_NOPTS_VALUE; 5014+} 5015+ 5016 5017 static inline int v4l2_splane_video(struct v4l2_capability *cap) 5018 { 5019@@ -67,7 +76,9 @@ static int v4l2_prepare_contexts(V4L2m2m 5020 5021 s->capture.done = s->output.done = 0; 5022 s->capture.name = "capture"; 5023+ s->capture.buf_mem = s->db_ctl != NULL ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; 5024 s->output.name = "output"; 5025+ s->output.buf_mem = s->input_drm ? 
V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; 5026 atomic_init(&s->refcount, 0); 5027 sem_init(&s->refsync, 0, 0); 5028 5029@@ -84,18 +95,58 @@ static int v4l2_prepare_contexts(V4L2m2m 5030 if (v4l2_mplane_video(&cap)) { 5031 s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; 5032 s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; 5033+ s->output.format.type = s->output.type; 5034 return 0; 5035 } 5036 5037 if (v4l2_splane_video(&cap)) { 5038 s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 5039 s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; 5040+ s->output.format.type = s->output.type; 5041 return 0; 5042 } 5043 5044 return AVERROR(EINVAL); 5045 } 5046 5047+static int check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) 5048+{ 5049+ struct v4l2_format fmt = {.type = s->output.type}; 5050+ int rv; 5051+ uint32_t pixfmt = ff_v4l2_format_avfmt_to_v4l2(avctx->pix_fmt); 5052+ unsigned int w; 5053+ unsigned int h; 5054+ 5055+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { 5056+ fmt.fmt.pix_mp.pixelformat = pixfmt; 5057+ fmt.fmt.pix_mp.width = avctx->width; 5058+ fmt.fmt.pix_mp.height = avctx->height; 5059+ } 5060+ else { 5061+ fmt.fmt.pix.pixelformat = pixfmt; 5062+ fmt.fmt.pix.width = avctx->width; 5063+ fmt.fmt.pix.height = avctx->height; 5064+ } 5065+ 5066+ rv = ioctl(s->fd, VIDIOC_TRY_FMT, &fmt); 5067+ 5068+ if (rv != 0) { 5069+ rv = AVERROR(errno); 5070+ av_log(avctx, AV_LOG_ERROR, "%s: Tryfmt failed: %s\n", __func__, av_err2str(rv)); 5071+ return rv; 5072+ } 5073+ 5074+ w = ff_v4l2_get_format_width(&fmt); 5075+ h = ff_v4l2_get_format_height(&fmt); 5076+ 5077+ if (w < avctx->width || h < avctx->height) { 5078+ av_log(avctx, AV_LOG_WARNING, "%s: Size check failed: asked for %dx%d, got: %dx%d\n", __func__, avctx->width, avctx->height, w, h); 5079+ return AVERROR(EINVAL); 5080+ } 5081+ 5082+ return 0; 5083+} 5084+ 5085 static int v4l2_probe_driver(V4L2m2mContext *s) 5086 { 5087 void *log_ctx = s->avctx; 5088@@ -115,6 +166,11 @@ static int 
v4l2_probe_driver(V4L2m2mCont 5089 goto done; 5090 } 5091 5092+ // If being given frames (encode) check that V4L2 can cope with the size 5093+ if (s->output.av_codec_id == AV_CODEC_ID_RAWVIDEO && 5094+ (ret = check_size(s->avctx, s)) != 0) 5095+ goto done; 5096+ 5097 ret = ff_v4l2_context_get_format(&s->capture, 1); 5098 if (ret) { 5099 av_log(log_ctx, AV_LOG_DEBUG, "v4l2 capture format not supported\n"); 5100@@ -214,13 +270,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont 5101 av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n"); 5102 5103 /* 2. unmap the capture buffers (v4l2 and ffmpeg): 5104- * we must wait for all references to be released before being allowed 5105- * to queue new buffers. 5106 */ 5107- av_log(log_ctx, AV_LOG_DEBUG, "waiting for user to release AVBufferRefs\n"); 5108- if (atomic_load(&s->refcount)) 5109- while(sem_wait(&s->refsync) == -1 && errno == EINTR); 5110- 5111 ff_v4l2_context_release(&s->capture); 5112 5113 /* 3. get the new capture format */ 5114@@ -239,7 +289,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont 5115 5116 /* 5. 
complete reinit */ 5117 s->draining = 0; 5118- s->reinit = 0; 5119 5120 return 0; 5121 } 5122@@ -256,6 +305,9 @@ static void v4l2_m2m_destroy_context(voi 5123 av_frame_unref(s->frame); 5124 av_frame_free(&s->frame); 5125 av_packet_unref(&s->buf_pkt); 5126+ av_freep(&s->extdata_data); 5127+ 5128+ av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n"); 5129 5130 av_free(s); 5131 } 5132@@ -268,6 +320,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *p 5133 if (!s) 5134 return 0; 5135 5136+ av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n"); 5137+ 5138+ if (s->avctx && av_codec_is_decoder(s->avctx->codec)) 5139+ av_packet_unref(&s->buf_pkt); 5140+ 5141 if (s->fd >= 0) { 5142 ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF); 5143 if (ret) 5144@@ -279,8 +336,20 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *p 5145 } 5146 5147 ff_v4l2_context_release(&s->output); 5148+ av_buffer_unref(&s->device_ref); 5149+ 5150+ dmabufs_ctl_unref(&s->db_ctl); 5151+ 5152+ if (s->fd != -1) { 5153+ close(s->fd); 5154+ s->fd = -1; 5155+ } 5156 5157 s->self_ref = NULL; 5158+ // This is only called on avctx close so after this point we don't have that 5159+ // Crash sooner if we find we are using it (can still log with avctx = NULL) 5160+ s->avctx = NULL; 5161+ priv->context = NULL; 5162 av_buffer_unref(&priv->context_ref); 5163 5164 return 0; 5165@@ -324,35 +393,38 @@ int ff_v4l2_m2m_codec_init(V4L2m2mPriv * 5166 return v4l2_configure_contexts(s); 5167 } 5168 5169-int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s) 5170+int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **pps) 5171 { 5172- *s = av_mallocz(sizeof(V4L2m2mContext)); 5173- if (!*s) 5174+ V4L2m2mContext * const s = av_mallocz(sizeof(V4L2m2mContext)); 5175+ 5176+ *pps = NULL; 5177+ if (!s) 5178 return AVERROR(ENOMEM); 5179 5180- priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext), 5181+ priv->context_ref = av_buffer_create((uint8_t *)s, sizeof(*s), 5182 
&v4l2_m2m_destroy_context, NULL, 0); 5183 if (!priv->context_ref) { 5184- av_freep(s); 5185+ av_free(s); 5186 return AVERROR(ENOMEM); 5187 } 5188 5189 /* assign the context */ 5190- priv->context = *s; 5191- (*s)->priv = priv; 5192+ priv->context = s; 5193+ s->priv = priv; 5194 5195 /* populate it */ 5196- priv->context->capture.num_buffers = priv->num_capture_buffers; 5197- priv->context->output.num_buffers = priv->num_output_buffers; 5198- priv->context->self_ref = priv->context_ref; 5199- priv->context->fd = -1; 5200+ s->capture.num_buffers = priv->num_capture_buffers; 5201+ s->output.num_buffers = priv->num_output_buffers; 5202+ s->self_ref = priv->context_ref; 5203+ s->fd = -1; 5204+ xlat_init(&s->xlat); 5205 5206 priv->context->frame = av_frame_alloc(); 5207 if (!priv->context->frame) { 5208 av_buffer_unref(&priv->context_ref); 5209- *s = NULL; /* freed when unreferencing context_ref */ 5210 return AVERROR(ENOMEM); 5211 } 5212 5213+ *pps = s; 5214 return 0; 5215 } 5216--- a/libavcodec/v4l2_m2m.h 5217+++ b/libavcodec/v4l2_m2m.h 5218@@ -30,6 +30,7 @@ 5219 #include <linux/videodev2.h> 5220 5221 #include "libavcodec/avcodec.h" 5222+#include "libavutil/pixfmt.h" 5223 #include "v4l2_context.h" 5224 5225 #define container_of(ptr, type, member) ({ \ 5226@@ -38,7 +39,39 @@ 5227 5228 #define V4L_M2M_DEFAULT_OPTS \ 5229 { "num_output_buffers", "Number of buffers in the output context",\ 5230- OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 6, INT_MAX, FLAGS } 5231+ OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 2, INT_MAX, FLAGS } 5232+ 5233+#define FF_V4L2_M2M_TRACK_SIZE 128 5234+typedef struct V4L2m2mTrackEl { 5235+ int discard; // If we see this buffer its been flushed, so discard 5236+ int pending; 5237+ int pkt_size; 5238+ int64_t pts; 5239+ int64_t dts; 5240+ int64_t reordered_opaque; 5241+ int64_t pkt_pos; 5242+ int64_t pkt_duration; 5243+ int64_t track_pts; 5244+} V4L2m2mTrackEl; 5245+ 5246+typedef struct pts_stats_s 5247+{ 5248+ void * 
logctx; 5249+ const char * name; // For debug 5250+ unsigned int last_count; 5251+ unsigned int last_interval; 5252+ int64_t last_pts; 5253+ int64_t guess; 5254+} pts_stats_t; 5255+ 5256+typedef struct xlat_track_s { 5257+ unsigned int track_no; 5258+ int64_t last_pts; // Last valid PTS decoded 5259+ int64_t last_opaque; 5260+ V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; 5261+} xlat_track_t; 5262+ 5263+struct dmabufs_ctl; 5264 5265 typedef struct V4L2m2mContext { 5266 char devname[PATH_MAX]; 5267@@ -52,10 +85,10 @@ typedef struct V4L2m2mContext { 5268 AVCodecContext *avctx; 5269 sem_t refsync; 5270 atomic_uint refcount; 5271- int reinit; 5272 5273 /* null frame/packet received */ 5274 int draining; 5275+ int running; 5276 AVPacket buf_pkt; 5277 5278 /* Reference to a frame. Only used during encoding */ 5279@@ -66,6 +99,36 @@ typedef struct V4L2m2mContext { 5280 5281 /* reference back to V4L2m2mPriv */ 5282 void *priv; 5283+ 5284+ AVBufferRef *device_ref; 5285+ 5286+ /* generate DRM frames */ 5287+ int output_drm; 5288+ 5289+ /* input frames are drmprime */ 5290+ int input_drm; 5291+ 5292+ /* Frame tracking */ 5293+ xlat_track_t xlat; 5294+ 5295+ pts_stats_t pts_stat; 5296+ 5297+ /* req pkt */ 5298+ int req_pkt; 5299+ int reorder_size; 5300+ 5301+ /* Ext data sent */ 5302+ int extdata_sent; 5303+ /* Ext data sent in packet - overrides ctx */ 5304+ void * extdata_data; 5305+ size_t extdata_size; 5306+ 5307+#define FF_V4L2_QUIRK_REINIT_ALWAYS 1 5308+#define FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN 2 5309+ /* Quirks */ 5310+ unsigned int quirks; 5311+ 5312+ struct dmabufs_ctl * db_ctl; 5313 } V4L2m2mContext; 5314 5315 typedef struct V4L2m2mPriv { 5316@@ -76,6 +139,8 @@ typedef struct V4L2m2mPriv { 5317 5318 int num_output_buffers; 5319 int num_capture_buffers; 5320+ const char * dmabuf_alloc; 5321+ enum AVPixelFormat pix_fmt; 5322 } V4L2m2mPriv; 5323 5324 /** 5325@@ -129,4 +194,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont 5326 */ 5327 int 
ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx); 5328 5329+ 5330+static inline unsigned int ff_v4l2_get_format_width(const struct v4l2_format * const fmt) 5331+{ 5332+ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; 5333+} 5334+ 5335+static inline unsigned int ff_v4l2_get_format_height(const struct v4l2_format * const fmt) 5336+{ 5337+ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; 5338+} 5339+ 5340+static inline uint32_t ff_v4l2_get_format_pixelformat(const struct v4l2_format * const fmt) 5341+{ 5342+ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; 5343+} 5344+ 5345+static inline int ff_v4l2_ctx_eos(const V4L2Context * const ctx) 5346+{ 5347+ return ctx->flag_last; 5348+} 5349+ 5350+ 5351 #endif /* AVCODEC_V4L2_M2M_H */ 5352--- a/libavcodec/v4l2_m2m_dec.c 5353+++ b/libavcodec/v4l2_m2m_dec.c 5354@@ -21,8 +21,14 @@ 5355 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 5356 */ 5357 5358+#include "config_components.h" 5359+ 5360 #include <linux/videodev2.h> 5361 #include <sys/ioctl.h> 5362+ 5363+#include "libavutil/avassert.h" 5364+#include "libavutil/hwcontext.h" 5365+#include "libavutil/hwcontext_drm.h" 5366 #include "libavutil/pixfmt.h" 5367 #include "libavutil/pixdesc.h" 5368 #include "libavutil/opt.h" 5369@@ -30,75 +36,279 @@ 5370 #include "codec_internal.h" 5371 #include "libavcodec/decode.h" 5372 5373+#include "libavcodec/hwaccels.h" 5374+#include "libavcodec/internal.h" 5375+#include "libavcodec/hwconfig.h" 5376+ 5377 #include "v4l2_context.h" 5378 #include "v4l2_m2m.h" 5379 #include "v4l2_fmt.h" 5380+#include "v4l2_req_dmabufs.h" 5381 5382-static int v4l2_try_start(AVCodecContext *avctx) 5383+#if CONFIG_H264_DECODER 5384+#include "h264_parse.h" 5385+#endif 5386+#if CONFIG_HEVC_DECODER 5387+#include "hevc_parse.h" 5388+#endif 5389+ 5390+// Pick 64 for max last count - that is >1sec at 60fps 
5391+#define STATS_LAST_COUNT_MAX 64 5392+#define STATS_INTERVAL_MAX (1 << 30) 5393+ 5394+#ifndef FF_API_BUFFER_SIZE_T 5395+#define FF_API_BUFFER_SIZE_T 1 5396+#endif 5397+ 5398+#define DUMP_FAILED_EXTRADATA 0 5399+ 5400+#if DUMP_FAILED_EXTRADATA 5401+static inline char hex1(unsigned int x) 5402 { 5403- V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; 5404- V4L2Context *const capture = &s->capture; 5405- V4L2Context *const output = &s->output; 5406- struct v4l2_selection selection = { 0 }; 5407- int ret; 5408+ x &= 0xf; 5409+ return x <= 9 ? '0' + x : 'a' + x - 10; 5410+} 5411 5412- /* 1. start the output process */ 5413- if (!output->streamon) { 5414- ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON); 5415- if (ret < 0) { 5416- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n"); 5417- return ret; 5418- } 5419+static inline char * hex2(char * s, unsigned int x) 5420+{ 5421+ *s++ = hex1(x >> 4); 5422+ *s++ = hex1(x); 5423+ return s; 5424+} 5425+ 5426+static inline char * hex4(char * s, unsigned int x) 5427+{ 5428+ s = hex2(s, x >> 8); 5429+ s = hex2(s, x); 5430+ return s; 5431+} 5432+ 5433+static inline char * dash2(char * s) 5434+{ 5435+ *s++ = '-'; 5436+ *s++ = '-'; 5437+ return s; 5438+} 5439+ 5440+static void 5441+data16(char * s, const unsigned int offset, const uint8_t * m, const size_t len) 5442+{ 5443+ size_t i; 5444+ s = hex4(s, offset); 5445+ m += offset; 5446+ for (i = 0; i != 8; ++i) { 5447+ *s++ = ' '; 5448+ s = len > i + offset ? hex2(s, *m++) : dash2(s); 5449 } 5450+ *s++ = ' '; 5451+ *s++ = ':'; 5452+ for (; i != 16; ++i) { 5453+ *s++ = ' '; 5454+ s = len > i + offset ? 
hex2(s, *m++) : dash2(s); 5455+ } 5456+ *s++ = 0; 5457+} 5458 5459- if (capture->streamon) 5460- return 0; 5461+static void 5462+log_dump(void * logctx, int lvl, const void * const data, const size_t len) 5463+{ 5464+ size_t i; 5465+ for (i = 0; i < len; i += 16) { 5466+ char buf[80]; 5467+ data16(buf, i, data, len); 5468+ av_log(logctx, lvl, "%s\n", buf); 5469+ } 5470+} 5471+#endif 5472 5473- /* 2. get the capture format */ 5474- capture->format.type = capture->type; 5475- ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format); 5476- if (ret) { 5477- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n"); 5478- return ret; 5479+static unsigned int pts_stats_interval(const pts_stats_t * const stats) 5480+{ 5481+ return stats->last_interval; 5482+} 5483+ 5484+static int64_t pts_stats_guess(const pts_stats_t * const stats, const int fail_bad_guess) 5485+{ 5486+ if (stats->last_count <= 1) 5487+ return stats->last_pts; 5488+ if (stats->last_pts == AV_NOPTS_VALUE || 5489+ fail_bad_guess && (stats->last_interval == 0 || 5490+ stats->last_count >= STATS_LAST_COUNT_MAX)) 5491+ return AV_NOPTS_VALUE; 5492+ return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval; 5493+} 5494+ 5495+static void pts_stats_add(pts_stats_t * const stats, int64_t pts) 5496+{ 5497+ if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { 5498+ if (stats->last_count < STATS_LAST_COUNT_MAX) 5499+ ++stats->last_count; 5500+ return; 5501 } 5502 5503- /* 2.1 update the AVCodecContext */ 5504- avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO); 5505- capture->av_pix_fmt = avctx->pix_fmt; 5506+ if (stats->last_pts != AV_NOPTS_VALUE) { 5507+ const int64_t interval = pts - stats->last_pts; 5508 5509- /* 3. 
set the crop parameters */ 5510- selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 5511- selection.r.height = avctx->coded_height; 5512- selection.r.width = avctx->coded_width; 5513- ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection); 5514- if (!ret) { 5515- ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection); 5516- if (ret) { 5517- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n"); 5518- } else { 5519- av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height); 5520- /* update the size of the resulting frame */ 5521- capture->height = selection.r.height; 5522- capture->width = selection.r.width; 5523+ if (interval < 0 || interval >= STATS_INTERVAL_MAX || 5524+ stats->last_count >= STATS_LAST_COUNT_MAX) { 5525+ if (stats->last_interval != 0) 5526+ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", 5527+ __func__, stats->name, interval, stats->last_count); 5528+ stats->last_interval = 0; 5529+ } 5530+ else { 5531+ const int64_t frame_time = interval / (int64_t)stats->last_count; 5532+ 5533+ if (frame_time != stats->last_interval) 5534+ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", 5535+ __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); 5536+ stats->last_interval = frame_time; 5537 } 5538 } 5539 5540- /* 4. 
init the capture context now that we have the capture format */ 5541- if (!capture->buffers) { 5542- ret = ff_v4l2_context_init(capture); 5543- if (ret) { 5544- av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); 5545- return AVERROR(ENOMEM); 5546+ stats->last_pts = pts; 5547+ stats->last_count = 1; 5548+} 5549+ 5550+static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name) 5551+{ 5552+ *stats = (pts_stats_t){ 5553+ .logctx = logctx, 5554+ .name = name, 5555+ .last_count = 1, 5556+ .last_interval = 0, 5557+ .last_pts = AV_NOPTS_VALUE 5558+ }; 5559+} 5560+ 5561+// If abdata == NULL then this just counts space required 5562+// Unpacks avcC if detected 5563+static int 5564+h264_xd_copy(const uint8_t * const extradata, const int extrasize, uint8_t * abdata) 5565+{ 5566+ const uint8_t * const xdend = extradata + extrasize; 5567+ const uint8_t * p = extradata; 5568+ uint8_t * d = abdata; 5569+ unsigned int n; 5570+ unsigned int len; 5571+ const unsigned int hdrlen = 4; 5572+ unsigned int need_pps = 1; 5573+ 5574+ if (extrasize < 8) 5575+ return AVERROR(EINVAL); 5576+ 5577+ if (p[0] == 0 && p[1] == 0) { 5578+ // Assume a couple of leading zeros are good enough to indicate NAL 5579+ if (abdata) 5580+ memcpy(d, p, extrasize); 5581+ return extrasize; 5582+ } 5583+ 5584+ // avcC starts with a 1 5585+ if (p[0] != 1) 5586+ return AVERROR(EINVAL); 5587+ 5588+ p += 5; 5589+ n = *p++ & 0x1f; 5590+ 5591+doxps: 5592+ while (n--) { 5593+ if (xdend - p < 2) 5594+ return AVERROR(EINVAL); 5595+ len = (p[0] << 8) | p[1]; 5596+ p += 2; 5597+ if (xdend - p < (ptrdiff_t)len) 5598+ return AVERROR(EINVAL); 5599+ if (abdata) { 5600+ d[0] = 0; 5601+ d[1] = 0; 5602+ d[2] = 0; 5603+ d[3] = 1; 5604+ memcpy(d + 4, p, len); 5605 } 5606+ d += len + hdrlen; 5607+ p += len; 5608+ } 5609+ if (need_pps) { 5610+ need_pps = 0; 5611+ if (p >= xdend) 5612+ return AVERROR(EINVAL); 5613+ n = *p++; 5614+ goto doxps; 5615 } 5616 5617- /* 5. 
start the capture process */ 5618- ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); 5619- if (ret) { 5620- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n"); 5621+ return d - abdata; 5622+} 5623+ 5624+static int 5625+copy_extradata(AVCodecContext * const avctx, 5626+ const void * const src_data, const int src_len, 5627+ void ** const pdst_data, size_t * const pdst_len) 5628+{ 5629+ int len; 5630+ 5631+ *pdst_len = 0; 5632+ av_freep(pdst_data); 5633+ 5634+ if (avctx->codec_id == AV_CODEC_ID_H264) 5635+ len = h264_xd_copy(src_data, src_len, NULL); 5636+ else 5637+ len = src_len < 0 ? AVERROR(EINVAL) : src_len; 5638+ 5639+ // Zero length is OK but we want to stop - -ve is error val 5640+ if (len <= 0) 5641+ return len; 5642+ 5643+ if ((*pdst_data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) 5644+ return AVERROR(ENOMEM); 5645+ 5646+ if (avctx->codec_id == AV_CODEC_ID_H264) 5647+ h264_xd_copy(src_data, src_len, *pdst_data); 5648+ else 5649+ memcpy(*pdst_data, src_data, len); 5650+ *pdst_len = len; 5651+ 5652+ return 0; 5653+} 5654+ 5655+ 5656+ 5657+static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s) 5658+{ 5659+ int ret; 5660+ struct v4l2_decoder_cmd cmd = { 5661+ .cmd = V4L2_DEC_CMD_START, 5662+ .flags = 0, 5663+ }; 5664+ 5665+ if (s->output.streamon) 5666+ return 0; 5667+ 5668+ ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON); 5669+ if (ret != 0) { 5670+ av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context: %s\n", av_err2str(ret)); 5671 return ret; 5672 } 5673 5674+ // STREAMON should do implicit START so this just for those that don't. 
5675+ // It is optional so don't worry if it fails 5676+ if (ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd) < 0) { 5677+ ret = AVERROR(errno); 5678+ av_log(avctx, AV_LOG_WARNING, "VIDIOC_DECODER_CMD start error: %s\n", av_err2str(ret)); 5679+ } 5680+ else { 5681+ av_log(avctx, AV_LOG_TRACE, "VIDIOC_DECODER_CMD start OK\n"); 5682+ } 5683+ return 0; 5684+} 5685+ 5686+static int v4l2_try_start(AVCodecContext *avctx) 5687+{ 5688+ V4L2m2mContext * const s = ((V4L2m2mPriv*)avctx->priv_data)->context; 5689+ int ret; 5690+ 5691+ /* 1. start the output process */ 5692+ if ((ret = check_output_streamon(avctx, s)) != 0) 5693+ return ret; 5694 return 0; 5695 } 5696 5697@@ -133,51 +343,823 @@ static int v4l2_prepare_decoder(V4L2m2mC 5698 return 0; 5699 } 5700 5701-static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) 5702+static void 5703+set_best_effort_pts(AVCodecContext *const avctx, 5704+ pts_stats_t * const ps, 5705+ AVFrame *const frame) 5706+{ 5707+ pts_stats_add(ps, frame->pts); 5708+ 5709+ frame->best_effort_timestamp = pts_stats_guess(ps, 1); 5710+ // If we can't guess from just PTS - try DTS 5711+ if (frame->best_effort_timestamp == AV_NOPTS_VALUE) 5712+ frame->best_effort_timestamp = frame->pkt_dts; 5713+ 5714+ // We can't emulate what s/w does in a useful manner and using the 5715+ // "correct" answer seems to just confuse things. 5716+ frame->pkt_dts = frame->pts; 5717+ av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", 5718+ frame->pts, frame->best_effort_timestamp, frame->pkt_dts); 5719+} 5720+ 5721+static void 5722+xlat_flush(xlat_track_t * const x) 5723+{ 5724+ unsigned int i; 5725+ // Do not reset track_no - this ensures that any frames left in the decoder 5726+ // that turn up later get discarded. 
5727+ 5728+ x->last_pts = AV_NOPTS_VALUE; 5729+ x->last_opaque = 0; 5730+ for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) { 5731+ x->track_els[i].pending = 0; 5732+ x->track_els[i].discard = 1; 5733+ } 5734+} 5735+ 5736+static void 5737+xlat_init(xlat_track_t * const x) 5738+{ 5739+ memset(x, 0, sizeof(*x)); 5740+ xlat_flush(x); 5741+} 5742+ 5743+static int 5744+xlat_pending(const V4L2m2mContext * const s) 5745+{ 5746+ const xlat_track_t *const x = &s->xlat; 5747+ unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE; 5748+ int i; 5749+ const int64_t now = pts_stats_guess(&s->pts_stat, 0); 5750+ int64_t first_dts = AV_NOPTS_VALUE; 5751+ int no_dts_count = 0; 5752+ unsigned int interval = pts_stats_interval(&s->pts_stat); 5753+ 5754+ for (i = 0; i < FF_V4L2_M2M_TRACK_SIZE; ++i, n = (n - 1) & (FF_V4L2_M2M_TRACK_SIZE - 1)) { 5755+ const V4L2m2mTrackEl * const t = x->track_els + n; 5756+ 5757+ if (first_dts == AV_NOPTS_VALUE) 5758+ if (t->dts == AV_NOPTS_VALUE) 5759+ ++no_dts_count; 5760+ else 5761+ first_dts = t->dts; 5762+ 5763+ // Discard only set on never-set or flushed entries 5764+ // So if we get here we've never successfully decoded a frame so allow 5765+ // more frames into the buffer before stalling 5766+ if (t->discard) 5767+ return i - 16; 5768+ 5769+ // If we've got this frame out then everything before this point 5770+ // must have entered the decoder 5771+ if (!t->pending) 5772+ break; 5773+ 5774+ // If we've never seen a pts all we can do is count frames 5775+ if (now == AV_NOPTS_VALUE) 5776+ continue; 5777+ 5778+ if (t->dts != AV_NOPTS_VALUE && now >= t->dts) 5779+ break; 5780+ } 5781+ 5782+ if (first_dts != AV_NOPTS_VALUE && now != AV_NOPTS_VALUE && interval != 0 && s->reorder_size != 0) { 5783+ const int iframes = (first_dts - now) / (int)interval; 5784+ const int t = iframes - s->reorder_size + no_dts_count; 5785+ 5786+// av_log(s->avctx, AV_LOG_DEBUG, "Last:%"PRId64", Now:%"PRId64", First:%"PRId64", delta=%"PRId64", frames=%d, nodts=%d\n", 5787+// 
x->last_dts, now, first_dts, first_dts - now, iframes, no_dts_count); 5788+ 5789+ if (iframes > 0 && iframes < 64 && t < i) { 5790+ return t; 5791+ } 5792+ } 5793+ 5794+ return i; 5795+} 5796+ 5797+static inline int stream_started(const V4L2m2mContext * const s) { 5798+ return s->output.streamon; 5799+} 5800+ 5801+#define NQ_OK 0 5802+#define NQ_Q_FULL 1 5803+#define NQ_SRC_EMPTY 2 5804+#define NQ_NONE 3 5805+#define NQ_DRAINING 4 5806+#define NQ_DEAD 5 5807+ 5808+#define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING) 5809+#define RETRY_NQ(nq_status) ((nq_status) == NQ_Q_FULL || (nq_status) == NQ_NONE) 5810+ 5811+// do_not_get If true then no new packet will be got but status will 5812+// be set appropriately 5813+ 5814+// AVERROR_EOF Flushing an already flushed stream 5815+// -ve Error (all errors except EOF are unexpected) 5816+// NQ_OK (0) OK 5817+// NQ_Q_FULL Dst full (retry if we think V4L2 Q has space now) 5818+// NQ_SRC_EMPTY Src empty (do not retry) 5819+// NQ_NONE Enqueue not attempted 5820+// NQ_DRAINING At EOS, dQ dest until EOS there too 5821+// NQ_DEAD Not running (do not retry, do not attempt capture dQ) 5822+ 5823+static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s, const int do_not_get) 5824 { 5825- V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; 5826- V4L2Context *const capture = &s->capture; 5827- V4L2Context *const output = &s->output; 5828 int ret; 5829 5830- if (!s->buf_pkt.size) { 5831- ret = ff_decode_get_packet(avctx, &s->buf_pkt); 5832+ // If we don't already have a coded packet - get a new one 5833+ // We will already have a coded pkt if the output Q was full last time we 5834+ // tried to Q it 5835+ if (!s->buf_pkt.size && !do_not_get) { 5836+ unsigned int i; 5837+ 5838+ for (i = 0; i < 256; ++i) { 5839+ uint8_t * side_data; 5840+ size_t side_size; 5841+ 5842+ ret = ff_decode_get_packet(avctx, &s->buf_pkt); 5843+ if (ret != 0) 5844+ break; 5845+ 5846+ // New extradata is 
the only side-data we undertand 5847+ side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); 5848+ if (side_data) { 5849+ av_log(avctx, AV_LOG_DEBUG, "New extradata\n"); 5850+ if ((ret = copy_extradata(avctx, side_data, (int)side_size, &s->extdata_data, &s->extdata_size)) < 0) 5851+ av_log(avctx, AV_LOG_WARNING, "Failed to copy new extra data: %s\n", av_err2str(ret)); 5852+ s->extdata_sent = 0; 5853+ } 5854+ 5855+ if (s->buf_pkt.size != 0) 5856+ break; 5857+ 5858+ if (s->buf_pkt.side_data_elems == 0) { 5859+ av_log(avctx, AV_LOG_WARNING, "Empty pkt from ff_decode_get_packet - treating as EOF\n"); 5860+ ret = AVERROR_EOF; 5861+ break; 5862+ } 5863+ 5864+ // Retry a side-data only pkt 5865+ } 5866+ // If i >= 256 something has gone wrong 5867+ if (i >= 256) { 5868+ av_log(avctx, AV_LOG_ERROR, "Too many side-data only packets\n"); 5869+ return AVERROR(EIO); 5870+ } 5871+ 5872+ if (ret == AVERROR(EAGAIN)) { 5873+ if (!stream_started(s)) { 5874+ av_log(avctx, AV_LOG_TRACE, "%s: receive_frame before 1st coded packet\n", __func__); 5875+ return NQ_DEAD; 5876+ } 5877+ return NQ_SRC_EMPTY; 5878+ } 5879+ 5880+ if (ret == AVERROR_EOF) { 5881+ // EOF - enter drain mode 5882+ av_log(avctx, AV_LOG_TRACE, "--- EOS req: ret=%d, size=%d, started=%d, drain=%d\n", 5883+ ret, s->buf_pkt.size, stream_started(s), s->draining); 5884+ if (!stream_started(s)) { 5885+ av_log(avctx, AV_LOG_DEBUG, "EOS on flushed stream\n"); 5886+ s->draining = 1; 5887+ s->capture.done = 1; 5888+ return AVERROR_EOF; 5889+ } 5890+ 5891+ if (!s->draining) { 5892+ // Calling enqueue with an empty pkt starts drain 5893+ av_assert0(s->buf_pkt.size == 0); 5894+ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); 5895+ if (ret) { 5896+ av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret); 5897+ return ret; 5898+ } 5899+ } 5900+ return NQ_DRAINING; 5901+ } 5902+ 5903 if (ret < 0) { 5904- if (ret == AVERROR(EAGAIN)) 5905- return 
ff_v4l2_context_dequeue_frame(capture, frame, 0); 5906- else if (ret != AVERROR_EOF) 5907- return ret; 5908+ av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret); 5909+ return ret; 5910 } 5911 } 5912 5913- if (s->draining) 5914- goto dequeue; 5915+ if (s->draining) { 5916+ if (s->buf_pkt.size) { 5917+ av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n"); 5918+ av_packet_unref(&s->buf_pkt); 5919+ } 5920+ return NQ_DRAINING; 5921+ } 5922+ 5923+ if (!s->buf_pkt.size) 5924+ return NQ_NONE; 5925 5926- ret = ff_v4l2_context_enqueue_packet(output, &s->buf_pkt); 5927- if (ret < 0 && ret != AVERROR(EAGAIN)) 5928- goto fail; 5929+ if ((ret = check_output_streamon(avctx, s)) != 0) 5930+ return ret; 5931 5932- /* if EAGAIN don't unref packet and try to enqueue in the next iteration */ 5933- if (ret != AVERROR(EAGAIN)) 5934+ if (s->extdata_sent) 5935+ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); 5936+ else 5937+ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size); 5938+ 5939+ if (ret == AVERROR(EAGAIN)) { 5940+ // Out of input buffers - keep packet 5941+ ret = NQ_Q_FULL; 5942+ } 5943+ else { 5944+ // In all other cases we are done with this packet 5945 av_packet_unref(&s->buf_pkt); 5946+ s->extdata_sent = 1; 5947 5948- if (!s->draining) { 5949- ret = v4l2_try_start(avctx); 5950 if (ret) { 5951- /* cant recover */ 5952- if (ret != AVERROR(ENOMEM)) 5953- ret = 0; 5954- goto fail; 5955+ av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret); 5956+ return ret; 5957+ } 5958+ } 5959+ 5960+ // Start if we haven't 5961+ { 5962+ const int ret2 = v4l2_try_start(avctx); 5963+ if (ret2) { 5964+ av_log(avctx, AV_LOG_DEBUG, "Start failure: err=%d\n", ret2); 5965+ ret = (ret2 == AVERROR(ENOMEM)) ? 
ret2 : NQ_DEAD; 5966 } 5967 } 5968 5969-dequeue: 5970- return ff_v4l2_context_dequeue_frame(capture, frame, -1); 5971-fail: 5972- av_packet_unref(&s->buf_pkt); 5973 return ret; 5974 } 5975 5976+static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx) 5977+{ 5978+ int rv = 0; 5979+ 5980+ ff_mutex_lock(&ctx->lock); 5981+ 5982+ while (atomic_load(&ctx->q_count) == 0 && ctx->streamon) { 5983+ if (pthread_cond_wait(&ctx->cond, &ctx->lock) != 0) { 5984+ rv = AVERROR(errno); 5985+ av_log(avctx, AV_LOG_ERROR, "Cond wait failure: %s\n", av_err2str(rv)); 5986+ break; 5987+ } 5988+ } 5989+ 5990+ ff_mutex_unlock(&ctx->lock); 5991+ return rv; 5992+} 5993+ 5994+static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) 5995+{ 5996+ V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; 5997+ int src_rv = -1; 5998+ int dst_rv = 1; // Non-zero (done), non-negative (error) number 5999+ unsigned int i = 0; 6000+ 6001+ do { 6002+ const int pending = xlat_pending(s); 6003+ const int prefer_dq = (pending > 4); 6004+ const int last_src_rv = src_rv; 6005+ 6006+ av_log(avctx, AV_LOG_TRACE, "Pending=%d, src_rv=%d, req_pkt=%d\n", pending, src_rv, s->req_pkt); 6007+ 6008+ // Enqueue another pkt for decode if 6009+ // (a) We don't have a lot of stuff in the buffer already OR 6010+ // (b) ... we (think we) do but we've failed to get a frame already OR 6011+ // (c) We've dequeued a lot of frames without asking for input 6012+ src_rv = try_enqueue_src(avctx, s, !(!prefer_dq || i != 0 || s->req_pkt > 2)); 6013+ 6014+ // If we got a frame last time or we've already tried to get a frame and 6015+ // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN) 6016+ // indicating that we want more input. 
6017+ // This should mean that once decode starts we enter a stable state where 6018+ // we alternately ask for input and produce output 6019+ if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY) 6020+ break; 6021+ 6022+ if (src_rv == NQ_Q_FULL && last_src_rv == NQ_Q_FULL) { 6023+ av_log(avctx, AV_LOG_WARNING, "Poll thinks src Q has space; none found\n"); 6024+ break; 6025+ } 6026+ 6027+ // Try to get a new frame if 6028+ // (a) we haven't already got one AND 6029+ // (b) enqueue returned a status indicating that decode should be attempted 6030+ if (dst_rv != 0 && TRY_DQ(src_rv)) { 6031+ // Pick a timeout depending on state 6032+ // The pending count isn't completely reliable so it is good enough 6033+ // hint that we want a frame but not good enough to require it in 6034+ // all cases; however if it has got > 31 that exceeds its margin of 6035+ // error so require a frame to prevent ridiculous levels of latency 6036+ const int t = 6037+ src_rv == NQ_Q_FULL ? -1 : 6038+ src_rv == NQ_DRAINING ? 300 : 6039+ prefer_dq ? (s->running && pending > 31 ? 100 : 5) : 0; 6040+ 6041+ // Dequeue frame will unref any previous contents of frame 6042+ // if it returns success so we don't need an explicit unref 6043+ // when discarding 6044+ // This returns AVERROR(EAGAIN) on timeout or if 6045+ // there is room in the input Q and timeout == -1 6046+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); 6047+ 6048+ // Failure due to no buffer in Q? 
6049+ if (dst_rv == AVERROR(ENOSPC)) { 6050+ // Wait & retry 6051+ if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) { 6052+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); 6053+ } 6054+ } 6055+ 6056+ if (dst_rv == 0) { 6057+ set_best_effort_pts(avctx, &s->pts_stat, frame); 6058+ if (!s->running) { 6059+ s->running = 1; 6060+ av_log(avctx, AV_LOG_VERBOSE, "Decode running\n"); 6061+ } 6062+ } 6063+ 6064+ if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { 6065+ av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); 6066+ dst_rv = AVERROR_EOF; 6067+ s->capture.done = 1; 6068+ } 6069+ else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) 6070+ av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", 6071+ s->draining, s->capture.done); 6072+ else if (dst_rv && dst_rv != AVERROR(EAGAIN)) 6073+ av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", 6074+ s->draining, s->capture.done, dst_rv); 6075+ } 6076+ 6077+ ++i; 6078+ if (i >= 256) { 6079+ av_log(avctx, AV_LOG_ERROR, "Unexpectedly large retry count: %d\n", i); 6080+ src_rv = AVERROR(EIO); 6081+ } 6082+ 6083+ // Continue trying to enqueue packets if either 6084+ // (a) we succeeded last time OR 6085+ // (b) we didn't ret a frame and we can retry the input 6086+ } while (src_rv == NQ_OK || (dst_rv == AVERROR(EAGAIN) && RETRY_NQ(src_rv))); 6087+ 6088+ // Ensure that the frame contains nothing if we aren't returning a frame 6089+ // (might happen when discarding) 6090+ if (dst_rv) 6091+ av_frame_unref(frame); 6092+ 6093+ // If we got a frame this time ask for a pkt next time 6094+ s->req_pkt = (dst_rv == 0) ? s->req_pkt + 1 : 0; 6095+ 6096+#if 0 6097+ if (dst_rv == 0) 6098+ { 6099+ static int z = 0; 6100+ if (++z > 50) { 6101+ av_log(avctx, AV_LOG_ERROR, "Streamoff and die?\n"); 6102+ ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); 6103+ return -1; 6104+ } 6105+ } 6106+#endif 6107+ 6108+ return dst_rv == 0 ? 
0 : 6109+ src_rv < 0 ? src_rv : 6110+ dst_rv < 0 ? dst_rv : 6111+ AVERROR(EAGAIN); 6112+} 6113+ 6114+#if 0 6115+#include <time.h> 6116+static int64_t us_time(void) 6117+{ 6118+ struct timespec ts; 6119+ clock_gettime(CLOCK_MONOTONIC, &ts); 6120+ return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; 6121+} 6122+ 6123+static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) 6124+{ 6125+ int ret; 6126+ const int64_t now = us_time(); 6127+ int64_t done; 6128+ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); 6129+ ret = v4l2_receive_frame2(avctx, frame); 6130+ done = us_time(); 6131+ av_log(avctx, AV_LOG_TRACE, ">>> %s: rx time=%" PRId64 ", rv=%d\n", __func__, done - now, ret); 6132+ return ret; 6133+} 6134+#endif 6135+ 6136+static uint32_t 6137+avprofile_to_v4l2(const enum AVCodecID codec_id, const int avprofile) 6138+{ 6139+ switch (codec_id) { 6140+ case AV_CODEC_ID_H264: 6141+ switch (avprofile) { 6142+ case FF_PROFILE_H264_BASELINE: 6143+ return V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE; 6144+ case FF_PROFILE_H264_CONSTRAINED_BASELINE: 6145+ return V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE; 6146+ case FF_PROFILE_H264_MAIN: 6147+ return V4L2_MPEG_VIDEO_H264_PROFILE_MAIN; 6148+ case FF_PROFILE_H264_EXTENDED: 6149+ return V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED; 6150+ case FF_PROFILE_H264_HIGH: 6151+ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH; 6152+ case FF_PROFILE_H264_HIGH_10: 6153+ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10; 6154+ case FF_PROFILE_H264_HIGH_10_INTRA: 6155+ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA; 6156+ case FF_PROFILE_H264_MULTIVIEW_HIGH: 6157+ case FF_PROFILE_H264_HIGH_422: 6158+ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422; 6159+ case FF_PROFILE_H264_HIGH_422_INTRA: 6160+ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA; 6161+ case FF_PROFILE_H264_STEREO_HIGH: 6162+ return V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH; 6163+ case FF_PROFILE_H264_HIGH_444_PREDICTIVE: 6164+ return 
V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE; 6165+ case FF_PROFILE_H264_HIGH_444_INTRA: 6166+ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA; 6167+ case FF_PROFILE_H264_CAVLC_444: 6168+ return V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA; 6169+ case FF_PROFILE_H264_HIGH_444: 6170+ default: 6171+ break; 6172+// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE = 12, 6173+// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH = 13, 6174+// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA = 14, 6175+// V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH = 16, 6176+// V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH = 17, 6177+ } 6178+ break; 6179+ case AV_CODEC_ID_MPEG2VIDEO: 6180+ case AV_CODEC_ID_MPEG4: 6181+ case AV_CODEC_ID_VC1: 6182+ case AV_CODEC_ID_VP8: 6183+ case AV_CODEC_ID_VP9: 6184+ case AV_CODEC_ID_AV1: 6185+ // Most profiles are a simple number that matches the V4L2 enum 6186+ return avprofile; 6187+ default: 6188+ break; 6189+ } 6190+ return ~(uint32_t)0; 6191+} 6192+ 6193+// This check mirrors Chrome's profile check by testing to see if the profile 6194+// exists as a possible value for the V4L2 profile control 6195+static int 6196+check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s) 6197+{ 6198+ struct v4l2_queryctrl query_ctrl; 6199+ struct v4l2_querymenu query_menu; 6200+ uint32_t profile_id; 6201+ 6202+ // An unset profile is almost certainly zero or -99 - do not reject 6203+ if (avctx->profile <= 0) { 6204+ av_log(avctx, AV_LOG_VERBOSE, "Profile %d <= 0 - check skipped\n", avctx->profile); 6205+ return 0; 6206+ } 6207+ 6208+ memset(&query_ctrl, 0, sizeof(query_ctrl)); 6209+ switch (avctx->codec_id) { 6210+ case AV_CODEC_ID_MPEG2VIDEO: 6211+ profile_id = V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE; 6212+ break; 6213+ case AV_CODEC_ID_MPEG4: 6214+ profile_id = V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE; 6215+ break; 6216+ case AV_CODEC_ID_H264: 6217+ profile_id = V4L2_CID_MPEG_VIDEO_H264_PROFILE; 6218+ break; 6219+ case AV_CODEC_ID_VP8: 6220+ profile_id = 
V4L2_CID_MPEG_VIDEO_VP8_PROFILE; 6221+ break; 6222+ case AV_CODEC_ID_VP9: 6223+ profile_id = V4L2_CID_MPEG_VIDEO_VP9_PROFILE; 6224+ break; 6225+#ifdef V4L2_CID_MPEG_VIDEO_AV1_PROFILE 6226+ case AV_CODEC_ID_AV1: 6227+ profile_id = V4L2_CID_MPEG_VIDEO_AV1_PROFILE; 6228+ break; 6229+#endif 6230+ default: 6231+ av_log(avctx, AV_LOG_VERBOSE, "Can't map profile for codec id %d; profile check skipped\n", avctx->codec_id); 6232+ return 0; 6233+ } 6234+ 6235+ query_ctrl = (struct v4l2_queryctrl){.id = profile_id}; 6236+ if (ioctl(s->fd, VIDIOC_QUERYCTRL, &query_ctrl) != 0) { 6237+ av_log(avctx, AV_LOG_VERBOSE, "Query profile ctrl (%#x) not supported: assume OK\n", query_ctrl.id); 6238+ } 6239+ else { 6240+ av_log(avctx, AV_LOG_DEBUG, "%s: Control supported: %#x\n", __func__, query_ctrl.id); 6241+ 6242+ query_menu = (struct v4l2_querymenu){ 6243+ .id = query_ctrl.id, 6244+ .index = avprofile_to_v4l2(avctx->codec_id, avctx->profile), 6245+ }; 6246+ 6247+ if (query_menu.index > query_ctrl.maximum || 6248+ query_menu.index < query_ctrl.minimum || 6249+ ioctl(s->fd, VIDIOC_QUERYMENU, &query_menu) != 0) { 6250+ return AVERROR(ENOENT); 6251+ } 6252+ } 6253+ 6254+ return 0; 6255+}; 6256+ 6257+static int 6258+check_size(AVCodecContext * const avctx, V4L2m2mContext * const s, const uint32_t fcc) 6259+{ 6260+ unsigned int i; 6261+ const uint32_t w = avctx->coded_width; 6262+ const uint32_t h = avctx->coded_height; 6263+ 6264+ if (w == 0 || h == 0 || fcc == 0) { 6265+ av_log(avctx, AV_LOG_TRACE, "%s: Size %dx%d or fcc %s empty\n", __func__, w, h, av_fourcc2str(fcc)); 6266+ return 0; 6267+ } 6268+ if ((s->quirks & FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN) != 0) { 6269+ av_log(avctx, AV_LOG_TRACE, "%s: Skipped (quirk): Size %dx%d, fcc %s\n", __func__, w, h, av_fourcc2str(fcc)); 6270+ return 0; 6271+ } 6272+ 6273+ for (i = 0;; ++i) { 6274+ struct v4l2_frmsizeenum fs = { 6275+ .index = i, 6276+ .pixel_format = fcc, 6277+ }; 6278+ 6279+ while (ioctl(s->fd, VIDIOC_ENUM_FRAMESIZES, &fs) != 0) { 
6280+ const int err = AVERROR(errno); 6281+ if (err == AVERROR(EINTR)) 6282+ continue; 6283+ if (i == 0 && err == AVERROR(ENOTTY)) { 6284+ av_log(avctx, AV_LOG_DEBUG, "Framesize enum not supported\n"); 6285+ return 0; 6286+ } 6287+ if (err != AVERROR(EINVAL)) { 6288+ av_log(avctx, AV_LOG_ERROR, "Failed to enum framesizes: %s", av_err2str(err)); 6289+ return err; 6290+ } 6291+ av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in %u frame size enums\n", 6292+ w, h, av_fourcc2str(fcc), i); 6293+ return err; 6294+ } 6295+ 6296+ switch (fs.type) { 6297+ case V4L2_FRMSIZE_TYPE_DISCRETE: 6298+ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Discrete: %dx%d\n", __func__, i, 6299+ fs.discrete.width,fs.discrete.height); 6300+ if (w == fs.discrete.width && h == fs.discrete.height) 6301+ return 0; 6302+ break; 6303+ case V4L2_FRMSIZE_TYPE_STEPWISE: 6304+ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Stepwise: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i, 6305+ fs.stepwise.min_width, fs.stepwise.min_height, 6306+ fs.stepwise.max_width, fs.stepwise.max_height, 6307+ fs.stepwise.step_width,fs.stepwise.step_height); 6308+ if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && 6309+ h >= fs.stepwise.min_height && h <= fs.stepwise.max_height && 6310+ (w - fs.stepwise.min_width) % fs.stepwise.step_width == 0 && 6311+ (h - fs.stepwise.min_height) % fs.stepwise.step_height == 0) 6312+ return 0; 6313+ break; 6314+ case V4L2_FRMSIZE_TYPE_CONTINUOUS: 6315+ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Continuous: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i, 6316+ fs.stepwise.min_width, fs.stepwise.min_height, 6317+ fs.stepwise.max_width, fs.stepwise.max_height, 6318+ fs.stepwise.step_width,fs.stepwise.step_height); 6319+ if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && 6320+ h >= fs.stepwise.min_height && h <= fs.stepwise.max_height) 6321+ return 0; 6322+ break; 6323+ default: 6324+ av_log(avctx, AV_LOG_ERROR, "Unexpected framesize enum: %d", fs.type); 6325+ return 
AVERROR(EINVAL); 6326+ } 6327+ } 6328+} 6329+ 6330+static int 6331+get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s) 6332+{ 6333+ struct v4l2_capability cap; 6334+ 6335+ memset(&cap, 0, sizeof(cap)); 6336+ while (ioctl(s->fd, VIDIOC_QUERYCAP, &cap) != 0) { 6337+ int err = errno; 6338+ if (err == EINTR) 6339+ continue; 6340+ av_log(avctx, AV_LOG_ERROR, "V4L2: Failed to get capabilities: %s\n", strerror(err)); 6341+ return AVERROR(err); 6342+ } 6343+ 6344+ // Could be made table driven if we have a few more but right now there 6345+ // seems no point 6346+ 6347+ // Meson (amlogic) always gives a resolution changed event after output 6348+ // streamon and userspace must (re)allocate capture buffers and streamon 6349+ // capture to clear the event even if the capture buffers were the right 6350+ // size in the first place. 6351+ if (strcmp(cap.driver, "meson-vdec") == 0) 6352+ s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS | FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN; 6353+ 6354+ av_log(avctx, AV_LOG_DEBUG, "Driver '%s': Quirks=%#x\n", cap.driver, s->quirks); 6355+ return 0; 6356+} 6357+ 6358+// This heuristic is for H264 but use for everything 6359+static uint32_t max_coded_size(const AVCodecContext * const avctx) 6360+{ 6361+ uint32_t wxh = avctx->coded_width * avctx->coded_height; 6362+ uint32_t size; 6363+ 6364+ size = wxh * 3 / 2; 6365+ // H.264 Annex A table A-1 gives minCR which is either 2 or 4 6366+ // unfortunately that doesn't yield an actually useful limit 6367+ // and it should be noted that frame 0 is special cased to allow 6368+ // a bigger number which really isn't helpful for us. 
So just pick 6369+ // frame_size / 2 6370+ size /= 2; 6371+ // Add 64k to allow for any overheads and/or encoder hopefulness 6372+ // with small WxH 6373+ return size + (1 << 16); 6374+} 6375+ 6376+static void 6377+parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s) 6378+{ 6379+ s->reorder_size = 0; 6380+ 6381+ if (!avctx->extradata || !avctx->extradata_size) 6382+ return; 6383+ 6384+ switch (avctx->codec_id) { 6385+#if CONFIG_H264_DECODER 6386+ case AV_CODEC_ID_H264: 6387+ { 6388+ H264ParamSets ps; 6389+ int is_avc = 0; 6390+ int nal_length_size = 0; 6391+ int ret; 6392+ 6393+ memset(&ps, 0, sizeof(ps)); 6394+ 6395+ ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size, 6396+ &ps, &is_avc, &nal_length_size, 6397+ avctx->err_recognition, avctx); 6398+ if (ret > 0) { 6399+ const SPS * sps = NULL; 6400+ unsigned int i; 6401+ for (i = 0; i != MAX_SPS_COUNT; ++i) { 6402+ if (ps.sps_list[i]) { 6403+ sps = (const SPS *)ps.sps_list[i]->data; 6404+ break; 6405+ } 6406+ } 6407+ if (sps) { 6408+ avctx->profile = ff_h264_get_profile(sps); 6409+ avctx->level = sps->level_idc; 6410+ s->reorder_size = sps->num_reorder_frames; 6411+ } 6412+ } 6413+ ff_h264_ps_uninit(&ps); 6414+ break; 6415+ } 6416+#endif 6417+#if CONFIG_HEVC_DECODER 6418+ case AV_CODEC_ID_HEVC: 6419+ { 6420+ HEVCParamSets ps; 6421+ HEVCSEI sei; 6422+ int is_nalff = 0; 6423+ int nal_length_size = 0; 6424+ int ret; 6425+ 6426+ memset(&ps, 0, sizeof(ps)); 6427+ memset(&sei, 0, sizeof(sei)); 6428+ 6429+ ret = ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size, 6430+ &ps, &sei, &is_nalff, &nal_length_size, 6431+ avctx->err_recognition, 0, avctx); 6432+ if (ret > 0) { 6433+ const HEVCSPS * sps = NULL; 6434+ unsigned int i; 6435+ for (i = 0; i != HEVC_MAX_SPS_COUNT; ++i) { 6436+ if (ps.sps_list[i]) { 6437+ sps = (const HEVCSPS *)ps.sps_list[i]->data; 6438+ break; 6439+ } 6440+ } 6441+ if (sps) { 6442+ avctx->profile = sps->ptl.general_ptl.profile_idc; 6443+ avctx->level 
= sps->ptl.general_ptl.level_idc; 6444+ s->reorder_size = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering; 6445+ } 6446+ } 6447+ ff_hevc_ps_uninit(&ps); 6448+ ff_hevc_reset_sei(&sei); 6449+ break; 6450+ } 6451+#endif 6452+ default: 6453+ break; 6454+ } 6455+} 6456+ 6457+static int 6458+choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s) 6459+{ 6460+ const V4L2m2mPriv * const priv = avctx->priv_data; 6461+ unsigned int fmts_n; 6462+ uint32_t *fmts = ff_v4l2_context_enum_drm_formats(&s->capture, &fmts_n); 6463+ enum AVPixelFormat *fmts2 = NULL; 6464+ enum AVPixelFormat gf_pix_fmt; 6465+ unsigned int i; 6466+ unsigned int n = 0; 6467+ unsigned int pref_n = 1; 6468+ int rv = AVERROR(ENOENT); 6469+ 6470+ if (!fmts) 6471+ return AVERROR(ENOENT); 6472+ 6473+ if ((fmts2 = av_malloc(sizeof(*fmts2) * (fmts_n + 3))) == NULL) { 6474+ rv = AVERROR(ENOMEM); 6475+ goto error; 6476+ } 6477+ 6478+ // Filter for formats that are supported by ffmpeg and 6479+ // can accommodate the stream size 6480+ fmts2[n++] = AV_PIX_FMT_DRM_PRIME; 6481+ for (i = 0; i != fmts_n; ++i) { 6482+ const enum AVPixelFormat f = ff_v4l2_format_v4l2_to_avfmt(fmts[i], AV_CODEC_ID_RAWVIDEO); 6483+ av_log(avctx, AV_LOG_TRACE, "VLC pix %s -> %s\n", av_fourcc2str(fmts[i]), av_get_pix_fmt_name(f)); 6484+ if (f == AV_PIX_FMT_NONE) 6485+ continue; 6486+ 6487+ if (check_size(avctx, s, fmts[i]) != 0) 6488+ continue; 6489+ 6490+ if (f == priv->pix_fmt) 6491+ pref_n = n; 6492+ fmts2[n++] = f; 6493+ } 6494+ 6495+ if (n < 2) { 6496+ av_log(avctx, AV_LOG_DEBUG, "%s: No usable formats found\n", __func__); 6497+ goto error; 6498+ } 6499+ 6500+ if (n != 2) { 6501+ // ffmpeg.c really only expects one s/w format. It thinks that the 6502+ // last format in the list is the s/w format of the h/w format but 6503+ // also chooses the first non-h/w format as the preferred s/w format. 
6504+ // The only way of reconciling this is to dup our preferred format into 6505+ // both last & first place :-( 6506+ const enum AVPixelFormat t = fmts2[pref_n]; 6507+ fmts2[pref_n] = fmts2[1]; 6508+ fmts2[1] = t; 6509+ fmts2[n++] = t; 6510+ } 6511+ 6512+ fmts2[n] = AV_PIX_FMT_NONE; 6513+ 6514+ gf_pix_fmt = ff_get_format(avctx, fmts2); 6515+ av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n", 6516+ avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), 6517+ avctx->coded_width, avctx->coded_height, 6518+ gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt)); 6519+ 6520+ if (gf_pix_fmt == AV_PIX_FMT_NONE) 6521+ goto error; 6522+ 6523+ if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { 6524+ avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; 6525+ s->capture.av_pix_fmt = avctx->sw_pix_fmt; 6526+ s->output_drm = 1; 6527+ } 6528+ else { 6529+ avctx->pix_fmt = gf_pix_fmt; 6530+ s->capture.av_pix_fmt = gf_pix_fmt; 6531+ s->output_drm = 0; 6532+ } 6533+ 6534+ // Get format converts capture.av_pix_fmt back into a V4L2 format in the context 6535+ if ((rv = ff_v4l2_context_get_format(&s->capture, 0)) != 0) 6536+ goto error; 6537+ rv = ff_v4l2_context_set_format(&s->capture); 6538+ 6539+error: 6540+ av_free(fmts2); 6541+ av_free(fmts); 6542+ return rv; 6543+} 6544+ 6545 static av_cold int v4l2_decode_init(AVCodecContext *avctx) 6546 { 6547 V4L2Context *capture, *output; 6548@@ -185,10 +1167,27 @@ static av_cold int v4l2_decode_init(AVCo 6549 V4L2m2mPriv *priv = avctx->priv_data; 6550 int ret; 6551 6552+ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); 6553+ 6554+ if (avctx->codec_id == AV_CODEC_ID_H264) { 6555+ if (avctx->ticks_per_frame == 1) { 6556+ if(avctx->time_base.den < INT_MAX/2) { 6557+ avctx->time_base.den *= 2; 6558+ } else 6559+ avctx->time_base.num /= 2; 6560+ } 6561+ avctx->ticks_per_frame = 2; 6562+ } 6563+ 6564 ret = ff_v4l2_m2m_create_context(priv, &s); 6565 if (ret < 0) 6566 return ret; 6567 6568+ 
parse_extradata(avctx, s); 6569+ 6570+ xlat_init(&s->xlat); 6571+ pts_stats_init(&s->pts_stat, avctx, "decoder"); 6572+ 6573 capture = &s->capture; 6574 output = &s->output; 6575 6576@@ -196,14 +1195,45 @@ static av_cold int v4l2_decode_init(AVCo 6577 * by the v4l2 driver; this event will trigger a full pipeline reconfig and 6578 * the proper values will be retrieved from the kernel driver. 6579 */ 6580- output->height = capture->height = avctx->coded_height; 6581- output->width = capture->width = avctx->coded_width; 6582+// output->height = capture->height = avctx->coded_height; 6583+// output->width = capture->width = avctx->coded_width; 6584+ output->height = capture->height = 0; 6585+ output->width = capture->width = 0; 6586 6587 output->av_codec_id = avctx->codec_id; 6588 output->av_pix_fmt = AV_PIX_FMT_NONE; 6589+ output->min_buf_size = max_coded_size(avctx); 6590 6591 capture->av_codec_id = AV_CODEC_ID_RAWVIDEO; 6592 capture->av_pix_fmt = avctx->pix_fmt; 6593+ capture->min_buf_size = 0; 6594+ 6595+ capture->av_pix_fmt = AV_PIX_FMT_NONE; 6596+ s->output_drm = 0; 6597+ 6598+ s->db_ctl = NULL; 6599+ if (priv->dmabuf_alloc != NULL && strcmp(priv->dmabuf_alloc, "v4l2") != 0) { 6600+ if (strcmp(priv->dmabuf_alloc, "cma") == 0) 6601+ s->db_ctl = dmabufs_ctl_new(); 6602+ else { 6603+ av_log(avctx, AV_LOG_ERROR, "Unknown dmabuf alloc method: '%s'\n", priv->dmabuf_alloc); 6604+ return AVERROR(EINVAL); 6605+ } 6606+ if (!s->db_ctl) { 6607+ av_log(avctx, AV_LOG_ERROR, "Can't open dmabuf provider '%s'\n", priv->dmabuf_alloc); 6608+ return AVERROR(ENOMEM); 6609+ } 6610+ } 6611+ 6612+ s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); 6613+ if (!s->device_ref) { 6614+ ret = AVERROR(ENOMEM); 6615+ return ret; 6616+ } 6617+ 6618+ ret = av_hwdevice_ctx_init(s->device_ref); 6619+ if (ret < 0) 6620+ return ret; 6621 6622 s->avctx = avctx; 6623 ret = ff_v4l2_m2m_codec_init(priv); 6624@@ -212,12 +1242,90 @@ static av_cold int v4l2_decode_init(AVCo 6625 return ret; 6626 
} 6627 6628- return v4l2_prepare_decoder(s); 6629+ if (avctx->extradata && 6630+ (ret = copy_extradata(avctx, avctx->extradata, avctx->extradata_size, &s->extdata_data, &s->extdata_size)) != 0) { 6631+ av_log(avctx, AV_LOG_ERROR, "Failed to copy extradata from context: %s\n", av_err2str(ret)); 6632+#if DUMP_FAILED_EXTRADATA 6633+ log_dump(avctx, AV_LOG_INFO, avctx->extradata, avctx->extradata_size); 6634+#endif 6635+ return ret; 6636+ } 6637+ 6638+ if ((ret = get_quirks(avctx, s)) != 0) 6639+ return ret; 6640+ 6641+ if ((ret = check_profile(avctx, s)) != 0) { 6642+ av_log(avctx, AV_LOG_WARNING, "Profile %d not supported by decode\n", avctx->profile); 6643+ return ret; 6644+ } 6645+ 6646+ // Size check done as part of format filtering 6647+ if ((ret = choose_capture_format(avctx, s)) != 0) 6648+ return ret; 6649+ 6650+ if ((ret = v4l2_prepare_decoder(s)) < 0) 6651+ return ret; 6652+ 6653+ return 0; 6654 } 6655 6656 static av_cold int v4l2_decode_close(AVCodecContext *avctx) 6657 { 6658- return ff_v4l2_m2m_codec_end(avctx->priv_data); 6659+ int rv; 6660+ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); 6661+ rv = ff_v4l2_m2m_codec_end(avctx->priv_data); 6662+ av_log(avctx, AV_LOG_TRACE, ">>> %s: rv=%d\n", __func__, rv); 6663+ return rv; 6664+} 6665+ 6666+static void v4l2_decode_flush(AVCodecContext *avctx) 6667+{ 6668+ // An alternative and more drastic form of flush is to simply do this: 6669+ // v4l2_decode_close(avctx); 6670+ // v4l2_decode_init(avctx); 6671+ // The downside is that this keeps a decoder open until all the frames 6672+ // associated with it have been returned. This is a bit wasteful on 6673+ // possibly limited h/w resources and fails on a Pi for this reason unless 6674+ // more GPU mem is allocated than is the default. 
6675+ 6676+ V4L2m2mPriv * const priv = avctx->priv_data; 6677+ V4L2m2mContext * const s = priv->context; 6678+ V4L2Context * const output = &s->output; 6679+ V4L2Context * const capture = &s->capture; 6680+ 6681+ av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon); 6682+ 6683+ // Reflushing everything is benign, quick and avoids having to worry about 6684+ // states like EOS processing so don't try to optimize out (having got it 6685+ // wrong once) 6686+ 6687+ ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF); 6688+ 6689+ // Clear any buffered input packet 6690+ av_packet_unref(&s->buf_pkt); 6691+ 6692+ // Clear a pending EOS 6693+ if (ff_v4l2_ctx_eos(capture)) { 6694+ // Arguably we could delay this but this is easy and doesn't require 6695+ // thought or extra vars 6696+ ff_v4l2_context_set_status(capture, VIDIOC_STREAMOFF); 6697+ ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); 6698+ } 6699+ 6700+ // V4L2 makes no guarantees about whether decoded frames are flushed or not 6701+ // so mark all frames we are tracking to be discarded if they appear 6702+ xlat_flush(&s->xlat); 6703+ 6704+ // resend extradata 6705+ s->extdata_sent = 0; 6706+ // clear status vars 6707+ s->running = 0; 6708+ s->draining = 0; 6709+ output->done = 0; 6710+ capture->done = 0; 6711+ 6712+ // Stream on will occur when we actually submit a new frame 6713+ av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__); 6714 } 6715 6716 #define OFFSET(x) offsetof(V4L2m2mPriv, x) 6717@@ -226,10 +1334,17 @@ static av_cold int v4l2_decode_close(AVC 6718 static const AVOption options[] = { 6719 V4L_M2M_DEFAULT_OPTS, 6720 { "num_capture_buffers", "Number of buffers in the capture context", 6721- OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 20, INT_MAX, FLAGS }, 6722+ OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS }, 6723+ { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = 
AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS }, 6724+ { "dmabuf_alloc", "Dmabuf alloc method", OFFSET(dmabuf_alloc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS }, 6725 { NULL}, 6726 }; 6727 6728+static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = { 6729+ HW_CONFIG_INTERNAL(DRM_PRIME), 6730+ NULL 6731+}; 6732+ 6733 #define M2MDEC_CLASS(NAME) \ 6734 static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \ 6735 .class_name = #NAME "_v4l2m2m_decoder", \ 6736@@ -250,10 +1365,16 @@ static const AVOption options[] = { 6737 .init = v4l2_decode_init, \ 6738 FF_CODEC_RECEIVE_FRAME_CB(v4l2_receive_frame), \ 6739 .close = v4l2_decode_close, \ 6740+ .flush = v4l2_decode_flush, \ 6741 .bsfs = bsf_name, \ 6742 .p.capabilities = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \ 6743 .caps_internal = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP, \ 6744 .p.wrapper_name = "v4l2m2m", \ 6745+ .p.pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \ 6746+ AV_PIX_FMT_NV12, \ 6747+ AV_PIX_FMT_YUV420P, \ 6748+ AV_PIX_FMT_NONE}, \ 6749+ .hw_configs = v4l2_m2m_hw_configs, \ 6750 } 6751 6752 M2MDEC(h264, "H.264", AV_CODEC_ID_H264, "h264_mp4toannexb"); 6753--- a/libavcodec/v4l2_m2m_enc.c 6754+++ b/libavcodec/v4l2_m2m_enc.c 6755@@ -24,6 +24,8 @@ 6756 #include <linux/videodev2.h> 6757 #include <sys/ioctl.h> 6758 #include <search.h> 6759+#include <drm_fourcc.h> 6760+ 6761 #include "encode.h" 6762 #include "libavcodec/avcodec.h" 6763 #include "libavutil/pixdesc.h" 6764@@ -38,6 +40,34 @@ 6765 #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x 6766 #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x 6767 6768+// P030 should be defined in drm_fourcc.h and hopefully will be sometime 6769+// in the future but until then... 
6770+#ifndef DRM_FORMAT_P030 6771+#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') 6772+#endif 6773+ 6774+#ifndef DRM_FORMAT_NV15 6775+#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') 6776+#endif 6777+ 6778+#ifndef DRM_FORMAT_NV20 6779+#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') 6780+#endif 6781+ 6782+#ifndef V4L2_CID_CODEC_BASE 6783+#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE 6784+#endif 6785+ 6786+// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined 6787+// in videodev2.h hopefully will be sometime in the future but until then... 6788+#ifndef V4L2_PIX_FMT_NV12_10_COL128 6789+#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') 6790+#endif 6791+ 6792+#ifndef V4L2_PIX_FMT_NV12_COL128 6793+#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ 6794+#endif 6795+ 6796 static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den) 6797 { 6798 struct v4l2_streamparm parm = { 0 }; 6799@@ -148,15 +178,14 @@ static inline int v4l2_mpeg4_profile_fro 6800 static int v4l2_check_b_frame_support(V4L2m2mContext *s) 6801 { 6802 if (s->avctx->max_b_frames) 6803- av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n"); 6804+ av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames); 6805 6806- v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0); 6807+ v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1); 6808 v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0); 6809 if (s->avctx->max_b_frames == 0) 6810 return 0; 6811 6812 avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding"); 6813- 6814 return AVERROR_PATCHWELCOME; 6815 } 6816 6817@@ -271,17 +300,208 @@ static int v4l2_prepare_encoder(V4L2m2mC 6818 return 0; 6819 } 6820 6821+static int 
avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame) 6822+{ 6823+ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; 6824+ 6825+ const uint32_t drm_fmt = src->layers[0].format; 6826+ // Treat INVALID as LINEAR 6827+ const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? 6828+ DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; 6829+ uint32_t pix_fmt = 0; 6830+ uint32_t w = 0; 6831+ uint32_t h = 0; 6832+ uint32_t bpl = src->layers[0].planes[0].pitch; 6833+ 6834+ // We really don't expect multiple layers 6835+ // All formats that we currently cope with are single object 6836+ 6837+ if (src->nb_layers != 1 || src->nb_objects != 1) 6838+ return AVERROR(EINVAL); 6839+ 6840+ switch (drm_fmt) { 6841+ case DRM_FORMAT_YUV420: 6842+ if (mod == DRM_FORMAT_MOD_LINEAR) { 6843+ if (src->layers[0].nb_planes != 3) 6844+ break; 6845+ pix_fmt = V4L2_PIX_FMT_YUV420; 6846+ h = src->layers[0].planes[1].offset / bpl; 6847+ w = bpl; 6848+ } 6849+ break; 6850+ 6851+ case DRM_FORMAT_NV12: 6852+ if (mod == DRM_FORMAT_MOD_LINEAR) { 6853+ if (src->layers[0].nb_planes != 2) 6854+ break; 6855+ pix_fmt = V4L2_PIX_FMT_NV12; 6856+ h = src->layers[0].planes[1].offset / bpl; 6857+ w = bpl; 6858+ } 6859+ else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { 6860+ if (src->layers[0].nb_planes != 2) 6861+ break; 6862+ pix_fmt = V4L2_PIX_FMT_NV12_COL128; 6863+ w = bpl; 6864+ h = src->layers[0].planes[1].offset / 128; 6865+ bpl = fourcc_mod_broadcom_param(mod); 6866+ } 6867+ break; 6868+ 6869+ case DRM_FORMAT_P030: 6870+ if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { 6871+ if (src->layers[0].nb_planes != 2) 6872+ break; 6873+ pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; 6874+ w = bpl / 2; // Matching lie to how we construct this 6875+ h = src->layers[0].planes[1].offset / 128; 6876+ bpl = fourcc_mod_broadcom_param(mod); 6877+ } 6878+ break; 6879+ 6880+ default: 6881+ break; 
6882+ } 6883+ 6884+ if (!pix_fmt) 6885+ return AVERROR(EINVAL); 6886+ 6887+ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { 6888+ struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; 6889+ 6890+ pix->width = w; 6891+ pix->height = h; 6892+ pix->pixelformat = pix_fmt; 6893+ pix->plane_fmt[0].bytesperline = bpl; 6894+ pix->num_planes = 1; 6895+ } 6896+ else { 6897+ struct v4l2_pix_format *const pix = &format->fmt.pix; 6898+ 6899+ pix->width = w; 6900+ pix->height = h; 6901+ pix->pixelformat = pix_fmt; 6902+ pix->bytesperline = bpl; 6903+ } 6904+ 6905+ return 0; 6906+} 6907+ 6908+// Do we have similar enough formats to be usable? 6909+static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b) 6910+{ 6911+ if (a->type != b->type) 6912+ return 0; 6913+ 6914+ if (V4L2_TYPE_IS_MULTIPLANAR(a->type)) { 6915+ const struct v4l2_pix_format_mplane *const pa = &a->fmt.pix_mp; 6916+ const struct v4l2_pix_format_mplane *const pb = &b->fmt.pix_mp; 6917+ unsigned int i; 6918+ if (pa->pixelformat != pb->pixelformat || 6919+ pa->num_planes != pb->num_planes) 6920+ return 0; 6921+ for (i = 0; i != pa->num_planes; ++i) { 6922+ if (pa->plane_fmt[i].bytesperline != pb->plane_fmt[i].bytesperline) 6923+ return 0; 6924+ } 6925+ } 6926+ else { 6927+ const struct v4l2_pix_format *const pa = &a->fmt.pix; 6928+ const struct v4l2_pix_format *const pb = &b->fmt.pix; 6929+ if (pa->pixelformat != pb->pixelformat || 6930+ pa->bytesperline != pb->bytesperline) 6931+ return 0; 6932+ } 6933+ return 1; 6934+} 6935+ 6936+static inline int q_full(const V4L2Context *const output) 6937+{ 6938+ return ff_v4l2_context_q_count(output) == output->num_buffers; 6939+} 6940+ 6941 static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) 6942 { 6943 V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; 6944 V4L2Context *const output = &s->output; 6945+ int rv; 6946+ const int needs_slot = q_full(output); 6947+ 6948+ av_log(avctx, AV_LOG_TRACE, "<<< %s; 
needs_slot=%d\n", __func__, needs_slot); 6949+ 6950+ // Signal EOF if needed (doesn't need q slot) 6951+ if (!frame) { 6952+ av_log(avctx, AV_LOG_TRACE, "--- %s: EOS\n", __func__); 6953+ return ff_v4l2_context_enqueue_frame(output, frame); 6954+ } 6955+ 6956+ if ((rv = ff_v4l2_dq_all(output, needs_slot? 500 : 0)) != 0) { 6957+ // We should be able to return AVERROR(EAGAIN) to indicate buffer 6958+ // exhaustion, but ffmpeg currently treats that as fatal. 6959+ av_log(avctx, AV_LOG_WARNING, "Failed to get buffer for src frame: %s\n", av_err2str(rv)); 6960+ return rv; 6961+ } 6962+ 6963+ if (s->input_drm && !output->streamon) { 6964+ struct v4l2_format req_format = {.type = output->format.type}; 6965+ 6966+ // Set format when we first get a buffer 6967+ if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) { 6968+ av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n"); 6969+ return rv; 6970+ } 6971+ 6972+ ff_v4l2_context_release(output); 6973+ 6974+ output->format = req_format; 6975+ 6976+ if ((rv = ff_v4l2_context_set_format(output)) != 0) { 6977+ av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n"); 6978+ return rv; 6979+ } 6980+ 6981+ if (!fmt_eq(&req_format, &output->format)) { 6982+ av_log(avctx, AV_LOG_ERROR, "Format mismatch after setup\n"); 6983+ return AVERROR(EINVAL); 6984+ } 6985+ 6986+ output->selection.top = frame->crop_top; 6987+ output->selection.left = frame->crop_left; 6988+ output->selection.width = av_frame_cropped_width(frame); 6989+ output->selection.height = av_frame_cropped_height(frame); 6990+ 6991+ if ((rv = ff_v4l2_context_init(output)) != 0) { 6992+ av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n"); 6993+ return rv; 6994+ } 6995+ 6996+ { 6997+ struct v4l2_selection selection = { 6998+ .type = V4L2_BUF_TYPE_VIDEO_OUTPUT, 6999+ .target = V4L2_SEL_TGT_CROP, 7000+ .r = output->selection 7001+ }; 7002+ if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0) { 7003+ av_log(avctx, AV_LOG_WARNING, "S_SELECTION 
(CROP) %dx%d @ %d,%d failed: %s\n", 7004+ selection.r.width, selection.r.height, selection.r.left, selection.r.top, 7005+ av_err2str(AVERROR(errno))); 7006+ } 7007+ av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n", 7008+ selection.r.width, selection.r.height, selection.r.left, selection.r.top); 7009+ } 7010+ } 7011 7012 #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME 7013- if (frame && frame->pict_type == AV_PICTURE_TYPE_I) 7014+ if (frame->pict_type == AV_PICTURE_TYPE_I) 7015 v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1); 7016 #endif 7017 7018- return ff_v4l2_context_enqueue_frame(output, frame); 7019+ rv = ff_v4l2_context_enqueue_frame(output, frame); 7020+ if (rv) { 7021+ av_log(avctx, AV_LOG_ERROR, "Enqueue frame failed: %s\n", av_err2str(rv)); 7022+ } 7023+ 7024+ return rv; 7025 } 7026 7027 static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) 7028@@ -292,6 +512,11 @@ static int v4l2_receive_packet(AVCodecCo 7029 AVFrame *frame = s->frame; 7030 int ret; 7031 7032+ av_log(avctx, AV_LOG_TRACE, "<<< %s: qlen out %d cap %d\n", __func__, 7033+ ff_v4l2_context_q_count(output), ff_v4l2_context_q_count(capture)); 7034+ 7035+ ff_v4l2_dq_all(output, 0); 7036+ 7037 if (s->draining) 7038 goto dequeue; 7039 7040@@ -328,7 +553,115 @@ static int v4l2_receive_packet(AVCodecCo 7041 } 7042 7043 dequeue: 7044- return ff_v4l2_context_dequeue_packet(capture, avpkt); 7045+ // Dequeue a frame 7046+ for (;;) { 7047+ int t = q_full(output) ? -1 : s->draining ? 300 : 0; 7048+ int rv2; 7049+ 7050+ // If output is full wait for either a packet or output to become not full 7051+ ret = ff_v4l2_context_dequeue_packet(capture, avpkt, t); 7052+ 7053+ // If output was full retry packet dequeue 7054+ t = (ret != AVERROR(EAGAIN) || t != -1) ? 0 : 300; 7055+ rv2 = ff_v4l2_dq_all(output, t); 7056+ if (t == 0 || rv2 != 0) 7057+ break; 7058+ } 7059+ if (ret) 7060+ return (s->draining && ret == AVERROR(EAGAIN)) ? 
AVERROR_EOF : ret; 7061+ 7062+ if (capture->first_buf == 1) { 7063+ uint8_t * data; 7064+ const int len = avpkt->size; 7065+ 7066+ // 1st buffer after streamon should be SPS/PPS 7067+ capture->first_buf = 2; 7068+ 7069+ // Clear both possible stores so there is no chance of confusion 7070+ av_freep(&s->extdata_data); 7071+ s->extdata_size = 0; 7072+ av_freep(&avctx->extradata); 7073+ avctx->extradata_size = 0; 7074+ 7075+ if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) 7076+ goto fail_no_mem; 7077+ 7078+ memcpy(data, avpkt->data, len); 7079+ av_packet_unref(avpkt); 7080+ 7081+ // We need to copy the header, but keep local if not global 7082+ if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) { 7083+ avctx->extradata = data; 7084+ avctx->extradata_size = len; 7085+ } 7086+ else { 7087+ s->extdata_data = data; 7088+ s->extdata_size = len; 7089+ } 7090+ 7091+ ret = ff_v4l2_context_dequeue_packet(capture, avpkt, 0); 7092+ ff_v4l2_dq_all(output, 0); 7093+ if (ret) 7094+ return ret; 7095+ } 7096+ 7097+ // First frame must be key so mark as such even if encoder forgot 7098+ if (capture->first_buf == 2) { 7099+ avpkt->flags |= AV_PKT_FLAG_KEY; 7100+ 7101+ // Add any extradata to the 1st packet we emit as we cannot create it at init 7102+ if (avctx->extradata_size > 0 && avctx->extradata) { 7103+ void * const side = av_packet_new_side_data(avpkt, 7104+ AV_PKT_DATA_NEW_EXTRADATA, 7105+ avctx->extradata_size); 7106+ if (!side) 7107+ goto fail_no_mem; 7108+ 7109+ memcpy(side, avctx->extradata, avctx->extradata_size); 7110+ } 7111+ } 7112+ 7113+ // Add SPS/PPS to the start of every key frame if non-global headers 7114+ if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) { 7115+ const size_t newlen = s->extdata_size + avpkt->size; 7116+ AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE); 7117+ 7118+ if (buf == NULL) 7119+ goto fail_no_mem; 7120+ 7121+ memcpy(buf->data, s->extdata_data, s->extdata_size); 7122+ 
memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size); 7123+ 7124+ av_buffer_unref(&avpkt->buf); 7125+ avpkt->buf = buf; 7126+ avpkt->data = buf->data; 7127+ avpkt->size = newlen; 7128+ } 7129+ else if (ff_v4l2_context_q_count(capture) < 2) { 7130+ // Avoid running out of capture buffers 7131+ // In most cases the buffers will be returned quickly in which case 7132+ // we don't copy and can use the v4l2 buffers directly but sometimes 7133+ // ffmpeg seems to hold onto all of them for a long time (.mkv 7134+ // creation?) so avoid deadlock in those cases. 7135+ AVBufferRef * const buf = av_buffer_alloc(avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE); 7136+ if (buf == NULL) 7137+ goto fail_no_mem; 7138+ 7139+ memcpy(buf->data, avpkt->data, avpkt->size); 7140+ av_buffer_unref(&avpkt->buf); // Will recycle the V4L2 buffer 7141+ 7142+ avpkt->buf = buf; 7143+ avpkt->data = buf->data; 7144+ } 7145+ 7146+ capture->first_buf = 0; 7147+ return 0; 7148+ 7149+fail_no_mem: 7150+ av_log(avctx, AV_LOG_ERROR, "Rx pkt failed: No memory\n"); 7151+ ret = AVERROR(ENOMEM); 7152+ av_packet_unref(avpkt); 7153+ return ret; 7154 } 7155 7156 static av_cold int v4l2_encode_init(AVCodecContext *avctx) 7157@@ -340,6 +673,8 @@ static av_cold int v4l2_encode_init(AVCo 7158 uint32_t v4l2_fmt_output; 7159 int ret; 7160 7161+ av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt); 7162+ 7163 ret = ff_v4l2_m2m_create_context(priv, &s); 7164 if (ret < 0) 7165 return ret; 7166@@ -347,13 +682,17 @@ static av_cold int v4l2_encode_init(AVCo 7167 capture = &s->capture; 7168 output = &s->output; 7169 7170+ s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME); 7171+ 7172 /* common settings output/capture */ 7173 output->height = capture->height = avctx->height; 7174 output->width = capture->width = avctx->width; 7175 7176 /* output context */ 7177 output->av_codec_id = AV_CODEC_ID_RAWVIDEO; 7178- output->av_pix_fmt = avctx->pix_fmt; 7179+ output->av_pix_fmt = 
!s->input_drm ? avctx->pix_fmt : 7180+ avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? avctx->sw_pix_fmt : 7181+ AV_PIX_FMT_YUV420P; 7182 7183 /* capture context */ 7184 capture->av_codec_id = avctx->codec_id; 7185@@ -372,7 +711,7 @@ static av_cold int v4l2_encode_init(AVCo 7186 v4l2_fmt_output = output->format.fmt.pix.pixelformat; 7187 7188 pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO); 7189- if (pix_fmt_output != avctx->pix_fmt) { 7190+ if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) { 7191 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output); 7192 av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name); 7193 return AVERROR(EINVAL); 7194@@ -390,9 +729,10 @@ static av_cold int v4l2_encode_close(AVC 7195 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM 7196 7197 #define V4L_M2M_CAPTURE_OPTS \ 7198- V4L_M2M_DEFAULT_OPTS,\ 7199+ { "num_output_buffers", "Number of buffers in the output context",\ 7200+ OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },\ 7201 { "num_capture_buffers", "Number of buffers in the capture context", \ 7202- OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS } 7203+ OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 8 }, 8, INT_MAX, FLAGS } 7204 7205 static const AVOption mpeg4_options[] = { 7206 V4L_M2M_CAPTURE_OPTS, 7207--- /dev/null 7208+++ b/libavcodec/v4l2_req_decode_q.c 7209@@ -0,0 +1,84 @@ 7210+#include <memory.h> 7211+#include <semaphore.h> 7212+#include <pthread.h> 7213+ 7214+#include "v4l2_req_decode_q.h" 7215+ 7216+int decode_q_in_q(const req_decode_ent * const d) 7217+{ 7218+ return d->in_q; 7219+} 7220+ 7221+void decode_q_add(req_decode_q * const q, req_decode_ent * const d) 7222+{ 7223+ pthread_mutex_lock(&q->q_lock); 7224+ if (!q->head) { 7225+ q->head = d; 7226+ q->tail = d; 7227+ d->prev = NULL; 7228+ } 7229+ else { 7230+ q->tail->next = d; 7231+ d->prev = q->tail; 7232+ 
q->tail = d; 7233+ } 7234+ d->next = NULL; 7235+ d->in_q = 1; 7236+ pthread_mutex_unlock(&q->q_lock); 7237+} 7238+ 7239+// Remove entry from Q - if head wake-up anything that was waiting 7240+void decode_q_remove(req_decode_q * const q, req_decode_ent * const d) 7241+{ 7242+ int try_signal = 0; 7243+ 7244+ if (!d->in_q) 7245+ return; 7246+ 7247+ pthread_mutex_lock(&q->q_lock); 7248+ if (d->prev) 7249+ d->prev->next = d->next; 7250+ else { 7251+ try_signal = 1; // Only need to signal if we were head 7252+ q->head = d->next; 7253+ } 7254+ 7255+ if (d->next) 7256+ d->next->prev = d->prev; 7257+ else 7258+ q->tail = d->prev; 7259+ 7260+ // Not strictly needed but makes debug easier 7261+ d->next = NULL; 7262+ d->prev = NULL; 7263+ d->in_q = 0; 7264+ pthread_mutex_unlock(&q->q_lock); 7265+ 7266+ if (try_signal) 7267+ pthread_cond_broadcast(&q->q_cond); 7268+} 7269+ 7270+void decode_q_wait(req_decode_q * const q, req_decode_ent * const d) 7271+{ 7272+ pthread_mutex_lock(&q->q_lock); 7273+ 7274+ while (q->head != d) 7275+ pthread_cond_wait(&q->q_cond, &q->q_lock); 7276+ 7277+ pthread_mutex_unlock(&q->q_lock); 7278+} 7279+ 7280+void decode_q_uninit(req_decode_q * const q) 7281+{ 7282+ pthread_mutex_destroy(&q->q_lock); 7283+ pthread_cond_destroy(&q->q_cond); 7284+} 7285+ 7286+void decode_q_init(req_decode_q * const q) 7287+{ 7288+ memset(q, 0, sizeof(*q)); 7289+ pthread_mutex_init(&q->q_lock, NULL); 7290+ pthread_cond_init(&q->q_cond, NULL); 7291+} 7292+ 7293+ 7294--- /dev/null 7295+++ b/libavcodec/v4l2_req_decode_q.h 7296@@ -0,0 +1,27 @@ 7297+#ifndef AVCODEC_V4L2_REQ_DECODE_Q_H 7298+#define AVCODEC_V4L2_REQ_DECODE_Q_H 7299+ 7300+#include <pthread.h> 7301+ 7302+typedef struct req_decode_ent { 7303+ struct req_decode_ent * next; 7304+ struct req_decode_ent * prev; 7305+ int in_q; 7306+} req_decode_ent; 7307+ 7308+typedef struct req_decode_q { 7309+ pthread_mutex_t q_lock; 7310+ pthread_cond_t q_cond; 7311+ req_decode_ent * head; 7312+ req_decode_ent * tail; 7313+} 
// Zero *format and fill in type, dimensions and pixel format.
// Output (bitstream) queues get a fixed 4MB sizeimage; all other queue
// types get 0. For multiplanar types only plane 0 sizeimage is set.
static void v4l2_setup_format(struct v4l2_format *format, unsigned int type,
                              unsigned int width, unsigned int height,
                              unsigned int pixelformat)
{
    unsigned int sizeimage = 0;

    memset(format, 0, sizeof(*format));
    format->type = type;

    if (V4L2_TYPE_IS_OUTPUT(type))
        sizeimage = 4 * 1024 * 1024;

    if (!V4L2_TYPE_IS_MULTIPLANAR(type)) {
        // Single-planar layout
        format->fmt.pix.pixelformat = pixelformat;
        format->fmt.pix.sizeimage = sizeimage;
        format->fmt.pix.width = width;
        format->fmt.pix.height = height;
        return;
    }

    // Multi-planar layout
    format->fmt.pix_mp.pixelformat = pixelformat;
    format->fmt.pix_mp.plane_fmt[0].sizeimage = sizeimage;
    format->fmt.pix_mp.width = width;
    format->fmt.pix_mp.height = height;
}
4 : scan->dev_size * 2; 7425+ d = realloc(scan->devs, n * sizeof(*d)); 7426+ if (!d) 7427+ return -ENOMEM; 7428+ scan->devs = d; 7429+ scan->dev_size = n; 7430+ } 7431+ 7432+ d = scan->devs + scan->dev_count; 7433+ d->src_type = src_type; 7434+ d->src_fmt_v4l2 = src_fmt_v4l2; 7435+ d->vname = strdup(vname); 7436+ if (!d->vname) 7437+ return -ENOMEM; 7438+ d->mname = strdup(mname); 7439+ if (!d->mname) { 7440+ free((char *)d->vname); 7441+ return -ENOMEM; 7442+ } 7443+ ++scan->dev_count; 7444+ return 0; 7445+} 7446+ 7447+void devscan_delete(struct devscan **const pScan) 7448+{ 7449+ unsigned int i; 7450+ struct devscan * const scan = *pScan; 7451+ 7452+ if (!scan) 7453+ return; 7454+ *pScan = NULL; 7455+ 7456+ for (i = 0; i < scan->dev_count; ++i) { 7457+ free((char*)scan->devs[i].mname); 7458+ free((char*)scan->devs[i].vname); 7459+ } 7460+ free(scan->devs); 7461+ free(scan); 7462+} 7463+ 7464+#define REQ_BUF_CAPS (\ 7465+ V4L2_BUF_CAP_SUPPORTS_DMABUF |\ 7466+ V4L2_BUF_CAP_SUPPORTS_REQUESTS |\ 7467+ V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF) 7468+ 7469+static void probe_formats(void * const dc, 7470+ struct devscan *const scan, 7471+ const int fd, 7472+ const unsigned int type_v4l2, 7473+ const char *const mpath, 7474+ const char *const vpath) 7475+{ 7476+ unsigned int i; 7477+ for (i = 0;; ++i) { 7478+ struct v4l2_fmtdesc fmtdesc = { 7479+ .index = i, 7480+ .type = type_v4l2 7481+ }; 7482+ struct v4l2_requestbuffers rbufs = { 7483+ .count = 0, 7484+ .type = type_v4l2, 7485+ .memory = V4L2_MEMORY_MMAP 7486+ }; 7487+ while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) { 7488+ if (errno == EINTR) 7489+ continue; 7490+ if (errno != EINVAL) 7491+ request_err(dc, "Enum[%d] failed for type=%d\n", i, type_v4l2); 7492+ return; 7493+ } 7494+ if (!video_src_pixfmt_supported(fmtdesc.pixelformat)) 7495+ continue; 7496+ 7497+ if (v4l2_set_format(fd, type_v4l2, fmtdesc.pixelformat, 720, 480)) { 7498+ request_debug(dc, "Set failed for type=%d, pf=%.4s\n", type_v4l2, 
(char*)&fmtdesc.pixelformat); 7499+ continue; 7500+ } 7501+ 7502+ while (ioctl(fd, VIDIOC_REQBUFS, &rbufs)) { 7503+ if (errno != EINTR) { 7504+ request_debug(dc, "%s: Reqbufs failed\n", vpath); 7505+ continue; 7506+ } 7507+ } 7508+ 7509+ if ((rbufs.capabilities & REQ_BUF_CAPS) != REQ_BUF_CAPS) { 7510+ request_debug(dc, "%s: Buf caps %#x insufficient\n", vpath, rbufs.capabilities); 7511+ continue; 7512+ } 7513+ 7514+ request_debug(dc, "Adding: %s,%s pix=%#x, type=%d\n", 7515+ mpath, vpath, fmtdesc.pixelformat, type_v4l2); 7516+ devscan_add(scan, type_v4l2, fmtdesc.pixelformat, vpath, mpath); 7517+ } 7518+} 7519+ 7520+ 7521+static int probe_video_device(void * const dc, 7522+ struct udev_device *const device, 7523+ struct devscan *const scan, 7524+ const char *const mpath) 7525+{ 7526+ int ret; 7527+ unsigned int capabilities = 0; 7528+ int video_fd = -1; 7529+ 7530+ const char *path = udev_device_get_devnode(device); 7531+ if (!path) { 7532+ request_err(dc, "%s: get video device devnode failed\n", __func__); 7533+ ret = -EINVAL; 7534+ goto fail; 7535+ } 7536+ 7537+ video_fd = open(path, O_RDWR, 0); 7538+ if (video_fd == -1) { 7539+ ret = -errno; 7540+ request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(errno), errno); 7541+ goto fail; 7542+ } 7543+ 7544+ ret = v4l2_query_capabilities(video_fd, &capabilities); 7545+ if (ret < 0) { 7546+ request_err(dc, "%s: get video capability failed, %s (%d)\n", __func__, strerror(-ret), -ret); 7547+ goto fail; 7548+ } 7549+ 7550+ request_debug(dc, "%s: path=%s capabilities=%#x\n", __func__, path, capabilities); 7551+ 7552+ if (!(capabilities & V4L2_CAP_STREAMING)) { 7553+ request_debug(dc, "%s: missing required streaming capability\n", __func__); 7554+ ret = -EINVAL; 7555+ goto fail; 7556+ } 7557+ 7558+ if (!(capabilities & (V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M))) { 7559+ request_debug(dc, "%s: missing required mem2mem capability\n", __func__); 7560+ ret = -EINVAL; 7561+ goto fail; 7562+ } 7563+ 
7564+ /* Should check capture formats too... */ 7565+ if ((capabilities & V4L2_CAP_VIDEO_M2M) != 0) 7566+ probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT, mpath, path); 7567+ if ((capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) 7568+ probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, mpath, path); 7569+ 7570+ close(video_fd); 7571+ return 0; 7572+ 7573+fail: 7574+ if (video_fd >= 0) 7575+ close(video_fd); 7576+ return ret; 7577+} 7578+ 7579+static int probe_media_device(void * const dc, 7580+ struct udev_device *const device, 7581+ struct devscan *const scan) 7582+{ 7583+ int ret; 7584+ int rv; 7585+ struct media_device_info device_info = { 0 }; 7586+ struct media_v2_topology topology = { 0 }; 7587+ struct media_v2_interface *interfaces = NULL; 7588+ struct udev *udev = udev_device_get_udev(device); 7589+ struct udev_device *video_device; 7590+ dev_t devnum; 7591+ int media_fd = -1; 7592+ 7593+ const char *path = udev_device_get_devnode(device); 7594+ if (!path) { 7595+ request_err(dc, "%s: get media device devnode failed\n", __func__); 7596+ ret = -EINVAL; 7597+ goto fail; 7598+ } 7599+ 7600+ media_fd = open(path, O_RDWR, 0); 7601+ if (media_fd < 0) { 7602+ ret = -errno; 7603+ request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(-ret), -ret); 7604+ goto fail; 7605+ } 7606+ 7607+ rv = ioctl(media_fd, MEDIA_IOC_DEVICE_INFO, &device_info); 7608+ if (rv < 0) { 7609+ ret = -errno; 7610+ request_err(dc, "%s: get media device info failed, %s (%d)\n", __func__, strerror(-ret), -ret); 7611+ goto fail; 7612+ } 7613+ 7614+ rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology); 7615+ if (rv < 0) { 7616+ ret = -errno; 7617+ request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret); 7618+ goto fail; 7619+ } 7620+ 7621+ if (topology.num_interfaces <= 0) { 7622+ request_err(dc, "%s: media device has no interfaces\n", __func__); 7623+ ret = -EINVAL; 7624+ goto fail; 7625+ } 7626+ 7627+ 
interfaces = calloc(topology.num_interfaces, sizeof(*interfaces)); 7628+ if (!interfaces) { 7629+ request_err(dc, "%s: allocating media interface struct failed\n", __func__); 7630+ ret = -ENOMEM; 7631+ goto fail; 7632+ } 7633+ 7634+ topology.ptr_interfaces = (__u64)(uintptr_t)interfaces; 7635+ rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology); 7636+ if (rv < 0) { 7637+ ret = -errno; 7638+ request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret); 7639+ goto fail; 7640+ } 7641+ 7642+ for (int i = 0; i < topology.num_interfaces; i++) { 7643+ if (interfaces[i].intf_type != MEDIA_INTF_T_V4L_VIDEO) 7644+ continue; 7645+ 7646+ devnum = makedev(interfaces[i].devnode.major, interfaces[i].devnode.minor); 7647+ video_device = udev_device_new_from_devnum(udev, 'c', devnum); 7648+ if (!video_device) { 7649+ ret = -errno; 7650+ request_err(dc, "%s: video_device[%d]=%p\n", __func__, i, video_device); 7651+ continue; 7652+ } 7653+ 7654+ ret = probe_video_device(dc, video_device, scan, path); 7655+ udev_device_unref(video_device); 7656+ 7657+ if (ret != 0) 7658+ goto fail; 7659+ } 7660+ 7661+fail: 7662+ free(interfaces); 7663+ if (media_fd != -1) 7664+ close(media_fd); 7665+ return ret; 7666+} 7667+ 7668+const char *decdev_media_path(const struct decdev *const dev) 7669+{ 7670+ return !dev ? NULL : dev->mname; 7671+} 7672+ 7673+const char *decdev_video_path(const struct decdev *const dev) 7674+{ 7675+ return !dev ? NULL : dev->vname; 7676+} 7677+ 7678+enum v4l2_buf_type decdev_src_type(const struct decdev *const dev) 7679+{ 7680+ return !dev ? 0 : dev->src_type; 7681+} 7682+ 7683+uint32_t decdev_src_pixelformat(const struct decdev *const dev) 7684+{ 7685+ return !dev ? 
0 : dev->src_fmt_v4l2; 7686+} 7687+ 7688+ 7689+const struct decdev *devscan_find(struct devscan *const scan, 7690+ const uint32_t src_fmt_v4l2) 7691+{ 7692+ unsigned int i; 7693+ 7694+ if (scan->env.mname && scan->env.vname) 7695+ return &scan->env; 7696+ 7697+ if (!src_fmt_v4l2) 7698+ return scan->dev_count ? scan->devs + 0 : NULL; 7699+ 7700+ for (i = 0; i != scan->dev_count; ++i) { 7701+ if (scan->devs[i].src_fmt_v4l2 == src_fmt_v4l2) 7702+ return scan->devs + i; 7703+ } 7704+ return NULL; 7705+} 7706+ 7707+int devscan_build(void * const dc, struct devscan **pscan) 7708+{ 7709+ int ret; 7710+ struct udev *udev; 7711+ struct udev_enumerate *enumerate; 7712+ struct udev_list_entry *devices; 7713+ struct udev_list_entry *entry; 7714+ struct udev_device *device; 7715+ struct devscan * scan; 7716+ 7717+ *pscan = NULL; 7718+ 7719+ scan = calloc(1, sizeof(*scan)); 7720+ if (!scan) { 7721+ ret = -ENOMEM; 7722+ goto fail; 7723+ } 7724+ 7725+ scan->env.mname = getenv("LIBVA_V4L2_REQUEST_MEDIA_PATH"); 7726+ scan->env.vname = getenv("LIBVA_V4L2_REQUEST_VIDEO_PATH"); 7727+ if (scan->env.mname && scan->env.vname) { 7728+ request_info(dc, "Media/video device env overrides found: %s,%s\n", 7729+ scan->env.mname, scan->env.vname); 7730+ *pscan = scan; 7731+ return 0; 7732+ } 7733+ 7734+ udev = udev_new(); 7735+ if (!udev) { 7736+ request_err(dc, "%s: allocating udev context failed\n", __func__); 7737+ ret = -ENOMEM; 7738+ goto fail; 7739+ } 7740+ 7741+ enumerate = udev_enumerate_new(udev); 7742+ if (!enumerate) { 7743+ request_err(dc, "%s: allocating udev enumerator failed\n", __func__); 7744+ ret = -ENOMEM; 7745+ goto fail; 7746+ } 7747+ 7748+ udev_enumerate_add_match_subsystem(enumerate, "media"); 7749+ udev_enumerate_scan_devices(enumerate); 7750+ 7751+ devices = udev_enumerate_get_list_entry(enumerate); 7752+ udev_list_entry_foreach(entry, devices) { 7753+ const char *path = udev_list_entry_get_name(entry); 7754+ if (!path) 7755+ continue; 7756+ 7757+ device = 
udev_device_new_from_syspath(udev, path); 7758+ if (!device) 7759+ continue; 7760+ 7761+ probe_media_device(dc, device, scan); 7762+ udev_device_unref(device); 7763+ } 7764+ 7765+ udev_enumerate_unref(enumerate); 7766+ udev_unref(udev); 7767+ 7768+ *pscan = scan; 7769+ return 0; 7770+ 7771+fail: 7772+ if (udev) 7773+ udev_unref(udev); 7774+ devscan_delete(&scan); 7775+ return ret; 7776+} 7777+ 7778--- /dev/null 7779+++ b/libavcodec/v4l2_req_devscan.h 7780@@ -0,0 +1,23 @@ 7781+#ifndef _DEVSCAN_H_ 7782+#define _DEVSCAN_H_ 7783+ 7784+#include <stdint.h> 7785+ 7786+struct devscan; 7787+struct decdev; 7788+enum v4l2_buf_type; 7789+ 7790+/* These return pointers to data in the devscan structure and so are vaild 7791+ * for the lifetime of that 7792+ */ 7793+const char *decdev_media_path(const struct decdev *const dev); 7794+const char *decdev_video_path(const struct decdev *const dev); 7795+enum v4l2_buf_type decdev_src_type(const struct decdev *const dev); 7796+uint32_t decdev_src_pixelformat(const struct decdev *const dev); 7797+ 7798+const struct decdev *devscan_find(struct devscan *const scan, const uint32_t src_fmt_v4l2); 7799+ 7800+int devscan_build(void * const dc, struct devscan **pscan); 7801+void devscan_delete(struct devscan **const pScan); 7802+ 7803+#endif 7804--- /dev/null 7805+++ b/libavcodec/v4l2_req_dmabufs.c 7806@@ -0,0 +1,409 @@ 7807+#include <stdatomic.h> 7808+#include <stdio.h> 7809+#include <stdlib.h> 7810+#include <unistd.h> 7811+#include <inttypes.h> 7812+#include <fcntl.h> 7813+#include <errno.h> 7814+#include <string.h> 7815+#include <sys/ioctl.h> 7816+#include <sys/mman.h> 7817+#include <linux/mman.h> 7818+#include <linux/dma-buf.h> 7819+#include <linux/dma-heap.h> 7820+ 7821+#include "v4l2_req_dmabufs.h" 7822+#include "v4l2_req_utils.h" 7823+ 7824+#define TRACE_ALLOC 0 7825+ 7826+#ifndef __O_CLOEXEC 7827+#define __O_CLOEXEC 0 7828+#endif 7829+ 7830+struct dmabufs_ctl; 7831+struct dmabuf_h; 7832+ 7833+struct dmabuf_fns { 7834+ int 
(*buf_alloc)(struct dmabufs_ctl * dbsc, struct dmabuf_h * dh, size_t size); 7835+ void (*buf_free)(struct dmabuf_h * dh); 7836+ int (*ctl_new)(struct dmabufs_ctl * dbsc); 7837+ void (*ctl_free)(struct dmabufs_ctl * dbsc); 7838+}; 7839+ 7840+struct dmabufs_ctl { 7841+ atomic_int ref_count; 7842+ int fd; 7843+ size_t page_size; 7844+ void * v; 7845+ const struct dmabuf_fns * fns; 7846+}; 7847+ 7848+struct dmabuf_h { 7849+ int fd; 7850+ size_t size; 7851+ size_t len; 7852+ void * mapptr; 7853+ void * v; 7854+ const struct dmabuf_fns * fns; 7855+}; 7856+ 7857+#if TRACE_ALLOC 7858+static unsigned int total_bufs = 0; 7859+static size_t total_size = 0; 7860+#endif 7861+ 7862+struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size) 7863+{ 7864+ struct dmabuf_h *dh; 7865+ 7866+ if (mapptr == MAP_FAILED) 7867+ return NULL; 7868+ 7869+ dh = malloc(sizeof(*dh)); 7870+ if (!dh) 7871+ return NULL; 7872+ 7873+ *dh = (struct dmabuf_h) { 7874+ .fd = -1, 7875+ .size = size, 7876+ .mapptr = mapptr 7877+ }; 7878+ 7879+ return dh; 7880+} 7881+ 7882+struct dmabuf_h * dmabuf_import(int fd, size_t size) 7883+{ 7884+ struct dmabuf_h *dh; 7885+ 7886+ fd = dup(fd); 7887+ if (fd < 0 || size == 0) 7888+ return NULL; 7889+ 7890+ dh = malloc(sizeof(*dh)); 7891+ if (!dh) { 7892+ close(fd); 7893+ return NULL; 7894+ } 7895+ 7896+ *dh = (struct dmabuf_h) { 7897+ .fd = fd, 7898+ .size = size, 7899+ .mapptr = MAP_FAILED 7900+ }; 7901+ 7902+#if TRACE_ALLOC 7903+ ++total_bufs; 7904+ total_size += dh->size; 7905+ request_log("%s: Import: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); 7906+#endif 7907+ 7908+ return dh; 7909+} 7910+ 7911+struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size) 7912+{ 7913+ struct dmabuf_h * dh; 7914+ if (old != NULL) { 7915+ if (old->size >= size) { 7916+ return old; 7917+ } 7918+ dmabuf_free(old); 7919+ } 7920+ 7921+ if (size == 0 || 7922+ (dh = malloc(sizeof(*dh))) == NULL) 7923+ return NULL; 7924+ 
7925+ *dh = (struct dmabuf_h){ 7926+ .fd = -1, 7927+ .mapptr = MAP_FAILED, 7928+ .fns = dbsc->fns 7929+ }; 7930+ 7931+ if (dh->fns->buf_alloc(dbsc, dh, size) != 0) 7932+ goto fail; 7933+ 7934+ 7935+#if TRACE_ALLOC 7936+ ++total_bufs; 7937+ total_size += dh->size; 7938+ request_log("%s: Alloc: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); 7939+#endif 7940+ 7941+ return dh; 7942+ 7943+fail: 7944+ free(dh); 7945+ return NULL; 7946+} 7947+ 7948+int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags) 7949+{ 7950+ struct dma_buf_sync sync = { 7951+ .flags = flags 7952+ }; 7953+ if (dh->fd == -1) 7954+ return 0; 7955+ while (ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &sync) == -1) { 7956+ const int err = errno; 7957+ if (errno == EINTR) 7958+ continue; 7959+ request_log("%s: ioctl failed: flags=%#x\n", __func__, flags); 7960+ return -err; 7961+ } 7962+ return 0; 7963+} 7964+ 7965+int dmabuf_write_start(struct dmabuf_h * const dh) 7966+{ 7967+ return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE); 7968+} 7969+ 7970+int dmabuf_write_end(struct dmabuf_h * const dh) 7971+{ 7972+ return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE); 7973+} 7974+ 7975+int dmabuf_read_start(struct dmabuf_h * const dh) 7976+{ 7977+ if (!dmabuf_map(dh)) 7978+ return -1; 7979+ return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ); 7980+} 7981+ 7982+int dmabuf_read_end(struct dmabuf_h * const dh) 7983+{ 7984+ return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ); 7985+} 7986+ 7987+ 7988+void * dmabuf_map(struct dmabuf_h * const dh) 7989+{ 7990+ if (!dh) 7991+ return NULL; 7992+ if (dh->mapptr != MAP_FAILED) 7993+ return dh->mapptr; 7994+ dh->mapptr = mmap(NULL, dh->size, 7995+ PROT_READ | PROT_WRITE, 7996+ MAP_SHARED | MAP_POPULATE, 7997+ dh->fd, 0); 7998+ if (dh->mapptr == MAP_FAILED) { 7999+ request_log("%s: Map failed\n", __func__); 8000+ return NULL; 8001+ } 8002+ return dh->mapptr; 8003+} 8004+ 8005+int dmabuf_fd(const struct dmabuf_h 
* const dh) 8006+{ 8007+ if (!dh) 8008+ return -1; 8009+ return dh->fd; 8010+} 8011+ 8012+size_t dmabuf_size(const struct dmabuf_h * const dh) 8013+{ 8014+ if (!dh) 8015+ return 0; 8016+ return dh->size; 8017+} 8018+ 8019+size_t dmabuf_len(const struct dmabuf_h * const dh) 8020+{ 8021+ if (!dh) 8022+ return 0; 8023+ return dh->len; 8024+} 8025+ 8026+void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len) 8027+{ 8028+ dh->len = len; 8029+} 8030+ 8031+void dmabuf_free(struct dmabuf_h * dh) 8032+{ 8033+ if (!dh) 8034+ return; 8035+ 8036+#if TRACE_ALLOC 8037+ --total_bufs; 8038+ total_size -= dh->size; 8039+ request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); 8040+#endif 8041+ 8042+ if (dh->fns != NULL && dh->fns->buf_free) 8043+ dh->fns->buf_free(dh); 8044+ 8045+ if (dh->mapptr != MAP_FAILED && dh->mapptr != NULL) 8046+ munmap(dh->mapptr, dh->size); 8047+ if (dh->fd != -1) 8048+ while (close(dh->fd) == -1 && errno == EINTR) 8049+ /* loop */; 8050+ free(dh); 8051+} 8052+ 8053+static struct dmabufs_ctl * dmabufs_ctl_new2(const struct dmabuf_fns * const fns) 8054+{ 8055+ struct dmabufs_ctl * dbsc = calloc(1, sizeof(*dbsc)); 8056+ 8057+ if (!dbsc) 8058+ return NULL; 8059+ 8060+ dbsc->fd = -1; 8061+ dbsc->fns = fns; 8062+ dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE); 8063+ 8064+ if (fns->ctl_new(dbsc) != 0) 8065+ goto fail; 8066+ 8067+ return dbsc; 8068+ 8069+fail: 8070+ free(dbsc); 8071+ return NULL; 8072+} 8073+ 8074+static void dmabufs_ctl_free(struct dmabufs_ctl * const dbsc) 8075+{ 8076+ request_debug(NULL, "Free dmabuf ctl\n"); 8077+ 8078+ dbsc->fns->ctl_free(dbsc); 8079+ 8080+ free(dbsc); 8081+} 8082+ 8083+void dmabufs_ctl_unref(struct dmabufs_ctl ** const pDbsc) 8084+{ 8085+ struct dmabufs_ctl * const dbsc = *pDbsc; 8086+ 8087+ if (!dbsc) 8088+ return; 8089+ *pDbsc = NULL; 8090+ 8091+ if (atomic_fetch_sub(&dbsc->ref_count, 1) != 0) 8092+ return; 8093+ 8094+ dmabufs_ctl_free(dbsc); 8095+} 8096+ 8097+struct 
dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc) 8098+{ 8099+ atomic_fetch_add(&dbsc->ref_count, 1); 8100+ return dbsc; 8101+} 8102+ 8103+//----------------------------------------------------------------------------- 8104+// 8105+// Alloc dmabuf via CMA 8106+ 8107+static int ctl_cma_new2(struct dmabufs_ctl * dbsc, const char * const * names) 8108+{ 8109+ for (; *names != NULL; ++names) 8110+ { 8111+ while ((dbsc->fd = open(*names, O_RDWR | __O_CLOEXEC)) == -1 && 8112+ errno == EINTR) 8113+ /* Loop */; 8114+ if (dbsc->fd != -1) 8115+ { 8116+ request_debug(NULL, "%s: Using dma_heap device %s\n", __func__, *names); 8117+ return 0; 8118+ } 8119+ request_debug(NULL, "%s: Not using dma_heap device %s: %s\n", __func__, *names, strerror(errno)); 8120+ } 8121+ request_log("Unable to open any dma_heap device\n"); 8122+ return -1; 8123+} 8124+ 8125+static int ctl_cma_new(struct dmabufs_ctl * dbsc) 8126+{ 8127+ static const char * const names[] = { 8128+ "/dev/dma_heap/linux,cma", 8129+ "/dev/dma_heap/reserved", 8130+ NULL 8131+ }; 8132+ 8133+ return ctl_cma_new2(dbsc, names); 8134+} 8135+ 8136+static void ctl_cma_free(struct dmabufs_ctl * dbsc) 8137+{ 8138+ if (dbsc->fd != -1) 8139+ while (close(dbsc->fd) == -1 && errno == EINTR) 8140+ /* loop */; 8141+} 8142+ 8143+static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, size_t size) 8144+{ 8145+ struct dma_heap_allocation_data data = { 8146+ .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1), 8147+ .fd = 0, 8148+ .fd_flags = O_RDWR, 8149+ .heap_flags = 0 8150+ }; 8151+ 8152+ while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) { 8153+ int err = errno; 8154+ request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n", 8155+ (uint64_t)data.len, 8156+ dbsc->fd, 8157+ err, 8158+ strerror(err)); 8159+ if (err == EINTR) 8160+ continue; 8161+ return -err; 8162+ } 8163+ 8164+ dh->fd = data.fd; 8165+ dh->size = (size_t)data.len; 8166+ 8167+// fprintf(stderr, "%s: 
size=%#zx, ftell=%#zx\n", __func__, 8168+// dh->size, (size_t)lseek(dh->fd, 0, SEEK_END)); 8169+ 8170+ return 0; 8171+} 8172+ 8173+static void buf_cma_free(struct dmabuf_h * dh) 8174+{ 8175+ // Nothing needed 8176+} 8177+ 8178+static const struct dmabuf_fns dmabuf_cma_fns = { 8179+ .buf_alloc = buf_cma_alloc, 8180+ .buf_free = buf_cma_free, 8181+ .ctl_new = ctl_cma_new, 8182+ .ctl_free = ctl_cma_free, 8183+}; 8184+ 8185+struct dmabufs_ctl * dmabufs_ctl_new(void) 8186+{ 8187+ request_debug(NULL, "Dmabufs using CMA\n"); 8188+ return dmabufs_ctl_new2(&dmabuf_cma_fns); 8189+} 8190+ 8191+static int ctl_cma_new_vidbuf_cached(struct dmabufs_ctl * dbsc) 8192+{ 8193+ static const char * const names[] = { 8194+ "/dev/dma_heap/vidbuf_cached", 8195+ "/dev/dma_heap/linux,cma", 8196+ "/dev/dma_heap/reserved", 8197+ NULL 8198+ }; 8199+ 8200+ return ctl_cma_new2(dbsc, names); 8201+} 8202+ 8203+static const struct dmabuf_fns dmabuf_vidbuf_cached_fns = { 8204+ .buf_alloc = buf_cma_alloc, 8205+ .buf_free = buf_cma_free, 8206+ .ctl_new = ctl_cma_new_vidbuf_cached, 8207+ .ctl_free = ctl_cma_free, 8208+}; 8209+ 8210+struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void) 8211+{ 8212+ request_debug(NULL, "Dmabufs using Vidbuf\n"); 8213+ return dmabufs_ctl_new2(&dmabuf_vidbuf_cached_fns); 8214+} 8215+ 8216--- /dev/null 8217+++ b/libavcodec/v4l2_req_dmabufs.h 8218@@ -0,0 +1,45 @@ 8219+#ifndef DMABUFS_H 8220+#define DMABUFS_H 8221+ 8222+#include <stddef.h> 8223+ 8224+struct dmabufs_ctl; 8225+struct dmabuf_h; 8226+ 8227+struct dmabufs_ctl * dmabufs_ctl_new(void); 8228+struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void); 8229+void dmabufs_ctl_unref(struct dmabufs_ctl ** const pdbsc); 8230+struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc); 8231+ 8232+// Need not preserve old contents 8233+// On NULL return old buffer is freed 8234+struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h *, size_t size); 8235+ 8236+static inline struct dmabuf_h * 
dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t size) { 8237+ return dmabuf_realloc(dbsc, NULL, size); 8238+} 8239+/* Create from existing fd - dups(fd) */ 8240+struct dmabuf_h * dmabuf_import(int fd, size_t size); 8241+/* Import an MMAP - return NULL if mapptr = MAP_FAIL */ 8242+struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size); 8243+ 8244+void * dmabuf_map(struct dmabuf_h * const dh); 8245+ 8246+/* flags from linux/dmabuf.h DMA_BUF_SYNC_xxx */ 8247+int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags); 8248+ 8249+int dmabuf_write_start(struct dmabuf_h * const dh); 8250+int dmabuf_write_end(struct dmabuf_h * const dh); 8251+int dmabuf_read_start(struct dmabuf_h * const dh); 8252+int dmabuf_read_end(struct dmabuf_h * const dh); 8253+ 8254+int dmabuf_fd(const struct dmabuf_h * const dh); 8255+/* Allocated size */ 8256+size_t dmabuf_size(const struct dmabuf_h * const dh); 8257+/* Bytes in use */ 8258+size_t dmabuf_len(const struct dmabuf_h * const dh); 8259+/* Set bytes in use */ 8260+void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len); 8261+void dmabuf_free(struct dmabuf_h * dh); 8262+ 8263+#endif 8264--- /dev/null 8265+++ b/libavcodec/v4l2_req_hevc_v1.c 8266@@ -0,0 +1,3 @@ 8267+#define HEVC_CTRLS_VERSION 1 8268+#include "v4l2_req_hevc_vx.c" 8269+ 8270--- /dev/null 8271+++ b/libavcodec/v4l2_req_hevc_v2.c 8272@@ -0,0 +1,3 @@ 8273+#define HEVC_CTRLS_VERSION 2 8274+#include "v4l2_req_hevc_vx.c" 8275+ 8276--- /dev/null 8277+++ b/libavcodec/v4l2_req_hevc_v3.c 8278@@ -0,0 +1,3 @@ 8279+#define HEVC_CTRLS_VERSION 3 8280+#include "v4l2_req_hevc_vx.c" 8281+ 8282--- /dev/null 8283+++ b/libavcodec/v4l2_req_hevc_v4.c 8284@@ -0,0 +1,3 @@ 8285+#define HEVC_CTRLS_VERSION 4 8286+#include "v4l2_req_hevc_vx.c" 8287+ 8288--- /dev/null 8289+++ b/libavcodec/v4l2_req_hevc_vx.c 8290@@ -0,0 +1,1362 @@ 8291+// File included by v4l2_req_hevc_v* - not compiled on its own 8292+ 8293+#include "decode.h" 8294+#include "hevcdec.h" 8295+#include "hwconfig.h" 
8296+#include "internal.h" 8297+#include "thread.h" 8298+ 8299+#if HEVC_CTRLS_VERSION == 1 8300+#include "hevc-ctrls-v1.h" 8301+ 8302+// Fixup renamed entries 8303+#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT 8304+ 8305+#elif HEVC_CTRLS_VERSION == 2 8306+#include "hevc-ctrls-v2.h" 8307+#elif HEVC_CTRLS_VERSION == 3 8308+#include "hevc-ctrls-v3.h" 8309+#elif HEVC_CTRLS_VERSION == 4 8310+#include <linux/v4l2-controls.h> 8311+#if !defined(V4L2_CID_STATELESS_HEVC_SPS) 8312+#include "hevc-ctrls-v4.h" 8313+#endif 8314+#else 8315+#error Unknown HEVC_CTRLS_VERSION 8316+#endif 8317+ 8318+#ifndef V4L2_CID_STATELESS_HEVC_SPS 8319+#define V4L2_CID_STATELESS_HEVC_SPS V4L2_CID_MPEG_VIDEO_HEVC_SPS 8320+#define V4L2_CID_STATELESS_HEVC_PPS V4L2_CID_MPEG_VIDEO_HEVC_PPS 8321+#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS 8322+#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX 8323+#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS 8324+#define V4L2_CID_STATELESS_HEVC_DECODE_MODE V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE 8325+#define V4L2_CID_STATELESS_HEVC_START_CODE V4L2_CID_MPEG_VIDEO_HEVC_START_CODE 8326+ 8327+#define V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED 8328+#define V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED 8329+#define V4L2_STATELESS_HEVC_START_CODE_NONE V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE 8330+#define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B 8331+#endif 8332+ 8333+#include "v4l2_request_hevc.h" 8334+ 8335+#include "libavutil/hwcontext_drm.h" 8336+ 8337+#include <semaphore.h> 8338+#include <pthread.h> 8339+ 8340+#include "v4l2_req_devscan.h" 8341+#include "v4l2_req_dmabufs.h" 8342+#include "v4l2_req_pollqueue.h" 8343+#include "v4l2_req_media.h" 8344+#include "v4l2_req_utils.h" 8345+ 
8346+// Attached to buf[0] in frame 8347+// Pooled in hwcontext so generally create once - 1/frame 8348+typedef struct V4L2MediaReqDescriptor { 8349+ AVDRMFrameDescriptor drm; 8350+ 8351+ // Media 8352+ uint64_t timestamp; 8353+ struct qent_dst * qe_dst; 8354+ 8355+ // Decode only - should be NULL by the time we emit the frame 8356+ struct req_decode_ent decode_ent; 8357+ 8358+ struct media_request *req; 8359+ struct qent_src *qe_src; 8360+ 8361+#if HEVC_CTRLS_VERSION >= 2 8362+ struct v4l2_ctrl_hevc_decode_params dec; 8363+#endif 8364+ 8365+ size_t num_slices; 8366+ size_t alloced_slices; 8367+ struct v4l2_ctrl_hevc_slice_params * slice_params; 8368+ struct slice_info * slices; 8369+ 8370+ size_t num_offsets; 8371+ size_t alloced_offsets; 8372+ uint32_t *offsets; 8373+ 8374+} V4L2MediaReqDescriptor; 8375+ 8376+struct slice_info { 8377+ const uint8_t * ptr; 8378+ size_t len; // bytes 8379+ size_t n_offsets; 8380+}; 8381+ 8382+// Handy container for accumulating controls before setting 8383+struct req_controls { 8384+ int has_scaling; 8385+ struct timeval tv; 8386+ struct v4l2_ctrl_hevc_sps sps; 8387+ struct v4l2_ctrl_hevc_pps pps; 8388+ struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix; 8389+}; 8390+ 8391+//static uint8_t nalu_slice_start_code[] = { 0x00, 0x00, 0x01 }; 8392+ 8393+ 8394+// Get an FFmpeg format from the v4l2 format 8395+static enum AVPixelFormat pixel_format_from_format(const struct v4l2_format *const format) 8396+{ 8397+ switch (V4L2_TYPE_IS_MULTIPLANAR(format->type) ? 
8398+ format->fmt.pix_mp.pixelformat : format->fmt.pix.pixelformat) { 8399+ case V4L2_PIX_FMT_YUV420: 8400+ return AV_PIX_FMT_YUV420P; 8401+ case V4L2_PIX_FMT_NV12: 8402+ return AV_PIX_FMT_NV12; 8403+#if CONFIG_SAND 8404+ case V4L2_PIX_FMT_NV12_COL128: 8405+ return AV_PIX_FMT_RPI4_8; 8406+ case V4L2_PIX_FMT_NV12_10_COL128: 8407+ return AV_PIX_FMT_RPI4_10; 8408+#endif 8409+ default: 8410+ break; 8411+ } 8412+ return AV_PIX_FMT_NONE; 8413+} 8414+ 8415+static inline uint64_t frame_capture_dpb(const AVFrame * const frame) 8416+{ 8417+ const V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0]; 8418+ return rd->timestamp; 8419+} 8420+ 8421+static inline void frame_set_capture_dpb(AVFrame * const frame, const uint64_t dpb_stamp) 8422+{ 8423+ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0]; 8424+ rd->timestamp = dpb_stamp; 8425+} 8426+ 8427+static void fill_pred_table(const HEVCContext *h, struct v4l2_hevc_pred_weight_table *table) 8428+{ 8429+ int32_t luma_weight_denom, chroma_weight_denom; 8430+ const SliceHeader *sh = &h->sh; 8431+ 8432+ if (sh->slice_type == HEVC_SLICE_I || 8433+ (sh->slice_type == HEVC_SLICE_P && !h->ps.pps->weighted_pred_flag) || 8434+ (sh->slice_type == HEVC_SLICE_B && !h->ps.pps->weighted_bipred_flag)) 8435+ return; 8436+ 8437+ table->luma_log2_weight_denom = sh->luma_log2_weight_denom; 8438+ 8439+ if (h->ps.sps->chroma_format_idc) 8440+ table->delta_chroma_log2_weight_denom = sh->chroma_log2_weight_denom - sh->luma_log2_weight_denom; 8441+ 8442+ luma_weight_denom = (1 << sh->luma_log2_weight_denom); 8443+ chroma_weight_denom = (1 << sh->chroma_log2_weight_denom); 8444+ 8445+ for (int i = 0; i < 15 && i < sh->nb_refs[L0]; i++) { 8446+ table->delta_luma_weight_l0[i] = sh->luma_weight_l0[i] - luma_weight_denom; 8447+ table->luma_offset_l0[i] = sh->luma_offset_l0[i]; 8448+ table->delta_chroma_weight_l0[i][0] = sh->chroma_weight_l0[i][0] - chroma_weight_denom; 8449+ table->delta_chroma_weight_l0[i][1] 
= sh->chroma_weight_l0[i][1] - chroma_weight_denom; 8450+ table->chroma_offset_l0[i][0] = sh->chroma_offset_l0[i][0]; 8451+ table->chroma_offset_l0[i][1] = sh->chroma_offset_l0[i][1]; 8452+ } 8453+ 8454+ if (sh->slice_type != HEVC_SLICE_B) 8455+ return; 8456+ 8457+ for (int i = 0; i < 15 && i < sh->nb_refs[L1]; i++) { 8458+ table->delta_luma_weight_l1[i] = sh->luma_weight_l1[i] - luma_weight_denom; 8459+ table->luma_offset_l1[i] = sh->luma_offset_l1[i]; 8460+ table->delta_chroma_weight_l1[i][0] = sh->chroma_weight_l1[i][0] - chroma_weight_denom; 8461+ table->delta_chroma_weight_l1[i][1] = sh->chroma_weight_l1[i][1] - chroma_weight_denom; 8462+ table->chroma_offset_l1[i][0] = sh->chroma_offset_l1[i][0]; 8463+ table->chroma_offset_l1[i][1] = sh->chroma_offset_l1[i][1]; 8464+ } 8465+} 8466+ 8467+#if HEVC_CTRLS_VERSION <= 2 8468+static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp) 8469+{ 8470+ const HEVCFrame *frame; 8471+ int i; 8472+ 8473+ for (i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) { 8474+ frame = h->rps[ST_CURR_BEF].ref[i]; 8475+ if (frame && timestamp == frame_capture_dpb(frame->frame)) 8476+ return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE; 8477+ } 8478+ 8479+ for (i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) { 8480+ frame = h->rps[ST_CURR_AFT].ref[i]; 8481+ if (frame && timestamp == frame_capture_dpb(frame->frame)) 8482+ return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER; 8483+ } 8484+ 8485+ for (i = 0; i < h->rps[LT_CURR].nb_refs; i++) { 8486+ frame = h->rps[LT_CURR].ref[i]; 8487+ if (frame && timestamp == frame_capture_dpb(frame->frame)) 8488+ return V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR; 8489+ } 8490+ 8491+ return 0; 8492+} 8493+#endif 8494+ 8495+static unsigned int 8496+get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame, 8497+ const struct v4l2_hevc_dpb_entry * const entries, 8498+ const unsigned int num_entries) 8499+{ 8500+ uint64_t timestamp; 8501+ 8502+ if (!frame) 8503+ return 0; 8504+ 8505+ timestamp = 
// Advance b by idx "logical" bytes. Whenever two or more consecutive zero
// bytes are immediately followed by a 0x03 byte, that 0x03 is stepped over
// without being counted and the zero run restarts.
// Returns the resulting pointer.
static const uint8_t * ptr_from_index(const uint8_t * b, unsigned int idx)
{
    unsigned int zero_run = 0;

    for (; idx != 0; --idx) {
        const uint8_t cur = *b++;

        if (cur != 0) {
            zero_run = 0;
            continue;
        }

        // The following byte is only inspected once the run is long enough
        if (++zero_run >= 2 && *b == 3) {
            ++b;
            zero_run = 0;
        }
    }
    return b;
}
128 : rd->alloced_slices * 2; 8560+ void * p2; 8561+ while (rd->num_offsets + n > n2) 8562+ n2 *= 2; 8563+ if ((p2 = av_realloc_array(rd->offsets, n2, sizeof(*rd->offsets))) == NULL) 8564+ return AVERROR(ENOMEM); 8565+ rd->offsets = p2; 8566+ rd->alloced_offsets = n2; 8567+ } 8568+ for (size_t i = 0; i != n; ++i) 8569+ rd->offsets[rd->num_offsets++] = offsets[i] - 1; 8570+ return 0; 8571+} 8572+ 8573+static unsigned int 8574+fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries) 8575+{ 8576+ unsigned int i; 8577+ unsigned int n = 0; 8578+ const HEVCFrame * const pic = h->ref; 8579+ 8580+ for (i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) { 8581+ const HEVCFrame * const frame = &h->DPB[i]; 8582+ if (frame != pic && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF))) { 8583+ struct v4l2_hevc_dpb_entry * const entry = entries + n++; 8584+ 8585+ entry->timestamp = frame_capture_dpb(frame->frame); 8586+#if HEVC_CTRLS_VERSION <= 2 8587+ entry->rps = find_frame_rps_type(h, entry->timestamp); 8588+#else 8589+ entry->flags = (frame->flags & HEVC_FRAME_FLAG_LONG_REF) == 0 ? 0 : 8590+ V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE; 8591+#endif 8592+ entry->field_pic = frame->frame->interlaced_frame; 8593+ 8594+#if HEVC_CTRLS_VERSION <= 3 8595+ /* TODO: Interleaved: Get the POC for each field. 
*/ 8596+ entry->pic_order_cnt[0] = frame->poc; 8597+ entry->pic_order_cnt[1] = frame->poc; 8598+#else 8599+ entry->pic_order_cnt_val = frame->poc; 8600+#endif 8601+ } 8602+ } 8603+ return n; 8604+} 8605+ 8606+static void fill_slice_params(const HEVCContext * const h, 8607+#if HEVC_CTRLS_VERSION >= 2 8608+ const struct v4l2_ctrl_hevc_decode_params * const dec, 8609+#endif 8610+ struct v4l2_ctrl_hevc_slice_params *slice_params, 8611+ uint32_t bit_size, uint32_t bit_offset) 8612+{ 8613+ const SliceHeader * const sh = &h->sh; 8614+#if HEVC_CTRLS_VERSION >= 2 8615+ const struct v4l2_hevc_dpb_entry *const dpb = dec->dpb; 8616+ const unsigned int dpb_n = dec->num_active_dpb_entries; 8617+#else 8618+ struct v4l2_hevc_dpb_entry *const dpb = slice_params->dpb; 8619+ unsigned int dpb_n; 8620+#endif 8621+ unsigned int i; 8622+ RefPicList *rpl; 8623+ 8624+ *slice_params = (struct v4l2_ctrl_hevc_slice_params) { 8625+ .bit_size = bit_size, 8626+#if HEVC_CTRLS_VERSION <= 3 8627+ .data_bit_offset = bit_offset, 8628+#else 8629+ .data_byte_offset = bit_offset / 8 + 1, 8630+#endif 8631+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ 8632+ .slice_segment_addr = sh->slice_segment_addr, 8633+ 8634+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ 8635+ .nal_unit_type = h->nal_unit_type, 8636+ .nuh_temporal_id_plus1 = h->temporal_id + 1, 8637+ 8638+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ 8639+ .slice_type = sh->slice_type, 8640+ .colour_plane_id = sh->colour_plane_id, 8641+ .slice_pic_order_cnt = h->ref->poc, 8642+ .num_ref_idx_l0_active_minus1 = sh->nb_refs[L0] ? sh->nb_refs[L0] - 1 : 0, 8643+ .num_ref_idx_l1_active_minus1 = sh->nb_refs[L1] ? sh->nb_refs[L1] - 1 : 0, 8644+ .collocated_ref_idx = sh->slice_temporal_mvp_enabled_flag ? sh->collocated_ref_idx : 0, 8645+ .five_minus_max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? 
0 : 5 - sh->max_num_merge_cand, 8646+ .slice_qp_delta = sh->slice_qp_delta, 8647+ .slice_cb_qp_offset = sh->slice_cb_qp_offset, 8648+ .slice_cr_qp_offset = sh->slice_cr_qp_offset, 8649+ .slice_act_y_qp_offset = 0, 8650+ .slice_act_cb_qp_offset = 0, 8651+ .slice_act_cr_qp_offset = 0, 8652+ .slice_beta_offset_div2 = sh->beta_offset / 2, 8653+ .slice_tc_offset_div2 = sh->tc_offset / 2, 8654+ 8655+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ 8656+ .pic_struct = h->sei.picture_timing.picture_struct, 8657+ 8658+#if HEVC_CTRLS_VERSION < 2 8659+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ 8660+ .num_rps_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs, 8661+ .num_rps_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs, 8662+ .num_rps_poc_lt_curr = h->rps[LT_CURR].nb_refs, 8663+#endif 8664+ }; 8665+ 8666+ if (sh->slice_sample_adaptive_offset_flag[0]) 8667+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA; 8668+ 8669+ if (sh->slice_sample_adaptive_offset_flag[1]) 8670+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA; 8671+ 8672+ if (sh->slice_temporal_mvp_enabled_flag) 8673+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED; 8674+ 8675+ if (sh->mvd_l1_zero_flag) 8676+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO; 8677+ 8678+ if (sh->cabac_init_flag) 8679+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT; 8680+ 8681+ if (sh->collocated_list == L0) 8682+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0; 8683+ 8684+ if (sh->disable_deblocking_filter_flag) 8685+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED; 8686+ 8687+ if (sh->slice_loop_filter_across_slices_enabled_flag) 8688+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED; 8689+ 8690+ if (sh->dependent_slice_segment_flag) 8691+ slice_params->flags |= 
#if HEVC_CTRLS_VERSION >= 2
// Populate the per-picture V4L2 decode parameters (POC, reference picture
// set sizes and POCs, DPB list and IRAP/IDR/no-output flags) from the
// decoder context. *dec is fully overwritten.
static void
fill_decode_params(const HEVCContext * const h,
                   struct v4l2_ctrl_hevc_decode_params * const dec)
{
    const RefPicList *const st_before = &h->rps[ST_CURR_BEF];
    const RefPicList *const st_after  = &h->rps[ST_CURR_AFT];
    const RefPicList *const lt_curr   = &h->rps[LT_CURR];
    unsigned int k;

    *dec = (struct v4l2_ctrl_hevc_decode_params){
        .pic_order_cnt_val = h->poc,
        .num_poc_st_curr_before = st_before->nb_refs,
        .num_poc_st_curr_after = st_after->nb_refs,
        .num_poc_lt_curr = lt_curr->nb_refs,
    };

    dec->num_active_dpb_entries = fill_dpb_entries(h, dec->dpb);

    // The documentation appears to ask for our 32 bit signed POC to be
    // stored in a U8 (16 bits would arguably be enough).
    // Luckily we (Pi) don't use these fields
    for (k = 0; k != st_before->nb_refs; ++k)
        dec->poc_st_curr_before[k] = st_before->ref[k]->poc;
    for (k = 0; k != st_after->nb_refs; ++k)
        dec->poc_st_curr_after[k] = st_after->ref[k]->poc;
    for (k = 0; k != lt_curr->nb_refs; ++k)
        dec->poc_lt_curr[k] = lt_curr->ref[k]->poc;

    if (IS_IRAP(h))
        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC;
    if (IS_IDR(h))
        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC;
    if (h->sh.no_output_of_prior_pics_flag)
        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR;
}
#endif
(To be fair 16 bits would be enough) 8742+ // Luckily we (Pi) don't use these fields 8743+ for (i = 0; i != h->rps[ST_CURR_BEF].nb_refs; ++i) 8744+ dec->poc_st_curr_before[i] = h->rps[ST_CURR_BEF].ref[i]->poc; 8745+ for (i = 0; i != h->rps[ST_CURR_AFT].nb_refs; ++i) 8746+ dec->poc_st_curr_after[i] = h->rps[ST_CURR_AFT].ref[i]->poc; 8747+ for (i = 0; i != h->rps[LT_CURR].nb_refs; ++i) 8748+ dec->poc_lt_curr[i] = h->rps[LT_CURR].ref[i]->poc; 8749+ 8750+ if (IS_IRAP(h)) 8751+ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC; 8752+ if (IS_IDR(h)) 8753+ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC; 8754+ if (h->sh.no_output_of_prior_pics_flag) 8755+ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR; 8756+ 8757+} 8758+#endif 8759+ 8760+static void fill_sps(struct v4l2_ctrl_hevc_sps *ctrl, const HEVCSPS *sps) 8761+{ 8762+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ 8763+ *ctrl = (struct v4l2_ctrl_hevc_sps) { 8764+ .chroma_format_idc = sps->chroma_format_idc, 8765+ .pic_width_in_luma_samples = sps->width, 8766+ .pic_height_in_luma_samples = sps->height, 8767+ .bit_depth_luma_minus8 = sps->bit_depth - 8, 8768+ .bit_depth_chroma_minus8 = sps->bit_depth - 8, 8769+ .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, 8770+ .sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1, 8771+ .sps_max_num_reorder_pics = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics, 8772+ .sps_max_latency_increase_plus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_latency_increase + 1, 8773+ .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3, 8774+ .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size, 8775+ .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2, 8776+ .log2_diff_max_min_luma_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size, 8777+ .max_transform_hierarchy_depth_inter = 
sps->max_transform_hierarchy_depth_inter, 8778+ .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra, 8779+ .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1, 8780+ .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1, 8781+ .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3, 8782+ .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size, 8783+ .num_short_term_ref_pic_sets = sps->nb_st_rps, 8784+ .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, 8785+ .chroma_format_idc = sps->chroma_format_idc, 8786+ .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1, 8787+ }; 8788+ 8789+ if (sps->separate_colour_plane_flag) 8790+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE; 8791+ 8792+ if (sps->scaling_list_enable_flag) 8793+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED; 8794+ 8795+ if (sps->amp_enabled_flag) 8796+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_AMP_ENABLED; 8797+ 8798+ if (sps->sao_enabled) 8799+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET; 8800+ 8801+ if (sps->pcm_enabled_flag) 8802+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_ENABLED; 8803+ 8804+ if (sps->pcm.loop_filter_disable_flag) 8805+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED; 8806+ 8807+ if (sps->long_term_ref_pics_present_flag) 8808+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT; 8809+ 8810+ if (sps->sps_temporal_mvp_enabled_flag) 8811+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED; 8812+ 8813+ if (sps->sps_strong_intra_smoothing_enable_flag) 8814+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED; 8815+} 8816+ 8817+static void fill_scaling_matrix(const ScalingList * const sl, 8818+ struct v4l2_ctrl_hevc_scaling_matrix * const sm) 8819+{ 8820+ unsigned int i; 8821+ 8822+ for (i = 0; i < 6; i++) { 8823+ unsigned int j; 8824+ 8825+ for (j = 0; j < 16; j++) 8826+ 
sm->scaling_list_4x4[i][j] = sl->sl[0][i][j]; 8827+ for (j = 0; j < 64; j++) { 8828+ sm->scaling_list_8x8[i][j] = sl->sl[1][i][j]; 8829+ sm->scaling_list_16x16[i][j] = sl->sl[2][i][j]; 8830+ if (i < 2) 8831+ sm->scaling_list_32x32[i][j] = sl->sl[3][i * 3][j]; 8832+ } 8833+ sm->scaling_list_dc_coef_16x16[i] = sl->sl_dc[0][i]; 8834+ if (i < 2) 8835+ sm->scaling_list_dc_coef_32x32[i] = sl->sl_dc[1][i * 3]; 8836+ } 8837+} 8838+ 8839+static void fill_pps(struct v4l2_ctrl_hevc_pps * const ctrl, const HEVCPPS * const pps) 8840+{ 8841+ uint64_t flags = 0; 8842+ 8843+ if (pps->dependent_slice_segments_enabled_flag) 8844+ flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED; 8845+ 8846+ if (pps->output_flag_present_flag) 8847+ flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT; 8848+ 8849+ if (pps->sign_data_hiding_flag) 8850+ flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED; 8851+ 8852+ if (pps->cabac_init_present_flag) 8853+ flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT; 8854+ 8855+ if (pps->constrained_intra_pred_flag) 8856+ flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED; 8857+ 8858+ if (pps->transform_skip_enabled_flag) 8859+ flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED; 8860+ 8861+ if (pps->cu_qp_delta_enabled_flag) 8862+ flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED; 8863+ 8864+ if (pps->pic_slice_level_chroma_qp_offsets_present_flag) 8865+ flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT; 8866+ 8867+ if (pps->weighted_pred_flag) 8868+ flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED; 8869+ 8870+ if (pps->weighted_bipred_flag) 8871+ flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED; 8872+ 8873+ if (pps->transquant_bypass_enable_flag) 8874+ flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED; 8875+ 8876+ if (pps->tiles_enabled_flag) 8877+ flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED; 8878+ 8879+ if (pps->entropy_coding_sync_enabled_flag) 8880+ flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED; 8881+ 8882+ if 
(pps->loop_filter_across_tiles_enabled_flag) 8883+ flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED; 8884+ 8885+ if (pps->seq_loop_filter_across_slices_enabled_flag) 8886+ flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED; 8887+ 8888+ if (pps->deblocking_filter_override_enabled_flag) 8889+ flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED; 8890+ 8891+ if (pps->disable_dbf) 8892+ flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER; 8893+ 8894+ if (pps->lists_modification_present_flag) 8895+ flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT; 8896+ 8897+ if (pps->slice_header_extension_present_flag) 8898+ flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT; 8899+ 8900+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ 8901+ *ctrl = (struct v4l2_ctrl_hevc_pps) { 8902+ .num_extra_slice_header_bits = pps->num_extra_slice_header_bits, 8903+ .init_qp_minus26 = pps->pic_init_qp_minus26, 8904+ .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, 8905+ .pps_cb_qp_offset = pps->cb_qp_offset, 8906+ .pps_cr_qp_offset = pps->cr_qp_offset, 8907+ .pps_beta_offset_div2 = pps->beta_offset / 2, 8908+ .pps_tc_offset_div2 = pps->tc_offset / 2, 8909+ .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2, 8910+ .flags = flags 8911+ }; 8912+ 8913+ 8914+ if (pps->tiles_enabled_flag) { 8915+ ctrl->num_tile_columns_minus1 = pps->num_tile_columns - 1; 8916+ ctrl->num_tile_rows_minus1 = pps->num_tile_rows - 1; 8917+ 8918+ for (int i = 0; i < pps->num_tile_columns; i++) 8919+ ctrl->column_width_minus1[i] = pps->column_width[i] - 1; 8920+ 8921+ for (int i = 0; i < pps->num_tile_rows; i++) 8922+ ctrl->row_height_minus1[i] = pps->row_height[i] - 1; 8923+ } 8924+} 8925+ 8926+// Called before finally returning the frame to the user 8927+// Set corrupt flag here as this is actually the frame structure that 8928+// is going to the user (in MT land each thread has its own pool) 8929+static int 
// Convert a DPB timestamp to a timeval. The input is divided by 1000 to
// obtain microseconds (any remainder is discarded), which are then split
// into seconds and microseconds.
static inline struct timeval cvt_dpb_to_tv(uint64_t t)
{
    const uint64_t usecs = t / 1000;
    const struct timeval tv = {
        .tv_sec  = usecs / 1000000,
        .tv_usec = usecs % 1000000
    };

    return tv;
}

// Convert the context's frame counter to a DPB timestamp by scaling it
// by 1000 in 64-bit arithmetic.
static inline uint64_t cvt_timestamp_to_dpb(const unsigned int t)
{
    const uint64_t wide = t;

    return wide * 1000;
}
rpi_hevc_start_frame 8990+ 8991+ return 0; 8992+} 8993+ 8994+// Object fd & size will be zapped by this & need setting later 8995+static int drm_from_format(AVDRMFrameDescriptor * const desc, const struct v4l2_format * const format) 8996+{ 8997+ AVDRMLayerDescriptor *layer = &desc->layers[0]; 8998+ unsigned int width; 8999+ unsigned int height; 9000+ unsigned int bpl; 9001+ uint32_t pixelformat; 9002+ 9003+ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { 9004+ width = format->fmt.pix_mp.width; 9005+ height = format->fmt.pix_mp.height; 9006+ pixelformat = format->fmt.pix_mp.pixelformat; 9007+ bpl = format->fmt.pix_mp.plane_fmt[0].bytesperline; 9008+ } 9009+ else { 9010+ width = format->fmt.pix.width; 9011+ height = format->fmt.pix.height; 9012+ pixelformat = format->fmt.pix.pixelformat; 9013+ bpl = format->fmt.pix.bytesperline; 9014+ } 9015+ 9016+ switch (pixelformat) { 9017+ case V4L2_PIX_FMT_NV12: 9018+ layer->format = DRM_FORMAT_NV12; 9019+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; 9020+ break; 9021+#if CONFIG_SAND 9022+ case V4L2_PIX_FMT_NV12_COL128: 9023+ layer->format = DRM_FORMAT_NV12; 9024+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl); 9025+ break; 9026+ case V4L2_PIX_FMT_NV12_10_COL128: 9027+ layer->format = DRM_FORMAT_P030; 9028+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl); 9029+ break; 9030+#endif 9031+#ifdef DRM_FORMAT_MOD_ALLWINNER_TILED 9032+ case V4L2_PIX_FMT_SUNXI_TILED_NV12: 9033+ layer->format = DRM_FORMAT_NV12; 9034+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_ALLWINNER_TILED; 9035+ break; 9036+#endif 9037+#if defined(V4L2_PIX_FMT_NV15) && defined(DRM_FORMAT_NV15) 9038+ case V4L2_PIX_FMT_NV15: 9039+ layer->format = DRM_FORMAT_NV15; 9040+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; 9041+ break; 9042+#endif 9043+ case V4L2_PIX_FMT_NV16: 9044+ layer->format = DRM_FORMAT_NV16; 9045+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; 
9046+ break; 9047+#if defined(V4L2_PIX_FMT_NV20) && defined(DRM_FORMAT_NV20) 9048+ case V4L2_PIX_FMT_NV20: 9049+ layer->format = DRM_FORMAT_NV20; 9050+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; 9051+ break; 9052+#endif 9053+ default: 9054+ return -1; 9055+ } 9056+ 9057+ desc->nb_objects = 1; 9058+ desc->objects[0].fd = -1; 9059+ desc->objects[0].size = 0; 9060+ 9061+ desc->nb_layers = 1; 9062+ layer->nb_planes = 2; 9063+ 9064+ layer->planes[0].object_index = 0; 9065+ layer->planes[0].offset = 0; 9066+ layer->planes[0].pitch = bpl; 9067+#if CONFIG_SAND 9068+ if (pixelformat == V4L2_PIX_FMT_NV12_COL128) { 9069+ layer->planes[1].object_index = 0; 9070+ layer->planes[1].offset = height * 128; 9071+ layer->planes[0].pitch = width; 9072+ layer->planes[1].pitch = width; 9073+ } 9074+ else if (pixelformat == V4L2_PIX_FMT_NV12_10_COL128) { 9075+ layer->planes[1].object_index = 0; 9076+ layer->planes[1].offset = height * 128; 9077+ layer->planes[0].pitch = width * 2; // Lies but it keeps DRM import happy 9078+ layer->planes[1].pitch = width * 2; 9079+ } 9080+ else 9081+#endif 9082+ { 9083+ layer->planes[1].object_index = 0; 9084+ layer->planes[1].offset = layer->planes[0].pitch * height; 9085+ layer->planes[1].pitch = layer->planes[0].pitch; 9086+ } 9087+ 9088+ return 0; 9089+} 9090+ 9091+static int 9092+set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq, 9093+ struct req_controls *const controls, 9094+#if HEVC_CTRLS_VERSION >= 2 9095+ struct v4l2_ctrl_hevc_decode_params * const dec, 9096+#endif 9097+ struct v4l2_ctrl_hevc_slice_params * const slices, const unsigned int slice_count, 9098+ void * const offsets, const size_t offset_count) 9099+{ 9100+ int rv; 9101+#if HEVC_CTRLS_VERSION >= 2 9102+ unsigned int n = 3; 9103+#else 9104+ unsigned int n = 2; 9105+#endif 9106+ 9107+ struct v4l2_ext_control control[6] = { 9108+ { 9109+ .id = V4L2_CID_STATELESS_HEVC_SPS, 9110+ .ptr = &controls->sps, 9111+ .size = sizeof(controls->sps), 9112+ }, 
9113+ { 9114+ .id = V4L2_CID_STATELESS_HEVC_PPS, 9115+ .ptr = &controls->pps, 9116+ .size = sizeof(controls->pps), 9117+ }, 9118+#if HEVC_CTRLS_VERSION >= 2 9119+ { 9120+ .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, 9121+ .ptr = dec, 9122+ .size = sizeof(*dec), 9123+ }, 9124+#endif 9125+ }; 9126+ 9127+ if (slices) 9128+ control[n++] = (struct v4l2_ext_control) { 9129+ .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, 9130+ .ptr = slices, 9131+ .size = sizeof(*slices) * slice_count, 9132+ }; 9133+ 9134+ if (controls->has_scaling) 9135+ control[n++] = (struct v4l2_ext_control) { 9136+ .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, 9137+ .ptr = &controls->scaling_matrix, 9138+ .size = sizeof(controls->scaling_matrix), 9139+ }; 9140+ 9141+#if HEVC_CTRLS_VERSION >= 4 9142+ if (offsets) 9143+ control[n++] = (struct v4l2_ext_control) { 9144+ .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, 9145+ .ptr = offsets, 9146+ .size = sizeof(((struct V4L2MediaReqDescriptor *)0)->offsets[0]) * offset_count, 9147+ }; 9148+#endif 9149+ 9150+ rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, n); 9151+ 9152+ return rv; 9153+} 9154+ 9155+// This only works because we started out from a single coded frame buffer 9156+// that will remain intact until after end_frame 9157+static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) 9158+{ 9159+ const HEVCContext * const h = avctx->priv_data; 9160+ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; 9161+ V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0]; 9162+ int bcount = get_bits_count(&h->HEVClc->gb); 9163+ uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount; 9164+ 9165+ const unsigned int n = rd->num_slices; 9166+ const unsigned int block_start = (n / ctx->max_slices) * ctx->max_slices; 9167+ 9168+ int rv; 9169+ struct slice_info * si; 9170+ 9171+ // This looks dodgy but we know that FFmpeg has 
parsed this from a buffer 9172+ // that contains the entire frame including the start code 9173+ if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) { 9174+ buffer -= 3; 9175+ size += 3; 9176+ boff += 24; 9177+ if (buffer[0] != 0 || buffer[1] != 0 || buffer[2] != 1) { 9178+ av_log(avctx, AV_LOG_ERROR, "Start code requested but missing %02x:%02x:%02x\n", 9179+ buffer[0], buffer[1], buffer[2]); 9180+ } 9181+ } 9182+ 9183+ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) { 9184+ if (rd->slices == NULL) { 9185+ if ((rd->slices = av_mallocz(sizeof(*rd->slices))) == NULL) 9186+ return AVERROR(ENOMEM); 9187+ rd->slices->ptr = buffer; 9188+ rd->num_slices = 1; 9189+ } 9190+ rd->slices->len = buffer - rd->slices->ptr + size; 9191+ return 0; 9192+ } 9193+ 9194+ if ((rv = slice_add(rd)) != 0) 9195+ return rv; 9196+ 9197+ si = rd->slices + n; 9198+ si->ptr = buffer; 9199+ si->len = size; 9200+ si->n_offsets = rd->num_offsets; 9201+ 9202+ if (n != block_start) { 9203+ struct slice_info *const si0 = rd->slices + block_start; 9204+ const size_t offset = (buffer - si0->ptr); 9205+ boff += offset * 8; 9206+ size += offset; 9207+ si0->len = si->len + offset; 9208+ } 9209+ 9210+#if HEVC_CTRLS_VERSION >= 2 9211+ if (n == 0) 9212+ fill_decode_params(h, &rd->dec); 9213+ fill_slice_params(h, &rd->dec, rd->slice_params + n, size * 8, boff); 9214+#else 9215+ fill_slice_params(h, rd->slice_params + n, size * 8, boff); 9216+#endif 9217+ if (ctx->max_offsets != 0 && 9218+ (rv = offsets_add(rd, h->sh.num_entry_point_offsets, h->sh.entry_point_offset)) != 0) 9219+ return rv; 9220+ 9221+ return 0; 9222+} 9223+ 9224+static void v4l2_request_hevc_abort_frame(AVCodecContext * const avctx) 9225+{ 9226+ const HEVCContext * const h = avctx->priv_data; 9227+ if (h->ref != NULL) { 9228+ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0]; 9229+ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; 9230+ 9231+ 
media_request_abort(&rd->req); 9232+ mediabufs_src_qent_abort(ctx->mbufs, &rd->qe_src); 9233+ 9234+ decode_q_remove(&ctx->decode_q, &rd->decode_ent); 9235+ } 9236+} 9237+ 9238+static int send_slice(AVCodecContext * const avctx, 9239+ V4L2MediaReqDescriptor * const rd, 9240+ struct req_controls *const controls, 9241+ const unsigned int i, const unsigned int j) 9242+{ 9243+ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; 9244+ 9245+ const int is_last = (j == rd->num_slices); 9246+ struct slice_info *const si = rd->slices + i; 9247+ struct media_request * req = NULL; 9248+ struct qent_src * src = NULL; 9249+ MediaBufsStatus stat; 9250+ void * offsets = rd->offsets + rd->slices[i].n_offsets; 9251+ size_t n_offsets = (is_last ? rd->num_offsets : rd->slices[j].n_offsets) - rd->slices[i].n_offsets; 9252+ 9253+ if ((req = media_request_get(ctx->mpool)) == NULL) { 9254+ av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__); 9255+ return AVERROR(ENOMEM); 9256+ } 9257+ 9258+ if (set_req_ctls(ctx, req, 9259+ controls, 9260+#if HEVC_CTRLS_VERSION >= 2 9261+ &rd->dec, 9262+#endif 9263+ rd->slice_params + i, j - i, 9264+ offsets, n_offsets)) { 9265+ av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__); 9266+ goto fail1; 9267+ } 9268+ 9269+ if ((src = mediabufs_src_qent_get(ctx->mbufs)) == NULL) { 9270+ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get src buffer\n", __func__); 9271+ goto fail1; 9272+ } 9273+ 9274+ if (qent_src_data_copy(src, 0, si->ptr, si->len, ctx->dbufs) != 0) { 9275+ av_log(avctx, AV_LOG_ERROR, "%s: Failed data copy\n", __func__); 9276+ goto fail2; 9277+ } 9278+ 9279+ if (qent_src_params_set(src, &controls->tv)) { 9280+ av_log(avctx, AV_LOG_ERROR, "%s: Failed src param set\n", __func__); 9281+ goto fail2; 9282+ } 9283+ 9284+ stat = mediabufs_start_request(ctx->mbufs, &req, &src, 9285+ i == 0 ? 
rd->qe_dst : NULL, 9286+ is_last); 9287+ 9288+ if (stat != MEDIABUFS_STATUS_SUCCESS) { 9289+ av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__); 9290+ return AVERROR_UNKNOWN; 9291+ } 9292+ return 0; 9293+ 9294+fail2: 9295+ mediabufs_src_qent_abort(ctx->mbufs, &src); 9296+fail1: 9297+ media_request_abort(&req); 9298+ return AVERROR_UNKNOWN; 9299+} 9300+ 9301+static int v4l2_request_hevc_end_frame(AVCodecContext *avctx) 9302+{ 9303+ const HEVCContext * const h = avctx->priv_data; 9304+ V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0]; 9305+ V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; 9306+ struct req_controls rc; 9307+ unsigned int i; 9308+ int rv; 9309+ 9310+ // It is possible, though maybe a bug, to get an end_frame without 9311+ // a previous start_frame. If we do then give up. 9312+ if (!decode_q_in_q(&rd->decode_ent)) { 9313+ av_log(avctx, AV_LOG_DEBUG, "%s: Frame not in decode Q\n", __func__); 9314+ return AVERROR_INVALIDDATA; 9315+ } 9316+ 9317+ { 9318+ const ScalingList *sl = h->ps.pps->scaling_list_data_present_flag ? 9319+ &h->ps.pps->scaling_list : 9320+ h->ps.sps->scaling_list_enable_flag ? 9321+ &h->ps.sps->scaling_list : NULL; 9322+ 9323+ 9324+ memset(&rc, 0, sizeof(rc)); 9325+ rc.tv = cvt_dpb_to_tv(rd->timestamp); 9326+ fill_sps(&rc.sps, h->ps.sps); 9327+ fill_pps(&rc.pps, h->ps.pps); 9328+ if (sl) { 9329+ rc.has_scaling = 1; 9330+ fill_scaling_matrix(sl, &rc.scaling_matrix); 9331+ } 9332+ } 9333+ 9334+ decode_q_wait(&ctx->decode_q, &rd->decode_ent); 9335+ 9336+ // qe_dst needs to be bound to the data buffer and only returned when that is 9337+ // Alloc almost certainly wants to be serialised if there is any chance of blocking 9338+ // so we get the next frame to be free in the thread that needs it for decode first. 
9339+ // 9340+ // In our current world this probably isn't a concern but put it here anyway 9341+ if (!rd->qe_dst) 9342+ { 9343+ if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) { 9344+ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__); 9345+ rv = AVERROR(ENOMEM); 9346+ goto fail; 9347+ } 9348+ } 9349+ 9350+ // Send as slices 9351+ for (i = 0; i < rd->num_slices; i += ctx->max_slices) { 9352+ const unsigned int e = FFMIN(rd->num_slices, i + ctx->max_slices); 9353+ if ((rv = send_slice(avctx, rd, &rc, i, e)) != 0) 9354+ goto fail; 9355+ } 9356+ 9357+ // Set the drm_prime desriptor 9358+ drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs)); 9359+ rd->drm.objects[0].fd = dmabuf_fd(qent_dst_dmabuf(rd->qe_dst, 0)); 9360+ rd->drm.objects[0].size = dmabuf_size(qent_dst_dmabuf(rd->qe_dst, 0)); 9361+ 9362+ decode_q_remove(&ctx->decode_q, &rd->decode_ent); 9363+ return 0; 9364+ 9365+fail: 9366+ decode_q_remove(&ctx->decode_q, &rd->decode_ent); 9367+ return rv; 9368+} 9369+ 9370+static inline int 9371+ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v) 9372+{ 9373+ return v >= c->minimum && v <= c->maximum; 9374+} 9375+ 9376+// Initial check & init 9377+static int 9378+probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) 9379+{ 9380+ const HEVCContext *h = avctx->priv_data; 9381+ const HEVCSPS * const sps = h->ps.sps; 9382+ struct v4l2_ctrl_hevc_sps ctrl_sps; 9383+ unsigned int i; 9384+ 9385+ // Check for var slice array 9386+ struct v4l2_query_ext_ctrl qc[] = { 9387+ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS }, 9388+ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, 9389+ { .id = V4L2_CID_STATELESS_HEVC_SPS }, 9390+ { .id = V4L2_CID_STATELESS_HEVC_PPS }, 9391+ { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX }, 9392+#if HEVC_CTRLS_VERSION >= 2 9393+ { .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS }, 9394+#endif 9395+ }; 9396+ // Order & size must match! 
9397+ static const size_t ctrl_sizes[] = { 9398+ sizeof(struct v4l2_ctrl_hevc_slice_params), 9399+ sizeof(int32_t), 9400+ sizeof(struct v4l2_ctrl_hevc_sps), 9401+ sizeof(struct v4l2_ctrl_hevc_pps), 9402+ sizeof(struct v4l2_ctrl_hevc_scaling_matrix), 9403+#if HEVC_CTRLS_VERSION >= 2 9404+ sizeof(struct v4l2_ctrl_hevc_decode_params), 9405+#endif 9406+ }; 9407+ const unsigned int noof_ctrls = FF_ARRAY_ELEMS(qc); 9408+ 9409+#if HEVC_CTRLS_VERSION == 2 9410+ if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(5, 18, 0)) 9411+ return AVERROR(EINVAL); 9412+#elif HEVC_CTRLS_VERSION == 3 9413+ if (mediabufs_ctl_driver_version(ctx->mbufs) < MEDIABUFS_DRIVER_VERSION(5, 18, 0)) 9414+ return AVERROR(EINVAL); 9415+#endif 9416+ 9417+ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls); 9418+ i = 0; 9419+#if HEVC_CTRLS_VERSION >= 4 9420+ // Skip slice check if no slice mode 9421+ if (qc[1].type != 0 && !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) 9422+ i = 1; 9423+#else 9424+ // Fail frame mode silently for anything prior to V4 9425+ if (qc[1].type == 0 || !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) 9426+ return AVERROR(EINVAL); 9427+#endif 9428+ for (; i != noof_ctrls; ++i) { 9429+ if (qc[i].type == 0) { 9430+ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %#x missing\n", HEVC_CTRLS_VERSION, qc[i].id); 9431+ return AVERROR(EINVAL); 9432+ } 9433+ if (ctrl_sizes[i] != (size_t)qc[i].elem_size) { 9434+ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n", 9435+ HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size); 9436+ return AVERROR(EINVAL); 9437+ } 9438+ } 9439+ 9440+ fill_sps(&ctrl_sps, sps); 9441+ 9442+ if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_STATELESS_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) { 9443+ av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n"); 9444+ return AVERROR(EINVAL); 9445+ } 9446+ 9447+ return 0; 9448+} 9449+ 9450+// Final init 9451+static 
int 9452+set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) 9453+{ 9454+ int ret; 9455+ 9456+ struct v4l2_query_ext_ctrl querys[] = { 9457+ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, 9458+ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, 9459+ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, }, 9460+#if HEVC_CTRLS_VERSION >= 4 9461+ { .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, }, 9462+#endif 9463+ }; 9464+ 9465+ struct v4l2_ext_control ctrls[] = { 9466+ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, 9467+ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, 9468+ }; 9469+ 9470+ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys)); 9471+ 9472+ ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) || 9473+ querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ? 9474+ 1 : querys[2].dims[0]; 9475+ av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices); 9476+ 9477+#if HEVC_CTRLS_VERSION >= 4 9478+ ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ? 
9479+ 0 : querys[3].dims[0]; 9480+ av_log(avctx, AV_LOG_DEBUG, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets); 9481+#else 9482+ ctx->max_offsets = 0; 9483+#endif 9484+ 9485+ if (querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED || 9486+ querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) 9487+ ctx->decode_mode = querys[0].default_value; 9488+ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED)) 9489+ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED; 9490+ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) 9491+ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED; 9492+ else { 9493+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__); 9494+ return AVERROR(EINVAL); 9495+ } 9496+ 9497+ if (querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_NONE || 9498+ querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) 9499+ ctx->start_code = querys[1].default_value; 9500+ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)) 9501+ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B; 9502+ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) 9503+ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; 9504+ else { 9505+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__); 9506+ return AVERROR(EINVAL); 9507+ } 9508+ 9509+ // If we are in slice mode & START_CODE_NONE supported then pick that 9510+ // as it doesn't require the slightly dodgy look backwards in our raw buffer 9511+ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED && 9512+ ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) 9513+ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; 9514+ 9515+ ctrls[0].value = ctx->decode_mode; 9516+ ctrls[1].value = ctx->start_code; 9517+ 9518+ ret = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, NULL, ctrls, FF_ARRAY_ELEMS(ctrls)); 9519+ 
return !ret ? 0 : AVERROR(-ret); 9520+} 9521+ 9522+static void v4l2_req_frame_free(void *opaque, uint8_t *data) 9523+{ 9524+ AVCodecContext *avctx = opaque; 9525+ V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)data; 9526+ 9527+ av_log(NULL, AV_LOG_DEBUG, "%s: avctx=%p data=%p\n", __func__, avctx, data); 9528+ 9529+ qent_dst_unref(&rd->qe_dst); 9530+ 9531+ // We don't expect req or qe_src to be set 9532+ if (rd->req || rd->qe_src) 9533+ av_log(NULL, AV_LOG_ERROR, "%s: qe_src %p or req %p not NULL\n", __func__, rd->req, rd->qe_src); 9534+ 9535+ av_freep(&rd->slices); 9536+ av_freep(&rd->slice_params); 9537+ av_freep(&rd->offsets); 9538+ 9539+ av_free(rd); 9540+} 9541+ 9542+static AVBufferRef *v4l2_req_frame_alloc(void *opaque, int size) 9543+{ 9544+ AVCodecContext *avctx = opaque; 9545+// V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; 9546+// V4L2MediaReqDescriptor *req; 9547+ AVBufferRef *ref; 9548+ uint8_t *data; 9549+// int ret; 9550+ 9551+ data = av_mallocz(size); 9552+ if (!data) 9553+ return NULL; 9554+ 9555+ av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p size=%d data=%p\n", __func__, avctx, size, data); 9556+ ref = av_buffer_create(data, size, v4l2_req_frame_free, avctx, 0); 9557+ if (!ref) { 9558+ av_freep(&data); 9559+ return NULL; 9560+ } 9561+ return ref; 9562+} 9563+ 9564+#if 0 9565+static void v4l2_req_pool_free(void *opaque) 9566+{ 9567+ av_log(NULL, AV_LOG_DEBUG, "%s: opaque=%p\n", __func__, opaque); 9568+} 9569+ 9570+static void v4l2_req_hwframe_ctx_free(AVHWFramesContext *hwfc) 9571+{ 9572+ av_log(NULL, AV_LOG_DEBUG, "%s: hwfc=%p pool=%p\n", __func__, hwfc, hwfc->pool); 9573+ 9574+ av_buffer_pool_uninit(&hwfc->pool); 9575+} 9576+#endif 9577+ 9578+static int frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) 9579+{ 9580+ V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; 9581+ AVHWFramesContext *hwfc = (AVHWFramesContext*)hw_frames_ctx->data; 9582+ const struct v4l2_format *vfmt = 
mediabufs_dst_fmt(ctx->mbufs); 9583+ 9584+ hwfc->format = AV_PIX_FMT_DRM_PRIME; 9585+ hwfc->sw_format = pixel_format_from_format(vfmt); 9586+ if (V4L2_TYPE_IS_MULTIPLANAR(vfmt->type)) { 9587+ hwfc->width = vfmt->fmt.pix_mp.width; 9588+ hwfc->height = vfmt->fmt.pix_mp.height; 9589+ } else { 9590+ hwfc->width = vfmt->fmt.pix.width; 9591+ hwfc->height = vfmt->fmt.pix.height; 9592+ } 9593+#if 0 9594+ hwfc->pool = av_buffer_pool_init2(sizeof(V4L2MediaReqDescriptor), avctx, v4l2_req_frame_alloc, v4l2_req_pool_free); 9595+ if (!hwfc->pool) 9596+ return AVERROR(ENOMEM); 9597+ 9598+ hwfc->free = v4l2_req_hwframe_ctx_free; 9599+ 9600+ hwfc->initial_pool_size = 1; 9601+ 9602+ switch (avctx->codec_id) { 9603+ case AV_CODEC_ID_VP9: 9604+ hwfc->initial_pool_size += 8; 9605+ break; 9606+ case AV_CODEC_ID_VP8: 9607+ hwfc->initial_pool_size += 3; 9608+ break; 9609+ default: 9610+ hwfc->initial_pool_size += 2; 9611+ } 9612+#endif 9613+ av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p ctx=%p hw_frames_ctx=%p hwfc=%p pool=%p width=%d height=%d initial_pool_size=%d\n", __func__, avctx, ctx, hw_frames_ctx, hwfc, hwfc->pool, hwfc->width, hwfc->height, hwfc->initial_pool_size); 9614+ 9615+ return 0; 9616+} 9617+ 9618+static int alloc_frame(AVCodecContext * avctx, AVFrame *frame) 9619+{ 9620+ int rv; 9621+ 9622+ frame->buf[0] = v4l2_req_frame_alloc(avctx, sizeof(V4L2MediaReqDescriptor)); 9623+ if (!frame->buf[0]) 9624+ return AVERROR(ENOMEM); 9625+ 9626+ frame->data[0] = frame->buf[0]->data; 9627+ 9628+ frame->hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx); 9629+ 9630+ if ((rv = ff_attach_decode_data(frame)) != 0) { 9631+ av_log(avctx, AV_LOG_ERROR, "Failed to attach decode data to frame\n"); 9632+ av_frame_unref(frame); 9633+ return rv; 9634+ } 9635+ 9636+ return 0; 9637+} 9638+ 9639+const v4l2_req_decode_fns V(ff_v4l2_req_hevc) = { 9640+ .src_pix_fmt_v4l2 = V4L2_PIX_FMT_HEVC_SLICE, 9641+ .name = "V4L2 HEVC stateless V" STR(HEVC_CTRLS_VERSION), 9642+ .probe = probe, 9643+ .set_controls = 
set_controls, 9644+ 9645+ .start_frame = v4l2_request_hevc_start_frame, 9646+ .decode_slice = v4l2_request_hevc_decode_slice, 9647+ .end_frame = v4l2_request_hevc_end_frame, 9648+ .abort_frame = v4l2_request_hevc_abort_frame, 9649+ .frame_params = frame_params, 9650+ .alloc_frame = alloc_frame, 9651+}; 9652+ 9653--- /dev/null 9654+++ b/libavcodec/v4l2_req_media.c 9655@@ -0,0 +1,1808 @@ 9656+/* 9657+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> 9658+ * 9659+ * Permission is hereby granted, free of charge, to any person obtaining a 9660+ * copy of this software and associated documentation files (the 9661+ * "Software"), to deal in the Software without restriction, including 9662+ * without limitation the rights to use, copy, modify, merge, publish, 9663+ * distribute, sub license, and/or sell copies of the Software, and to 9664+ * permit persons to whom the Software is furnished to do so, subject to 9665+ * the following conditions: 9666+ * 9667+ * The above copyright notice and this permission notice (including the 9668+ * next paragraph) shall be included in all copies or substantial portions 9669+ * of the Software. 9670+ * 9671+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 9672+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 9673+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 9674+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR 9675+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 9676+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 9677+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
9678+ */ 9679+ 9680+#include <errno.h> 9681+#include <fcntl.h> 9682+#include <poll.h> 9683+#include <pthread.h> 9684+#include <semaphore.h> 9685+#include <stdatomic.h> 9686+#include <stdbool.h> 9687+#include <stdlib.h> 9688+#include <string.h> 9689+#include <unistd.h> 9690+#include <linux/media.h> 9691+#include <linux/mman.h> 9692+#include <sys/ioctl.h> 9693+#include <sys/select.h> 9694+#include <sys/ioctl.h> 9695+#include <sys/mman.h> 9696+ 9697+#include <linux/videodev2.h> 9698+ 9699+#include "v4l2_req_dmabufs.h" 9700+#include "v4l2_req_media.h" 9701+#include "v4l2_req_pollqueue.h" 9702+#include "v4l2_req_utils.h" 9703+#include "weak_link.h" 9704+ 9705+ 9706+/* floor(log2(x)) */ 9707+static unsigned int log2_size(size_t x) 9708+{ 9709+ unsigned int n = 0; 9710+ 9711+ if (x & ~0xffff) { 9712+ n += 16; 9713+ x >>= 16; 9714+ } 9715+ if (x & ~0xff) { 9716+ n += 8; 9717+ x >>= 8; 9718+ } 9719+ if (x & ~0xf) { 9720+ n += 4; 9721+ x >>= 4; 9722+ } 9723+ if (x & ~3) { 9724+ n += 2; 9725+ x >>= 2; 9726+ } 9727+ return (x & ~1) ? n + 1 : n; 9728+} 9729+ 9730+static size_t round_up_size(const size_t x) 9731+{ 9732+ /* Admit no size < 256 */ 9733+ const unsigned int n = x < 256 ? 8 : log2_size(x) - 1; 9734+ 9735+ return x >= (3 << n) ? 
4 << n : (3 << n); 9736+} 9737+ 9738+struct media_request; 9739+ 9740+struct media_pool { 9741+ int fd; 9742+ sem_t sem; 9743+ pthread_mutex_t lock; 9744+ unsigned int pool_n; 9745+ struct media_request * pool_reqs; 9746+ struct media_request * free_reqs; 9747+ struct pollqueue * pq; 9748+}; 9749+ 9750+struct media_request { 9751+ struct media_request * next; 9752+ struct media_pool * mp; 9753+ int fd; 9754+ struct polltask * pt; 9755+}; 9756+ 9757+static inline enum v4l2_memory 9758+mediabufs_memory_to_v4l2(const enum mediabufs_memory m) 9759+{ 9760+ return (enum v4l2_memory)m; 9761+} 9762+ 9763+const char * 9764+mediabufs_memory_name(const enum mediabufs_memory m) 9765+{ 9766+ switch (m) { 9767+ case MEDIABUFS_MEMORY_UNSET: 9768+ return "Unset"; 9769+ case MEDIABUFS_MEMORY_MMAP: 9770+ return "MMap"; 9771+ case MEDIABUFS_MEMORY_USERPTR: 9772+ return "UserPtr"; 9773+ case MEDIABUFS_MEMORY_OVERLAY: 9774+ return "Overlay"; 9775+ case MEDIABUFS_MEMORY_DMABUF: 9776+ return "DMABuf"; 9777+ default: 9778+ break; 9779+ } 9780+ return "Unknown"; 9781+} 9782+ 9783+ 9784+static inline int do_trywait(sem_t *const sem) 9785+{ 9786+ while (sem_trywait(sem)) { 9787+ if (errno != EINTR) 9788+ return -errno; 9789+ } 9790+ return 0; 9791+} 9792+ 9793+static inline int do_wait(sem_t *const sem) 9794+{ 9795+ while (sem_wait(sem)) { 9796+ if (errno != EINTR) 9797+ return -errno; 9798+ } 9799+ return 0; 9800+} 9801+ 9802+static int request_buffers(int video_fd, unsigned int type, 9803+ enum mediabufs_memory memory, unsigned int buffers_count) 9804+{ 9805+ struct v4l2_requestbuffers buffers; 9806+ int rc; 9807+ 9808+ memset(&buffers, 0, sizeof(buffers)); 9809+ buffers.type = type; 9810+ buffers.memory = mediabufs_memory_to_v4l2(memory); 9811+ buffers.count = buffers_count; 9812+ 9813+ rc = ioctl(video_fd, VIDIOC_REQBUFS, &buffers); 9814+ if (rc < 0) { 9815+ rc = -errno; 9816+ request_log("Unable to request %d type %d buffers: %s\n", buffers_count, type, strerror(-rc)); 9817+ return rc; 
9818+ } 9819+ 9820+ return 0; 9821+} 9822+ 9823+ 9824+static int set_stream(int video_fd, unsigned int type, bool enable) 9825+{ 9826+ enum v4l2_buf_type buf_type = type; 9827+ int rc; 9828+ 9829+ rc = ioctl(video_fd, enable ? VIDIOC_STREAMON : VIDIOC_STREAMOFF, 9830+ &buf_type); 9831+ if (rc < 0) { 9832+ rc = -errno; 9833+ request_log("Unable to %sable stream: %s\n", 9834+ enable ? "en" : "dis", strerror(-rc)); 9835+ return rc; 9836+ } 9837+ 9838+ return 0; 9839+} 9840+ 9841+ 9842+ 9843+struct media_request * media_request_get(struct media_pool * const mp) 9844+{ 9845+ struct media_request *req = NULL; 9846+ 9847+ /* Timeout handled by poll code */ 9848+ if (do_wait(&mp->sem)) 9849+ return NULL; 9850+ 9851+ pthread_mutex_lock(&mp->lock); 9852+ req = mp->free_reqs; 9853+ if (req) { 9854+ mp->free_reqs = req->next; 9855+ req->next = NULL; 9856+ } 9857+ pthread_mutex_unlock(&mp->lock); 9858+ return req; 9859+} 9860+ 9861+int media_request_fd(const struct media_request * const req) 9862+{ 9863+ return req->fd; 9864+} 9865+ 9866+int media_request_start(struct media_request * const req) 9867+{ 9868+ while (ioctl(req->fd, MEDIA_REQUEST_IOC_QUEUE, NULL) == -1) 9869+ { 9870+ const int err = errno; 9871+ if (err == EINTR) 9872+ continue; 9873+ request_log("%s: Failed to Q media: (%d) %s\n", __func__, err, strerror(err)); 9874+ return -err; 9875+ } 9876+ 9877+ pollqueue_add_task(req->pt, 2000); 9878+ return 0; 9879+} 9880+ 9881+static void media_request_done(void *v, short revents) 9882+{ 9883+ struct media_request *const req = v; 9884+ struct media_pool *const mp = req->mp; 9885+ 9886+ /* ** Not sure what to do about timeout */ 9887+ 9888+ if (ioctl(req->fd, MEDIA_REQUEST_IOC_REINIT, NULL) < 0) 9889+ request_log("Unable to reinit media request: %s\n", 9890+ strerror(errno)); 9891+ 9892+ pthread_mutex_lock(&mp->lock); 9893+ req->next = mp->free_reqs; 9894+ mp->free_reqs = req; 9895+ pthread_mutex_unlock(&mp->lock); 9896+ sem_post(&mp->sem); 9897+} 9898+ 9899+int 
media_request_abort(struct media_request ** const preq) 9900+{ 9901+ struct media_request * const req = *preq; 9902+ 9903+ if (req == NULL) 9904+ return 0; 9905+ *preq = NULL; 9906+ 9907+ media_request_done(req, 0); 9908+ return 0; 9909+} 9910+ 9911+static void free_req_pool(struct media_request * const pool, const unsigned int n) 9912+{ 9913+ unsigned int i; 9914+ for (i = 0; i != n; ++i) { 9915+ struct media_request * const req = pool + i; 9916+ if (req->pt) 9917+ polltask_delete(&req->pt); 9918+ if (req->fd != -1) 9919+ close(req->fd); 9920+ } 9921+ free(pool); 9922+} 9923+ 9924+struct media_pool * media_pool_new(const char * const media_path, 9925+ struct pollqueue * const pq, 9926+ const unsigned int n) 9927+{ 9928+ struct media_pool * const mp = calloc(1, sizeof(*mp)); 9929+ unsigned int i; 9930+ 9931+ if (!mp) 9932+ goto fail0; 9933+ 9934+ mp->pq = pq; 9935+ pthread_mutex_init(&mp->lock, NULL); 9936+ mp->fd = open(media_path, O_RDWR | O_NONBLOCK); 9937+ if (mp->fd == -1) { 9938+ request_log("Failed to open '%s': %s\n", media_path, strerror(errno)); 9939+ goto fail1; 9940+ } 9941+ 9942+ if ((mp->pool_reqs = calloc(n, sizeof(*mp->pool_reqs))) == NULL) 9943+ goto fail3; 9944+ mp->pool_n = n; 9945+ for (i = 0; i != n; ++i) { 9946+ mp->pool_reqs[i].mp = mp; 9947+ mp->pool_reqs[i].fd = -1; 9948+ } 9949+ 9950+ for (i = 0; i != n; ++i) { 9951+ struct media_request * const req = mp->pool_reqs + i; 9952+ 9953+ if (ioctl(mp->fd, MEDIA_IOC_REQUEST_ALLOC, &req->fd) == -1) { 9954+ request_log("Failed to alloc request %d: %s\n", i, strerror(errno)); 9955+ goto fail4; 9956+ } 9957+ 9958+ req->pt = polltask_new(pq, req->fd, POLLPRI, media_request_done, req); 9959+ if (!req->pt) 9960+ goto fail4; 9961+ 9962+ req->next = mp->free_reqs, 9963+ mp->free_reqs = req; 9964+ } 9965+ 9966+ sem_init(&mp->sem, 0, n); 9967+ 9968+ return mp; 9969+ 9970+fail4: 9971+ free_req_pool(mp->pool_reqs, mp->pool_n); 9972+fail3: 9973+ close(mp->fd); 9974+ pthread_mutex_destroy(&mp->lock); 
9975+fail1: 9976+ free(mp); 9977+fail0: 9978+ return NULL; 9979+} 9980+ 9981+void media_pool_delete(struct media_pool ** pMp) 9982+{ 9983+ struct media_pool * const mp = *pMp; 9984+ 9985+ if (!mp) 9986+ return; 9987+ *pMp = NULL; 9988+ 9989+ free_req_pool(mp->pool_reqs, mp->pool_n); 9990+ close(mp->fd); 9991+ sem_destroy(&mp->sem); 9992+ pthread_mutex_destroy(&mp->lock); 9993+ free(mp); 9994+} 9995+ 9996+ 9997+#define INDEX_UNSET (~(uint32_t)0) 9998+ 9999+enum qent_status { 10000+ QENT_NEW = 0, // Initial state - shouldn't last 10001+ QENT_FREE, // On free chain 10002+ QENT_PENDING, // User has ent 10003+ QENT_WAITING, // On inuse 10004+ QENT_DONE, // Frame rx 10005+ QENT_ERROR, // Error 10006+ QENT_IMPORT 10007+}; 10008+ 10009+struct qent_base { 10010+ atomic_int ref_count; 10011+ struct qent_base *next; 10012+ struct qent_base *prev; 10013+ enum qent_status status; 10014+ enum mediabufs_memory memtype; 10015+ uint32_t index; 10016+ struct dmabuf_h *dh[VIDEO_MAX_PLANES]; 10017+ struct timeval timestamp; 10018+}; 10019+ 10020+struct qent_src { 10021+ struct qent_base base; 10022+ int fixed_size; 10023+}; 10024+ 10025+struct qent_dst { 10026+ struct qent_base base; 10027+ bool waiting; 10028+ pthread_mutex_t lock; 10029+ pthread_cond_t cond; 10030+ struct ff_weak_link_client * mbc_wl; 10031+}; 10032+ 10033+struct qe_list_head { 10034+ struct qent_base *head; 10035+ struct qent_base *tail; 10036+}; 10037+ 10038+struct buf_pool { 10039+ enum mediabufs_memory memtype; 10040+ pthread_mutex_t lock; 10041+ sem_t free_sem; 10042+ struct qe_list_head free; 10043+ struct qe_list_head inuse; 10044+}; 10045+ 10046+ 10047+static inline struct qent_dst *base_to_dst(struct qent_base *be) 10048+{ 10049+ return (struct qent_dst *)be; 10050+} 10051+ 10052+static inline struct qent_src *base_to_src(struct qent_base *be) 10053+{ 10054+ return (struct qent_src *)be; 10055+} 10056+ 10057+ 10058+#define QENT_BASE_INITIALIZER(mtype) {\ 10059+ .ref_count = ATOMIC_VAR_INIT(0),\ 10060+ 
.status = QENT_NEW,\ 10061+ .memtype = (mtype),\ 10062+ .index = INDEX_UNSET\ 10063+} 10064+ 10065+static void qe_base_uninit(struct qent_base *const be) 10066+{ 10067+ unsigned int i; 10068+ for (i = 0; i != VIDEO_MAX_PLANES; ++i) { 10069+ dmabuf_free(be->dh[i]); 10070+ be->dh[i] = NULL; 10071+ } 10072+} 10073+ 10074+static void qe_src_free(struct qent_src *const be_src) 10075+{ 10076+ if (!be_src) 10077+ return; 10078+ qe_base_uninit(&be_src->base); 10079+ free(be_src); 10080+} 10081+ 10082+static struct qent_src * qe_src_new(enum mediabufs_memory mtype) 10083+{ 10084+ struct qent_src *const be_src = malloc(sizeof(*be_src)); 10085+ if (!be_src) 10086+ return NULL; 10087+ *be_src = (struct qent_src){ 10088+ .base = QENT_BASE_INITIALIZER(mtype) 10089+ }; 10090+ return be_src; 10091+} 10092+ 10093+static void qe_dst_free(struct qent_dst *const be_dst) 10094+{ 10095+ if (!be_dst) 10096+ return; 10097+ 10098+ ff_weak_link_unref(&be_dst->mbc_wl); 10099+ pthread_cond_destroy(&be_dst->cond); 10100+ pthread_mutex_destroy(&be_dst->lock); 10101+ qe_base_uninit(&be_dst->base); 10102+ free(be_dst); 10103+} 10104+ 10105+static struct qent_dst* qe_dst_new(struct ff_weak_link_master * const wl, const enum mediabufs_memory memtype) 10106+{ 10107+ struct qent_dst *const be_dst = malloc(sizeof(*be_dst)); 10108+ if (!be_dst) 10109+ return NULL; 10110+ *be_dst = (struct qent_dst){ 10111+ .base = QENT_BASE_INITIALIZER(memtype), 10112+ .lock = PTHREAD_MUTEX_INITIALIZER, 10113+ .cond = PTHREAD_COND_INITIALIZER, 10114+ .mbc_wl = ff_weak_link_ref(wl) 10115+ }; 10116+ return be_dst; 10117+} 10118+ 10119+static void ql_add_tail(struct qe_list_head * const ql, struct qent_base * be) 10120+{ 10121+ if (ql->tail) 10122+ ql->tail->next = be; 10123+ else 10124+ ql->head = be; 10125+ be->prev = ql->tail; 10126+ be->next = NULL; 10127+ ql->tail = be; 10128+} 10129+ 10130+static struct qent_base * ql_extract(struct qe_list_head * const ql, struct qent_base * be) 10131+{ 10132+ if (!be) 10133+ 
return NULL; 10134+ 10135+ if (be->next) 10136+ be->next->prev = be->prev; 10137+ else 10138+ ql->tail = be->prev; 10139+ if (be->prev) 10140+ be->prev->next = be->next; 10141+ else 10142+ ql->head = be->next; 10143+ be->next = NULL; 10144+ be->prev = NULL; 10145+ return be; 10146+} 10147+ 10148+ 10149+static void bq_put_free(struct buf_pool *const bp, struct qent_base * be) 10150+{ 10151+ ql_add_tail(&bp->free, be); 10152+} 10153+ 10154+static struct qent_base * bq_get_free(struct buf_pool *const bp) 10155+{ 10156+ return ql_extract(&bp->free, bp->free.head); 10157+} 10158+ 10159+static struct qent_base * bq_extract_inuse(struct buf_pool *const bp, struct qent_base *const be) 10160+{ 10161+ return ql_extract(&bp->inuse, be); 10162+} 10163+ 10164+static struct qent_base * bq_get_inuse(struct buf_pool *const bp) 10165+{ 10166+ return ql_extract(&bp->inuse, bp->inuse.head); 10167+} 10168+ 10169+static void bq_free_all_free_src(struct buf_pool *const bp) 10170+{ 10171+ struct qent_base *be; 10172+ while ((be = bq_get_free(bp)) != NULL) 10173+ qe_src_free(base_to_src(be)); 10174+} 10175+ 10176+static void bq_free_all_inuse_src(struct buf_pool *const bp) 10177+{ 10178+ struct qent_base *be; 10179+ while ((be = bq_get_inuse(bp)) != NULL) 10180+ qe_src_free(base_to_src(be)); 10181+} 10182+ 10183+static void bq_free_all_free_dst(struct buf_pool *const bp) 10184+{ 10185+ struct qent_base *be; 10186+ while ((be = bq_get_free(bp)) != NULL) 10187+ qe_dst_free(base_to_dst(be)); 10188+} 10189+ 10190+static void queue_put_free(struct buf_pool *const bp, struct qent_base *be) 10191+{ 10192+ unsigned int i; 10193+ 10194+ pthread_mutex_lock(&bp->lock); 10195+ /* Clear out state vars */ 10196+ be->timestamp.tv_sec = 0; 10197+ be->timestamp.tv_usec = 0; 10198+ be->status = QENT_FREE; 10199+ for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) 10200+ dmabuf_len_set(be->dh[i], 0); 10201+ bq_put_free(bp, be); 10202+ pthread_mutex_unlock(&bp->lock); 10203+ sem_post(&bp->free_sem); 10204+} 
10205+ 10206+static bool queue_is_inuse(const struct buf_pool *const bp) 10207+{ 10208+ return bp->inuse.tail != NULL; 10209+} 10210+ 10211+static void queue_put_inuse(struct buf_pool *const bp, struct qent_base *be) 10212+{ 10213+ if (!be) 10214+ return; 10215+ pthread_mutex_lock(&bp->lock); 10216+ ql_add_tail(&bp->inuse, be); 10217+ be->status = QENT_WAITING; 10218+ pthread_mutex_unlock(&bp->lock); 10219+} 10220+ 10221+static struct qent_base *queue_get_free(struct buf_pool *const bp) 10222+{ 10223+ struct qent_base *buf; 10224+ 10225+ if (do_wait(&bp->free_sem)) 10226+ return NULL; 10227+ pthread_mutex_lock(&bp->lock); 10228+ buf = bq_get_free(bp); 10229+ pthread_mutex_unlock(&bp->lock); 10230+ return buf; 10231+} 10232+ 10233+static struct qent_base *queue_tryget_free(struct buf_pool *const bp) 10234+{ 10235+ struct qent_base *buf; 10236+ 10237+ if (do_trywait(&bp->free_sem)) 10238+ return NULL; 10239+ pthread_mutex_lock(&bp->lock); 10240+ buf = bq_get_free(bp); 10241+ pthread_mutex_unlock(&bp->lock); 10242+ return buf; 10243+} 10244+ 10245+static struct qent_base * queue_find_extract_index(struct buf_pool *const bp, const unsigned int index) 10246+{ 10247+ struct qent_base *be; 10248+ 10249+ pthread_mutex_lock(&bp->lock); 10250+ /* Expect 1st in Q, but allow anywhere */ 10251+ for (be = bp->inuse.head; be; be = be->next) { 10252+ if (be->index == index) { 10253+ bq_extract_inuse(bp, be); 10254+ break; 10255+ } 10256+ } 10257+ pthread_mutex_unlock(&bp->lock); 10258+ 10259+ return be; 10260+} 10261+ 10262+static void queue_delete(struct buf_pool *const bp) 10263+{ 10264+ sem_destroy(&bp->free_sem); 10265+ pthread_mutex_destroy(&bp->lock); 10266+ free(bp); 10267+} 10268+ 10269+static struct buf_pool* queue_new(const int vfd) 10270+{ 10271+ struct buf_pool *bp = calloc(1, sizeof(*bp)); 10272+ if (!bp) 10273+ return NULL; 10274+ pthread_mutex_init(&bp->lock, NULL); 10275+ sem_init(&bp->free_sem, 0, 0); 10276+ return bp; 10277+} 10278+ 10279+ 10280+struct 
mediabufs_ctl { 10281+ atomic_int ref_count; /* 0 is single ref for easier atomics */ 10282+ void * dc; 10283+ int vfd; 10284+ bool stream_on; 10285+ bool polling; 10286+ bool dst_fixed; // Dst Q is fixed size 10287+ pthread_mutex_t lock; 10288+ struct buf_pool * src; 10289+ struct buf_pool * dst; 10290+ struct polltask * pt; 10291+ struct pollqueue * pq; 10292+ struct ff_weak_link_master * this_wlm; 10293+ 10294+ enum mediabufs_memory src_memtype; 10295+ enum mediabufs_memory dst_memtype; 10296+ struct v4l2_format src_fmt; 10297+ struct v4l2_format dst_fmt; 10298+ struct v4l2_capability capability; 10299+}; 10300+ 10301+static int qe_v4l2_queue(struct qent_base *const be, 10302+ const int vfd, struct media_request *const mreq, 10303+ const struct v4l2_format *const fmt, 10304+ const bool is_dst, const bool hold_flag) 10305+{ 10306+ struct v4l2_buffer buffer = { 10307+ .type = fmt->type, 10308+ .memory = mediabufs_memory_to_v4l2(be->memtype), 10309+ .index = be->index 10310+ }; 10311+ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; 10312+ 10313+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { 10314+ unsigned int i; 10315+ for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) { 10316+ if (is_dst) 10317+ dmabuf_len_set(be->dh[i], 0); 10318+ 10319+ /* *** Really need a pixdesc rather than a format so we can fill in data_offset */ 10320+ planes[i].length = dmabuf_size(be->dh[i]); 10321+ planes[i].bytesused = dmabuf_len(be->dh[i]); 10322+ if (be->memtype == MEDIABUFS_MEMORY_DMABUF) 10323+ planes[i].m.fd = dmabuf_fd(be->dh[i]); 10324+ else 10325+ planes[i].m.mem_offset = 0; 10326+ } 10327+ buffer.m.planes = planes; 10328+ buffer.length = i; 10329+ } 10330+ else { 10331+ if (is_dst) 10332+ dmabuf_len_set(be->dh[0], 0); 10333+ 10334+ buffer.bytesused = dmabuf_len(be->dh[0]); 10335+ buffer.length = dmabuf_size(be->dh[0]); 10336+ if (be->memtype == MEDIABUFS_MEMORY_DMABUF) 10337+ buffer.m.fd = dmabuf_fd(be->dh[0]); 10338+ else 10339+ buffer.m.offset = 0; 10340+ } 10341+ 10342+ 
if (!is_dst && mreq) { 10343+ buffer.flags |= V4L2_BUF_FLAG_REQUEST_FD; 10344+ buffer.request_fd = media_request_fd(mreq); 10345+ if (hold_flag) 10346+ buffer.flags |= V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF; 10347+ } 10348+ 10349+ if (is_dst) 10350+ be->timestamp = (struct timeval){0,0}; 10351+ 10352+ buffer.timestamp = be->timestamp; 10353+ 10354+ while (ioctl(vfd, VIDIOC_QBUF, &buffer)) { 10355+ const int err = errno; 10356+ if (err != EINTR) { 10357+ request_log("%s: Failed to Q buffer: err=%d (%s)\n", __func__, err, strerror(err)); 10358+ return -err; 10359+ } 10360+ } 10361+ return 0; 10362+} 10363+ 10364+static struct qent_base * qe_dequeue(struct buf_pool *const bp, 10365+ const int vfd, 10366+ const struct v4l2_format * const f) 10367+{ 10368+ struct qent_base *be; 10369+ int rc; 10370+ const bool mp = V4L2_TYPE_IS_MULTIPLANAR(f->type); 10371+ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; 10372+ struct v4l2_buffer buffer = { 10373+ .type = f->type, 10374+ .memory = mediabufs_memory_to_v4l2(bp->memtype) 10375+ }; 10376+ if (mp) { 10377+ buffer.length = f->fmt.pix_mp.num_planes; 10378+ buffer.m.planes = planes; 10379+ } 10380+ 10381+ while ((rc = ioctl(vfd, VIDIOC_DQBUF, &buffer)) != 0 && 10382+ errno == EINTR) 10383+ /* Loop */; 10384+ if (rc) { 10385+ request_log("Error DQing buffer type %d: %s\n", f->type, strerror(errno)); 10386+ return NULL; 10387+ } 10388+ 10389+ be = queue_find_extract_index(bp, buffer.index); 10390+ if (!be) { 10391+ request_log("Failed to find index %d in Q\n", buffer.index); 10392+ return NULL; 10393+ } 10394+ 10395+ if (mp) { 10396+ unsigned int i; 10397+ for (i = 0; i != buffer.length; ++i) 10398+ dmabuf_len_set(be->dh[i], V4L2_TYPE_IS_CAPTURE(f->type) ? planes[i].bytesused : 0); 10399+ } 10400+ else 10401+ dmabuf_len_set(be->dh[0], V4L2_TYPE_IS_CAPTURE(f->type) ? buffer.length : 0); 10402+ 10403+ be->timestamp = buffer.timestamp; 10404+ be->status = (buffer.flags & V4L2_BUF_FLAG_ERROR) ? 
QENT_ERROR : QENT_DONE; 10405+ return be; 10406+} 10407+ 10408+static void qe_dst_done(struct qent_dst * dst_be) 10409+{ 10410+ pthread_mutex_lock(&dst_be->lock); 10411+ dst_be->waiting = false; 10412+ pthread_cond_broadcast(&dst_be->cond); 10413+ pthread_mutex_unlock(&dst_be->lock); 10414+ 10415+ qent_dst_unref(&dst_be); 10416+} 10417+ 10418+static bool qe_dst_waiting(struct qent_dst *const dst_be) 10419+{ 10420+ bool waiting; 10421+ pthread_mutex_lock(&dst_be->lock); 10422+ waiting = dst_be->waiting; 10423+ dst_be->waiting = true; 10424+ pthread_mutex_unlock(&dst_be->lock); 10425+ return waiting; 10426+} 10427+ 10428+ 10429+static bool mediabufs_wants_poll(const struct mediabufs_ctl *const mbc) 10430+{ 10431+ return queue_is_inuse(mbc->src) || queue_is_inuse(mbc->dst); 10432+} 10433+ 10434+static void mediabufs_poll_cb(void * v, short revents) 10435+{ 10436+ struct mediabufs_ctl *mbc = v; 10437+ struct qent_src *src_be = NULL; 10438+ struct qent_dst *dst_be = NULL; 10439+ 10440+ if (!revents) 10441+ request_err(mbc->dc, "%s: Timeout\n", __func__); 10442+ 10443+ pthread_mutex_lock(&mbc->lock); 10444+ mbc->polling = false; 10445+ 10446+ if ((revents & POLLOUT) != 0) 10447+ src_be = base_to_src(qe_dequeue(mbc->src, mbc->vfd, &mbc->src_fmt)); 10448+ if ((revents & POLLIN) != 0) 10449+ dst_be = base_to_dst(qe_dequeue(mbc->dst, mbc->vfd, &mbc->dst_fmt)); 10450+ 10451+ /* Reschedule */ 10452+ if (mediabufs_wants_poll(mbc)) { 10453+ mbc->polling = true; 10454+ pollqueue_add_task(mbc->pt, 2000); 10455+ } 10456+ pthread_mutex_unlock(&mbc->lock); 10457+ 10458+ if (src_be) 10459+ queue_put_free(mbc->src, &src_be->base); 10460+ if (dst_be) 10461+ qe_dst_done(dst_be); 10462+} 10463+ 10464+int qent_src_params_set(struct qent_src *const be_src, const struct timeval * timestamp) 10465+{ 10466+ struct qent_base *const be = &be_src->base; 10467+ 10468+ be->timestamp = *timestamp; 10469+ return 0; 10470+} 10471+ 10472+struct timeval qent_dst_timestamp_get(const struct qent_dst 
*const be_dst) 10473+{ 10474+ return be_dst->base.timestamp; 10475+} 10476+ 10477+static int qent_base_realloc(struct qent_base *const be, const size_t len, struct dmabufs_ctl * dbsc) 10478+{ 10479+ if (!be->dh[0] || len > dmabuf_size(be->dh[0])) { 10480+ size_t newsize = round_up_size(len); 10481+ request_log("%s: Overrun %zd > %zd; trying %zd\n", __func__, len, dmabuf_size(be->dh[0]), newsize); 10482+ if (!dbsc) { 10483+ request_log("%s: No dmbabuf_ctrl for realloc\n", __func__); 10484+ return -ENOMEM; 10485+ } 10486+ if ((be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], newsize)) == NULL) { 10487+ request_log("%s: Realloc %zd failed\n", __func__, newsize); 10488+ return -ENOMEM; 10489+ } 10490+ } 10491+ return 0; 10492+} 10493+ 10494+int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc) 10495+{ 10496+ struct qent_base *const be = &be_src->base; 10497+ return qent_base_realloc(be, len, dbsc); 10498+} 10499+ 10500+ 10501+int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc) 10502+{ 10503+ void * dst; 10504+ struct qent_base *const be = &be_src->base; 10505+ int rv; 10506+ 10507+ // Realloc doesn't copy so don't alloc if offset != 0 10508+ if ((rv = qent_base_realloc(be, offset + len, 10509+ be_src->fixed_size || offset ? NULL : dbsc)) != 0) 10510+ return rv; 10511+ 10512+ dmabuf_write_start(be->dh[0]); 10513+ dst = dmabuf_map(be->dh[0]); 10514+ if (!dst) 10515+ return -1; 10516+ memcpy((char*)dst + offset, src, len); 10517+ dmabuf_len_set(be->dh[0], len); 10518+ dmabuf_write_end(be->dh[0]); 10519+ return 0; 10520+} 10521+ 10522+const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be_dst, unsigned int plane) 10523+{ 10524+ const struct qent_base *const be = &be_dst->base; 10525+ 10526+ return (plane >= sizeof(be->dh)/sizeof(be->dh[0])) ? 
NULL : be->dh[plane]; 10527+} 10528+ 10529+int qent_dst_dup_fd(const struct qent_dst *const be_dst, unsigned int plane) 10530+{ 10531+ return dup(dmabuf_fd(qent_dst_dmabuf(be_dst, plane))); 10532+} 10533+ 10534+MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc, 10535+ struct media_request **const pmreq, 10536+ struct qent_src **const psrc_be, 10537+ struct qent_dst *const dst_be, 10538+ const bool is_final) 10539+{ 10540+ struct media_request * mreq = *pmreq; 10541+ struct qent_src *const src_be = *psrc_be; 10542+ 10543+ // Req & src are always both "consumed" 10544+ *pmreq = NULL; 10545+ *psrc_be = NULL; 10546+ 10547+ pthread_mutex_lock(&mbc->lock); 10548+ 10549+ if (!src_be) 10550+ goto fail1; 10551+ 10552+ if (dst_be) { 10553+ if (qe_dst_waiting(dst_be)) { 10554+ request_info(mbc->dc, "Request buffer already waiting on start\n"); 10555+ goto fail1; 10556+ } 10557+ dst_be->base.timestamp = (struct timeval){0,0}; 10558+ if (qe_v4l2_queue(&dst_be->base, mbc->vfd, NULL, &mbc->dst_fmt, true, false)) 10559+ goto fail1; 10560+ 10561+ qent_dst_ref(dst_be); 10562+ queue_put_inuse(mbc->dst, &dst_be->base); 10563+ } 10564+ 10565+ if (qe_v4l2_queue(&src_be->base, mbc->vfd, mreq, &mbc->src_fmt, false, !is_final)) 10566+ goto fail1; 10567+ queue_put_inuse(mbc->src, &src_be->base); 10568+ 10569+ if (!mbc->polling && mediabufs_wants_poll(mbc)) { 10570+ mbc->polling = true; 10571+ pollqueue_add_task(mbc->pt, 2000); 10572+ } 10573+ pthread_mutex_unlock(&mbc->lock); 10574+ 10575+ if (media_request_start(mreq)) 10576+ return MEDIABUFS_ERROR_OPERATION_FAILED; 10577+ 10578+ return MEDIABUFS_STATUS_SUCCESS; 10579+ 10580+fail1: 10581+ media_request_abort(&mreq); 10582+ if (src_be) 10583+ queue_put_free(mbc->src, &src_be->base); 10584+ 10585+// *** TODO: If src Q fails this doesnt unwind properly - separate dst Q from src Q 10586+ if (dst_be) { 10587+ dst_be->base.status = QENT_ERROR; 10588+ qe_dst_done(dst_be); 10589+ } 10590+ pthread_mutex_unlock(&mbc->lock); 
10591+ return MEDIABUFS_ERROR_OPERATION_FAILED; 10592+} 10593+ 10594+/* (Re)allocate be's dmabufs to the sizes given in fmt: one per plane for multi-planar types, otherwise a single buffer of fmt.pix.sizeimage bytes. Returns 0 on success, -1 on failure; on a multi-planar failure the lower-numbered planes are freed and set to NULL. */ 10595+static int qe_alloc_from_fmt(struct qent_base *const be, 10596+ struct dmabufs_ctl *const dbsc, 10597+ const struct v4l2_format *const fmt) 10598+{ 10599+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { 10600+ unsigned int i; 10601+ for (i = 0; i != fmt->fmt.pix_mp.num_planes; ++i) { 10602+ be->dh[i] = dmabuf_realloc(dbsc, be->dh[i], 10603+ fmt->fmt.pix_mp.plane_fmt[i].sizeimage); 10604+ /* On failure tidy up and die */ 10605+ if (!be->dh[i]) { 10606+ while (i--) { 10607+ dmabuf_free(be->dh[i]); 10608+ be->dh[i] = NULL; 10609+ } 10610+ return -1; 10611+ } 10612+ } 10613+ } 10614+ else { 10615+// be->dh[0] = dmabuf_alloc(dbsc, fmt->fmt.pix.sizeimage); 10616+ size_t size = fmt->fmt.pix.sizeimage; 10617+ be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], size); 10618+ if (!be->dh[0]) 10619+ return -1; 10620+ } 10621+ return 0; 10622+} 10623+/* Fill *fmt for buftype with the requested pixfmt and size, apply it with VIDIOC_S_FMT (retrying on EINTR) and check what the driver returned. bufsize is passed as sizeimage (multi-planar: only when non-zero, as a single plane). Returns MEDIABUFS_ERROR_OPERATION_FAILED if the ioctl fails and MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE if the result is smaller than requested or has a different pixel format. */ 10624+static MediaBufsStatus fmt_set(struct v4l2_format *const fmt, const int fd, 10625+ const enum v4l2_buf_type buftype, 10626+ uint32_t pixfmt, 10627+ const unsigned int width, const unsigned int height, 10628+ const size_t bufsize) 10629+{ 10630+ *fmt = (struct v4l2_format){.type = buftype}; 10631+ 10632+ if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) { 10633+ fmt->fmt.pix_mp.width = width; 10634+ fmt->fmt.pix_mp.height = height; 10635+ fmt->fmt.pix_mp.pixelformat = pixfmt; 10636+ if (bufsize) { 10637+ fmt->fmt.pix_mp.num_planes = 1; 10638+ fmt->fmt.pix_mp.plane_fmt[0].sizeimage = bufsize; 10639+ } 10640+ } 10641+ else { 10642+ fmt->fmt.pix.width = width; 10643+ fmt->fmt.pix.height = height; 10644+ fmt->fmt.pix.pixelformat = pixfmt; 10645+ fmt->fmt.pix.sizeimage = bufsize; 10646+ } 10647+ 10648+ while (ioctl(fd, VIDIOC_S_FMT, fmt)) 10649+ if (errno != EINTR) 10650+ return MEDIABUFS_ERROR_OPERATION_FAILED; 10651+ 10652+ // Treat anything where we don't get at least what we asked for as a fail 10653+ if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) { 10654+ if (fmt->fmt.pix_mp.width
< width || 10655+ fmt->fmt.pix_mp.height < height || 10656+ fmt->fmt.pix_mp.pixelformat != pixfmt) { 10657+ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; 10658+ } 10659+ } 10660+ else { 10661+ if (fmt->fmt.pix.width < width || 10662+ fmt->fmt.pix.height < height || 10663+ fmt->fmt.pix.pixelformat != pixfmt) { 10664+ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; 10665+ } 10666+ } 10667+ 10668+ return MEDIABUFS_STATUS_SUCCESS; 10669+} 10670+/* Enumerate the formats of type_v4l2 on fd (VIDIOC_ENUM_FMT) and try each one whose flags contain all of flags_must, none of flags_not, and which accept_fn accepts; the first one fmt_set() can apply at width x height is left in *fmt. Returns MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE once enumeration fails with anything other than EINTR. */ 10671+static MediaBufsStatus find_fmt_flags(struct v4l2_format *const fmt, 10672+ const int fd, 10673+ const unsigned int type_v4l2, 10674+ const uint32_t flags_must, 10675+ const uint32_t flags_not, 10676+ const unsigned int width, 10677+ const unsigned int height, 10678+ mediabufs_dst_fmt_accept_fn *const accept_fn, 10679+ void *const accept_v) 10680+{ 10681+ unsigned int i; 10682+ 10683+ for (i = 0;; ++i) { 10684+ struct v4l2_fmtdesc fmtdesc = { 10685+ .index = i, 10686+ .type = type_v4l2 10687+ }; 10688+ while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) { 10689+ if (errno != EINTR) 10690+ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; 10691+ } 10692+ if ((fmtdesc.flags & flags_must) != flags_must || 10693+ (fmtdesc.flags & flags_not)) 10694+ continue; 10695+ if (!accept_fn(accept_v, &fmtdesc)) 10696+ continue; 10697+ 10698+ if (fmt_set(fmt, fd, fmtdesc.type, fmtdesc.pixelformat, 10699+ width, height, 0) == MEDIABUFS_STATUS_SUCCESS) 10700+ return MEDIABUFS_STATUS_SUCCESS; 10701+ } 10702+ return 0; /* not reached - the loop above only exits via return */ 10703+} 10704+ 10705+ 10706+/* Wait for qent done: blocks on be_dst->cond while be_dst->waiting is set (or until pthread_cond_wait fails), then maps the entry status to a MediaBufsStatus */ 10707+ 10708+MediaBufsStatus qent_dst_wait(struct qent_dst *const be_dst) 10709+{ 10710+ struct qent_base *const be = &be_dst->base; 10711+ enum qent_status estat; 10712+ 10713+ pthread_mutex_lock(&be_dst->lock); 10714+ while (be_dst->waiting && 10715+ !pthread_cond_wait(&be_dst->cond, &be_dst->lock)) 10716+ /* Loop */; 10717+ estat = be->status; 10718+ pthread_mutex_unlock(&be_dst->lock); 10719+ 10720+ return estat == QENT_DONE ?
MEDIABUFS_STATUS_SUCCESS : 10721+ estat == QENT_ERROR ? MEDIABUFS_ERROR_DECODING_ERROR : 10722+ MEDIABUFS_ERROR_OPERATION_FAILED; 10723+} 10724+/* Map the dmabuf for buffer/plane buf_no and return its address. Returns NULL if buf_no is outside the dh[] array (same bound as qent_dst_dmabuf()). */ 10725+const uint8_t * qent_dst_data(struct qent_dst *const be_dst, unsigned int buf_no) 10726+{ 10727+ struct qent_base *const be = &be_dst->base; 10728+ return buf_no >= sizeof(be->dh)/sizeof(be->dh[0]) ? NULL : dmabuf_map(be->dh[buf_no]); 10729+} 10730+/* Call dmabuf_read_start() on each populated plane; if one fails the planes already started are ended again and MEDIABUFS_ERROR_ALLOCATION_FAILED is returned. */ 10731+MediaBufsStatus qent_dst_read_start(struct qent_dst *const be_dst) 10732+{ 10733+ struct qent_base *const be = &be_dst->base; 10734+ unsigned int i; 10735+ for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) { 10736+ if (dmabuf_read_start(be->dh[i])) { 10737+ while (i--) 10738+ dmabuf_read_end(be->dh[i]); 10739+ return MEDIABUFS_ERROR_ALLOCATION_FAILED; 10740+ } 10741+ } 10742+ return MEDIABUFS_STATUS_SUCCESS; 10743+} 10744+/* Call dmabuf_read_end() on each populated plane; returns MEDIABUFS_ERROR_OPERATION_FAILED if any of them fails. */ 10745+MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be_dst) 10746+{ 10747+ struct qent_base *const be = &be_dst->base; 10748+ unsigned int i; 10749+ MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS; 10750+ 10751+ for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) { 10752+ if (dmabuf_read_end(be->dh[i])) 10753+ status = MEDIABUFS_ERROR_OPERATION_FAILED; 10754+ } 10755+ return status; 10756+} 10757+/* Add a reference to be_dst (NULL is allowed) and return it. */ 10758+struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst) 10759+{ 10760+ if (be_dst) 10761+ atomic_fetch_add(&be_dst->base.ref_count, 1); 10762+ return be_dst; 10763+} 10764+/* Drop a reference and set *pbe_dst to NULL. ref_count is zero-based: the entry is released when the count was already 0, going back to its mediabufs_ctl's free queue if the weak link can still be locked, otherwise being freed. */ 10765+void qent_dst_unref(struct qent_dst ** const pbe_dst) 10766+{ 10767+ struct qent_dst * const be_dst = *pbe_dst; 10768+ struct mediabufs_ctl * mbc; 10769+ if (!be_dst) 10770+ return; 10771+ *pbe_dst = NULL; 10772+ 10773+ if (atomic_fetch_sub(&be_dst->base.ref_count, 1) != 0) 10774+ return; 10775+ 10776+ if ((mbc = ff_weak_link_lock(&be_dst->mbc_wl)) != NULL) { 10777+ queue_put_free(mbc->dst, &be_dst->base); 10778+ ff_weak_link_unlock(be_dst->mbc_wl); 10779+ } 10780+ else { 10781+ qe_dst_free(be_dst); 10782+ } 10783+} 10784+ 10785+MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst,
10786+ unsigned int plane, 10787+ int fd, size_t size) 10788+{ 10789+ struct qent_base *const be = &be_dst->base; 10790+ struct dmabuf_h * dh; 10791+ 10792+ if (be->status != QENT_IMPORT || be->dh[plane]) 10793+ return MEDIABUFS_ERROR_OPERATION_FAILED; 10794+ 10795+ dh = dmabuf_import(fd, size); 10796+ if (!dh) 10797+ return MEDIABUFS_ERROR_ALLOCATION_FAILED; 10798+ 10799+ be->dh[plane] = dh; 10800+ return MEDIABUFS_STATUS_SUCCESS; 10801+} 10802+ 10803+// Returns noof buffers created, -ve for error 10804+static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, struct qent_dst * const qes[]) 10805+{ 10806+ unsigned int i; 10807+ 10808+ struct v4l2_create_buffers cbuf = { 10809+ .count = n, 10810+ .memory = mediabufs_memory_to_v4l2(mbc->dst->memtype), 10811+ .format = mbc->dst_fmt, 10812+ }; 10813+ 10814+ while (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf)) { 10815+ const int err = -errno; 10816+ if (err != EINTR) { 10817+ request_err(mbc->dc, "%s: Failed to create V4L2 buffer\n", __func__); 10818+ return -err; 10819+ } 10820+ } 10821+ 10822+ if (cbuf.count != n) 10823+ request_warn(mbc->dc, "%s: Created %d of %d V4L2 buffers requested\n", __func__, cbuf.count, n); 10824+ 10825+ for (i = 0; i != cbuf.count; ++i) 10826+ qes[i]->base.index = cbuf.index + i; 10827+ 10828+ return cbuf.count; 10829+} 10830+ 10831+static MediaBufsStatus 10832+qe_import_from_buf(struct mediabufs_ctl *const mbc, struct qent_base * const be, const struct v4l2_format *const fmt, 10833+ const unsigned int n, const bool x_dmabuf) 10834+{ 10835+ struct v4l2_buffer buf = { 10836+ .index = n, 10837+ .type = fmt->type, 10838+ }; 10839+ struct v4l2_plane planes[VIDEO_MAX_PLANES]; 10840+ int ret; 10841+ 10842+ if (be->dh[0]) 10843+ return 0; 10844+ 10845+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { 10846+ memset(planes, 0, sizeof(planes)); 10847+ buf.m.planes = planes; 10848+ buf.length = VIDEO_MAX_PLANES; 10849+ } 10850+ 10851+ if ((ret = ioctl(mbc->vfd, VIDIOC_QUERYBUF, &buf)) != 0) 
{ 10852+ request_err(mbc->dc, "VIDIOC_QUERYBUF failed"); 10853+ return MEDIABUFS_ERROR_OPERATION_FAILED; 10854+ } 10855+ 10856+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) 10857+ { 10858+ unsigned int i; 10859+ for (i = 0; i != buf.length; ++i) { 10860+ if (x_dmabuf) { 10861+ struct v4l2_exportbuffer xbuf = { 10862+ .type = buf.type, 10863+ .index = buf.index, 10864+ .plane = i, 10865+ .flags = O_RDWR, // *** Arguably O_RDONLY would be fine 10866+ }; 10867+ if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) { 10868+ be->dh[i] = dmabuf_import(xbuf.fd, planes[i].length); 10869+ close(xbuf.fd); // dmabuf_import dups the fd so close this one 10870+ } 10871+ } 10872+ else { 10873+ be->dh[i] = dmabuf_import_mmap( 10874+ mmap(NULL, planes[i].length, 10875+ PROT_READ | PROT_WRITE, 10876+ MAP_SHARED | MAP_POPULATE, 10877+ mbc->vfd, planes[i].m.mem_offset), 10878+ planes[i].length); 10879+ } 10880+ /* On failure tidy up and die */ 10881+ if (!be->dh[i]) { 10882+ while (i--) { 10883+ dmabuf_free(be->dh[i]); 10884+ be->dh[i] = NULL; 10885+ } 10886+ return MEDIABUFS_ERROR_OPERATION_FAILED; 10887+ } 10888+ } 10889+ } 10890+ else 10891+ { 10892+ if (x_dmabuf) { 10893+ struct v4l2_exportbuffer xbuf = { 10894+ .type = buf.type, 10895+ .index = buf.index, 10896+ .flags = O_RDWR, // *** Arguably O_RDONLY would be fine 10897+ }; 10898+ if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) { 10899+ be->dh[0] = dmabuf_import(xbuf.fd, buf.length); close(xbuf.fd); /* as in the multi-planar path: dmabuf_import dups the fd, so the exported one must be closed here or it leaks */ } 10900+ } 10901+ else { 10902+ be->dh[0] = dmabuf_import_mmap( 10903+ mmap(NULL, buf.length, 10904+ PROT_READ | PROT_WRITE, 10905+ MAP_SHARED | MAP_POPULATE, 10906+ mbc->vfd, buf.m.offset), 10907+ buf.length); 10908+ } 10909+ /* On failure tidy up and die */ 10910+ if (!be->dh[0]) { 10911+ return MEDIABUFS_ERROR_OPERATION_FAILED; 10912+ } 10913+ } 10914+ 10915+ return 0; 10916+} 10917+/* Get a dst buffer entry ready for use. With mbc == NULL a detached entry in QENT_IMPORT state is returned; otherwise an entry is taken from the dst free queue (or, for non-fixed pools, created together with a new V4L2 buffer) and backed with memory. Returns NULL on failure. */ 10918+struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struct dmabufs_ctl *const dbsc) 10919+{ 10920+ struct qent_dst * be_dst; 10921+ 10922+ if (mbc
== NULL) { 10923+ be_dst = qe_dst_new(NULL, MEDIABUFS_MEMORY_DMABUF); 10924+ if (be_dst) 10925+ be_dst->base.status = QENT_IMPORT; 10926+ return be_dst; 10927+ } 10928+/* Fixed pools only hand out existing free entries; non-fixed pools fall back to creating a new entry plus V4L2 buffer */ 10929+ if (mbc->dst_fixed) { 10930+ be_dst = base_to_dst(queue_get_free(mbc->dst)); 10931+ if (!be_dst) 10932+ return NULL; 10933+ } 10934+ else { 10935+ be_dst = base_to_dst(queue_tryget_free(mbc->dst)); 10936+ if (!be_dst) { 10937+ be_dst = qe_dst_new(mbc->this_wlm, mbc->dst->memtype); 10938+ if (!be_dst) 10939+ return NULL; 10940+ 10941+ if (create_dst_bufs(mbc, 1, &be_dst) != 1) { 10942+ qe_dst_free(be_dst); 10943+ return NULL; 10944+ } 10945+ } 10946+ } 10947+ 10948+ if (mbc->dst->memtype == MEDIABUFS_MEMORY_MMAP) { 10949+ if (qe_import_from_buf(mbc, &be_dst->base, &mbc->dst_fmt, be_dst->base.index, true)) { 10950+ request_err(mbc->dc, "Failed to export as dmabuf\n"); 10951+ queue_put_free(mbc->dst, &be_dst->base); 10952+ return NULL; 10953+ } 10954+ } 10955+ else { 10956+ if (qe_alloc_from_fmt(&be_dst->base, dbsc, &mbc->dst_fmt)) { 10957+ /* Given how create buf works we can't uncreate it on alloc failure 10958+ * all we can do is put it on the free Q 10959+ */ 10960+ queue_put_free(mbc->dst, &be_dst->base); 10961+ return NULL; 10962+ } 10963+ } 10964+ 10965+ be_dst->base.status = QENT_PENDING; 10966+ atomic_store(&be_dst->base.ref_count, 0); 10967+ return be_dst; 10968+} 10969+/* Returns a pointer to the dst format currently stored in mbc. */ 10970+const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc) 10971+{ 10972+ return &mbc->dst_fmt; 10973+} 10974+/* Negotiate the dst format for width x height: formats without V4L2_FMT_FLAG_EMULATED are tried first, then emulated ones; accept_fn(accept_v, fmtdesc) filters the candidates. */ 10975+MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc, 10976+ const unsigned int width, 10977+ const unsigned int height, 10978+ mediabufs_dst_fmt_accept_fn *const accept_fn, 10979+ void *const accept_v) 10980+{ 10981+ MediaBufsStatus status; 10982+ unsigned int i; 10983+ const enum v4l2_buf_type buf_type = mbc->dst_fmt.type; 10984+ static const struct { 10985+ unsigned int flags_must; 10986+ unsigned int flags_not; 10987+ } trys[] = { 10988+ {0,
V4L2_FMT_FLAG_EMULATED}, 10989+ {V4L2_FMT_FLAG_EMULATED, 0}, 10990+ }; 10991+ for (i = 0; i != sizeof(trys)/sizeof(trys[0]); ++i) { 10992+ status = find_fmt_flags(&mbc->dst_fmt, mbc->vfd, 10993+ buf_type, 10994+ trys[i].flags_must, 10995+ trys[i].flags_not, 10996+ width, height, accept_fn, accept_v); 10997+ if (status != MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE) 10998+ return status; 10999+ } 11000+/* Only reached when every pass returned MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE */ 11001+ if (status != MEDIABUFS_STATUS_SUCCESS) 11002+ return status; 11003+ 11004+ /* Try to create a buffer - don't alloc (NOTE: no such code follows - status is simply returned) */ 11005+ return status; 11006+} 11007+/* Create n (at most 32) dst slots: qents plus matching V4L2 buffers, put on the dst free queue. 'fixed' is stored in mbc->dst_fixed on success. Returns MEDIABUFS_ERROR_ALLOCATION_FAILED on any failure, including a partial create. */ 11008+// ** This is a mess if we get partial alloc but without any way to remove 11009+// individual V4L2 Q members we are somewhat stuffed 11010+MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype) 11011+{ 11012+ unsigned int i; 11013+ int a = 0; 11014+ unsigned int qc; 11015+ struct qent_dst * qes[32]; 11016+ 11017+ if (n > 32) 11018+ return MEDIABUFS_ERROR_ALLOCATION_FAILED; 11019+ 11020+ mbc->dst->memtype = memtype; 11021+ 11022+ // Create qents first as it is hard to get rid of the V4L2 buffers on error 11023+ for (qc = 0; qc != n; ++qc) 11024+ { 11025+ if ((qes[qc] = qe_dst_new(mbc->this_wlm, mbc->dst->memtype)) == NULL) 11026+ goto fail; 11027+ } 11028+ 11029+ if ((a = create_dst_bufs(mbc, n, qes)) < 0) 11030+ goto fail; 11031+ 11032+ for (i = 0; i != a; ++i) 11033+ queue_put_free(mbc->dst, &qes[i]->base); 11034+ 11035+ if (a != n) 11036+ goto fail; 11037+ 11038+ mbc->dst_fixed = fixed; 11039+ return MEDIABUFS_STATUS_SUCCESS; 11040+ 11041+fail: 11042+ for (i = (a < 0 ?
0 : a); i != qc; ++i) 11043+ qe_dst_free(qes[i]); 11044+ 11045+ return MEDIABUFS_ERROR_ALLOCATION_FAILED; 11046+} 11047+/* Take a src entry from the free queue and mark it QENT_PENDING. Returns NULL if no entry could be obtained. */ 11048+struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc) 11049+{ 11050+ struct qent_base * buf = queue_get_free(mbc->src); 11051+ if (!buf) return NULL; /* queue_get_free() can fail (mediabufs_dst_qent_alloc checks for it too) - do not dereference NULL */ buf->status = QENT_PENDING; 11052+ return base_to_src(buf); 11053+} 11054+/* Return an unused src entry to the free queue and set *pqe_src to NULL; a NULL entry is ignored. */ 11055+void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src) 11056+{ 11057+ struct qent_src *const qe_src = *pqe_src; 11058+ if (!qe_src) 11059+ return; 11060+ *pqe_src = NULL; 11061+ queue_put_free(mbc->src, &qe_src->base); 11062+} 11063+/* Probe with a zero-count VIDIOC_CREATE_BUFS for format f and check the reported capabilities: DMABUF needs V4L2_BUF_CAP_SUPPORTS_DMABUF, MMAP is accepted whenever the ioctl succeeds, any other memory type is unsupported. */ 11064+static MediaBufsStatus 11065+chk_memory_type(struct mediabufs_ctl *const mbc, 11066+ const struct v4l2_format * const f, 11067+ const enum mediabufs_memory m) 11068+{ 11069+ struct v4l2_create_buffers cbuf = { 11070+ .count = 0, 11071+ .memory = V4L2_MEMORY_MMAP, 11072+ .format = *f 11073+ }; 11074+ 11075+ if (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf) != 0) 11076+ return MEDIABUFS_ERROR_OPERATION_FAILED; 11077+ 11078+ switch (m) { 11079+ case MEDIABUFS_MEMORY_DMABUF: 11080+ // 0 = Unknown but assume not in that case 11081+ if ((cbuf.capabilities & V4L2_BUF_CAP_SUPPORTS_DMABUF) == 0) 11082+ return MEDIABUFS_ERROR_UNSUPPORTED_MEMORY; 11083+ break; 11084+ case MEDIABUFS_MEMORY_MMAP: 11085+ break; 11086+ default: 11087+ return MEDIABUFS_ERROR_UNSUPPORTED_MEMORY; 11088+ } 11089+ 11090+ return MEDIABUFS_STATUS_SUCCESS; 11091+} 11092+/* Check whether memtype can be used with the current src format. */ 11093+MediaBufsStatus 11094+mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype) 11095+{ 11096+ return chk_memory_type(mbc, &mbc->src_fmt, memtype); 11097+} 11098+/* Check whether memtype can be used with the current dst format. */ 11099+MediaBufsStatus 11100+mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype) 11101+{ 11102+ return chk_memory_type(mbc, &mbc->dst_fmt, memtype); 11103+} 11104+ 11105+/* src format must have been set up before this */ 11106+MediaBufsStatus
mediabufs_src_pool_create(struct mediabufs_ctl *const mbc, 11107+ struct dmabufs_ctl * const dbsc, 11108+ unsigned int n, const enum mediabufs_memory memtype) 11109+{ /* Request n src buffers of memtype with VIDIOC_REQBUFS and build one qent for each buffer actually granted, putting it on the src free queue. On failure the free src entries are released and the V4L2 buffers are dropped with a zero-count REQBUFS. */ 11110+ unsigned int i; 11111+ struct v4l2_requestbuffers req = { 11112+ .count = n, 11113+ .type = mbc->src_fmt.type, 11114+ .memory = mediabufs_memory_to_v4l2(memtype) 11115+ }; 11116+ 11117+ bq_free_all_free_src(mbc->src); 11118+ 11119+ while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1) { 11120+ if (errno != EINTR) { 11121+ request_err(mbc->dc, "%s: Failed to request src bufs\n", __func__); 11122+ return MEDIABUFS_ERROR_OPERATION_FAILED; 11123+ } 11124+ } 11125+ 11126+ if (n > req.count) { 11127+ request_info(mbc->dc, "Only allocated %d of %d src buffers requested\n", req.count, n); 11128+ n = req.count; 11129+ } 11130+ 11131+ for (i = 0; i != n; ++i) { 11132+ struct qent_src *const be_src = qe_src_new(memtype); 11133+ if (!be_src) { 11134+ request_err(mbc->dc, "Failed to create src be %d\n", i); 11135+ goto fail; 11136+ } 11137+ switch (memtype) { 11138+ case MEDIABUFS_MEMORY_MMAP: 11139+ if (qe_import_from_buf(mbc, &be_src->base, &mbc->src_fmt, i, false)) { 11140+ qe_src_free(be_src); 11141+ goto fail; 11142+ } 11143+ be_src->fixed_size = 1; 11144+ break; 11145+ case MEDIABUFS_MEMORY_DMABUF: 11146+ if (qe_alloc_from_fmt(&be_src->base, dbsc, &mbc->src_fmt)) { 11147+ qe_src_free(be_src); 11148+ goto fail; 11149+ } 11150+ be_src->fixed_size = !mediabufs_src_resizable(mbc); 11151+ break; 11152+ default: 11153+ request_err(mbc->dc, "Unexpected memorty type\n"); 11154+ qe_src_free(be_src); /* not yet on any queue, so it must be freed here like in the other failure branches */ goto fail; 11155+ } 11156+ be_src->base.index = i; 11157+ 11158+ queue_put_free(mbc->src, &be_src->base); 11159+ } 11160+ 11161+ mbc->src->memtype = memtype; 11162+ return MEDIABUFS_STATUS_SUCCESS; 11163+ 11164+fail: 11165+ bq_free_all_free_src(mbc->src); 11166+ req.count = 0; 11167+ while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1 && 11168+ errno == EINTR) 11169+ /* Loop */; 11170+ 11171+ return MEDIABUFS_ERROR_OPERATION_FAILED;
11172+} 11173+ 11174+ 11175+ 11176+/* 11177+ * Set stuff order: 11178+ * Set src fmt 11179+ * Set parameters (sps) on vfd 11180+ * Negotiate dst format (dst_fmt_set) 11181+ * Create src buffers 11182+ * Alloc a dst buffer or Create dst slots 11183+*/ /* Turn streaming on for the src queue and then the dst queue; if dst fails, src is turned off again. Does nothing if streaming is already on. */ 11184+MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc) 11185+{ 11186+ if (mbc->stream_on) 11187+ return MEDIABUFS_STATUS_SUCCESS; 11188+ 11189+ if (set_stream(mbc->vfd, mbc->src_fmt.type, true) < 0) { 11190+ request_log("Failed to set stream on src type %d\n", mbc->src_fmt.type); 11191+ return MEDIABUFS_ERROR_OPERATION_FAILED; 11192+ } 11193+ 11194+ if (set_stream(mbc->vfd, mbc->dst_fmt.type, true) < 0) { 11195+ request_log("Failed to set stream on dst type %d\n", mbc->dst_fmt.type); 11196+ set_stream(mbc->vfd, mbc->src_fmt.type, false); 11197+ return MEDIABUFS_ERROR_OPERATION_FAILED; 11198+ } 11199+ 11200+ mbc->stream_on = true; 11201+ return MEDIABUFS_STATUS_SUCCESS; 11202+} 11203+/* Turn streaming off for dst and then src; both are attempted even if the first fails, and stream_on is cleared regardless. Does nothing if not streaming. */ 11204+MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc) 11205+{ 11206+ MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS; 11207+ 11208+ if (!mbc->stream_on) 11209+ return MEDIABUFS_STATUS_SUCCESS; 11210+ 11211+ if (set_stream(mbc->vfd, mbc->dst_fmt.type, false) < 0) { 11212+ request_log("Failed to set stream off dst type %d\n", mbc->dst_fmt.type); 11213+ status = MEDIABUFS_ERROR_OPERATION_FAILED; 11214+ } 11215+ 11216+ if (set_stream(mbc->vfd, mbc->src_fmt.type, false) < 0) { 11217+ request_log("Failed to set stream off src type %d\n", mbc->src_fmt.type); 11218+ status = MEDIABUFS_ERROR_OPERATION_FAILED; 11219+ } 11220+ 11221+ mbc->stream_on = false; 11222+ return status; 11223+} 11224+/* Set n extended controls with VIDIOC_S_EXT_CTRLS (retrying on EINTR); if mreq is given the controls are attached to that request's fd. Returns 0 on success or a negative errno value. */ 11225+int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, struct v4l2_ext_control control_array[], unsigned int n) 11226+{ 11227+ struct v4l2_ext_controls controls = { 11228+ .controls = control_array, 11229+ .count = n 11230+ }; 11231+ 11232+ if (mreq) { 11233+
controls.which = V4L2_CTRL_WHICH_REQUEST_VAL; 11234+ controls.request_fd = media_request_fd(mreq); 11235+ } 11236+ 11237+ while (ioctl(mbc->vfd, VIDIOC_S_EXT_CTRLS, &controls)) 11238+ { 11239+ const int err = errno; 11240+ if (err != EINTR) { 11241+ request_err(mbc->dc, "Unable to set controls: %s\n", strerror(err)); 11242+ return -err; 11243+ } 11244+ } 11245+ 11246+ return 0; 11247+} 11248+/* Convenience wrapper: set one pointer-type control (id, data, size) through mediabufs_ctl_set_ext_ctrls(). */ 11249+MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc, 11250+ struct media_request * const mreq, 11251+ unsigned int id, void *data, 11252+ unsigned int size) 11253+{ 11254+ struct v4l2_ext_control control = { 11255+ .id = id, 11256+ .ptr = data, 11257+ .size = size 11258+ }; 11259+ 11260+ int rv = mediabufs_ctl_set_ext_ctrls(mbc, mreq, &control, 1); 11261+ return !rv ? MEDIABUFS_STATUS_SUCCESS : MEDIABUFS_ERROR_OPERATION_FAILED; 11262+} 11263+/* Set the src format through fmt_set() (result stored in mbc->src_fmt) and log an error on failure. */ 11264+MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc, 11265+ enum v4l2_buf_type buf_type, 11266+ const uint32_t pixfmt, 11267+ const uint32_t width, const uint32_t height, 11268+ const size_t bufsize) 11269+{ 11270+ MediaBufsStatus rv = fmt_set(&mbc->src_fmt, mbc->vfd, buf_type, pixfmt, width, height, bufsize); 11271+ if (rv != MEDIABUFS_STATUS_SUCCESS) 11272+ request_err(mbc->dc, "Failed to set src buftype %d, format %#x %dx%d\n", buf_type, pixfmt, width, height); 11273+ 11274+ return rv; 11275+} 11276+/* Query n controls with VIDIOC_QUERY_EXT_CTRL (retrying on EINTR). An entry whose query fails gets type = 0. Returns 0 if every query succeeded, otherwise the negative errno of the last failure. */ 11277+int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n) 11278+{ 11279+ int rv = 0; 11280+ while (n--) { 11281+ while (ioctl(mbc->vfd, VIDIOC_QUERY_EXT_CTRL, ctrls)) { 11282+ const int err = errno; 11283+ if (err != EINTR) { 11284+ // Often used for probing - errors are to be expected 11285+ request_debug(mbc->dc, "Failed to query ext id=%#x, err=%d\n", ctrls->id, err); 11286+ ctrls->type = 0; // 0 is invalid 11287+ rv = -err; 11288+ break; 11289+ } 11290+ } 11291+ ++ctrls; 11292+ } 11293+ return rv; 11294+} 11295+ 11296+int
mediabufs_src_resizable(const struct mediabufs_ctl *const mbc) 11297+{ /* Currently hard-wired to return 0; the multi-planar test in the #else branch is compiled out */ 11298+#if 1 11299+ return 0; 11300+#else 11301+ // Single planar OUTPUT can only take exact size buffers 11302+ // Multiplanar will take larger than negotiated 11303+ return V4L2_TYPE_IS_MULTIPLANAR(mbc->src_fmt.type); 11304+#endif 11305+} 11306+/* Tear down mbc: break the weak link, delete the poll task, stop streaming, release the V4L2 buffers and the queued entries (in-use dst entries are completed with QENT_ERROR), then close the fd, destroy the lock and free mbc. NULL is ignored. */ 11307+static void mediabufs_ctl_delete(struct mediabufs_ctl *const mbc) 11308+{ 11309+ if (!mbc) 11310+ return; 11311+ 11312+ // Break the weak link first 11313+ ff_weak_link_break(&mbc->this_wlm); 11314+ 11315+ polltask_delete(&mbc->pt); 11316+ 11317+ mediabufs_stream_off(mbc); 11318+ 11319+ // Empty v4l2 buffer stash 11320+ request_buffers(mbc->vfd, mbc->src_fmt.type, V4L2_MEMORY_MMAP, 0); 11321+ request_buffers(mbc->vfd, mbc->dst_fmt.type, V4L2_MEMORY_MMAP, 0); 11322+ 11323+ bq_free_all_free_src(mbc->src); 11324+ bq_free_all_inuse_src(mbc->src); 11325+ bq_free_all_free_dst(mbc->dst); 11326+ 11327+ { 11328+ struct qent_dst *dst_be; 11329+ while ((dst_be = base_to_dst(bq_get_inuse(mbc->dst))) != NULL) { 11330+ dst_be->base.timestamp = (struct timeval){0}; 11331+ dst_be->base.status = QENT_ERROR; 11332+ qe_dst_done(dst_be); 11333+ } 11334+ } 11335+ 11336+ queue_delete(mbc->dst); 11337+ queue_delete(mbc->src); 11338+ close(mbc->vfd); 11339+ pthread_mutex_destroy(&mbc->lock); 11340+ 11341+ free(mbc); 11342+} 11343+/* Add a reference and return mbc. */ 11344+struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc) 11345+{ 11346+ atomic_fetch_add(&mbc->ref_count, 1); 11347+ return mbc; 11348+} 11349+/* Drop a reference and set *pmbc to NULL. ref_count is zero-based, so mbc is deleted when the count was already 0. */ 11350+void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc) 11351+{ 11352+ struct mediabufs_ctl *const mbc = *pmbc; 11353+ int n; 11354+ 11355+ if (!mbc) 11356+ return; 11357+ *pmbc = NULL; 11358+ n = atomic_fetch_sub(&mbc->ref_count, 1); 11359+ if (n) 11360+ return; 11361+ mediabufs_ctl_delete(mbc); 11362+} 11363+/* Returns the version field of the VIDIOC_QUERYCAP result stored in mbc->capability. */ 11364+unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc) 11365+{ 11366+ return mbc->capability.version; 11367+} 11368+ 11369+static int
set_capabilities(struct mediabufs_ctl *const mbc) 11370+{ /* Run VIDIOC_QUERYCAP and choose the src/dst buffer types: multi-planar M2M if available, else single-planar M2M. Returns 0, a negative errno value if the ioctl fails, or -EINVAL if the device reports no M2M capability. */ 11371+ uint32_t caps; 11372+ 11373+ if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &mbc->capability)) { 11374+ int err = errno; 11375+ request_err(mbc->dc, "Failed to get capabilities: %s\n", strerror(err)); 11376+ return -err; 11377+ } 11378+/* Use device_caps when the driver flags them as valid, otherwise the global capabilities */ 11379+ caps = (mbc->capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ? 11380+ mbc->capability.device_caps : 11381+ mbc->capability.capabilities; 11382+ 11383+ if ((caps & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) { 11384+ mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; 11385+ mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; 11386+ } 11387+ else if ((caps & V4L2_CAP_VIDEO_M2M) != 0) { 11388+ mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; 11389+ mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; 11390+ } 11391+ else { 11392+ request_err(mbc->dc, "No M2M capabilities (%#x)\n", caps); 11393+ return -EINVAL; 11394+ } 11395+ 11396+ return 0; 11397+} 11398+ 11399+/* One of these per context. Opens vpath (or a default path if NULL), checks its capabilities and creates the src/dst queues, the poll task and the weak link. Returns NULL on failure. */ 11400+struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, const char * vpath, struct pollqueue *const pq) 11401+{ 11402+ struct mediabufs_ctl *const mbc = calloc(1, sizeof(*mbc)); 11403+ 11404+ if (!mbc) 11405+ return NULL; 11406+ 11407+ mbc->dc = dc; 11408+ // Default mono planar 11409+ mbc->pq = pq; 11410+ pthread_mutex_init(&mbc->lock, NULL); 11411+ 11412+ /* Pick a default - could we scan for this?
*/ 11413+ if (vpath == NULL) 11414+ vpath = "/dev/media0"; 11415+ 11416+ while ((mbc->vfd = open(vpath, O_RDWR)) == -1) 11417+ { 11418+ const int err = errno; 11419+ if (err != EINTR) { 11420+ request_err(dc, "Failed to open video dev '%s': %s\n", vpath, strerror(err)); 11421+ goto fail0; 11422+ } 11423+ } 11424+ 11425+ if (set_capabilities(mbc)) { 11426+ request_err(dc, "Bad capabilities for video dev '%s'\n", vpath); 11427+ goto fail1; 11428+ } 11429+ 11430+ mbc->src = queue_new(mbc->vfd); 11431+ if (!mbc->src) 11432+ goto fail1; 11433+ mbc->dst = queue_new(mbc->vfd); 11434+ if (!mbc->dst) 11435+ goto fail2; 11436+ mbc->pt = polltask_new(pq, mbc->vfd, POLLIN | POLLOUT, mediabufs_poll_cb, mbc); 11437+ if (!mbc->pt) 11438+ goto fail3; 11439+ mbc->this_wlm = ff_weak_link_new(mbc); 11440+ if (!mbc->this_wlm) 11441+ goto fail4; 11442+ 11443+ /* Cannot add polltask now - polling with nothing pending 11444+ * generates infinite error polls 11445+ */ 11446+ return mbc; 11447+ 11448+fail4: 11449+ polltask_delete(&mbc->pt); 11450+fail3: 11451+ queue_delete(mbc->dst); 11452+fail2: 11453+ queue_delete(mbc->src); 11454+fail1: 11455+ close(mbc->vfd); 11456+fail0: 11457+ free(mbc); 11458+ request_info(dc, "%s: FAILED\n", __func__); 11459+ return NULL; 11460+} 11461+ 11462+ 11463+ 11464--- /dev/null 11465+++ b/libavcodec/v4l2_req_media.h 11466@@ -0,0 +1,171 @@ 11467+/* 11468+e.h 11469+* 11470+ * Permission is hereby granted, free of charge, to any person obtaining a 11471+ * copy of this software and associated documentation files (the 11472+ * "Software"), to deal in the Software without restriction, including 11473+ * without limitation the rights to use, copy, modify, merge, publish, 11474+ * distribute, sub license, and/or sell copies of the Software, and to 11475+ * permit persons to whom the Software is furnished to do so, subject to 11476+ * the following conditions: 11477+ * 11478+ * The above copyright notice and this permission notice (including the 11479+ * next 
paragraph) shall be included in all copies or substantial portions 11480+ * of the Software. 11481+ * 11482+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 11483+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 11484+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 11485+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR 11486+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 11487+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 11488+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11489+ */ 11490+ 11491+#ifndef _MEDIA_H_ 11492+#define _MEDIA_H_ 11493+ 11494+#include <stdbool.h> 11495+#include <stdint.h> 11496+ 11497+struct v4l2_format; 11498+struct v4l2_fmtdesc; 11499+struct v4l2_query_ext_ctrl; 11500+ 11501+struct pollqueue; 11502+struct media_request; 11503+struct media_pool; 11504+/* Status codes returned by the functions declared in this header; 0 is success */ 11505+typedef enum media_buf_status { 11506+ MEDIABUFS_STATUS_SUCCESS = 0, 11507+ MEDIABUFS_ERROR_OPERATION_FAILED, 11508+ MEDIABUFS_ERROR_DECODING_ERROR, 11509+ MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE, 11510+ MEDIABUFS_ERROR_UNSUPPORTED_RT_FORMAT, 11511+ MEDIABUFS_ERROR_ALLOCATION_FAILED, 11512+ MEDIABUFS_ERROR_UNSUPPORTED_MEMORY, 11513+} MediaBufsStatus; 11514+ 11515+struct media_pool * media_pool_new(const char * const media_path, 11516+ struct pollqueue * const pq, 11517+ const unsigned int n); 11518+void media_pool_delete(struct media_pool ** pmp); 11519+ 11520+// Obtain a media request 11521+// Will block if none available - has a 2sec timeout 11522+struct media_request * media_request_get(struct media_pool * const mp); 11523+int media_request_fd(const struct media_request * const req); 11524+ 11525+// Start this request 11526+// Request structure is returned to pool once done 11527+int media_request_start(struct media_request * const req); 11528+ 11529+// Return an *unstarted* media_request to the pool 11530+// May later be upgraded
to allow for aborting a started req 11531+int media_request_abort(struct media_request ** const preq); 11532+ 11533+ 11534+struct mediabufs_ctl; 11535+struct qent_src; 11536+struct qent_dst; 11537+struct dmabuf_h; 11538+struct dmabufs_ctl; 11539+ 11540+// 1-1 mapping to V4L2 type - just defined separately to avoid some include versioning difficulties 11541+enum mediabufs_memory { 11542+ MEDIABUFS_MEMORY_UNSET = 0, 11543+ MEDIABUFS_MEMORY_MMAP = 1, 11544+ MEDIABUFS_MEMORY_USERPTR = 2, 11545+ MEDIABUFS_MEMORY_OVERLAY = 3, 11546+ MEDIABUFS_MEMORY_DMABUF = 4, 11547+}; 11548+ 11549+int qent_src_params_set(struct qent_src *const be, const struct timeval * timestamp); 11550+struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst); 11551+ 11552+// Pre-allocate: make sure the src buffer can hold len bytes 11553+int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc); 11554+// dbsc may be NULL if realloc not required 11555+int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc); 11556+const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be, unsigned int plane); 11557+int qent_dst_dup_fd(const struct qent_dst *const be, unsigned int plane); 11558+MediaBufsStatus qent_dst_wait(struct qent_dst *const be); 11559+void qent_dst_delete(struct qent_dst *const be); 11560+// Returns a qent_dst to its mbc free Q or deletes it if the mbc is dead 11561+void qent_dst_unref(struct qent_dst ** const pbe_dst); 11562+struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst); 11563+ 11564+const uint8_t * qent_dst_data(struct qent_dst *const be, unsigned int buf_no); 11565+MediaBufsStatus qent_dst_read_start(struct qent_dst *const be); 11566+MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be); 11567+/* Import an fd unattached to any mediabuf */ 11568+MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst, 11569+ unsigned int plane, 11570+ int fd, size_t size);
11571+ 11572+const char * mediabufs_memory_name(const enum mediabufs_memory m); 11573+/* Queue the src buffer (and dst_be if non-NULL) and start the request; *pmreq and *psrc_be are always consumed (set to NULL) */ 11574+MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc, 11575+ struct media_request **const pmreq, 11576+ struct qent_src **const psrc_be, 11577+ struct qent_dst *const dst_be, 11578+ const bool is_final); 11579+// Get / alloc a dst buffer & associate with a slot 11580+// If the dst pool is empty then behaviour depends on the fixed flag passed to 11581+// dst_slots_create. Default is !fixed = unlimited alloc 11582+struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, 11583+ struct dmabufs_ctl *const dbsc); 11584+// Create dst slots without alloc (n must not exceed 32) 11585+// If fixed true then qent_alloc will only get slots from this pool and will 11586+// block until a qent has been unrefed 11587+MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype); 11588+ 11589+MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc); 11590+MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc); 11591+const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc); 11592+/* Format filter used by mediabufs_dst_fmt_set: called as accept_fn(accept_v, fmtdesc); return non-zero to accept the enumerated format, 0 to skip it */ 11593+typedef int mediabufs_dst_fmt_accept_fn(void * v, const struct v4l2_fmtdesc *fmtdesc); 11594+ 11595+MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc, 11596+ const unsigned int width, 11597+ const unsigned int height, 11598+ mediabufs_dst_fmt_accept_fn *const accept_fn, 11599+ void *const accept_v); 11600+struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc); 11601+void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src); 11602+/* Returns 0 on success or a negative errno value; with a non-NULL mreq the controls are set on that request */ 11603+int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, 11604+ struct v4l2_ext_control control_array[], unsigned int n); 11605+MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc, 11606+ struct
media_request * const mreq, 11607+ unsigned int id, void *data, 11608+ unsigned int size); 11609+int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n); 11610+ 11611+int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc); 11612+ 11613+MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc, 11614+ enum v4l2_buf_type buf_type, 11615+ const uint32_t pixfmt, 11616+ const uint32_t width, const uint32_t height, 11617+ const size_t bufsize); 11618+ 11619+MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw, 11620+ struct dmabufs_ctl * const dbsc, 11621+ unsigned int n, 11622+ const enum mediabufs_memory memtype); 11623+ 11624+// Want to have appropriate formats set first 11625+MediaBufsStatus mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype); 11626+MediaBufsStatus mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype); 11627+ 11628+#define MEDIABUFS_DRIVER_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c)) 11629+unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc); 11630+ 11631+struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, 11632+ const char *vpath, struct pollqueue *const pq); 11633+void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc); 11634+struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc); 11635+ 11636+ 11637+#endif 11638--- /dev/null 11639+++ b/libavcodec/v4l2_req_pollqueue.c 11640@@ -0,0 +1,361 @@ 11641+#include <errno.h> 11642+#include <limits.h> 11643+#include <poll.h> 11644+#include <pthread.h> 11645+#include <semaphore.h> 11646+#include <stdatomic.h> 11647+#include <stdbool.h> 11648+#include <stdlib.h> 11649+#include <stdint.h> 11650+#include <stdio.h> 11651+#include <string.h> 11652+#include <unistd.h> 11653+#include <sys/eventfd.h> 11654+ 11655+#include "v4l2_req_pollqueue.h" 11656+#include "v4l2_req_utils.h" 
11657+ 11658+ 11659+struct pollqueue; 11660+ 11661+enum polltask_state { 11662+ POLLTASK_UNQUEUED = 0, 11663+ POLLTASK_QUEUED, 11664+ POLLTASK_RUNNING, 11665+ POLLTASK_Q_KILL, 11666+ POLLTASK_RUN_KILL, 11667+}; 11668+ 11669+struct polltask { 11670+ struct polltask *next; 11671+ struct polltask *prev; 11672+ struct pollqueue *q; 11673+ enum polltask_state state; 11674+ 11675+ int fd; 11676+ short events; 11677+ 11678+ void (*fn)(void *v, short revents); 11679+ void * v; 11680+ 11681+ uint64_t timeout; /* CLOCK_MONOTONIC time, 0 => never */ 11682+ sem_t kill_sem; 11683+}; 11684+ 11685+struct pollqueue { 11686+ atomic_int ref_count; 11687+ pthread_mutex_t lock; 11688+ 11689+ struct polltask *head; 11690+ struct polltask *tail; 11691+ 11692+ bool kill; 11693+ bool no_prod; 11694+ int prod_fd; 11695+ struct polltask *prod_pt; 11696+ pthread_t worker; 11697+}; 11698+ 11699+struct polltask *polltask_new(struct pollqueue *const pq, 11700+ const int fd, const short events, 11701+ void (*const fn)(void *v, short revents), 11702+ void *const v) 11703+{ 11704+ struct polltask *pt; 11705+ 11706+ if (!events) 11707+ return NULL; 11708+ 11709+ pt = malloc(sizeof(*pt)); 11710+ if (!pt) 11711+ return NULL; 11712+ 11713+ *pt = (struct polltask){ 11714+ .next = NULL, 11715+ .prev = NULL, 11716+ .q = pollqueue_ref(pq), 11717+ .fd = fd, 11718+ .events = events, 11719+ .fn = fn, 11720+ .v = v 11721+ }; 11722+ 11723+ sem_init(&pt->kill_sem, 0, 0); 11724+ 11725+ return pt; 11726+} 11727+ 11728+static void pollqueue_rem_task(struct pollqueue *const pq, struct polltask *const pt) 11729+{ 11730+ if (pt->prev) 11731+ pt->prev->next = pt->next; 11732+ else 11733+ pq->head = pt->next; 11734+ if (pt->next) 11735+ pt->next->prev = pt->prev; 11736+ else 11737+ pq->tail = pt->prev; 11738+ pt->next = NULL; 11739+ pt->prev = NULL; 11740+} 11741+ 11742+static void polltask_free(struct polltask * const pt) 11743+{ 11744+ sem_destroy(&pt->kill_sem); 11745+ free(pt); 11746+} 11747+ 11748+static int 
pollqueue_prod(const struct pollqueue *const pq) 11749+{ 11750+ static const uint64_t one = 1; 11751+ return write(pq->prod_fd, &one, sizeof(one)); 11752+} 11753+ 11754+void polltask_delete(struct polltask **const ppt) 11755+{ 11756+ struct polltask *const pt = *ppt; 11757+ struct pollqueue * pq; 11758+ enum polltask_state state; 11759+ bool prodme; 11760+ 11761+ if (!pt) 11762+ return; 11763+ 11764+ pq = pt->q; 11765+ pthread_mutex_lock(&pq->lock); 11766+ state = pt->state; 11767+ pt->state = (state == POLLTASK_RUNNING) ? POLLTASK_RUN_KILL : POLLTASK_Q_KILL; 11768+ prodme = !pq->no_prod; 11769+ pthread_mutex_unlock(&pq->lock); 11770+ 11771+ if (state != POLLTASK_UNQUEUED) { 11772+ if (prodme) 11773+ pollqueue_prod(pq); 11774+ while (sem_wait(&pt->kill_sem) && errno == EINTR) 11775+ /* loop */; 11776+ } 11777+ 11778+ // Leave zapping the ref until we have DQed the PT as might well be 11779+ // legitimately used in it 11780+ *ppt = NULL; 11781+ polltask_free(pt); 11782+ pollqueue_unref(&pq); 11783+} 11784+ 11785+static uint64_t pollqueue_now(int timeout) 11786+{ 11787+ struct timespec now; 11788+ uint64_t now_ms; 11789+ 11790+ if (clock_gettime(CLOCK_MONOTONIC, &now)) 11791+ return 0; 11792+ now_ms = (now.tv_nsec / 1000000) + (uint64_t)now.tv_sec * 1000 + timeout; 11793+ return now_ms ? now_ms : (uint64_t)1; 11794+} 11795+ 11796+void pollqueue_add_task(struct polltask *const pt, const int timeout) 11797+{ 11798+ bool prodme = false; 11799+ struct pollqueue * const pq = pt->q; 11800+ 11801+ pthread_mutex_lock(&pq->lock); 11802+ if (pt->state != POLLTASK_Q_KILL && pt->state != POLLTASK_RUN_KILL) { 11803+ if (pq->tail) 11804+ pq->tail->next = pt; 11805+ else 11806+ pq->head = pt; 11807+ pt->prev = pq->tail; 11808+ pt->next = NULL; 11809+ pt->state = POLLTASK_QUEUED; 11810+ pt->timeout = timeout < 0 ? 
0 : pollqueue_now(timeout); 11811+ pq->tail = pt; 11812+ prodme = !pq->no_prod; 11813+ } 11814+ pthread_mutex_unlock(&pq->lock); 11815+ if (prodme) 11816+ pollqueue_prod(pq); 11817+} 11818+ 11819+static void *poll_thread(void *v) 11820+{ 11821+ struct pollqueue *const pq = v; 11822+ struct pollfd *a = NULL; 11823+ size_t asize = 0; 11824+ 11825+ pthread_mutex_lock(&pq->lock); 11826+ do { 11827+ unsigned int i; 11828+ unsigned int n = 0; 11829+ struct polltask *pt; 11830+ struct polltask *pt_next; 11831+ uint64_t now = pollqueue_now(0); 11832+ int timeout = -1; 11833+ int rv; 11834+ 11835+ for (pt = pq->head; pt; pt = pt_next) { 11836+ int64_t t; 11837+ 11838+ pt_next = pt->next; 11839+ 11840+ if (pt->state == POLLTASK_Q_KILL) { 11841+ pollqueue_rem_task(pq, pt); 11842+ sem_post(&pt->kill_sem); 11843+ continue; 11844+ } 11845+ 11846+ if (n >= asize) { 11847+ asize = asize ? asize * 2 : 4; 11848+ a = realloc(a, asize * sizeof(*a)); 11849+ if (!a) { 11850+ request_log("Failed to realloc poll array to %zd\n", asize); 11851+ goto fail_locked; 11852+ } 11853+ } 11854+ 11855+ a[n++] = (struct pollfd){ 11856+ .fd = pt->fd, 11857+ .events = pt->events 11858+ }; 11859+ 11860+ t = (int64_t)(pt->timeout - now); 11861+ if (pt->timeout && t < INT_MAX && 11862+ (timeout < 0 || (int)t < timeout)) 11863+ timeout = (t < 0) ? 0 : (int)t; 11864+ } 11865+ pthread_mutex_unlock(&pq->lock); 11866+ 11867+ if ((rv = poll(a, n, timeout)) == -1) { 11868+ if (errno != EINTR) { 11869+ request_log("Poll error: %s\n", strerror(errno)); 11870+ goto fail_unlocked; 11871+ } 11872+ } 11873+ 11874+ pthread_mutex_lock(&pq->lock); 11875+ now = pollqueue_now(0); 11876+ 11877+ /* Prodding in this loop is pointless and might lead to 11878+ * infinite looping 11879+ */ 11880+ pq->no_prod = true; 11881+ for (i = 0, pt = pq->head; i < n; ++i, pt = pt_next) { 11882+ pt_next = pt->next; 11883+ 11884+ /* Pending? 
*/ 11885+ if (a[i].revents || 11886+ (pt->timeout && (int64_t)(now - pt->timeout) >= 0)) { 11887+ pollqueue_rem_task(pq, pt); 11888+ if (pt->state == POLLTASK_QUEUED) 11889+ pt->state = POLLTASK_RUNNING; 11890+ if (pt->state == POLLTASK_Q_KILL) 11891+ pt->state = POLLTASK_RUN_KILL; 11892+ pthread_mutex_unlock(&pq->lock); 11893+ 11894+ /* This can add new entries to the Q but as 11895+ * those are added to the tail our existing 11896+ * chain remains intact 11897+ */ 11898+ pt->fn(pt->v, a[i].revents); 11899+ 11900+ pthread_mutex_lock(&pq->lock); 11901+ if (pt->state == POLLTASK_RUNNING) 11902+ pt->state = POLLTASK_UNQUEUED; 11903+ if (pt->state == POLLTASK_RUN_KILL) 11904+ sem_post(&pt->kill_sem); 11905+ } 11906+ } 11907+ pq->no_prod = false; 11908+ 11909+ } while (!pq->kill); 11910+ 11911+fail_locked: 11912+ pthread_mutex_unlock(&pq->lock); 11913+fail_unlocked: 11914+ free(a); 11915+ return NULL; 11916+} 11917+ 11918+static void prod_fn(void *v, short revents) 11919+{ 11920+ struct pollqueue *const pq = v; 11921+ char buf[8]; 11922+ if (revents) 11923+ read(pq->prod_fd, buf, 8); 11924+ if (!pq->kill) 11925+ pollqueue_add_task(pq->prod_pt, -1); 11926+} 11927+ 11928+struct pollqueue * pollqueue_new(void) 11929+{ 11930+ struct pollqueue *pq = malloc(sizeof(*pq)); 11931+ if (!pq) 11932+ return NULL; 11933+ *pq = (struct pollqueue){ 11934+ .ref_count = ATOMIC_VAR_INIT(0), 11935+ .lock = PTHREAD_MUTEX_INITIALIZER, 11936+ .head = NULL, 11937+ .tail = NULL, 11938+ .kill = false, 11939+ .prod_fd = -1 11940+ }; 11941+ 11942+ pq->prod_fd = eventfd(0, EFD_NONBLOCK); 11943+ if (pq->prod_fd == 1) 11944+ goto fail1; 11945+ pq->prod_pt = polltask_new(pq, pq->prod_fd, POLLIN, prod_fn, pq); 11946+ if (!pq->prod_pt) 11947+ goto fail2; 11948+ pollqueue_add_task(pq->prod_pt, -1); 11949+ if (pthread_create(&pq->worker, NULL, poll_thread, pq)) 11950+ goto fail3; 11951+ // Reset ref count which will have been inced by the add_task 11952+ atomic_store(&pq->ref_count, 0); 11953+ return pq; 
11954+ 11955+fail3: 11956+ polltask_free(pq->prod_pt); 11957+fail2: 11958+ close(pq->prod_fd); 11959+fail1: 11960+ free(pq); 11961+ return NULL; 11962+} 11963+ 11964+static void pollqueue_free(struct pollqueue *const pq) 11965+{ 11966+ void *rv; 11967+ 11968+ pthread_mutex_lock(&pq->lock); 11969+ pq->kill = true; 11970+ pollqueue_prod(pq); 11971+ pthread_mutex_unlock(&pq->lock); 11972+ 11973+ pthread_join(pq->worker, &rv); 11974+ polltask_free(pq->prod_pt); 11975+ pthread_mutex_destroy(&pq->lock); 11976+ close(pq->prod_fd); 11977+ free(pq); 11978+} 11979+ 11980+struct pollqueue * pollqueue_ref(struct pollqueue *const pq) 11981+{ 11982+ atomic_fetch_add(&pq->ref_count, 1); 11983+ return pq; 11984+} 11985+ 11986+void pollqueue_unref(struct pollqueue **const ppq) 11987+{ 11988+ struct pollqueue * const pq = *ppq; 11989+ 11990+ if (!pq) 11991+ return; 11992+ *ppq = NULL; 11993+ 11994+ if (atomic_fetch_sub(&pq->ref_count, 1) != 0) 11995+ return; 11996+ 11997+ pollqueue_free(pq); 11998+} 11999+ 12000+ 12001+ 12002--- /dev/null 12003+++ b/libavcodec/v4l2_req_pollqueue.h 12004@@ -0,0 +1,18 @@ 12005+#ifndef POLLQUEUE_H_ 12006+#define POLLQUEUE_H_ 12007+ 12008+struct polltask; 12009+struct pollqueue; 12010+ 12011+struct polltask *polltask_new(struct pollqueue *const pq, 12012+ const int fd, const short events, 12013+ void (*const fn)(void *v, short revents), 12014+ void *const v); 12015+void polltask_delete(struct polltask **const ppt); 12016+ 12017+void pollqueue_add_task(struct polltask *const pt, const int timeout); 12018+struct pollqueue * pollqueue_new(void); 12019+void pollqueue_unref(struct pollqueue **const ppq); 12020+struct pollqueue * pollqueue_ref(struct pollqueue *const pq); 12021+ 12022+#endif /* POLLQUEUE_H_ */ 12023--- /dev/null 12024+++ b/libavcodec/v4l2_req_utils.h 12025@@ -0,0 +1,27 @@ 12026+#ifndef AVCODEC_V4L2_REQ_UTILS_H 12027+#define AVCODEC_V4L2_REQ_UTILS_H 12028+ 12029+#include <stdint.h> 12030+#include "libavutil/log.h" 12031+ 12032+#define 
/* Map anything outside the visible ASCII range 0x21..0x7e to '.' */
static inline char safechar(char c) {
    if (c <= 0x20 || c >= 0x7f)
        return '.';
    return c;
}

/*
 * Render a fourcc as a printable, NUL terminated 4 character string in tbuf
 * (least significant byte first). Returns tbuf.
 */
static inline const char * strfourcc(char tbuf[5], uint32_t fcc) {
    unsigned int idx;

    for (idx = 0; idx != 4; ++idx)
        tbuf[idx] = safechar((fcc >> (8 * idx)) & 0xff);
    tbuf[4] = '\0';
    return tbuf;
}
/*
 * Estimate the size in bytes of the bitstream (source) buffer needed for one
 * picture of w x h luma samples at a bit depth of (8 + bits_minus8).
 */
static size_t bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8)
{
    /* Widen before multiplying so the product is not truncated to
     * unsigned int on platforms where size_t is wider
     */
    const size_t wxh = (size_t)w * h;
    size_t bits_alloc;

    /* Annex A gives a min compression of 2 @ lvl 3.1
     * (wxh <= 983040) and min 4 thereafter but avoid
     * the oddity of 983041 having a lower limit than
     * 983040.
     * Multiply by 3/2 for 4:2:0
     */
    bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
        wxh < 983040 * 2 ? 983040 * 3 / 4 :
        wxh * 3 / 8;
    /* Allow for bit depth */
    bits_alloc += (bits_alloc * bits_minus8) / 8;
    /* Add a few bytes (16k) for overhead */
    bits_alloc += 0x4000;
    return bits_alloc;
}
/*
 * Destination format filter passed to mediabufs_dst_fmt_set().
 * v is the AVCodecContext. Returns 1 to accept fmtdesc->pixelformat for the
 * current SPS bit depth (8 bit: NV12 or NV12_COL128, 10 bit:
 * NV12_10_COL128), 0 otherwise.
 */
static int dst_fmt_accept_cb(void * v, const struct v4l2_fmtdesc *fmtdesc)
{
    AVCodecContext *const avctx = v;
    const HEVCContext *const h = avctx->priv_data;

    if (h->ps.sps->bit_depth == 8) {
        if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_COL128 ||
            fmtdesc->pixelformat == V4L2_PIX_FMT_NV12) {
            return 1;
        }
    }
    else if (h->ps.sps->bit_depth == 10) {
        if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_10_COL128) {
            return 1;
        }
    }
    return 0;
}

/*
 * Hwaccel init.
 * Rejects streams that are not 4:2:0 8 or 10 bit, finds a V4L2 decode device,
 * creates the pollqueue / media pool / mediabufs control, picks DMABUF or
 * MMAP buffers, sets the source & destination formats, probes which control
 * API version the driver accepts, creates the buffer pools, starts streaming
 * and finally creates the hw frames context and sets the decoder controls.
 * Returns 0 on success. On failure everything created so far is released via
 * the fail labels (in reverse order of creation) and an error code is
 * returned.
 */
static int v4l2_request_hevc_init(AVCodecContext *avctx)
{
    const HEVCContext *h = avctx->priv_data;
    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
    const HEVCSPS * const sps = h->ps.sps;
    int ret;
    const struct decdev * decdev;
    const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 4).src_pix_fmt_v4l2; // Assuming constant for all APIs but avoiding V4L2 includes
    size_t src_size;
    enum mediabufs_memory src_memtype;
    enum mediabufs_memory dst_memtype;

    av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__);

    // Give up immediately if this is something that we have no code to deal with
    if (h->ps.sps->chroma_format_idc != 1) {
        av_log(avctx, AV_LOG_WARNING, "chroma_format_idc(%d) != 1: Not implemented\n", h->ps.sps->chroma_format_idc);
        return AVERROR_PATCHWELCOME;
    }
    if (!(h->ps.sps->bit_depth == 10 || h->ps.sps->bit_depth == 8) ||
        h->ps.sps->bit_depth != h->ps.sps->bit_depth_chroma) {
        av_log(avctx, AV_LOG_WARNING, "Bit depth Y:%d C:%d: Not implemented\n", h->ps.sps->bit_depth, h->ps.sps->bit_depth_chroma);
        return AVERROR_PATCHWELCOME;
    }

    // NOTE(review): the sign convention of devscan_build()'s return value is
    // not visible here - confirm that AVERROR(-ret) yields a negative code
    if ((ret = devscan_build(avctx, &ctx->devscan)) != 0) {
        av_log(avctx, AV_LOG_WARNING, "Failed to find any V4L2 devices\n");
        return (AVERROR(-ret));
    }
    ret = AVERROR(ENOMEM); // Assume mem fail by default for these

    if ((decdev = devscan_find(ctx->devscan, src_pix_fmt)) == NULL)
    {
        av_log(avctx, AV_LOG_WARNING, "Failed to find a V4L2 device for H265\n");
        ret = AVERROR(ENODEV);
        goto fail0;
    }
    av_log(avctx, AV_LOG_DEBUG, "Trying V4L2 devices: %s,%s\n",
           decdev_media_path(decdev), decdev_video_path(decdev));

    if ((ctx->pq = pollqueue_new()) == NULL) {
        av_log(avctx, AV_LOG_ERROR, "Unable to create pollqueue\n");
        goto fail1;
    }

    if ((ctx->mpool = media_pool_new(decdev_media_path(decdev), ctx->pq, 4)) == NULL) {
        av_log(avctx, AV_LOG_ERROR, "Unable to create media pool\n");
        goto fail2;
    }

    if ((ctx->mbufs = mediabufs_ctl_new(avctx, decdev_video_path(decdev), ctx->pq)) == NULL) {
        av_log(avctx, AV_LOG_ERROR, "Unable to create media controls\n");
        goto fail3;
    }

    // Version test for functional Pi5 HEVC iommu.
    // rpivid kernel patch was merged in 6.1.57
    // *** Remove when it is unlikely that there are any broken kernels left
    if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(6,1,57))
        ctx->dbufs = dmabufs_ctl_new_vidbuf_cached();
    else
        ctx->dbufs = dmabufs_ctl_new();

    // No dmabuf allocator is not fatal: fall back to MMAP for both queues
    if (ctx->dbufs == NULL) {
        av_log(avctx, AV_LOG_DEBUG, "Unable to open dmabufs - try mmap buffers\n");
        src_memtype = MEDIABUFS_MEMORY_MMAP;
        dst_memtype = MEDIABUFS_MEMORY_MMAP;
    }
    else {
        av_log(avctx, AV_LOG_DEBUG, "Dmabufs opened - try dmabuf buffers\n");
        src_memtype = MEDIABUFS_MEMORY_DMABUF;
        dst_memtype = MEDIABUFS_MEMORY_DMABUF;
    }

    // Ask for an initial bitbuf size of max size / 4
    // We will realloc if we need more
    // Must use sps->h/w as avctx contains cropped size
    // The / 4 only applies to resizable DMABUF source buffers; otherwise the
    // full estimate is used with a floor of 0x40000 bytes
retry_src_memtype:
    src_size = bit_buf_size(sps->width, sps->height, sps->bit_depth - 8);
    if (src_memtype == MEDIABUFS_MEMORY_DMABUF && mediabufs_src_resizable(ctx->mbufs))
        src_size /= 4;
    // Kludge for conformance tests which break Annex A limits
    else if (src_size < 0x40000)
        src_size = 0x40000;

    if (mediabufs_src_fmt_set(ctx->mbufs, decdev_src_type(decdev), src_pix_fmt,
                              sps->width, sps->height, src_size)) {
        char tbuf1[5];
        av_log(avctx, AV_LOG_ERROR, "Failed to set source format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height);
        goto fail4;
    }

    // If DMABUF is refused for the source retry once with MMAP; the source
    // size & format are recomputed because the size depends on the memtype
    if (mediabufs_src_chk_memtype(ctx->mbufs, src_memtype)) {
        if (src_memtype == MEDIABUFS_MEMORY_DMABUF) {
            src_memtype = MEDIABUFS_MEMORY_MMAP;
            goto retry_src_memtype;
        }
        av_log(avctx, AV_LOG_ERROR, "Failed to get src memory type\n");
        goto fail4;
    }

    // Probe newest control API first; older versions are only compiled in
    // when CONFIG_V4L2_REQ_HEVC_VX is set
    if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0)
        ctx->fns = &V2(ff_v4l2_req_hevc, 4);
#if CONFIG_V4L2_REQ_HEVC_VX
    else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0)
        ctx->fns = &V2(ff_v4l2_req_hevc, 3);
    else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0)
        ctx->fns = &V2(ff_v4l2_req_hevc, 2);
    else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0)
        ctx->fns = &V2(ff_v4l2_req_hevc, 1);
#endif
    else {
        av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n");
        ret = AVERROR(EINVAL);
        goto fail4;
    }

    av_log(avctx, AV_LOG_DEBUG, "%s probed successfully: driver v %#x\n",
           ctx->fns->name, mediabufs_ctl_driver_version(ctx->mbufs));

    // NOTE(review): the fourcc printed on failure here is the source format
    if (mediabufs_dst_fmt_set(ctx->mbufs, sps->width, sps->height, dst_fmt_accept_cb, avctx)) {
        char tbuf1[5];
        av_log(avctx, AV_LOG_ERROR, "Failed to set destination format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height);
        goto fail4;
    }

    if (mediabufs_src_pool_create(ctx->mbufs, ctx->dbufs, 6, src_memtype)) {
        av_log(avctx, AV_LOG_ERROR, "Failed to create source pool\n");
        goto fail4;
    }

    {
        // Slots = DPB size for the highest sub-layer + thread count +
        // extra_hw_frames if set (> 0), else 6
        unsigned int dst_slots = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering +
            avctx->thread_count + (avctx->extra_hw_frames > 0 ? avctx->extra_hw_frames : 6);
        av_log(avctx, AV_LOG_DEBUG, "Slots=%d: Reordering=%d, threads=%d, hw+=%d\n", dst_slots,
               sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering,
               avctx->thread_count, avctx->extra_hw_frames);

        // A refused DMABUF destination falls back to MMAP (which is not
        // re-checked here); any other refusal is fatal
        if (mediabufs_dst_chk_memtype(ctx->mbufs, dst_memtype)) {
            if (dst_memtype != MEDIABUFS_MEMORY_DMABUF) {
                av_log(avctx, AV_LOG_ERROR, "Failed to get dst memory type\n");
                goto fail4;
            }
            av_log(avctx, AV_LOG_DEBUG, "Dst DMABUF not supported - trying mmap\n");
            dst_memtype = MEDIABUFS_MEMORY_MMAP;
        }

        // extra_hw_frames is -1 if unset
        // The slot pool is "fixed" only when extra_hw_frames was given
        if (mediabufs_dst_slots_create(ctx->mbufs, dst_slots, (avctx->extra_hw_frames > 0), dst_memtype)) {
            av_log(avctx, AV_LOG_ERROR, "Failed to create destination slots\n");
            goto fail4;
        }
    }

    if (mediabufs_stream_on(ctx->mbufs)) {
        av_log(avctx, AV_LOG_ERROR, "Failed stream on\n");
        goto fail4;
    }

    if ((ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_DRM)) != 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to create frame ctx\n");
        goto fail4;
    }

    if ((ret = ctx->fns->set_controls(avctx, ctx)) != 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed set controls\n");
        goto fail5;
    }

    decode_q_init(&ctx->decode_q);

    // Set our s/w format
    avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format;

    av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s; swfmt=%s\n",
           ctx->fns->name,
           decdev_media_path(decdev), decdev_video_path(decdev),
           mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype),
           av_get_pix_fmt_name(avctx->sw_pix_fmt));

    return 0;

    // Cleanup chain: each label releases one resource then falls through to
    // the labels for everything created before it
fail5:
    av_buffer_unref(&avctx->hw_frames_ctx);
fail4:
    mediabufs_ctl_unref(&ctx->mbufs);
fail3:
    media_pool_delete(&ctx->mpool);
fail2:
    pollqueue_unref(&ctx->pq);
fail1:
    dmabufs_ctl_unref(&ctx->dbufs);
fail0:
    devscan_delete(&ctx->devscan);
    return ret;
}

// Hwaccel descriptor: HEVC decode to DRM_PRIME frames; the per-frame entry
// points forward to the probed API version held in the hwaccel private data
const AVHWAccel ff_hevc_v4l2request_hwaccel = {
    .name           = "hevc_v4l2request",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_HEVC,
    .pix_fmt        = AV_PIX_FMT_DRM_PRIME,
    .alloc_frame    = v4l2_req_hevc_alloc_frame,
    .start_frame    = v4l2_req_hevc_start_frame,
    .decode_slice   = v4l2_req_hevc_decode_slice,
    .end_frame      = v4l2_req_hevc_end_frame,
    .abort_frame    = v4l2_req_hevc_abort_frame,
    .init           = v4l2_request_hevc_init,
    .uninit         = v4l2_request_hevc_uninit,
    .priv_data_size = sizeof(V4L2RequestContextHEVC),
    .frame_params   = v4l2_req_hevc_frame_params,
    .caps_internal  = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_MT_SAFE,
};
12427+#ifndef DRM_FORMAT_P030 12428+#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') 12429+#endif 12430+ 12431+#ifndef DRM_FORMAT_NV15 12432+#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') 12433+#endif 12434+ 12435+#ifndef DRM_FORMAT_NV20 12436+#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') 12437+#endif 12438+ 12439+#include <linux/videodev2.h> 12440+#ifndef V4L2_CID_CODEC_BASE 12441+#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE 12442+#endif 12443+ 12444+// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined 12445+// in drm_fourcc.h hopefully will be sometime in the future but until then... 12446+#ifndef V4L2_PIX_FMT_NV12_10_COL128 12447+#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') 12448+#endif 12449+ 12450+#ifndef V4L2_PIX_FMT_NV12_COL128 12451+#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ 12452+#endif 12453+ 12454+#ifndef V4L2_CTRL_FLAG_DYNAMIC_ARRAY 12455+#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY 0x0800 12456+#endif 12457+ 12458+#define VCAT(name, version) name##_v##version 12459+#define V2(n,v) VCAT(n, v) 12460+#define V(n) V2(n, HEVC_CTRLS_VERSION) 12461+ 12462+#define S2(x) #x 12463+#define STR(x) S2(x) 12464+ 12465+// 1 per decoder 12466+struct v4l2_req_decode_fns; 12467+ 12468+typedef struct V4L2RequestContextHEVC { 12469+// V4L2RequestContext base; 12470+ const struct v4l2_req_decode_fns * fns; 12471+ 12472+ unsigned int timestamp; // ?? 
maybe uint64_t 12473+ 12474+ int decode_mode; 12475+ int start_code; 12476+ unsigned int max_slices; // 0 => not wanted (frame mode) 12477+ unsigned int max_offsets; // 0 => not wanted 12478+ 12479+ req_decode_q decode_q; 12480+ 12481+ struct devscan *devscan; 12482+ struct dmabufs_ctl *dbufs; 12483+ struct pollqueue *pq; 12484+ struct media_pool * mpool; 12485+ struct mediabufs_ctl *mbufs; 12486+} V4L2RequestContextHEVC; 12487+ 12488+typedef struct v4l2_req_decode_fns { 12489+ int src_pix_fmt_v4l2; 12490+ const char * name; 12491+ 12492+ // Init setup 12493+ int (*probe)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx); 12494+ int (*set_controls)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx); 12495+ 12496+ // Passthrough of hwaccel fns 12497+ int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size); 12498+ int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size); 12499+ int (*end_frame)(AVCodecContext *avctx); 12500+ void (*abort_frame)(AVCodecContext *avctx); 12501+ int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); 12502+ int (*alloc_frame)(AVCodecContext * avctx, AVFrame *frame); 12503+} v4l2_req_decode_fns; 12504+ 12505+ 12506+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1); 12507+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2); 12508+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3); 12509+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 4); 12510+ 12511+#endif 12512--- /dev/null 12513+++ b/libavcodec/weak_link.c 12514@@ -0,0 +1,103 @@ 12515+#include <stdlib.h> 12516+#include <pthread.h> 12517+#include <stdatomic.h> 12518+#include "weak_link.h" 12519+ 12520+struct ff_weak_link_master { 12521+ atomic_int ref_count; /* 0 is single ref for easier atomics */ 12522+ pthread_rwlock_t lock; 12523+ void * ptr; 12524+}; 12525+ 12526+static inline struct ff_weak_link_master * weak_link_x(struct ff_weak_link_client * c) 12527+{ 12528+ 
return (struct ff_weak_link_master *)c; 12529+} 12530+ 12531+struct ff_weak_link_master * ff_weak_link_new(void * p) 12532+{ 12533+ struct ff_weak_link_master * w = malloc(sizeof(*w)); 12534+ if (!w) 12535+ return NULL; 12536+ atomic_init(&w->ref_count, 0); 12537+ w->ptr = p; 12538+ if (pthread_rwlock_init(&w->lock, NULL)) { 12539+ free(w); 12540+ return NULL; 12541+ } 12542+ return w; 12543+} 12544+ 12545+static void weak_link_do_unref(struct ff_weak_link_master * const w) 12546+{ 12547+ int n = atomic_fetch_sub(&w->ref_count, 1); 12548+ if (n) 12549+ return; 12550+ 12551+ pthread_rwlock_destroy(&w->lock); 12552+ free(w); 12553+} 12554+ 12555+// Unref & break link 12556+void ff_weak_link_break(struct ff_weak_link_master ** ppLink) 12557+{ 12558+ struct ff_weak_link_master * const w = *ppLink; 12559+ if (!w) 12560+ return; 12561+ 12562+ *ppLink = NULL; 12563+ pthread_rwlock_wrlock(&w->lock); 12564+ w->ptr = NULL; 12565+ pthread_rwlock_unlock(&w->lock); 12566+ 12567+ weak_link_do_unref(w); 12568+} 12569+ 12570+struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w) 12571+{ 12572+ if (!w) 12573+ return NULL; 12574+ atomic_fetch_add(&w->ref_count, 1); 12575+ return (struct ff_weak_link_client*)w; 12576+} 12577+ 12578+void ff_weak_link_unref(struct ff_weak_link_client ** ppLink) 12579+{ 12580+ struct ff_weak_link_master * const w = weak_link_x(*ppLink); 12581+ if (!w) 12582+ return; 12583+ 12584+ *ppLink = NULL; 12585+ weak_link_do_unref(w); 12586+} 12587+ 12588+void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink) 12589+{ 12590+ struct ff_weak_link_master * const w = weak_link_x(*ppLink); 12591+ 12592+ if (!w) 12593+ return NULL; 12594+ 12595+ if (pthread_rwlock_rdlock(&w->lock)) 12596+ goto broken; 12597+ 12598+ if (w->ptr) 12599+ return w->ptr; 12600+ 12601+ pthread_rwlock_unlock(&w->lock); 12602+ 12603+broken: 12604+ *ppLink = NULL; 12605+ weak_link_do_unref(w); 12606+ return NULL; 12607+} 12608+ 12609+// Ignores a NULL c (so can be on 
the return path of both broken & live links) 12610+void ff_weak_link_unlock(struct ff_weak_link_client * c) 12611+{ 12612+ struct ff_weak_link_master * const w = weak_link_x(c); 12613+ if (w) 12614+ pthread_rwlock_unlock(&w->lock); 12615+} 12616+ 12617+ 12618--- /dev/null 12619+++ b/libavcodec/weak_link.h 12620@@ -0,0 +1,23 @@ 12621+struct ff_weak_link_master; 12622+struct ff_weak_link_client; 12623+ 12624+struct ff_weak_link_master * ff_weak_link_new(void * p); 12625+void ff_weak_link_break(struct ff_weak_link_master ** ppLink); 12626+ 12627+struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w); 12628+void ff_weak_link_unref(struct ff_weak_link_client ** ppLink); 12629+ 12630+// Returns NULL if link broken - in this case it will also zap 12631+// *ppLink and unref the weak_link. 12632+// Returns NULL if *ppLink is NULL (so a link once broken stays broken) 12633+// 12634+// The above does mean that there is a race if this is called simultaneously 12635+// by two threads using the same weak_link_client (so don't do that) 12636+void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink); 12637+void ff_weak_link_unlock(struct ff_weak_link_client * c); 12638+ 12639+ 12640+ 12641+ 12642+ 12643+ 12644--- a/libavdevice/Makefile 12645+++ b/libavdevice/Makefile 12646@@ -48,6 +48,8 @@ OBJS-$(CONFIG_SNDIO_OUTDEV) 12647 OBJS-$(CONFIG_V4L2_INDEV) += v4l2.o v4l2-common.o timefilter.o 12648 OBJS-$(CONFIG_V4L2_OUTDEV) += v4l2enc.o v4l2-common.o 12649 OBJS-$(CONFIG_VFWCAP_INDEV) += vfwcap.o 12650+OBJS-$(CONFIG_VOUT_DRM_OUTDEV) += drm_vout.o 12651+OBJS-$(CONFIG_VOUT_EGL_OUTDEV) += egl_vout.o 12652 OBJS-$(CONFIG_XCBGRAB_INDEV) += xcbgrab.o 12653 OBJS-$(CONFIG_XV_OUTDEV) += xv.o 12654 12655--- a/libavdevice/alldevices.c 12656+++ b/libavdevice/alldevices.c 12657@@ -51,6 +51,8 @@ extern const AVOutputFormat ff_sndio_mux 12658 extern const AVInputFormat ff_v4l2_demuxer; 12659 extern const AVOutputFormat ff_v4l2_muxer; 12660 extern const AVInputFormat
ff_vfwcap_demuxer; 12661+extern const AVOutputFormat ff_vout_drm_muxer; 12662+extern const AVOutputFormat ff_vout_egl_muxer; 12663 extern const AVInputFormat ff_xcbgrab_demuxer; 12664 extern const AVOutputFormat ff_xv_muxer; 12665 12666--- /dev/null 12667+++ b/libavdevice/drm_vout.c 12668@@ -0,0 +1,680 @@ 12669+/* 12670+ * Copyright (c) 2020 John Cox for Raspberry Pi Trading 12671+ * 12672+ * This file is part of FFmpeg. 12673+ * 12674+ * FFmpeg is free software; you can redistribute it and/or 12675+ * modify it under the terms of the GNU Lesser General Public 12676+ * License as published by the Free Software Foundation; either 12677+ * version 2.1 of the License, or (at your option) any later version. 12678+ * 12679+ * FFmpeg is distributed in the hope that it will be useful, 12680+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 12681+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12682+ * Lesser General Public License for more details. 12683+ * 12684+ * You should have received a copy of the GNU Lesser General Public 12685+ * License along with FFmpeg; if not, write to the Free Software 12686+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 12687+ */ 12688+ 12689+ 12690+// *** This module is a work in progress and its utility is strictly 12691+// limited to testing. 
12692+ 12693+#include "libavutil/opt.h" 12694+#include "libavutil/pixdesc.h" 12695+#include "libavutil/hwcontext_drm.h" 12696+#include "libavformat/internal.h" 12697+#include "avdevice.h" 12698+ 12699+#include "pthread.h" 12700+#include <semaphore.h> 12701+#include <unistd.h> 12702+ 12703+#include <xf86drm.h> 12704+#include <xf86drmMode.h> 12705+#include <drm_fourcc.h> 12706+ 12707+#define TRACE_ALL 0 12708+ 12709+#define DRM_MODULE "vc4" 12710+ 12711+#define ERRSTR strerror(errno) 12712+ 12713+struct drm_setup { 12714+ int conId; 12715+ uint32_t crtcId; 12716+ int crtcIdx; 12717+ uint32_t planeId; 12718+ unsigned int out_fourcc; 12719+ struct { 12720+ int x, y, width, height; 12721+ } compose; 12722+}; 12723+ 12724+typedef struct drm_aux_s { 12725+ unsigned int fb_handle; 12726+ uint32_t bo_handles[AV_DRM_MAX_PLANES]; 12727+ AVFrame * frame; 12728+} drm_aux_t; 12729+ 12730+// Aux size should only need to be 2, but on a few streams (Hobbit) under FKMS 12731+// we get initial flicker probably due to dodgy drm timing 12732+#define AUX_SIZE 3 12733+typedef struct drm_display_env_s 12734+{ 12735+ AVClass *class; 12736+ 12737+ int drm_fd; 12738+ uint32_t con_id; 12739+ struct drm_setup setup; 12740+ enum AVPixelFormat avfmt; 12741+ 12742+ int show_all; 12743+ const char * drm_module; 12744+ 12745+ unsigned int ano; 12746+ drm_aux_t aux[AUX_SIZE]; 12747+ 12748+ pthread_t q_thread; 12749+ sem_t q_sem_in; 12750+ sem_t q_sem_out; 12751+ int q_terminate; 12752+ AVFrame * q_next; 12753+ 12754+} drm_display_env_t; 12755+ 12756+ 12757+static int drm_vout_write_trailer(AVFormatContext *s) 12758+{ 12759+#if TRACE_ALL 12760+ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); 12761+#endif 12762+ 12763+ return 0; 12764+} 12765+ 12766+static int drm_vout_write_header(AVFormatContext *s) 12767+{ 12768+ const AVCodecParameters * const par = s->streams[0]->codecpar; 12769+ 12770+#if TRACE_ALL 12771+ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); 12772+#endif 12773+ if ( s->nb_streams > 1 12774+ || 
par->codec_type != AVMEDIA_TYPE_VIDEO 12775+ || par->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) { 12776+ av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n"); 12777+ return AVERROR(EINVAL); 12778+ } 12779+ 12780+ return 0; 12781+} 12782+ 12783+static int find_plane(struct AVFormatContext * const avctx, 12784+ const int drmfd, const int crtcidx, const uint32_t format, 12785+ uint32_t * const pplane_id) 12786+{ 12787+ drmModePlaneResPtr planes; 12788+ drmModePlanePtr plane; 12789+ drmModeObjectPropertiesPtr props = NULL; 12790+ drmModePropertyPtr prop = NULL; 12791+ unsigned int i; 12792+ unsigned int j; 12793+ int ret = -1; 12794+ 12795+ planes = drmModeGetPlaneResources(drmfd); 12796+ if (!planes) 12797+ { 12798+ av_log(avctx, AV_LOG_WARNING, "drmModeGetPlaneResources failed: %s\n", ERRSTR); 12799+ return -1; 12800+ } 12801+ 12802+ for (i = 0; i < planes->count_planes; ++i) { 12803+ plane = drmModeGetPlane(drmfd, planes->planes[i]); 12804+ if (!plane) /* check the plane just fetched; the resource list was validated above */ 12805+ { 12806+ av_log(avctx, AV_LOG_WARNING, "drmModeGetPlane failed: %s\n", ERRSTR); 12807+ goto fail; /* no plane selected: leave with ret == -1 rather than fall through to the zpos setup */ 12808+ } 12809+ 12810+ if (!(plane->possible_crtcs & (1 << crtcidx))) { 12811+ drmModeFreePlane(plane); 12812+ continue; 12813+ } 12814+ 12815+ for (j = 0; j < plane->count_formats; ++j) { 12816+ if (plane->formats[j] == format) 12817+ break; 12818+ } 12819+ 12820+ if (j == plane->count_formats) { 12821+ drmModeFreePlane(plane); 12822+ continue; 12823+ } 12824+ 12825+ *pplane_id = plane->plane_id; 12826+ drmModeFreePlane(plane); 12827+ break; 12828+ } 12829+ 12830+ if (i == planes->count_planes) { 12831+ ret = -1; 12832+ goto fail; 12833+ } 12834+ 12835+ props = drmModeObjectGetProperties(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE); 12836+ if (!props) 12837+ goto fail; 12838+ for (i = 0; i != props->count_props; ++i) { 12839+ if (prop) 12840+ drmModeFreeProperty(prop); 12841+ prop = drmModeGetProperty(drmfd, props->props[i]); 12842+ if (!prop) 12843+ goto fail; 12844+ if (strcmp("zpos", prop->name) == 0)
{ 12845+ if (drmModeObjectSetProperty(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE, props->props[i], prop->values[1]) == 0) 12846+ av_log(avctx, AV_LOG_DEBUG, "ZPOS set to %d\n", (int)prop->values[1]); 12847+ else 12848+ av_log(avctx, AV_LOG_WARNING, "Failed to set ZPOS on DRM plane\n"); 12849+ break; 12850+ } 12851+ } 12852+ 12853+ ret = 0; 12854+fail: 12855+ if (props) 12856+ drmModeFreeObjectProperties(props); 12857+ if (prop) 12858+ drmModeFreeProperty(prop); 12859+ drmModeFreePlaneResources(planes); 12860+ return ret; 12861+} 12862+ 12863+static void da_uninit(drm_display_env_t * const de, drm_aux_t * da) 12864+{ 12865+ if (da->fb_handle != 0) { 12866+ drmModeRmFB(de->drm_fd, da->fb_handle); 12867+ da->fb_handle = 0; 12868+ } 12869+ 12870+ for (unsigned int i = 0; i != AV_DRM_MAX_PLANES; ++i) { 12871+ if (da->bo_handles[i]) { 12872+ struct drm_gem_close gem_close = {.handle = da->bo_handles[i]}; 12873+ drmIoctl(de->drm_fd, DRM_IOCTL_GEM_CLOSE, &gem_close); 12874+ da->bo_handles[i] = 0; 12875+ } 12876+ } 12877+ av_frame_free(&da->frame); 12878+} 12879+ 12880+static int do_display(AVFormatContext * const s, drm_display_env_t * const de, AVFrame * frame) 12881+{ 12882+ const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0]; 12883+ drm_aux_t * da = de->aux + de->ano; 12884+ const uint32_t format = desc->layers[0].format; 12885+ int ret = 0; 12886+ 12887+#if TRACE_ALL 12888+ av_log(s, AV_LOG_DEBUG, "<<< %s: fd=%d\n", __func__, desc->objects[0].fd); 12889+#endif 12890+ 12891+ if (de->setup.out_fourcc != format) { 12892+ if (find_plane(s, de->drm_fd, de->setup.crtcIdx, format, &de->setup.planeId)) { 12893+ av_frame_free(&frame); 12894+ av_log(s, AV_LOG_WARNING, "No plane for format: %#x\n", format); 12895+ return -1; 12896+ } 12897+ de->setup.out_fourcc = format; 12898+ } 12899+ 12900+ { 12901+ drmVBlank vbl = { 12902+ .request = { 12903+ .type = DRM_VBLANK_RELATIVE, 12904+ .sequence = 0 12905+ } 12906+ }; 12907+ 12908+ while (drmWaitVBlank(de->drm_fd, 
&vbl)) { 12909+ if (errno != EINTR) { 12910+// av_log(s, AV_LOG_WARNING, "drmWaitVBlank failed: %s\n", ERRSTR); 12911+ break; 12912+ } 12913+ } 12914+ } 12915+ 12916+ da_uninit(de, da); 12917+ 12918+ { 12919+ uint32_t pitches[4] = {0}; 12920+ uint32_t offsets[4] = {0}; 12921+ uint64_t modifiers[4] = {0}; 12922+ uint32_t bo_handles[4] = {0}; 12923+ int has_mods = 0; 12924+ int i, j, n; 12925+ 12926+ da->frame = frame; 12927+ 12928+ for (i = 0; i < desc->nb_objects; ++i) { 12929+ if (drmPrimeFDToHandle(de->drm_fd, desc->objects[i].fd, da->bo_handles + i) != 0) { 12930+ av_log(s, AV_LOG_WARNING, "drmPrimeFDToHandle[%d](%d) failed: %s\n", i, desc->objects[i].fd, ERRSTR); 12931+ return -1; 12932+ } 12933+ if (desc->objects[i].format_modifier != DRM_FORMAT_MOD_LINEAR && 12934+ desc->objects[i].format_modifier != DRM_FORMAT_MOD_INVALID) 12935+ has_mods = 1; 12936+ } 12937+ 12938+ n = 0; 12939+ for (i = 0; i < desc->nb_layers; ++i) { 12940+ for (j = 0; j < desc->layers[i].nb_planes; ++j) { 12941+ const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j; 12942+ const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index; 12943+ pitches[n] = p->pitch; 12944+ offsets[n] = p->offset; 12945+ modifiers[n] = obj->format_modifier; 12946+ bo_handles[n] = da->bo_handles[p->object_index]; 12947+ ++n; 12948+ } 12949+ } 12950+ 12951+#if 1 && TRACE_ALL 12952+ av_log(s, AV_LOG_DEBUG, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d," 12953+ " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n", 12954+ av_frame_cropped_width(frame), 12955+ av_frame_cropped_height(frame), 12956+ desc->layers[0].format, 12957+ bo_handles[0], 12958+ bo_handles[1], 12959+ bo_handles[2], 12960+ bo_handles[3], 12961+ pitches[0], 12962+ pitches[1], 12963+ pitches[2], 12964+ pitches[3], 12965+ offsets[0], 12966+ offsets[1], 12967+ offsets[2], 12968+ offsets[3], 12969+ (long long)modifiers[0], 12970+ (long long)modifiers[1], 12971+ (long long)modifiers[2], 12972+ (long long)modifiers[3] 12973+ 
); 12974+#endif 12975+ 12976+ if (drmModeAddFB2WithModifiers(de->drm_fd, 12977+ av_frame_cropped_width(frame), 12978+ av_frame_cropped_height(frame), 12979+ desc->layers[0].format, bo_handles, 12980+ pitches, offsets, 12981+ has_mods ? modifiers : NULL, 12982+ &da->fb_handle, 12983+ has_mods ? DRM_MODE_FB_MODIFIERS : 0) != 0) { 12984+ av_log(s, AV_LOG_WARNING, "drmModeAddFB2WithModifiers failed: %s\n", ERRSTR); 12985+ return -1; 12986+ } 12987+ } 12988+ 12989+ ret = drmModeSetPlane(de->drm_fd, de->setup.planeId, de->setup.crtcId, 12990+ da->fb_handle, 0, 12991+ de->setup.compose.x, de->setup.compose.y, 12992+ de->setup.compose.width, 12993+ de->setup.compose.height, 12994+ 0, 0, 12995+ av_frame_cropped_width(frame) << 16, 12996+ av_frame_cropped_height(frame) << 16); 12997+ 12998+ if (ret != 0) { 12999+ av_log(s, AV_LOG_WARNING, "drmModeSetPlane failed: %s\n", ERRSTR); 13000+ } 13001+ 13002+ de->ano = de->ano + 1 >= AUX_SIZE ? 0 : de->ano + 1; 13003+ 13004+ return ret; 13005+} 13006+ 13007+static int do_sem_wait(sem_t * const sem, const int nowait) 13008+{ 13009+ while (nowait ? 
sem_trywait(sem) : sem_wait(sem)) { 13010+ if (errno != EINTR) 13011+ return -errno; 13012+ } 13013+ return 0; 13014+} 13015+ 13016+static void * display_thread(void * v) 13017+{ 13018+ AVFormatContext * const s = v; 13019+ drm_display_env_t * const de = s->priv_data; 13020+ int i; 13021+ 13022+#if TRACE_ALL 13023+ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); 13024+#endif 13025+ 13026+ sem_post(&de->q_sem_out); 13027+ 13028+ for (;;) { 13029+ AVFrame * frame; 13030+ 13031+ do_sem_wait(&de->q_sem_in, 0); 13032+ 13033+ if (de->q_terminate) 13034+ break; 13035+ 13036+ frame = de->q_next; 13037+ de->q_next = NULL; 13038+ sem_post(&de->q_sem_out); 13039+ 13040+ do_display(s, de, frame); 13041+ } 13042+ 13043+#if TRACE_ALL 13044+ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); 13045+#endif 13046+ 13047+ for (i = 0; i != AUX_SIZE; ++i) 13048+ da_uninit(de, de->aux + i); 13049+ 13050+ av_frame_free(&de->q_next); 13051+ 13052+ return NULL; 13053+} 13054+ 13055+static int drm_vout_write_packet(AVFormatContext *s, AVPacket *pkt) 13056+{ 13057+ const AVFrame * const src_frame = (AVFrame *)pkt->data; 13058+ AVFrame * frame; 13059+ drm_display_env_t * const de = s->priv_data; 13060+ int ret; 13061+ 13062+#if TRACE_ALL 13063+ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); 13064+#endif 13065+ 13066+ if ((src_frame->flags & AV_FRAME_FLAG_CORRUPT) != 0) { 13067+ av_log(s, AV_LOG_WARNING, "Discard corrupt frame: fmt=%d, ts=%" PRId64 "\n", src_frame->format, src_frame->pts); 13068+ return 0; 13069+ } 13070+ 13071+ if (src_frame->format == AV_PIX_FMT_DRM_PRIME) { 13072+ frame = av_frame_alloc(); 13073+ av_frame_ref(frame, src_frame); 13074+ } 13075+ else if (src_frame->format == AV_PIX_FMT_VAAPI) { 13076+ frame = av_frame_alloc(); 13077+ frame->format = AV_PIX_FMT_DRM_PRIME; 13078+ if (av_hwframe_map(frame, src_frame, 0) != 0) 13079+ { 13080+ av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format); 13081+ av_frame_free(&frame); 13082+ return 
AVERROR(EINVAL); 13083+ } 13084+ } 13085+ else { 13086+ av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format); 13087+ return AVERROR(EINVAL); 13088+ } 13089+ 13090+ ret = do_sem_wait(&de->q_sem_out, !de->show_all); 13091+ if (ret) { 13092+ av_frame_free(&frame); 13093+ } 13094+ else { 13095+ de->q_next = frame; 13096+ sem_post(&de->q_sem_in); 13097+ } 13098+ 13099+ return 0; 13100+} 13101+ 13102+static int drm_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, 13103+ unsigned flags) 13104+{ 13105+ av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); 13106+ return AVERROR_PATCHWELCOME; 13107+} 13108+ 13109+static int drm_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size) 13110+{ 13111+#if TRACE_ALL 13112+ av_log(s, AV_LOG_DEBUG, "%s: %d\n", __func__, type); 13113+#endif 13114+ switch(type) { 13115+ case AV_APP_TO_DEV_WINDOW_REPAINT: 13116+ return 0; 13117+ default: 13118+ break; 13119+ } 13120+ return AVERROR(ENOSYS); 13121+} 13122+ 13123+static int find_crtc(struct AVFormatContext * const avctx, int drmfd, struct drm_setup *s, uint32_t * const pConId) 13124+{ 13125+ int ret = -1; 13126+ int i; 13127+ drmModeRes *res = drmModeGetResources(drmfd); 13128+ drmModeConnector *c; 13129+ 13130+ if(!res) 13131+ { 13132+ printf( "drmModeGetResources failed: %s\n", ERRSTR); 13133+ return -1; 13134+ } 13135+ 13136+ if (res->count_crtcs <= 0) 13137+ { 13138+ printf( "drm: no crts\n"); 13139+ goto fail_res; 13140+ } 13141+ 13142+ if (!s->conId) { 13143+ fprintf(stderr, 13144+ "No connector ID specified. 
Choosing default from list:\n"); 13145+ 13146+ for (i = 0; i < res->count_connectors; i++) { 13147+ drmModeConnector *con = 13148+ drmModeGetConnector(drmfd, res->connectors[i]); 13149+ drmModeEncoder *enc = NULL; 13150+ drmModeCrtc *crtc = NULL; 13151+ if (!con) continue; /* connector lookup can fail; skip it instead of dereferencing NULL */ 13152+ if (con->encoder_id) { 13153+ enc = drmModeGetEncoder(drmfd, con->encoder_id); 13154+ if (enc && enc->crtc_id) { /* encoder lookup can fail too */ 13155+ crtc = drmModeGetCrtc(drmfd, enc->crtc_id); 13156+ } 13157+ } 13158+ 13159+ if (!s->conId && crtc) { 13160+ s->conId = con->connector_id; 13161+ s->crtcId = crtc->crtc_id; 13162+ } 13163+ 13164+ av_log(avctx, AV_LOG_DEBUG, "Connector %d (crtc %d): type %d, %dx%d%s\n", 13165+ con->connector_id, 13166+ crtc ? crtc->crtc_id : 0, 13167+ con->connector_type, 13168+ crtc ? crtc->width : 0, 13169+ crtc ? crtc->height : 0, 13170+ (s->conId == (int)con->connector_id ? 13171+ " (chosen)" : "")); 13172+ 13173+ if (crtc) 13174+ drmModeFreeCrtc(crtc); 13175+ if (enc) 13176+ drmModeFreeEncoder(enc); 13177+ if (con) 13178+ drmModeFreeConnector(con); 13179+ } 13180+ 13181+ if (!s->conId) { 13182+ av_log(avctx, AV_LOG_ERROR, 13183+ "No suitable enabled connector found.\n"); 13184+ goto fail_res; /* ret is still -1; release res instead of leaking it */ 13185+ } 13186+ } 13187+ 13188+ s->crtcIdx = -1; 13189+ 13190+ for (i = 0; i < res->count_crtcs; ++i) { 13191+ if (s->crtcId == res->crtcs[i]) { 13192+ s->crtcIdx = i; 13193+ break; 13194+ } 13195+ } 13196+ 13197+ if (s->crtcIdx == -1) 13198+ { 13199+ av_log(avctx, AV_LOG_WARNING, "drm: CRTC %u not found\n", s->crtcId); 13200+ goto fail_res; 13201+ } 13202+ 13203+ if (res->count_connectors <= 0) 13204+ { 13205+ av_log(avctx, AV_LOG_WARNING, "drm: no connectors\n"); 13206+ goto fail_res; 13207+ } 13208+ 13209+ c = drmModeGetConnector(drmfd, s->conId); 13210+ if (!c) 13211+ { 13212+ av_log(avctx, AV_LOG_WARNING, "drmModeGetConnector failed: %s\n", ERRSTR); 13213+ goto fail_res; 13214+ } 13215+ 13216+ if (!c->count_modes) 13217+ { 13218+ av_log(avctx, AV_LOG_WARNING, "connector supports no mode\n"); 13219+ goto fail_conn; 13220+
} 13221+ 13222+ { 13223+ drmModeCrtc *crtc = drmModeGetCrtc(drmfd, s->crtcId); 13224+ s->compose.x = crtc->x; 13225+ s->compose.y = crtc->y; 13226+ s->compose.width = crtc->width; 13227+ s->compose.height = crtc->height; 13228+ drmModeFreeCrtc(crtc); 13229+ } 13230+ 13231+ if (pConId) 13232+ *pConId = c->connector_id; 13233+ ret = 0; 13234+ 13235+fail_conn: 13236+ drmModeFreeConnector(c); 13237+ 13238+fail_res: 13239+ drmModeFreeResources(res); 13240+ 13241+ return ret; 13242+} 13243+ 13244+// deinit is called if init fails so no need to clean up explicity here 13245+static int drm_vout_init(struct AVFormatContext * s) 13246+{ 13247+ drm_display_env_t * const de = s->priv_data; 13248+ int rv; 13249+ 13250+ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); 13251+ 13252+ de->drm_fd = -1; 13253+ de->con_id = 0; 13254+ de->setup = (struct drm_setup){0}; 13255+ de->q_terminate = 0; 13256+ 13257+ if ((de->drm_fd = drmOpen(de->drm_module, NULL)) < 0) 13258+ { 13259+ rv = AVERROR(errno); 13260+ av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", de->drm_module, av_err2str(rv)); 13261+ return rv; 13262+ } 13263+ 13264+ if (find_crtc(s, de->drm_fd, &de->setup, &de->con_id) != 0) 13265+ { 13266+ av_log(s, AV_LOG_ERROR, "failed to find valid mode\n"); 13267+ rv = AVERROR(EINVAL); 13268+ goto fail_close; 13269+ } 13270+ 13271+ sem_init(&de->q_sem_in, 0, 0); 13272+ sem_init(&de->q_sem_out, 0, 0); 13273+ if (pthread_create(&de->q_thread, NULL, display_thread, s)) { 13274+ rv = AVERROR(errno); 13275+ av_log(s, AV_LOG_ERROR, "Failed to create display thread: %s\n", av_err2str(rv)); 13276+ goto fail_close; 13277+ } 13278+ 13279+ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); 13280+ 13281+ return 0; 13282+ 13283+fail_close: 13284+ close(de->drm_fd); 13285+ de->drm_fd = -1; 13286+ av_log(s, AV_LOG_DEBUG, ">>> %s: FAIL\n", __func__); 13287+ 13288+ return rv; 13289+} 13290+ 13291+static void drm_vout_deinit(struct AVFormatContext * s) 13292+{ 13293+ drm_display_env_t * const de = 
s->priv_data; 13294+ 13295+ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); 13296+ 13297+ de->q_terminate = 1; 13298+ sem_post(&de->q_sem_in); 13299+ pthread_join(de->q_thread, NULL); 13300+ sem_destroy(&de->q_sem_in); 13301+ sem_destroy(&de->q_sem_out); 13302+ 13303+ for (unsigned int i = 0; i != AUX_SIZE; ++i) 13304+ da_uninit(de, de->aux + i); 13305+ 13306+ av_frame_free(&de->q_next); 13307+ 13308+ if (de->drm_fd >= 0) { 13309+ close(de->drm_fd); 13310+ de->drm_fd = -1; 13311+ } 13312+ 13313+ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); 13314+} 13315+ 13316+ 13317+#define OFFSET(x) offsetof(drm_display_env_t, x) 13318+static const AVOption options[] = { 13319+ { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, 13320+ { "drm_module", "drm_module name to use, default=" DRM_MODULE, OFFSET(drm_module), AV_OPT_TYPE_STRING, { .str = DRM_MODULE }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, 13321+ { NULL } 13322+}; 13323+ 13324+static const AVClass drm_vout_class = { 13325+ .class_name = "drm vid outdev", 13326+ .item_name = av_default_item_name, 13327+ .option = options, 13328+ .version = LIBAVUTIL_VERSION_INT, 13329+ .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT, 13330+}; 13331+ 13332+AVOutputFormat ff_vout_drm_muxer = { 13333+ .name = "vout_drm", 13334+ .long_name = NULL_IF_CONFIG_SMALL("Drm video output device"), 13335+ .priv_data_size = sizeof(drm_display_env_t), 13336+ .audio_codec = AV_CODEC_ID_NONE, 13337+ .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, 13338+ .write_header = drm_vout_write_header, 13339+ .write_packet = drm_vout_write_packet, 13340+ .write_uncoded_frame = drm_vout_write_frame, 13341+ .write_trailer = drm_vout_write_trailer, 13342+ .control_message = drm_vout_control_message, 13343+ .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, 13344+ .priv_class = &drm_vout_class, 13345+ .init = drm_vout_init, 13346+ .deinit = drm_vout_deinit, 13347+}; 13348+ 13349--- /dev/null 
13350+++ b/libavdevice/egl_vout.c 13351@@ -0,0 +1,781 @@ 13352+/* 13353+ * Copyright (c) 2020 John Cox for Raspberry Pi Trading 13354+ * 13355+ * This file is part of FFmpeg. 13356+ * 13357+ * FFmpeg is free software; you can redistribute it and/or 13358+ * modify it under the terms of the GNU Lesser General Public 13359+ * License as published by the Free Software Foundation; either 13360+ * version 2.1 of the License, or (at your option) any later version. 13361+ * 13362+ * FFmpeg is distributed in the hope that it will be useful, 13363+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 13364+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13365+ * Lesser General Public License for more details. 13366+ * 13367+ * You should have received a copy of the GNU Lesser General Public 13368+ * License along with FFmpeg; if not, write to the Free Software 13369+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 13370+ */ 13371+ 13372+ 13373+// *** This module is a work in progress and its utility is strictly 13374+// limited to testing. 13375+// Amongst other issues it doesn't wait for the pic to be displayed before 13376+// returning the buffer so flickering does occur.
13377+ 13378+#include <epoxy/gl.h> 13379+#include <epoxy/egl.h> 13380+ 13381+#include "libavutil/opt.h" 13382+#include "libavutil/avassert.h" 13383+#include "libavutil/pixdesc.h" 13384+#include "libavutil/imgutils.h" 13385+#include "libavutil/hwcontext_drm.h" 13386+#include "libavformat/internal.h" 13387+#include "avdevice.h" 13388+ 13389+#include "pthread.h" 13390+#include <semaphore.h> 13391+#include <stdatomic.h> 13392+#include <unistd.h> 13393+ 13394+#include <X11/Xlib.h> 13395+#include <X11/Xutil.h> 13396+ 13397+#include "libavutil/rpi_sand_fns.h" 13398+ 13399+#define TRACE_ALL 0 13400+ 13401+struct egl_setup { 13402+ int conId; 13403+ 13404+ Display *dpy; 13405+ EGLDisplay egl_dpy; 13406+ EGLContext ctx; 13407+ EGLSurface surf; 13408+ Window win; 13409+ 13410+ uint32_t crtcId; 13411+ int crtcIdx; 13412+ uint32_t planeId; 13413+ struct { 13414+ int x, y, width, height; 13415+ } compose; 13416+}; 13417+ 13418+typedef struct egl_aux_s { 13419+ int fd; 13420+ GLuint texture; 13421+ 13422+} egl_aux_t; 13423+ 13424+typedef struct egl_display_env_s { 13425+ AVClass *class; 13426+ 13427+ struct egl_setup setup; 13428+ enum AVPixelFormat avfmt; 13429+ 13430+ int show_all; 13431+ int window_width, window_height; 13432+ int window_x, window_y; 13433+ int fullscreen; 13434+ 13435+ egl_aux_t aux[32]; 13436+ 13437+ pthread_t q_thread; 13438+ pthread_mutex_t q_lock; 13439+ sem_t display_start_sem; 13440+ sem_t q_sem; 13441+ int q_terminate; 13442+ AVFrame *q_this; 13443+ AVFrame *q_next; 13444+ 13445+} egl_display_env_t; 13446+ 13447+ 13448+/** 13449+ * Remove window border/decorations. 
13450+ */ 13451+static void 13452+no_border(Display *dpy, Window w) 13453+{ 13454+ static const unsigned MWM_HINTS_DECORATIONS = (1 << 1); 13455+ static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5; 13456+ 13457+ typedef struct { 13458+ unsigned long flags; 13459+ unsigned long functions; 13460+ unsigned long decorations; 13461+ long inputMode; 13462+ unsigned long status; 13463+ } PropMotifWmHints; 13464+ 13465+ PropMotifWmHints motif_hints; 13466+ Atom prop, proptype; 13467+ unsigned long flags = 0; 13468+ 13469+ /* setup the property */ 13470+ motif_hints.flags = MWM_HINTS_DECORATIONS; 13471+ motif_hints.decorations = flags; 13472+ 13473+ /* get the atom for the property */ 13474+ prop = XInternAtom(dpy, "_MOTIF_WM_HINTS", True); 13475+ if (!prop) { 13476+ /* something went wrong! */ 13477+ return; 13478+ } 13479+ 13480+ /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */ 13481+ proptype = prop; 13482+ 13483+ XChangeProperty(dpy, w, /* display, window */ 13484+ prop, proptype, /* property, type */ 13485+ 32, /* format: 32-bit datums */ 13486+ PropModeReplace, /* mode */ 13487+ (unsigned char *)&motif_hints, /* data */ 13488+ PROP_MOTIF_WM_HINTS_ELEMENTS /* nelements */ 13489+ ); 13490+} 13491+ 13492+ 13493+/* 13494+ * Create an RGB, double-buffered window. 13495+ * Return the window and context handles. 13496+ */ 13497+static int 13498+make_window(struct AVFormatContext *const s, 13499+ egl_display_env_t *const de, 13500+ Display *dpy, EGLDisplay egl_dpy, const char *name, 13501+ Window *winRet, EGLContext *ctxRet, EGLSurface *surfRet) 13502+{ 13503+ int scrnum = DefaultScreen(dpy); 13504+ XSetWindowAttributes attr; 13505+ unsigned long mask; 13506+ Window root = RootWindow(dpy, scrnum); 13507+ Window win; 13508+ EGLContext ctx; 13509+ const int fullscreen = de->fullscreen; 13510+ EGLConfig config; 13511+ int x = de->window_x; 13512+ int y = de->window_y; 13513+ int width = de->window_width ? 
de->window_width : 1280; 13514+ int height = de->window_height ? de->window_height : 720; 13515+ 13516+ 13517+ if (fullscreen) { 13518+ int scrnum = DefaultScreen(dpy); 13519+ 13520+ x = 0; y = 0; 13521+ width = DisplayWidth(dpy, scrnum); 13522+ height = DisplayHeight(dpy, scrnum); 13523+ } 13524+ 13525+ { 13526+ EGLint num_configs; 13527+ static const EGLint attribs[] = { 13528+ EGL_RED_SIZE, 1, 13529+ EGL_GREEN_SIZE, 1, 13530+ EGL_BLUE_SIZE, 1, 13531+ EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, 13532+ EGL_NONE 13533+ }; 13534+ 13535+ if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) { 13536+ av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n"); 13537+ return -1; 13538+ } 13539+ } 13540+ 13541+ { 13542+ EGLint vid; 13543+ if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) { 13544+ av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n"); 13545+ return -1; 13546+ } 13547+ 13548+ { 13549+ XVisualInfo visTemplate = { 13550+ .visualid = vid, 13551+ }; 13552+ int num_visuals; 13553+ XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask, 13554+ &visTemplate, &num_visuals); 13555+ 13556+ /* window attributes */ 13557+ attr.background_pixel = 0; 13558+ attr.border_pixel = 0; 13559+ attr.colormap = XCreateColormap(dpy, root, visinfo->visual, AllocNone); 13560+ attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask; 13561+ /* XXX this is a bad way to get a borderless window! 
*/ 13562+ mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask; 13563+ 13564+ win = XCreateWindow(dpy, root, x, y, width, height, 13565+ 0, visinfo->depth, InputOutput, 13566+ visinfo->visual, mask, &attr); 13567+ XFree(visinfo); 13568+ } 13569+ } 13570+ 13571+ if (fullscreen) 13572+ no_border(dpy, win); 13573+ 13574+ /* set hints and properties */ 13575+ { 13576+ XSizeHints sizehints; 13577+ sizehints.x = x; 13578+ sizehints.y = y; 13579+ sizehints.width = width; 13580+ sizehints.height = height; 13581+ sizehints.flags = USSize | USPosition; 13582+ XSetNormalHints(dpy, win, &sizehints); 13583+ XSetStandardProperties(dpy, win, name, name, 13584+ None, (char **)NULL, 0, &sizehints); 13585+ } 13586+ 13587+ eglBindAPI(EGL_OPENGL_ES_API); 13588+ 13589+ { 13590+ static const EGLint ctx_attribs[] = { 13591+ EGL_CONTEXT_CLIENT_VERSION, 2, 13592+ EGL_NONE 13593+ }; 13594+ ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs); 13595+ if (!ctx) { 13596+ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); 13597+ return -1; 13598+ } 13599+ } 13600+ 13601+ 13602+ XMapWindow(dpy, win); 13603+ 13604+ { 13605+ EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL); 13606+ if (!surf) { 13607+ av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n"); 13608+ return -1; 13609+ } 13610+ 13611+ if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) { 13612+ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); 13613+ return -1; 13614+ } 13615+ 13616+ *winRet = win; 13617+ *ctxRet = ctx; 13618+ *surfRet = surf; 13619+ } 13620+ 13621+ return 0; 13622+} 13623+ 13624+static GLint 13625+compile_shader(struct AVFormatContext *const avctx, GLenum target, const char *source) 13626+{ 13627+ GLuint s = glCreateShader(target); 13628+ 13629+ if (s == 0) { 13630+ av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n"); 13631+ return 0; 13632+ } 13633+ 13634+ glShaderSource(s, 1, (const GLchar **)&source, NULL); 13635+ 
glCompileShader(s); 13636+ 13637+ { 13638+ GLint ok; 13639+ glGetShaderiv(s, GL_COMPILE_STATUS, &ok); 13640+ 13641+ if (!ok) { 13642+ GLchar *info; 13643+ GLint size; 13644+ 13645+ glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size); 13646+ info = malloc(size); 13647+ 13648+ glGetShaderInfoLog(s, size, NULL, info); 13649+ av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n", info, source); 13650+ 13651+ return 0; 13652+ } 13653+ } 13654+ 13655+ return s; 13656+} 13657+ 13658+static GLuint link_program(struct AVFormatContext *const s, GLint vs, GLint fs) 13659+{ 13660+ GLuint prog = glCreateProgram(); 13661+ 13662+ if (prog == 0) { 13663+ av_log(s, AV_LOG_ERROR, "Failed to create program\n"); 13664+ return 0; 13665+ } 13666+ 13667+ glAttachShader(prog, vs); 13668+ glAttachShader(prog, fs); 13669+ glLinkProgram(prog); 13670+ 13671+ { 13672+ GLint ok; 13673+ glGetProgramiv(prog, GL_LINK_STATUS, &ok); 13674+ if (!ok) { 13675+ /* Some drivers return a size of 1 for an empty log. This is the size 13676+ * of a log that contains only a terminating NUL character. 13677+ */ 13678+ GLint size; 13679+ GLchar *info = NULL; 13680+ glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size); 13681+ if (size > 1) { 13682+ info = malloc(size); 13683+ glGetProgramInfoLog(prog, size, NULL, info); 13684+ } 13685+ 13686+ av_log(s, AV_LOG_ERROR, "Failed to link: %s\n", 13687+ (info != NULL) ? 
info : "<empty log>"); 13688+ return 0; 13689+ } 13690+ } 13691+ 13692+ return prog; 13693+} 13694+ 13695+static int 13696+gl_setup(struct AVFormatContext *const s) 13697+{ 13698+ const char *vs = 13699+ "attribute vec4 pos;\n" 13700+ "varying vec2 texcoord;\n" 13701+ "\n" 13702+ "void main() {\n" 13703+ " gl_Position = pos;\n" 13704+ " texcoord.x = (pos.x + 1.0) / 2.0;\n" 13705+ " texcoord.y = (-pos.y + 1.0) / 2.0;\n" 13706+ "}\n"; 13707+ const char *fs = 13708+ "#extension GL_OES_EGL_image_external : enable\n" 13709+ "precision mediump float;\n" 13710+ "uniform samplerExternalOES s;\n" 13711+ "varying vec2 texcoord;\n" 13712+ "void main() {\n" 13713+ " gl_FragColor = texture2D(s, texcoord);\n" 13714+ "}\n"; 13715+ 13716+ GLuint vs_s; 13717+ GLuint fs_s; 13718+ GLuint prog; 13719+ 13720+ if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) || 13721+ !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) || 13722+ !(prog = link_program(s, vs_s, fs_s))) 13723+ return -1; 13724+ 13725+ glUseProgram(prog); 13726+ 13727+ { 13728+ static const float verts[] = { 13729+ -1, -1, 13730+ 1, -1, 13731+ 1, 1, 13732+ -1, 1, 13733+ }; 13734+ glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts); 13735+ } 13736+ 13737+ glEnableVertexAttribArray(0); 13738+ return 0; 13739+} 13740+ 13741+static int egl_vout_write_trailer(AVFormatContext *s) 13742+{ 13743+#if TRACE_ALL 13744+ av_log(s, AV_LOG_INFO, "%s\n", __func__); 13745+#endif 13746+ 13747+ return 0; 13748+} 13749+ 13750+static int egl_vout_write_header(AVFormatContext *s) 13751+{ 13752+ const AVCodecParameters *const par = s->streams[0]->codecpar; 13753+ 13754+#if TRACE_ALL 13755+ av_log(s, AV_LOG_INFO, "%s\n", __func__); 13756+#endif 13757+ if (s->nb_streams > 1 13758+ || par->codec_type != AVMEDIA_TYPE_VIDEO 13759+ || par->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) { 13760+ av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n"); 13761+ return AVERROR(EINVAL); 13762+ } 13763+ 13764+ return 0; 13765+} 13766+ 
13767+ 13768+static int do_display(AVFormatContext *const s, egl_display_env_t *const de, AVFrame *const frame) 13769+{ 13770+ const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)frame->data[0]; 13771+ egl_aux_t *da = NULL; 13772+ unsigned int i; 13773+ 13774+#if TRACE_ALL 13775+ av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); 13776+#endif 13777+ 13778+ for (i = 0; i != 32; ++i) { 13779+ if (de->aux[i].fd == -1 || de->aux[i].fd == desc->objects[0].fd) { 13780+ da = de->aux + i; 13781+ break; 13782+ } 13783+ } 13784+ 13785+ if (da == NULL) { 13786+ av_log(s, AV_LOG_INFO, "%s: Out of handles\n", __func__); 13787+ return AVERROR(EINVAL); 13788+ } 13789+ 13790+ if (da->texture == 0) { 13791+ EGLint attribs[50]; 13792+ EGLint *a = attribs; 13793+ int i, j; 13794+ static const EGLint anames[] = { 13795+ EGL_DMA_BUF_PLANE0_FD_EXT, 13796+ EGL_DMA_BUF_PLANE0_OFFSET_EXT, 13797+ EGL_DMA_BUF_PLANE0_PITCH_EXT, 13798+ EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, 13799+ EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, 13800+ EGL_DMA_BUF_PLANE1_FD_EXT, 13801+ EGL_DMA_BUF_PLANE1_OFFSET_EXT, 13802+ EGL_DMA_BUF_PLANE1_PITCH_EXT, 13803+ EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT, 13804+ EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT, 13805+ EGL_DMA_BUF_PLANE2_FD_EXT, 13806+ EGL_DMA_BUF_PLANE2_OFFSET_EXT, 13807+ EGL_DMA_BUF_PLANE2_PITCH_EXT, 13808+ EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT, 13809+ EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT, 13810+ }; 13811+ const EGLint *b = anames; 13812+ 13813+ *a++ = EGL_WIDTH; 13814+ *a++ = av_frame_cropped_width(frame); 13815+ *a++ = EGL_HEIGHT; 13816+ *a++ = av_frame_cropped_height(frame); 13817+ *a++ = EGL_LINUX_DRM_FOURCC_EXT; 13818+ *a++ = desc->layers[0].format; 13819+ 13820+ for (i = 0; i < desc->nb_layers; ++i) { 13821+ for (j = 0; j < desc->layers[i].nb_planes; ++j) { 13822+ const AVDRMPlaneDescriptor *const p = desc->layers[i].planes + j; 13823+ const AVDRMObjectDescriptor *const obj = desc->objects + p->object_index; 13824+ *a++ = *b++; 13825+ *a++ = obj->fd; 13826+ *a++ = *b++; 13827+ 
*a++ = p->offset; 13828+ *a++ = *b++; 13829+ *a++ = p->pitch; 13830+ if (obj->format_modifier == 0) { 13831+ b += 2; 13832+ } 13833+ else { 13834+ *a++ = *b++; 13835+ *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF); 13836+ *a++ = *b++; 13837+ *a++ = (EGLint)(obj->format_modifier >> 32); 13838+ } 13839+ } 13840+ } 13841+ 13842+ *a = EGL_NONE; 13843+ 13844+#if TRACE_ALL 13845+ for (a = attribs, i = 0; *a != EGL_NONE; a += 2, ++i) { 13846+ av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]); 13847+ } 13848+#endif 13849+ { 13850+ const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy, 13851+ EGL_NO_CONTEXT, 13852+ EGL_LINUX_DMA_BUF_EXT, 13853+ NULL, attribs); 13854+ if (!image) { 13855+ av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd); 13856+ return -1; 13857+ } 13858+ 13859+ glGenTextures(1, &da->texture); 13860+ glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); 13861+ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 13862+ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 13863+ glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image); 13864+ 13865+ eglDestroyImageKHR(de->setup.egl_dpy, image); 13866+ } 13867+ 13868+ da->fd = desc->objects[0].fd; 13869+ } 13870+ 13871+ glClearColor(0.5, 0.5, 0.5, 0.5); 13872+ glClear(GL_COLOR_BUFFER_BIT); 13873+ 13874+ glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); 13875+ glDrawArrays(GL_TRIANGLE_FAN, 0, 4); 13876+ eglSwapBuffers(de->setup.egl_dpy, de->setup.surf); 13877+ 13878+ glDeleteTextures(1, &da->texture); 13879+ da->texture = 0; 13880+ da->fd = -1; 13881+ 13882+ return 0; 13883+} 13884+ 13885+static void* display_thread(void *v) 13886+{ 13887+ AVFormatContext *const s = v; 13888+ egl_display_env_t *const de = s->priv_data; 13889+ 13890+#if TRACE_ALL 13891+ av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); 13892+#endif 13893+ { 13894+ EGLint egl_major, egl_minor; 13895+ 13896+ de->setup.dpy = XOpenDisplay(NULL); 13897+ if 
(!de->setup.dpy) { 13898+ av_log(s, AV_LOG_ERROR, "Couldn't open X display\n"); 13899+ goto fail; 13900+ } 13901+ 13902+ de->setup.egl_dpy = eglGetDisplay(de->setup.dpy); 13903+ if (!de->setup.egl_dpy) { 13904+ av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n"); 13905+ goto fail; 13906+ } 13907+ 13908+ if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) { 13909+ av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n"); 13910+ goto fail; 13911+ } 13912+ 13913+ av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor); 13914+ 13915+ if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) { 13916+ av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n"); 13917+ goto fail; 13918+ } 13919+ } 13920+ 13921+ if (!de->window_width || !de->window_height) { 13922+ de->window_width = 1280; 13923+ de->window_height = 720; 13924+ } 13925+ if (make_window(s, de, de->setup.dpy, de->setup.egl_dpy, "ffmpeg-vout", 13926+ &de->setup.win, &de->setup.ctx, &de->setup.surf)) { 13927+ av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__); 13928+ goto fail; 13929+ } 13930+ 13931+ if (gl_setup(s)) { 13932+ av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__); 13933+ goto fail; 13934+ } 13935+ 13936+#if TRACE_ALL 13937+ av_log(s, AV_LOG_INFO, "--- %s: Start done\n", __func__); 13938+#endif 13939+ sem_post(&de->display_start_sem); 13940+ 13941+ for (;;) { 13942+ AVFrame *frame; 13943+ 13944+ while (sem_wait(&de->q_sem) != 0) { 13945+ av_assert0(errno == EINTR); 13946+ } 13947+ 13948+ if (de->q_terminate) 13949+ break; 13950+ 13951+ pthread_mutex_lock(&de->q_lock); 13952+ frame = de->q_next; 13953+ de->q_next = NULL; 13954+ pthread_mutex_unlock(&de->q_lock); 13955+ 13956+ do_display(s, de, frame); 13957+ 13958+ av_frame_free(&de->q_this); 13959+ de->q_this = frame; 13960+ } 13961+ 13962+#if TRACE_ALL 13963+ av_log(s, AV_LOG_INFO, ">>> %s\n", __func__); 13964+#endif 13965+ 13966+ return NULL; 13967+ 13968+fail: 13969+#if TRACE_ALL 13970+ 
av_log(s, AV_LOG_INFO, ">>> %s: FAIL\n", __func__); 13971+#endif 13972+ de->q_terminate = 1; 13973+ sem_post(&de->display_start_sem); 13974+ 13975+ return NULL; 13976+} 13977+ 13978+static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt) 13979+{ 13980+ const AVFrame *const src_frame = (AVFrame *)pkt->data; 13981+ AVFrame *frame; 13982+ egl_display_env_t *const de = s->priv_data; 13983+ 13984+#if TRACE_ALL 13985+ av_log(s, AV_LOG_INFO, "%s\n", __func__); 13986+#endif 13987+ 13988+ if (src_frame->format == AV_PIX_FMT_DRM_PRIME) { 13989+ frame = av_frame_alloc(); 13990+ av_frame_ref(frame, src_frame); 13991+ } 13992+ else if (src_frame->format == AV_PIX_FMT_VAAPI) { 13993+ frame = av_frame_alloc(); 13994+ frame->format = AV_PIX_FMT_DRM_PRIME; 13995+ if (av_hwframe_map(frame, src_frame, 0) != 0) { 13996+ av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format); 13997+ av_frame_free(&frame); 13998+ return AVERROR(EINVAL); 13999+ } 14000+ } 14001+ else { 14002+ av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format); 14003+ return AVERROR(EINVAL); 14004+ } 14005+ 14006+ // Really hacky sync 14007+ while (de->show_all && de->q_next) { 14008+ usleep(3000); 14009+ } 14010+ 14011+ pthread_mutex_lock(&de->q_lock); 14012+ { 14013+ AVFrame *const t = de->q_next; 14014+ de->q_next = frame; 14015+ frame = t; 14016+ } 14017+ pthread_mutex_unlock(&de->q_lock); 14018+ 14019+ if (frame == NULL) 14020+ sem_post(&de->q_sem); 14021+ else 14022+ av_frame_free(&frame); 14023+ 14024+ return 0; 14025+} 14026+ 14027+static int egl_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, 14028+ unsigned flags) 14029+{ 14030+ av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); 14031+ return AVERROR_PATCHWELCOME; 14032+} 14033+ 14034+static int egl_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size) 14035+{ 14036+#if TRACE_ALL 14037+ 
av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type); 14038+#endif 14039+ switch (type) { 14040+ case AV_APP_TO_DEV_WINDOW_REPAINT: 14041+ return 0; 14042+ default: 14043+ break; 14044+ } 14045+ return AVERROR(ENOSYS); 14046+} 14047+ 14048+// deinit is called if init fails so no need to clean up explicitly here 14049+static int egl_vout_init(struct AVFormatContext *s) 14050+{ 14051+ egl_display_env_t *const de = s->priv_data; 14052+ unsigned int i; 14053+ 14054+ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); 14055+ 14056+ de->setup = (struct egl_setup) { 0 }; 14057+ 14058+ for (i = 0; i != 32; ++i) { 14059+ de->aux[i].fd = -1; 14060+ } 14061+ 14062+ de->q_terminate = 0; 14063+ pthread_mutex_init(&de->q_lock, NULL); 14064+ sem_init(&de->q_sem, 0, 0); 14065+ sem_init(&de->display_start_sem, 0, 0); 14066+ av_assert0(pthread_create(&de->q_thread, NULL, display_thread, s) == 0); 14067+ 14068+ sem_wait(&de->display_start_sem); 14069+ if (de->q_terminate) { 14070+ av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__); 14071+ return -1; 14072+ } 14073+ 14074+ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); 14075+ 14076+ return 0; 14077+} 14078+ 14079+static void egl_vout_deinit(struct AVFormatContext *s) 14080+{ 14081+ egl_display_env_t *const de = s->priv_data; 14082+ 14083+ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); 14084+ 14085+ de->q_terminate = 1; 14086+ sem_post(&de->q_sem); 14087+ pthread_join(de->q_thread, NULL); 14088+ sem_destroy(&de->q_sem); 14089+ pthread_mutex_destroy(&de->q_lock); 14090+ 14091+ av_frame_free(&de->q_next); 14092+ av_frame_free(&de->q_this); 14093+ 14094+ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); 14095+} 14096+ 14097+#define OFFSET(x) offsetof(egl_display_env_t, x) 14098+static const AVOption options[] = { 14099+ { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, 14100+ { "window_size", "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, { 
.str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, 14101+ { "window_x", "set window x offset", OFFSET(window_x), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, 14102+ { "window_y", "set window y offset", OFFSET(window_y), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, 14103+ { "fullscreen", "set fullscreen display", OFFSET(fullscreen), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, 14104+ { NULL } 14105+ 14106+}; 14107+ 14108+static const AVClass egl_vout_class = { 14109+ .class_name = "egl vid outdev", 14110+ .item_name = av_default_item_name, 14111+ .option = options, 14112+ .version = LIBAVUTIL_VERSION_INT, 14113+ .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT, 14114+}; 14115+ 14116+AVOutputFormat ff_vout_egl_muxer = { 14117+ .name = "vout_egl", 14118+ .long_name = NULL_IF_CONFIG_SMALL("Egl video output device"), 14119+ .priv_data_size = sizeof(egl_display_env_t), 14120+ .audio_codec = AV_CODEC_ID_NONE, 14121+ .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, 14122+ .write_header = egl_vout_write_header, 14123+ .write_packet = egl_vout_write_packet, 14124+ .write_uncoded_frame = egl_vout_write_frame, 14125+ .write_trailer = egl_vout_write_trailer, 14126+ .control_message = egl_vout_control_message, 14127+ .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, 14128+ .priv_class = &egl_vout_class, 14129+ .init = egl_vout_init, 14130+ .deinit = egl_vout_deinit, 14131+}; 14132+ 14133--- a/libavfilter/Makefile 14134+++ b/libavfilter/Makefile 14135@@ -254,6 +254,7 @@ OBJS-$(CONFIG_DEFLATE_FILTER) 14136 OBJS-$(CONFIG_DEFLICKER_FILTER) += vf_deflicker.o 14137 OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_deinterlace_qsv.o 14138 OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER) += vf_deinterlace_vaapi.o vaapi_vpp.o 14139+OBJS-$(CONFIG_DEINTERLACE_V4L2M2M_FILTER) += vf_deinterlace_v4l2m2m.o 14140 OBJS-$(CONFIG_DEJUDDER_FILTER) += vf_dejudder.o 14141 OBJS-$(CONFIG_DELOGO_FILTER) += 
vf_delogo.o 14142 OBJS-$(CONFIG_DENOISE_VAAPI_FILTER) += vf_misc_vaapi.o vaapi_vpp.o 14143@@ -509,6 +510,7 @@ OBJS-$(CONFIG_TRANSPOSE_VAAPI_FILTER) 14144 OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER) += vf_transpose_vulkan.o vulkan.o vulkan_filter.o 14145 OBJS-$(CONFIG_TRIM_FILTER) += trim.o 14146 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o 14147+OBJS-$(CONFIG_UNSAND_FILTER) += vf_unsand.o 14148 OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o 14149 OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER) += vf_unsharp_opencl.o opencl.o \ 14150 opencl/unsharp.o 14151--- a/libavfilter/aarch64/Makefile 14152+++ b/libavfilter/aarch64/Makefile 14153@@ -1,3 +1,5 @@ 14154+OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_init_aarch64.o 14155 OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o 14156 14157+NEON-OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_neon.o 14158 NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o 14159--- /dev/null 14160+++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c 14161@@ -0,0 +1,125 @@ 14162+/* 14163+ * bwdif aarch64 NEON optimisations 14164+ * 14165+ * Copyright (c) 2023 John Cox <jc@kynesim.co.uk> 14166+ * 14167+ * This file is part of FFmpeg. 14168+ * 14169+ * FFmpeg is free software; you can redistribute it and/or 14170+ * modify it under the terms of the GNU Lesser General Public 14171+ * License as published by the Free Software Foundation; either 14172+ * version 2.1 of the License, or (at your option) any later version. 14173+ * 14174+ * FFmpeg is distributed in the hope that it will be useful, 14175+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 14176+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14177+ * Lesser General Public License for more details. 
14178+ * 14179+ * You should have received a copy of the GNU Lesser General Public 14180+ * License along with FFmpeg; if not, write to the Free Software 14181+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 14182+ */ 14183+ 14184+#include "libavutil/common.h" 14185+#include "libavfilter/bwdif.h" 14186+#include "libavutil/aarch64/cpu.h" 14187+ 14188+void ff_bwdif_filter_edge_neon(void *dst1, void *prev1, void *cur1, void *next1, 14189+ int w, int prefs, int mrefs, int prefs2, int mrefs2, 14190+ int parity, int clip_max, int spat); 14191+ 14192+void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int mrefs, 14193+ int prefs3, int mrefs3, int parity, int clip_max); 14194+ 14195+void ff_bwdif_filter_line_neon(void *dst1, void *prev1, void *cur1, void *next1, 14196+ int w, int prefs, int mrefs, int prefs2, int mrefs2, 14197+ int prefs3, int mrefs3, int prefs4, int mrefs4, 14198+ int parity, int clip_max); 14199+ 14200+void ff_bwdif_filter_line3_neon(void * dst1, int d_stride, 14201+ const void * prev1, const void * cur1, const void * next1, int s_stride, 14202+ int w, int parity, int clip_max); 14203+ 14204+ 14205+static void filter_line3_helper(void * dst1, int d_stride, 14206+ const void * prev1, const void * cur1, const void * next1, int s_stride, 14207+ int w, int parity, int clip_max) 14208+{ 14209+ // Asm works on 16 byte chunks 14210+ // If w is a multiple of 16 then all is good - if not then if width rounded 14211+ // up to nearest 16 will fit in both src & dst strides then allow the asm 14212+ // to write over the padding bytes as that is almost certainly faster than 14213+ // having to invoke the C version to clean up the tail. 14214+ const int w1 = FFALIGN(w, 16); 14215+ const int w0 = clip_max != 255 ? 0 : 14216+ d_stride <= w1 && s_stride <= w1 ? 
w : w & ~15; 14217+ 14218+ ff_bwdif_filter_line3_neon(dst1, d_stride, 14219+ prev1, cur1, next1, s_stride, 14220+ w0, parity, clip_max); 14221+ 14222+ if (w0 < w) 14223+ ff_bwdif_filter_line3_c((char *)dst1 + w0, d_stride, 14224+ (const char *)prev1 + w0, (const char *)cur1 + w0, (const char *)next1 + w0, s_stride, 14225+ w - w0, parity, clip_max); 14226+} 14227+ 14228+static void filter_line_helper(void *dst1, void *prev1, void *cur1, void *next1, 14229+ int w, int prefs, int mrefs, int prefs2, int mrefs2, 14230+ int prefs3, int mrefs3, int prefs4, int mrefs4, 14231+ int parity, int clip_max) 14232+{ 14233+ const int w0 = clip_max != 255 ? 0 : w & ~15; 14234+ 14235+ ff_bwdif_filter_line_neon(dst1, prev1, cur1, next1, 14236+ w0, prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max); 14237+ 14238+ if (w0 < w) 14239+ ff_bwdif_filter_line_c((char *)dst1 + w0, (char *)prev1 + w0, (char *)cur1 + w0, (char *)next1 + w0, 14240+ w - w0, prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max); 14241+} 14242+ 14243+static void filter_edge_helper(void *dst1, void *prev1, void *cur1, void *next1, 14244+ int w, int prefs, int mrefs, int prefs2, int mrefs2, 14245+ int parity, int clip_max, int spat) 14246+{ 14247+ const int w0 = clip_max != 255 ? 0 : w & ~15; 14248+ 14249+ ff_bwdif_filter_edge_neon(dst1, prev1, cur1, next1, w0, prefs, mrefs, prefs2, mrefs2, 14250+ parity, clip_max, spat); 14251+ 14252+ if (w0 < w) 14253+ ff_bwdif_filter_edge_c((char *)dst1 + w0, (char *)prev1 + w0, (char *)cur1 + w0, (char *)next1 + w0, 14254+ w - w0, prefs, mrefs, prefs2, mrefs2, 14255+ parity, clip_max, spat); 14256+} 14257+ 14258+static void filter_intra_helper(void *dst1, void *cur1, int w, int prefs, int mrefs, 14259+ int prefs3, int mrefs3, int parity, int clip_max) 14260+{ 14261+ const int w0 = clip_max != 255 ? 
0 : w & ~15; 14262+ 14263+ ff_bwdif_filter_intra_neon(dst1, cur1, w0, prefs, mrefs, prefs3, mrefs3, parity, clip_max); 14264+ 14265+ if (w0 < w) 14266+ ff_bwdif_filter_intra_c((char *)dst1 + w0, (char *)cur1 + w0, 14267+ w - w0, prefs, mrefs, prefs3, mrefs3, parity, clip_max); 14268+} 14269+ 14270+void 14271+ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth) 14272+{ 14273+ const int cpu_flags = av_get_cpu_flags(); 14274+ 14275+ if (bit_depth != 8) 14276+ return; 14277+ 14278+ if (!have_neon(cpu_flags)) 14279+ return; 14280+ 14281+ s->filter_intra = filter_intra_helper; 14282+ s->filter_line = filter_line_helper; 14283+ s->filter_edge = filter_edge_helper; 14284+ s->filter_line3 = filter_line3_helper; 14285+} 14286+ 14287--- /dev/null 14288+++ b/libavfilter/aarch64/vf_bwdif_neon.S 14289@@ -0,0 +1,788 @@ 14290+/* 14291+ * bwdif aarch64 NEON optimisations 14292+ * 14293+ * Copyright (c) 2023 John Cox <jc@kynesim.co.uk> 14294+ * 14295+ * This file is part of FFmpeg. 14296+ * 14297+ * FFmpeg is free software; you can redistribute it and/or 14298+ * modify it under the terms of the GNU Lesser General Public 14299+ * License as published by the Free Software Foundation; either 14300+ * version 2.1 of the License, or (at your option) any later version. 14301+ * 14302+ * FFmpeg is distributed in the hope that it will be useful, 14303+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 14304+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14305+ * Lesser General Public License for more details. 
14306+ * 14307+ * You should have received a copy of the GNU Lesser General Public 14308+ * License along with FFmpeg; if not, write to the Free Software 14309+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 14310+ */ 14311+ 14312+ 14313+#include "libavutil/aarch64/asm.S" 14314+ 14315+// Space taken on the stack by an int (32-bit) 14316+#ifdef __APPLE__ 14317+.set SP_INT, 4 14318+#else 14319+.set SP_INT, 8 14320+#endif 14321+ 14322+.macro SQSHRUNN b, s0, s1, s2, s3, n 14323+ sqshrun \s0\().4h, \s0\().4s, #\n - 8 14324+ sqshrun2 \s0\().8h, \s1\().4s, #\n - 8 14325+ sqshrun \s1\().4h, \s2\().4s, #\n - 8 14326+ sqshrun2 \s1\().8h, \s3\().4s, #\n - 8 14327+ uzp2 \b\().16b, \s0\().16b, \s1\().16b 14328+.endm 14329+ 14330+.macro SMULL4K a0, a1, a2, a3, s0, s1, k 14331+ smull \a0\().4s, \s0\().4h, \k 14332+ smull2 \a1\().4s, \s0\().8h, \k 14333+ smull \a2\().4s, \s1\().4h, \k 14334+ smull2 \a3\().4s, \s1\().8h, \k 14335+.endm 14336+ 14337+.macro UMULL4K a0, a1, a2, a3, s0, s1, k 14338+ umull \a0\().4s, \s0\().4h, \k 14339+ umull2 \a1\().4s, \s0\().8h, \k 14340+ umull \a2\().4s, \s1\().4h, \k 14341+ umull2 \a3\().4s, \s1\().8h, \k 14342+.endm 14343+ 14344+.macro UMLAL4K a0, a1, a2, a3, s0, s1, k 14345+ umlal \a0\().4s, \s0\().4h, \k 14346+ umlal2 \a1\().4s, \s0\().8h, \k 14347+ umlal \a2\().4s, \s1\().4h, \k 14348+ umlal2 \a3\().4s, \s1\().8h, \k 14349+.endm 14350+ 14351+.macro UMLSL4K a0, a1, a2, a3, s0, s1, k 14352+ umlsl \a0\().4s, \s0\().4h, \k 14353+ umlsl2 \a1\().4s, \s0\().8h, \k 14354+ umlsl \a2\().4s, \s1\().4h, \k 14355+ umlsl2 \a3\().4s, \s1\().8h, \k 14356+.endm 14357+ 14358+// int b = m2s1 - m1; 14359+// int f = p2s1 - p1; 14360+// int dc = c0s1 - m1; 14361+// int de = c0s1 - p1; 14362+// int sp_max = FFMIN(p1 - c0s1, m1 - c0s1); 14363+// sp_max = FFMIN(sp_max, FFMAX(-b,-f)); 14364+// int sp_min = FFMIN(c0s1 - p1, c0s1 - m1); 14365+// sp_min = FFMIN(sp_min, FFMAX(b,f)); 14366+// diff = diff == 0 ? 
0 : FFMAX3(diff, sp_min, sp_max); 14367+.macro SPAT_CHECK diff, m2s1, m1, c0s1, p1, p2s1, t0, t1, t2, t3 14368+ uqsub \t0\().16b, \p1\().16b, \c0s1\().16b 14369+ uqsub \t2\().16b, \m1\().16b, \c0s1\().16b 14370+ umin \t2\().16b, \t0\().16b, \t2\().16b 14371+ 14372+ uqsub \t1\().16b, \m1\().16b, \m2s1\().16b 14373+ uqsub \t3\().16b, \p1\().16b, \p2s1\().16b 14374+ umax \t3\().16b, \t3\().16b, \t1\().16b 14375+ umin \t3\().16b, \t3\().16b, \t2\().16b 14376+ 14377+ uqsub \t0\().16b, \c0s1\().16b, \p1\().16b 14378+ uqsub \t2\().16b, \c0s1\().16b, \m1\().16b 14379+ umin \t2\().16b, \t0\().16b, \t2\().16b 14380+ 14381+ uqsub \t1\().16b, \m2s1\().16b, \m1\().16b 14382+ uqsub \t0\().16b, \p2s1\().16b, \p1\().16b 14383+ umax \t0\().16b, \t0\().16b, \t1\().16b 14384+ umin \t2\().16b, \t2\().16b, \t0\().16b 14385+ 14386+ cmeq \t1\().16b, \diff\().16b, #0 14387+ umax \diff\().16b, \diff\().16b, \t3\().16b 14388+ umax \diff\().16b, \diff\().16b, \t2\().16b 14389+ bic \diff\().16b, \diff\().16b, \t1\().16b 14390+.endm 14391+ 14392+// i0 = s0; 14393+// if (i0 > d0 + diff0) 14394+// i0 = d0 + diff0; 14395+// else if (i0 < d0 - diff0) 14396+// i0 = d0 - diff0; 14397+// 14398+// i0 = s0 is safe 14399+.macro DIFF_CLIP i0, s0, d0, diff, t0, t1 14400+ uqadd \t0\().16b, \d0\().16b, \diff\().16b 14401+ uqsub \t1\().16b, \d0\().16b, \diff\().16b 14402+ umin \i0\().16b, \s0\().16b, \t0\().16b 14403+ umax \i0\().16b, \i0\().16b, \t1\().16b 14404+.endm 14405+ 14406+// i0 = FFABS(m1 - p1) > td0 ? i1 : i2; 14407+// DIFF_CLIP 14408+// 14409+// i0 = i1 is safe 14410+.macro INTERPOL i0, i1, i2, m1, d0, p1, td0, diff, t0, t1, t2 14411+ uabd \t0\().16b, \m1\().16b, \p1\().16b 14412+ cmhi \t0\().16b, \t0\().16b, \td0\().16b 14413+ bsl \t0\().16b, \i1\().16b, \i2\().16b 14414+ DIFF_CLIP \i0, \t0, \d0, \diff, \t1, \t2 14415+.endm 14416+ 14417+.macro PUSH_VREGS 14418+ stp d8, d9, [sp, #-64]! 
14419+ stp d10, d11, [sp, #16] 14420+ stp d12, d13, [sp, #32] 14421+ stp d14, d15, [sp, #48] 14422+.endm 14423+ 14424+.macro POP_VREGS 14425+ ldp d14, d15, [sp, #48] 14426+ ldp d12, d13, [sp, #32] 14427+ ldp d10, d11, [sp, #16] 14428+ ldp d8, d9, [sp], #64 14429+.endm 14430+ 14431+.macro LDR_COEFFS d, t0 14432+ movrel \t0, coeffs, 0 14433+ ld1 {\d\().8h}, [\t0] 14434+.endm 14435+ 14436+// static const uint16_t coef_lf[2] = { 4309, 213 }; 14437+// static const uint16_t coef_hf[3] = { 5570, 3801, 1016 }; 14438+// static const uint16_t coef_sp[2] = { 5077, 981 }; 14439+ 14440+const coeffs, align=4 // align 4 means align on 2^4 boundary 14441+ .hword 4309 * 4, 213 * 4 // lf[0]*4 = v0.h[0] 14442+ .hword 5570, 3801, 1016, -3801 // hf[0] = v0.h[2], -hf[1] = v0.h[5] 14443+ .hword 5077, 981 // sp[0] = v0.h[6] 14444+endconst 14445+ 14446+// =========================================================================== 14447+// 14448+// void ff_bwdif_filter_line3_neon( 14449+// void * dst1, // x0 14450+// int d_stride, // w1 14451+// const void * prev1, // x2 14452+// const void * cur1, // x3 14453+// const void * next1, // x4 14454+// int s_stride, // w5 14455+// int w, // w6 14456+// int parity, // w7 14457+// int clip_max); // [sp, #0] (Ignored) 14458+ 14459+function ff_bwdif_filter_line3_neon, export=1 14460+ // Sanity check w 14461+ cmp w6, #0 14462+ ble 99f 14463+ 14464+ LDR_COEFFS v0, x17 14465+ 14466+// #define prev2 cur 14467+// const uint8_t * restrict next2 = parity ? 
prev : next; 14468+ cmp w7, #0 14469+ csel x17, x2, x4, ne 14470+ 14471+ // We want all the V registers - save all the ones we must 14472+ PUSH_VREGS 14473+ 14474+ // Some rearrangement of initial values for nice layout of refs in regs 14475+ mov w10, w6 // w10 = loop count 14476+ neg w9, w5 // w9 = mref 14477+ lsl w8, w9, #1 // w8 = mref2 14478+ add w7, w9, w9, LSL #1 // w7 = mref3 14479+ lsl w6, w9, #2 // w6 = mref4 14480+ mov w11, w5 // w11 = pref 14481+ lsl w12, w5, #1 // w12 = pref2 14482+ add w13, w5, w5, LSL #1 // w13 = pref3 14483+ lsl w14, w5, #2 // w14 = pref4 14484+ add w15, w5, w5, LSL #2 // w15 = pref5 14485+ add w16, w14, w12 // w16 = pref6 14486+ 14487+ lsl w5, w1, #1 // w5 = d_stride * 2 14488+ 14489+// for (x = 0; x < w; x++) { 14490+// int diff0, diff2; 14491+// int d0, d2; 14492+// int temporal_diff0, temporal_diff2; 14493+// 14494+// int i1, i2; 14495+// int j1, j2; 14496+// int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4; 14497+ 14498+10: 14499+// c0 = prev2[0] + next2[0]; // c0 = v20, v21 14500+// d0 = c0 >> 1; // d0 = v10 14501+// temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11 14502+ ldr q31, [x3] 14503+ ldr q21, [x17] 14504+ uhadd v10.16b, v31.16b, v21.16b 14505+ uabd v11.16b, v31.16b, v21.16b 14506+ uaddl v20.8h, v21.8b, v31.8b 14507+ uaddl2 v21.8h, v21.16b, v31.16b 14508+ 14509+ ldr q31, [x3, w6, sxtw] 14510+ ldr q23, [x17, w6, sxtw] 14511+ 14512+// i1 = coef_hf[0] * c0; // i1 = v2-v5 14513+ UMULL4K v2, v3, v4, v5, v20, v21, v0.h[2] 14514+ 14515+ ldr q30, [x3, w14, sxtw] 14516+ ldr q25, [x17, w14, sxtw] 14517+ 14518+// m4 = prev2[mrefs4] + next2[mrefs4]; // m4 = v22,v23 14519+ uaddl v22.8h, v23.8b, v31.8b 14520+ uaddl2 v23.8h, v23.16b, v31.16b 14521+ 14522+// p4 = prev2[prefs4] + next2[prefs4]; // p4 = v24,v25, (p4 >> 1) = v12 14523+ uhadd v12.16b, v25.16b, v30.16b 14524+ uaddl v24.8h, v25.8b, v30.8b 14525+ uaddl2 v25.8h, v25.16b, v30.16b 14526+ 14527+// j1 = -coef_hf[1] * (c0 + p4); // j1 = v6-v9 (-c0:v20,v21) 14528+ add v20.8h, 
v20.8h, v24.8h 14529+ add v21.8h, v21.8h, v25.8h 14530+ SMULL4K v6, v7, v8, v9, v20, v21, v0.h[5] 14531+ 14532+// m3 = cur[mrefs3]; // m3 = v20 14533+ ldr q20, [x3, w7, sxtw] 14534+ 14535+// p3 = cur[prefs3]; // p3 = v21 14536+ ldr q21, [x3, w13, sxtw] 14537+ 14538+// i1 += coef_hf[2] * (m4 + p4); // (-m4:v22,v23) (-p4:v24,v25) 14539+ add v22.8h, v22.8h, v24.8h 14540+ add v23.8h, v23.8h, v25.8h 14541+ UMLAL4K v2, v3, v4, v5, v22, v23, v0.h[4] 14542+ 14543+ ldr q29, [x3, w8, sxtw] 14544+ ldr q23, [x17, w8, sxtw] 14545+ 14546+// i1 -= coef_lf[1] * 4 * (m3 + p3); // - 14547+ uaddl v30.8h, v20.8b, v21.8b 14548+ uaddl2 v31.8h, v20.16b, v21.16b 14549+ 14550+ ldr q28, [x3, w16, sxtw] 14551+ ldr q25, [x17, w16, sxtw] 14552+ 14553+ UMLSL4K v2, v3, v4, v5, v30, v31, v0.h[1] 14554+ 14555+// m2 = prev2[mrefs2] + next2[mrefs2]; // m2 = v22,v23, (m2 >> 1) = v13 14556+ uhadd v13.16b, v23.16b, v29.16b 14557+ uaddl v22.8h, v23.8b, v29.8b 14558+ uaddl2 v23.8h, v23.16b, v29.16b 14559+ 14560+ ldr q31, [x3, w12, sxtw] 14561+ ldr q27, [x17, w12, sxtw] 14562+ 14563+// p6 = prev2[prefs6] + next2[prefs6]; // p6 = v24,v25 14564+ uaddl v24.8h, v25.8b, v28.8b 14565+ uaddl2 v25.8h, v25.16b, v28.16b 14566+ 14567+// j1 += coef_hf[2] * (m2 + p6); // (-p6:v24,v25) 14568+ add v24.8h, v24.8h, v22.8h 14569+ add v25.8h, v25.8h, v23.8h 14570+ UMLAL4K v6, v7, v8, v9, v24, v25, v0.h[4] 14571+ 14572+// m1 = cur[mrefs]; // m1 = v24 14573+ ldr q24, [x3, w9, sxtw] 14574+ 14575+// p5 = cur[prefs5]; // p5 = v25 14576+ ldr q25, [x3, w15, sxtw] 14577+ 14578+// p2 = prev2[prefs2] + next2[prefs2]; // p2 = v26, v27 14579+// temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14 14580+// d2 = p2 >> 1; // d2 = v15 14581+ uabd v14.16b, v31.16b, v27.16b 14582+ uhadd v15.16b, v31.16b, v27.16b 14583+ uaddl v26.8h, v27.8b, v31.8b 14584+ uaddl2 v27.8h, v27.16b, v31.16b 14585+ 14586+// j1 += coef_hf[0] * p2; // - 14587+ UMLAL4K v6, v7, v8, v9, v26, v27, v0.h[2] 14588+ 14589+// i1 -= coef_hf[1] * (m2 + p2); // 
(-m2:v22,v23*) (-p2:v26*,v27*) 14590+ add v22.8h, v22.8h, v26.8h 14591+ add v23.8h, v23.8h, v27.8h 14592+ UMLSL4K v2, v3, v4, v5, v22, v23, v0.h[3] 14593+ 14594+// p1 = cur[prefs]; // p1 = v22 14595+ ldr q22, [x3, w11, sxtw] 14596+ 14597+// j1 -= coef_lf[1] * 4 * (m1 + p5); // - 14598+ uaddl v26.8h, v24.8b, v25.8b 14599+ uaddl2 v27.8h, v24.16b, v25.16b 14600+ UMLSL4K v6, v7, v8, v9, v26, v27, v0.h[1] 14601+ 14602+// j2 = (coef_sp[0] * (p1 + p3) - coef_sp[1] * (m1 + p5)) >> 13; // (-p5:v25*) j2=v16 14603+ uaddl v18.8h, v22.8b, v21.8b 14604+ uaddl2 v19.8h, v22.16b, v21.16b 14605+ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6] 14606+ 14607+ uaddl v18.8h, v24.8b, v25.8b 14608+ uaddl2 v19.8h, v24.16b, v25.16b 14609+ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7] 14610+ 14611+ SQSHRUNN v16, v28, v29, v30, v31, 13 14612+ 14613+// i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17 14614+ uaddl v18.8h, v22.8b, v24.8b 14615+ uaddl2 v19.8h, v22.16b, v24.16b 14616+ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6] 14617+ 14618+ uaddl v18.8h, v20.8b, v21.8b 14619+ uaddl2 v19.8h, v20.16b, v21.16b 14620+ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7] 14621+ 14622+ SQSHRUNN v17, v28, v29, v30, v31, 13 14623+ 14624+// i1 += coef_lf[0] * 4 * (m1 + p1); // p1 = v22, m1 = v24 14625+ uaddl v26.8h, v24.8b, v22.8b 14626+ uaddl2 v27.8h, v24.16b, v22.16b 14627+ UMLAL4K v2, v3, v4, v5, v26, v27, v0.h[0] 14628+ 14629+ ldr q31, [x2, w9, sxtw] 14630+ ldr q29, [x4, w9, sxtw] 14631+ 14632+// j1 += coef_lf[0] * 4 * (p1 + p3); // p1 = v22, p3 = v21 14633+ uaddl v26.8h, v21.8b, v22.8b 14634+ uaddl2 v27.8h, v21.16b, v22.16b 14635+ UMLAL4K v6, v7, v8, v9, v26, v27, v0.h[0] 14636+ 14637+ ldr q30, [x2, w11, sxtw] 14638+ ldr q28, [x4, w11, sxtw] 14639+ 14640+// i1 >>= 15; // i1 = v2, -v3, -v4*, -v5* 14641+ SQSHRUNN v2, v2, v3, v4, v5, 15 14642+ 14643+// j1 >>= 15; // j1 = v3, -v6*, -v7*, -v8*, -v9* 14644+ SQSHRUNN v3, v6, v7, v8, v9, 15 14645+ 14646+// { 14647+// int t1 
=(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; 14648+// int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; 14649+ uabd v30.16b, v22.16b, v30.16b 14650+ uabd v31.16b, v24.16b, v31.16b 14651+ uabd v28.16b, v22.16b, v28.16b 14652+ uabd v29.16b, v24.16b, v29.16b 14653+ uhadd v31.16b, v31.16b, v30.16b 14654+ uhadd v29.16b, v29.16b, v28.16b 14655+ 14656+ ldr q27, [x2, w13, sxtw] 14657+ ldr q26, [x4, w13, sxtw] 14658+ 14659+// diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18 14660+ ushr v18.16b, v11.16b, #1 14661+ umax v18.16b, v18.16b, v31.16b 14662+ umax v18.16b, v18.16b, v29.16b 14663+// } // v28, v30 preserved for next block 14664+// { // tdiff2 = v14 14665+// int t1 =(FFABS(prev[prefs] - p1) + FFABS(prev[prefs3] - p3)) >> 1; 14666+// int t2 =(FFABS(next[prefs] - p1) + FFABS(next[prefs3] - p3)) >> 1; 14667+ uabd v31.16b, v21.16b, v27.16b 14668+ uabd v29.16b, v21.16b, v26.16b 14669+ uhadd v31.16b, v31.16b, v30.16b 14670+ uhadd v29.16b, v29.16b, v28.16b 14671+ 14672+// diff2 = FFMAX3(temporal_diff2 >> 1, t1, t2); // diff2=v19 14673+ ushr v19.16b, v14.16b, #1 14674+ umax v19.16b, v19.16b, v31.16b 14675+ umax v19.16b, v19.16b, v29.16b 14676+// } 14677+ 14678+ // diff0 = v18, (m2 >> 1) = v13, m1 = v24, d0 = v10, p1 = v22, d2 = v15 14679+ SPAT_CHECK v18, v13, v24, v10, v22, v15, v31, v30, v29, v28 14680+ 14681+ // diff2 = v19, d0 = v10, p1 = v22, d2 = v15, p3 = v21, (p4 >> 1) = v12 14682+ SPAT_CHECK v19, v10, v22, v15, v21, v12, v31, v30, v29, v28 14683+ 14684+ // j1 = v3, j2 = v16, p1 = v22, d2 = v15, p3 = v21, td2 = v14, diff2 = v19 14685+ INTERPOL v3, v3, v16, v22, v15, v21, v14, v19, v31, v30, v29 14686+ 14687+// dst[d_stride * 2] = av_clip_uint8(interpol); 14688+ str q3, [x0, w5, sxtw] 14689+ 14690+// dst[d_stride] = p1; 14691+ str q22, [x0, w1, sxtw] 14692+ 14693+ // i1 = v2, i2 = v17, m1 = v24, d0 = v10, p1 = v22, td0 = v11, diff0 = v18 14694+ INTERPOL v2, v2, v17, v24, v10, v22, v11, v18, v31, v30, v29 14695+ 14696+// dst[0] = 
av_clip_uint8(interpol); 14697+ str q2, [x0], #16 14698+// } 14699+// 14700+// dst++; 14701+// cur++; 14702+// prev++; 14703+// prev2++; 14704+// next++; 14705+// } 14706+ subs w10, w10, #16 14707+ add x2, x2, #16 14708+ add x3, x3, #16 14709+ add x4, x4, #16 14710+ add x17, x17, #16 14711+ bgt 10b 14712+ 14713+ POP_VREGS 14714+99: 14715+ ret 14716+endfunc 14717+ 14718+// =========================================================================== 14719+// 14720+// void filter_line( 14721+// void *dst1, // x0 14722+// void *prev1, // x1 14723+// void *cur1, // x2 14724+// void *next1, // x3 14725+// int w, // w4 14726+// int prefs, // w5 14727+// int mrefs, // w6 14728+// int prefs2, // w7 14729+// int mrefs2, // [sp, #0] 14730+// int prefs3, // [sp, #SP_INT] 14731+// int mrefs3, // [sp, #SP_INT*2] 14732+// int prefs4, // [sp, #SP_INT*3] 14733+// int mrefs4, // [sp, #SP_INT*4] 14734+// int parity, // [sp, #SP_INT*5] 14735+// int clip_max) // [sp, #SP_INT*6] 14736+ 14737+function ff_bwdif_filter_line_neon, export=1 14738+ // Sanity check w 14739+ cmp w4, #0 14740+ ble 99f 14741+ 14742+ // Rearrange regs to be the same as line3 for ease of debug! 14743+ mov w10, w4 // w10 = loop count 14744+ mov w9, w6 // w9 = mref 14745+ mov w12, w7 // w12 = pref2 14746+ mov w11, w5 // w11 = pref 14747+ ldr w8, [sp, #0] // w8 = mref2 14748+ ldr w7, [sp, #SP_INT*2] // w7 = mref3 14749+ ldr w6, [sp, #SP_INT*4] // w6 = mref4 14750+ ldr w13, [sp, #SP_INT] // w13 = pref3 14751+ ldr w14, [sp, #SP_INT*3] // w14 = pref4 14752+ 14753+ mov x4, x3 14754+ mov x3, x2 14755+ mov x2, x1 14756+ 14757+ LDR_COEFFS v0, x17 14758+ 14759+// #define prev2 cur 14760+// const uint8_t * restrict next2 = parity ? 
prev : next; 14761+ ldr w17, [sp, #SP_INT*5] // parity 14762+ cmp w17, #0 14763+ csel x17, x2, x4, ne 14764+ 14765+ PUSH_VREGS 14766+ 14767+// for (x = 0; x < w; x++) { 14768+// int diff0, diff2; 14769+// int d0, d2; 14770+// int temporal_diff0, temporal_diff2; 14771+// 14772+// int i1, i2; 14773+// int j1, j2; 14774+// int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4; 14775+ 14776+10: 14777+// c0 = prev2[0] + next2[0]; // c0 = v20, v21 14778+// d0 = c0 >> 1; // d0 = v10 14779+// temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11 14780+ ldr q31, [x3] 14781+ ldr q21, [x17] 14782+ uhadd v10.16b, v31.16b, v21.16b 14783+ uabd v11.16b, v31.16b, v21.16b 14784+ uaddl v20.8h, v21.8b, v31.8b 14785+ uaddl2 v21.8h, v21.16b, v31.16b 14786+ 14787+ ldr q31, [x3, w6, sxtw] 14788+ ldr q23, [x17, w6, sxtw] 14789+ 14790+// i1 = coef_hf[0] * c0; // i1 = v2-v5 14791+ UMULL4K v2, v3, v4, v5, v20, v21, v0.h[2] 14792+ 14793+ ldr q30, [x3, w14, sxtw] 14794+ ldr q25, [x17, w14, sxtw] 14795+ 14796+// m4 = prev2[mrefs4] + next2[mrefs4]; // m4 = v22,v23 14797+ uaddl v22.8h, v23.8b, v31.8b 14798+ uaddl2 v23.8h, v23.16b, v31.16b 14799+ 14800+// p4 = prev2[prefs4] + next2[prefs4]; // p4 = v24,v25, (p4 >> 1) = v12 14801+ uhadd v12.16b, v25.16b, v30.16b 14802+ uaddl v24.8h, v25.8b, v30.8b 14803+ uaddl2 v25.8h, v25.16b, v30.16b 14804+ 14805+// m3 = cur[mrefs3]; // m3 = v20 14806+ ldr q20, [x3, w7, sxtw] 14807+ 14808+// p3 = cur[prefs3]; // p3 = v21 14809+ ldr q21, [x3, w13, sxtw] 14810+ 14811+// i1 += coef_hf[2] * (m4 + p4); // (-m4:v22,v23) (-p4:v24,v25) 14812+ add v22.8h, v22.8h, v24.8h 14813+ add v23.8h, v23.8h, v25.8h 14814+ UMLAL4K v2, v3, v4, v5, v22, v23, v0.h[4] 14815+ 14816+ ldr q29, [x3, w8, sxtw] 14817+ ldr q23, [x17, w8, sxtw] 14818+ 14819+// i1 -= coef_lf[1] * 4 * (m3 + p3); // - 14820+ uaddl v30.8h, v20.8b, v21.8b 14821+ uaddl2 v31.8h, v20.16b, v21.16b 14822+ 14823+ UMLSL4K v2, v3, v4, v5, v30, v31, v0.h[1] 14824+ 14825+ ldr q31, [x3, w12, sxtw] 14826+ ldr q27, [x17, w12, sxtw] 
14827+ 14828+// m2 = prev2[mrefs2] + next2[mrefs2]; // m2 = v22,v23, (m2 >> 1) = v13 14829+ uhadd v13.16b, v23.16b, v29.16b 14830+ uaddl v22.8h, v23.8b, v29.8b 14831+ uaddl2 v23.8h, v23.16b, v29.16b 14832+ 14833+// m1 = cur[mrefs]; // m1 = v24 14834+ ldr q24, [x3, w9, sxtw] 14835+ 14836+// p2 = prev2[prefs2] + next2[prefs2]; // p2 = v26, v27 14837+// temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14 14838+// d2 = p2 >> 1; // d2 = v15 14839+ uabd v14.16b, v31.16b, v27.16b 14840+ uhadd v15.16b, v31.16b, v27.16b 14841+ uaddl v26.8h, v27.8b, v31.8b 14842+ uaddl2 v27.8h, v27.16b, v31.16b 14843+ 14844+// i1 -= coef_hf[1] * (m2 + p2); // (-m2:v22,v23*) (-p2:v26*,v27*) 14845+ add v22.8h, v22.8h, v26.8h 14846+ add v23.8h, v23.8h, v27.8h 14847+ UMLSL4K v2, v3, v4, v5, v22, v23, v0.h[3] 14848+ 14849+// p1 = cur[prefs]; // p1 = v22 14850+ ldr q22, [x3, w11, sxtw] 14851+ 14852+// i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17 14853+ uaddl v18.8h, v22.8b, v24.8b 14854+ uaddl2 v19.8h, v22.16b, v24.16b 14855+ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6] 14856+ 14857+ uaddl v18.8h, v20.8b, v21.8b 14858+ uaddl2 v19.8h, v20.16b, v21.16b 14859+ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7] 14860+ 14861+ SQSHRUNN v17, v28, v29, v30, v31, 13 14862+ 14863+// i1 += coef_lf[0] * 4 * (m1 + p1); // p1 = v22, m1 = v24 14864+ uaddl v26.8h, v24.8b, v22.8b 14865+ uaddl2 v27.8h, v24.16b, v22.16b 14866+ UMLAL4K v2, v3, v4, v5, v26, v27, v0.h[0] 14867+ 14868+ ldr q31, [x2, w9, sxtw] 14869+ ldr q29, [x4, w9, sxtw] 14870+ 14871+ ldr q30, [x2, w11, sxtw] 14872+ ldr q28, [x4, w11, sxtw] 14873+ 14874+// i1 >>= 15; // i1 = v2, -v3, -v4*, -v5* 14875+ SQSHRUNN v2, v2, v3, v4, v5, 15 14876+ 14877+// { 14878+// int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; 14879+// int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; 14880+ uabd v30.16b, v22.16b, v30.16b 14881+ uabd v31.16b, v24.16b, v31.16b 14882+ uabd v28.16b, v22.16b, 
v28.16b 14883+ uabd v29.16b, v24.16b, v29.16b 14884+ uhadd v31.16b, v31.16b, v30.16b 14885+ uhadd v29.16b, v29.16b, v28.16b 14886+ 14887+// diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18 14888+ ushr v18.16b, v11.16b, #1 14889+ umax v18.16b, v18.16b, v31.16b 14890+ umax v18.16b, v18.16b, v29.16b 14891+ 14892+ // diff0 = v18, (m2 >> 1) = v13, m1 = v24, d0 = v10, p1 = v22, d2 = v15 14893+ SPAT_CHECK v18, v13, v24, v10, v22, v15, v31, v30, v29, v28 14894+ 14895+ // i1 = v2, i2 = v17, m1 = v24, d0 = v10, p1 = v22, td0 = v11, diff0 = v18 14896+ INTERPOL v2, v2, v17, v24, v10, v22, v11, v18, v31, v30, v29 14897+ 14898+// dst[0] = av_clip_uint8(interpol); 14899+ str q2, [x0], #16 14900+// } 14901+// 14902+// dst++; 14903+// cur++; 14904+// prev++; 14905+// prev2++; 14906+// next++; 14907+// } 14908+ 14909+ subs w10, w10, #16 14910+ add x2, x2, #16 14911+ add x3, x3, #16 14912+ add x4, x4, #16 14913+ add x17, x17, #16 14914+ bgt 10b 14915+ 14916+ POP_VREGS 14917+99: 14918+ ret 14919+endfunc 14920+ 14921+// ============================================================================ 14922+// 14923+// void ff_bwdif_filter_edge_neon( 14924+// void *dst1, // x0 14925+// void *prev1, // x1 14926+// void *cur1, // x2 14927+// void *next1, // x3 14928+// int w, // w4 14929+// int prefs, // w5 14930+// int mrefs, // w6 14931+// int prefs2, // w7 14932+// int mrefs2, // [sp, #0] 14933+// int parity, // [sp, #SP_INT] 14934+// int clip_max, // [sp, #SP_INT*2] unused 14935+// int spat); // [sp, #SP_INT*3] 14936+ 14937+function ff_bwdif_filter_edge_neon, export=1 14938+ // Sanity check w 14939+ cmp w4, #0 14940+ ble 99f 14941+ 14942+// #define prev2 cur 14943+// const uint8_t * restrict next2 = parity ?
prev : next; 14944+ 14945+ ldr w8, [sp, #0] // mrefs2 14946+ 14947+ ldr w17, [sp, #SP_INT] // parity 14948+ ldr w16, [sp, #SP_INT*3] // spat 14949+ cmp w17, #0 14950+ csel x17, x1, x3, ne 14951+ 14952+// for (x = 0; x < w; x++) { 14953+ 14954+10: 14955+// int m1 = cur[mrefs]; 14956+// int d = (prev2[0] + next2[0]) >> 1; 14957+// int p1 = cur[prefs]; 14958+// int temporal_diff0 = FFABS(prev2[0] - next2[0]); 14959+// int temporal_diff1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; 14960+// int temporal_diff2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; 14961+// int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); 14962+ ldr q31, [x2] 14963+ ldr q21, [x17] 14964+ uhadd v16.16b, v31.16b, v21.16b // d0 = v16 14965+ uabd v17.16b, v31.16b, v21.16b // td0 = v17 14966+ ldr q24, [x2, w6, sxtw] // m1 = v24 14967+ ldr q22, [x2, w5, sxtw] // p1 = v22 14968+ 14969+ ldr q0, [x1, w6, sxtw] // prev[mrefs] 14970+ ldr q2, [x1, w5, sxtw] // prev[prefs] 14971+ ldr q1, [x3, w6, sxtw] // next[mrefs] 14972+ ldr q3, [x3, w5, sxtw] // next[prefs] 14973+ 14974+ ushr v29.16b, v17.16b, #1 14975+ 14976+ uabd v31.16b, v0.16b, v24.16b 14977+ uabd v30.16b, v2.16b, v22.16b 14978+ uhadd v0.16b, v31.16b, v30.16b // td1 = q0 14979+ 14980+ uabd v31.16b, v1.16b, v24.16b 14981+ uabd v30.16b, v3.16b, v22.16b 14982+ uhadd v1.16b, v31.16b, v30.16b // td2 = q1 14983+ 14984+ umax v0.16b, v0.16b, v29.16b 14985+ umax v0.16b, v0.16b, v1.16b // diff = v0 14986+ 14987+// if (spat) { 14988+// SPAT_CHECK() 14989+// } 14990+// i0 = (m1 + p1) >> 1; 14991+ cbz w16, 1f 14992+ 14993+ ldr q31, [x2, w8, sxtw] 14994+ ldr q18, [x17, w8, sxtw] 14995+ ldr q30, [x2, w7, sxtw] 14996+ ldr q19, [x17, w7, sxtw] 14997+ uhadd v18.16b, v18.16b, v31.16b 14998+ uhadd v19.16b, v19.16b, v30.16b 14999+ 15000+ SPAT_CHECK v0, v18, v24, v16, v22, v19, v31, v30, v29, v28 15001+ 15002+1: 15003+ uhadd v2.16b, v22.16b, v24.16b 15004+ 15005+ // i0 = v2, s0 = v2, d0 = v16, diff = v0, t0 = v31, t1 = v30 
15006+ DIFF_CLIP v2, v2, v16, v0, v31, v30 15007+ 15008+// dst[0] = av_clip(interpol, 0, clip_max); 15009+ str q2, [x0], #16 15010+ 15011+// dst++; 15012+// cur++; 15013+// } 15014+ subs w4, w4, #16 15015+ add x1, x1, #16 15016+ add x2, x2, #16 15017+ add x3, x3, #16 15018+ add x17, x17, #16 15019+ bgt 10b 15020+ 15021+99: 15022+ ret 15023+endfunc 15024+ 15025+// ============================================================================ 15026+// 15027+// void ff_bwdif_filter_intra_neon( 15028+// void *dst1, // x0 15029+// void *cur1, // x1 15030+// int w, // w2 15031+// int prefs, // w3 15032+// int mrefs, // w4 15033+// int prefs3, // w5 15034+// int mrefs3, // w6 15035+// int parity, // w7 unused 15036+// int clip_max) // [sp, #0] unused 15037+ 15038+function ff_bwdif_filter_intra_neon, export=1 15039+ cmp w2, #0 15040+ ble 99f 15041+ 15042+ LDR_COEFFS v0, x17 15043+ 15044+// for (x = 0; x < w; x++) { 15045+10: 15046+ 15047+// interpol = (coef_sp[0] * (cur[mrefs] + cur[prefs]) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; 15048+ ldr q31, [x1, w4, sxtw] 15049+ ldr q30, [x1, w3, sxtw] 15050+ ldr q29, [x1, w6, sxtw] 15051+ ldr q28, [x1, w5, sxtw] 15052+ 15053+ uaddl v20.8h, v31.8b, v30.8b 15054+ uaddl2 v21.8h, v31.16b, v30.16b 15055+ 15056+ UMULL4K v2, v3, v4, v5, v20, v21, v0.h[6] 15057+ 15058+ uaddl v20.8h, v29.8b, v28.8b 15059+ uaddl2 v21.8h, v29.16b, v28.16b 15060+ 15061+ UMLSL4K v2, v3, v4, v5, v20, v21, v0.h[7] 15062+ 15063+// dst[0] = av_clip(interpol, 0, clip_max); 15064+ SQSHRUNN v2, v2, v3, v4, v5, 13 15065+ str q2, [x0], #16 15066+ 15067+// dst++; 15068+// cur++; 15069+// } 15070+ 15071+ subs w2, w2, #16 15072+ add x1, x1, #16 15073+ bgt 10b 15074+ 15075+99: 15076+ ret 15077+endfunc 15078--- a/libavfilter/allfilters.c 15079+++ b/libavfilter/allfilters.c 15080@@ -242,6 +242,7 @@ extern const AVFilter ff_vf_derain; 15081 extern const AVFilter ff_vf_deshake; 15082 extern const AVFilter ff_vf_deshake_opencl; 15083 extern const AVFilter ff_vf_despill; 
15084+extern const AVFilter ff_vf_deinterlace_v4l2m2m; 15085 extern const AVFilter ff_vf_detelecine; 15086 extern const AVFilter ff_vf_dilation; 15087 extern const AVFilter ff_vf_dilation_opencl; 15088@@ -414,6 +415,7 @@ extern const AVFilter ff_vf_scale; 15089 extern const AVFilter ff_vf_scale_cuda; 15090 extern const AVFilter ff_vf_scale_npp; 15091 extern const AVFilter ff_vf_scale_qsv; 15092+extern const AVFilter ff_vf_scale_v4l2m2m; 15093 extern const AVFilter ff_vf_scale_vaapi; 15094 extern const AVFilter ff_vf_scale_vulkan; 15095 extern const AVFilter ff_vf_scale2ref; 15096@@ -483,6 +485,7 @@ extern const AVFilter ff_vf_trim; 15097 extern const AVFilter ff_vf_unpremultiply; 15098 extern const AVFilter ff_vf_unsharp; 15099 extern const AVFilter ff_vf_unsharp_opencl; 15100+extern const AVFilter ff_vf_unsand; 15101 extern const AVFilter ff_vf_untile; 15102 extern const AVFilter ff_vf_uspp; 15103 extern const AVFilter ff_vf_v360; 15104--- a/libavfilter/buffersink.c 15105+++ b/libavfilter/buffersink.c 15106@@ -62,6 +62,11 @@ typedef struct BufferSinkContext { 15107 int sample_rates_size; 15108 15109 AVFrame *peeked_frame; 15110+ 15111+ union { 15112+ av_buffersink_alloc_video_frame * video; 15113+ } alloc_cb; 15114+ void * alloc_v; 15115 } BufferSinkContext; 15116 15117 #define NB_ITEMS(list) (list ## _size / sizeof(*list)) 15118@@ -154,6 +159,22 @@ int attribute_align_arg av_buffersink_ge 15119 return get_frame_internal(ctx, frame, 0, nb_samples); 15120 } 15121 15122+static AVFrame * alloc_video_buffer(AVFilterLink *link, int w, int h) 15123+{ 15124+ AVFilterContext * const ctx = link->dst; 15125+ BufferSinkContext * const bs = ctx->priv; 15126+ return bs->alloc_cb.video ? 
bs->alloc_cb.video(ctx, bs->alloc_v, w, h) : 15127+ ff_default_get_video_buffer(link, w, h); 15128+} 15129+ 15130+int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v) 15131+{ 15132+ BufferSinkContext * const bs = ctx->priv; 15133+ bs->alloc_cb.video = cb; 15134+ bs->alloc_v = v; 15135+ return 0; 15136+} 15137+ 15138 #if FF_API_BUFFERSINK_ALLOC 15139 AVBufferSinkParams *av_buffersink_params_alloc(void) 15140 { 15141@@ -403,6 +424,7 @@ static const AVFilterPad avfilter_vsink_ 15142 { 15143 .name = "default", 15144 .type = AVMEDIA_TYPE_VIDEO, 15145+ .get_buffer = {.video = alloc_video_buffer}, 15146 }, 15147 }; 15148 15149--- a/libavfilter/buffersink.h 15150+++ b/libavfilter/buffersink.h 15151@@ -202,6 +202,9 @@ int av_buffersink_get_frame(AVFilterCont 15152 */ 15153 int av_buffersink_get_samples(AVFilterContext *ctx, AVFrame *frame, int nb_samples); 15154 15155+typedef AVFrame * av_buffersink_alloc_video_frame(AVFilterContext * ctx, void * v, int w, int h); 15156+int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v); 15157+ 15158 /** 15159 * @} 15160 */ 15161--- a/libavfilter/buffersrc.c 15162+++ b/libavfilter/buffersrc.c 15163@@ -204,7 +204,7 @@ FF_ENABLE_DEPRECATION_WARNINGS 15164 15165 switch (ctx->outputs[0]->type) { 15166 case AVMEDIA_TYPE_VIDEO: 15167- CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height, 15168+ CHECK_VIDEO_PARAM_CHANGE(ctx, s, av_frame_cropped_width(frame), av_frame_cropped_height(frame), 15169 frame->format, frame->pts); 15170 break; 15171 case AVMEDIA_TYPE_AUDIO: 15172--- a/libavfilter/bwdif.h 15173+++ b/libavfilter/bwdif.h 15174@@ -35,8 +35,29 @@ typedef struct BWDIFContext { 15175 void (*filter_edge)(void *dst, void *prev, void *cur, void *next, 15176 int w, int prefs, int mrefs, int prefs2, int mrefs2, 15177 int parity, int clip_max, int spat); 15178+ void (*filter_line3)(void *dst, int dstride, 15179+ const void *prev, 
const void *cur, const void *next, int prefs, 15180+ int w, int parity, int clip_max); 15181 } BWDIFContext; 15182 15183-void ff_bwdif_init_x86(BWDIFContext *bwdif); 15184+void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth); 15185+void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth); 15186+void ff_bwdif_init_aarch64(BWDIFContext *bwdif, int bit_depth); 15187+ 15188+void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1, 15189+ int w, int prefs, int mrefs, int prefs2, int mrefs2, 15190+ int parity, int clip_max, int spat); 15191+ 15192+void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs, 15193+ int prefs3, int mrefs3, int parity, int clip_max); 15194+ 15195+void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, 15196+ int w, int prefs, int mrefs, int prefs2, int mrefs2, 15197+ int prefs3, int mrefs3, int prefs4, int mrefs4, 15198+ int parity, int clip_max); 15199+ 15200+void ff_bwdif_filter_line3_c(void * dst1, int d_stride, 15201+ const void * prev1, const void * cur1, const void * next1, int s_stride, 15202+ int w, int parity, int clip_max); 15203 15204 #endif /* AVFILTER_BWDIF_H */ 15205--- a/libavfilter/vf_bwdif.c 15206+++ b/libavfilter/vf_bwdif.c 15207@@ -122,8 +122,8 @@ typedef struct ThreadData { 15208 next2++; \ 15209 } 15210 15211-static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs, 15212- int prefs3, int mrefs3, int parity, int clip_max) 15213+void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs, 15214+ int prefs3, int mrefs3, int parity, int clip_max) 15215 { 15216 uint8_t *dst = dst1; 15217 uint8_t *cur = cur1; 15218@@ -132,10 +132,10 @@ static void filter_intra(void *dst1, voi 15219 FILTER_INTRA() 15220 } 15221 15222-static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, 15223- int w, int prefs, int mrefs, int prefs2, int mrefs2, 15224- int prefs3, int mrefs3, int prefs4, int mrefs4, 
15225- int parity, int clip_max) 15226+void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, 15227+ int w, int prefs, int mrefs, int prefs2, int mrefs2, 15228+ int prefs3, int mrefs3, int prefs4, int mrefs4, 15229+ int parity, int clip_max) 15230 { 15231 uint8_t *dst = dst1; 15232 uint8_t *prev = prev1; 15233@@ -150,9 +150,34 @@ static void filter_line_c(void *dst1, vo 15234 FILTER2() 15235 } 15236 15237-static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1, 15238- int w, int prefs, int mrefs, int prefs2, int mrefs2, 15239- int parity, int clip_max, int spat) 15240+#define NEXT_LINE()\ 15241+ dst += d_stride; \ 15242+ prev += prefs; \ 15243+ cur += prefs; \ 15244+ next += prefs; 15245+ 15246+void ff_bwdif_filter_line3_c(void * dst1, int d_stride, 15247+ const void * prev1, const void * cur1, const void * next1, int s_stride, 15248+ int w, int parity, int clip_max) 15249+{ 15250+ const int prefs = s_stride; 15251+ uint8_t * dst = dst1; 15252+ const uint8_t * prev = prev1; 15253+ const uint8_t * cur = cur1; 15254+ const uint8_t * next = next1; 15255+ 15256+ ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, 15257+ prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max); 15258+ NEXT_LINE(); 15259+ memcpy(dst, cur, w); 15260+ NEXT_LINE(); 15261+ ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, 15262+ prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max); 15263+} 15264+ 15265+void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1, 15266+ int w, int prefs, int mrefs, int prefs2, int mrefs2, 15267+ int parity, int clip_max, int spat) 15268 { 15269 uint8_t *dst = dst1; 15270 uint8_t *prev = prev1; 15271@@ -212,6 +237,13 @@ static void filter_edge_16bit(void *dst1 15272 FILTER2() 15273 } 15274 15275+// Round job start line down to multiple of 4 so that if filter_line3 exists 
15276+// and the frame is a multiple of 4 high then filter_line will never be called 15277+static inline int job_start(const int jobnr, const int nb_jobs, const int h) 15278+{ 15279+ return jobnr >= nb_jobs ? h : ((h * jobnr) / nb_jobs) & ~3; 15280+} 15281+ 15282 static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) 15283 { 15284 BWDIFContext *s = ctx->priv; 15285@@ -221,8 +253,8 @@ static int filter_slice(AVFilterContext 15286 int clip_max = (1 << (yadif->csp->comp[td->plane].depth)) - 1; 15287 int df = (yadif->csp->comp[td->plane].depth + 7) / 8; 15288 int refs = linesize / df; 15289- int slice_start = (td->h * jobnr ) / nb_jobs; 15290- int slice_end = (td->h * (jobnr+1)) / nb_jobs; 15291+ int slice_start = job_start(jobnr, nb_jobs, td->h); 15292+ int slice_end = job_start(jobnr + 1, nb_jobs, td->h); 15293 int y; 15294 15295 for (y = slice_start; y < slice_end; y++) { 15296@@ -244,6 +276,11 @@ static int filter_slice(AVFilterContext 15297 refs << 1, -(refs << 1), 15298 td->parity ^ td->tff, clip_max, 15299 (y < 2) || ((y + 3) > td->h) ? 
0 : 1); 15300+ } else if (s->filter_line3 && y + 2 < slice_end && y + 6 < td->h) { 15301+ s->filter_line3(dst, td->frame->linesize[td->plane], 15302+ prev, cur, next, linesize, td->w, 15303+ td->parity ^ td->tff, clip_max); 15304+ y += 2; 15305 } else { 15306 s->filter_line(dst, prev, cur, next, td->w, 15307 refs, -refs, refs << 1, -(refs << 1), 15308@@ -265,22 +302,31 @@ static void filter(AVFilterContext *ctx, 15309 YADIFContext *yadif = &bwdif->yadif; 15310 ThreadData td = { .frame = dstpic, .parity = parity, .tff = tff }; 15311 int i; 15312+ int last_plane = -1; 15313 15314 for (i = 0; i < yadif->csp->nb_components; i++) { 15315 int w = dstpic->width; 15316 int h = dstpic->height; 15317+ const AVComponentDescriptor * const comp = yadif->csp->comp + i; 15318+ 15319+ // If the last plane was the same as this plane assume we've dealt 15320+ // with all the pels already 15321+ if (last_plane == comp->plane) 15322+ continue; 15323+ last_plane = comp->plane; 15324 15325 if (i == 1 || i == 2) { 15326 w = AV_CEIL_RSHIFT(w, yadif->csp->log2_chroma_w); 15327 h = AV_CEIL_RSHIFT(h, yadif->csp->log2_chroma_h); 15328 } 15329 15330- td.w = w; 15331- td.h = h; 15332- td.plane = i; 15333+ // comp step is in bytes but td.w is in pels 15334+ td.w = w * comp->step / ((comp->depth + 7) / 8); 15335+ td.h = h; 15336+ td.plane = comp->plane; 15337 15338 ff_filter_execute(ctx, filter_slice, &td, NULL, 15339- FFMIN(h, ff_filter_get_nb_threads(ctx))); 15340+ FFMIN((h+3)/4, ff_filter_get_nb_threads(ctx))); 15341 } 15342 if (yadif->current_field == YADIF_FIELD_END) { 15343 yadif->current_field = YADIF_FIELD_NORMAL; 15344@@ -313,6 +359,7 @@ static const enum AVPixelFormat pix_fmts 15345 AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9, 15346 AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10, 15347 AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16, 15348+ AV_PIX_FMT_NV12, 15349 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, 15350 
AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, 15351 AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16, 15352@@ -340,21 +387,29 @@ static int config_props(AVFilterLink *li 15353 15354 yadif->csp = av_pix_fmt_desc_get(link->format); 15355 yadif->filter = filter; 15356- if (yadif->csp->comp[0].depth > 8) { 15357+ ff_bwdif_init_filter_line(s, yadif->csp->comp[0].depth); 15358+ 15359+ return 0; 15360+} 15361+ 15362+av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth) 15363+{ 15364+ s->filter_line3 = 0; 15365+ if (bit_depth > 8) { 15366 s->filter_intra = filter_intra_16bit; 15367 s->filter_line = filter_line_c_16bit; 15368 s->filter_edge = filter_edge_16bit; 15369 } else { 15370- s->filter_intra = filter_intra; 15371- s->filter_line = filter_line_c; 15372- s->filter_edge = filter_edge; 15373+ s->filter_intra = ff_bwdif_filter_intra_c; 15374+ s->filter_line = ff_bwdif_filter_line_c; 15375+ s->filter_edge = ff_bwdif_filter_edge_c; 15376 } 15377 15378 #if ARCH_X86 15379- ff_bwdif_init_x86(s); 15380+ ff_bwdif_init_x86(s, bit_depth); 15381+#elif ARCH_AARCH64 15382+ ff_bwdif_init_aarch64(s, bit_depth); 15383 #endif 15384- 15385- return 0; 15386 } 15387 15388 15389--- /dev/null 15390+++ b/libavfilter/vf_deinterlace_v4l2m2m.c 15391@@ -0,0 +1,2102 @@ 15392+/* 15393+ * This file is part of FFmpeg. 15394+ * 15395+ * FFmpeg is free software; you can redistribute it and/or 15396+ * modify it under the terms of the GNU Lesser General Public 15397+ * License as published by the Free Software Foundation; either 15398+ * version 2.1 of the License, or (at your option) any later version. 15399+ * 15400+ * FFmpeg is distributed in the hope that it will be useful, 15401+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 15402+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15403+ * Lesser General Public License for more details. 
15404+ * 15405+ * You should have received a copy of the GNU Lesser General Public 15406+ * License along with FFmpeg; if not, write to the Free Software 15407+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 15408+ */ 15409+ 15410+/** 15411+ * @file 15412+ * deinterlace video filter - V4L2 M2M 15413+ */ 15414+ 15415+#include <drm_fourcc.h> 15416+ 15417+#include <linux/videodev2.h> 15418+ 15419+#include <dirent.h> 15420+#include <fcntl.h> 15421+#include <poll.h> 15422+#include <stdatomic.h> 15423+#include <stdio.h> 15424+#include <string.h> 15425+#include <sys/ioctl.h> 15426+#include <sys/mman.h> 15427+#include <unistd.h> 15428+ 15429+#include "config.h" 15430+ 15431+#include "libavutil/avassert.h" 15432+#include "libavutil/avstring.h" 15433+#include "libavutil/common.h" 15434+#include "libavutil/hwcontext.h" 15435+#include "libavutil/hwcontext_drm.h" 15436+#include "libavutil/internal.h" 15437+#include "libavutil/mathematics.h" 15438+#include "libavutil/opt.h" 15439+#include "libavutil/pixdesc.h" 15440+#include "libavutil/time.h" 15441+ 15442+#define FF_INTERNAL_FIELDS 1 15443+#include "framequeue.h" 15444+#include "filters.h" 15445+#include "avfilter.h" 15446+#include "formats.h" 15447+#include "internal.h" 15448+#include "scale_eval.h" 15449+#include "video.h" 15450+ 15451+#ifndef DRM_FORMAT_P030 15452+#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ 15453+#endif 15454+ 15455+// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined 15456+// in drm_fourcc.h hopefully will be sometime in the future but until then... 
15457+#ifndef V4L2_PIX_FMT_NV12_10_COL128 15458+#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') 15459+#endif 15460+ 15461+#ifndef V4L2_PIX_FMT_NV12_COL128 15462+#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ 15463+#endif 15464+ 15465+typedef struct V4L2Queue V4L2Queue; 15466+typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared; 15467+ 15468+typedef enum filter_type_v4l2_e 15469+{ 15470+ FILTER_V4L2_DEINTERLACE = 1, 15471+ FILTER_V4L2_SCALE, 15472+} filter_type_v4l2_t; 15473+ 15474+typedef struct V4L2Buffer { 15475+ int enqueued; 15476+ int reenqueue; 15477+ struct v4l2_buffer buffer; 15478+ AVFrame frame; 15479+ struct v4l2_plane planes[VIDEO_MAX_PLANES]; 15480+ int num_planes; 15481+ AVDRMFrameDescriptor drm_frame; 15482+ V4L2Queue *q; 15483+} V4L2Buffer; 15484+ 15485+typedef struct V4L2Queue { 15486+ struct v4l2_format format; 15487+ struct v4l2_selection sel; 15488+ int eos; 15489+ int num_buffers; 15490+ V4L2Buffer *buffers; 15491+ const char * name; 15492+ DeintV4L2M2MContextShared *ctx; 15493+} V4L2Queue; 15494+ 15495+typedef struct pts_stats_s 15496+{ 15497+ void * logctx; 15498+ const char * name; // For debug 15499+ unsigned int last_count; 15500+ unsigned int last_interval; 15501+ int64_t last_pts; 15502+} pts_stats_t; 15503+ 15504+#define PTS_TRACK_SIZE 32 15505+typedef struct pts_track_el_s 15506+{ 15507+ uint32_t n; 15508+ unsigned int interval; 15509+ AVFrame * props; 15510+} pts_track_el_t; 15511+ 15512+typedef struct pts_track_s 15513+{ 15514+ uint32_t n; 15515+ uint32_t last_n; 15516+ int got_2; 15517+ void * logctx; 15518+ pts_stats_t stats; 15519+ pts_track_el_t a[PTS_TRACK_SIZE]; 15520+} pts_track_t; 15521+ 15522+typedef enum drain_state_e 15523+{ 15524+ DRAIN_NONE = 0, // Not draining 15525+ DRAIN_TIMEOUT, // Drain until normal timeout setup yields no frame 15526+ DRAIN_LAST, // Drain with long timeout last_frame in received on output expected 15527+ 
DRAIN_EOS, // Drain with long timeout EOS expected 15528+ DRAIN_DONE // Drained 15529+} drain_state_t; 15530+ 15531+typedef struct DeintV4L2M2MContextShared { 15532+ void * logctx; // For logging - will be NULL when done 15533+ filter_type_v4l2_t filter_type; 15534+ 15535+ int fd; 15536+ int done; // fd closed - awaiting all refs dropped 15537+ int width; 15538+ int height; 15539+ 15540+ int drain; // EOS received (inlink status) 15541+ drain_state_t drain_state; 15542+ int64_t drain_pts; // PTS associated with inlink status 15543+ 15544+ unsigned int frames_rx; 15545+ unsigned int frames_tx; 15546+ 15547+ // from options 15548+ int output_width; 15549+ int output_height; 15550+ enum AVPixelFormat output_format; 15551+ 15552+ int has_enc_stop; 15553+ // We expect to get exactly the same number of frames out as we put in 15554+ // We can drain by matching input to output 15555+ int one_to_one; 15556+ 15557+ int orig_width; 15558+ int orig_height; 15559+ atomic_uint refcount; 15560+ 15561+ AVBufferRef *hw_frames_ctx; 15562+ 15563+ unsigned int field_order; 15564+ 15565+ pts_track_t track; 15566+ 15567+ V4L2Queue output; 15568+ V4L2Queue capture; 15569+} DeintV4L2M2MContextShared; 15570+ 15571+typedef struct DeintV4L2M2MContext { 15572+ const AVClass *class; 15573+ 15574+ DeintV4L2M2MContextShared *shared; 15575+ 15576+ char * w_expr; 15577+ char * h_expr; 15578+ char * output_format_string;; 15579+ 15580+ int force_original_aspect_ratio; 15581+ int force_divisible_by; 15582+ 15583+ char *colour_primaries_string; 15584+ char *colour_transfer_string; 15585+ char *colour_matrix_string; 15586+ int colour_range; 15587+ char *chroma_location_string; 15588+ 15589+ enum AVColorPrimaries colour_primaries; 15590+ enum AVColorTransferCharacteristic colour_transfer; 15591+ enum AVColorSpace colour_matrix; 15592+ enum AVChromaLocation chroma_location; 15593+} DeintV4L2M2MContext; 15594+ 15595+ 15596+static inline int drain_frame_expected(const drain_state_t d) 15597+{ 15598+
return d == DRAIN_EOS || d == DRAIN_LAST; 15599+} 15600+ 15601+// These just list the ones we know we can cope with 15602+static uint32_t 15603+fmt_av_to_v4l2(const enum AVPixelFormat avfmt) 15604+{ 15605+ switch (avfmt) { 15606+ case AV_PIX_FMT_YUV420P: 15607+ return V4L2_PIX_FMT_YUV420; 15608+ case AV_PIX_FMT_NV12: 15609+ return V4L2_PIX_FMT_NV12; 15610+#if CONFIG_SAND 15611+ case AV_PIX_FMT_RPI4_8: 15612+ case AV_PIX_FMT_SAND128: 15613+ return V4L2_PIX_FMT_NV12_COL128; 15614+#endif 15615+ default: 15616+ break; 15617+ } 15618+ return 0; 15619+} 15620+ 15621+static enum AVPixelFormat 15622+fmt_v4l2_to_av(const uint32_t pixfmt) 15623+{ 15624+ switch (pixfmt) { 15625+ case V4L2_PIX_FMT_YUV420: 15626+ return AV_PIX_FMT_YUV420P; 15627+ case V4L2_PIX_FMT_NV12: 15628+ return AV_PIX_FMT_NV12; 15629+#if CONFIG_SAND 15630+ case V4L2_PIX_FMT_NV12_COL128: 15631+ return AV_PIX_FMT_RPI4_8; 15632+#endif 15633+ default: 15634+ break; 15635+ } 15636+ return AV_PIX_FMT_NONE; 15637+} 15638+ 15639+static unsigned int pts_stats_interval(const pts_stats_t * const stats) 15640+{ 15641+ return stats->last_interval; 15642+} 15643+ 15644+// Pick 64 for max last count - that is >1sec at 60fps 15645+#define STATS_LAST_COUNT_MAX 64 15646+#define STATS_INTERVAL_MAX (1 << 30) 15647+static void pts_stats_add(pts_stats_t * const stats, int64_t pts) 15648+{ 15649+ if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { 15650+ if (stats->last_count < STATS_LAST_COUNT_MAX) 15651+ ++stats->last_count; 15652+ return; 15653+ } 15654+ 15655+ if (stats->last_pts != AV_NOPTS_VALUE) { 15656+ const int64_t interval = pts - stats->last_pts; 15657+ 15658+ if (interval < 0 || interval >= STATS_INTERVAL_MAX || 15659+ stats->last_count >= STATS_LAST_COUNT_MAX) { 15660+ if (stats->last_interval != 0) 15661+ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", 15662+ __func__, stats->name, interval, stats->last_count); 15663+ stats->last_interval = 0; 15664+ } 15665+ else { 15666+ const 
int64_t frame_time = interval / (int64_t)stats->last_count; 15667+ 15668+ if (frame_time != stats->last_interval) 15669+ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", 15670+ __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); 15671+ stats->last_interval = frame_time; 15672+ } 15673+ } 15674+ 15675+ stats->last_pts = pts; 15676+ stats->last_count = 1; 15677+} 15678+ 15679+static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name) 15680+{ 15681+ *stats = (pts_stats_t){ 15682+ .logctx = logctx, 15683+ .name = name, 15684+ .last_count = 1, 15685+ .last_interval = 0, 15686+ .last_pts = AV_NOPTS_VALUE 15687+ }; 15688+} 15689+ 15690+static inline uint32_t pts_track_next_n(pts_track_t * const trk) 15691+{ 15692+ if (++trk->n == 0) 15693+ trk->n = 1; 15694+ return trk->n; 15695+} 15696+ 15697+static int pts_track_get_frame(pts_track_t * const trk, const struct timeval tv, AVFrame * const dst) 15698+{ 15699+ uint32_t n = (uint32_t)(tv.tv_usec / 2 + tv.tv_sec * 500000); 15700+ pts_track_el_t * t; 15701+ 15702+ // As a first guess assume that n==0 means last frame 15703+ if (n == 0) { 15704+ n = trk->last_n; 15705+ if (n == 0) 15706+ goto fail; 15707+ } 15708+ 15709+ t = trk->a + (n & (PTS_TRACK_SIZE - 1)); 15710+ 15711+ if (t->n != n) { 15712+ av_log(trk->logctx, AV_LOG_ERROR, "%s: track failure: got %u, expected %u\n", __func__, n, trk->n); 15713+ goto fail; 15714+ } 15715+ 15716+ // 1st frame is simple - just believe it 15717+ if (n != trk->last_n) { 15718+ trk->last_n = n; 15719+ trk->got_2 = 0; 15720+ return av_frame_copy_props(dst, t->props); 15721+ } 15722+ 15723+ // Only believe in a single interpolated frame 15724+ if (trk->got_2) 15725+ goto fail; 15726+ trk->got_2 = 1; 15727+ 15728+ av_frame_copy_props(dst, t->props); 15729+ 15730+ 15731+ // If we can't guess - don't 15732+ if (t->interval == 0) { 15733+ dst->best_effort_timestamp = AV_NOPTS_VALUE; 15734+ 
dst->pts = AV_NOPTS_VALUE; 15735+ dst->pkt_dts = AV_NOPTS_VALUE; 15736+ } 15737+ else { 15738+ if (dst->best_effort_timestamp != AV_NOPTS_VALUE) 15739+ dst->best_effort_timestamp += t->interval / 2; 15740+ if (dst->pts != AV_NOPTS_VALUE) 15741+ dst->pts += t->interval / 2; 15742+ if (dst->pkt_dts != AV_NOPTS_VALUE) 15743+ dst->pkt_dts += t->interval / 2; 15744+ } 15745+ 15746+ return 0; 15747+ 15748+fail: 15749+ trk->last_n = 0; 15750+ trk->got_2 = 0; 15751+ dst->pts = AV_NOPTS_VALUE; 15752+ dst->pkt_dts = AV_NOPTS_VALUE; 15753+ return 0; 15754+} 15755+ 15756+// We are only ever expecting in-order frames so nothing more clever is required 15757+static unsigned int 15758+pts_track_count(const pts_track_t * const trk) 15759+{ 15760+ return (trk->n - trk->last_n) & (PTS_TRACK_SIZE - 1); 15761+} 15762+ 15763+static struct timeval pts_track_add_frame(pts_track_t * const trk, const AVFrame * const src) 15764+{ 15765+ const uint32_t n = pts_track_next_n(trk); 15766+ pts_track_el_t * const t = trk->a + (n & (PTS_TRACK_SIZE - 1)); 15767+ 15768+ pts_stats_add(&trk->stats, src->pts); 15769+ 15770+ t->n = n; 15771+ t->interval = pts_stats_interval(&trk->stats); // guess that next interval is the same as the last 15772+ av_frame_unref(t->props); 15773+ av_frame_copy_props(t->props, src); 15774+ 15775+ // We now know what the previous interval was, rather than having to guess, 15776+ // so set it. There is a better than decent chance that this is before 15777+ // we use it. 
15778+ if (t->interval != 0) { 15779+ pts_track_el_t * const prev_t = trk->a + ((n - 1) & (PTS_TRACK_SIZE - 1)); 15780+ prev_t->interval = t->interval; 15781+ } 15782+ 15783+ // In case deinterlace interpolates frames use every other usec 15784+ return (struct timeval){.tv_sec = n / 500000, .tv_usec = (n % 500000) * 2}; 15785+} 15786+ 15787+static void pts_track_uninit(pts_track_t * const trk) 15788+{ 15789+ unsigned int i; 15790+ for (i = 0; i != PTS_TRACK_SIZE; ++i) { 15791+ trk->a[i].n = 0; 15792+ av_frame_free(&trk->a[i].props); 15793+ } 15794+} 15795+ 15796+static int pts_track_init(pts_track_t * const trk, void *logctx) 15797+{ 15798+ unsigned int i; 15799+ trk->n = 1; 15800+ pts_stats_init(&trk->stats, logctx, "track"); 15801+ for (i = 0; i != PTS_TRACK_SIZE; ++i) { 15802+ trk->a[i].n = 0; 15803+ if ((trk->a[i].props = av_frame_alloc()) == NULL) { 15804+ pts_track_uninit(trk); 15805+ return AVERROR(ENOMEM); 15806+ } 15807+ } 15808+ return 0; 15809+} 15810+ 15811+static inline uint32_t 15812+fmt_bpl(const struct v4l2_format * const fmt, const unsigned int plane_n) 15813+{ 15814+ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.plane_fmt[plane_n].bytesperline : fmt->fmt.pix.bytesperline; 15815+} 15816+ 15817+static inline uint32_t 15818+fmt_height(const struct v4l2_format * const fmt) 15819+{ 15820+ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; 15821+} 15822+ 15823+static inline uint32_t 15824+fmt_width(const struct v4l2_format * const fmt) 15825+{ 15826+ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; 15827+} 15828+ 15829+static inline uint32_t 15830+fmt_pixelformat(const struct v4l2_format * const fmt) 15831+{ 15832+ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? 
fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; 15833+} 15834+ 15835+static inline uint32_t 15836+buf_bytesused0(const struct v4l2_buffer * const buf) 15837+{ 15838+ return V4L2_TYPE_IS_MULTIPLANAR(buf->type) ? buf->m.planes[0].bytesused : buf->bytesused; 15839+} 15840+ 15841+static void 15842+init_format(V4L2Queue * const q, const uint32_t format_type) 15843+{ 15844+ memset(&q->format, 0, sizeof(q->format)); 15845+ memset(&q->sel, 0, sizeof(q->sel)); 15846+ q->format.type = format_type; 15847+ q->sel.type = format_type; 15848+} 15849+ 15850+static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx) 15851+{ 15852+ struct v4l2_capability cap; 15853+ int ret; 15854+ 15855+ memset(&cap, 0, sizeof(cap)); 15856+ ret = ioctl(ctx->fd, VIDIOC_QUERYCAP, &cap); 15857+ if (ret < 0) 15858+ return ret; 15859+ 15860+ if (ctx->filter_type == FILTER_V4L2_SCALE && 15861+ strcmp("bcm2835-codec-isp", cap.card) != 0) 15862+ { 15863+ av_log(ctx->logctx, AV_LOG_DEBUG, "Not ISP\n"); 15864+ return AVERROR(EINVAL); 15865+ } 15866+ 15867+ if (!(cap.capabilities & V4L2_CAP_STREAMING)) { 15868+ av_log(ctx->logctx, AV_LOG_DEBUG, "No streaming\n"); 15869+ return AVERROR(EINVAL); 15870+ } 15871+ 15872+ if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) { 15873+ init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); 15874+ init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); 15875+ } 15876+ else if (cap.capabilities & V4L2_CAP_VIDEO_M2M) { 15877+ init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE); 15878+ init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT); 15879+ } 15880+ else { 15881+ av_log(ctx->logctx, AV_LOG_DEBUG, "Not M2M\n"); 15882+ return AVERROR(EINVAL); 15883+ } 15884+ 15885+ return 0; 15886+} 15887+ 15888+// Just use for probe - doesn't modify q format 15889+static int deint_v4l2m2m_try_format(V4L2Queue *queue, const uint32_t width, const uint32_t height, const enum AVPixelFormat avfmt) 15890+{ 15891+ struct v4l2_format fmt = {.type = 
queue->format.type}; 15892+ DeintV4L2M2MContextShared *ctx = queue->ctx; 15893+ int ret, field; 15894+ // Pick YUV to test with if not otherwise specified 15895+ uint32_t pixelformat = avfmt == AV_PIX_FMT_NONE ? V4L2_PIX_FMT_YUV420 : fmt_av_to_v4l2(avfmt); 15896+ enum AVPixelFormat r_avfmt; 15897+ 15898+ 15899+ ret = ioctl(ctx->fd, VIDIOC_G_FMT, &fmt); 15900+ if (ret) 15901+ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret); 15902+ 15903+ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && V4L2_TYPE_IS_OUTPUT(fmt.type)) 15904+ field = V4L2_FIELD_INTERLACED_TB; 15905+ else 15906+ field = V4L2_FIELD_NONE; 15907+ 15908+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { 15909+ fmt.fmt.pix_mp.pixelformat = pixelformat; 15910+ fmt.fmt.pix_mp.field = field; 15911+ fmt.fmt.pix_mp.width = width; 15912+ fmt.fmt.pix_mp.height = height; 15913+ } else { 15914+ fmt.fmt.pix.pixelformat = pixelformat; 15915+ fmt.fmt.pix.field = field; 15916+ fmt.fmt.pix.width = width; 15917+ fmt.fmt.pix.height = height; 15918+ } 15919+ 15920+ av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__, 15921+ fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, 15922+ fmt.fmt.pix_mp.pixelformat, 15923+ fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); 15924+ 15925+ ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, &fmt); 15926+ if (ret) 15927+ return AVERROR(EINVAL); 15928+ 15929+ av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__, 15930+ fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, 15931+ fmt.fmt.pix_mp.pixelformat, 15932+ fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); 15933+ 15934+ r_avfmt = fmt_v4l2_to_av(fmt_pixelformat(&fmt)); 15935+ if (r_avfmt != avfmt && avfmt != AV_PIX_FMT_NONE) { 15936+ av_log(ctx->logctx, AV_LOG_DEBUG, "Unable to set format %s on %s port\n", 
av_get_pix_fmt_name(avfmt), V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src"); 15937+ return AVERROR(EINVAL); 15938+ } 15939+ if (r_avfmt == AV_PIX_FMT_NONE) { 15940+ av_log(ctx->logctx, AV_LOG_DEBUG, "No supported format on %s port\n", V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src"); 15941+ return AVERROR(EINVAL); 15942+ } 15943+ 15944+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { 15945+ if (fmt.fmt.pix_mp.field != field) { 15946+ av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); 15947+ 15948+ return AVERROR(EINVAL); 15949+ } 15950+ } else { 15951+ if (fmt.fmt.pix.field != field) { 15952+ av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); 15953+ 15954+ return AVERROR(EINVAL); 15955+ } 15956+ } 15957+ 15958+ return 0; 15959+} 15960+ 15961+static int 15962+do_s_fmt(V4L2Queue * const q) 15963+{ 15964+ DeintV4L2M2MContextShared * const ctx = q->ctx; 15965+ const uint32_t pixelformat = fmt_pixelformat(&q->format); 15966+ int ret; 15967+ 15968+ ret = ioctl(ctx->fd, VIDIOC_S_FMT, &q->format); 15969+ if (ret) { 15970+ ret = AVERROR(errno); 15971+ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %s\n", av_err2str(ret)); 15972+ return ret; 15973+ } 15974+ 15975+ if (pixelformat != fmt_pixelformat(&q->format)) { 15976+ av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt_pixelformat(&q->format))); 15977+ return AVERROR(EINVAL); 15978+ } 15979+ 15980+ q->sel.target = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE, 15981+ q->sel.flags = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? 
V4L2_SEL_FLAG_LE : V4L2_SEL_FLAG_GE; 15982+ 15983+ ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &q->sel); 15984+ if (ret) { 15985+ ret = AVERROR(errno); 15986+ av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %s\n", av_err2str(ret)); 15987+ } 15988+ 15989+ return 0; 15990+} 15991+ 15992+static void 15993+set_fmt_color(struct v4l2_format *const fmt, 15994+ const enum AVColorPrimaries avcp, 15995+ const enum AVColorSpace avcs, 15996+ const enum AVColorTransferCharacteristic avxc) 15997+{ 15998+ enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; 15999+ enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; 16000+ enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; 16001+ 16002+ switch (avcp) { 16003+ case AVCOL_PRI_BT709: 16004+ cs = V4L2_COLORSPACE_REC709; 16005+ ycbcr = V4L2_YCBCR_ENC_709; 16006+ break; 16007+ case AVCOL_PRI_BT470M: 16008+ cs = V4L2_COLORSPACE_470_SYSTEM_M; 16009+ ycbcr = V4L2_YCBCR_ENC_601; 16010+ break; 16011+ case AVCOL_PRI_BT470BG: 16012+ cs = V4L2_COLORSPACE_470_SYSTEM_BG; 16013+ break; 16014+ case AVCOL_PRI_SMPTE170M: 16015+ cs = V4L2_COLORSPACE_SMPTE170M; 16016+ break; 16017+ case AVCOL_PRI_SMPTE240M: 16018+ cs = V4L2_COLORSPACE_SMPTE240M; 16019+ break; 16020+ case AVCOL_PRI_BT2020: 16021+ cs = V4L2_COLORSPACE_BT2020; 16022+ break; 16023+ case AVCOL_PRI_SMPTE428: 16024+ case AVCOL_PRI_SMPTE431: 16025+ case AVCOL_PRI_SMPTE432: 16026+ case AVCOL_PRI_EBU3213: 16027+ case AVCOL_PRI_RESERVED: 16028+ case AVCOL_PRI_FILM: 16029+ case AVCOL_PRI_UNSPECIFIED: 16030+ default: 16031+ break; 16032+ } 16033+ 16034+ switch (avcs) { 16035+ case AVCOL_SPC_RGB: 16036+ cs = V4L2_COLORSPACE_SRGB; 16037+ break; 16038+ case AVCOL_SPC_BT709: 16039+ cs = V4L2_COLORSPACE_REC709; 16040+ break; 16041+ case AVCOL_SPC_FCC: 16042+ cs = V4L2_COLORSPACE_470_SYSTEM_M; 16043+ break; 16044+ case AVCOL_SPC_BT470BG: 16045+ cs = V4L2_COLORSPACE_470_SYSTEM_BG; 16046+ break; 16047+ case AVCOL_SPC_SMPTE170M: 16048+ cs = V4L2_COLORSPACE_SMPTE170M; 16049+ break; 16050+ 
case AVCOL_SPC_SMPTE240M: 16051+ cs = V4L2_COLORSPACE_SMPTE240M; 16052+ break; 16053+ case AVCOL_SPC_BT2020_CL: 16054+ cs = V4L2_COLORSPACE_BT2020; 16055+ ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM; 16056+ break; 16057+ case AVCOL_SPC_BT2020_NCL: 16058+ cs = V4L2_COLORSPACE_BT2020; 16059+ break; 16060+ default: 16061+ break; 16062+ } 16063+ 16064+ switch (xfer) { 16065+ case AVCOL_TRC_BT709: 16066+ xfer = V4L2_XFER_FUNC_709; 16067+ break; 16068+ case AVCOL_TRC_IEC61966_2_1: 16069+ xfer = V4L2_XFER_FUNC_SRGB; 16070+ break; 16071+ case AVCOL_TRC_SMPTE240M: 16072+ xfer = V4L2_XFER_FUNC_SMPTE240M; 16073+ break; 16074+ case AVCOL_TRC_SMPTE2084: 16075+ xfer = V4L2_XFER_FUNC_SMPTE2084; 16076+ break; 16077+ default: 16078+ break; 16079+ } 16080+ 16081+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { 16082+ fmt->fmt.pix_mp.colorspace = cs; 16083+ fmt->fmt.pix_mp.ycbcr_enc = ycbcr; 16084+ fmt->fmt.pix_mp.xfer_func = xfer; 16085+ } else { 16086+ fmt->fmt.pix.colorspace = cs; 16087+ fmt->fmt.pix.ycbcr_enc = ycbcr; 16088+ fmt->fmt.pix.xfer_func = xfer; 16089+ } 16090+} 16091+ 16092+static void 16093+set_fmt_color_range(struct v4l2_format *const fmt, const enum AVColorRange avcr) 16094+{ 16095+ const enum v4l2_quantization q = 16096+ avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE : 16097+ avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE : 16098+ V4L2_QUANTIZATION_DEFAULT; 16099+ 16100+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { 16101+ fmt->fmt.pix_mp.quantization = q; 16102+ } else { 16103+ fmt->fmt.pix.quantization = q; 16104+ } 16105+} 16106+ 16107+static enum AVColorPrimaries get_color_primaries(const struct v4l2_format *const fmt) 16108+{ 16109+ enum v4l2_ycbcr_encoding ycbcr; 16110+ enum v4l2_colorspace cs; 16111+ 16112+ cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? 16113+ fmt->fmt.pix_mp.colorspace : 16114+ fmt->fmt.pix.colorspace; 16115+ 16116+ ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? 
16117+ fmt->fmt.pix_mp.ycbcr_enc: 16118+ fmt->fmt.pix.ycbcr_enc; 16119+ 16120+ switch(ycbcr) { 16121+ case V4L2_YCBCR_ENC_XV709: 16122+ case V4L2_YCBCR_ENC_709: return AVCOL_PRI_BT709; 16123+ case V4L2_YCBCR_ENC_XV601: 16124+ case V4L2_YCBCR_ENC_601:return AVCOL_PRI_BT470M; 16125+ default: 16126+ break; 16127+ } 16128+ 16129+ switch(cs) { 16130+ case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_PRI_BT470BG; 16131+ case V4L2_COLORSPACE_SMPTE170M: return AVCOL_PRI_SMPTE170M; 16132+ case V4L2_COLORSPACE_SMPTE240M: return AVCOL_PRI_SMPTE240M; 16133+ case V4L2_COLORSPACE_BT2020: return AVCOL_PRI_BT2020; 16134+ default: 16135+ break; 16136+ } 16137+ 16138+ return AVCOL_PRI_UNSPECIFIED; 16139+} 16140+ 16141+static enum AVColorSpace get_color_space(const struct v4l2_format *const fmt) 16142+{ 16143+ enum v4l2_ycbcr_encoding ycbcr; 16144+ enum v4l2_colorspace cs; 16145+ 16146+ cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? 16147+ fmt->fmt.pix_mp.colorspace : 16148+ fmt->fmt.pix.colorspace; 16149+ 16150+ ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? 
16151+ fmt->fmt.pix_mp.ycbcr_enc: 16152+ fmt->fmt.pix.ycbcr_enc; 16153+ 16154+ switch(cs) { 16155+ case V4L2_COLORSPACE_SRGB: return AVCOL_SPC_RGB; 16156+ case V4L2_COLORSPACE_REC709: return AVCOL_SPC_BT709; 16157+ case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_SPC_FCC; 16158+ case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_SPC_BT470BG; 16159+ case V4L2_COLORSPACE_SMPTE170M: return AVCOL_SPC_SMPTE170M; 16160+ case V4L2_COLORSPACE_SMPTE240M: return AVCOL_SPC_SMPTE240M; 16161+ case V4L2_COLORSPACE_BT2020: 16162+ if (ycbcr == V4L2_YCBCR_ENC_BT2020_CONST_LUM) 16163+ return AVCOL_SPC_BT2020_CL; 16164+ else 16165+ return AVCOL_SPC_BT2020_NCL; 16166+ default: 16167+ break; 16168+ } 16169+ 16170+ return AVCOL_SPC_UNSPECIFIED; 16171+} 16172+ 16173+static enum AVColorTransferCharacteristic get_color_trc(const struct v4l2_format *const fmt) 16174+{ 16175+ enum v4l2_ycbcr_encoding ycbcr; 16176+ enum v4l2_xfer_func xfer; 16177+ enum v4l2_colorspace cs; 16178+ 16179+ cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? 16180+ fmt->fmt.pix_mp.colorspace : 16181+ fmt->fmt.pix.colorspace; 16182+ 16183+ ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? 16184+ fmt->fmt.pix_mp.ycbcr_enc: 16185+ fmt->fmt.pix.ycbcr_enc; 16186+ 16187+ xfer = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? 
16188+ fmt->fmt.pix_mp.xfer_func: 16189+ fmt->fmt.pix.xfer_func; 16190+ 16191+ switch (xfer) { 16192+ case V4L2_XFER_FUNC_709: return AVCOL_TRC_BT709; 16193+ case V4L2_XFER_FUNC_SRGB: return AVCOL_TRC_IEC61966_2_1; 16194+ default: 16195+ break; 16196+ } 16197+ 16198+ switch (cs) { 16199+ case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_TRC_GAMMA22; 16200+ case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_TRC_GAMMA28; 16201+ case V4L2_COLORSPACE_SMPTE170M: return AVCOL_TRC_SMPTE170M; 16202+ case V4L2_COLORSPACE_SMPTE240M: return AVCOL_TRC_SMPTE240M; 16203+ default: 16204+ break; 16205+ } 16206+ 16207+ switch (ycbcr) { 16208+ case V4L2_YCBCR_ENC_XV709: 16209+ case V4L2_YCBCR_ENC_XV601: return AVCOL_TRC_BT1361_ECG; 16210+ default: 16211+ break; 16212+ } 16213+ 16214+ return AVCOL_TRC_UNSPECIFIED; 16215+} 16216+ 16217+static enum AVColorRange get_color_range(const struct v4l2_format *const fmt) 16218+{ 16219+ enum v4l2_quantization qt; 16220+ 16221+ qt = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? 16222+ fmt->fmt.pix_mp.quantization : 16223+ fmt->fmt.pix.quantization; 16224+ 16225+ switch (qt) { 16226+ case V4L2_QUANTIZATION_LIM_RANGE: return AVCOL_RANGE_MPEG; 16227+ case V4L2_QUANTIZATION_FULL_RANGE: return AVCOL_RANGE_JPEG; 16228+ default: 16229+ break; 16230+ } 16231+ 16232+ return AVCOL_RANGE_UNSPECIFIED; 16233+} 16234+ 16235+static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame) 16236+{ 16237+ struct v4l2_format *const format = &q->format; 16238+ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; 16239+ 16240+ const uint32_t drm_fmt = src->layers[0].format; 16241+ // Treat INVALID as LINEAR 16242+ const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? 
16243+ DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; 16244+ uint32_t pix_fmt = 0; 16245+ uint32_t w = 0; 16246+ uint32_t h = 0; 16247+ uint32_t bpl = src->layers[0].planes[0].pitch; 16248+ 16249+ // We really don't expect multiple layers 16250+ // All formats that we currently cope with are single object 16251+ 16252+ if (src->nb_layers != 1 || src->nb_objects != 1) 16253+ return AVERROR(EINVAL); 16254+ 16255+ switch (drm_fmt) { 16256+ case DRM_FORMAT_YUV420: 16257+ if (mod == DRM_FORMAT_MOD_LINEAR) { 16258+ if (src->layers[0].nb_planes != 3) 16259+ break; 16260+ pix_fmt = V4L2_PIX_FMT_YUV420; 16261+ h = src->layers[0].planes[1].offset / bpl; 16262+ w = bpl; 16263+ } 16264+ break; 16265+ 16266+ case DRM_FORMAT_NV12: 16267+ if (mod == DRM_FORMAT_MOD_LINEAR) { 16268+ if (src->layers[0].nb_planes != 2) 16269+ break; 16270+ pix_fmt = V4L2_PIX_FMT_NV12; 16271+ h = src->layers[0].planes[1].offset / bpl; 16272+ w = bpl; 16273+ } 16274+#if CONFIG_SAND 16275+ else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { 16276+ if (src->layers[0].nb_planes != 2) 16277+ break; 16278+ pix_fmt = V4L2_PIX_FMT_NV12_COL128; 16279+ w = bpl; 16280+ h = src->layers[0].planes[1].offset / 128; 16281+ bpl = fourcc_mod_broadcom_param(mod); 16282+ } 16283+#endif 16284+ break; 16285+ 16286+ case DRM_FORMAT_P030: 16287+#if CONFIG_SAND 16288+ if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { 16289+ if (src->layers[0].nb_planes != 2) 16290+ break; 16291+ pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; 16292+ w = bpl / 2; // Matching lie to how we construct this 16293+ h = src->layers[0].planes[1].offset / 128; 16294+ bpl = fourcc_mod_broadcom_param(mod); 16295+ } 16296+#endif 16297+ break; 16298+ 16299+ default: 16300+ break; 16301+ } 16302+ 16303+ if (!pix_fmt) 16304+ return AVERROR(EINVAL); 16305+ 16306+ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { 16307+ struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; 16308+ 16309+ pix->width = w; 
16310+ pix->height = h; 16311+ pix->pixelformat = pix_fmt; 16312+ pix->plane_fmt[0].bytesperline = bpl; 16313+ pix->num_planes = 1; 16314+ } 16315+ else { 16316+ struct v4l2_pix_format *const pix = &format->fmt.pix; 16317+ 16318+ pix->width = w; 16319+ pix->height = h; 16320+ pix->pixelformat = pix_fmt; 16321+ pix->bytesperline = bpl; 16322+ } 16323+ 16324+ set_fmt_color(format, frame->color_primaries, frame->colorspace, frame->color_trc); 16325+ set_fmt_color_range(format, frame->color_range); 16326+ 16327+ q->sel.r.width = frame->width - (frame->crop_left + frame->crop_right); 16328+ q->sel.r.height = frame->height - (frame->crop_top + frame->crop_bottom); 16329+ q->sel.r.left = frame->crop_left; 16330+ q->sel.r.top = frame->crop_top; 16331+ 16332+ return 0; 16333+} 16334+ 16335+ 16336+static int set_dst_format(DeintV4L2M2MContext * const priv, V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height) 16337+{ 16338+ struct v4l2_format * const fmt = &queue->format; 16339+ struct v4l2_selection *const sel = &queue->sel; 16340+ 16341+ memset(&fmt->fmt, 0, sizeof(fmt->fmt)); 16342+ 16343+ // Align w/h to 16 here in case there are alignment requirements at the next 16344+ // stage of the filter chain (also RPi deinterlace setup is bust and this 16345+ // fixes it) 16346+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { 16347+ fmt->fmt.pix_mp.pixelformat = pixelformat; 16348+ fmt->fmt.pix_mp.field = field; 16349+ fmt->fmt.pix_mp.width = FFALIGN(width, 16); 16350+ fmt->fmt.pix_mp.height = FFALIGN(height, 16); 16351+ } else { 16352+ fmt->fmt.pix.pixelformat = pixelformat; 16353+ fmt->fmt.pix.field = field; 16354+ fmt->fmt.pix.width = FFALIGN(width, 16); 16355+ fmt->fmt.pix.height = FFALIGN(height, 16); 16356+ } 16357+ 16358+ set_fmt_color(fmt, priv->colour_primaries, priv->colour_matrix, priv->colour_transfer); 16359+ set_fmt_color_range(fmt, priv->colour_range); 16360+ 16361+ sel->r.width = width; 16362+ sel->r.height = height; 16363+ sel->r.left = 0; 
16364+ sel->r.top = 0; 16365+ 16366+ return do_s_fmt(queue); 16367+} 16368+ 16369+static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node) 16370+{ 16371+ int ret; 16372+ 16373+ ctx->fd = open(node, O_RDWR | O_NONBLOCK, 0); 16374+ if (ctx->fd < 0) 16375+ return AVERROR(errno); 16376+ 16377+ ret = deint_v4l2m2m_prepare_context(ctx); 16378+ if (ret) { 16379+ av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to prepare context\n"); 16380+ goto fail; 16381+ } 16382+ 16383+ ret = deint_v4l2m2m_try_format(&ctx->capture, ctx->output_width, ctx->output_height, ctx->output_format); 16384+ if (ret) { 16385+ av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try dst format\n"); 16386+ goto fail; 16387+ } 16388+ 16389+ ret = deint_v4l2m2m_try_format(&ctx->output, ctx->width, ctx->height, AV_PIX_FMT_NONE); 16390+ if (ret) { 16391+ av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try src format\n"); 16392+ goto fail; 16393+ } 16394+ 16395+ return 0; 16396+ 16397+fail: 16398+ close(ctx->fd); 16399+ ctx->fd = -1; 16400+ 16401+ return ret; 16402+} 16403+ 16404+static int deint_v4l2m2m_find_device(DeintV4L2M2MContextShared *ctx) 16405+{ 16406+ int ret = AVERROR(EINVAL); 16407+ struct dirent *entry; 16408+ char node[PATH_MAX]; 16409+ DIR *dirp; 16410+ 16411+ dirp = opendir("/dev"); 16412+ if (!dirp) 16413+ return AVERROR(errno); 16414+ 16415+ for (entry = readdir(dirp); entry; entry = readdir(dirp)) { 16416+ 16417+ if (strncmp(entry->d_name, "video", 5)) 16418+ continue; 16419+ 16420+ snprintf(node, sizeof(node), "/dev/%s", entry->d_name); 16421+ av_log(ctx->logctx, AV_LOG_DEBUG, "probing device %s\n", node); 16422+ ret = deint_v4l2m2m_probe_device(ctx, node); 16423+ if (!ret) 16424+ break; 16425+ } 16426+ 16427+ closedir(dirp); 16428+ 16429+ if (ret) { 16430+ av_log(ctx->logctx, AV_LOG_ERROR, "Could not find a valid device\n"); 16431+ ctx->fd = -1; 16432+ 16433+ return ret; 16434+ } 16435+ 16436+ av_log(ctx->logctx, AV_LOG_INFO, "Using device %s\n", node); 16437+ 16438+ return 
0; 16439+} 16440+ 16441+static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf) 16442+{ 16443+ int ret; 16444+ 16445+ ret = ioctl(buf->q->ctx->fd, VIDIOC_QBUF, &buf->buffer); 16446+ if (ret < 0) 16447+ return AVERROR(errno); 16448+ 16449+ buf->enqueued = 1; 16450+ 16451+ return 0; 16452+} 16453+ 16454+static void 16455+drm_frame_init(AVDRMFrameDescriptor * const d) 16456+{ 16457+ unsigned int i; 16458+ for (i = 0; i != AV_DRM_MAX_PLANES; ++i) { 16459+ d->objects[i].fd = -1; 16460+ } 16461+} 16462+ 16463+static void 16464+drm_frame_uninit(AVDRMFrameDescriptor * const d) 16465+{ 16466+ unsigned int i; 16467+ for (i = 0; i != d->nb_objects; ++i) { 16468+ if (d->objects[i].fd != -1) { 16469+ close(d->objects[i].fd); 16470+ d->objects[i].fd = -1; 16471+ } 16472+ } 16473+} 16474+ 16475+static void 16476+avbufs_delete(V4L2Buffer** ppavbufs, const unsigned int n) 16477+{ 16478+ unsigned int i; 16479+ V4L2Buffer* const avbufs = *ppavbufs; 16480+ 16481+ if (avbufs == NULL) 16482+ return; 16483+ *ppavbufs = NULL; 16484+ 16485+ for (i = 0; i != n; ++i) { 16486+ V4L2Buffer* const avbuf = avbufs + i; 16487+ drm_frame_uninit(&avbuf->drm_frame); 16488+ } 16489+ 16490+ av_free(avbufs); 16491+} 16492+ 16493+static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf) 16494+{ 16495+ struct v4l2_exportbuffer expbuf; 16496+ int i, ret; 16497+ uint64_t mod = DRM_FORMAT_MOD_LINEAR; 16498+ 16499+ AVDRMFrameDescriptor * const drm_desc = &avbuf->drm_frame; 16500+ AVDRMLayerDescriptor * const layer = &drm_desc->layers[0]; 16501+ const struct v4l2_format *const fmt = &q->format; 16502+ const uint32_t height = fmt_height(fmt); 16503+ ptrdiff_t bpl0; 16504+ 16505+ /* fill the DRM frame descriptor */ 16506+ drm_desc->nb_layers = 1; 16507+ layer->nb_planes = avbuf->num_planes; 16508+ 16509+ for (int i = 0; i < avbuf->num_planes; i++) { 16510+ layer->planes[i].object_index = i; 16511+ layer->planes[i].offset = 0; 16512+ layer->planes[i].pitch = fmt_bpl(fmt, i); 16513+ } 16514+ 
bpl0 = layer->planes[0].pitch; 16515+ 16516+ switch (fmt_pixelformat(fmt)) { 16517+#if CONFIG_SAND 16518+ case V4L2_PIX_FMT_NV12_COL128: 16519+ mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl0); 16520+ layer->format = V4L2_PIX_FMT_NV12; 16521+ 16522+ if (avbuf->num_planes > 1) 16523+ break; 16524+ 16525+ layer->nb_planes = 2; 16526+ layer->planes[1].object_index = 0; 16527+ layer->planes[1].offset = height * 128; 16528+ layer->planes[0].pitch = fmt_width(fmt); 16529+ layer->planes[1].pitch = layer->planes[0].pitch; 16530+ break; 16531+#endif 16532+ 16533+ case DRM_FORMAT_NV12: 16534+ layer->format = V4L2_PIX_FMT_NV12; 16535+ 16536+ if (avbuf->num_planes > 1) 16537+ break; 16538+ 16539+ layer->nb_planes = 2; 16540+ layer->planes[1].object_index = 0; 16541+ layer->planes[1].offset = bpl0 * height; 16542+ layer->planes[1].pitch = bpl0; 16543+ break; 16544+ 16545+ case V4L2_PIX_FMT_YUV420: 16546+ layer->format = DRM_FORMAT_YUV420; 16547+ 16548+ if (avbuf->num_planes > 1) 16549+ break; 16550+ 16551+ layer->nb_planes = 3; 16552+ layer->planes[1].object_index = 0; 16553+ layer->planes[1].offset = bpl0 * height; 16554+ layer->planes[1].pitch = bpl0 / 2; 16555+ layer->planes[2].object_index = 0; 16556+ layer->planes[2].offset = layer->planes[1].offset + ((bpl0 * height) / 4); 16557+ layer->planes[2].pitch = bpl0 / 2; 16558+ break; 16559+ 16560+ default: 16561+ drm_desc->nb_layers = 0; 16562+ return AVERROR(EINVAL); 16563+ } 16564+ 16565+ drm_desc->nb_objects = 0; 16566+ for (i = 0; i < avbuf->num_planes; i++) { 16567+ memset(&expbuf, 0, sizeof(expbuf)); 16568+ 16569+ expbuf.index = avbuf->buffer.index; 16570+ expbuf.type = avbuf->buffer.type; 16571+ expbuf.plane = i; 16572+ 16573+ ret = ioctl(avbuf->q->ctx->fd, VIDIOC_EXPBUF, &expbuf); 16574+ if (ret < 0) 16575+ return AVERROR(errno); 16576+ 16577+ drm_desc->objects[i].size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type) ? 
16578+ avbuf->buffer.m.planes[i].length : avbuf->buffer.length; 16579+ drm_desc->objects[i].fd = expbuf.fd; 16580+ drm_desc->objects[i].format_modifier = mod; 16581+ drm_desc->nb_objects = i + 1; 16582+ } 16583+ 16584+ return 0; 16585+} 16586+ 16587+static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue) 16588+{ 16589+ struct v4l2_format *fmt = &queue->format; 16590+ DeintV4L2M2MContextShared *ctx = queue->ctx; 16591+ struct v4l2_requestbuffers req; 16592+ int ret, i, multiplanar; 16593+ uint32_t memory; 16594+ 16595+ memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ? 16596+ V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; 16597+ 16598+ multiplanar = V4L2_TYPE_IS_MULTIPLANAR(fmt->type); 16599+ 16600+ memset(&req, 0, sizeof(req)); 16601+ req.count = queue->num_buffers; 16602+ req.memory = memory; 16603+ req.type = fmt->type; 16604+ 16605+ ret = ioctl(ctx->fd, VIDIOC_REQBUFS, &req); 16606+ if (ret < 0) { 16607+ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_REQBUFS failed: %s\n", strerror(errno)); 16608+ 16609+ return AVERROR(errno); 16610+ } 16611+ 16612+ queue->num_buffers = req.count; 16613+ queue->buffers = av_mallocz(queue->num_buffers * sizeof(V4L2Buffer)); 16614+ if (!queue->buffers) { 16615+ av_log(ctx->logctx, AV_LOG_ERROR, "malloc enomem\n"); 16616+ 16617+ return AVERROR(ENOMEM); 16618+ } 16619+ 16620+ for (i = 0; i < queue->num_buffers; i++) { 16621+ V4L2Buffer * const buf = &queue->buffers[i]; 16622+ 16623+ buf->enqueued = 0; 16624+ buf->q = queue; 16625+ 16626+ buf->buffer.type = fmt->type; 16627+ buf->buffer.memory = memory; 16628+ buf->buffer.index = i; 16629+ 16630+ if (multiplanar) { 16631+ buf->buffer.length = VIDEO_MAX_PLANES; 16632+ buf->buffer.m.planes = buf->planes; 16633+ } 16634+ 16635+ drm_frame_init(&buf->drm_frame); 16636+ } 16637+ 16638+ for (i = 0; i < queue->num_buffers; i++) { 16639+ V4L2Buffer * const buf = &queue->buffers[i]; 16640+ 16641+ ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer); 16642+ if (ret < 0) { 16643+ ret = AVERROR(errno); 16644+ 
16645+ goto fail; 16646+ } 16647+ 16648+ buf->num_planes = multiplanar ? buf->buffer.length : 1; 16649+ 16650+ if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) { 16651+ ret = deint_v4l2m2m_enqueue_buffer(buf); 16652+ if (ret) 16653+ goto fail; 16654+ 16655+ ret = v4l2_buffer_export_drm(queue, buf); 16656+ if (ret) 16657+ goto fail; 16658+ } 16659+ } 16660+ 16661+ return 0; 16662+ 16663+fail: 16664+ avbufs_delete(&queue->buffers, queue->num_buffers); 16665+ queue->num_buffers = 0; 16666+ return ret; 16667+} 16668+ 16669+static int deint_v4l2m2m_streamon(V4L2Queue *queue) 16670+{ 16671+ DeintV4L2M2MContextShared * const ctx = queue->ctx; 16672+ int type = queue->format.type; 16673+ int ret; 16674+ 16675+ ret = ioctl(ctx->fd, VIDIOC_STREAMON, &type); 16676+ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno)); 16677+ if (ret < 0) 16678+ return AVERROR(errno); 16679+ 16680+ return 0; 16681+} 16682+ 16683+static int deint_v4l2m2m_streamoff(V4L2Queue *queue) 16684+{ 16685+ DeintV4L2M2MContextShared * const ctx = queue->ctx; 16686+ int type = queue->format.type; 16687+ int ret; 16688+ 16689+ ret = ioctl(ctx->fd, VIDIOC_STREAMOFF, &type); 16690+ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno)); 16691+ if (ret < 0) 16692+ return AVERROR(errno); 16693+ 16694+ return 0; 16695+} 16696+ 16697+// timeout in ms 16698+static V4L2Buffer* deint_v4l2m2m_dequeue_buffer(V4L2Queue *queue, int timeout) 16699+{ 16700+ struct v4l2_plane planes[VIDEO_MAX_PLANES]; 16701+ DeintV4L2M2MContextShared *ctx = queue->ctx; 16702+ struct v4l2_buffer buf = { 0 }; 16703+ V4L2Buffer* avbuf = NULL; 16704+ struct pollfd pfd; 16705+ short events; 16706+ int ret; 16707+ 16708+ if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) 16709+ events = POLLOUT | POLLWRNORM; 16710+ else 16711+ events = POLLIN | POLLRDNORM; 16712+ 16713+ pfd.events = events; 16714+ pfd.fd = ctx->fd; 16715+ 16716+ for (;;) { 16717+ ret = poll(&pfd, 
1, timeout); 16718+ if (ret > 0) 16719+ break; 16720+ if (errno == EINTR) 16721+ continue; 16722+ return NULL; 16723+ } 16724+ 16725+ if (pfd.revents & POLLERR) 16726+ return NULL; 16727+ 16728+ if (pfd.revents & events) { 16729+ memset(&buf, 0, sizeof(buf)); 16730+ buf.memory = V4L2_MEMORY_MMAP; 16731+ buf.type = queue->format.type; 16732+ if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { 16733+ memset(planes, 0, sizeof(planes)); 16734+ buf.length = VIDEO_MAX_PLANES; 16735+ buf.m.planes = planes; 16736+ } 16737+ 16738+ ret = ioctl(ctx->fd, VIDIOC_DQBUF, &buf); 16739+ if (ret) { 16740+ if (errno != EAGAIN) 16741+ av_log(ctx->logctx, AV_LOG_DEBUG, "VIDIOC_DQBUF, errno (%s)\n", 16742+ av_err2str(AVERROR(errno))); 16743+ return NULL; 16744+ } 16745+ 16746+ avbuf = &queue->buffers[buf.index]; 16747+ avbuf->enqueued = 0; 16748+ avbuf->buffer = buf; 16749+ if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { 16750+ memcpy(avbuf->planes, planes, sizeof(planes)); 16751+ avbuf->buffer.m.planes = avbuf->planes; 16752+ } 16753+ return avbuf; 16754+ } 16755+ 16756+ return NULL; 16757+} 16758+ 16759+static V4L2Buffer *deint_v4l2m2m_find_free_buf(V4L2Queue *queue) 16760+{ 16761+ int i; 16762+ V4L2Buffer *buf = NULL; 16763+ 16764+ for (i = 0; i < queue->num_buffers; i++) 16765+ if (!queue->buffers[i].enqueued) { 16766+ buf = &queue->buffers[i]; 16767+ break; 16768+ } 16769+ return buf; 16770+} 16771+ 16772+static void deint_v4l2m2m_unref_queued(V4L2Queue *queue) 16773+{ 16774+ int i; 16775+ V4L2Buffer *buf = NULL; 16776+ 16777+ if (!queue || !queue->buffers) 16778+ return; 16779+ for (i = 0; i < queue->num_buffers; i++) { 16780+ buf = &queue->buffers[i]; 16781+ if (queue->buffers[i].enqueued) 16782+ av_frame_unref(&buf->frame); 16783+ } 16784+} 16785+ 16786+static void recycle_q(V4L2Queue * const queue) 16787+{ 16788+ V4L2Buffer* avbuf; 16789+ while (avbuf = deint_v4l2m2m_dequeue_buffer(queue, 0), avbuf) { 16790+ av_frame_unref(&avbuf->frame); 16791+ } 16792+} 16793+ 
16794+static int count_enqueued(V4L2Queue *queue) 16795+{ 16796+ int i; 16797+ int n = 0; 16798+ 16799+ if (queue->buffers == NULL) 16800+ return 0; 16801+ 16802+ for (i = 0; i < queue->num_buffers; i++) 16803+ if (queue->buffers[i].enqueued) 16804+ ++n; 16805+ return n; 16806+} 16807+ 16808+static int deint_v4l2m2m_enqueue_frame(V4L2Queue * const queue, AVFrame * const frame) 16809+{ 16810+ DeintV4L2M2MContextShared *const ctx = queue->ctx; 16811+ AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)frame->data[0]; 16812+ V4L2Buffer *buf; 16813+ int i; 16814+ 16815+ if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) 16816+ recycle_q(queue); 16817+ 16818+ buf = deint_v4l2m2m_find_free_buf(queue); 16819+ if (!buf) { 16820+ av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d finding free buf\n", __func__, 0); 16821+ return AVERROR(EAGAIN); 16822+ } 16823+ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buffer.type)) 16824+ for (i = 0; i < drm_desc->nb_objects; i++) 16825+ buf->buffer.m.planes[i].m.fd = drm_desc->objects[i].fd; 16826+ else 16827+ buf->buffer.m.fd = drm_desc->objects[0].fd; 16828+ 16829+ buf->buffer.field = !frame->interlaced_frame ? V4L2_FIELD_NONE : 16830+ frame->top_field_first ? 
V4L2_FIELD_INTERLACED_TB : 16831+ V4L2_FIELD_INTERLACED_BT; 16832+ 16833+ if (ctx->field_order != buf->buffer.field) { 16834+ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Field changed: %d->%d\n", __func__, ctx->field_order, buf->buffer.field); 16835+ ctx->field_order = buf->buffer.field; 16836+ } 16837+ 16838+ buf->buffer.timestamp = pts_track_add_frame(&ctx->track, frame); 16839+ 16840+ buf->drm_frame.objects[0].fd = drm_desc->objects[0].fd; 16841+ 16842+ av_frame_move_ref(&buf->frame, frame); 16843+ 16844+ return deint_v4l2m2m_enqueue_buffer(buf); 16845+} 16846+ 16847+static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx) 16848+{ 16849+ if (atomic_fetch_sub(&ctx->refcount, 1) == 1) { 16850+ V4L2Queue *capture = &ctx->capture; 16851+ V4L2Queue *output = &ctx->output; 16852+ 16853+ av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__); 16854+ 16855+ if (ctx->fd >= 0) { 16856+ deint_v4l2m2m_streamoff(capture); 16857+ deint_v4l2m2m_streamoff(output); 16858+ } 16859+ 16860+ avbufs_delete(&capture->buffers, capture->num_buffers); 16861+ 16862+ deint_v4l2m2m_unref_queued(output); 16863+ 16864+ av_buffer_unref(&ctx->hw_frames_ctx); 16865+ 16866+ if (capture->buffers) 16867+ av_free(capture->buffers); 16868+ 16869+ if (output->buffers) 16870+ av_free(output->buffers); 16871+ 16872+ if (ctx->fd >= 0) { 16873+ close(ctx->fd); 16874+ ctx->fd = -1; 16875+ } 16876+ 16877+ av_free(ctx); 16878+ } 16879+} 16880+ 16881+static void v4l2_free_buffer(void *opaque, uint8_t *unused) 16882+{ 16883+ V4L2Buffer *buf = opaque; 16884+ DeintV4L2M2MContextShared *ctx = buf->q->ctx; 16885+ 16886+ if (!ctx->done) 16887+ deint_v4l2m2m_enqueue_buffer(buf); 16888+ 16889+ deint_v4l2m2m_destroy_context(ctx); 16890+} 16891+ 16892+// timeout in ms 16893+static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout) 16894+{ 16895+ DeintV4L2M2MContextShared *ctx = queue->ctx; 16896+ V4L2Buffer* avbuf; 16897+ enum AVColorPrimaries color_primaries; 16898+ 
enum AVColorSpace colorspace; 16899+ enum AVColorTransferCharacteristic color_trc; 16900+ enum AVColorRange color_range; 16901+ 16902+ av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__); 16903+ 16904+ if (queue->eos) { 16905+ av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: EOS\n", __func__); 16906+ return AVERROR_EOF; 16907+ } 16908+ 16909+ avbuf = deint_v4l2m2m_dequeue_buffer(queue, timeout); 16910+ if (!avbuf) { 16911+ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: No buffer to dequeue (timeout=%d)\n", __func__, timeout); 16912+ return AVERROR(EAGAIN); 16913+ } 16914+ 16915+ if (V4L2_TYPE_IS_CAPTURE(avbuf->buffer.type)) { 16916+ if ((avbuf->buffer.flags & V4L2_BUF_FLAG_LAST) != 0) 16917+ queue->eos = 1; 16918+ if (buf_bytesused0(&avbuf->buffer) == 0) 16919+ return queue->eos ? AVERROR_EOF : AVERROR(EINVAL); 16920+ } 16921+ 16922+ // Fill in PTS and ancillary info from src frame 16923+ pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame); 16924+ 16925+ frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame, 16926+ sizeof(avbuf->drm_frame), v4l2_free_buffer, 16927+ avbuf, AV_BUFFER_FLAG_READONLY); 16928+ if (!frame->buf[0]) { 16929+ av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d creating buffer\n", __func__, 0); 16930+ return AVERROR(ENOMEM); 16931+ } 16932+ // Each frame handed out holds a context reference; v4l2_free_buffer() drops it via deint_v4l2m2m_destroy_context() 16933+ atomic_fetch_add(&ctx->refcount, 1); 16934+ 16935+ frame->data[0] = (uint8_t *)&avbuf->drm_frame; 16936+ frame->format = AV_PIX_FMT_DRM_PRIME; 16937+ if (ctx->hw_frames_ctx) 16938+ frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx); 16939+ frame->height = ctx->output_height; 16940+ frame->width = ctx->output_width; 16941+ 16942+ color_primaries = get_color_primaries(&ctx->capture.format); 16943+ colorspace = get_color_space(&ctx->capture.format); 16944+ color_trc = get_color_trc(&ctx->capture.format); 16945+ color_range = get_color_range(&ctx->capture.format); 16946+ 16947+ // If the color parameters are unspecified by V4L2 then leave alone as they 16948+ // will have been copied from
src 16949+ if (color_primaries != AVCOL_PRI_UNSPECIFIED) 16950+ frame->color_primaries = color_primaries; 16951+ if (colorspace != AVCOL_SPC_UNSPECIFIED) 16952+ frame->colorspace = colorspace; 16953+ if (color_trc != AVCOL_TRC_UNSPECIFIED) 16954+ frame->color_trc = color_trc; 16955+ if (color_range != AVCOL_RANGE_UNSPECIFIED) 16956+ frame->color_range = color_range; 16957+ 16958+ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE) { 16959+ // Not interlaced now 16960+ frame->interlaced_frame = 0; // *** Fill in from dst buffer? 16961+ frame->top_field_first = 0; 16962+ // Pkt duration halved 16963+ frame->pkt_duration /= 2; 16964+ } 16965+ 16966+ if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) { 16967+ av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n"); 16968+ frame->decode_error_flags |= FF_DECODE_ERROR_INVALID_BITSTREAM; 16969+ } 16970+ 16971+ av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: PTS=%"PRId64"\n", __func__, frame->pts); 16972+ return 0; 16973+} 16974+ 16975+static int deint_v4l2m2m_config_props(AVFilterLink *outlink) 16976+{ 16977+ AVFilterLink *inlink = outlink->src->inputs[0]; 16978+ AVFilterContext *avctx = outlink->src; 16979+ DeintV4L2M2MContext *priv = avctx->priv; 16980+ DeintV4L2M2MContextShared *ctx = priv->shared; 16981+ int ret; 16982+ 16983+ ctx->height = avctx->inputs[0]->h; 16984+ ctx->width = avctx->inputs[0]->w; 16985+ 16986+ if (ctx->filter_type == FILTER_V4L2_SCALE) { 16987+ if ((ret = ff_scale_eval_dimensions(priv, 16988+ priv->w_expr, priv->h_expr, 16989+ inlink, outlink, 16990+ &ctx->output_width, &ctx->output_height)) < 0) 16991+ return ret; 16992+ 16993+ ff_scale_adjust_dimensions(inlink, &ctx->output_width, &ctx->output_height, 16994+ priv->force_original_aspect_ratio, priv->force_divisible_by); 16995+ } 16996+ else { 16997+ ctx->output_width = ctx->width; 16998+ ctx->output_height = ctx->height; 16999+ } 17000+ 17001+ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d FR: %d/%d->%d/%d\n", __func__, 17002+ ctx->width, ctx->height, 
ctx->output_width, ctx->output_height, 17003+ inlink->frame_rate.num, inlink->frame_rate.den, outlink->frame_rate.num, outlink->frame_rate.den); 17004+ 17005+ outlink->time_base = inlink->time_base; 17006+ outlink->w = ctx->output_width; 17007+ outlink->h = ctx->output_height; 17008+ outlink->format = inlink->format; 17009+ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && inlink->frame_rate.den != 0) 17010+ outlink->frame_rate = (AVRational){inlink->frame_rate.num * 2, inlink->frame_rate.den}; 17011+ 17012+ if (inlink->sample_aspect_ratio.num) 17013+ outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio); 17014+ else 17015+ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; 17016+ 17017+ ret = deint_v4l2m2m_find_device(ctx); 17018+ if (ret) 17019+ return ret; 17020+ 17021+ if (inlink->hw_frames_ctx) { 17022+ ctx->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); 17023+ if (!ctx->hw_frames_ctx) 17024+ return AVERROR(ENOMEM); 17025+ } 17026+ return 0; 17027+} 17028+ 17029+static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc) 17030+{ 17031+ const uint64_t mod = drm_desc->objects[0].format_modifier; 17032+ const int is_linear = (mod == DRM_FORMAT_MOD_LINEAR || mod == DRM_FORMAT_MOD_INVALID); 17033+ 17034+ // Only currently support single object things 17035+ if (drm_desc->nb_objects != 1) 17036+ return 0; 17037+ 17038+ switch (drm_desc->layers[0].format) { 17039+ case DRM_FORMAT_YUV420: 17040+ return is_linear ? V4L2_PIX_FMT_YUV420 : 0; 17041+ case DRM_FORMAT_NV12: 17042+ return is_linear ? V4L2_PIX_FMT_NV12 : 17043+#if CONFIG_SAND 17044+ fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? 
V4L2_PIX_FMT_NV12_COL128 : 17045+#endif 17046+ 0; 17047+ default: 17048+ break; 17049+ } 17050+ return 0; 17051+} 17052+ 17053+static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) 17054+{ 17055+ AVFilterContext *avctx = link->dst; 17056+ DeintV4L2M2MContext *priv = avctx->priv; 17057+ DeintV4L2M2MContextShared *ctx = priv->shared; 17058+ V4L2Queue *capture = &ctx->capture; 17059+ V4L2Queue *output = &ctx->output; 17060+ int ret; 17061+ 17062+ av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" dts: %"PRId64" field :%d interlaced: %d aspect:%d/%d\n", 17063+ __func__, in->pts, in->pkt_dts, in->top_field_first, in->interlaced_frame, in->sample_aspect_ratio.num, in->sample_aspect_ratio.den); 17064+ av_log(priv, AV_LOG_DEBUG, "--- %s: in status in %d/ot %d; out status in %d/out %d\n", __func__, 17065+ avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out); 17066+ 17067+ if (ctx->field_order == V4L2_FIELD_ANY) { 17068+ const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0]; 17069+ uint32_t pixelformat = desc_pixelformat(drm_desc); 17070+ 17071+ if (pixelformat == 0) { 17072+ av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n", 17073+ av_fourcc2str(drm_desc->layers[0].format), 17074+ drm_desc->nb_objects, drm_desc->objects[0].format_modifier); 17075+ return AVERROR(EINVAL); 17076+ } 17077+ 17078+ ctx->orig_width = drm_desc->layers[0].planes[0].pitch; 17079+ ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width; 17080+ 17081+ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height, 17082+ drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset); 17083+ 17084+ if ((ret = set_src_fmt(output, in)) != 0) { 17085+ av_log(avctx, AV_LOG_WARNING, "Unknown input DRM format: %s mod: %#" PRIx64 "\n", 17086+ av_fourcc2str(drm_desc->layers[0].format), 
drm_desc->objects[0].format_modifier); 17087+ return ret; 17088+ } 17089+ 17090+ ret = do_s_fmt(output); 17091+ if (ret) { 17092+ av_log(avctx, AV_LOG_WARNING, "Failed to set source format\n"); 17093+ return ret; 17094+ } 17095+ 17096+ if (ctx->output_format != AV_PIX_FMT_NONE) 17097+ pixelformat = fmt_av_to_v4l2(ctx->output_format); 17098+ ret = set_dst_format(priv, capture, pixelformat, V4L2_FIELD_NONE, ctx->output_width, ctx->output_height); 17099+ if (ret) { 17100+ av_log(avctx, AV_LOG_WARNING, "Failed to set destination format\n"); 17101+ return ret; 17102+ } 17103+ 17104+ ret = deint_v4l2m2m_allocate_buffers(capture); 17105+ if (ret) { 17106+ av_log(avctx, AV_LOG_WARNING, "Failed to allocate destination buffers\n"); 17107+ return ret; 17108+ } 17109+ 17110+ ret = deint_v4l2m2m_streamon(capture); 17111+ if (ret) { 17112+ av_log(avctx, AV_LOG_WARNING, "Failed set destination streamon: %s\n", av_err2str(ret)); 17113+ return ret; 17114+ } 17115+ 17116+ ret = deint_v4l2m2m_allocate_buffers(output); 17117+ if (ret) { 17118+ av_log(avctx, AV_LOG_WARNING, "Failed to allocate src buffers\n"); 17119+ return ret; 17120+ } 17121+ 17122+ ret = deint_v4l2m2m_streamon(output); 17123+ if (ret) { 17124+ av_log(avctx, AV_LOG_WARNING, "Failed set src streamon: %s\n", av_err2str(ret)); 17125+ return ret; 17126+ } 17127+ 17128+ if (in->top_field_first) 17129+ ctx->field_order = V4L2_FIELD_INTERLACED_TB; 17130+ else 17131+ ctx->field_order = V4L2_FIELD_INTERLACED_BT; 17132+ 17133+ { 17134+ struct v4l2_encoder_cmd ecmd = { 17135+ .cmd = V4L2_ENC_CMD_STOP 17136+ }; 17137+ ctx->has_enc_stop = 0; 17138+ if (ioctl(ctx->fd, VIDIOC_TRY_ENCODER_CMD, &ecmd) == 0) { 17139+ av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop succeeded\n"); 17140+ ctx->has_enc_stop = 1; 17141+ } 17142+ else { 17143+ av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop fail: %s\n", av_err2str(AVERROR(errno))); 17144+ } 17145+ 17146+ } 17147+ } 17148+ 17149+ ret = deint_v4l2m2m_enqueue_frame(output, in); 17150+ 
17151+ av_log(priv, AV_LOG_TRACE, ">>> %s: %s\n", __func__, av_err2str(ret)); 17152+ return ret; 17153+} 17154+ 17155+static int 17156+ack_inlink(AVFilterContext * const avctx, DeintV4L2M2MContextShared *const s, 17157+ AVFilterLink * const inlink) 17158+{ 17159+ int instatus; 17160+ int64_t inpts; 17161+ 17162+ if (ff_inlink_acknowledge_status(inlink, &instatus, &inpts) <= 0) 17163+ return 0; 17164+ 17165+ s->drain = instatus; 17166+ s->drain_pts = inpts; 17167+ s->drain_state = DRAIN_TIMEOUT; 17168+ 17169+ if (s->field_order == V4L2_FIELD_ANY) { // Not yet started 17170+ s->drain_state = DRAIN_DONE; 17171+ } 17172+ else if (s->one_to_one) { 17173+ s->drain_state = DRAIN_LAST; 17174+ } 17175+ else if (s->has_enc_stop) { 17176+ struct v4l2_encoder_cmd ecmd = { 17177+ .cmd = V4L2_ENC_CMD_STOP 17178+ }; 17179+ if (ioctl(s->fd, VIDIOC_ENCODER_CMD, &ecmd) == 0) { 17180+ av_log(avctx->priv, AV_LOG_DEBUG, "Do Encode stop\n"); 17181+ s->drain_state = DRAIN_EOS; 17182+ } 17183+ else { 17184+ av_log(avctx->priv, AV_LOG_WARNING, "Encode stop fail: %s\n", av_err2str(AVERROR(errno))); 17185+ } 17186+ } 17187+ return 1; 17188+} 17189+ 17190+static int deint_v4l2m2m_activate(AVFilterContext *avctx) 17191+{ 17192+ DeintV4L2M2MContext * const priv = avctx->priv; 17193+ DeintV4L2M2MContextShared *const s = priv->shared; 17194+ AVFilterLink * const outlink = avctx->outputs[0]; 17195+ AVFilterLink * const inlink = avctx->inputs[0]; 17196+ int n = 0; 17197+ int cn = 99; 17198+ int did_something = 0; 17199+ 17200+ av_log(priv, AV_LOG_TRACE, "<<< %s\n", __func__); 17201+ 17202+ FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx); 17203+ 17204+ ack_inlink(avctx, s, inlink); 17205+ 17206+ if (s->field_order != V4L2_FIELD_ANY) // Can't DQ if no setup! 
17207+ { 17208+ AVFrame * frame = av_frame_alloc(); 17209+ int rv; 17210+ 17211+ recycle_q(&s->output); 17212+ n = count_enqueued(&s->output); 17213+ 17214+ if (frame == NULL) { 17215+ av_log(priv, AV_LOG_ERROR, "%s: error allocating frame\n", __func__); 17216+ return AVERROR(ENOMEM); 17217+ } 17218+ 17219+ rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame, 17220+ drain_frame_expected(s->drain_state) || n > 4 ? 300 : 0); 17221+ if (rv != 0) { 17222+ av_frame_free(&frame); 17223+ if (rv == AVERROR_EOF) { 17224+ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ EOF\n", __func__); 17225+ s->drain_state = DRAIN_DONE; 17226+ } 17227+ else if (rv == AVERROR(EAGAIN)) { 17228+ if (s->drain_state != DRAIN_NONE) { 17229+ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ empty - drain done\n", __func__); 17230+ s->drain_state = DRAIN_DONE; 17231+ } 17232+ } 17233+ else { 17234+ av_log(priv, AV_LOG_ERROR, ">>> %s: DQ fail: %s\n", __func__, av_err2str(rv)); 17235+ return rv; 17236+ } 17237+ } 17238+ else { 17239+ frame->interlaced_frame = 0; 17240+ // frame is always consumed by filter_frame - even on error despite 17241+ // a somewhat confusing comment in the header 17242+ rv = ff_filter_frame(outlink, frame); 17243+ ++s->frames_tx; 17244+ 17245+ av_log(priv, AV_LOG_TRACE, "%s: Filtered: %s\n", __func__, av_err2str(rv)); 17246+ did_something = 1; 17247+ 17248+ if (s->drain_state != DRAIN_NONE && pts_track_count(&s->track) == 0) { 17249+ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ last - drain done\n", __func__); 17250+ s->drain_state = DRAIN_DONE; 17251+ } 17252+ } 17253+ 17254+ cn = count_enqueued(&s->capture); 17255+ } 17256+ 17257+ if (s->drain_state == DRAIN_DONE) { 17258+ ff_outlink_set_status(outlink, s->drain, s->drain_pts); 17259+ av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(s->drain)); 17260+ return 0; 17261+ } 17262+ 17263+ recycle_q(&s->output); 17264+ n = count_enqueued(&s->output); 17265+ 17266+ while (n < 6 && !s->drain) { 17267+ AVFrame * frame; 17268+ 
int rv; 17269+ 17270+ if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) { 17271+ av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv)); 17272+ return rv; 17273+ } 17274+ 17275+ if (frame == NULL) { 17276+ av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__); 17277+ if (!ack_inlink(avctx, s, inlink)) { 17278+ ff_inlink_request_frame(inlink); 17279+ av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__); 17280+ } 17281+ break; 17282+ } 17283+ ++s->frames_rx; 17284+ 17285+ rv = deint_v4l2m2m_filter_frame(inlink, frame); 17286+ av_frame_free(&frame); 17287+ 17288+ if (rv != 0) 17289+ return rv; 17290+ 17291+ av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__); 17292+ did_something = 1; 17293+ ++n; 17294+ } 17295+ 17296+ if ((n > 4 || s->drain) && ff_outlink_frame_wanted(outlink)) { 17297+ ff_filter_set_ready(avctx, 1); 17298+ did_something = 1; 17299+ av_log(priv, AV_LOG_TRACE, "%s: ready\n", __func__); 17300+ } 17301+ 17302+ av_log(priv, AV_LOG_TRACE, ">>> %s: OK (n=%d, cn=%d)\n", __func__, n, cn); 17303+ return did_something ? 
0 : FFERROR_NOT_READY; 17304+} 17305+ 17306+static av_cold int common_v4l2m2m_init(AVFilterContext * const avctx, const filter_type_v4l2_t filter_type) 17307+{ 17308+ DeintV4L2M2MContext * const priv = avctx->priv; 17309+ DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared)); 17310+ 17311+ if (!ctx) { 17312+ av_log(priv, AV_LOG_ERROR, "%s: error %d allocating context\n", __func__, 0); 17313+ return AVERROR(ENOMEM); 17314+ } 17315+ priv->shared = ctx; 17316+ ctx->logctx = priv; 17317+ ctx->filter_type = filter_type; 17318+ ctx->fd = -1; 17319+ ctx->output.ctx = ctx; 17320+ ctx->output.num_buffers = 8; 17321+ ctx->output.name = "OUTPUT"; 17322+ ctx->capture.ctx = ctx; 17323+ ctx->capture.num_buffers = 12; 17324+ ctx->capture.name = "CAPTURE"; 17325+ ctx->done = 0; 17326+ ctx->field_order = V4L2_FIELD_ANY; 17327+ 17328+ pts_track_init(&ctx->track, priv); 17329+ 17330+ atomic_init(&ctx->refcount, 1); 17331+ 17332+ if (priv->output_format_string) { 17333+ ctx->output_format = av_get_pix_fmt(priv->output_format_string); 17334+ if (ctx->output_format == AV_PIX_FMT_NONE) { 17335+ av_log(avctx, AV_LOG_ERROR, "Invalid ffmpeg output format '%s'.\n", priv->output_format_string); 17336+ return AVERROR(EINVAL); 17337+ } 17338+ if (fmt_av_to_v4l2(ctx->output_format) == 0) { 17339+ av_log(avctx, AV_LOG_ERROR, "Unsupported output format for V4L2: %s.\n", av_get_pix_fmt_name(ctx->output_format)); 17340+ return AVERROR(EINVAL); 17341+ } 17342+ } else { 17343+ // Use the input format once that is configured. 
17344+ ctx->output_format = AV_PIX_FMT_NONE; 17345+ } 17346+ 17347+#define STRING_OPTION(var_name, func_name, default_value) do { \ 17348+ if (priv->var_name ## _string) { \ 17349+ int var = av_ ## func_name ## _from_name(priv->var_name ## _string); \ 17350+ if (var < 0) { \ 17351+ av_log(avctx, AV_LOG_ERROR, "Invalid %s.\n", #var_name); \ 17352+ return AVERROR(EINVAL); \ 17353+ } \ 17354+ priv->var_name = var; \ 17355+ } else { \ 17356+ priv->var_name = default_value; \ 17357+ } \ 17358+ } while (0) 17359+ 17360+ STRING_OPTION(colour_primaries, color_primaries, AVCOL_PRI_UNSPECIFIED); 17361+ STRING_OPTION(colour_transfer, color_transfer, AVCOL_TRC_UNSPECIFIED); 17362+ STRING_OPTION(colour_matrix, color_space, AVCOL_SPC_UNSPECIFIED); 17363+ STRING_OPTION(chroma_location, chroma_location, AVCHROMA_LOC_UNSPECIFIED); 17364+ 17365+ return 0; 17366+} 17367+ 17368+static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx) 17369+{ 17370+ return common_v4l2m2m_init(avctx, FILTER_V4L2_DEINTERLACE); 17371+} 17372+ 17373+static av_cold int scale_v4l2m2m_init(AVFilterContext *avctx) 17374+{ 17375+ int rv; 17376+ DeintV4L2M2MContext * priv; 17377+ DeintV4L2M2MContextShared * ctx; 17378+ 17379+ if ((rv = common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE)) != 0) 17380+ return rv; 17381+ 17382+ priv = avctx->priv; 17383+ ctx = priv->shared; 17384+ 17385+ ctx->one_to_one = 1; 17386+ return 0; 17387+} 17388+ 17389+static void deint_v4l2m2m_uninit(AVFilterContext *avctx) 17390+{ 17391+ DeintV4L2M2MContext *priv = avctx->priv; 17392+ DeintV4L2M2MContextShared *ctx = priv->shared; 17393+ if (ctx == NULL) return; // common_v4l2m2m_init() failed to allocate the shared context: nothing to release 17394+ av_log(priv, AV_LOG_VERBOSE, "Frames Rx: %u, Frames Tx: %u\n", 17395+ ctx->frames_rx, ctx->frames_tx); 17396+ ctx->done = 1; 17397+ ctx->logctx = NULL; // Log to NULL works, log to missing crashes 17398+ pts_track_uninit(&ctx->track); 17399+ deint_v4l2m2m_destroy_context(ctx); 17400+} 17401+ 17402+static const AVOption deinterlace_v4l2m2m_options[] = { 17403+ { NULL }, 17404+}; 17405+
17406+AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m); 17407+ 17408+#define OFFSET(x) offsetof(DeintV4L2M2MContext, x) 17409+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) 17410+ 17411+static const AVOption scale_v4l2m2m_options[] = { 17412+ { "w", "Output video width", 17413+ OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS }, 17414+ { "h", "Output video height", 17415+ OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS }, 17416+ { "format", "Output video format (software format of hardware frames)", 17417+ OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS }, 17418+ // These colour properties match the ones of the same name in vf_scale. 17419+ { "out_color_matrix", "Output colour matrix coefficient set", 17420+ OFFSET(colour_matrix_string), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS }, 17421+ { "out_range", "Output colour range", 17422+ OFFSET(colour_range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_UNSPECIFIED }, 17423+ AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, FLAGS, "range" }, 17424+ { "full", "Full range", 17425+ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, 17426+ { "limited", "Limited range", 17427+ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, 17428+ { "jpeg", "Full range", 17429+ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, 17430+ { "mpeg", "Limited range", 17431+ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, 17432+ { "tv", "Limited range", 17433+ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, 17434+ { "pc", "Full range", 17435+ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, 17436+ // These colour properties match the ones in the VAAPI scaler 17437+ { "out_color_primaries", "Output colour primaries", 17438+ OFFSET(colour_primaries_string), AV_OPT_TYPE_STRING, 17439+ { .str = NULL }, .flags = FLAGS }, 17440+ {
"out_color_transfer", "Output colour transfer characteristics", 17441+ OFFSET(colour_transfer_string), AV_OPT_TYPE_STRING, 17442+ { .str = NULL }, .flags = FLAGS }, 17443+ { "out_chroma_location", "Output chroma sample location", 17444+ OFFSET(chroma_location_string), AV_OPT_TYPE_STRING, 17445+ { .str = NULL }, .flags = FLAGS }, 17446+ { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, "force_oar" }, 17447+ { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS }, 17448+ { NULL }, 17449+}; 17450+ 17451+AVFILTER_DEFINE_CLASS(scale_v4l2m2m); 17452+ 17453+static const AVFilterPad deint_v4l2m2m_inputs[] = { 17454+ { 17455+ .name = "default", 17456+ .type = AVMEDIA_TYPE_VIDEO, 17457+ }, 17458+}; 17459+ 17460+static const AVFilterPad deint_v4l2m2m_outputs[] = { 17461+ { 17462+ .name = "default", 17463+ .type = AVMEDIA_TYPE_VIDEO, 17464+ .config_props = deint_v4l2m2m_config_props, 17465+ }, 17466+}; 17467+ 17468+AVFilter ff_vf_deinterlace_v4l2m2m = { 17469+ .name = "deinterlace_v4l2m2m", 17470+ .description = NULL_IF_CONFIG_SMALL("V4L2 M2M deinterlacer"), 17471+ .priv_size = sizeof(DeintV4L2M2MContext), 17472+ .init = &deint_v4l2m2m_init, 17473+ .uninit = &deint_v4l2m2m_uninit, 17474+ FILTER_INPUTS(deint_v4l2m2m_inputs), 17475+ FILTER_OUTPUTS(deint_v4l2m2m_outputs), 17476+ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME), // video filter: the one supported format is a pixel format, not a sample format 17477+ .priv_class = &deinterlace_v4l2m2m_class, 17478+ .activate = deint_v4l2m2m_activate, 17479+}; 17480+ 17481+AVFilter ff_vf_scale_v4l2m2m = { 17482+ .name = "scale_v4l2m2m", 17483+ .description = NULL_IF_CONFIG_SMALL("V4L2 M2M scaler"), 17484+ .priv_size = sizeof(DeintV4L2M2MContext), 17485+ .init = &scale_v4l2m2m_init, 17486+ .uninit = &deint_v4l2m2m_uninit, 17487+
FILTER_INPUTS(deint_v4l2m2m_inputs), 17488+ FILTER_OUTPUTS(deint_v4l2m2m_outputs), 17489+ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME), // video filter: the one supported format is a pixel format, not a sample format 17490+ .priv_class = &scale_v4l2m2m_class, 17491+ .activate = deint_v4l2m2m_activate, 17492+}; 17493+ 17494--- /dev/null 17495+++ b/libavfilter/vf_unsand.c 17496@@ -0,0 +1,228 @@ 17497+/* 17498+ * Copyright (c) 2007 Bobby Bingham 17499+ * 17500+ * This file is part of FFmpeg. 17501+ * 17502+ * FFmpeg is free software; you can redistribute it and/or 17503+ * modify it under the terms of the GNU Lesser General Public 17504+ * License as published by the Free Software Foundation; either 17505+ * version 2.1 of the License, or (at your option) any later version. 17506+ * 17507+ * FFmpeg is distributed in the hope that it will be useful, 17508+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 17509+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17510+ * Lesser General Public License for more details. 17511+ * 17512+ * You should have received a copy of the GNU Lesser General Public 17513+ * License along with FFmpeg; if not, write to the Free Software 17514+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17515+ */ 17516+ 17517+/** 17518+ * @file 17519+ * unsand video filter: converts sand pixel formats to planar YUV 17520+ */ 17521+ 17522+#include <string.h> 17523+ 17524+#include "libavutil/internal.h" 17525+#include "libavutil/mem.h" 17526+#include "libavutil/pixdesc.h" 17527+#include "libavutil/opt.h" 17528+#include "libavutil/rpi_sand_fns.h" 17529+ 17530+#include "avfilter.h" 17531+#include "formats.h" 17532+#include "internal.h" 17533+#include "video.h" 17534+ 17535+typedef struct UnsandContext { 17536+ const AVClass *class; 17537+} UnsandContext; 17538+ 17539+static av_cold void uninit(AVFilterContext *ctx) 17540+{ 17541+// UnsandContext *s = ctx->priv; 17542+} 17543+ 17544+static av_cold int init(AVFilterContext *ctx) 17545+{ 17546+// UnsandContext *s = ctx->priv; 17547+ 17548+ return 0;
17549+} 17550+ 17551+ 17552+static int filter_frame(AVFilterLink *link, AVFrame *in) 17553+{ 17554+ AVFilterLink * const outlink = link->dst->outputs[0]; 17555+ AVFrame *out = NULL; 17556+ int rv = 0; 17557+ 17558+ if (outlink->format == in->format) { 17559+ // If nothing to do then do nothing 17560+ out = in; 17561+ } 17562+ else 17563+ { 17564+ if ((out = ff_get_video_buffer(outlink, av_frame_cropped_width(in), av_frame_cropped_height(in))) == NULL) 17565+ { 17566+ rv = AVERROR(ENOMEM); 17567+ goto fail; 17568+ } 17569+ if (av_rpi_sand_to_planar_frame(out, in) != 0) 17570+ { 17571+ rv = -1; 17572+ goto fail; 17573+ } 17574+ 17575+ av_frame_free(&in); 17576+ } 17577+ 17578+ return ff_filter_frame(outlink, out); 17579+ 17580+fail: 17581+ av_frame_free(&out); 17582+ av_frame_free(&in); 17583+ return rv; 17584+} 17585+ 17586+#if 0 17587+static void dump_fmts(const AVFilterFormats * fmts) 17588+{ 17589+ int i; 17590+ if (fmts== NULL) { 17591+ printf("NULL\n"); 17592+ return; 17593+ } 17594+ for (i = 0; i < fmts->nb_formats; ++i) { 17595+ printf(" %d", fmts->formats[i]); 17596+ } 17597+ printf("\n"); 17598+} 17599+#endif 17600+ 17601+static int query_formats(AVFilterContext *ctx) 17602+{ 17603+// UnsandContext *s = ctx->priv; 17604+ int ret; 17605+ 17606+ // If we aren't connected at both ends then just do nothing 17607+ if (ctx->inputs[0] == NULL || ctx->outputs[0] == NULL) 17608+ return 0; 17609+ 17610+ // Our output formats depend on our input formats and we can't/don't 17611+ // want to convert between bit depths so we need to wait for the source 17612+ // to have an opinion before we do 17613+ if (ctx->inputs[0]->incfg.formats == NULL) 17614+ return AVERROR(EAGAIN); 17615+ 17616+ // Accept anything 17617+ if (ctx->inputs[0]->outcfg.formats == NULL && 17618+ (ret = ff_formats_ref(ctx->inputs[0]->incfg.formats, &ctx->inputs[0]->outcfg.formats)) < 0) 17619+ return ret; 17620+ 17621+ // Filter out sand formats 17622+ 17623+ // Generate a container if we don't already 
have one 17624+ if (ctx->outputs[0]->incfg.formats == NULL) 17625+ { 17626+ // Somewhat rubbish way of ensuring we have a good structure 17627+ static const enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE}; 17628+ AVFilterFormats *formats = ff_make_format_list(out_fmts); 17629+ 17630+ if (formats == NULL) 17631+ return AVERROR(ENOMEM); 17632+ if ((ret = ff_formats_ref(formats, &ctx->outputs[0]->incfg.formats)) < 0) 17633+ return ret; 17634+ } 17635+ 17636+ // Replace old format list with new filtered list derived from what our 17637+ // input says it can do 17638+ { 17639+ const AVFilterFormats * const src_ff = ctx->inputs[0]->outcfg.formats; 17640+ AVFilterFormats * const dst_ff = ctx->outputs[0]->incfg.formats; 17641+ enum AVPixelFormat *dst_fmts = av_malloc(sizeof(enum AVPixelFormat) * src_ff->nb_formats); 17642+ int i; 17643+ int n = 0; 17644+ int seen_420p = 0; 17645+ int seen_420p10 = 0; 17646+ if (dst_fmts == NULL) return AVERROR(ENOMEM); // allocation can fail; the loop below writes through dst_fmts 17647+ for (i = 0; i < src_ff->nb_formats; ++i) { 17648+ const enum AVPixelFormat f = src_ff->formats[i]; 17649+ 17650+ switch (f){ 17651+ case AV_PIX_FMT_YUV420P: 17652+ case AV_PIX_FMT_SAND128: 17653+ case AV_PIX_FMT_RPI4_8: 17654+ if (!seen_420p) { 17655+ seen_420p = 1; 17656+ dst_fmts[n++] = AV_PIX_FMT_YUV420P; 17657+ } 17658+ break; 17659+ case AV_PIX_FMT_SAND64_10: 17660+ case AV_PIX_FMT_YUV420P10: 17661+ case AV_PIX_FMT_RPI4_10: 17662+ if (!seen_420p10) { 17663+ seen_420p10 = 1; 17664+ dst_fmts[n++] = AV_PIX_FMT_YUV420P10; 17665+ } 17666+ break; 17667+ default: 17668+ dst_fmts[n++] = f; 17669+ break; 17670+ } 17671+ } 17672+ 17673+ av_freep(&dst_ff->formats); 17674+ dst_ff->formats = dst_fmts; 17675+ dst_ff->nb_formats = n; 17676+ } 17677+ 17678+// printf("Unsand: %s calc: ", __func__); 17679+// dump_fmts(ctx->outputs[0]->incfg.formats); 17680+ 17681+ return 0; 17682+} 17683+ 17684+ 17685+#define OFFSET(x) offsetof(UnsandContext, x) 17686+static const AVOption unsand_options[] = { 17687+ { NULL } 17688+}; 17689+ 17690+
17691+AVFILTER_DEFINE_CLASS(unsand); 17692+ 17693+static const AVFilterPad avfilter_vf_unsand_inputs[] = { 17694+ { 17695+ .name = "default", 17696+ .type = AVMEDIA_TYPE_VIDEO, 17697+ .filter_frame = filter_frame, 17698+ }, 17699+ // No NULL terminator: FILTER_INPUTS() takes the pad count from the array size 17700+}; 17701+ 17702+static const AVFilterPad avfilter_vf_unsand_outputs[] = { 17703+ { 17704+ .name = "default", 17705+ .type = AVMEDIA_TYPE_VIDEO 17706+ }, 17707+}; 17708+ 17709+AVFilter ff_vf_unsand = { 17710+ .name = "unsand", 17711+ .description = NULL_IF_CONFIG_SMALL("Convert sand pix fmt to yuv"), 17712+ 17713+ .init = init, 17714+ .uninit = uninit, 17715+ 17716+ FILTER_QUERY_FUNC(query_formats), 17717+ 17718+ .priv_size = sizeof(UnsandContext), 17719+ .priv_class = &unsand_class, 17720+ 17721+ FILTER_INPUTS(avfilter_vf_unsand_inputs), 17722+ FILTER_OUTPUTS(avfilter_vf_unsand_outputs), 17723+}; 17724+ 17725--- a/libavfilter/x86/vf_bwdif_init.c 17726+++ b/libavfilter/x86/vf_bwdif_init.c 17727@@ -42,11 +42,9 @@ void ff_bwdif_filter_line_12bit_ssse3(vo 17728 int mrefs2, int prefs3, int mrefs3, int prefs4, 17729 int mrefs4, int parity, int clip_max); 17730 17731-av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif) 17732+av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth) 17733 { 17734- YADIFContext *yadif = &bwdif->yadif; 17735 int cpu_flags = av_get_cpu_flags(); 17736- int bit_depth = (!yadif->csp) ? 8 : yadif->csp->comp[0].depth; 17737 17738 if (bit_depth <= 8) { 17739 if (EXTERNAL_SSE2(cpu_flags)) 17740--- a/libavformat/matroskaenc.c 17741+++ b/libavformat/matroskaenc.c 17742@@ -75,6 +75,10 @@ 17743 17744 #define IS_WEBM(mkv) (CONFIG_WEBM_MUXER && CONFIG_MATROSKA_MUXER ?
\ 17745 ((mkv)->mode == MODE_WEBM) : CONFIG_WEBM_MUXER) 17746+ 17747+/* Reserved size for H264 headers if not extant at init time */ 17748+#define MAX_H264_HEADER_SIZE 1024 17749+ 17750 #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \ 17751 !(mkv)->is_live) 17752 17753@@ -1119,8 +1123,12 @@ static int mkv_assemble_native_codecpriv 17754 case AV_CODEC_ID_WAVPACK: 17755 return put_wv_codecpriv(dyn_cp, extradata, extradata_size); 17756 case AV_CODEC_ID_H264: 17757- return ff_isom_write_avcc(dyn_cp, extradata, 17758- extradata_size); 17759+ if (extradata_size) 17760+ return ff_isom_write_avcc(dyn_cp, extradata, 17761+ extradata_size); 17762+ else 17763+ *size_to_reserve = MAX_H264_HEADER_SIZE; 17764+ break; 17765 case AV_CODEC_ID_HEVC: 17766 return ff_isom_write_hvcc(dyn_cp, extradata, 17767 extradata_size, 0); 17768@@ -2726,8 +2734,8 @@ static int mkv_check_new_extra_data(AVFo 17769 } 17770 break; 17771 #endif 17772- // FIXME: Remove the following once libaom starts propagating proper extradata during init() 17773- // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2208 17774+ // FIXME: Remove the following once libaom starts propagating extradata during init() 17775+ // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012 17776 case AV_CODEC_ID_AV1: 17777 if (side_data_size && mkv->track.bc && !par->extradata_size) { 17778 // If the reserved space doesn't suffice, only write 17779@@ -2739,6 +2747,16 @@ static int mkv_check_new_extra_data(AVFo 17780 } else if (!par->extradata_size) 17781 return AVERROR_INVALIDDATA; 17782 break; 17783+ // H264 V4L2 has a similar issue 17784+ case AV_CODEC_ID_H264: 17785+ if (side_data_size && mkv->track.bc && !par->extradata_size) { 17786+ ret = mkv_update_codecprivate(s, mkv, side_data, side_data_size, 17787+ par, mkv->track.bc, track, 0); 17788+ if (ret < 0) 17789+ return ret; 17790+ } else if (!par->extradata_size) 17791+ return AVERROR_INVALIDDATA; 17792+ break; 17793 default: 17794 if 
(side_data_size) 17795 av_log(s, AV_LOG_DEBUG, "Ignoring new extradata in a packet for stream %d.\n", pkt->stream_index); 17796@@ -3171,9 +3189,15 @@ static int mkv_init(struct AVFormatConte 17797 track->reformat = mkv_reformat_wavpack; 17798 break; 17799 case AV_CODEC_ID_H264: 17800+ // Default to reformat if no extradata as the only current 17801+ // encoder which does this is v4l2m2m which needs reformat 17802+ if (par->extradata_size == 0 || 17803+ (par->extradata_size > 3 && 17804+ (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1))) 17805+ track->reformat = mkv_reformat_h2645; 17806+ break; 17807 case AV_CODEC_ID_HEVC: 17808- if ((par->codec_id == AV_CODEC_ID_H264 && par->extradata_size > 0 || 17809- par->codec_id == AV_CODEC_ID_HEVC && par->extradata_size > 6) && 17810+ if (par->extradata_size > 6 && 17811 (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1)) 17812 track->reformat = mkv_reformat_h2645; 17813 break; 17814--- a/libavformat/movenc.c 17815+++ b/libavformat/movenc.c 17816@@ -6318,6 +6318,7 @@ static int mov_write_single_packet(AVFor 17817 if (trk->par->codec_id == AV_CODEC_ID_MP4ALS || 17818 trk->par->codec_id == AV_CODEC_ID_AAC || 17819 trk->par->codec_id == AV_CODEC_ID_AV1 || 17820+ trk->par->codec_id == AV_CODEC_ID_H264 || 17821 trk->par->codec_id == AV_CODEC_ID_FLAC) { 17822 size_t side_size; 17823 uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); 17824--- a/libavformat/rtpenc.c 17825+++ b/libavformat/rtpenc.c 17826@@ -19,6 +19,7 @@ 17827 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17828 */ 17829 17830+#include "avc.h" 17831 #include "avformat.h" 17832 #include "mpegts.h" 17833 #include "internal.h" 17834@@ -584,8 +585,25 @@ static int rtp_write_packet(AVFormatCont 17835 ff_rtp_send_vc2hq(s1, pkt->data, size, st->codecpar->field_order != AV_FIELD_PROGRESSIVE ? 
1 : 0); 17836 break; 17837 case AV_CODEC_ID_H264: 17838+ { 17839+ uint8_t *side_data; 17840+ size_t side_data_size = 0; 17841+ 17842+ side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, 17843+ &side_data_size); 17844+ 17845+ if (side_data_size != 0) { 17846+ int ps_size = side_data_size; 17847+ uint8_t * ps_buf = NULL; 17848+ 17849+ ff_avc_write_annexb_extradata(side_data, &ps_buf, &ps_size); 17850+ av_log(s1, AV_LOG_TRACE, "H264: write side data=%d\n", ps_size); 17851+ ff_rtp_send_h264_hevc(s1, ps_buf ? ps_buf : side_data, ps_size); 17852+ av_free(ps_buf); 17853+ } 17854 ff_rtp_send_h264_hevc(s1, pkt->data, size); 17855 break; 17856+ } 17857 case AV_CODEC_ID_H261: 17858 ff_rtp_send_h261(s1, pkt->data, size); 17859 break; 17860--- a/libavutil/Makefile 17861+++ b/libavutil/Makefile 17862@@ -72,6 +72,7 @@ HEADERS = adler32.h 17863 rational.h \ 17864 replaygain.h \ 17865 ripemd.h \ 17866+ rpi_sand_fns.h \ 17867 samplefmt.h \ 17868 sha.h \ 17869 sha512.h \ 17870@@ -191,6 +192,7 @@ OBJS-$(CONFIG_MACOS_KPERF) 17871 OBJS-$(CONFIG_MEDIACODEC) += hwcontext_mediacodec.o 17872 OBJS-$(CONFIG_OPENCL) += hwcontext_opencl.o 17873 OBJS-$(CONFIG_QSV) += hwcontext_qsv.o 17874+OBJS-$(CONFIG_SAND) += rpi_sand_fns.o 17875 OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o 17876 OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o 17877 OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o 17878@@ -211,6 +213,7 @@ SKIPHEADERS-$(CONFIG_D3D11VA) + 17879 SKIPHEADERS-$(CONFIG_DXVA2) += hwcontext_dxva2.h 17880 SKIPHEADERS-$(CONFIG_QSV) += hwcontext_qsv.h 17881 SKIPHEADERS-$(CONFIG_OPENCL) += hwcontext_opencl.h 17882+SKIPHEADERS-$(CONFIG-RPI) += rpi_sand_fn_pw.h 17883 SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h 17884 SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h 17885 SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h 17886--- a/libavutil/aarch64/Makefile 17887+++ b/libavutil/aarch64/Makefile 17888@@ -1,4 +1,6 @@ 17889 OBJS += aarch64/cpu.o \ 17890 aarch64/float_dsp_init.o \ 
17891 17892-NEON-OBJS += aarch64/float_dsp_neon.o 17893+NEON-OBJS += aarch64/float_dsp_neon.o \ 17894+ aarch64/rpi_sand_neon.o \ 17895+ 17896--- /dev/null 17897+++ b/libavutil/aarch64/rpi_sand_neon.S 17898@@ -0,0 +1,672 @@ 17899+/* 17900+Copyright (c) 2021 Michael Eiler 17901+ 17902+Redistribution and use in source and binary forms, with or without 17903+modification, are permitted provided that the following conditions are met: 17904+ * Redistributions of source code must retain the above copyright 17905+ notice, this list of conditions and the following disclaimer. 17906+ * Redistributions in binary form must reproduce the above copyright 17907+ notice, this list of conditions and the following disclaimer in the 17908+ documentation and/or other materials provided with the distribution. 17909+ * Neither the name of the copyright holder nor the 17910+ names of its contributors may be used to endorse or promote products 17911+ derived from this software without specific prior written permission. 17912+ 17913+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17914+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17915+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17916+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 17917+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17918+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 17919+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 17920+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 17921+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 17922+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
17923+ 17924+Authors: Michael Eiler <eiler.mike@gmail.com> 17925+*/ 17926+ 17927+#include "asm.S" 17928+ 17929+// void ff_rpi_sand8_lines_to_planar_y8( 17930+// uint8_t * dest, : x0 17931+// unsigned int dst_stride, : w1 (only used to derive the full-block count w8) 17932+// const uint8_t * src, : x2 17933+// unsigned int src_stride1, : w3, always 128 17934+// unsigned int src_stride2, : w4 17935+// unsigned int _x, : w5 17936+// unsigned int y, : w6 (never read as y; w6 is reused as the byte scratch register) 17937+// unsigned int _w, : w7 17938+// unsigned int h); : [sp, #0] 17939+ 17940+function ff_rpi_sand8_lines_to_planar_y8, export=1 17941+ // w15 contains the number of rows we need to process 17942+ ldr w15, [sp, #0] 17943+ 17944+ // w8 will contain the number of blocks per row 17945+ // w8 = floor(w1/stride1) - NOTE(review): w1 is dst_stride, not _w (w7); confirm this is intended 17946+ // stride1 is assumed to always be 128 17947+ mov w8, w1 17948+ lsr w8, w8, #7 17949+ 17950+ // in case the width of the image is not a multiple of 128, there will 17951+ // be an incomplete block at the end of every row 17952+ // w9 contains the number of pixels stored within this block 17953+ // w9 = _w - w8 * 128 17954+ lsl w9, w8, #7 17955+ sub w9, w7, w9 17956+ 17957+ // this is the value we have to add to the src pointer after reading a complete block 17958+ // it will move the address to the start of the next block 17959+ // w10 = stride2 * stride1 - stride1 17960+ mov w10, w4 17961+ lsl w10, w10, #7 17962+ sub w10, w10, #128 17963+ 17964+ // w11 is the row offset, meaning the start offset of the first block of the current row 17965+ // this will be increased by stride1 within every iteration of the row_loop 17966+ eor w11, w11, w11 17967+ 17968+ // w12 = 0, processed row count 17969+ eor w12, w12, w12 17970+row_loop: 17971+ // start of the first block within the current row 17972+ // x13 = row offset + src 17973+ mov x13, x2 17974+ add x13, x13, x11 17975+ 17976+ // w14 = 0, processed block count 17977+ eor w14, w14, w14 17978+ 17979+ cmp w8, #0 17980+ beq no_main_y8 17981+ 17982+block_loop: 17983+ // copy 128 bytes (a full block) into the
vector registers v0-v7 and increase the src address by 128 17984+ // these are not callee-saved registers, so we do not need to back them up 17985+ ld1 { v0.16b, v1.16b, v2.16b, v3.16b}, [x13], #64 17986+ ld1 { v4.16b, v5.16b, v6.16b, v7.16b}, [x13], #64 17987+ 17988+ // write these registers back to the destination vector and increase the dst address by 128 17989+ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 17990+ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x0], #64 17991+ 17992+ // move the source register to the beginning of the next block (x13 = src + block offset) 17993+ add x13, x13, x10 17994+ // increase the block counter 17995+ add w14, w14, #1 17996+ 17997+ // continue with the block_loop if we haven't copied all full blocks yet 17998+ cmp w8, w14 17999+ bgt block_loop 18000+ 18001+ // handle the last block at the end of each row 18002+ // at most 127 byte values copied from src to dst 18003+no_main_y8: 18004+ eor w5, w5, w5 // i = 0 18005+incomplete_block_loop_y8: 18006+ cmp w5, w9 18007+ bge incomplete_block_loop_end_y8 18008+ 18009+ ldrb w6, [x13] 18010+ strb w6, [x0] 18011+ add x13, x13, #1 18012+ add x0, x0, #1 18013+ 18014+ add w5, w5, #1 18015+ b incomplete_block_loop_y8 18016+incomplete_block_loop_end_y8: 18017+ 18018+ 18019+ // increase the row offset by 128 (stride1) 18020+ add w11, w11, #128 18021+ // increment the row counter 18022+ add w12, w12, #1 18023+ 18024+ // process the next row if we haven't finished yet 18025+ cmp w15, w12 18026+ bgt row_loop 18027+ 18028+ ret 18029+endfunc 18030+ 18031+ 18032+ 18033+// void ff_rpi_sand8_lines_to_planar_c8( 18034+// uint8_t * dst_u, : x0 18035+// unsigned int dst_stride_u, : w1 == width 18036+// uint8_t * dst_v, : x2 18037+// unsigned int dst_stride_v, : w3 == width 18038+// const uint8_t * src, : x4 18039+// unsigned int stride1, : w5 == 128 18040+// unsigned int stride2, : w6 18041+// unsigned int _x, : w7 18042+// unsigned int y, : [sp, #0] (not loaded by the function below) 18043+// unsigned int _w, : [sp, #8] 18044+//
unsigned int h); : [sp, #16] 18045+ 18046+function ff_rpi_sand8_lines_to_planar_c8, export=1 18047+ // w7 = width 18048+ ldr w7, [sp, #8] 18049+ 18050+ // w15 contains the number of rows we need to process 18051+ // counts down 18052+ ldr w15, [sp, #16] 18053+ 18054+ // number of full blocks, w8 = _w / (stride1 >> 1) == _w / 64 == _w >> 6 18055+ mov w8, w7 18056+ lsr w8, w8, #6 18057+ 18058+ // number of pixels in block at the end of every row 18059+ // w9 = _w - (w8 * 64) 18060+ lsl w9, w8, #6 18061+ sub w9, w7, w9 18062+ 18063+ // Skip at the end of the line to account for stride 18064+ sub w12, w1, w7 18065+ 18066+ // address delta to the beginning of the next block 18067+ // w10 = (stride2 * stride1 - stride1) = stride2 * 128 - 128 18068+ lsl w10, w6, #7 18069+ sub w10, w10, #128 18070+ 18071+ // w11 = row address start offset = 0 18072+ eor w11, w11, w11 18073+ 18074+row_loop_c8: 18075+ // start of the first block within the current row 18076+ // x13 = row offset + src 18077+ mov x13, x4 18078+ add x13, x13, x11 18079+ 18080+ // w14 = 0, processed block count 18081+ eor w14, w14, w14 18082+ 18083+ cmp w8, #0 18084+ beq no_main_c8 18085+ 18086+block_loop_c8: 18087+ // load the full block -> 128 bytes, the block contains 64 interleaved U and V values 18088+ ld2 { v0.16b, v1.16b }, [x13], #32 18089+ ld2 { v2.16b, v3.16b }, [x13], #32 18090+ ld2 { v4.16b, v5.16b }, [x13], #32 18091+ ld2 { v6.16b, v7.16b }, [x13], #32 18092+ 18093+ // swap registers so that each plane can be written out with a single instruction 18094+ mov v16.16b, v1.16b 18095+ mov v17.16b, v3.16b 18096+ mov v18.16b, v5.16b 18097+ mov v1.16b, v2.16b 18098+ mov v2.16b, v4.16b 18099+ mov v3.16b, v6.16b 18100+ mov v4.16b, v16.16b 18101+ mov v5.16b, v17.16b 18102+ mov v6.16b, v18.16b 18103+ 18104+ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 18105+ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x2], #64 18106+ 18107+ // increment block counter and move src to the beginning of the next block 18108+ add w14, w14,
#1 18109+ add x13, x13, x10 18110+ 18111+ // jump to block_loop_c8 iff the block count is smaller than the number of full blocks 18112+ cmp w8, w14 18113+ bgt block_loop_c8 18114+ 18115+no_main_c8: 18116+ // handle incomplete block at the end of every row 18117+ eor w5, w5, w5 // i = 0, counts the U/V pairs copied from the incomplete block 18118+incomplete_block_loop_c8: 18119+ cmp w5, w9 18120+ bge incomplete_block_loop_end_c8 18121+ 18122+ ldrb w1, [x13] 18123+ strb w1, [x0] 18124+ add x13, x13, #1 18125+ 18126+ ldrb w1, [x13] 18127+ strb w1, [x2] 18128+ add x13, x13, #1 18129+ 18130+ add x0, x0, #1 18131+ add x2, x2, #1 18132+ 18133+ add w5, w5, #1 18134+ b incomplete_block_loop_c8 18135+incomplete_block_loop_end_c8: 18136+ 18137+ // increase row_offset by stride1 18138+ add w11, w11, #128 18139+ add x0, x0, w12, sxtw 18140+ add x2, x2, w12, sxtw 18141+ 18142+ // jump to row_loop_c8 while rows remain (w15 counts down to 0) 18143+ subs w15, w15, #1 18144+ bgt row_loop_c8 18145+ 18146+ ret 18147+endfunc 18148+ 18149+// Unzip chroma 18150+// 18151+// On entry: 18152+// a0 = V0, U2, ... 18153+// a1 = U0, V1, ... 18154+// a2 = U1, V2, ... 18155+// b0 = V8, U10, ... 18156+// b1 = U8, V9, ... 18157+// b2 = U9, V10, ... 18158+// 18159+// On exit: 18160+// d0 = U0, U3, ... 18161+// ... 18162+// a0 = V0, V3, .. 18163+// ... 18164+// 18165+// Reg order for USAND is a1, a0, a2 (i.e.
swap natural order of 1st 2 dest regs) 18166+ 18167+.macro UZPH_C d0, d1, d2, a0, a1, a2, b0, b1, b2 18168+ uzp1 \d0\().8h, \a1\().8h, \b1\().8h 18169+ uzp1 \d1\().8h, \a2\().8h, \b2\().8h 18170+ uzp2 \d2\().8h, \a0\().8h, \b0\().8h 18171+ 18172+ uzp1 \a0\().8h, \a0\().8h, \b0\().8h 18173+ uzp2 \a1\().8h, \a1\().8h, \b1\().8h 18174+ uzp2 \a2\().8h, \a2\().8h, \b2\().8h 18175+.endm 18176+ 18177+// SAND30 -> 10bit 18178+.macro USAND10 d0, d1, d2, a0, a1 18179+ shrn \d2\().4h, \a0\().4s, #14 18180+ shrn \d1\().4h, \a0\().4s, #10 18181+ 18182+ shrn2 \d2\().8h, \a1\().4s, #14 18183+ shrn2 \d1\().8h, \a1\().4s, #10 18184+ uzp1 \d0\().8h, \a0\().8h, \a1\().8h 18185+ 18186+ ushr \d2\().8h, \d2\().8h, #6 18187+ bic \d0\().8h, #0xfc, lsl #8 18188+ bic \d1\().8h, #0xfc, lsl #8 18189+.endm 18190+ 18191+// SAND30 -> 8bit 18192+.macro USAND8 d0, d1, d2, a0, a1, a2, a3, t0, t1, t2 18193+ shrn \d1\().4h, \a0\().4s, #12 18194+ shrn2 \d1\().8h, \a1\().4s, #12 18195+ uzp1 \d0\().8h, \a0\().8h, \a1\().8h 18196+ uzp2 \d2\().8h, \a0\().8h, \a1\().8h 18197+ 18198+ shrn \t1\().4h, \a2\().4s, #12 18199+ shrn2 \t1\().8h, \a3\().4s, #12 18200+ uzp1 \t0\().8h, \a2\().8h, \a3\().8h 18201+ uzp2 \t2\().8h, \a2\().8h, \a3\().8h 18202+ 18203+ shrn \d0\().8b, \d0\().8h, #2 18204+ shrn2 \d0\().16b, \t0\().8h, #2 18205+ shrn \d2\().8b, \d2\().8h, #6 18206+ shrn2 \d2\().16b, \t2\().8h, #6 18207+ uzp1 \d1\().16b, \d1\().16b, \t1\().16b 18208+.endm 18209+ 18210+ 18211+// void ff_rpi_sand30_lines_to_planar_c16( 18212+// uint8_t * dst_u, // [x0] 18213+// unsigned int dst_stride_u, // [w1] 18214+// uint8_t * dst_v, // [x2] 18215+// unsigned int dst_stride_v, // [w3] 18216+// const uint8_t * src, // [x4] 18217+// unsigned int stride1, // [w5] 128 18218+// unsigned int stride2, // [w6] 18219+// unsigned int _x, // [w7] 0 18220+// unsigned int y, // [sp, #0] 18221+// unsigned int _w, // [sp, #8] w9 18222+// unsigned int h); // [sp, #16] w10 18223+ 18224+function ff_rpi_sand30_lines_to_planar_c16, export=1 
18225+ ldr w7, [sp, #0] // y 18226+ ldr w8, [sp, #8] // _w 18227+ ldr w10, [sp, #16] // h 18228+ lsl w6, w6, #7 // Fixup stride2 18229+ sub w6, w6, #64 18230+ uxtw x6, w6 18231+ sub w1, w1, w8, LSL #1 // Fixup chroma strides 18232+ sub w3, w3, w8, LSL #1 18233+ lsl w7, w7, #7 // Add y to src 18234+ add x4, x4, w7, UXTW 18235+10: 18236+ mov w13, #0 18237+ mov x5, x4 18238+ mov w9, w8 18239+1: 18240+ ld1 {v0.4s-v3.4s}, [x5], #64 18241+ ld1 {v4.4s-v7.4s}, [x5], x6 18242+ subs w9, w9, #48 18243+ 18244+ USAND10 v17, v16, v18, v0, v1 18245+ USAND10 v20, v19, v21, v2, v3 18246+ UZPH_C v0, v1, v2, v16, v17, v18, v19, v20, v21 18247+ USAND10 v23, v22, v24, v4, v5 18248+ USAND10 v26, v25, v27, v6, v7 18249+ UZPH_C v4, v5, v6, v22, v23, v24, v25, v26, v27 18250+ 18251+ blt 2f 18252+ 18253+ st3 {v0.8h-v2.8h}, [x0], #48 18254+ st3 {v4.8h-v6.8h}, [x0], #48 18255+ st3 {v16.8h-v18.8h}, [x2], #48 18256+ st3 {v22.8h-v24.8h}, [x2], #48 18257+ 18258+ bne 1b 18259+11: 18260+ subs w10, w10, #1 18261+ add x4, x4, #128 18262+ add x0, x0, w1, UXTW 18263+ add x2, x2, w3, UXTW 18264+ bne 10b 18265+99: 18266+ ret 18267+ 18268+// Partial final write 18269+2: 18270+ cmp w9, #24-48 18271+ blt 1f 18272+ st3 {v0.8h - v2.8h}, [x0], #48 18273+ st3 {v16.8h - v18.8h}, [x2], #48 18274+ beq 11b 18275+ mov v0.16b, v4.16b 18276+ mov v1.16b, v5.16b 18277+ sub w9, w9, #24 18278+ mov v2.16b, v6.16b 18279+ mov v16.16b, v22.16b 18280+ mov v17.16b, v23.16b 18281+ mov v18.16b, v24.16b 18282+1: 18283+ cmp w9, #12-48 18284+ blt 1f 18285+ st3 {v0.4h - v2.4h}, [x0], #24 18286+ st3 {v16.4h - v18.4h}, [x2], #24 18287+ beq 11b 18288+ mov v0.d[0], v0.d[1] 18289+ sub w9, w9, #12 18290+ mov v1.d[0], v1.d[1] 18291+ mov v2.d[0], v2.d[1] 18292+ mov v16.d[0], v16.d[1] 18293+ mov v17.d[0], v17.d[1] 18294+ mov v18.d[0], v18.d[1] 18295+1: 18296+ cmp w9, #6-48 18297+ blt 1f 18298+ st3 {v0.h - v2.h}[0], [x0], #6 18299+ st3 {v0.h - v2.h}[1], [x0], #6 18300+ st3 {v16.h - v18.h}[0], [x2], #6 18301+ st3 {v16.h - v18.h}[1], [x2], #6 
18302+ beq 11b 18303+ mov v0.s[0], v0.s[1] 18304+ sub w9, w9, #6 18305+ mov v1.s[0], v1.s[1] 18306+ mov v2.s[0], v2.s[1] 18307+ mov v16.s[0], v16.s[1] 18308+ mov v17.s[0], v17.s[1] 18309+ mov v18.s[0], v18.s[1] 18310+1: 18311+ cmp w9, #3-48 18312+ blt 1f 18313+ st3 {v0.h - v2.h}[0], [x0], #6 18314+ st3 {v16.h - v18.h}[0], [x2], #6 18315+ beq 11b 18316+ mov v0.h[0], v0.h[1] 18317+ sub w9, w9, #3 18318+ mov v1.h[0], v1.h[1] 18319+ mov v16.h[0], v16.h[1] 18320+ mov v17.h[0], v17.h[1] 18321+1: 18322+ cmp w9, #2-48 18323+ blt 1f 18324+ st2 {v0.h - v1.h}[0], [x0], #4 18325+ st2 {v16.h - v17.h}[0], [x2], #4 18326+ b 11b 18327+1: 18328+ st1 {v0.h}[0], [x0], #2 18329+ st1 {v16.h}[0], [x2], #2 18330+ b 11b 18331+endfunc 18332+ 18333+ 18334+//void ff_rpi_sand30_lines_to_planar_p010( 18335+// uint8_t * dest, 18336+// unsigned int dst_stride, 18337+// const uint8_t * src, 18338+// unsigned int src_stride1, 18339+// unsigned int src_stride2, 18340+// unsigned int _x, 18341+// unsigned int y, 18342+// unsigned int _w, 18343+// unsigned int h); 18344+ 18345+// void ff_rpi_sand30_lines_to_planar_y8( 18346+// uint8_t * dest, : x0 18347+// unsigned int dst_stride, : w1 18348+// const uint8_t * src, : x2 18349+// unsigned int src_stride1, : w3, always 128 18350+// unsigned int src_stride2, : w4 18351+// unsigned int _x, : w5 18352+// unsigned int y, : w6 18353+// unsigned int _w, : w7 18354+// unsigned int h); : [sp, #0] 18355+// 18356+// Assumes that we are starting on a stripe boundary and that overreading 18357+// within the stripe is OK. 
However it does respect the dest size for wri 18358+ 18359+function ff_rpi_sand30_lines_to_planar_y16, export=1 18360+ lsl w4, w4, #7 18361+ sub w4, w4, #64 18362+ uxtw x4, w4 18363+ sub w1, w1, w7, lsl #1 18364+ uxtw x6, w6 18365+ add x8, x2, x6, lsl #7 18366+ ldr w6, [sp, #0] 18367+ 18368+10: 18369+ mov x2, x8 18370+ mov w5, w7 18371+1: 18372+ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 18373+ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 18374+ 18375+ subs w5, w5, #96 18376+ 18377+ USAND10 v16, v17, v18, v0, v1 18378+ USAND10 v19, v20, v21, v2, v3 18379+ USAND10 v22, v23, v24, v4, v5 18380+ USAND10 v25, v26, v27, v6, v7 18381+ 18382+ blt 2f 18383+ 18384+ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 18385+ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 18386+ st3 {v22.8h, v23.8h, v24.8h}, [x0], #48 18387+ st3 {v25.8h, v26.8h, v27.8h}, [x0], #48 18388+ 18389+ bne 1b 18390+ 18391+11: 18392+ subs w6, w6, #1 18393+ add x0, x0, w1, uxtw 18394+ add x8, x8, #128 18395+ bne 10b 18396+ 18397+ ret 18398+ 18399+// Partial final write 18400+2: 18401+ cmp w5, #48-96 18402+ blt 1f 18403+ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 18404+ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 18405+ beq 11b 18406+ mov v16.16b, v22.16b 18407+ mov v17.16b, v23.16b 18408+ sub w5, w5, #48 18409+ mov v18.16b, v24.16b 18410+ mov v19.16b, v25.16b 18411+ mov v20.16b, v26.16b 18412+ mov v21.16b, v27.16b 18413+1: 18414+ cmp w5, #24-96 18415+ blt 1f 18416+ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 18417+ beq 11b 18418+ mov v16.16b, v19.16b 18419+ mov v17.16b, v20.16b 18420+ sub w5, w5, #24 18421+ mov v18.16b, v21.16b 18422+1: 18423+ cmp w5, #12-96 18424+ blt 1f 18425+ st3 {v16.4h, v17.4h, v18.4h}, [x0], #24 18426+ beq 11b 18427+ mov v16.d[0], v16.d[1] 18428+ sub w5, w5, #12 18429+ mov v17.d[0], v17.d[1] 18430+ mov v18.d[0], v18.d[1] 18431+1: 18432+ cmp w5, #6-96 18433+ blt 1f 18434+ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 18435+ st3 {v16.h, v17.h, v18.h}[1], [x0], #6 18436+ beq 11b 18437+ mov v16.s[0], v16.s[1] 18438+ sub 
w5, w5, #6 18439+ mov v17.s[0], v17.s[1] 18440+ mov v18.s[0], v18.s[1] 18441+1: 18442+ cmp w5, #3-96 18443+ blt 1f 18444+ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 18445+ beq 11b 18446+ mov v16.h[0], v16.h[1] 18447+ sub w5, w5, #3 18448+ mov v17.h[0], v17.h[1] 18449+1: 18450+ cmp w5, #2-96 18451+ blt 1f 18452+ st2 {v16.h, v17.h}[0], [x0], #4 18453+ b 11b 18454+1: 18455+ st1 {v16.h}[0], [x0], #2 18456+ b 11b 18457+ 18458+endfunc 18459+ 18460+// void ff_rpi_sand30_lines_to_planar_y8( 18461+// uint8_t * dest, : x0 18462+// unsigned int dst_stride, : w1 18463+// const uint8_t * src, : x2 18464+// unsigned int src_stride1, : w3, always 128 18465+// unsigned int src_stride2, : w4 18466+// unsigned int _x, : w5 18467+// unsigned int y, : w6 18468+// unsigned int _w, : w7 18469+// unsigned int h); : [sp, #0] 18470+// 18471+// Assumes that we are starting on a stripe boundary and that overreading 18472+// within the stripe is OK. However it does respect the dest size for wri 18473+ 18474+function ff_rpi_sand30_lines_to_planar_y8, export=1 18475+ lsl w4, w4, #7 18476+ sub w4, w4, #64 18477+ uxtw x4, w4 18478+ sub w1, w1, w7 18479+ uxtw x6, w6 18480+ add x8, x2, x6, lsl #7 18481+ ldr w6, [sp, #0] 18482+ 18483+10: 18484+ mov x2, x8 18485+ mov w5, w7 18486+1: 18487+ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 18488+ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 18489+ 18490+ subs w5, w5, #96 18491+ 18492+ // v0, v1 18493+ USAND8 v16, v17, v18, v0, v1, v2, v3, v22, v23, v24 18494+ USAND8 v19, v20, v21, v4, v5, v6, v7, v22, v23, v24 18495+ 18496+ blt 2f 18497+ 18498+ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 18499+ st3 {v19.16b, v20.16b, v21.16b}, [x0], #48 18500+ 18501+ bne 1b 18502+ 18503+11: 18504+ subs w6, w6, #1 18505+ add x0, x0, w1, uxtw 18506+ add x8, x8, #128 18507+ bne 10b 18508+ 18509+ ret 18510+ 18511+// Partial final write 18512+2: 18513+ cmp w5, #48-96 18514+ blt 1f 18515+ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 18516+ beq 11b 18517+ mov v16.16b, v22.16b 18518+ mov 
v17.16b, v23.16b 18519+ sub w5, w5, #48 18520+ mov v18.16b, v24.16b 18521+1: 18522+ cmp w5, #24-96 18523+ blt 1f 18524+ st3 {v16.8b, v17.8b, v18.8b}, [x0], #24 18525+ beq 11b 18526+ mov v16.d[0], v16.d[1] 18527+ sub w5, w5, #24 18528+ mov v17.d[0], v17.d[1] 18529+ mov v18.d[0], v18.d[1] 18530+1: 18531+ cmp w5, #12-96 18532+ blt 1f 18533+ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 18534+ st3 {v16.b, v17.b, v18.b}[1], [x0], #3 18535+ st3 {v16.b, v17.b, v18.b}[2], [x0], #3 18536+ st3 {v16.b, v17.b, v18.b}[3], [x0], #3 18537+ beq 11b 18538+ mov v16.s[0], v16.s[1] 18539+ sub w5, w5, #12 18540+ mov v17.s[0], v17.s[1] 18541+ mov v18.s[0], v18.s[1] 18542+1: 18543+ cmp w5, #6-96 18544+ blt 1f 18545+ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 18546+ st3 {v16.b, v17.b, v18.b}[1], [x0], #3 18547+ beq 11b 18548+ mov v16.h[0], v16.h[1] 18549+ sub w5, w5, #6 18550+ mov v17.h[0], v17.h[1] 18551+ mov v18.h[0], v18.h[1] 18552+1: 18553+ cmp w5, #3-96 18554+ blt 1f 18555+ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 18556+ beq 11b 18557+ mov v16.b[0], v16.b[1] 18558+ sub w5, w5, #3 18559+ mov v17.b[0], v17.b[1] 18560+1: 18561+ cmp w5, #2-96 18562+ blt 1f 18563+ st2 {v16.b, v17.b}[0], [x0], #2 18564+ b 11b 18565+1: 18566+ st1 {v16.b}[0], [x0], #1 18567+ b 11b 18568+ 18569+endfunc 18570+ 18571--- /dev/null 18572+++ b/libavutil/aarch64/rpi_sand_neon.h 18573@@ -0,0 +1,59 @@ 18574+/* 18575+Copyright (c) 2021 Michael Eiler 18576+ 18577+Redistribution and use in source and binary forms, with or without 18578+modification, are permitted provided that the following conditions are met: 18579+ * Redistributions of source code must retain the above copyright 18580+ notice, this list of conditions and the following disclaimer. 18581+ * Redistributions in binary form must reproduce the above copyright 18582+ notice, this list of conditions and the following disclaimer in the 18583+ documentation and/or other materials provided with the distribution. 
18584+ * Neither the name of the copyright holder nor the 18585+ names of its contributors may be used to endorse or promote products 18586+ derived from this software without specific prior written permission. 18587+ 18588+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18589+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18590+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18591+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 18592+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18593+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18594+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 18595+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 18596+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 18597+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
18598+ 18599+Authors: Michael Eiler <eiler.mike@gmail.com> 18600+*/ 18601+ 18602+#pragma once 18603+ 18604+#ifdef __cplusplus 18605+extern "C" { 18606+#endif 18607+ 18608+void ff_rpi_sand8_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, 18609+ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, 18610+ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); 18611+ 18612+void ff_rpi_sand8_lines_to_planar_c8(uint8_t * dst_u, unsigned int dst_stride_u, 18613+ uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, 18614+ unsigned int stride1, unsigned int stride2, unsigned int _x, unsigned int y, 18615+ unsigned int _w, unsigned int h); 18616+ 18617+void ff_rpi_sand30_lines_to_planar_y16(uint8_t * dest, unsigned int dst_stride, 18618+ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, 18619+ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); 18620+ 18621+void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_u, 18622+ uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1, 18623+ unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); 18624+ 18625+void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, 18626+ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, 18627+ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); 18628+ 18629+#ifdef __cplusplus 18630+} 18631+#endif 18632+ 18633--- a/libavutil/arm/Makefile 18634+++ b/libavutil/arm/Makefile 18635@@ -6,3 +6,4 @@ VFP-OBJS += arm/float_dsp_init_vfp.o 18636 18637 NEON-OBJS += arm/float_dsp_init_neon.o \ 18638 arm/float_dsp_neon.o \ 18639+ arm/rpi_sand_neon.o \ 18640--- /dev/null 18641+++ b/libavutil/arm/rpi_sand_neon.S 18642@@ -0,0 +1,925 @@ 18643+/* 18644+Copyright (c) 2018 Raspberry Pi (Trading) Ltd. 18645+All rights reserved. 
18646+ 18647+Redistribution and use in source and binary forms, with or without 18648+modification, are permitted provided that the following conditions are met: 18649+ * Redistributions of source code must retain the above copyright 18650+ notice, this list of conditions and the following disclaimer. 18651+ * Redistributions in binary form must reproduce the above copyright 18652+ notice, this list of conditions and the following disclaimer in the 18653+ documentation and/or other materials provided with the distribution. 18654+ * Neither the name of the copyright holder nor the 18655+ names of its contributors may be used to endorse or promote products 18656+ derived from this software without specific prior written permission. 18657+ 18658+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18659+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18660+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18661+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 18662+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18663+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18664+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 18665+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 18666+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 18667+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
18668+ 18669+Authors: John Cox 18670+*/ 18671+ 18672+#include "libavutil/arm/asm.S" 18673+ 18674+ 18675+@ General notes: 18676+@ Having done some timing on this in sand8->y8 (Pi4) 18677+@ vst1 (680fps) is a bit faster than vstm (660fps) 18678+@ vldm (680fps) is noticeably faster than vld1 (480fps) 18679+@ (or it might be that a mix is what is required) 18680+@ 18681+@ At least on a Pi4 it is no more expensive to have a single auto-inc register 18682+@ for dest address than it is to have 2 used alternately (On Pi3 Ben asserted 18683+@ the latter was better) 18684+@ 18685+@ vstm will bus error on unaligned access (so will vldm), vst1 is safe unless 18686+@ the memory is uncached. 18687+@ As these are Sand -> planar we can assume that src is going to be aligned but 18688+@ it is possible that dest isn't (converting to .yuv or other packed format). 18689+@ Luckily vst1 is faster than vstm :-) so all is well 18690+@ vst1 has alignment requirements of element size so maybe splitting vst1.32 into 4 18691+@ .8 stores would let us do non-word aligned stores into uncached but it 18692+@ probably isn't worth it.
18693+ 18694+ 18695+ 18696+ 18697+@ void ff_rpi_sand128b_stripe_to_8_10( 18698+@ uint8_t * dest, // [r0] 18699+@ const uint8_t * src1, // [r1] 18700+@ const uint8_t * src2, // [r2] 18701+@ unsigned int lines); // [r3] 18702+ 18703+.macro stripe2_to_8, bit_depth 18704+ vpush {q4-q7} 18705+1: 18706+ vldm r1!, {q0-q7} 18707+ subs r3, #1 18708+ vldm r2!, {q8-q15} 18709+ vqrshrn.u16 d0, q0, #\bit_depth - 8 18710+ vqrshrn.u16 d1, q1, #\bit_depth - 8 18711+ vqrshrn.u16 d2, q2, #\bit_depth - 8 18712+ vqrshrn.u16 d3, q3, #\bit_depth - 8 18713+ vqrshrn.u16 d4, q4, #\bit_depth - 8 18714+ vqrshrn.u16 d5, q5, #\bit_depth - 8 18715+ vqrshrn.u16 d6, q6, #\bit_depth - 8 18716+ vqrshrn.u16 d7, q7, #\bit_depth - 8 18717+ vqrshrn.u16 d8, q8, #\bit_depth - 8 18718+ vqrshrn.u16 d9, q9, #\bit_depth - 8 18719+ vqrshrn.u16 d10, q10, #\bit_depth - 8 18720+ vqrshrn.u16 d11, q11, #\bit_depth - 8 18721+ vqrshrn.u16 d12, q12, #\bit_depth - 8 18722+ vqrshrn.u16 d13, q13, #\bit_depth - 8 18723+ vqrshrn.u16 d14, q14, #\bit_depth - 8 18724+ vqrshrn.u16 d15, q15, #\bit_depth - 8 18725+ vstm r0!, {q0-q7} 18726+ bne 1b 18727+ vpop {q4-q7} 18728+ bx lr 18729+.endm 18730+ 18731+function ff_rpi_sand128b_stripe_to_8_10, export=1 18732+ stripe2_to_8 10 18733+endfunc 18734+ 18735+@ void ff_rpi_sand8_lines_to_planar_y8( 18736+@ uint8_t * dest, // [r0] 18737+@ unsigned int dst_stride, // [r1] 18738+@ const uint8_t * src, // [r2] 18739+@ unsigned int src_stride1, // [r3] Ignored - assumed 128 18740+@ unsigned int src_stride2, // [sp, #0] -> r3 18741+@ unsigned int _x, // [sp, #4] Ignored - 0 18742+@ unsigned int y, // [sp, #8] (r7 in prefix) 18743+@ unsigned int _w, // [sp, #12] -> r6 (cur r5) 18744+@ unsigned int h); // [sp, #16] -> r7 18745+@ 18746+@ Assumes that we are starting on a stripe boundary and that overreading 18747+@ within the stripe is OK. 
However it does respect the dest size for writing 18748+ 18749+function ff_rpi_sand8_lines_to_planar_y8, export=1 18750+ push {r4-r8, lr} @ +24 L 18751+ ldr r3, [sp, #24] 18752+ ldr r6, [sp, #36] 18753+ ldr r7, [sp, #32] @ y 18754+ lsl r3, #7 18755+ sub r1, r6 18756+ add r8, r2, r7, lsl #7 18757+ ldr r7, [sp, #40] 18758+ 18759+10: 18760+ mov r2, r8 18761+ add r4, r0, #24 18762+ mov r5, r6 18763+ mov lr, #0 18764+1: 18765+ vldm r2, {q8-q15} 18766+ add r2, r3 18767+ subs r5, #128 18768+ blt 2f 18769+ vst1.8 {d16, d17, d18, d19}, [r0]! 18770+ vst1.8 {d20, d21, d22, d23}, [r0]! 18771+ vst1.8 {d24, d25, d26, d27}, [r0]! 18772+ vst1.8 {d28, d29, d30, d31}, [r0]! 18773+ bne 1b 18774+11: 18775+ subs r7, #1 18776+ add r0, r1 18777+ add r8, #128 18778+ bne 10b 18779+ 18780+ pop {r4-r8, pc} 18781+ 18782+@ Partial final write 18783+2: 18784+ cmp r5, #64-128 18785+ blt 1f 18786+ vst1.8 {d16, d17, d18, d19}, [r0]! 18787+ vst1.8 {d20, d21, d22, d23}, [r0]! 18788+ beq 11b 18789+ vmov q8, q12 18790+ vmov q9, q13 18791+ sub r5, #64 18792+ vmov q10, q14 18793+ vmov q11, q15 18794+1: 18795+ cmp r5, #32-128 18796+ blt 1f 18797+ vst1.8 {d16, d17, d18, d19}, [r0]! 18798+ beq 11b 18799+ vmov q8, q10 18800+ sub r5, #32 18801+ vmov q9, q11 18802+1: 18803+ cmp r5, #16-128 18804+ blt 1f 18805+ vst1.8 {d16, d17}, [r0]! 18806+ beq 11b 18807+ sub r5, #16 18808+ vmov q8, q9 18809+1: 18810+ cmp r5, #8-128 18811+ blt 1f 18812+ vst1.8 {d16}, [r0]! 18813+ beq 11b 18814+ sub r5, #8 18815+ vmov d16, d17 18816+1: 18817+ cmp r5, #4-128 18818+ blt 1f 18819+ vst1.32 {d16[0]}, [r0]! 18820+ beq 11b 18821+ sub r5, #4 18822+ vshr.u64 d16, #32 18823+1: 18824+ cmp r5, #2-128 18825+ blt 1f 18826+ vst1.16 {d16[0]}, [r0]! 18827+ beq 11b 18828+ vst1.8 {d16[2]}, [r0]! 18829+ b 11b 18830+1: 18831+ vst1.8 {d16[0]}, [r0]! 
18832+ b 11b 18833+endfunc 18834+ 18835+@ void ff_rpi_sand8_lines_to_planar_c8( 18836+@ uint8_t * dst_u, // [r0] 18837+@ unsigned int dst_stride_u, // [r1] 18838+@ uint8_t * dst_v, // [r2] 18839+@ unsigned int dst_stride_v, // [r3] 18840+@ const uint8_t * src, // [sp, #0] -> r4, r5 18841+@ unsigned int stride1, // [sp, #4] 128 18842+@ unsigned int stride2, // [sp, #8] -> r8 18843+@ unsigned int _x, // [sp, #12] 0 18844+@ unsigned int y, // [sp, #16] (r7 in prefix) 18845+@ unsigned int _w, // [sp, #20] -> r12, r6 18846+@ unsigned int h); // [sp, #24] -> r7 18847+@ 18848+@ Assumes that we are starting on a stripe boundary and that overreading 18849+@ within the stripe is OK. However it does respect the dest size for writing 18850+ 18851+function ff_rpi_sand8_lines_to_planar_c8, export=1 18852+ push {r4-r8, lr} @ +24 18853+ 18854+ ldr r5, [sp, #24] 18855+ ldr r8, [sp, #32] 18856+ ldr r7, [sp, #40] 18857+ ldr r6, [sp, #44] 18858+ lsl r8, #7 18859+ add r5, r5, r7, lsl #7 18860+ sub r1, r1, r6 18861+ sub r3, r3, r6 18862+ ldr r7, [sp, #48] 18863+ vpush {q4-q7} 18864+ 18865+10: 18866+ mov r4, r5 18867+ mov r12, r6 18868+1: 18869+ subs r12, #64 18870+ vldm r4, {q0-q7} 18871+ add r4, r8 18872+ it gt 18873+ vldmgt r4, {q8-q15} 18874+ add r4, r8 18875+ 18876+ vuzp.8 q0, q1 18877+ vuzp.8 q2, q3 18878+ vuzp.8 q4, q5 18879+ vuzp.8 q6, q7 18880+ 18881+ vuzp.8 q8, q9 18882+ vuzp.8 q10, q11 18883+ vuzp.8 q12, q13 18884+ vuzp.8 q14, q15 18885+ subs r12, #64 18886+ 18887+ @ Rearrange regs so we can use vst1 with 4 regs 18888+ vswp q1, q2 18889+ vswp q5, q6 18890+ vswp q9, q10 18891+ vswp q13, q14 18892+ blt 2f 18893+ 18894+ vst1.8 {d0, d1, d2, d3 }, [r0]! 18895+ vst1.8 {d8, d9, d10, d11}, [r0]! 18896+ vst1.8 {d16, d17, d18, d19}, [r0]! 18897+ vst1.8 {d24, d25, d26, d27}, [r0]! 18898+ 18899+ vst1.8 {d4, d5, d6, d7 }, [r2]! 18900+ vst1.8 {d12, d13, d14, d15}, [r2]! 18901+ vst1.8 {d20, d21, d22, d23}, [r2]! 18902+ vst1.8 {d28, d29, d30, d31}, [r2]! 
18903+ bne 1b 18904+11: 18905+ subs r7, #1 18906+ add r5, #128 18907+ add r0, r1 18908+ add r2, r3 18909+ bne 10b 18910+ vpop {q4-q7} 18911+ pop {r4-r8,pc} 18912+ 18913+2: 18914+ cmp r12, #64-128 18915+ blt 1f 18916+ vst1.8 {d0, d1, d2, d3 }, [r0]! 18917+ vst1.8 {d8, d9, d10, d11}, [r0]! 18918+ vst1.8 {d4, d5, d6, d7 }, [r2]! 18919+ vst1.8 {d12, d13, d14, d15}, [r2]! 18920+ beq 11b 18921+ sub r12, #64 18922+ vmov q0, q8 18923+ vmov q1, q9 18924+ vmov q2, q10 18925+ vmov q3, q11 18926+ vmov q4, q12 18927+ vmov q5, q13 18928+ vmov q6, q14 18929+ vmov q7, q15 18930+1: 18931+ cmp r12, #32-128 18932+ blt 1f 18933+ vst1.8 {d0, d1, d2, d3 }, [r0]! 18934+ vst1.8 {d4, d5, d6, d7 }, [r2]! 18935+ beq 11b 18936+ sub r12, #32 18937+ vmov q0, q4 18938+ vmov q1, q5 18939+ vmov q2, q6 18940+ vmov q3, q7 18941+1: 18942+ cmp r12, #16-128 18943+ blt 1f 18944+ vst1.8 {d0, d1 }, [r0]! 18945+ vst1.8 {d4, d5 }, [r2]! 18946+ beq 11b 18947+ sub r12, #16 18948+ vmov q0, q1 18949+ vmov q2, q3 18950+1: 18951+ cmp r12, #8-128 18952+ blt 1f 18953+ vst1.8 {d0}, [r0]! 18954+ vst1.8 {d4}, [r2]! 18955+ beq 11b 18956+ sub r12, #8 18957+ vmov d0, d1 18958+ vmov d4, d5 18959+1: 18960+ cmp r12, #4-128 18961+ blt 1f 18962+ vst1.32 {d0[0]}, [r0]! 18963+ vst1.32 {d4[0]}, [r2]! 18964+ beq 11b 18965+ sub r12, #4 18966+ vmov s0, s1 18967+ vmov s8, s9 18968+1: 18969+ cmp r12, #2-128 18970+ blt 1f 18971+ vst1.16 {d0[0]}, [r0]! 18972+ vst1.16 {d4[0]}, [r2]! 18973+ beq 11b 18974+ vst1.8 {d0[2]}, [r0]! 18975+ vst1.8 {d4[2]}, [r2]! 18976+ b 11b 18977+1: 18978+ vst1.8 {d0[0]}, [r0]! 18979+ vst1.8 {d4[0]}, [r2]! 
18980+ b 11b 18981+endfunc 18982+ 18983+ 18984+ 18985+@ void ff_rpi_sand30_lines_to_planar_y16( 18986+@ uint8_t * dest, // [r0] 18987+@ unsigned int dst_stride, // [r1] 18988+@ const uint8_t * src, // [r2] 18989+@ unsigned int src_stride1, // [r3] Ignored - assumed 128 18990+@ unsigned int src_stride2, // [sp, #0] -> r3 18991+@ unsigned int _x, // [sp, #4] Ignored - 0 18992+@ unsigned int y, // [sp, #8] (r7 in prefix) 18993+@ unsigned int _w, // [sp, #12] -> r6 (cur r5) 18994+@ unsigned int h); // [sp, #16] -> r7 18995+@ 18996+@ Assumes that we are starting on a stripe boundary and that overreading 18997+@ within the stripe is OK. However it does respect the dest size for writing 18998+ 18999+function ff_rpi_sand30_lines_to_planar_y16, export=1 19000+ push {r4-r8, lr} @ +24 19001+ ldr r3, [sp, #24] 19002+ ldr r6, [sp, #36] 19003+ ldr r7, [sp, #32] @ y 19004+ mov r12, #48 19005+ sub r3, #1 19006+ lsl r3, #7 19007+ sub r1, r1, r6, lsl #1 19008+ add r8, r2, r7, lsl #7 19009+ ldr r7, [sp, #40] 19010+ 19011+10: 19012+ mov r2, r8 19013+ add r4, r0, #24 19014+ mov r5, r6 19015+ mov lr, #0 19016+1: 19017+ vldm r2!, {q10-q13} 19018+ add lr, #64 19019+ 19020+ vshrn.u32 d4 , q10, #14 @ Cannot vshrn.u32 #20! 
19021+ ands lr, #127 19022+ vshrn.u32 d2, q10, #10 19023+ vmovn.u32 d0, q10 19024+ 19025+ vshrn.u32 d5, q11, #14 19026+ it eq 19027+ addeq r2, r3 19028+ vshrn.u32 d3, q11, #10 19029+ vmovn.u32 d1, q11 19030+ 19031+ subs r5, #48 19032+ vshr.u16 q2, #6 19033+ vbic.u16 q0, #0xfc00 19034+ vbic.u16 q1, #0xfc00 19035+ 19036+ vshrn.u32 d20, q12, #14 19037+ vshrn.u32 d18, q12, #10 19038+ vmovn.u32 d16, q12 19039+ 19040+ vshrn.u32 d21, q13, #14 19041+ vshrn.u32 d19, q13, #10 19042+ vmovn.u32 d17, q13 19043+ 19044+ vshr.u16 q10, #6 19045+ vbic.u16 q8, #0xfc00 19046+ vbic.u16 q9 , #0xfc00 19047+ blt 2f 19048+ 19049+ vst3.16 {d0, d2, d4}, [r0], r12 19050+ vst3.16 {d1, d3, d5}, [r4], r12 19051+ vst3.16 {d16, d18, d20}, [r0], r12 19052+ vst3.16 {d17, d19, d21}, [r4], r12 19053+ 19054+ bne 1b 19055+ 19056+11: 19057+ subs r7, #1 19058+ add r0, r1 19059+ add r8, #128 19060+ bne 10b 19061+ 19062+ pop {r4-r8, pc} 19063+ 19064+@ Partial final write 19065+2: 19066+ cmp r5, #24-48 19067+ blt 1f 19068+ vst3.16 {d0, d2, d4}, [r0], r12 19069+ vst3.16 {d1, d3, d5}, [r4] 19070+ beq 11b 19071+ vmov q0, q8 19072+ sub r5, #24 19073+ vmov q1, q9 19074+ vmov q2, q10 19075+1: 19076+ cmp r5, #12-48 19077+ blt 1f 19078+ vst3.16 {d0, d2, d4}, [r0]! 19079+ beq 11b 19080+ vmov d0, d1 19081+ sub r5, #12 19082+ vmov d2, d3 19083+ vmov d4, d5 19084+1: 19085+ cmp r5, #6-48 19086+ add r4, r0, #6 @ avoid [r0]! on sequential instructions 19087+ blt 1f 19088+ vst3.16 {d0[0], d2[0], d4[0]}, [r0] 19089+ vst3.16 {d0[1], d2[1], d4[1]}, [r4] 19090+ add r0, #12 19091+ beq 11b 19092+ vmov s0, s1 19093+ sub r5, #6 19094+ vmov s4, s5 19095+ vmov s8, s9 19096+1: 19097+ cmp r5, #3-48 19098+ blt 1f 19099+ vst3.16 {d0[0], d2[0], d4[0]}, [r0]! 19100+ beq 11b 19101+ sub r5, #3 19102+ vshr.u32 d0, #16 19103+ vshr.u32 d2, #16 19104+1: 19105+ cmp r5, #2-48 19106+ blt 1f 19107+ vst2.16 {d0[0], d2[0]}, [r0]! 19108+ b 11b 19109+1: 19110+ vst1.16 {d0[0]}, [r0]! 
19111+ b 11b 19112+ 19113+endfunc 19114+ 19115+ 19116+@ void ff_rpi_sand30_lines_to_planar_c16( 19117+@ uint8_t * dst_u, // [r0] 19118+@ unsigned int dst_stride_u, // [r1] 19119+@ uint8_t * dst_v, // [r2] 19120+@ unsigned int dst_stride_v, // [r3] 19121+@ const uint8_t * src, // [sp, #0] -> r4, r5 19122+@ unsigned int stride1, // [sp, #4] 128 19123+@ unsigned int stride2, // [sp, #8] -> r8 19124+@ unsigned int _x, // [sp, #12] 0 19125+@ unsigned int y, // [sp, #16] (r7 in prefix) 19126+@ unsigned int _w, // [sp, #20] -> r6, r9 19127+@ unsigned int h); // [sp, #24] -> r7 19128+@ 19129+@ Assumes that we are starting on a stripe boundary and that overreading 19130+@ within the stripe is OK. However it does respect the dest size for writing 19131+ 19132+function ff_rpi_sand30_lines_to_planar_c16, export=1 19133+ push {r4-r10, lr} @ +32 19134+ ldr r5, [sp, #32] 19135+ ldr r8, [sp, #40] 19136+ ldr r7, [sp, #48] 19137+ ldr r9, [sp, #52] 19138+ mov r12, #48 19139+ sub r8, #1 19140+ lsl r8, #7 19141+ add r5, r5, r7, lsl #7 19142+ sub r1, r1, r9, lsl #1 19143+ sub r3, r3, r9, lsl #1 19144+ ldr r7, [sp, #56] 19145+10: 19146+ mov lr, #0 19147+ mov r4, r5 19148+ mov r6, r9 19149+1: 19150+ vldm r4!, {q0-q3} 19151+ add lr, #64 19152+ 19153+ @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2 19154+ vshrn.u32 d20, q0, #14 19155+ vmovn.u32 d18, q0 19156+ vshrn.u32 d0, q0, #10 19157+ ands lr, #127 19158+ 19159+ vshrn.u32 d21, q1, #14 19160+ vmovn.u32 d19, q1 19161+ vshrn.u32 d1, q1, #10 19162+ 19163+ vshrn.u32 d22, q2, #10 19164+ vmovn.u32 d2, q2 19165+ vshrn.u32 d4, q2, #14 19166+ 19167+ add r10, r0, #24 19168+ vshrn.u32 d23, q3, #10 19169+ vmovn.u32 d3, q3 19170+ vshrn.u32 d5, q3, #14 19171+ 19172+ it eq 19173+ addeq r4, r8 19174+ vuzp.16 q0, q11 19175+ vuzp.16 q9, q1 19176+ vuzp.16 q10, q2 19177+ 19178+ @ q0 V0, V3,.. 19179+ @ q9 U0, U3... 19180+ @ q10 U1, U4... 19181+ @ q11 U2, U5,.. 19182+ @ q1 V1, V4, 19183+ @ q2 V2, V5,.. 
19184+ 19185+ subs r6, #24 19186+ vbic.u16 q11, #0xfc00 19187+ vbic.u16 q9, #0xfc00 19188+ vshr.u16 q10, #6 19189+ vshr.u16 q2, #6 19190+ vbic.u16 q0, #0xfc00 19191+ vbic.u16 q1, #0xfc00 19192+ 19193+ blt 2f 19194+ 19195+ vst3.16 {d18, d20, d22}, [r0], r12 19196+ vst3.16 {d19, d21, d23}, [r10] 19197+ add r10, r2, #24 19198+ vst3.16 {d0, d2, d4}, [r2], r12 19199+ vst3.16 {d1, d3, d5}, [r10] 19200+ 19201+ bne 1b 19202+ 19203+11: 19204+ subs r7, #1 19205+ add r5, #128 19206+ add r0, r1 19207+ add r2, r3 19208+ bne 10b 19209+ 19210+ pop {r4-r10, pc} 19211+ 19212+@ Partial final write 19213+2: 19214+ cmp r6, #-12 19215+ blt 1f 19216+ vst3.16 {d18, d20, d22}, [r0]! 19217+ vst3.16 {d0, d2, d4}, [r2]! 19218+ beq 11b 19219+ vmov d18, d19 19220+ vmov d20, d21 19221+ vmov d22, d23 19222+ sub r6, #12 19223+ vmov d0, d1 19224+ vmov d2, d3 19225+ vmov d4, d5 19226+1: 19227+ cmp r6, #-18 19228+ @ Rezip here as it makes the remaining tail handling easier 19229+ vzip.16 d0, d18 19230+ vzip.16 d2, d20 19231+ vzip.16 d4, d22 19232+ blt 1f 19233+ vst3.16 {d0[1], d2[1], d4[1]}, [r0]! 19234+ vst3.16 {d0[0], d2[0], d4[0]}, [r2]! 19235+ vst3.16 {d0[3], d2[3], d4[3]}, [r0]! 19236+ vst3.16 {d0[2], d2[2], d4[2]}, [r2]! 19237+ beq 11b 19238+ vmov d0, d18 19239+ vmov d2, d20 19240+ sub r6, #6 19241+ vmov d4, d22 19242+1: 19243+ cmp r6, #-21 19244+ blt 1f 19245+ vst3.16 {d0[1], d2[1], d4[1]}, [r0]! 19246+ vst3.16 {d0[0], d2[0], d4[0]}, [r2]! 19247+ beq 11b 19248+ vmov s4, s5 19249+ sub r6, #3 19250+ vmov s0, s1 19251+1: 19252+ cmp r6, #-22 19253+ blt 1f 19254+ vst2.16 {d0[1], d2[1]}, [r0]! 19255+ vst2.16 {d0[0], d2[0]}, [r2]! 19256+ b 11b 19257+1: 19258+ vst1.16 {d0[1]}, [r0]! 19259+ vst1.16 {d0[0]}, [r2]! 
19260+ b 11b 19261+ 19262+endfunc 19263+ 19264+@ void ff_rpi_sand30_lines_to_planar_p010( 19265+@ uint8_t * dest, // [r0] 19266+@ unsigned int dst_stride, // [r1] 19267+@ const uint8_t * src, // [r2] 19268+@ unsigned int src_stride1, // [r3] Ignored - assumed 128 19269+@ unsigned int src_stride2, // [sp, #0] -> r3 19270+@ unsigned int _x, // [sp, #4] Ignored - 0 19271+@ unsigned int y, // [sp, #8] (r7 in prefix) 19272+@ unsigned int _w, // [sp, #12] -> r6 (cur r5) 19273+@ unsigned int h); // [sp, #16] -> r7 19274+@ 19275+@ Assumes that we are starting on a stripe boundary and that overreading 19276+@ within the stripe is OK. However it does respect the dest size for writing 19277+ 19278+function ff_rpi_sand30_lines_to_planar_p010, export=1 19279+ push {r4-r8, lr} @ +24 19280+ ldr r3, [sp, #24] 19281+ ldr r6, [sp, #36] 19282+ ldr r7, [sp, #32] @ y 19283+ mov r12, #48 19284+ vmov.u16 q15, #0xffc0 19285+ sub r3, #1 19286+ lsl r3, #7 19287+ sub r1, r1, r6, lsl #1 19288+ add r8, r2, r7, lsl #7 19289+ ldr r7, [sp, #40] 19290+ 19291+10: 19292+ mov r2, r8 19293+ add r4, r0, #24 19294+ mov r5, r6 19295+ mov lr, #0 19296+1: 19297+ vldm r2!, {q10-q13} 19298+ add lr, #64 19299+ 19300+ vshl.u32 q14, q10, #6 19301+ ands lr, #127 19302+ vshrn.u32 d4, q10, #14 19303+ vshrn.u32 d2, q10, #4 19304+ vmovn.u32 d0, q14 19305+ 19306+ vshl.u32 q14, q11, #6 19307+ it eq 19308+ addeq r2, r3 19309+ vshrn.u32 d5, q11, #14 19310+ vshrn.u32 d3, q11, #4 19311+ vmovn.u32 d1, q14 19312+ 19313+ subs r5, #48 19314+ vand q2, q15 19315+ vand q1, q15 19316+ vand q0, q15 19317+ 19318+ vshl.u32 q14, q12, #6 19319+ vshrn.u32 d20, q12, #14 19320+ vshrn.u32 d18, q12, #4 19321+ vmovn.u32 d16, q14 19322+ 19323+ vshl.u32 q14, q13, #6 19324+ vshrn.u32 d21, q13, #14 19325+ vshrn.u32 d19, q13, #4 19326+ vmovn.u32 d17, q14 19327+ 19328+ vand q10, q15 19329+ vand q9, q15 19330+ vand q8, q15 19331+ blt 2f 19332+ 19333+ vst3.16 {d0, d2, d4}, [r0], r12 19334+ vst3.16 {d1, d3, d5}, [r4], r12 19335+ vst3.16 {d16, d18, 
d20}, [r0], r12 19336+ vst3.16 {d17, d19, d21}, [r4], r12 19337+ 19338+ bne 1b 19339+ 19340+11: 19341+ subs r7, #1 19342+ add r0, r1 19343+ add r8, #128 19344+ bne 10b 19345+ 19346+ pop {r4-r8, pc} 19347+ 19348+@ Partial final write 19349+2: 19350+ cmp r5, #24-48 19351+ blt 1f 19352+ vst3.16 {d0, d2, d4}, [r0], r12 19353+ vst3.16 {d1, d3, d5}, [r4] 19354+ beq 11b 19355+ vmov q0, q8 19356+ sub r5, #24 19357+ vmov q1, q9 19358+ vmov q2, q10 19359+1: 19360+ cmp r5, #12-48 19361+ blt 1f 19362+ vst3.16 {d0, d2, d4}, [r0]! 19363+ beq 11b 19364+ vmov d0, d1 19365+ sub r5, #12 19366+ vmov d2, d3 19367+ vmov d4, d5 19368+1: 19369+ cmp r5, #6-48 19370+ add r4, r0, #6 @ avoid [r0]! on sequential instructions 19371+ blt 1f 19372+ vst3.16 {d0[0], d2[0], d4[0]}, [r0] 19373+ vst3.16 {d0[1], d2[1], d4[1]}, [r4] 19374+ add r0, #12 19375+ beq 11b 19376+ vmov s0, s1 19377+ sub r5, #6 19378+ vmov s4, s5 19379+ vmov s8, s9 19380+1: 19381+ cmp r5, #3-48 19382+ blt 1f 19383+ vst3.16 {d0[0], d2[0], d4[0]}, [r0]! 19384+ beq 11b 19385+ sub r5, #3 19386+ vshr.u32 d0, #16 19387+ vshr.u32 d2, #16 19388+1: 19389+ cmp r5, #2-48 19390+ blt 1f 19391+ vst2.16 {d0[0], d2[0]}, [r0]! 19392+ b 11b 19393+1: 19394+ vst1.16 {d0[0]}, [r0]! 19395+ b 11b 19396+ 19397+endfunc 19398+ 19399+ 19400+@ void ff_rpi_sand30_lines_to_planar_y8( 19401+@ uint8_t * dest, // [r0] 19402+@ unsigned int dst_stride, // [r1] 19403+@ const uint8_t * src, // [r2] 19404+@ unsigned int src_stride1, // [r3] Ignored - assumed 128 19405+@ unsigned int src_stride2, // [sp, #0] -> r3 19406+@ unsigned int _x, // [sp, #4] Ignored - 0 19407+@ unsigned int y, // [sp, #8] (r7 in prefix) 19408+@ unsigned int _w, // [sp, #12] -> r6 (cur r5) 19409+@ unsigned int h); // [sp, #16] -> r7 19410+@ 19411+@ Assumes that we are starting on a stripe boundary and that overreading 19412+@ within the stripe is OK. 
However it does respect the dest size for writing 19413+ 19414+function ff_rpi_sand30_lines_to_planar_y8, export=1 19415+ push {r4-r8, lr} @ +24 19416+ ldr r3, [sp, #24] 19417+ ldr r6, [sp, #36] 19418+ ldr r7, [sp, #32] @ y 19419+ mov r12, #48 19420+ lsl r3, #7 19421+ sub r1, r1, r6 19422+ add r8, r2, r7, lsl #7 19423+ ldr r7, [sp, #40] 19424+ 19425+10: 19426+ mov r2, r8 19427+ add r4, r0, #24 19428+ mov r5, r6 19429+1: 19430+ vldm r2, {q8-q15} 19431+ 19432+ subs r5, #96 19433+ 19434+ vmovn.u32 d0, q8 19435+ vshrn.u32 d2, q8, #12 19436+ vshrn.u32 d4, q8, #16 @ Cannot vshrn.u32 #20! 19437+ 19438+ add r2, r3 19439+ 19440+ vmovn.u32 d1, q9 19441+ vshrn.u32 d3, q9, #12 19442+ vshrn.u32 d5, q9, #16 19443+ 19444+ pld [r2, #0] 19445+ 19446+ vshrn.u16 d0, q0, #2 19447+ vmovn.u16 d1, q1 19448+ vshrn.u16 d2, q2, #6 19449+ 19450+ vmovn.u32 d16, q10 19451+ vshrn.u32 d18, q10, #12 19452+ vshrn.u32 d20, q10, #16 19453+ 19454+ vmovn.u32 d17, q11 19455+ vshrn.u32 d19, q11, #12 19456+ vshrn.u32 d21, q11, #16 19457+ 19458+ pld [r2, #64] 19459+ 19460+ vshrn.u16 d4, q8, #2 19461+ vmovn.u16 d5, q9 19462+ vshrn.u16 d6, q10, #6 19463+ 19464+ vmovn.u32 d16, q12 19465+ vshrn.u32 d18, q12, #12 19466+ vshrn.u32 d20, q12, #16 19467+ 19468+ vmovn.u32 d17, q13 19469+ vshrn.u32 d19, q13, #12 19470+ vshrn.u32 d21, q13, #16 19471+ 19472+ vshrn.u16 d16, q8, #2 19473+ vmovn.u16 d17, q9 19474+ vshrn.u16 d18, q10, #6 19475+ 19476+ vmovn.u32 d20, q14 19477+ vshrn.u32 d22, q14, #12 19478+ vshrn.u32 d24, q14, #16 19479+ 19480+ vmovn.u32 d21, q15 19481+ vshrn.u32 d23, q15, #12 19482+ vshrn.u32 d25, q15, #16 19483+ 19484+ vshrn.u16 d20, q10, #2 19485+ vmovn.u16 d21, q11 19486+ vshrn.u16 d22, q12, #6 19487+ 19488+ blt 2f 19489+ 19490+ vst3.8 {d0, d1, d2}, [r0], r12 19491+ vst3.8 {d4, d5, d6}, [r4], r12 19492+ vst3.8 {d16, d17, d18}, [r0], r12 19493+ vst3.8 {d20, d21, d22}, [r4], r12 19494+ 19495+ bne 1b 19496+ 19497+11: 19498+ subs r7, #1 19499+ add r0, r1 19500+ add r8, #128 19501+ bne 10b 19502+ 19503+ pop
{r4-r8, pc} 19504+ 19505+@ Partial final write 19506+2: 19507+ cmp r5, #48-96 19508+ blt 1f 19509+ vst3.8 {d0, d1, d2}, [r0], r12 19510+ vst3.8 {d4, d5, d6}, [r4], r12 19511+ beq 11b 19512+ vmov q0, q8 19513+ vmov q2, q10 19514+ sub r5, #48 19515+ vmov d2, d18 19516+ vmov d6, d22 19517+1: 19518+ cmp r5, #24-96 19519+ blt 1f 19520+ vst3.8 {d0, d1, d2}, [r0]! 19521+ beq 11b 19522+ vmov q0, q2 19523+ sub r5, #24 19524+ vmov d2, d6 19525+1: 19526+ cmp r5, #12-96 19527+ blt 1f 19528+ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! 19529+ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! 19530+ vst3.8 {d0[2], d1[2], d2[2]}, [r0]! 19531+ vst3.8 {d0[3], d1[3], d2[3]}, [r0]! 19532+ beq 11b 19533+ vmov s0, s1 19534+ sub r5, #12 19535+ vmov s2, s3 19536+ vmov s4, s5 19537+1: 19538+ cmp r5, #6-96 19539+ blt 1f 19540+ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! 19541+ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! 19542+ @ r0 already advanced 6 bytes by the two post-incrementing stores above 19543+ beq 11b 19544+ vshr.u32 d0, #16 19545+ sub r5, #6 19546+ vshr.u32 d1, #16 19547+ vshr.u32 d2, #16 19548+1: 19549+ cmp r5, #3-96 19550+ blt 1f 19551+ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! 19552+ beq 11b 19553+ sub r5, #3 19554+ vshr.u32 d0, #8 19555+ vshr.u32 d1, #8 19556+1: 19557+ cmp r5, #2-96 19558+ blt 1f 19559+ vst2.8 {d0[0], d1[0]}, [r0]! 19560+ b 11b 19561+1: 19562+ vst1.8 {d0[0]}, [r0]! 19563+ b 11b 19564+ 19565+endfunc 19566+ 19567+ 19568--- /dev/null 19569+++ b/libavutil/arm/rpi_sand_neon.h 19570@@ -0,0 +1,110 @@ 19571+/* 19572+Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 19573+All rights reserved. 19574+ 19575+Redistribution and use in source and binary forms, with or without 19576+modification, are permitted provided that the following conditions are met: 19577+ * Redistributions of source code must retain the above copyright 19578+ notice, this list of conditions and the following disclaimer.
19579+ * Redistributions in binary form must reproduce the above copyright 19580+ notice, this list of conditions and the following disclaimer in the 19581+ documentation and/or other materials provided with the distribution. 19582+ * Neither the name of the copyright holder nor the 19583+ names of its contributors may be used to endorse or promote products 19584+ derived from this software without specific prior written permission. 19585+ 19586+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19587+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19588+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19589+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 19590+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19591+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19592+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19593+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 19594+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 19595+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
19596+ 19597+Authors: John Cox 19598+*/ 19599+ 19600+#ifndef AVUTIL_ARM_SAND_NEON_H 19601+#define AVUTIL_ARM_SAND_NEON_H 19602+ 19603+void ff_rpi_sand128b_stripe_to_8_10( 19604+ uint8_t * dest, // [r0] 19605+ const uint8_t * src1, // [r1] 19606+ const uint8_t * src2, // [r2] 19607+ unsigned int lines); // [r3] 19608+ 19609+void ff_rpi_sand8_lines_to_planar_y8( 19610+ uint8_t * dest, // [r0] 19611+ unsigned int dst_stride, // [r1] 19612+ const uint8_t * src, // [r2] 19613+ unsigned int src_stride1, // [r3] Ignored - assumed 128 19614+ unsigned int src_stride2, // [sp, #0] -> r3 19615+ unsigned int _x, // [sp, #4] Ignored - 0 19616+ unsigned int y, // [sp, #8] (r7 in prefix) 19617+ unsigned int _w, // [sp, #12] -> r6 (cur r5) 19618+ unsigned int h); // [sp, #16] -> r7 19619+ 19620+void ff_rpi_sand8_lines_to_planar_c8( 19621+ uint8_t * dst_u, // [r0] 19622+ unsigned int dst_stride_u, // [r1] 19623+ uint8_t * dst_v, // [r2] 19624+ unsigned int dst_stride_v, // [r3] 19625+ const uint8_t * src, // [sp, #0] -> r4, r5 19626+ unsigned int stride1, // [sp, #4] 128 19627+ unsigned int stride2, // [sp, #8] -> r8 19628+ unsigned int _x, // [sp, #12] 0 19629+ unsigned int y, // [sp, #16] (r7 in prefix) 19630+ unsigned int _w, // [sp, #20] -> r12, r6 19631+ unsigned int h); // [sp, #24] -> r7 19632+ 19633+void ff_rpi_sand30_lines_to_planar_y16( 19634+ uint8_t * dest, // [r0] 19635+ unsigned int dst_stride, // [r1] 19636+ const uint8_t * src, // [r2] 19637+ unsigned int src_stride1, // [r3] Ignored - assumed 128 19638+ unsigned int src_stride2, // [sp, #0] -> r3 19639+ unsigned int _x, // [sp, #4] Ignored - 0 19640+ unsigned int y, // [sp, #8] (r7 in prefix) 19641+ unsigned int _w, // [sp, #12] -> r6 (cur r5) 19642+ unsigned int h); // [sp, #16] -> r7 19643+ 19644+void ff_rpi_sand30_lines_to_planar_c16( 19645+ uint8_t * dst_u, // [r0] 19646+ unsigned int dst_stride_u, // [r1] 19647+ uint8_t * dst_v, // [r2] 19648+ unsigned int dst_stride_v, // [r3] 19649+ const uint8_t * src, // 
[sp, #0] -> r4, r5 19650+ unsigned int stride1, // [sp, #4] 128 19651+ unsigned int stride2, // [sp, #8] -> r8 19652+ unsigned int _x, // [sp, #12] 0 19653+ unsigned int y, // [sp, #16] (r7 in prefix) 19654+ unsigned int _w, // [sp, #20] -> r6, r9 19655+ unsigned int h); // [sp, #24] -> r7 19656+ 19657+void ff_rpi_sand30_lines_to_planar_p010( 19658+ uint8_t * dest, // [r0] 19659+ unsigned int dst_stride, // [r1] 19660+ const uint8_t * src, // [r2] 19661+ unsigned int src_stride1, // [r3] Ignored - assumed 128 19662+ unsigned int src_stride2, // [sp, #0] -> r3 19663+ unsigned int _x, // [sp, #4] Ignored - 0 19664+ unsigned int y, // [sp, #8] (r7 in prefix) 19665+ unsigned int _w, // [sp, #12] -> r6 (cur r5) 19666+ unsigned int h); // [sp, #16] -> r7 19667+ 19668+void ff_rpi_sand30_lines_to_planar_y8( 19669+ uint8_t * dest, // [r0] 19670+ unsigned int dst_stride, // [r1] 19671+ const uint8_t * src, // [r2] 19672+ unsigned int src_stride1, // [r3] Ignored - assumed 128 19673+ unsigned int src_stride2, // [sp, #0] -> r3 19674+ unsigned int _x, // [sp, #4] Ignored - 0 19675+ unsigned int y, // [sp, #8] (r7 in prefix) 19676+ unsigned int _w, // [sp, #12] -> r6 (cur r5) 19677+ unsigned int h); // [sp, #16] -> r7 19678+ 19679+#endif // AVUTIL_ARM_SAND_NEON_H 19680+ 19681--- a/libavutil/frame.c 19682+++ b/libavutil/frame.c 19683@@ -16,6 +16,8 @@ 19684 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19685 */ 19686 19687+#include "config.h" 19688+ 19689 #include "channel_layout.h" 19690 #include "avassert.h" 19691 #include "buffer.h" 19692@@ -27,6 +29,9 @@ 19693 #include "mem.h" 19694 #include "samplefmt.h" 19695 #include "hwcontext.h" 19696+#if CONFIG_SAND 19697+#include "rpi_sand_fns.h" 19698+#endif 19699 19700 #if FF_API_OLD_CHANNEL_LAYOUT 19701 #define CHECK_CHANNELS_CONSISTENCY(frame) \ 19702@@ -875,6 +880,12 @@ int av_frame_apply_cropping(AVFrame *fra 19703 (frame->crop_top + frame->crop_bottom) >= frame->height) 19704 return 
AVERROR(ERANGE); 19705 19706+#if CONFIG_SAND 19707+ // Sand cannot be cropped - do not try 19708+ if (av_rpi_is_sand_format(frame->format)) 19709+ return 0; 19710+#endif 19711+ 19712 desc = av_pix_fmt_desc_get(frame->format); 19713 if (!desc) 19714 return AVERROR_BUG; 19715--- a/libavutil/frame.h 19716+++ b/libavutil/frame.h 19717@@ -940,6 +940,16 @@ int av_frame_apply_cropping(AVFrame *fra 19718 */ 19719 const char *av_frame_side_data_name(enum AVFrameSideDataType type); 19720 19721+ 19722+static inline int av_frame_cropped_width(const AVFrame * const frame) 19723+{ 19724+ return frame->width - (frame->crop_left + frame->crop_right); 19725+} 19726+static inline int av_frame_cropped_height(const AVFrame * const frame) 19727+{ 19728+ return frame->height - (frame->crop_top + frame->crop_bottom); 19729+} 19730+ 19731 /** 19732 * @} 19733 */ 19734--- a/libavutil/hwcontext_drm.c 19735+++ b/libavutil/hwcontext_drm.c 19736@@ -21,6 +21,7 @@ 19737 #include <fcntl.h> 19738 #include <sys/mman.h> 19739 #include <unistd.h> 19740+#include <sys/ioctl.h> 19741 19742 /* This was introduced in version 4.6. And may not exist all without an 19743 * optional package. 
So to prevent a hard dependency on needing the Linux 19744@@ -31,6 +32,7 @@ 19745 #endif 19746 19747 #include <drm.h> 19748+#include <libdrm/drm_fourcc.h> 19749 #include <xf86drm.h> 19750 19751 #include "avassert.h" 19752@@ -38,7 +40,9 @@ 19753 #include "hwcontext_drm.h" 19754 #include "hwcontext_internal.h" 19755 #include "imgutils.h" 19756- 19757+#if CONFIG_SAND 19758+#include "libavutil/rpi_sand_fns.h" 19759+#endif 19760 19761 static void drm_device_free(AVHWDeviceContext *hwdev) 19762 { 19763@@ -53,6 +57,11 @@ static int drm_device_create(AVHWDeviceC 19764 AVDRMDeviceContext *hwctx = hwdev->hwctx; 19765 drmVersionPtr version; 19766 19767+ if (device == NULL) { 19768+ hwctx->fd = -1; 19769+ return 0; 19770+ } 19771+ 19772 hwctx->fd = open(device, O_RDWR); 19773 if (hwctx->fd < 0) 19774 return AVERROR(errno); 19775@@ -139,6 +148,8 @@ static int drm_map_frame(AVHWFramesConte 19776 if (flags & AV_HWFRAME_MAP_WRITE) 19777 mmap_prot |= PROT_WRITE; 19778 19779+ if (dst->format == AV_PIX_FMT_NONE) 19780+ dst->format = hwfc->sw_format; 19781 #if HAVE_LINUX_DMA_BUF_H 19782 if (flags & AV_HWFRAME_MAP_READ) 19783 map->sync_flags |= DMA_BUF_SYNC_READ; 19784@@ -185,6 +196,23 @@ static int drm_map_frame(AVHWFramesConte 19785 19786 dst->width = src->width; 19787 dst->height = src->height; 19788+ dst->crop_top = src->crop_top; 19789+ dst->crop_bottom = src->crop_bottom; 19790+ dst->crop_left = src->crop_left; 19791+ dst->crop_right = src->crop_right; 19792+ 19793+#if CONFIG_SAND 19794+ // Rework for sand frames 19795+ if (av_rpi_is_sand_frame(dst)) { 19796+ // As it stands the sand formats hold stride2 in linesize[3] 19797+ // linesize[0] & [1] contain stride1 which is always 128 for everything we do 19798+ // * Arguably this should be reworked s.t. 
stride2 is in linesize[0] & [1] 19799+ dst->linesize[3] = fourcc_mod_broadcom_param(desc->objects[0].format_modifier); 19800+ dst->linesize[0] = 128; 19801+ dst->linesize[1] = 128; 19802+ // *** Are we sure src->height is actually what we want ??? 19803+ } 19804+#endif 19805 19806 err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, 19807 &drm_unmap_frame, map); 19808@@ -206,16 +234,29 @@ static int drm_transfer_get_formats(AVHW 19809 enum AVHWFrameTransferDirection dir, 19810 enum AVPixelFormat **formats) 19811 { 19812- enum AVPixelFormat *pix_fmts; 19813+ enum AVPixelFormat *p; 19814 19815- pix_fmts = av_malloc_array(2, sizeof(*pix_fmts)); 19816- if (!pix_fmts) 19817+ p = *formats = av_malloc_array(3, sizeof(*p)); 19818+ if (!p) 19819 return AVERROR(ENOMEM); 19820 19821- pix_fmts[0] = ctx->sw_format; 19822- pix_fmts[1] = AV_PIX_FMT_NONE; 19823+ // **** Offer native sand too ???? 19824+ *p++ = 19825+#if CONFIG_SAND 19826+ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ? 19827+ AV_PIX_FMT_YUV420P : 19828+ ctx->sw_format == AV_PIX_FMT_RPI4_10 ? 
19829+ AV_PIX_FMT_YUV420P10LE : 19830+#endif 19831+ ctx->sw_format; 19832+ 19833+#if CONFIG_SAND 19834+ if (ctx->sw_format == AV_PIX_FMT_RPI4_10 || 19835+ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128) 19836+ *p++ = AV_PIX_FMT_NV12; 19837+#endif 19838 19839- *formats = pix_fmts; 19840+ *p = AV_PIX_FMT_NONE; 19841 return 0; 19842 } 19843 19844@@ -231,18 +272,62 @@ static int drm_transfer_data_from(AVHWFr 19845 map = av_frame_alloc(); 19846 if (!map) 19847 return AVERROR(ENOMEM); 19848- map->format = dst->format; 19849 19850+ // Map to default 19851+ map->format = AV_PIX_FMT_NONE; 19852 err = drm_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ); 19853 if (err) 19854 goto fail; 19855 19856- map->width = dst->width; 19857- map->height = dst->height; 19858+#if 0 19859+ av_log(hwfc, AV_LOG_INFO, "%s: src fmt=%d (%d), dst fmt=%d (%d) s=%dx%d l=%d/%d/%d/%d, d=%dx%d l=%d/%d/%d\n", __func__, 19860+ hwfc->sw_format, AV_PIX_FMT_RPI4_8, dst->format, AV_PIX_FMT_YUV420P10LE, 19861+ map->width, map->height, 19862+ map->linesize[0], 19863+ map->linesize[1], 19864+ map->linesize[2], 19865+ map->linesize[3], 19866+ dst->width, dst->height, 19867+ dst->linesize[0], 19868+ dst->linesize[1], 19869+ dst->linesize[2]); 19870+#endif 19871+#if CONFIG_SAND 19872+ if (av_rpi_is_sand_frame(map)) { 19873+ // Preserve crop - later ffmpeg code assumes that we have in that it 19874+ // overwrites any crop that we create with the old values 19875+ const unsigned int w = FFMIN(dst->width, map->width); 19876+ const unsigned int h = FFMIN(dst->height, map->height); 19877+ 19878+ map->crop_top = 0; 19879+ map->crop_bottom = 0; 19880+ map->crop_left = 0; 19881+ map->crop_right = 0; 19882+ 19883+ if (av_rpi_sand_to_planar_frame(dst, map) != 0) 19884+ { 19885+ av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__); 19886+ err = AVERROR(EINVAL); 19887+ goto fail; 19888+ } 19889+ 19890+ dst->width = w; 19891+ dst->height = h; 19892+ } 19893+ else 
19894+#endif 19895+ { 19896+ // Kludge mapped h/w s.t. frame_copy works 19897+ map->width = dst->width; 19898+ map->height = dst->height; 19899+ err = av_frame_copy(dst, map); 19900+ } 19901 19902- err = av_frame_copy(dst, map); 19903 if (err) 19904+ { 19905+ av_log(hwfc, AV_LOG_ERROR, "%s: Copy fail\n", __func__); 19906 goto fail; 19907+ } 19908 19909 err = 0; 19910 fail: 19911@@ -257,7 +342,10 @@ static int drm_transfer_data_to(AVHWFram 19912 int err; 19913 19914 if (src->width > hwfc->width || src->height > hwfc->height) 19915+ { 19916+ av_log(hwfc, AV_LOG_ERROR, "%s: H/w mismatch: %d/%d, %d/%d\n", __func__, src->width, hwfc->width, src->height, hwfc->height); 19917 return AVERROR(EINVAL); 19918+ } 19919 19920 map = av_frame_alloc(); 19921 if (!map) 19922--- a/libavutil/hwcontext_vulkan.c 19923+++ b/libavutil/hwcontext_vulkan.c 19924@@ -57,6 +57,14 @@ 19925 #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) 19926 #endif 19927 19928+// Sometimes missing definitions 19929+#ifndef VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME 19930+#define VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME "VK_EXT_video_decode_h264" 19931+#endif 19932+#ifndef VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME 19933+#define VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME "VK_EXT_video_decode_h265" 19934+#endif 19935+ 19936 typedef struct VulkanQueueCtx { 19937 VkFence fence; 19938 VkQueue queue; 19939--- a/libavutil/pixdesc.c 19940+++ b/libavutil/pixdesc.c 19941@@ -2491,6 +2491,50 @@ static const AVPixFmtDescriptor av_pix_f 19942 }, 19943 .flags = AV_PIX_FMT_FLAG_PLANAR, 19944 }, 19945+ [AV_PIX_FMT_SAND128] = { 19946+ .name = "sand128", 19947+ .nb_components = 3, 19948+ .log2_chroma_w = 1, 19949+ .log2_chroma_h = 1, 19950+ .comp = { 19951+ { 0, 1, 0, 0, 8 }, /* Y */ 19952+ { 1, 2, 0, 0, 8 }, /* U */ 19953+ { 1, 2, 1, 0, 8 }, /* V */ 19954+ }, 19955+ .flags = 0, 19956+ }, 19957+ [AV_PIX_FMT_SAND64_10] = { 19958+ .name = "sand64_10", 19959+ .nb_components = 3, 19960+ .log2_chroma_w = 1, 19961+ .log2_chroma_h = 1,
19962+ .comp = { 19963+ { 0, 2, 0, 0, 10 }, /* Y */ 19964+ { 1, 4, 0, 0, 10 }, /* U */ 19965+ { 1, 4, 2, 0, 10 }, /* V */ 19966+ }, 19967+ .flags = 0, 19968+ }, 19969+ [AV_PIX_FMT_SAND64_16] = { 19970+ .name = "sand64_16", 19971+ .nb_components = 3, 19972+ .log2_chroma_w = 1, 19973+ .log2_chroma_h = 1, 19974+ .comp = { 19975+ { 0, 2, 0, 0, 16 }, /* Y */ 19976+ { 1, 4, 0, 0, 16 }, /* U */ 19977+ { 1, 4, 2, 0, 16 }, /* V */ 19978+ }, 19979+ .flags = 0, 19980+ }, 19981+ [AV_PIX_FMT_RPI4_8] = { 19982+ .name = "rpi4_8", 19983+ .flags = AV_PIX_FMT_FLAG_HWACCEL, 19984+ }, 19985+ [AV_PIX_FMT_RPI4_10] = { 19986+ .name = "rpi4_10", 19987+ .flags = AV_PIX_FMT_FLAG_HWACCEL, 19988+ }, 19989 }; 19990 19991 static const char * const color_range_names[] = { 19992--- a/libavutil/pixfmt.h 19993+++ b/libavutil/pixfmt.h 19994@@ -349,6 +349,14 @@ enum AVPixelFormat { 19995 19996 AV_PIX_FMT_Y210BE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, big-endian 19997 AV_PIX_FMT_Y210LE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian 19998+// RPI - not on ifdef so can be got at by calling progs 19999+// #define so code that uses this can know it is there 20000+#define AVUTIL_HAVE_PIX_FMT_SAND 1 20001+ AV_PIX_FMT_SAND128, ///< 4:2:0 8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding 20002+ AV_PIX_FMT_SAND64_10, ///< 4:2:0 10-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding 20003+ AV_PIX_FMT_SAND64_16, ///< 4:2:0 16-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding 20004+ AV_PIX_FMT_RPI4_8, 20005+ AV_PIX_FMT_RPI4_10, 20006 20007 AV_PIX_FMT_X2RGB10LE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), little-endian, X=unused/undefined 20008 AV_PIX_FMT_X2RGB10BE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), big-endian, X=unused/undefined 20009--- /dev/null 20010+++ b/libavutil/rpi_sand_fn_pw.h 20011@@ -0,0 +1,227 @@ 20012+/* 20013+Copyright (c) 2018 
Raspberry Pi (Trading) Ltd. 20014+All rights reserved. 20015+ 20016+Redistribution and use in source and binary forms, with or without 20017+modification, are permitted provided that the following conditions are met: 20018+ * Redistributions of source code must retain the above copyright 20019+ notice, this list of conditions and the following disclaimer. 20020+ * Redistributions in binary form must reproduce the above copyright 20021+ notice, this list of conditions and the following disclaimer in the 20022+ documentation and/or other materials provided with the distribution. 20023+ * Neither the name of the copyright holder nor the 20024+ names of its contributors may be used to endorse or promote products 20025+ derived from this software without specific prior written permission. 20026+ 20027+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20028+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20029+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20030+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 20031+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20032+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20033+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20034+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20035+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 20036+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/* Authors: John Cox */

/*
 * Sample-width template for the sand <-> planar copy helpers.
 *
 * The including file must define PW (bytes per sample: 1 or 2) before each
 * inclusion; FUNC() then appends "8" or "16" to every function name below.
 *
 * Addressing used throughout: the byte at horizontal byte offset x on row y
 * lives at
 *     (x & (stride1 - 1)) + y * stride1 + (x & ~(stride1 - 1)) * stride2
 * i.e. the picture is stored as stride1-byte-wide column stripes whose start
 * addresses are stride1 * stride2 bytes apart.  stride1 is used as a bit
 * mask and therefore has to be a power of two.
 */

#define STRCAT(x,y) x##y

#if PW == 1
#define pixel uint8_t
#define FUNC(f) STRCAT(f, 8)
#elif PW == 2
#define pixel uint16_t
#define FUNC(f) STRCAT(f, 16)
#else
#error Unexpected PW
#endif

/**
 * Copy a rectangle of a non-interleaved sand plane (luma) into a linear
 * buffer.
 *
 * Fetches a single patch - offscreen fixup is not done here and nothing is
 * clipped: the caller guarantees the rectangle lies inside both buffers.
 * The rectangle may span any number of stripes.
 *
 * @param dst         first byte of the linear destination
 * @param dst_stride  destination row pitch in bytes
 * @param src         base of the sand plane
 * @param stride1     stripe width in bytes (power of two)
 * @param stride2     stripe pitch divided by stride1
 * @param _x          left edge in bytes (pixel x * PW)
 * @param y           top row
 * @param _w          width in bytes (pixel width * PW)
 * @param h           number of rows
 */
void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride,
                             const uint8_t * src,
                             unsigned int stride1, unsigned int stride2,
                             unsigned int _x, unsigned int y,
                             unsigned int _w, unsigned int h)
{
    const unsigned int x = _x;
    const unsigned int w = _w;
    const unsigned int mask = stride1 - 1;

#if PW == 1 && HAVE_SAND_ASM
    // The asm routine only handles patches that start at the left edge
    if (_x == 0) {
        ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride,
                                     src, stride1, stride2, _x, y, _w, h);
        return;
    }
#endif

    if ((x & ~mask) == ((x + w) & ~mask)) {
        // All in one sand stripe
        const uint8_t * p = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
        for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p += stride1) {
            memcpy(dst, p, w);
        }
    }
    else
    {
        // Two+ stripe
        const unsigned int sstride = stride1 * stride2;              // Bytes between stripe starts
        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
        const uint8_t * p2 = p1 + sstride - (x & mask);              // Same row, start of next stripe
        const unsigned int w1 = stride1 - (x & mask);                // Bytes left in the first stripe
        const unsigned int w3 = (x + w) & mask;                      // Bytes used in the last stripe
        const unsigned int w2 = w - (w1 + w3);                       // Whole stripes in between (bytes)

        for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p1 += stride1, p2 += stride1) {
            unsigned int j;
            const uint8_t * p = p2;
            uint8_t * d = dst;
            memcpy(d, p1, w1);
            d += w1;
            for (j = 0; j < w2; j += stride1, d += stride1, p += sstride) {
                memcpy(d, p, stride1);
            }
            memcpy(d, p, w3);
        }
    }
}

/**
 * De-interleave a rectangle of a sand chroma plane (U,V sample pairs) into
 * separate linear U and V buffers.
 *
 * _x and _w are in bytes of ONE component (chroma x * PW); they are doubled
 * internally because U and V are interleaved in the source.
 * Nothing is clipped.
 *
 * NOTE(review): for PW == 2 the buffers are accessed through uint16_t
 * pointers, so this presumably requires 2-byte aligned pointers and strides
 * - confirm against callers.
 *
 * @param dst_u         linear U destination
 * @param dst_stride_u  U row pitch in bytes
 * @param dst_v         linear V destination
 * @param dst_stride_v  V row pitch in bytes
 * @param src           base of the sand chroma plane
 * @param stride1       stripe width in bytes (power of two)
 * @param stride2       stripe pitch divided by stride1
 * @param _x            left edge, chroma x * PW
 * @param y             top chroma row
 * @param _w            width, chroma width * PW
 * @param h             number of chroma rows
 */
void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_stride_u,
                             uint8_t * dst_v, const unsigned int dst_stride_v,
                             const uint8_t * src,
                             unsigned int stride1, unsigned int stride2,
                             unsigned int _x, unsigned int y,
                             unsigned int _w, unsigned int h)
{
    const unsigned int x = _x * 2;
    const unsigned int w = _w * 2;
    const unsigned int mask = stride1 - 1;

#if PW == 1 && HAVE_SAND_ASM
    // The asm routine only handles patches that start at the left edge
    if (_x == 0) {
        ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v,
                                     src, stride1, stride2, _x, y, _w, h);
        return;
    }
#endif

    if ((x & ~mask) == ((x + w) & ~mask)) {
        // All in one sand stripe
        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
        for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1) {
            pixel * du = (pixel *)dst_u;
            pixel * dv = (pixel *)dst_v;
            const pixel * p = (const pixel *)p1;
            for (unsigned int k = 0; k < w; k += 2 * PW) {
                *du++ = *p++;
                *dv++ = *p++;
            }
        }
    }
    else
    {
        // Two+ stripe
        const unsigned int sstride = stride1 * stride2;
        // Pixels to skip from the end of one stripe row to the same row of the next stripe
        const unsigned int sstride_p = (sstride - stride1) / PW;

        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
        const uint8_t * p2 = p1 + sstride - (x & mask);
        const unsigned int w1 = stride1 - (x & mask);
        const unsigned int w3 = (x + w) & mask;
        const unsigned int w2 = w - (w1 + w3);

        for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1, p2 += stride1) {
            unsigned int j;
            const pixel * p = (const pixel *)p1;
            pixel * du = (pixel *)dst_u;
            pixel * dv = (pixel *)dst_v;
            // Tail of the first stripe
            for (unsigned int k = 0; k < w1; k += 2 * PW) {
                *du++ = *p++;
                *dv++ = *p++;
            }
            // Whole stripes
            for (j = 0, p = (const pixel *)p2; j < w2; j += stride1, p += sstride_p) {
                for (unsigned int k = 0; k < stride1; k += 2 * PW) {
                    *du++ = *p++;
                    *dv++ = *p++;
                }
            }
            // Head of the last stripe
            for (unsigned int k = 0; k < w3; k += 2 * PW) {
                *du++ = *p++;
                *dv++ = *p++;
            }
        }
    }
}

/**
 * Interleave separate linear U and V buffers into a rectangle of a sand
 * chroma plane (inverse of av_rpi_sand_to_planar_c).
 *
 * _x and _w are in bytes of ONE component (chroma x * PW); they are doubled
 * internally.  Nothing is clipped.
 *
 * @param dst_c         base of the sand chroma plane (written)
 * @param stride1       stripe width in bytes (power of two)
 * @param stride2       stripe pitch divided by stride1
 * @param src_u         linear U source
 * @param src_stride_u  U row pitch in bytes
 * @param src_v         linear V source
 * @param src_stride_v  V row pitch in bytes
 * @param _x            left edge, chroma x * PW
 * @param y             top chroma row
 * @param _w            width, chroma width * PW
 * @param h             number of chroma rows
 */
void FUNC(av_rpi_planar_to_sand_c)(uint8_t * dst_c,
                             unsigned int stride1, unsigned int stride2,
                             const uint8_t * src_u, const unsigned int src_stride_u,
                             const uint8_t * src_v, const unsigned int src_stride_v,
                             unsigned int _x, unsigned int y,
                             unsigned int _w, unsigned int h)
{
    const unsigned int x = _x * 2;
    const unsigned int w = _w * 2;
    const unsigned int mask = stride1 - 1;
    if ((x & ~mask) == ((x + w) & ~mask)) {
        // All in one sand stripe
        uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2;
        for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1) {
            const pixel * su = (const pixel *)src_u;
            const pixel * sv = (const pixel *)src_v;
            pixel * p = (pixel *)p1;
            for (unsigned int k = 0; k < w; k += 2 * PW) {
                *p++ = *su++;
                *p++ = *sv++;
            }
        }
    }
    else
    {
        // Two+ stripe
        const unsigned int sstride = stride1 * stride2;
        const unsigned int sstride_p = (sstride - stride1) / PW;

        // These point into the destination and are written through, so they
        // must not be const-qualified
        uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2;
        uint8_t * p2 = p1 + sstride - (x & mask);
        const unsigned int w1 = stride1 - (x & mask);
        const unsigned int w3 = (x + w) & mask;
        const unsigned int w2 = w - (w1 + w3);

        for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1, p2 += stride1) {
            unsigned int j;
            const pixel * su = (const pixel *)src_u;
            const pixel * sv = (const pixel *)src_v;
            pixel * p = (pixel *)p1;
            // Tail of the first stripe
            for (unsigned int k = 0; k < w1; k += 2 * PW) {
                *p++ = *su++;
                *p++ = *sv++;
            }
            // Whole stripes
            for (j = 0, p = (pixel *)p2; j < w2; j += stride1, p += sstride_p) {
                for (unsigned int k = 0; k < stride1; k += 2 * PW) {
                    *p++ = *su++;
                    *p++ = *sv++;
                }
            }
            // Head of the last stripe
            for (unsigned int k = 0; k < w3; k += 2 * PW) {
                *p++ = *su++;
                *p++ = *sv++;
            }
        }
    }
}


#undef pixel
#undef STRCAT
#undef FUNC

/*
 * Patch text that shared these source lines continues with the next new
 * file, kept here verbatim:
 *
 *   --- /dev/null
 *   +++ b/libavutil/rpi_sand_fns.c
 *   @@ -0,0 +1,447 @@
 *
 * whose licence header begins:
 *
 * Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the copyright holder nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.
 */
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 20261+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20262+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20263+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20264+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20265+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 20266+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 20267+ 20268+Authors: John Cox 20269+*/ 20270+ 20271+#include "config.h" 20272+#include <stdint.h> 20273+#include <string.h> 20274+#include "rpi_sand_fns.h" 20275+#include "avassert.h" 20276+#include "frame.h" 20277+ 20278+#if ARCH_ARM && HAVE_NEON 20279+#include "libavutil/arm/cpu.h" 20280+#include "libavutil/arm/rpi_sand_neon.h" 20281+#define HAVE_SAND_ASM 1 20282+#elif ARCH_AARCH64 && HAVE_NEON 20283+#include "libavutil/aarch64/cpu.h" 20284+#include "libavutil/aarch64/rpi_sand_neon.h" 20285+#define HAVE_SAND_ASM 1 20286+#else 20287+#define HAVE_SAND_ASM 0 20288+#endif 20289+ 20290+#define PW 1 20291+#include "rpi_sand_fn_pw.h" 20292+#undef PW 20293+ 20294+#define PW 2 20295+#include "rpi_sand_fn_pw.h" 20296+#undef PW 20297+ 20298+#if 1 20299+// Simple round 20300+static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) 20301+{ 20302+ const unsigned int rnd = (1 << shr) >> 1; 20303+ const uint16_t * src = (const uint16_t *)_src; 20304+ 20305+ for (; n != 0; --n) { 20306+ *dst++ = (*src++ + rnd) >> shr; 20307+ } 20308+} 20309+#else 20310+// Dithered variation 20311+static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) 20312+{ 20313+ unsigned int rnd = (1 << shr) >> 1; 20314+ const unsigned int mask = ((1 << shr) - 1); 20315+ const uint16_t * src = (const uint16_t *)_src; 20316+ 20317+ for (; n != 0; --n) { 20318+ rnd = *src++ + (rnd & 
mask); 20319+ *dst++ = rnd >> shr; 20320+ } 20321+} 20322+#endif 20323+ 20324+// Fetches a single patch - offscreen fixup not done here 20325+// w <= stride1 20326+// unclipped 20327+// _x & _w in pixels, strides in bytes 20328+void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, 20329+ const uint8_t * src, 20330+ unsigned int stride1, unsigned int stride2, 20331+ unsigned int _x, unsigned int y, 20332+ unsigned int _w, unsigned int h) 20333+{ 20334+ const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word 20335+ const unsigned int xskip0 = _x - (x0 >> 2) * 3; 20336+ const unsigned int x1 = ((_x + _w) / 3) * 4; 20337+ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; 20338+ const unsigned int mask = stride1 - 1; 20339+ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; 20340+ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words 20341+ 20342+#if HAVE_SAND_ASM 20343+ if (_x == 0 && have_neon(av_get_cpu_flags())) { 20344+ ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); 20345+ return; 20346+ } 20347+#endif 20348+ 20349+ if (x0 == x1) { 20350+ // ******************* 20351+ // Partial single word xfer 20352+ return; 20353+ } 20354+ 20355+ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) 20356+ { 20357+ unsigned int x = x0; 20358+ const uint32_t * p = (const uint32_t *)p0; 20359+ uint16_t * d = (uint16_t *)dst; 20360+ 20361+ if (xskip0 != 0) { 20362+ const uint32_t p3 = *p++; 20363+ 20364+ if (xskip0 == 1) 20365+ *d++ = (p3 >> 10) & 0x3ff; 20366+ *d++ = (p3 >> 20) & 0x3ff; 20367+ 20368+ if (((x += 4) & mask) == 0) 20369+ p += slice_inc; 20370+ } 20371+ 20372+ while (x != x1) { 20373+ const uint32_t p3 = *p++; 20374+ *d++ = p3 & 0x3ff; 20375+ *d++ = (p3 >> 10) & 0x3ff; 20376+ *d++ = (p3 >> 20) & 0x3ff; 20377+ 20378+ if (((x += 4) & mask) == 0) 20379+ p += slice_inc; 20380+ } 20381+ 20382+ if 
(xrem1 != 0) { 20383+ const uint32_t p3 = *p; 20384+ 20385+ *d++ = p3 & 0x3ff; 20386+ if (xrem1 == 2) 20387+ *d++ = (p3 >> 10) & 0x3ff; 20388+ } 20389+ } 20390+} 20391+ 20392+ 20393+void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, 20394+ uint8_t * dst_v, const unsigned int dst_stride_v, 20395+ const uint8_t * src, 20396+ unsigned int stride1, unsigned int stride2, 20397+ unsigned int _x, unsigned int y, 20398+ unsigned int _w, unsigned int h) 20399+{ 20400+ const unsigned int x0 = (_x / 3) * 8; // Byte offset of the word 20401+ const unsigned int xskip0 = _x - (x0 >> 3) * 3; 20402+ const unsigned int x1 = ((_x + _w) / 3) * 8; 20403+ const unsigned int xrem1 = _x + _w - (x1 >> 3) * 3; 20404+ const unsigned int mask = stride1 - 1; 20405+ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; 20406+ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words 20407+ 20408+#if HAVE_SAND_ASM 20409+ if (_x == 0 && have_neon(av_get_cpu_flags())) { 20410+ ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v, 20411+ src, stride1, stride2, _x, y, _w, h); 20412+ return; 20413+ } 20414+#endif 20415+ 20416+ if (x0 == x1) { 20417+ // ******************* 20418+ // Partial single word xfer 20419+ return; 20420+ } 20421+ 20422+ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p0 += stride1) 20423+ { 20424+ unsigned int x = x0; 20425+ const uint32_t * p = (const uint32_t *)p0; 20426+ uint16_t * du = (uint16_t *)dst_u; 20427+ uint16_t * dv = (uint16_t *)dst_v; 20428+ 20429+ if (xskip0 != 0) { 20430+ const uint32_t p3a = *p++; 20431+ const uint32_t p3b = *p++; 20432+ 20433+ if (xskip0 == 1) 20434+ { 20435+ *du++ = (p3a >> 20) & 0x3ff; 20436+ *dv++ = (p3b >> 0) & 0x3ff; 20437+ } 20438+ *du++ = (p3b >> 10) & 0x3ff; 20439+ *dv++ = (p3b >> 20) & 0x3ff; 20440+ 20441+ if (((x += 8) & mask) == 0) 20442+ p += slice_inc; 20443+ } 20444+ 
20445+ while (x != x1) { 20446+ const uint32_t p3a = *p++; 20447+ const uint32_t p3b = *p++; 20448+ 20449+ *du++ = p3a & 0x3ff; 20450+ *dv++ = (p3a >> 10) & 0x3ff; 20451+ *du++ = (p3a >> 20) & 0x3ff; 20452+ *dv++ = p3b & 0x3ff; 20453+ *du++ = (p3b >> 10) & 0x3ff; 20454+ *dv++ = (p3b >> 20) & 0x3ff; 20455+ 20456+ if (((x += 8) & mask) == 0) 20457+ p += slice_inc; 20458+ } 20459+ 20460+ if (xrem1 != 0) { 20461+ const uint32_t p3a = *p++; 20462+ const uint32_t p3b = *p++; 20463+ 20464+ *du++ = p3a & 0x3ff; 20465+ *dv++ = (p3a >> 10) & 0x3ff; 20466+ if (xrem1 == 2) 20467+ { 20468+ *du++ = (p3a >> 20) & 0x3ff; 20469+ *dv++ = p3b & 0x3ff; 20470+ } 20471+ } 20472+ } 20473+} 20474+ 20475+// Fetches a single patch - offscreen fixup not done here 20476+// w <= stride1 20477+// single lose bottom 2 bits truncation 20478+// _x & _w in pixels, strides in bytes 20479+void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, 20480+ const uint8_t * src, 20481+ unsigned int stride1, unsigned int stride2, 20482+ unsigned int _x, unsigned int y, 20483+ unsigned int _w, unsigned int h) 20484+{ 20485+ const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word 20486+ const unsigned int xskip0 = _x - (x0 >> 2) * 3; 20487+ const unsigned int x1 = ((_x + _w) / 3) * 4; 20488+ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; 20489+ const unsigned int mask = stride1 - 1; 20490+ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; 20491+ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words 20492+ 20493+#if HAVE_SAND_ASM 20494+ if (_x == 0) { 20495+ ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); 20496+ return; 20497+ } 20498+#endif 20499+ 20500+ if (x0 == x1) { 20501+ // ******************* 20502+ // Partial single word xfer 20503+ return; 20504+ } 20505+ 20506+ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) 20507+ { 20508+ 
unsigned int x = x0; 20509+ const uint32_t * p = (const uint32_t *)p0; 20510+ uint8_t * d = dst; 20511+ 20512+ if (xskip0 != 0) { 20513+ const uint32_t p3 = *p++; 20514+ 20515+ if (xskip0 == 1) 20516+ *d++ = (p3 >> 12) & 0xff; 20517+ *d++ = (p3 >> 22) & 0xff; 20518+ 20519+ if (((x += 4) & mask) == 0) 20520+ p += slice_inc; 20521+ } 20522+ 20523+ while (x != x1) { 20524+ const uint32_t p3 = *p++; 20525+ *d++ = (p3 >> 2) & 0xff; 20526+ *d++ = (p3 >> 12) & 0xff; 20527+ *d++ = (p3 >> 22) & 0xff; 20528+ 20529+ if (((x += 4) & mask) == 0) 20530+ p += slice_inc; 20531+ } 20532+ 20533+ if (xrem1 != 0) { 20534+ const uint32_t p3 = *p; 20535+ 20536+ *d++ = (p3 >> 2) & 0xff; 20537+ if (xrem1 == 2) 20538+ *d++ = (p3 >> 12) & 0xff; 20539+ } 20540+ } 20541+} 20542+ 20543+ 20544+ 20545+// w/h in pixels 20546+void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, 20547+ const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, 20548+ unsigned int w, unsigned int h, const unsigned int shr) 20549+{ 20550+ const unsigned int n = dst_stride1 / 2; 20551+ unsigned int j; 20552+ 20553+ // This is true for our current layouts 20554+ av_assert0(dst_stride1 == src_stride1); 20555+ 20556+ // As we have the same stride1 for src & dest and src is wider than dest 20557+ // then if we loop on src we can always write contiguously to dest 20558+ // We make no effort to copy an exact width - round up to nearest src stripe 20559+ // as we will always have storage in dest for that 20560+ 20561+#if ARCH_ARM && HAVE_NEON 20562+ if (shr == 3 && src_stride1 == 128) { 20563+ for (j = 0; j + n < w; j += dst_stride1) { 20564+ uint8_t * d = dst + j * dst_stride2; 20565+ const uint8_t * s1 = src + j * 2 * src_stride2; 20566+ const uint8_t * s2 = s1 + src_stride1 * src_stride2; 20567+ 20568+ ff_rpi_sand128b_stripe_to_8_10(d, s1, s2, h); 20569+ } 20570+ } 20571+ else 20572+#endif 20573+ { 20574+ for (j = 0; j + n < w; j += 
dst_stride1) { 20575+ uint8_t * d = dst + j * dst_stride2; 20576+ const uint8_t * s1 = src + j * 2 * src_stride2; 20577+ const uint8_t * s2 = s1 + src_stride1 * src_stride2; 20578+ 20579+ for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, s2 += src_stride1, d += dst_stride1) { 20580+ cpy16_to_8(d, s1, n, shr); 20581+ cpy16_to_8(d + n, s2, n, shr); 20582+ } 20583+ } 20584+ } 20585+ 20586+ // Fix up a trailing dest half stripe 20587+ if (j < w) { 20588+ uint8_t * d = dst + j * dst_stride2; 20589+ const uint8_t * s1 = src + j * 2 * src_stride2; 20590+ 20591+ for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, d += dst_stride1) { 20592+ cpy16_to_8(d, s1, n, shr); 20593+ } 20594+ } 20595+} 20596+ 20597+int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src) 20598+{ 20599+ const int w = av_frame_cropped_width(src); 20600+ const int h = av_frame_cropped_height(src); 20601+ const int x = src->crop_left; 20602+ const int y = src->crop_top; 20603+ 20604+ // We will crop as part of the conversion 20605+ dst->crop_top = 0; 20606+ dst->crop_left = 0; 20607+ dst->crop_bottom = 0; 20608+ dst->crop_right = 0; 20609+ 20610+ switch (src->format){ 20611+ case AV_PIX_FMT_SAND128: 20612+ case AV_PIX_FMT_RPI4_8: 20613+ switch (dst->format){ 20614+ case AV_PIX_FMT_YUV420P: 20615+ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], 20616+ src->data[0], 20617+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), 20618+ x, y, w, h); 20619+ av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1], 20620+ dst->data[2], dst->linesize[2], 20621+ src->data[1], 20622+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), 20623+ x/2, y/2, w/2, h/2); 20624+ break; 20625+ case AV_PIX_FMT_NV12: 20626+ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], 20627+ src->data[0], 20628+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), 20629+ x, y, w, h); 20630+ av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1], 20631+ 
src->data[1], 20632+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), 20633+ x/2, y/2, w, h/2); 20634+ break; 20635+ default: 20636+ return -1; 20637+ } 20638+ break; 20639+ case AV_PIX_FMT_SAND64_10: 20640+ switch (dst->format){ 20641+ case AV_PIX_FMT_YUV420P10: 20642+ av_rpi_sand_to_planar_y16(dst->data[0], dst->linesize[0], 20643+ src->data[0], 20644+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), 20645+ x*2, y, w*2, h); 20646+ av_rpi_sand_to_planar_c16(dst->data[1], dst->linesize[1], 20647+ dst->data[2], dst->linesize[2], 20648+ src->data[1], 20649+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), 20650+ x, y/2, w, h/2); 20651+ break; 20652+ default: 20653+ return -1; 20654+ } 20655+ break; 20656+ case AV_PIX_FMT_RPI4_10: 20657+ switch (dst->format){ 20658+ case AV_PIX_FMT_YUV420P10: 20659+ av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0], 20660+ src->data[0], 20661+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), 20662+ x, y, w, h); 20663+ av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1], 20664+ dst->data[2], dst->linesize[2], 20665+ src->data[1], 20666+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), 20667+ x/2, y/2, w/2, h/2); 20668+ break; 20669+ case AV_PIX_FMT_NV12: 20670+ av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0], 20671+ src->data[0], 20672+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), 20673+ x, y, w, h); 20674+ av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1], 20675+ src->data[1], 20676+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), 20677+ x/2, y/2, w, h/2); 20678+ break; 20679+ default: 20680+ return -1; 20681+ } 20682+ break; 20683+ default: 20684+ return -1; 20685+ } 20686+ 20687+ return av_frame_copy_props(dst, src); 20688+} 20689--- /dev/null 20690+++ b/libavutil/rpi_sand_fns.h 20691@@ -0,0 +1,188 @@ 20692+/* 20693+Copyright (c) 2018 Raspberry Pi (Trading) Ltd. 20694+All rights reserved. 
20695+ 20696+Redistribution and use in source and binary forms, with or without 20697+modification, are permitted provided that the following conditions are met: 20698+ * Redistributions of source code must retain the above copyright 20699+ notice, this list of conditions and the following disclaimer. 20700+ * Redistributions in binary form must reproduce the above copyright 20701+ notice, this list of conditions and the following disclaimer in the 20702+ documentation and/or other materials provided with the distribution. 20703+ * Neither the name of the copyright holder nor the 20704+ names of its contributors may be used to endorse or promote products 20705+ derived from this software without specific prior written permission. 20706+ 20707+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20708+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20709+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20710+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 20711+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20712+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20713+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20714+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20715+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 20716+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
20717+ 20718+Authors: John Cox 20719+*/ 20720+ 20721+#ifndef AVUTIL_RPI_SAND_FNS 20722+#define AVUTIL_RPI_SAND_FNS 20723+ 20724+#include "libavutil/frame.h" 20725+ 20726+// For all these fns _x & _w are measured as coord * PW 20727+// For the C fns coords are in chroma pels (so luma / 2) 20728+// Strides are in bytes 20729+ 20730+void av_rpi_sand_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, 20731+ const uint8_t * src, 20732+ unsigned int stride1, unsigned int stride2, 20733+ unsigned int _x, unsigned int y, 20734+ unsigned int _w, unsigned int h); 20735+void av_rpi_sand_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, 20736+ const uint8_t * src, 20737+ unsigned int stride1, unsigned int stride2, 20738+ unsigned int _x, unsigned int y, 20739+ unsigned int _w, unsigned int h); 20740+ 20741+void av_rpi_sand_to_planar_c8(uint8_t * dst_u, const unsigned int dst_stride_u, 20742+ uint8_t * dst_v, const unsigned int dst_stride_v, 20743+ const uint8_t * src, 20744+ unsigned int stride1, unsigned int stride2, 20745+ unsigned int _x, unsigned int y, 20746+ unsigned int _w, unsigned int h); 20747+void av_rpi_sand_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, 20748+ uint8_t * dst_v, const unsigned int dst_stride_v, 20749+ const uint8_t * src, 20750+ unsigned int stride1, unsigned int stride2, 20751+ unsigned int _x, unsigned int y, 20752+ unsigned int _w, unsigned int h); 20753+ 20754+void av_rpi_planar_to_sand_c8(uint8_t * dst_c, 20755+ unsigned int stride1, unsigned int stride2, 20756+ const uint8_t * src_u, const unsigned int src_stride_u, 20757+ const uint8_t * src_v, const unsigned int src_stride_v, 20758+ unsigned int _x, unsigned int y, 20759+ unsigned int _w, unsigned int h); 20760+void av_rpi_planar_to_sand_c16(uint8_t * dst_c, 20761+ unsigned int stride1, unsigned int stride2, 20762+ const uint8_t * src_u, const unsigned int src_stride_u, 20763+ const uint8_t * src_v, const unsigned int src_stride_v, 20764+ unsigned int _x, 
unsigned int y, 20765+ unsigned int _w, unsigned int h); 20766+ 20767+void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, 20768+ const uint8_t * src, 20769+ unsigned int stride1, unsigned int stride2, 20770+ unsigned int _x, unsigned int y, 20771+ unsigned int _w, unsigned int h); 20772+void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, 20773+ uint8_t * dst_v, const unsigned int dst_stride_v, 20774+ const uint8_t * src, 20775+ unsigned int stride1, unsigned int stride2, 20776+ unsigned int _x, unsigned int y, 20777+ unsigned int _w, unsigned int h); 20778+ 20779+void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, 20780+ const uint8_t * src, 20781+ unsigned int stride1, unsigned int stride2, 20782+ unsigned int _x, unsigned int y, 20783+ unsigned int _w, unsigned int h); 20784+ 20785+// w/h in pixels 20786+void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, 20787+ const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, 20788+ unsigned int w, unsigned int h, const unsigned int shr); 20789+ 20790+ 20791+// dst must contain required pixel format & allocated data buffers 20792+// Cropping on the src buffer will be honoured and dst crop will be set to zero 20793+int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src); 20794+ 20795+ 20796+static inline unsigned int av_rpi_sand_frame_stride1(const AVFrame * const frame) 20797+{ 20798+#ifdef RPI_ZC_SAND128_ONLY 20799+ // If we are sure we only only support 128 byte sand formats replace the 20800+ // var with a constant which should allow for better optimisation 20801+ return 128; 20802+#else 20803+ return frame->linesize[0]; 20804+#endif 20805+} 20806+ 20807+static inline unsigned int av_rpi_sand_frame_stride2(const AVFrame * const frame) 20808+{ 20809+ return frame->linesize[3]; 20810+} 20811+ 20812+ 20813+static inline int 
av_rpi_is_sand_format(const int format) 20814+{ 20815+ return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_RPI4_10); 20816+} 20817+ 20818+static inline int av_rpi_is_sand_frame(const AVFrame * const frame) 20819+{ 20820+ return av_rpi_is_sand_format(frame->format); 20821+} 20822+ 20823+static inline int av_rpi_is_sand8_frame(const AVFrame * const frame) 20824+{ 20825+ return (frame->format == AV_PIX_FMT_SAND128 || frame->format == AV_PIX_FMT_RPI4_8); 20826+} 20827+ 20828+static inline int av_rpi_is_sand16_frame(const AVFrame * const frame) 20829+{ 20830+ return (frame->format >= AV_PIX_FMT_SAND64_10 && frame->format <= AV_PIX_FMT_SAND64_16); 20831+} 20832+ 20833+static inline int av_rpi_is_sand30_frame(const AVFrame * const frame) 20834+{ 20835+ return (frame->format == AV_PIX_FMT_RPI4_10); 20836+} 20837+ 20838+static inline int av_rpi_sand_frame_xshl(const AVFrame * const frame) 20839+{ 20840+ return av_rpi_is_sand8_frame(frame) ? 0 : 1; 20841+} 20842+ 20843+// If x is measured in bytes (not pixels) then this works for sand64_16 as 20844+// well as sand128 - but in the general case we work that out 20845+ 20846+static inline unsigned int av_rpi_sand_frame_off_y(const AVFrame * const frame, const unsigned int x_y, const unsigned int y) 20847+{ 20848+ const unsigned int stride1 = av_rpi_sand_frame_stride1(frame); 20849+ const unsigned int stride2 = av_rpi_sand_frame_stride2(frame); 20850+ const unsigned int x = x_y << av_rpi_sand_frame_xshl(frame); 20851+ const unsigned int x1 = x & (stride1 - 1); 20852+ const unsigned int x2 = x ^ x1; 20853+ 20854+ return x1 + stride1 * y + stride2 * x2; 20855+} 20856+ 20857+static inline unsigned int av_rpi_sand_frame_off_c(const AVFrame * const frame, const unsigned int x_c, const unsigned int y_c) 20858+{ 20859+ const unsigned int stride1 = av_rpi_sand_frame_stride1(frame); 20860+ const unsigned int stride2 = av_rpi_sand_frame_stride2(frame); 20861+ const unsigned int x = x_c << (av_rpi_sand_frame_xshl(frame) + 1); 
20862+ const unsigned int x1 = x & (stride1 - 1); 20863+ const unsigned int x2 = x ^ x1; 20864+ 20865+ return x1 + stride1 * y_c + stride2 * x2; 20866+} 20867+ 20868+static inline uint8_t * av_rpi_sand_frame_pos_y(const AVFrame * const frame, const unsigned int x, const unsigned int y) 20869+{ 20870+ return frame->data[0] + av_rpi_sand_frame_off_y(frame, x, y); 20871+} 20872+ 20873+static inline uint8_t * av_rpi_sand_frame_pos_c(const AVFrame * const frame, const unsigned int x, const unsigned int y) 20874+{ 20875+ return frame->data[1] + av_rpi_sand_frame_off_c(frame, x, y); 20876+} 20877+ 20878+#endif 20879+ 20880--- a/libswscale/aarch64/rgb2rgb.c 20881+++ b/libswscale/aarch64/rgb2rgb.c 20882@@ -30,6 +30,12 @@ 20883 void ff_interleave_bytes_neon(const uint8_t *src1, const uint8_t *src2, 20884 uint8_t *dest, int width, int height, 20885 int src1Stride, int src2Stride, int dstStride); 20886+void ff_bgr24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, 20887+ uint8_t *vdst, int width, int height, int lumStride, 20888+ int chromStride, int srcStride, int32_t *rgb2yuv); 20889+void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, 20890+ uint8_t *vdst, int width, int height, int lumStride, 20891+ int chromStride, int srcStride, int32_t *rgb2yuv); 20892 20893 av_cold void rgb2rgb_init_aarch64(void) 20894 { 20895@@ -37,5 +43,7 @@ av_cold void rgb2rgb_init_aarch64(void) 20896 20897 if (have_neon(cpu_flags)) { 20898 interleaveBytes = ff_interleave_bytes_neon; 20899+ ff_rgb24toyv12 = ff_rgb24toyv12_aarch64; 20900+ ff_bgr24toyv12 = ff_bgr24toyv12_aarch64; 20901 } 20902 } 20903--- a/libswscale/aarch64/rgb2rgb_neon.S 20904+++ b/libswscale/aarch64/rgb2rgb_neon.S 20905@@ -77,3 +77,359 @@ function ff_interleave_bytes_neon, expor 20906 0: 20907 ret 20908 endfunc 20909+ 20910+// Expand rgb2 into r0+r1/g0+g1/b0+b1 20911+.macro XRGB3Y r0, g0, b0, r1, g1, b1, r2, g2, b2 20912+ uxtl \r0\().8h, \r2\().8b 20913+ uxtl \g0\().8h, \g2\().8b 20914+ uxtl 
// Expand rgb2 into r0+r1/g0+g1/b0+b1
// Zero-extends the 16 bytes of each of r2/g2/b2 to halfwords: the low 8
// bytes go to r0/g0/b0, the high 8 bytes to r1/g1/b1.
.macro XRGB3Y r0, g0, b0, r1, g1, b1, r2, g2, b2
    uxtl        \r0\().8h, \r2\().8b
    uxtl        \g0\().8h, \g2\().8b
    uxtl        \b0\().8h, \b2\().8b

    uxtl2       \r1\().8h, \r2\().16b
    uxtl2       \g1\().8h, \g2\().16b
    uxtl2       \b1\().8h, \b2\().16b
.endm

// Expand rgb2 into r0+r1/g0+g1/b0+b1
// and pick every other el to put back into rgb2 for chroma
// (clearing the top byte of each halfword keeps the even-numbered bytes,
// already zero-extended to halfwords)
.macro XRGB3YC r0, g0, b0, r1, g1, b1, r2, g2, b2
    XRGB3Y      \r0, \g0, \b0, \r1, \g1, \b1, \r2, \g2, \b2

    bic         \r2\().8h, #0xff, LSL #8
    bic         \g2\().8h, #0xff, LSL #8
    bic         \b2\().8h, #0xff, LSL #8
.endm

// d0:d1 (8 x 32-bit) = s0 * c0 + s1 * c1 + s2 * c2
// s0-s2 are 8 x 16-bit vectors; d0 gets the low four lanes, d1 the high four
.macro SMLAL3 d0, d1, s0, s1, s2, c0, c1, c2
    smull       \d0\().4s, \s0\().4h, \c0
    smlal       \d0\().4s, \s1\().4h, \c1
    smlal       \d0\().4s, \s2\().4h, \c2
    smull2      \d1\().4s, \s0\().8h, \c0
    smlal2      \d1\().4s, \s1\().8h, \c1
    smlal2      \d1\().4s, \s2\().8h, \c2
.endm

// Narrow 16 32-bit luma sums (s0:s1 = first 8, s2:s3 = last 8) to 16
// unsigned bytes in d0, adding the offset held in k128h before the final
// rounding shift.
// d0 may be s0
// s0, s2 corrupted
.macro SHRN_Y d0, s0, s1, s2, s3, k128h
    shrn        \s0\().4h, \s0\().4s, #12
    shrn2       \s0\().8h, \s1\().4s, #12
    add         \s0\().8h, \s0\().8h, \k128h\().8h    // +128 (>> 3 = 16)
    sqrshrun    \d0\().8b, \s0\().8h, #3
    shrn        \s2\().4h, \s2\().4s, #12
    shrn2       \s2\().8h, \s3\().4s, #12
    add         \s2\().8h, \s2\().8h, \k128h\().8h
    // Narrow the register that was just computed (was hard-coded to v28,
    // which is only right when the caller happens to pass v28 as s2)
    sqrshrun2   \d0\().16b, \s2\().8h, #3
.endm

// Narrow 8 32-bit chroma sums (s0:s1) to 8 bytes in d0 and add the byte
// offset held in k128b.
// s0 corrupted
.macro SHRN_C d0, s0, s1, k128b
    shrn        \s0\().4h, \s0\().4s, #14
    shrn2       \s0\().8h, \s1\().4s, #14
    sqrshrn     \s0\().8b, \s0\().8h, #1
    add         \d0\().8b, \s0\().8b, \k128b\().8b    // +128
.endm

// Store byte lanes n and n+1 of s0 to [a], post-incrementing a by 1 each time
.macro STB2V s0, n, a
    st1         {\s0\().b}[(\n+0)], [\a], #1
    st1         {\s0\().b}[(\n+1)], [\a], #1
.endm

// Store byte lanes n .. n+3 of s0 to [a], post-incrementing a
.macro STB4V s0, n, a
    STB2V       \s0, (\n+0), \a
    STB2V       \s0, (\n+2), \a
.endm


// void ff_rgb24toyv12_aarch64(
//              const uint8_t *src,             // x0
//              uint8_t *ydst,                  // x1
//              uint8_t *udst,                  // x2
//              uint8_t *vdst,                  // x3
//              int width,                      // w4
//              int height,                     // w5
//              int lumStride,                  // w6
//              int chromStride,                // w7
//
int chromStride, // w7 20980+// int srcStr, // [sp, #0] 20981+// int32_t *rgb2yuv); // [sp, #8] 20982+ 20983+function ff_rgb24toyv12_aarch64, export=1 20984+ ldr x15, [sp, #8] 20985+ ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 20986+ ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 20987+ ld3 {v3.s, v4.s, v5.s}[2], [x15] 20988+ mov v6.16b, v3.16b 20989+ mov v3.16b, v5.16b 20990+ mov v5.16b, v6.16b 20991+ b 99f 20992+endfunc 20993+ 20994+// void ff_bgr24toyv12_aarch64( 20995+// const uint8_t *src, // x0 20996+// uint8_t *ydst, // x1 20997+// uint8_t *udst, // x2 20998+// uint8_t *vdst, // x3 20999+// int width, // w4 21000+// int height, // w5 21001+// int lumStride, // w6 21002+// int chromStride, // w7 21003+// int srcStr, // [sp, #0] 21004+// int32_t *rgb2yuv); // [sp, #8] (including Mac) 21005+ 21006+// regs 21007+// v0-2 Src bytes - reused as chroma src 21008+// v3-5 Coeffs (packed very inefficiently - could be squashed) 21009+// v6 128h 21010+// v7 128b 21011+// v8-15 Reserved 21012+// v16-18 Lo Src expanded as H 21013+// v19 - 21014+// v20-22 Hi Src expanded as H 21015+// v23 - 21016+// v24 U out 21017+// v25 U tmp 21018+// v26 Y out 21019+// v27-29 Y tmp 21020+// v30 V out 21021+// v31 V tmp 21022+ 21023+function ff_bgr24toyv12_aarch64, export=1 21024+ ldr x15, [sp, #8] 21025+ ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 21026+ ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 21027+ ld3 {v3.s, v4.s, v5.s}[2], [x15] 21028+ 21029+99: 21030+ ldr w14, [sp, #0] 21031+ movi v7.8b, #128 // v7 = 128 in each byte 21032+ uxtl v6.8h, v7.8b // v6 = 128 in each halfword 21033+ // Ensure if nothing to do then we do nothing 21034+ cmp w4, #0 21035+ b.le 90f 21036+ cmp w5, #0 21037+ b.le 90f 21038+ // If w % 16 != 0 then -16 so we do main loop 1 fewer times with 21039+ // the remainder done in the tail 21040+ tst w4, #15 21041+ b.eq 1f 21042+ sub w4, w4, #16 21043+1: 21044+ 21045+// -------------------- Even line body - YUV 21046+11: 21047+ subs w9, w4, #0 21048+ mov x10, x0 21049+ mov x11, x1 21050+ mov x12, x2 21051+ mov x13, x3 21052+ b.lt 12f 21053+ 21054+
ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 21055+ subs w9, w9, #16 21056+ b.le 13f 21057+ 21058+10: 21059+ XRGB3YC v16, v17, v18, v20, v21, v22, v0, v1, v2 21060+ 21061+ // Testing shows it is faster to stack the smull/smlal ops together 21062+ // rather than interleave them between channels and indeed even the 21063+ // shift/add sections seem happier not interleaved 21064+ 21065+ // Y0 21066+ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] 21067+ // Y1 21068+ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] 21069+ SHRN_Y v26, v26, v27, v28, v29, v6 21070+ 21071+ // U 21072+ // Vector subscript *2 as we loaded into S but are only using H 21073+ SMLAL3 v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2] 21074+ 21075+ // V 21076+ SMLAL3 v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4] 21077+ 21078+ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 21079+ 21080+ SHRN_C v24, v24, v25, v7 21081+ SHRN_C v30, v30, v31, v7 21082+ 21083+ subs w9, w9, #16 21084+ 21085+ st1 {v26.16b}, [x11], #16 21086+ st1 {v24.8b}, [x12], #8 21087+ st1 {v30.8b}, [x13], #8 21088+ 21089+ b.gt 10b 21090+ 21091+// -------------------- Even line tail - YUV 21092+// If width % 16 == 0 then simply runs once with preloaded RGB 21093+// If other then deals with preload & then does remaining tail 21094+ 21095+13: 21096+ // Body is simple copy of main loop body minus preload 21097+ 21098+ XRGB3YC v16, v17, v18, v20, v21, v22, v0, v1, v2 21099+ // Y0 21100+ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] 21101+ // Y1 21102+ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] 21103+ SHRN_Y v26, v26, v27, v28, v29, v6 21104+ // U 21105+ SMLAL3 v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2] 21106+ // V 21107+ SMLAL3 v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4] 21108+ 21109+ cmp w9, #-16 21110+ 21111+ SHRN_C v24, v24, v25, v7 21112+ SHRN_C v30, v30, v31, v7 21113+ 21114+ // Here: 21115+ // w9 == 0 width % 16 == 0, tail done 21116+ // w9 > -16 1st tail done (16 pels), remainder still 
to go 21117+ // w9 == -16 shouldn't happen 21118+ // w9 > -32 2nd tail done 21119+ // w9 <= -32 shouldn't happen 21120+ 21121+ b.lt 2f 21122+ st1 {v26.16b}, [x11], #16 21123+ st1 {v24.8b}, [x12], #8 21124+ st1 {v30.8b}, [x13], #8 21125+ cbz w9, 3f 21126+ 21127+12: 21128+ sub w9, w9, #16 21129+ 21130+ tbz w9, #3, 1f 21131+ ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 21132+1: tbz w9, #2, 1f 21133+ ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 21134+ ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 21135+ ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 21136+ ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 21137+1: tbz w9, #1, 1f 21138+ ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 21139+ ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 21140+1: tbz w9, #0, 13b 21141+ ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 21142+ b 13b 21143+ 21144+2: 21145+ tbz w9, #3, 1f 21146+ st1 {v26.8b}, [x11], #8 21147+ STB4V v24, 0, x12 21148+ STB4V v30, 0, x13 21149+1: tbz w9, #2, 1f 21150+ STB4V v26 8, x11 21151+ STB2V v24, 4, x12 21152+ STB2V v30, 4, x13 21153+1: tbz w9, #1, 1f 21154+ STB2V v26, 12, x11 21155+ st1 {v24.b}[6], [x12], #1 21156+ st1 {v30.b}[6], [x13], #1 21157+1: tbz w9, #0, 1f 21158+ st1 {v26.b}[14], [x11] 21159+ st1 {v24.b}[7], [x12] 21160+ st1 {v30.b}[7], [x13] 21161+1: 21162+3: 21163+ 21164+// -------------------- Odd line body - Y only 21165+ 21166+ subs w5, w5, #1 21167+ b.eq 90f 21168+ 21169+ subs w9, w4, #0 21170+ add x0, x0, w14, sxtw 21171+ add x1, x1, w6, sxtw 21172+ mov x10, x0 21173+ mov x11, x1 21174+ b.lt 12f 21175+ 21176+ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 21177+ subs w9, w9, #16 21178+ b.le 13f 21179+ 21180+10: 21181+ XRGB3Y v16, v17, v18, v20, v21, v22, v0, v1, v2 21182+ // Y0 21183+ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] 21184+ // Y1 21185+ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] 21186+ 21187+ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 21188+ 21189+ SHRN_Y v26, v26, v27, v28, v29, v6 21190+ 21191+ subs w9, w9, #16 21192+ 21193+ st1 {v26.16b}, [x11], #16 21194+ 21195+ b.gt 10b 21196+ 
21197+// -------------------- Odd line tail - Y 21198+// If width % 16 == 0 then simply runs once with preloaded RGB 21199+// If other then deals with preload & then does remaining tail 21200+ 21201+13: 21202+ // Body is simple copy of main loop body minus preload 21203+ 21204+ XRGB3Y v16, v17, v18, v20, v21, v22, v0, v1, v2 21205+ // Y0 21206+ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] 21207+ // Y1 21208+ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] 21209+ 21210+ cmp w9, #-16 21211+ 21212+ SHRN_Y v26, v26, v27, v28, v29, v6 21213+ 21214+ // Here: 21215+ // w9 == 0 width % 16 == 0, tail done 21216+ // w9 > -16 1st tail done (16 pels), remainder still to go 21217+ // w9 == -16 shouldn't happen 21218+ // w9 > -32 2nd tail done 21219+ // w9 <= -32 shouldn't happen 21220+ 21221+ b.lt 2f 21222+ st1 {v26.16b}, [x11], #16 21223+ cbz w9, 3f 21224+ 21225+12: 21226+ sub w9, w9, #16 21227+ 21228+ tbz w9, #3, 1f 21229+ ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 21230+1: tbz w9, #2, 1f 21231+ ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 21232+ ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 21233+ ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 21234+ ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 21235+1: tbz w9, #1, 1f 21236+ ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 21237+ ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 21238+1: tbz w9, #0, 13b 21239+ ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 21240+ b 13b 21241+ 21242+2: 21243+ tbz w9, #3, 1f 21244+ st1 {v26.8b}, [x11], #8 21245+1: tbz w9, #2, 1f 21246+ STB4V v26, 8, x11 21247+1: tbz w9, #1, 1f 21248+ STB2V v26, 12, x11 21249+1: tbz w9, #0, 1f 21250+ st1 {v26.b}[14], [x11] 21251+1: 21252+3: 21253+ 21254+// ------------------- Loop to start 21255+ 21256+ add x0, x0, w14, sxtw 21257+ add x1, x1, w6, sxtw 21258+ add x2, x2, w7, sxtw 21259+ add x3, x3, w7, sxtw 21260+ subs w5, w5, #1 21261+ b.gt 11b 21262+90: 21263+ ret 21264+endfunc 21265--- a/libswscale/rgb2rgb.c 21266+++ b/libswscale/rgb2rgb.c 21267@@ -83,6 +83,31 @@ void (*ff_rgb24toyv12)(const uint8_t *sr 
21268 int width, int height, 21269 int lumStride, int chromStride, int srcStride, 21270 int32_t *rgb2yuv); 21271+void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, 21272+ uint8_t *udst, uint8_t *vdst, 21273+ int width, int height, 21274+ int lumStride, int chromStride, int srcStride, 21275+ int32_t *rgb2yuv); 21276+void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, 21277+ uint8_t *udst, uint8_t *vdst, 21278+ int width, int height, 21279+ int lumStride, int chromStride, int srcStride, 21280+ int32_t *rgb2yuv); 21281+void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, 21282+ uint8_t *udst, uint8_t *vdst, 21283+ int width, int height, 21284+ int lumStride, int chromStride, int srcStride, 21285+ int32_t *rgb2yuv); 21286+void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, 21287+ uint8_t *udst, uint8_t *vdst, 21288+ int width, int height, 21289+ int lumStride, int chromStride, int srcStride, 21290+ int32_t *rgb2yuv); 21291+void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, 21292+ uint8_t *udst, uint8_t *vdst, 21293+ int width, int height, 21294+ int lumStride, int chromStride, int srcStride, 21295+ int32_t *rgb2yuv); 21296 void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, 21297 int srcStride, int dstStride); 21298 void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, 21299--- a/libswscale/rgb2rgb.h 21300+++ b/libswscale/rgb2rgb.h 21301@@ -79,6 +79,9 @@ void rgb12to15(const uint8_t *src, ui 21302 void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, 21303 uint8_t *vdst, int width, int height, int lumStride, 21304 int chromStride, int srcStride, int32_t *rgb2yuv); 21305+void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, 21306+ uint8_t *vdst, int width, int height, int lumStride, 21307+ int chromStride, int srcStride, int32_t *rgb2yuv); 21308 21309 /** 21310 * Height should be a multiple of 2 and width should be a multiple of 16. 
21311@@ -128,6 +131,26 @@ extern void (*ff_rgb24toyv12)(const uint 21312 int width, int height, 21313 int lumStride, int chromStride, int srcStride, 21314 int32_t *rgb2yuv); 21315+extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 21316+ int width, int height, 21317+ int lumStride, int chromStride, int srcStride, 21318+ int32_t *rgb2yuv); 21319+extern void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 21320+ int width, int height, 21321+ int lumStride, int chromStride, int srcStride, 21322+ int32_t *rgb2yuv); 21323+extern void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 21324+ int width, int height, 21325+ int lumStride, int chromStride, int srcStride, 21326+ int32_t *rgb2yuv); 21327+extern void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 21328+ int width, int height, 21329+ int lumStride, int chromStride, int srcStride, 21330+ int32_t *rgb2yuv); 21331+extern void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 21332+ int width, int height, 21333+ int lumStride, int chromStride, int srcStride, 21334+ int32_t *rgb2yuv); 21335 extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, 21336 int srcStride, int dstStride); 21337 21338--- a/libswscale/rgb2rgb_template.c 21339+++ b/libswscale/rgb2rgb_template.c 21340@@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const ui 21341 * others are ignored in the C version. 21342 * FIXME: Write HQ version. 
21343 */ 21344-void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, 21345+static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, 21346 uint8_t *vdst, int width, int height, int lumStride, 21347- int chromStride, int srcStride, int32_t *rgb2yuv) 21348+ int chromStride, int srcStride, int32_t *rgb2yuv, 21349+ const uint8_t x[9]) 21350 { 21351- int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; 21352- int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; 21353- int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; 21354+ int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; 21355+ int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; 21356+ int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; 21357 int y; 21358 const int chromWidth = width >> 1; 21359 21360@@ -678,6 +679,19 @@ void ff_rgb24toyv12_c(const uint8_t *src 21361 Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; 21362 ydst[2 * i + 1] = Y; 21363 } 21364+ if ((width & 1) != 0) { 21365+ unsigned int b = src[6 * i + 0]; 21366+ unsigned int g = src[6 * i + 1]; 21367+ unsigned int r = src[6 * i + 2]; 21368+ 21369+ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; 21370+ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; 21371+ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; 21372+ 21373+ udst[i] = U; 21374+ vdst[i] = V; 21375+ ydst[2 * i] = Y; 21376+ } 21377 ydst += lumStride; 21378 src += srcStride; 21379 21380@@ -700,6 +714,15 @@ void ff_rgb24toyv12_c(const uint8_t *src 21381 Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; 21382 ydst[2 * i + 1] = Y; 21383 } 21384+ if ((width & 1) != 0) { 21385+ unsigned int b = src[6 * i + 0]; 21386+ unsigned int g = src[6 * i + 1]; 21387+ unsigned int r = src[6 * i + 2]; 21388+ 21389+ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; 21390+ 
21391+ ydst[2 * i] = Y; 21392+ } 21393 udst += chromStride; 21394 vdst += chromStride; 21395 ydst += lumStride; 21396@@ -707,6 +730,147 @@ void ff_rgb24toyv12_c(const uint8_t *src 21397 } 21398 } 21399 21400+static const uint8_t x_rgb[9] = { 21401+ RY_IDX, GY_IDX, BY_IDX, 21402+ RU_IDX, GU_IDX, BU_IDX, 21403+ RV_IDX, GV_IDX, BV_IDX, 21404+}; 21405+ 21406+static const uint8_t x_bgr[9] = { 21407+ BY_IDX, GY_IDX, RY_IDX, 21408+ BU_IDX, GU_IDX, RU_IDX, 21409+ BV_IDX, GV_IDX, RV_IDX, 21410+}; 21411+ 21412+void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, 21413+ uint8_t *vdst, int width, int height, int lumStride, 21414+ int chromStride, int srcStride, int32_t *rgb2yuv) 21415+{ 21416+ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); 21417+} 21418+ 21419+void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, 21420+ uint8_t *vdst, int width, int height, int lumStride, 21421+ int chromStride, int srcStride, int32_t *rgb2yuv) 21422+{ 21423+ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); 21424+} 21425+ 21426+static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, 21427+ uint8_t *vdst, int width, int height, int lumStride, 21428+ int chromStride, int srcStride, int32_t *rgb2yuv, 21429+ const uint8_t x[9]) 21430+{ 21431+ int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; 21432+ int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; 21433+ int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; 21434+ int y; 21435+ const int chromWidth = width >> 1; 21436+ 21437+ for (y = 0; y < height; y += 2) { 21438+ int i; 21439+ for (i = 0; i < chromWidth; i++) { 21440+ unsigned int b = src[8 * i + 2]; 21441+ unsigned int g = src[8 * i + 1]; 21442+ unsigned int r = src[8 * i + 0]; 21443+ 21444+ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; 21445+ unsigned int 
V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; 21446+ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; 21447+ 21448+ udst[i] = U; 21449+ vdst[i] = V; 21450+ ydst[2 * i] = Y; 21451+ 21452+ b = src[8 * i + 6]; 21453+ g = src[8 * i + 5]; 21454+ r = src[8 * i + 4]; 21455+ 21456+ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; 21457+ ydst[2 * i + 1] = Y; 21458+ } 21459+ if ((width & 1) != 0) { 21460+ unsigned int b = src[8 * i + 2]; 21461+ unsigned int g = src[8 * i + 1]; 21462+ unsigned int r = src[8 * i + 0]; 21463+ 21464+ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; 21465+ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; 21466+ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; 21467+ 21468+ udst[i] = U; 21469+ vdst[i] = V; 21470+ ydst[2 * i] = Y; 21471+ } 21472+ ydst += lumStride; 21473+ src += srcStride; 21474+ 21475+ if (y+1 == height) 21476+ break; 21477+ 21478+ for (i = 0; i < chromWidth; i++) { 21479+ unsigned int b = src[8 * i + 2]; 21480+ unsigned int g = src[8 * i + 1]; 21481+ unsigned int r = src[8 * i + 0]; 21482+ 21483+ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; 21484+ 21485+ ydst[2 * i] = Y; 21486+ 21487+ b = src[8 * i + 6]; 21488+ g = src[8 * i + 5]; 21489+ r = src[8 * i + 4]; 21490+ 21491+ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; 21492+ ydst[2 * i + 1] = Y; 21493+ } 21494+ if ((width & 1) != 0) { 21495+ unsigned int b = src[8 * i + 2]; 21496+ unsigned int g = src[8 * i + 1]; 21497+ unsigned int r = src[8 * i + 0]; 21498+ 21499+ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; 21500+ 21501+ ydst[2 * i] = Y; 21502+ } 21503+ udst += chromStride; 21504+ vdst += chromStride; 21505+ ydst += lumStride; 21506+ src += srcStride; 21507+ } 21508+} 21509+ 21510+static void ff_rgbxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, 21511+ uint8_t *vdst, int width, int height, int lumStride, 21512+ 
int chromStride, int srcStride, int32_t *rgb2yuv) 21513+{ 21514+ rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); 21515+} 21516+ 21517+static void ff_bgrxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, 21518+ uint8_t *vdst, int width, int height, int lumStride, 21519+ int chromStride, int srcStride, int32_t *rgb2yuv) 21520+{ 21521+ rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); 21522+} 21523+ 21524+// As the general code does no SIMD-like ops simply adding 1 to the src address 21525+// will fix the ignored alpha position 21526+static void ff_xrgbtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, 21527+ uint8_t *vdst, int width, int height, int lumStride, 21528+ int chromStride, int srcStride, int32_t *rgb2yuv) 21529+{ 21530+ rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); 21531+} 21532+ 21533+static void ff_xbgrtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, 21534+ uint8_t *vdst, int width, int height, int lumStride, 21535+ int chromStride, int srcStride, int32_t *rgb2yuv) 21536+{ 21537+ rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); 21538+} 21539+ 21540+ 21541 static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2, 21542 uint8_t *dest, int width, int height, 21543 int src1Stride, int src2Stride, int dstStride) 21544@@ -980,6 +1144,11 @@ static av_cold void rgb2rgb_init_c(void) 21545 yuy2toyv12 = yuy2toyv12_c; 21546 planar2x = planar2x_c; 21547 ff_rgb24toyv12 = ff_rgb24toyv12_c; 21548+ ff_bgr24toyv12 = ff_bgr24toyv12_c; 21549+ ff_rgbxtoyv12 = ff_rgbxtoyv12_c; 21550+ ff_bgrxtoyv12 = ff_bgrxtoyv12_c; 21551+ ff_xrgbtoyv12 = ff_xrgbtoyv12_c; 21552+ ff_xbgrtoyv12 = ff_xbgrtoyv12_c; 21553 interleaveBytes = interleaveBytes_c; 21554 deinterleaveBytes = deinterleaveBytes_c; 21555 vu9_to_vu12 = 
vu9_to_vu12_c; 21556--- a/libswscale/swscale_unscaled.c 21557+++ b/libswscale/swscale_unscaled.c 21558@@ -1654,6 +1654,91 @@ static int bgr24ToYv12Wrapper(SwsContext 21559 return srcSliceH; 21560 } 21561 21562+static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], 21563+ int srcStride[], int srcSliceY, int srcSliceH, 21564+ uint8_t *dst[], int dstStride[]) 21565+{ 21566+ ff_bgr24toyv12( 21567+ src[0], 21568+ dst[0] + srcSliceY * dstStride[0], 21569+ dst[1] + (srcSliceY >> 1) * dstStride[1], 21570+ dst[2] + (srcSliceY >> 1) * dstStride[2], 21571+ c->srcW, srcSliceH, 21572+ dstStride[0], dstStride[1], srcStride[0], 21573+ c->input_rgb2yuv_table); 21574+ if (dst[3]) 21575+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); 21576+ return srcSliceH; 21577+} 21578+ 21579+static int bgrxToYv12Wrapper(SwsContext *c, const uint8_t *src[], 21580+ int srcStride[], int srcSliceY, int srcSliceH, 21581+ uint8_t *dst[], int dstStride[]) 21582+{ 21583+ ff_bgrxtoyv12( 21584+ src[0], 21585+ dst[0] + srcSliceY * dstStride[0], 21586+ dst[1] + (srcSliceY >> 1) * dstStride[1], 21587+ dst[2] + (srcSliceY >> 1) * dstStride[2], 21588+ c->srcW, srcSliceH, 21589+ dstStride[0], dstStride[1], srcStride[0], 21590+ c->input_rgb2yuv_table); 21591+ if (dst[3]) 21592+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); 21593+ return srcSliceH; 21594+} 21595+ 21596+static int rgbxToYv12Wrapper(SwsContext *c, const uint8_t *src[], 21597+ int srcStride[], int srcSliceY, int srcSliceH, 21598+ uint8_t *dst[], int dstStride[]) 21599+{ 21600+ ff_rgbxtoyv12( 21601+ src[0], 21602+ dst[0] + srcSliceY * dstStride[0], 21603+ dst[1] + (srcSliceY >> 1) * dstStride[1], 21604+ dst[2] + (srcSliceY >> 1) * dstStride[2], 21605+ c->srcW, srcSliceH, 21606+ dstStride[0], dstStride[1], srcStride[0], 21607+ c->input_rgb2yuv_table); 21608+ if (dst[3]) 21609+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); 21610+ return srcSliceH; 21611+} 21612+ 
21613+static int xbgrToYv12Wrapper(SwsContext *c, const uint8_t *src[], 21614+ int srcStride[], int srcSliceY, int srcSliceH, 21615+ uint8_t *dst[], int dstStride[]) 21616+{ 21617+ ff_xbgrtoyv12( 21618+ src[0], 21619+ dst[0] + srcSliceY * dstStride[0], 21620+ dst[1] + (srcSliceY >> 1) * dstStride[1], 21621+ dst[2] + (srcSliceY >> 1) * dstStride[2], 21622+ c->srcW, srcSliceH, 21623+ dstStride[0], dstStride[1], srcStride[0], 21624+ c->input_rgb2yuv_table); 21625+ if (dst[3]) 21626+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); 21627+ return srcSliceH; 21628+} 21629+ 21630+static int xrgbToYv12Wrapper(SwsContext *c, const uint8_t *src[], 21631+ int srcStride[], int srcSliceY, int srcSliceH, 21632+ uint8_t *dst[], int dstStride[]) 21633+{ 21634+ ff_xrgbtoyv12( 21635+ src[0], 21636+ dst[0] + srcSliceY * dstStride[0], 21637+ dst[1] + (srcSliceY >> 1) * dstStride[1], 21638+ dst[2] + (srcSliceY >> 1) * dstStride[2], 21639+ c->srcW, srcSliceH, 21640+ dstStride[0], dstStride[1], srcStride[0], 21641+ c->input_rgb2yuv_table); 21642+ if (dst[3]) 21643+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); 21644+ return srcSliceH; 21645+} 21646+ 21647 static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[], 21648 int srcStride[], int srcSliceY, int srcSliceH, 21649 uint8_t *dst[], int dstStride[]) 21650@@ -1977,7 +2062,6 @@ void ff_get_unscaled_swscale(SwsContext 21651 const enum AVPixelFormat dstFormat = c->dstFormat; 21652 const int flags = c->flags; 21653 const int dstH = c->dstH; 21654- const int dstW = c->dstW; 21655 int needsDither; 21656 21657 needsDither = isAnyRGB(dstFormat) && 21658@@ -2035,8 +2119,34 @@ void ff_get_unscaled_swscale(SwsContext 21659 /* bgr24toYV12 */ 21660 if (srcFormat == AV_PIX_FMT_BGR24 && 21661 (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && 21662- !(flags & SWS_ACCURATE_RND) && !(dstW&1)) 21663+ !(flags & SWS_ACCURATE_RND)) 21664 c->convert_unscaled = bgr24ToYv12Wrapper; 
21665+ /* rgb24toYV12 */ 21666+ if (srcFormat == AV_PIX_FMT_RGB24 && 21667+ (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && 21668+ !(flags & SWS_ACCURATE_RND)) 21669+ c->convert_unscaled = rgb24ToYv12Wrapper; 21670+ 21671+ /* bgrxtoYV12 */ 21672+ if (((srcFormat == AV_PIX_FMT_BGRA && dstFormat == AV_PIX_FMT_YUV420P) || 21673+ (srcFormat == AV_PIX_FMT_BGR0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && 21674+ !(flags & SWS_ACCURATE_RND)) 21675+ c->convert_unscaled = bgrxToYv12Wrapper; 21676+ /* rgbx24toYV12 */ 21677+ if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) || 21678+ (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && 21679+ !(flags & SWS_ACCURATE_RND)) 21680+ c->convert_unscaled = rgbxToYv12Wrapper; 21681+ /* xbgrtoYV12 */ 21682+ if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == AV_PIX_FMT_YUV420P) || 21683+ (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && 21684+ !(flags & SWS_ACCURATE_RND)) 21685+ c->convert_unscaled = xbgrToYv12Wrapper; 21686+ /* xrgb24toYV12 */ 21687+ if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) || 21688+ (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && 21689+ !(flags & SWS_ACCURATE_RND)) 21690+ c->convert_unscaled = xrgbToYv12Wrapper; 21691 21692 /* RGB/BGR -> RGB/BGR (no dither needed forms) */ 21693 if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c) 21694--- a/libswscale/tests/swscale.c 21695+++ b/libswscale/tests/swscale.c 21696@@ -23,6 +23,7 @@ 21697 #include <string.h> 21698 #include <inttypes.h> 21699 #include <stdarg.h> 21700+#include <time.h> 21701 21702 #undef HAVE_AV_CONFIG_H 21703 #include "libavutil/cpu.h" 21704@@ -78,6 +79,15 @@ struct Results { 21705 uint32_t crc; 21706 }; 21707 21708+static int time_rep = 0; 21709+ 21710+static 
uint64_t utime(void) 21711+{ 21712+ struct timespec ts; 21713+ clock_gettime(CLOCK_MONOTONIC, &ts); 21714+ return ts.tv_nsec / 1000 + (uint64_t)ts.tv_sec * 1000000; 21715+} 21716+ 21717 // test by ref -> src -> dst -> out & compare out against ref 21718 // ref & out are YV12 21719 static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, 21720@@ -174,7 +184,7 @@ static int doTest(const uint8_t * const 21721 goto end; 21722 } 21723 21724- printf(" %s %dx%d -> %s %3dx%3d flags=%2d", 21725+ printf(" %s %4dx%4d -> %s %4dx%4d flags=%2d", 21726 desc_src->name, srcW, srcH, 21727 desc_dst->name, dstW, dstH, 21728 flags); 21729@@ -182,6 +192,17 @@ static int doTest(const uint8_t * const 21730 21731 sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride); 21732 21733+ if (time_rep != 0) 21734+ { 21735+ const uint64_t now = utime(); 21736+ uint64_t done; 21737+ for (i = 1; i < time_rep; ++i) { /* the sws_scale() call above was run 1 and is not timed */ 21738+ sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride); 21739+ } 21740+ done = utime(); 21741+ printf(" T=%7"PRIu64"us ", done-now); 21742+ } 21743+ 21744 for (i = 0; i < 4 && dstStride[i]; i++) 21745 crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i], 21746 dstStride[i] * dstH); 21747@@ -355,56 +376,78 @@ static int fileTest(const uint8_t * cons 21748 return 0; 21749 } 21750 21751-#define W 96 21752-#define H 96 21753- 21754 int main(int argc, char **argv) 21755 { 21756+ unsigned int W = 96; 21757+ unsigned int H = 96; 21758+ unsigned int W2; 21759+ unsigned int H2; 21760+ unsigned int S; 21761 enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE; 21762 enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE; 21763- uint8_t *rgb_data = av_malloc(W * H * 4); 21764- const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL }; 21765- int rgb_stride[4] = { 4 * W, 0, 0, 0 }; 21766- uint8_t *data = av_malloc(4 * W * H); 21767- const uint8_t * const src[4] = { data, data + W * H, data + W * H * 2, data
+ W * H * 3 }; 21768- int stride[4] = { W, W, W, W }; 21769 int x, y; 21770 struct SwsContext *sws; 21771 AVLFG rand; 21772 int res = -1; 21773 int i; 21774 FILE *fp = NULL; 21775- 21776- if (!rgb_data || !data) 21777- return -1; 21778+ uint8_t *rgb_data; 21779+ uint8_t * rgb_src[4] = { NULL }; 21780+ int rgb_stride[4] = { 0 }; 21781+ uint8_t *data; 21782+ uint8_t * src[4] = { NULL }; 21783+ int stride[4] = { 0 }; 21784 21785 for (i = 1; i < argc; i += 2) { 21786+ const char * const arg2 = argv[i+1]; 21787+ 21788 if (argv[i][0] != '-' || i + 1 == argc) 21789 goto bad_option; 21790 if (!strcmp(argv[i], "-ref")) { 21791- fp = fopen(argv[i + 1], "r"); 21792+ fp = fopen(arg2, "r"); 21793 if (!fp) { 21794- fprintf(stderr, "could not open '%s'\n", argv[i + 1]); 21795+ fprintf(stderr, "could not open '%s'\n", arg2); 21796 goto error; 21797 } 21798 } else if (!strcmp(argv[i], "-cpuflags")) { 21799 unsigned flags = av_get_cpu_flags(); 21800- int ret = av_parse_cpu_caps(&flags, argv[i + 1]); 21801+ int ret = av_parse_cpu_caps(&flags, arg2); 21802 if (ret < 0) { 21803- fprintf(stderr, "invalid cpu flags %s\n", argv[i + 1]); 21804+ fprintf(stderr, "invalid cpu flags %s\n", arg2); 21805 return ret; 21806 } 21807 av_force_cpu_flags(flags); 21808 } else if (!strcmp(argv[i], "-src")) { 21809- srcFormat = av_get_pix_fmt(argv[i + 1]); 21810+ srcFormat = av_get_pix_fmt(arg2); 21811 if (srcFormat == AV_PIX_FMT_NONE) { 21812- fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); 21813+ fprintf(stderr, "invalid pixel format %s\n", arg2); 21814 return -1; 21815 } 21816 } else if (!strcmp(argv[i], "-dst")) { 21817- dstFormat = av_get_pix_fmt(argv[i + 1]); 21818+ dstFormat = av_get_pix_fmt(arg2); 21819 if (dstFormat == AV_PIX_FMT_NONE) { 21820- fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); 21821+ fprintf(stderr, "invalid pixel format %s\n", arg2); 21822+ return -1; 21823+ } 21824+ } else if (!strcmp(argv[i], "-w")) { 21825+ char * p = NULL; 21826+ W = strtoul(arg2, &p, 
0); 21827+ if (!W || *p) { 21828+ fprintf(stderr, "bad width %s\n", arg2); 21829+ return -1; 21830+ } 21831+ } else if (!strcmp(argv[i], "-h")) { 21832+ char * p = NULL; 21833+ H = strtoul(arg2, &p, 0); 21834+ if (!H || *p) { 21835+ fprintf(stderr, "bad height '%s'\n", arg2); 21836+ return -1; 21837+ } 21838+ } else if (!strcmp(argv[i], "-t")) { 21839+ char * p = NULL; 21840+ time_rep = (int)strtol(arg2, &p, 0); 21841+ if (*p) { 21842+ fprintf(stderr, "bad time repetitions '%s'\n", arg2); 21843 return -1; 21844 } 21845 } else { 21846@@ -414,15 +457,34 @@ bad_option: 21847 } 21848 } 21849 21850- sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H, 21851+ S = (W + 15) & ~15; 21852+ rgb_data = av_mallocz(S * H * 4); 21853+ rgb_src[0] = rgb_data; 21854+ rgb_stride[0] = 4 * S; 21855+ data = av_mallocz(4 * S * H); 21856+ src[0] = data; 21857+ src[1] = data + S * H; 21858+ src[2] = data + S * H * 2; 21859+ src[3] = data + S * H * 3; 21860+ stride[0] = S; 21861+ stride[1] = S; 21862+ stride[2] = S; 21863+ stride[3] = S; 21864+ H2 = H < 96 ? 8 : H / 12; 21865+ W2 = W < 96 ? 
8 : W / 12; 21866+ 21867+ if (!rgb_data || !data) 21868+ return -1; 21869+ 21870+ sws = sws_getContext(W2, H2, AV_PIX_FMT_RGB32, W, H, 21871 AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); 21872 21873 av_lfg_init(&rand, 1); 21874 21875 for (y = 0; y < H; y++) 21876 for (x = 0; x < W * 4; x++) 21877- rgb_data[ x + y * 4 * W] = av_lfg_get(&rand); 21878- res = sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride); 21879+ rgb_data[ x + y * 4 * S] = av_lfg_get(&rand); 21880+ res = sws_scale(sws, (const uint8_t * const *)rgb_src, rgb_stride, 0, H2, (uint8_t * const *) src, stride); 21881 if (res < 0 || res != H) { 21882 res = -1; 21883 goto error; 21884@@ -431,10 +493,10 @@ bad_option: 21885 av_free(rgb_data); 21886 21887 if(fp) { 21888- res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat); 21889+ res = fileTest((const uint8_t * const *)src, stride, W, H, fp, srcFormat, dstFormat); 21890 fclose(fp); 21891 } else { 21892- selfTest(src, stride, W, H, srcFormat, dstFormat); 21893+ selfTest((const uint8_t * const *)src, stride, W, H, srcFormat, dstFormat); 21894 res = 0; 21895 } 21896 error: 21897--- /dev/null 21898+++ b/pi-util/BUILD.txt 21899@@ -0,0 +1,67 @@ 21900+Building Pi FFmpeg 21901+================== 21902+ 21903+Current only building on a Pi is supported. 
21904+This builds ffmpeg the way I've tested it 21905+ 21906+Get all dependencies - the current package dependencies are good enough 21907+ 21908+$ sudo apt-get build-dep ffmpeg 21909+ 21910+Configure using the pi-util/conf_native.sh script 21911+------------------------------------------------- 21912+ 21913+This sets the normal release options and creates an output dir to build into 21914+The directory name will depend on system and options but will be under out/ 21915+ 21916+There are a few choices here 21917+ --mmal build including the legacy mmal-based decoders and zero-copy code 21918+ this requires appropriate libraries which currently will exist for 21919+ armv7 but not arm64 21920+ --noshared 21921+ Build a static image rather than a shared library one. Static is 21922+ easier for testing as there is no need to worry about library 21923+ paths being confused and therefore running the wrong code. Shared 21924+ is what is needed, in most cases, when building for use by other 21925+ programs. 21926+ --usr Set install dir to /usr (i.e. system default) rather than in 21927+ <builddir>/install 21928+ 21929+So for a static build 21930+--------------------- 21931+ 21932+$ pi-util/conf_native.sh --noshared 21933+ 21934+$ make -j8 -C out/<wherever the script said it was building to> 21935+ 21936+You can now run ffmpeg directly from where it was built 21937+ 21938+For a shared build 21939+------------------ 21940+ 21941+There are two choices here 21942+ 21943+$ pi-util/conf_native.sh 21944+$ make -j8 -C out/<builddir> install 21945+ 21946+This sets the install prefix to <builddir>/install and is probably what you 21947+want if you don't want to overwrite the system files. 21948+ 21949+You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was 21950+built. You can copy the contents of <builddir>/install to /usr and that mostly 21951+works.
The only downside is that paths in pkgconfig end up being set to the 21952+install directory in your build directory which may be less than ideal when 21953+building other packages. 21954+ 21955+The alternative if you just want to replace the system libs is: 21956+ 21957+$ pi-util/conf_native.sh --usr 21958+$ make -j8 -C out/<builddir> 21959+$ sudo pi-util/clean_usr_libs.sh 21960+$ sudo make -j8 -C out/<builddir> install 21961+ 21962+The clean_usr_libs.sh step wipes any existing libs & includes (for all 21963+architectures) from the system which helps avoid confusion when running other 21964+progs as you can be sure you're not running old code which is unfortunately 21965+easy to do otherwise. 21966+ 21967--- /dev/null 21968+++ b/pi-util/NOTES.txt 21969@@ -0,0 +1,69 @@ 21970+Notes on the hevc_rpi decoder & associated support code 21971+------------------------------------------------------- 21972+ 21973+There are 3 main parts to the existing code: 21974+ 21975+1) The decoder - this is all in libavcodec as rpi_hevc*. 21976+ 21977+2) A few filters to deal with Sand frames and a small patch to 21978+automatically select the sand->i420 converter when required. 21979+ 21980+3) A kludge in ffmpeg.c to display the decoded video. This could & should 21981+be converted into a proper ffmpeg display module. 21982+ 21983+ 21984+Decoder 21985+------- 21986+ 21987+The decoder is a modified version of the existing ffmpeg hevc decoder. 21988+Generally it is ~100% faster than the existing ffmpeg hevc s/w decoder. 21989+More complex bitstreams can be up to ~200% faster but particularly easy 21990+streams can cut its advantage down to ~50%. This means that a Pi3+ can 21991+display nearly all 8-bit 1080p30 streams and with some overclocking it can 21992+display most lower bitrate 10-bit 1080p30 streams - this latter case is 21993+not helped by the requirement to downsample to 8-bit before display on a 21994+Pi. 
21995+ 21996+It has had co-processor offload added for inter-pred and large block 21997+residual transform. Various parts have had optimized ARM NEON assembler 21998+added and the existing ARM asm sections have been profiled and 21999+re-optimized for A53. The main C code has been substantially reworked at 22000+its lower levels in an attempt to optimize it and minimize memory 22001+bandwidth. To some extent code paths that deal with frame types that it 22002+doesn't support have been pruned. 22003+ 22004+It outputs frames in Broadcom Sand format. This is a somewhat annoying 22005+layout that doesn't fit into ffmpeg's standard frame descriptions. It has 22006+vertical stripes of 128 horizontal pixels (64 in 10 bit forms) with Y for 22007+the stripe followed by interleaved U & V, that is then followed by the Y 22008+for the next stripe, etc. The final stripe is always padded to 22009+stripe-width. This is used in an attempt to help with cache locality and 22010+cut down on the number of DRAM bank switches. It is annoying to use for 22011+inter-pred with conventional processing but the way the Pi QPU (which is 22012+used for inter-pred) works means that it has negligible downsides here and 22013+the improved memory performance exceeds the overhead of the increased 22014+complexity in the rest of the code. 22015+ 22016+Frames must be allocated out of GPU memory (as otherwise they can't be 22017+accessed by the co-processors). Utility functions (in rpi_zc.c) have been 22018+written to make this easier. As the frames are already in GPU memory they 22019+can be displayed by the Pi h/w without any further copying. 22020+ 22021+ 22022+Known non-features 22023+------------------ 22024+ 22025+Frame allocation should probably be done in some other way in order to fit 22026+into the standard framework better. 22027+ 22028+Sand frames are currently declared as software frames, there is an 22029+argument that they should be hardware frames but they aren't really.
22030+ 22031+There must be a better way of auto-selecting the hevc_rpi decoder over the 22032+normal s/w hevc decoder, but I became confused by the existing h/w 22033+acceleration framework and what I wanted to do didn't seem to fit in 22034+neatly. 22035+ 22036+Display should be a proper device rather than a kludge in ffmpeg.c 22037+ 22038+ 22039--- /dev/null 22040+++ b/pi-util/TESTMESA.txt 22041@@ -0,0 +1,82 @@ 22042+# Setup & Build instructions for testing Argon30 mesa support (on Pi4) 22043+ 22044+# These assume that the drm_mmal test for Sand8 has been built on this Pi 22045+# as the build relies on many of the same files 22046+ 22047+# 1st get everything required to build ffmpeg 22048+# If sources aren't already enabled on your Pi then enable them 22049+sudo su 22050+sed "s/#deb-src/deb-src/" /etc/apt/sources.list > /tmp/sources.list 22051+sed "s/#deb-src/deb-src/" /etc/apt/sources.list.d/raspi.list > /tmp/raspi.list 22052+mv /tmp/sources.list /etc/apt/ 22053+mv /tmp/raspi.list /etc/apt/sources.list.d/ 22054+apt update 22055+ 22056+# Get dependencies 22057+sudo apt build-dep ffmpeg 22058+ 22059+sudo apt install meson libepoxy-dev libxcb-dri3-dev libxcb1-dev libx11-dev libx11-xcb-dev libdrm-dev 22060+ 22061+# Enable H265 V4L2 request decoder 22062+sudo su 22063+echo dtoverlay=rpivid-v4l2 >> /boot/config.txt 22064+# You may also want to add more CMA if you are going to try 4k videos 22065+# Change the dtoverlay=vc4-fkms-v3d line in config.txt to read 22066+# dtoverlay=vc4-fkms-v3d,cma-512 22067+reboot 22068+# Check it has turned up 22069+ls -la /dev/video* 22070+# This should include video19 22071+# crw-rw----+ 1 root video 81, 7 Aug 4 17:25 /dev/video19 22072+ 22073+# Currently on the Pi the linux headers from the debian distro don't match 22074+# the kernel that we ship and we need to update them - hopefully this step 22075+# will be unneeded in the future 22076+sudo apt install git bc bison flex libssl-dev make 22077+git clone --depth=1
https://github.com/raspberrypi/linux --branch rpi-5.10.y 22078+cd linux 22079+KERNEL=kernel7l 22080+make bcm2711_defconfig 22081+make headers_install 22082+sudo cp -r usr/include/linux /usr/include 22083+cd .. 22084+ 22085+# Config - this builds a statically linked ffmpeg which is easier for testing 22086+pi-util/conf_native.sh --noshared 22087+ 22088+# Build (this is a bit dull) 22089+# If you want to poke the source then libavdevice/egl_vout.c contains the 22090+# output code - 22091+cd out/armv7-static-rel 22092+ 22093+# Check that you have actually configured V4L2 request 22094+grep HEVC_V4L2REQUEST config.h 22095+# You are hoping for 22096+# #define CONFIG_HEVC_V4L2REQUEST_HWACCEL 1 22097+# if you get 0 then the config has failed 22098+ 22099+make -j6 22100+ 22101+# Grab test streams 22102+wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-h264.mkv 22103+wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc.mkv 22104+wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc-10bit.mkv 22105+ 22106+# Test i420 output (works currently) 22107+./ffmpeg -no_cvt_hw -vcodec h264_v4l2m2m -i jellyfish-3-mbps-hd-h264.mkv -f vout_egl - 22108+ 22109+# Test Sand8 output - doesn't currently work but should once you have 22110+# Sand8 working in drm_mmal. I can't guarantee that this will work as 22111+# I can't test this path with a known working format, but the debug looks 22112+# good.
If this doesn't work & drm_mmal does with sand8 then come back to me 22113+# The "show_all 1" forces vout to display every frame otherwise it drops any 22114+# frame that would cause it to block 22115+./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc.mkv -show_all 1 -f vout_egl - 22116+ 22117+# Test Sand30 - doesn't currently work 22118+# (Beware that when FFmpeg errors out it often leaves your terminal window 22119+# in a state where you need to reset it) 22120+./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc-10bit.mkv -f vout_egl - 22121+ 22122+ 22123+ 22124--- /dev/null 22125+++ b/pi-util/clean_usr_libs.sh 22126@@ -0,0 +1,42 @@ 22127+set -e 22128+U=/usr/include/arm-linux-gnueabihf 22129+rm -rf $U/libavcodec 22130+rm -rf $U/libavdevice 22131+rm -rf $U/libavfilter 22132+rm -rf $U/libavformat 22133+rm -rf $U/libavutil 22134+rm -rf $U/libswresample 22135+rm -rf $U/libswscale 22136+U=/usr/include/aarch64-linux-gnu 22137+rm -rf $U/libavcodec 22138+rm -rf $U/libavdevice 22139+rm -rf $U/libavfilter 22140+rm -rf $U/libavformat 22141+rm -rf $U/libavutil 22142+rm -rf $U/libswresample 22143+rm -rf $U/libswscale 22144+U=/usr/lib/arm-linux-gnueabihf 22145+rm -f $U/libavcodec.* 22146+rm -f $U/libavdevice.* 22147+rm -f $U/libavfilter.* 22148+rm -f $U/libavformat.* 22149+rm -f $U/libavutil.* 22150+rm -f $U/libswresample.* 22151+rm -f $U/libswscale.* 22152+U=/usr/lib/arm-linux-gnueabihf/neon/vfp 22153+rm -f $U/libavcodec.* 22154+rm -f $U/libavdevice.* 22155+rm -f $U/libavfilter.* 22156+rm -f $U/libavformat.* 22157+rm -f $U/libavutil.* 22158+rm -f $U/libswresample.* 22159+rm -f $U/libswscale.* 22160+U=/usr/lib/aarch64-linux-gnu 22161+rm -f $U/libavcodec.* 22162+rm -f $U/libavdevice.* 22163+rm -f $U/libavfilter.* 22164+rm -f $U/libavformat.* 22165+rm -f $U/libavutil.* 22166+rm -f $U/libswresample.* 22167+rm -f $U/libswscale.* 22168+ 22169--- /dev/null 22170+++ b/pi-util/conf_arm64_native.sh 22171@@ -0,0 +1,45 @@ 22172+echo "Configure
for ARM64 native build" 22173+ 22174+#RPI_KEEPS="-save-temps=obj" 22175+ 22176+SHARED_LIBS="--enable-shared" 22177+if [ "$1" == "--noshared" ]; then 22178+ SHARED_LIBS="--disable-shared" 22179+ echo Static libs 22180+ OUT=out/arm64-static-rel 22181+else 22182+ echo Shared libs 22183+ OUT=out/arm64-shared-rel 22184+fi 22185+ 22186+mkdir -p $OUT 22187+cd $OUT 22188+ 22189+A=aarch64-linux-gnu 22190+USR_PREFIX=`pwd`/install 22191+LIB_PREFIX=$USR_PREFIX/lib/$A 22192+INC_PREFIX=$USR_PREFIX/include/$A 22193+ 22194+../../configure \ 22195+ --prefix=$USR_PREFIX\ 22196+ --libdir=$LIB_PREFIX\ 22197+ --incdir=$INC_PREFIX\ 22198+ --disable-stripping\ 22199+ --disable-thumb\ 22200+ --disable-mmal\ 22201+ --enable-sand\ 22202+ --enable-v4l2-request\ 22203+ --enable-libdrm\ 22204+ --enable-epoxy\ 22205+ --enable-libudev\ 22206+ --enable-vout-drm\ 22207+ --enable-vout-egl\ 22208+ $SHARED_LIBS\ 22209+ --extra-cflags="-ggdb" 22210+ 22211+# --enable-decoder=hevc_rpi\ 22212+# --enable-extra-warnings\ 22213+# --arch=armv71\ 22214+ 22215+# gcc option for getting asm listing 22216+# -Wa,-ahls 22217--- /dev/null 22218+++ b/pi-util/conf_h265.2016.csv 22219@@ -0,0 +1,195 @@ 22220+1,HEVC_v1/AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5,8 22221+1,HEVC_v1/AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5,8 22222+1,HEVC_v1/AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5,8 22223+1,HEVC_v1/AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5,8 22224+1,HEVC_v1/AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5,8 22225+1,HEVC_v1/AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5,8 22226+1,HEVC_v1/AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5,8 22227+1,HEVC_v1/AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5,8 22228+1,HEVC_v1/BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5,8 22229+1,HEVC_v1/CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5,8 
22230+1,HEVC_v1/CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5,8 22231+1,HEVC_v1/CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5,8 22232+1,HEVC_v1/CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5,8 22233+1,HEVC_v1/CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5,8 22234+1,HEVC_v1/CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5,8 22235+1,HEVC_v1/CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5,8 22236+1,HEVC_v1/CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5,8 22237+1,HEVC_v1/CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5,8 22238+1,HEVC_v1/cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5,8 22239+1,HEVC_v1/CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5,8 22240+1,HEVC_v1/CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5,8 22241+1,HEVC_v1/DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5,10 22242+1,HEVC_v1/DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5,8 22243+1,HEVC_v1/DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5,8 22244+1,HEVC_v1/DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5,8 22245+1,HEVC_v1/DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5,8 22246+1,HEVC_v1/DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5,8 22247+1,HEVC_v1/DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5,8 22248+1,HEVC_v1/DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5,8 22249+1,HEVC_v1/DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5,8 22250+1,HEVC_v1/DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5,8 22251+1,HEVC_v1/DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5,8 22252+1,HEVC_v1/DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5,8 22253+1,HEVC_v1/DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5,8 22254+1,HEVC_v1/DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5,8 22255+1,HEVC_v1/ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5,8 
22256+1,HEVC_v1/ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5,8 22257+1,HEVC_v1/ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5,8 22258+1,HEVC_v1/EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5,8 22259+1,HEVC_v1/FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5,8 22260+1,HEVC_v1/HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5,8 22261+1,HEVC_v1/INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5,8 22262+1,HEVC_v1/INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5,10 22263+1,HEVC_v1/ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5,8 22264+1,HEVC_v1/ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5,8 22265+1,HEVC_v1/ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5,8 22266+1,HEVC_v1/ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5,8 22267+1,HEVC_v1/ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5,8 22268+1,HEVC_v1/IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5,8 22269+1,HEVC_v1/IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5,8 22270+1,HEVC_v1/IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5,8 22271+1,HEVC_v1/LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5,8 22272+1,HEVC_v1/LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5,8 22273+1,HEVC_v1/LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5,8 22274+1,HEVC_v1/MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5,8 22275+1,HEVC_v1/MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5,8 22276+1,HEVC_v1/MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5,8 22277+1,HEVC_v1/MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5,8 22278+1,HEVC_v1/MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5,8 22279+1,HEVC_v1/MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5,8 22280+1,HEVC_v1/MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5,8 22281+1,HEVC_v1/MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5,8 22282+1,HEVC_v1/MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5,8 
22283+1,HEVC_v1/MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5,8 22284+1,HEVC_v1/MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5,8 22285+1,HEVC_v1/MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5,8 22286+1,HEVC_v1/MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5,8 22287+1,HEVC_v1/NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5,8 22288+1,HEVC_v1/NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5,8 22289+1,HEVC_v1/NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5,8 22290+1,HEVC_v1/OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5,8 22291+1,HEVC_v1/OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5,8 22292+1,HEVC_v1/OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5,8 22293+1,HEVC_v1/PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5,8 22294+1,HEVC_v1/PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5,8 22295+1,HEVC_v1/PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5,8 22296+1,HEVC_v1/PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5,8 22297+1,HEVC_v1/PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5,8 22298+1,HEVC_v1/PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5,8 22299+1,HEVC_v1/PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5,8 22300+1,HEVC_v1/PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5,8 22301+1,HEVC_v1/PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5,8 22302+1,HEVC_v1/POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5,8 22303+1,HEVC_v1/PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5,8 22304+1,HEVC_v1/PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5,8 22305+1,HEVC_v1/RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5,8 22306+1,HEVC_v1/RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5,8 22307+1,HEVC_v1/RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5,8 
22308+1,HEVC_v1/RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5,8 22309+1,HEVC_v1/RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5,8 22310+1,HEVC_v1/RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5,8 22311+1,HEVC_v1/RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5,8 22312+1,HEVC_v1/RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5,8 22313+1,HEVC_v1/RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5,8 22314+1,HEVC_v1/RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5,8 22315+1,HEVC_v1/RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5,8 22316+1,HEVC_v1/RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5,8 22317+1,HEVC_v1/RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5,8 22318+1,HEVC_v1/RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5,8 22319+1,HEVC_v1/RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5,8 22320+1,HEVC_v1/RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5,8 22321+1,HEVC_v1/RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5,8 22322+1,HEVC_v1/SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5,8 22323+1,HEVC_v1/SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5,8 22324+1,HEVC_v1/SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5,8 22325+1,HEVC_v1/SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5,8 22326+1,HEVC_v1/SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5,8 22327+1,HEVC_v1/SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5,8 22328+1,HEVC_v1/SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5,8 22329+1,HEVC_v1/SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5,8 22330+1,HEVC_v1/SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt,8 22331+1,HEVC_v1/SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt,8 22332+1,HEVC_v1/SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5,8 22333+1,HEVC_v1/SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5,8 22334+1,HEVC_v1/SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5,8 
22335+1,HEVC_v1/SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5,8 22336+1,HEVC_v1/SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5,8 22337+1,HEVC_v1/SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5,8 22338+1,HEVC_v1/SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5,8 22339+1,HEVC_v1/STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5,8 22340+1,HEVC_v1/STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5,8 22341+1,HEVC_v1/TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5,8 22342+1,HEVC_v1/TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5,8 22343+1,HEVC_v1/TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5,8 22344+1,HEVC_v1/TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5,8 22345+1,HEVC_v1/TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5,8 22346+1,HEVC_v1/TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5,8 22347+3,HEVC_v1/TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth,10 22348+1,HEVC_v1/TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5,8 22349+1,HEVC_v1/VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5,8 22350+3,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8 22351+1,HEVC_v1/WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5,10 22352+1,HEVC_v1/WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5,8 22353+1,HEVC_v1/WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5,8 22354+1,HEVC_v1/WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5,10 22355+1,HEVC_v1/WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5,10 22356+1,HEVC_v1/WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5,8 22357+1,HEVC_v1/WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5,10 
22358+1,HEVC_v1/WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5,8 22359+1,HEVC_v1/WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5,10 22360+1,HEVC_v1/WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5,8 22361+1,HEVC_v1/WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5,10 22362+1,HEVC_v1/WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5,8 22363+1,HEVC_v1/WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5,10 22364+1,HEVC_v1/WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5,8 22365+1,HEVC_v1/WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5,10 22366+1,HEVC_v1/WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5,8 22367+1,RExt/ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_yuv_2.md5,0 22368+0,RExt/Bitdepth_A_RExt_Sony_1,Bitdepth_A_RExt_Sony_1.bin,md5sum.txt,8 22369+0,RExt/Bitdepth_B_RExt_Sony_1,Bitdepth_B_RExt_Sony_1.bin,md5sum.txt,8 22370+0,RExt/CCP_10bit_RExt_QCOM,CCP_10bit_RExt_QCOM.bin,CCP_10bit_RExt_QCOM_md5sum.txt,10 22371+0,RExt/CCP_12bit_RExt_QCOM,CCP_12bit_RExt_QCOM.bin,CCP_12bit_RExt_QCOM_md5sum.txt,8 22372+0,RExt/CCP_8bit_RExt_QCOM,CCP_8bit_RExt_QCOM.bin,CCP_8bit_RExt_QCOM_md5sum.txt,8 22373+1,RExt/ExplicitRdpcm_A_BBC_1,ExplicitRdpcm_A_BBC_1.bit,md5sum.txt,0 22374+0,RExt/ExplicitRdpcm_B_BBC_2,ExplicitRdpcm_B_BBC_1.bit,md5sum.txt,8 22375+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.md5,10 22376+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.md5,8 
22377+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.md5,8 22378+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.md5,8 22379+0,RExt/EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.md5,10 22380+0,RExt/EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.md5,8 22381+0,RExt/EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.md5,8 22382+0,RExt/EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.md5,8 22383+1,RExt/GENERAL_10b_420_RExt_Sony_1,GENERAL_10b_420_RExt_Sony_1.bit,GENERAL_10b_420_RExt_Sony_1.md5,10 22384+1,RExt/GENERAL_10b_422_RExt_Sony_1,GENERAL_10b_422_RExt_Sony_1.bit,GENERAL_10b_422_RExt_Sony_1.md5,0 22385+1,RExt/GENERAL_10b_444_RExt_Sony_2,GENERAL_10b_444_RExt_Sony_2.bit,GENERAL_10b_444_RExt_Sony_2.md5,0 22386+1,RExt/GENERAL_12b_400_RExt_Sony_1,GENERAL_12b_400_RExt_Sony_1.bit,GENERAL_12b_400_RExt_Sony_1.md5,0 22387+1,RExt/GENERAL_12b_420_RExt_Sony_1,GENERAL_12b_420_RExt_Sony_1.bit,GENERAL_12b_420_RExt_Sony_1.md5,0 22388+1,RExt/GENERAL_12b_422_RExt_Sony_1,GENERAL_12b_422_RExt_Sony_1.bit,GENERAL_12b_422_RExt_Sony_1.md5,0 22389+1,RExt/GENERAL_12b_444_RExt_Sony_2,GENERAL_12b_444_RExt_Sony_2.bit,GENERAL_12b_444_RExt_Sony_2.md5,0 22390+0,RExt/GENERAL_16b_400_RExt_Sony_1,GENERAL_16b_400_RExt_Sony_1.bit,GENERAL_16b_400_RExt_Sony_1.md5,0 22391+0,RExt/GENERAL_16b_444_highThroughput_RExt_Sony_2,GENERAL_16b_444_highThroughput_RExt_Sony_2.bit,GENERAL_16b_444_highThroughput_RExt_Sony_2.md5,8 
22392+0,RExt/GENERAL_16b_444_RExt_Sony_2,GENERAL_16b_444_RExt_Sony_2.bit,GENERAL_16b_444_RExt_Sony_2.md5,8 22393+1,RExt/GENERAL_8b_400_RExt_Sony_1,GENERAL_8b_400_RExt_Sony_1.bit,GENERAL_8b_400_RExt_Sony_1.md5,0 22394+1,RExt/GENERAL_8b_420_RExt_Sony_1,GENERAL_8b_420_RExt_Sony_1.bit,GENERAL_8b_420_RExt_Sony_1.md5,8 22395+1,RExt/GENERAL_8b_444_RExt_Sony_2,GENERAL_8b_444_RExt_Sony_2.bit,GENERAL_8b_444_RExt_Sony_2.md5,0 22396+1,RExt/IPCM_A_RExt_NEC_2,IPCM_A_RExt_NEC_2.bit,IPCM_A_RExt_NEC_2_yuv.md5,0 22397+1,RExt/IPCM_B_RExt_NEC,IPCM_B_RExt_NEC.bit,IPCM_B_RExt_NEC_yuv.md5,0 22398+1,RExt/Main_422_10_A_RExt_Sony_2,Main_422_10_A_RExt_Sony_2.bin,md5sum.txt,0 22399+1,RExt/Main_422_10_B_RExt_Sony_2,Main_422_10_B_RExt_Sony_2.bin,md5sum.txt,0 22400+1,RExt/PERSIST_RPARAM_A_RExt_Sony_3,PERSIST_RPARAM_A_RExt_Sony_3.bit,PERSIST_RPARAM_A_RExt_Sony_3.md5,0 22401+1,RExt/QMATRIX_A_RExt_Sony_1,QMATRIX_A_RExt_Sony_1.bit,QMATRIX_A_RExt_Sony_1.md5,0 22402+0,RExt/SAO_A_RExt_MediaTek_1,SAO_A_RExt_MediaTek_1.bit,SAO_A_RExt_MediaTek_1.md5, # Runs out of memory - could be fixed,8 22403+0,RExt/TSCTX_10bit_I_RExt_SHARP_1,TSCTX_10bit_I_RExt_SHARP_1.bin,TSCTX_10bit_I_RExt_SHARP_1.md5,10 22404+0,RExt/TSCTX_10bit_RExt_SHARP_1,TSCTX_10bit_RExt_SHARP_1.bin,TSCTX_10bit_RExt_SHARP_1.md5,10 22405+0,RExt/TSCTX_12bit_I_RExt_SHARP_1,TSCTX_12bit_I_RExt_SHARP_1.bin,TSCTX_12bit_I_RExt_SHARP_1.md5,8 22406+0,RExt/TSCTX_12bit_RExt_SHARP_1,TSCTX_12bit_RExt_SHARP_1.bin,TSCTX_12bit_RExt_SHARP_1.md5,8 22407+0,RExt/TSCTX_8bit_I_RExt_SHARP_1,TSCTX_8bit_I_RExt_SHARP_1.bin,TSCTX_8bit_I_RExt_SHARP_1.md5,8 22408+0,RExt/TSCTX_8bit_RExt_SHARP_1,TSCTX_8bit_RExt_SHARP_1.bin,TSCTX_8bit_RExt_SHARP_1.md5,8 22409+0,RExt/WAVETILES_RExt_Sony_2,WAVETILES_RExt_Sony_2.bit,WAVETILES_RExt_Sony_2.md5,8 22410+1,local/sao_cu16_mobile_344x280,sao_cu16_mobile_344x280.265,sao_cu16_mobile_344x280.md5,8 22411+1,local/dblk_cu16_mobile_344x280,dblk_cu16_mobile_344x280.265,dblk_cu16_mobile_344x280.md5,8 
22412+1,local/dblksao_cu16_mobile_344x280,dblksao_cu16_mobile_344x280.265,dblksao_cu16_mobile_344x280.md5,8 22413+1,local/dblk_pu32_horses_832x448,dblk_pu32_horses_832x448.265,dblk_pu32_horses_832x448.md5,8 22414+1,local/intra_pred_21_laps,intra_pred_21_laps.265,intra_pred_21_laps.md5,8 22415--- /dev/null 22416+++ b/pi-util/conf_h265.2016_HEVC_v1.csv 22417@@ -0,0 +1,147 @@ 22418+1,AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5 22419+1,AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5 22420+1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5 22421+1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5 22422+1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5 22423+1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5 22424+1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5 22425+1,AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5 22426+1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5 22427+1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5 22428+1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5 22429+1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5 22430+1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5 22431+1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5 22432+1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5 22433+1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5 22434+1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5 22435+1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5 22436+1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5 22437+1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5 22438+1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5 22439+1,DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5 22440+1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5 22441+1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5 
22442+1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5 22443+1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5 22444+1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5 22445+1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5 22446+1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5 22447+1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5 22448+1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5 22449+1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5 22450+1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5 22451+1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5 22452+1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5 22453+1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5 22454+1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5 22455+1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5 22456+1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5 22457+1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5 22458+1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5 22459+1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5 22460+1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5 22461+1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5 22462+1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5 22463+1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5 22464+1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5 22465+1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5 22466+1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5 22467+1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5 22468+1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5 22469+1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5 22470+1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5 22471+1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5 
22472+1,MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5 22473+1,MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5 22474+1,MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5 22475+1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5 22476+1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5 22477+1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5 22478+1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5 22479+1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5 22480+1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5 22481+1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5 22482+1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5 22483+1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5 22484+1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5 22485+1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5 22486+1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5 22487+1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5 22488+1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5 22489+1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5 22490+1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5 22491+1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5 22492+1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5 22493+1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5 22494+1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5 22495+1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5 22496+1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5 22497+1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5 22498+1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5 22499+1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5 22500+1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5 
22501+1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5 22502+1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5 22503+1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5 22504+1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5 22505+1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5 22506+1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5 22507+1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5 22508+1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5 22509+1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5 22510+1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5 22511+1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5 22512+1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5 22513+1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5 22514+1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5 22515+1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5 22516+1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5 22517+1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5 22518+1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5 22519+1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5 22520+1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5 22521+1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5 22522+1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5 22523+1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5 22524+1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5 22525+1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5 22526+1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5 22527+1,SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5 22528+2,SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt 22529+2,SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt 22530+1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5 22531+1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5 
22532+1,SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5 22533+1,SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5 22534+1,SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5 22535+1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5 22536+1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5 22537+1,STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5 22538+1,STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5 22539+1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5 22540+1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5 22541+1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5 22542+1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5 22543+1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5 22544+1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5 22545+3,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth 22546+1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5 22547+1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5 22548+3,VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ??? 
22549+1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5 22550+1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5 22551+1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5 22552+1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5 22553+1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5 22554+1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5 22555+1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5 22556+1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5 22557+1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5 22558+1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5 22559+1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5 22560+1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5 22561+1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5 22562+1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5 22563+1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5 22564+1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5 22565--- /dev/null 22566+++ b/pi-util/conf_h265.csv 22567@@ -0,0 +1,144 @@ 22568+1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.md5 22569+1,AMP_A_Samsung_6,AMP_A_Samsung_6.bin,AMP_A_Samsung_6.md5 22570+1,AMP_B_Samsung_6,AMP_B_Samsung_6.bin,AMP_B_Samsung_6.md5 22571+1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5 22572+1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5 22573+1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5 22574+1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5 
22575+1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5 22576+1,AMVP_C_Samsung_6,AMVP_C_Samsung_6.bin,AMVP_C_Samsung_6.md5 22577+1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5 22578+1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5 22579+1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5 22580+1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5 22581+1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5 22582+1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5 22583+1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5 22584+1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5 22585+1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5 22586+1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5 22587+1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5 22588+1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5 22589+1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5 22590+1,DBLK_A_MAIN10_VIXS_3,DBLK_A_MAIN10_VIXS_3.bit,DBLK_A_MAIN10_VIXS_3.md5 22591+1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5 22592+1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5 22593+1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5 22594+1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5 22595+1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5 22596+1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5 22597+1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5 22598+1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5 22599+1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5 22600+1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5 22601+1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5 22602+1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5 22603+1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5 
22604+1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5 22605+1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5 22606+1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5 22607+1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5 22608+1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5 22609+1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5 22610+1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5 22611+1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5 22612+1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5 22613+1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5 22614+1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5 22615+1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5 22616+1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5 22617+1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5 22618+1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5 22619+1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5 22620+1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5 22621+1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5 22622+1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5 22623+1,MAXBINS_A_TI_4,MAXBINS_A_TI_4.bit,MAXBINS_A_TI_4.md5 22624+1,MAXBINS_B_TI_4,MAXBINS_B_TI_4.bit,MAXBINS_B_TI_4.md5 22625+1,MAXBINS_C_TI_4,MAXBINS_C_TI_4.bit,MAXBINS_C_TI_4.md5 22626+1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5 22627+1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5 22628+1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5 22629+1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5 22630+1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5 22631+1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5 22632+1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5 22633+1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5 22634+1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5 
22635+1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5 22636+1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5 22637+1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5 22638+1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5 22639+1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5 22640+1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5 22641+1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5 22642+1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5 22643+1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5 22644+1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5 22645+1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5 22646+1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5 22647+1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5 22648+1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5 22649+1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5 22650+1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5 22651+1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5 22652+1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5 22653+1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5 22654+1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5 22655+1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5 22656+1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5 22657+1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5 22658+1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5 22659+1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5 22660+1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5 22661+1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5 22662+1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5 22663+1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5 
22664+1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5 22665+1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5 22666+1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5 22667+1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5 22668+1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5 22669+1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5 22670+1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5 22671+1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5 22672+1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5 22673+1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5 22674+1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5 22675+1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5 22676+1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5 22677+1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5 22678+1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5 22679+1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5 22680+1,SLIST_A_Sony_4,str.bin,SLIST_A_Sony_4_yuv.md5 22681+1,SLIST_B_Sony_8,str.bin,SLIST_B_Sony_8_yuv.md5 22682+1,SLIST_C_Sony_3,str.bin,SLIST_C_Sony_3_yuv.md5 22683+1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5 22684+1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5 22685+1,STRUCT_A_Samsung_6,STRUCT_A_Samsung_6.bin,STRUCT_A_Samsung_6.md5 22686+1,STRUCT_B_Samsung_6,STRUCT_B_Samsung_6.bin,STRUCT_B_Samsung_6.md5 22687+1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5 22688+1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5 22689+1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5 22690+1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5 22691+1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5 22692+1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5 22693+0,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # Y/C bit depth unmatched 22694+1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5 
22695+1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5 22696+1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5 22697+1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5 22698+1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5 22699+1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5 22700+1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5 22701+1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5 22702+1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5 22703+1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5 22704+1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5 22705+1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5 22706+1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5 22707+1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5 22708+1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5 22709+1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5 22710+1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5 22711+1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5 22712--- /dev/null 22713+++ b/pi-util/conf_native.sh 22714@@ -0,0 +1,135 @@ 22715+echo "Configure for native build" 22716+ 22717+FFSRC=`pwd` 22718+MC=`dpkg --print-architecture` 22719+BUILDBASE=$FFSRC/out 22720+ 22721+#RPI_KEEPS="-save-temps=obj" 22722+RPI_KEEPS="" 22723+ 22724+NOSHARED= 22725+MMAL= 22726+USR_PREFIX= 22727+TOOLCHAIN= 22728+R=rel 22729+ 22730+while [ "$1" != "" ] ; do 22731+ case $1 in 22732+ --noshared) 22733+ NOSHARED=1 22734+ ;; 22735+ --mmal) 22736+ MMAL=1 22737+ ;; 22738+ --usr) 22739+ USR_PREFIX=/usr 22740+ ;; 22741+ --tsan) 22742+ 
TOOLCHAIN="--toolchain=gcc-tsan" 22743+ R=tsan 22744+ ;; 22745+ *) 22746+ echo "Usage $0: [--noshared] [--mmal] [--usr]" 22747+ echo " noshared Build static libs and executable - good for testing" 22748+ echo " mmal Build mmal decoders" 22749+ echo " usr Set install prefix to /usr [default=<build-dir>/install]" 22750+ exit 1 22751+ ;; 22752+ esac 22753+ shift 22754+done 22755+ 22756+ 22757+MCOPTS= 22758+RPI_INCLUDES= 22759+RPI_LIBDIRS= 22760+RPI_DEFINES= 22761+RPI_EXTRALIBS= 22762+ 22763+# uname -m gives kernel type which may not have the same 22764+# 32/64bitness as userspace :-( getconf should provide the answer 22765+# but use uname to check we are on the right processor 22766+MC=`uname -m` 22767+LB=`getconf LONG_BIT` 22768+if [ "$MC" == "armv7l" ] || [ "$MC" == "aarch64" ]; then 22769+ if [ "$LB" == "32" ]; then 22770+ echo "M/C armv7" 22771+ A=arm-linux-gnueabihf 22772+ B=armv7 22773+ MCOPTS="--arch=armv6t2 --cpu=cortex-a7" 22774+ RPI_DEFINES=-mfpu=neon-vfpv4 22775+ elif [ "$LB" == "64" ]; then 22776+ echo "M/C aarch64" 22777+ A=aarch64-linux-gnu 22778+ B=arm64 22779+ else 22780+ echo "Unknown LONG_BIT name: $LB" 22781+ exit 1 22782+ fi 22783+else 22784+ echo "Unknown machine name: $MC" 22785+ exit 1 22786+fi 22787+ 22788+if [ $MMAL ]; then 22789+ RPI_OPT_VC=/opt/vc 22790+ RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux" 22791+ RPI_LIBDIRS="-L$RPI_OPT_VC/lib" 22792+ RPI_DEFINES="$RPI_DEFINES -D__VCCOREVER__=0x4000000" 22793+ RPI_EXTRALIBS="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm -Wl,--end-group" 22794+ RPIOPTS="--enable-mmal" 22795+else 22796+ RPIOPTS="--disable-mmal" 22797+fi 22798+ 22799+C=`lsb_release -sc` 22800+V=`cat RELEASE` 22801+ 22802+SHARED_LIBS="--enable-shared" 22803+if [ $NOSHARED ]; then 22804+ SHARED_LIBS="--disable-shared" 22805+ OUT=$BUILDBASE/$B-$C-$V-static-$R 22806+ echo Static libs 22807+else 22808+ 
echo Shared libs 22809+ OUT=$BUILDBASE/$B-$C-$V-shared-$R 22810+fi 22811+ 22812+if [ ! $USR_PREFIX ]; then 22813+ USR_PREFIX=$OUT/install 22814+fi 22815+LIB_PREFIX=$USR_PREFIX/lib/$A 22816+INC_PREFIX=$USR_PREFIX/include/$A 22817+ 22818+echo Destination directory: $OUT 22819+mkdir -p $OUT 22820+# Nothing under here need worry git - including this .gitignore! 22821+echo "**" > $BUILDBASE/.gitignore 22822+cd $OUT 22823+ 22824+$FFSRC/configure \ 22825+ --prefix=$USR_PREFIX\ 22826+ --libdir=$LIB_PREFIX\ 22827+ --incdir=$INC_PREFIX\ 22828+ $MCOPTS\ 22829+ $TOOLCHAIN\ 22830+ --disable-stripping\ 22831+ --disable-thumb\ 22832+ --enable-sand\ 22833+ --enable-v4l2-request\ 22834+ --enable-libdrm\ 22835+ --enable-vout-egl\ 22836+ --enable-vout-drm\ 22837+ --enable-gpl\ 22838+ $SHARED_LIBS\ 22839+ $RPIOPTS\ 22840+ --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES"\ 22841+ --extra-cxxflags="$RPI_DEFINES $RPI_INCLUDES"\ 22842+ --extra-ldflags="$RPI_LIBDIRS"\ 22843+ --extra-libs="$RPI_EXTRALIBS"\ 22844+ --extra-version="rpi" 22845+ 22846+echo "Configured into $OUT" 22847+ 22848+# gcc option for getting asm listing 22849+# -Wa,-ahls 22850--- /dev/null 22851+++ b/pi-util/ffconf.py 22852@@ -0,0 +1,215 @@ 22853+#!/usr/bin/env python3 22854+ 22855+import string 22856+import os 22857+import subprocess 22858+import re 22859+import argparse 22860+import sys 22861+import csv 22862+from stat import * 22863+ 22864+CODEC_HEVC_RPI = 1 22865+HWACCEL_RPI = 2 22866+HWACCEL_DRM = 3 22867+HWACCEL_VAAPI = 4 22868+ 22869+def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_exec): 22870+ hwaccel = "" 22871+ if dectype == HWACCEL_RPI: 22872+ hwaccel = "rpi" 22873+ elif dectype == HWACCEL_DRM: 22874+ hwaccel = "drm" 22875+ elif dectype == HWACCEL_VAAPI: 22876+ hwaccel = "vaapi" 22877+ 22878+ pix_fmt = [] 22879+ if pix == "8": 22880+ pix_fmt = ["-pix_fmt", "yuv420p"] 22881+ elif pix == "10": 22882+ pix_fmt = ["-pix_fmt", "yuv420p10le"] 22883+ elif pix == "12": 22884+ 
pix_fmt = ["-pix_fmt", "yuv420p12le"] 22885+ 22886+ tmp_root = "/tmp" 22887+ 22888+ names = srcname.split('/') 22889+ while len(names) > 1: 22890+ tmp_root = os.path.join(tmp_root, names[0]) 22891+ del names[0] 22892+ name = names[0] 22893+ 22894+ if not os.path.exists(tmp_root): 22895+ os.makedirs(tmp_root) 22896+ 22897+ dec_file = os.path.join(tmp_root, name + ".dec.md5") 22898+ try: 22899+ os.remove(dec_file) 22900+ except: 22901+ pass 22902+ 22903+ flog = open(os.path.join(tmp_root, name + ".log"), "wt") 22904+ 22905+ ffargs = [ffmpeg_exec, "-flags", "unaligned", "-hwaccel", hwaccel, "-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] + pix_fmt + ["-f", "md5", dec_file] 22906+ 22907+ # Unaligned needed for cropping conformance 22908+ if hwaccel: 22909+ rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT) 22910+ else: 22911+ rstr = subprocess.call( 22912+ [ffmpeg_exec, "-flags", "unaligned", "-vcodec", vcodec, "-i", os.path.join(fileroot, es_file), "-f", "md5", dec_file], 22913+ stdout=flog, stderr=subprocess.STDOUT) 22914+ 22915+ try: 22916+ m1 = None 22917+ m2 = None 22918+ with open(os.path.join(fileroot, md5_file)) as f: 22919+ for line in f: 22920+ m1 = re.search("[0-9a-f]{32}", line.lower()) 22921+ if m1: 22922+ break 22923+ 22924+ with open(dec_file) as f: 22925+ m2 = re.search("[0-9a-f]{32}", f.readline()) 22926+ except: 22927+ pass 22928+ 22929+ if m1 and m2 and m1.group() == m2.group(): 22930+ print("Match: " + m1.group(), file=flog) 22931+ rv = 0 22932+ elif not m1: 22933+ print("****** Cannot find m1", file=flog) 22934+ rv = 3 22935+ elif not m2: 22936+ print("****** Cannot find m2", file=flog) 22937+ rv = 2 22938+ else: 22939+ print("****** Mismatch: " + m1.group() + " != " + m2.group(), file=flog) 22940+ rv = 1 22941+ flog.close() 22942+ return rv 22943+ 22944+def scandir(root): 22945+ aconf = [] 22946+ ents = os.listdir(root) 22947+ ents.sort(key=str.lower) 22948+ for name in ents: 22949+ test_path = os.path.join(root, name) 
22950+ if S_ISDIR(os.stat(test_path).st_mode): 22951+ files = os.listdir(test_path) 22952+ es_file = "?" 22953+ md5_file = "?" 22954+ for f in files: 22955+ (base, ext) = os.path.splitext(f) 22956+ if base[0] == '.': 22957+ pass 22958+ elif ext == ".bit" or ext == ".bin": 22959+ es_file = f 22960+ elif ext == ".md5" or (ext == ".txt" and (base[-4:] == "_md5" or base[-6:] == "md5sum")): 22961+ if md5_file == "?": 22962+ md5_file = f 22963+ elif base[-3:] == "yuv": 22964+ md5_file = f 22965+ aconf.append((1, name, es_file, md5_file)) 22966+ return aconf 22967+ 22968+def runtest(name, tests): 22969+ if not tests: 22970+ return True 22971+ for t in tests: 22972+ if name[0:len(t)] == t or name.find("/" + t) != -1: 22973+ return True 22974+ return False 22975+ 22976+def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec): 22977+ unx_failures = [] 22978+ unx_success = [] 22979+ failures = 0 22980+ successes = 0 22981+ for a in csva: 22982+ exp_test = int(a[0]) 22983+ if (exp_test and runtest(a[1], tests)): 22984+ name = a[1] 22985+ print ("==== ", name, end="") 22986+ sys.stdout.flush() 22987+ 22988+ rv = testone(os.path.join(test_root, name), name, a[2], a[3], a[4], dectype=dectype, vcodec=vcodec, ffmpeg_exec=ffmpeg_exec) 22989+ if (rv == 0): 22990+ successes += 1 22991+ else: 22992+ failures += 1 22993+ 22994+ if (rv == 0): 22995+ if exp_test == 2: 22996+ print(": * OK *") 22997+ unx_success.append(name) 22998+ else: 22999+ print(": ok") 23000+ elif exp_test == 2 and rv == 1: 23001+ print(": fail") 23002+ elif exp_test == 3 and rv == 2: 23003+ # Call an expected "crash" an abort 23004+ print(": abort") 23005+ else: 23006+ unx_failures.append(name) 23007+ if rv == 1: 23008+ print(": * FAIL *") 23009+ elif (rv == 2) : 23010+ print(": * CRASH *") 23011+ elif (rv == 3) : 23012+ print(": * MD5 MISSING *") 23013+ else : 23014+ print(": * BANG *") 23015+ 23016+ if unx_failures or unx_success: 23017+ print("Unexpected Failures:", unx_failures) 23018+ print("Unexpected 
Success: ", unx_success) 23019+ else: 23020+ print("All tests normal:", successes, "ok,", failures, "failed") 23021+ 23022+ 23023+class ConfCSVDialect(csv.Dialect): 23024+ delimiter = ',' 23025+ doublequote = True 23026+ lineterminator = '\n' 23027+ quotechar='"' 23028+ quoting = csv.QUOTE_MINIMAL 23029+ skipinitialspace = True 23030+ strict = True 23031+ 23032+if __name__ == '__main__': 23033+ 23034+ argp = argparse.ArgumentParser(description="FFmpeg h265 conformance tester") 23035+ argp.add_argument("tests", nargs='*') 23036+ argp.add_argument("--pi4", action='store_true', help="Force pi4 cmd line") 23037+ argp.add_argument("--drm", action='store_true', help="Force v4l2 drm cmd line") 23038+ argp.add_argument("--vaapi", action='store_true', help="Force vaapi cmd line") 23039+ argp.add_argument("--test_root", default="/opt/conform/h265.2016", help="Root dir for test") 23040+ argp.add_argument("--csvgen", action='store_true', help="Generate CSV file for dir") 23041+ argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename") 23042+ argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use") 23043+ argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name") 23044+ args = argp.parse_args() 23045+ 23046+ if args.csvgen: 23047+ csv.writer(sys.stdout).writerows(scandir(args.test_root)) 23048+ exit(0) 23049+ 23050+ with open(args.csv, 'rt') as csvfile: 23051+ csva = [a for a in csv.reader(csvfile, ConfCSVDialect())] 23052+ 23053+ dectype = CODEC_HEVC_RPI 23054+ if os.path.exists("/dev/rpivid-hevcmem"): 23055+ dectype = HWACCEL_RPI 23056+ if args.drm or os.path.exists("/sys/module/rpivid_hevc"): 23057+ dectype = HWACCEL_DRM 23058+ 23059+ if args.pi4: 23060+ dectype = HWACCEL_RPI 23061+ elif args.drm: 23062+ dectype = HWACCEL_DRM 23063+ elif args.vaapi: 23064+ dectype = HWACCEL_VAAPI 23065+ 23066+ doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args.ffmpeg) 23067+ 23068--- /dev/null 23069+++ 
b/pi-util/ffperf.py 23070@@ -0,0 +1,128 @@ 23071+#!/usr/bin/env python3 23072+ 23073+import time 23074+import string 23075+import os 23076+import tempfile 23077+import subprocess 23078+import re 23079+import argparse 23080+import sys 23081+import csv 23082+from stat import * 23083+ 23084+class tstats: 23085+ close_threshold = 0.01 23086+ 23087+ def __init__(self, stats_dict=None): 23088+ if stats_dict != None: 23089+ self.name = stats_dict["name"] 23090+ self.elapsed = float(stats_dict["elapsed"]) 23091+ self.user = float(stats_dict["user"]) 23092+ self.sys = float(stats_dict["sys"]) 23093+ 23094+ def times_str(self): 23095+ ctime = self.sys + self.user 23096+ return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed) 23097+ 23098+ def dict(self): 23099+ return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys} 23100+ 23101+ def is_close(self, other): 23102+ return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold 23103+ 23104+ def __lt__(self, other): 23105+ return self.elapsed < other.elapsed 23106+ def __gt__(self, other): 23107+ return self.elapsed > other.elapsed 23108+ 23109+ def time_file(name, prefix, ffmpeg="./ffmpeg"): 23110+ stats = tstats() 23111+ stats.name = name 23112+ start_time = time.clock_gettime(time.CLOCK_MONOTONIC); 23113+ cproc = subprocess.Popen([ffmpeg, "-no_cvt_hw", 23114+ "-vcodec", "hevc_rpi", 23115+ "-t", "30", "-i", prefix + name, 23116+ "-f", "vout_rpi", os.devnull], bufsize=-1, stdout=flog, stderr=flog); 23117+ pinfo = os.wait4(cproc.pid, 0) 23118+ end_time = time.clock_gettime(time.CLOCK_MONOTONIC); 23119+ stats.elapsed = end_time - start_time 23120+ stats.user = pinfo[2].ru_utime 23121+ stats.sys = pinfo[2].ru_stime 23122+ return stats 23123+ 23124+ 23125+def common_prefix(s1, s2): 23126+ for i in range(min(len(s1),len(s2))): 23127+ if s1[i] != s2[i]: 23128+ return s1[:i] 23129+ return s1[:i+1] 23130+ 23131+def main(): 23132+ global flog 23133+ 
23134+ argp = argparse.ArgumentParser(description="FFmpeg performance tester", epilog=""" 23135+To blank the screen before starting use "xdg-screensaver activate" 23136+(For some reason this doesn't seem to work from within python). 23137+""") 23138+ 23139+ argp.add_argument("streams", nargs='*') 23140+ argp.add_argument("--csv_out", default="ffperf_out.csv", help="CSV output filename") 23141+ argp.add_argument("--csv_in", help="CSV input filename") 23142+ argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).") 23143+ argp.add_argument("--repeat", default=3, type=int, help="Run repeat count") 23144+ argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable") 23145+ 23146+ args = argp.parse_args() 23147+ 23148+ csv_out = csv.DictWriter(open(args.csv_out, 'w', newline=''), ["name", "elapsed", "user", "sys"]) 23149+ csv_out.writeheader() 23150+ 23151+ stats_in = {} 23152+ if args.csv_in != None: 23153+ with open(args.csv_in, 'r', newline='') as f_in: 23154+ stats_in = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} 23155+ 23156+ flog = open(os.path.join(tempfile.gettempdir(), "ffperf.log"), "wt") 23157+ 23158+ streams = args.streams 23159+ if not streams: 23160+ if not stats_in: 23161+ print ("No source streams specified") 23162+ return 1 23163+ prefix = "" if args.prefix == None else args.prefix 23164+ streams = [k for k in stats_in] 23165+ elif args.prefix != None: 23166+ prefix = args.prefix 23167+ else: 23168+ prefix = streams[0] 23169+ for f in streams[1:]: 23170+ prefix = common_prefix(prefix, f) 23171+ pp = prefix.rpartition(os.sep) 23172+ prefix = pp[0] + pp[1] 23173+ streams = [s[len(prefix):] for s in streams] 23174+ 23175+ for f in sorted(streams, key=lambda x : "~" * x.count(os.sep) + x.lower()): 23176+ print ("====", f) 23177+ 23178+ t0 = tstats({"name":f, "elapsed":999, "user":999, "sys":999}) 23179+ for i in range(args.repeat): 23180+ t = tstats.time_file(f, prefix, args.ffmpeg) 23181+ print 
("...", t.times_str()) 23182+ if t0 > t: 23183+ t0 = t 23184+ 23185+ if t0.name in stats_in: 23186+ pstat = stats_in[t0.name] 23187+ print("---" if pstat.is_close(t0) else "<<<" if t0 < pstat else ">>>", pstat.times_str()) 23188+ 23189+ csv_out.writerow(t0.dict()) 23190+ 23191+ print () 23192+ 23193+ return 0 23194+ 23195+ 23196+if __name__ == '__main__': 23197+ exit(main()) 23198+ 23199--- /dev/null 23200+++ b/pi-util/genpatch.sh 23201@@ -0,0 +1,35 @@ 23202+set -e 23203+ 23204+NOPATCH= 23205+if [ "$1" == "--notag" ]; then 23206+ shift 23207+ NOPATCH=1 23208+fi 23209+ 23210+if [ "$1" == "" ]; then 23211+ echo Usage: $0 [--notag] \<patch_tag\> 23212+ echo e.g.: $0 mmal_4 23213+ exit 1 23214+fi 23215+ 23216+VERSION=`cat RELEASE` 23217+if [ "$VERSION" == "" ]; then 23218+ echo Can\'t find version RELEASE 23219+ exit 1 23220+fi 23221+ 23222+PATCHFILE=../ffmpeg-$VERSION-$1.patch 23223+ 23224+if [ $NOPATCH ]; then 23225+ echo Not tagged 23226+else 23227+ # Only continue if everything is committed 23228+ git diff --name-status --exit-code 23229+ 23230+ PATCHTAG=pi/$VERSION/$1 23231+ echo Tagging: $PATCHTAG 23232+ 23233+ git tag $PATCHTAG 23234+fi 23235+echo Generating patch: $PATCHFILE 23236+git diff n$VERSION -- > $PATCHFILE 23237--- /dev/null 23238+++ b/pi-util/make_array.py 23239@@ -0,0 +1,23 @@ 23240+#!/usr/bin/env python 23241+ 23242+# Usage 23243+# make_array file.bin 23244+# Produces file.h with array of bytes. 
23245+# 23246+import sys 23247+for file in sys.argv[1:]: 23248+ prefix,suffix = file.split('.') 23249+ assert suffix=='bin' 23250+ name=prefix.split('/')[-1] 23251+ print 'Converting',file 23252+ with open(prefix+'.h','wb') as out: 23253+ print >>out, 'static const unsigned char',name,'[] = {' 23254+ with open(file,'rb') as fd: 23255+ i = 0 23256+ for byte in fd.read(): 23257+ print >>out, '0x%02x, ' % ord(byte), 23258+ i = i + 1 23259+ if i % 8 == 0: 23260+ print >>out, ' // %04x' % (i - 8) 23261+ print >>out,'};' 23262+ 23263--- /dev/null 23264+++ b/pi-util/mkinst.sh 23265@@ -0,0 +1,5 @@ 23266+set -e 23267+ 23268+make install 23269+ 23270+cp -r install/* ../vlc/sysroot/raspian_stretch_pi1-sysroot/usr 23271--- /dev/null 23272+++ b/pi-util/patkodi.sh 23273@@ -0,0 +1,9 @@ 23274+set -e 23275+KODIBASE=/home/jc/rpi/kodi/xbmc 23276+JOBS=-j20 23277+make $JOBS 23278+git diff xbmc/release/4.3-kodi > $KODIBASE/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch 23279+make -C $KODIBASE/tools/depends/target/ffmpeg $JOBS 23280+make -C $KODIBASE/build install 23281+ 23282+ 23283--- /dev/null 23284+++ b/pi-util/perfcmp.py 23285@@ -0,0 +1,101 @@ 23286+#!/usr/bin/env python3 23287+ 23288+import time 23289+import string 23290+import os 23291+import tempfile 23292+import subprocess 23293+import re 23294+import argparse 23295+import sys 23296+import csv 23297+from stat import * 23298+ 23299+class tstats: 23300+ close_threshold = 0.01 23301+ 23302+ def __init__(self, stats_dict=None): 23303+ if stats_dict != None: 23304+ self.name = stats_dict["name"] 23305+ self.elapsed = float(stats_dict["elapsed"]) 23306+ self.user = float(stats_dict["user"]) 23307+ self.sys = float(stats_dict["sys"]) 23308+ 23309+ def times_str(self): 23310+ ctime = self.sys + self.user 23311+ return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed) 23312+ 23313+ def dict(self): 23314+ return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys} 
23315+ 23316+ def is_close(self, other): 23317+ return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold 23318+ 23319+ def __lt__(self, other): 23320+ return self.elapsed < other.elapsed 23321+ def __gt__(self, other): 23322+ return self.elapsed > other.elapsed 23323+ 23324+ def time_file(name, prefix): 23325+ stats = tstats() 23326+ stats.name = name 23327+ start_time = time.clock_gettime(time.CLOCK_MONOTONIC); 23328+ cproc = subprocess.Popen(["./ffmpeg", "-t", "30", "-i", prefix + name, 23329+ "-f", "null", os.devnull], bufsize=-1, stdout=flog, stderr=flog); 23330+ pinfo = os.wait4(cproc.pid, 0) 23331+ end_time = time.clock_gettime(time.CLOCK_MONOTONIC); 23332+ stats.elapsed = end_time - start_time 23333+ stats.user = pinfo[2].ru_utime 23334+ stats.sys = pinfo[2].ru_stime 23335+ return stats 23336+ 23337+ 23338+def common_prefix(s1, s2): 23339+ for i in range(min(len(s1),len(s2))): 23340+ if s1[i] != s2[i]: 23341+ return s1[:i] 23342+ return s1[:i+1] 23343+ 23344+def main(): 23345+ argp = argparse.ArgumentParser(description="FFmpeg performance compare") 23346+ 23347+ argp.add_argument("stream0", help="CSV to compare") 23348+ argp.add_argument("stream1", nargs='?', default="ffperf_out.csv", help="CSV to compare") 23349+ 23350+ args = argp.parse_args() 23351+ 23352+ with open(args.stream0, 'r', newline='') as f_in: 23353+ stats0 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} 23354+ with open(args.stream1, 'r', newline='') as f_in: 23355+ stats1 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} 23356+ 23357+ print (args.stream0, "<<-->>", args.stream1) 23358+ print () 23359+ 23360+ for f in sorted(stats0.keys() | stats1.keys(), key=lambda x : "~" * x.count(os.sep) + x.lower()): 23361+ if not (f in stats0) : 23362+ print (" XX :", f) 23363+ continue 23364+ if not (f in stats1) : 23365+ print (" XX :", f) 23366+ continue 23367+ 23368+ s0 = stats0[f] 23369+ s1 = stats1[f] 23370+ 23371+ pcent = ((s0.elapsed - s1.elapsed) / s0.elapsed) * 
100.0 23372+ thresh = 0.3 23373+ tc = 6 23374+ 23375+ nchar = min(tc - 1, int(abs(pcent) / thresh)) 23376+ cc = " -- " if nchar == 0 else "<" * nchar + " " * (tc - nchar) if pcent < 0 else " " * (tc - nchar) + ">" * nchar 23377+ 23378+ print ("%6.2f %s%6.2f (%+5.2f) : %s" % 23379+ (s0.elapsed, cc, s1.elapsed, pcent, f)) 23380+ 23381+ return 0 23382+ 23383+ 23384+if __name__ == '__main__': 23385+ exit(main()) 23386+ 23387--- /dev/null 23388+++ b/pi-util/qem.sh 23389@@ -0,0 +1,9 @@ 23390+TARGET_DIR=../src/eupton_vc4dev_2012a/software/vc4/DEV/applications/tutorials/user_shader_example_tex 23391+QASM=python\ ../local/bin/qasm.py 23392+SRC_FILE=libavcodec/rpi_hevc_shader.qasm 23393+DST_BASE=shader 23394+ 23395+cp libavcodec/rpi_hevc_shader_cmd.h $TARGET_DIR 23396+$QASM -mc_c:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.c 23397+$QASM -mc_h:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.h 23398+ 23399--- /dev/null 23400+++ b/pi-util/testfilt.py 23401@@ -0,0 +1,83 @@ 23402+#!/usr/bin/env python3 23403+ 23404+import string 23405+import os 23406+import subprocess 23407+import re 23408+import argparse 23409+import sys 23410+import csv 23411+from stat import * 23412+ 23413+class validator: 23414+ def __init__(self): 23415+ self.ok = False 23416+ 23417+ def isok(self): 23418+ return self.ok 23419+ 23420+ def setok(self): 23421+ self.ok = True 23422+ 23423+class valid_regex(validator): 23424+ def __init__(self, regex): 23425+ super().__init__() 23426+ self.regex = re.compile(regex) 23427+ 23428+ def scanline(self, line): 23429+ if self.isok() or self.regex.search(line): 23430+ self.setok() 23431+ 23432+ 23433+def validate(validators, flog): 23434+ for line in flog: 23435+ for v in validators: 23436+ v.scanline(line) 23437+ 23438+ ok = True 23439+ for v in validators: 23440+ if not v.isok(): 23441+ ok = False 23442+ # complain 23443+ print("Test failed") 23444+ 23445+ if ok: 23446+ print("OK") 23447+ return ok 23448+ 23449+def runtest(name, 
ffmpeg, args, suffix, validators): 23450+ log_root = os.path.join("/tmp", "testfilt", name) 23451+ ofilename = os.path.join(log_root, name + suffix) 23452+ 23453+ if not os.path.exists(log_root): 23454+ os.makedirs(log_root) 23455+ 23456+ try: 23457+ os.remove(ofilename) 23458+ except: 23459+ pass 23460+ 23461+ flog = open(os.path.join(log_root, name + ".log"), "wb") 23462+ ffargs = [ffmpeg] + args + [ofilename] 23463+ 23464+ subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT, text=False) 23465+ flog.close 23466+ 23467+ flog = open(os.path.join(log_root, name + ".log"), "rt") 23468+ return validate(validators, flog) 23469+ 23470+def sayok(log_root, flog): 23471+ print("Woohoo") 23472+ return True 23473+ 23474+if __name__ == '__main__': 23475+ 23476+ argp = argparse.ArgumentParser(description="FFmpeg filter tester") 23477+ argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name") 23478+ args = argp.parse_args() 23479+ 23480+ runtest("ATest", args.ffmpeg, ["-v", "verbose", "-no_cvt_hw", "-an", "-c:v", "h264_v4l2m2m", "-i", 23481+ "/home/johncox/server/TestMedia/Sony/jellyfish-10-mbps-hd-h264.mkv", 23482+# "/home/jc/rpi/streams/jellyfish-3-mbps-hd-h264.mkv", 23483+ "-c:v", "h264_v4l2m2m", "-b:v", "2M"], ".mkv", 23484+ [valid_regex(r'Output stream #0:0 \(video\): 900 frames encoded; 900 packets muxed')]) 23485--- /dev/null 23486+++ b/pi-util/v3dusage.py 23487@@ -0,0 +1,128 @@ 23488+#!/usr/bin/env python 23489+ 23490+import sys 23491+import argparse 23492+import re 23493+ 23494+def do_logparse(logname): 23495+ 23496+ rmatch = re.compile(r'^([0-9]+\.[0-9]{3}): (done )?((vpu0)|(vpu1)|(qpu1)) ([A-Z_]+) cb:([0-9a-f]+) ') 23497+ rqcycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs doing vertex/coordinate shading +([0-9]+)$') 23498+ rqtscycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs stalled waiting for TMUs +([0-9]+)$') 23499+ rl2hits = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: L2C 
Total Level 2 cache ([a-z]+) +([0-9]+)$') 23500+ 23501+ ttotal = {'idle':0.0} 23502+ tstart = {} 23503+ qctotal = {} 23504+ qtstotal = {} 23505+ l2hits = {} 23506+ l2total = {} 23507+ time0 = None 23508+ idle_start = None 23509+ qpu_op_no = 0 23510+ op_count = 0 23511+ 23512+ with open(logname, "rt") as infile: 23513+ for line in infile: 23514+ match = rmatch.match(line) 23515+ if match: 23516+# print match.group(1), ":", match.group(2), ":", match.group(3), ":", match.group(7), ":" 23517+ time = float(match.group(1)) 23518+ unit = match.group(3) 23519+ opstart = not match.group(2) 23520+ optype = match.group(7) 23521+ hascb = match.group(8) != "0" 23522+ 23523+ if unit == 'qpu1': 23524+ unit = unit + "." + str(qpu_op_no) 23525+ if not opstart: 23526+ if hascb or optype == 'EXECUTE_SYNC': 23527+ qpu_op_no = 0 23528+ else: 23529+ qpu_op_no += 1 23530+ 23531+ # Ignore sync type 23532+ if optype == 'EXECUTE_SYNC': 23533+ continue 23534+ 23535+ if not time0: 23536+ time0 = time 23537+ 23538+ if opstart: 23539+ tstart[unit] = time; 23540+ elif unit in tstart: 23541+ op_count += 1 23542+ if not unit in ttotal: 23543+ ttotal[unit] = 0.0 23544+ ttotal[unit] += time - tstart[unit] 23545+ del tstart[unit] 23546+ 23547+ if not idle_start and not tstart: 23548+ idle_start = time 23549+ elif idle_start and tstart: 23550+ ttotal['idle'] += time - idle_start 23551+ idle_start = None 23552+ 23553+ match = rqcycle.match(line) 23554+ if match: 23555+ unit = "qpu1." + str(qpu_op_no) 23556+ if not unit in qctotal: 23557+ qctotal[unit] = 0 23558+ qctotal[unit] += int(match.group(2)) 23559+ 23560+ match = rqtscycle.match(line) 23561+ if match: 23562+ unit = "qpu1." + str(qpu_op_no) 23563+ if not unit in qtstotal: 23564+ qtstotal[unit] = 0 23565+ qtstotal[unit] += int(match.group(2)) 23566+ 23567+ match = rl2hits.match(line) 23568+ if match: 23569+ unit = "qpu1." 
+ str(qpu_op_no) 23570+ if not unit in l2total: 23571+ l2total[unit] = 0 23572+ l2hits[unit] = 0 23573+ l2total[unit] += int(match.group(3)) 23574+ if match.group(2) == "hits": 23575+ l2hits[unit] += int(match.group(3)) 23576+ 23577+ 23578+ if not time0: 23579+ print "No v3d profile records found" 23580+ else: 23581+ tlogged = time - time0 23582+ 23583+ print "Logged time:", tlogged, " Op count:", op_count 23584+ for unit in sorted(ttotal): 23585+ print b'%6s: %10.3f %7.3f%%' % (unit, ttotal[unit], ttotal[unit] * 100.0 / tlogged) 23586+ print 23587+ for unit in sorted(qctotal): 23588+ if not unit in qtstotal: 23589+ qtstotal[unit] = 0; 23590+ print b'%6s: Qcycles: %10d, TMU stall: %10d (%7.3f%%)' % (unit, qctotal[unit], qtstotal[unit], (qtstotal[unit] * 100.0)/qctotal[unit]) 23591+ if unit in l2total: 23592+ print b' L2Total: %10d, hits: %10d (%7.3f%%)' % (l2total[unit], l2hits[unit], (l2hits[unit] * 100.0)/l2total[unit]) 23593+ 23594+ 23595+ 23596+if __name__ == '__main__': 23597+ argp = argparse.ArgumentParser( 23598+ formatter_class=argparse.RawDescriptionHelpFormatter, 23599+ description="QPU/VPU perf summary from VC logging", 23600+ epilog = """ 23601+Will also summarise TMU stalls if logging requests set in qpu noflush param 23602+in the profiled code. 
23603+ 23604+Example use: 23605+ vcgencmd set_logging level=0xc0 23606+ <command to profile> 23607+ sudo vcdbg log msg >& t.log 23608+ v3dusage.py t.log 23609+""") 23610+ 23611+ argp.add_argument("logfile") 23612+ args = argp.parse_args() 23613+ 23614+ do_logparse(args.logfile) 23615+ 23616--- a/tests/checkasm/Makefile 23617+++ b/tests/checkasm/Makefile 23618@@ -38,6 +38,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC) 23619 # libavfilter tests 23620 AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o 23621 AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o 23622+AVFILTEROBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o 23623 AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o 23624 AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o 23625 AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o 23626@@ -56,8 +57,9 @@ CHECKASMOBJS-$(CONFIG_SWSCALE) += $(SWS 23627 AVUTILOBJS += av_tx.o 23628 AVUTILOBJS += fixed_dsp.o 23629 AVUTILOBJS += float_dsp.o 23630+AVUTILOBJS-$(CONFIG_SAND) += rpi_sand.o 23631 23632-CHECKASMOBJS-$(CONFIG_AVUTIL) += $(AVUTILOBJS) 23633+CHECKASMOBJS-$(CONFIG_AVUTIL) += $(AVUTILOBJS) $(AVUTILOBJS-yes) 23634 23635 CHECKASMOBJS-$(ARCH_AARCH64) += aarch64/checkasm.o 23636 CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o 23637--- a/tests/checkasm/checkasm.c 23638+++ b/tests/checkasm/checkasm.c 23639@@ -173,6 +173,9 @@ static const struct { 23640 #if CONFIG_BLEND_FILTER 23641 { "vf_blend", checkasm_check_blend }, 23642 #endif 23643+ #if CONFIG_BWDIF_FILTER 23644+ { "vf_bwdif", checkasm_check_vf_bwdif }, 23645+ #endif 23646 #if CONFIG_COLORSPACE_FILTER 23647 { "vf_colorspace", checkasm_check_colorspace }, 23648 #endif 23649@@ -201,6 +204,9 @@ static const struct { 23650 { "fixed_dsp", checkasm_check_fixed_dsp }, 23651 { "float_dsp", checkasm_check_float_dsp }, 23652 { "av_tx", checkasm_check_av_tx }, 23653+ #if CONFIG_SAND 23654+ { "rpi_sand", checkasm_check_rpi_sand }, 23655+ #endif 23656 #endif 23657 { NULL } 23658 }; 23659--- a/tests/checkasm/checkasm.h 23660+++ 
b/tests/checkasm/checkasm.h 23661@@ -72,6 +72,7 @@ void checkasm_check_motion(void); 23662 void checkasm_check_nlmeans(void); 23663 void checkasm_check_opusdsp(void); 23664 void checkasm_check_pixblockdsp(void); 23665+void checkasm_check_rpi_sand(void); 23666 void checkasm_check_sbrdsp(void); 23667 void checkasm_check_synth_filter(void); 23668 void checkasm_check_sw_gbrp(void); 23669@@ -81,6 +82,7 @@ void checkasm_check_utvideodsp(void); 23670 void checkasm_check_v210dec(void); 23671 void checkasm_check_v210enc(void); 23672 void checkasm_check_vc1dsp(void); 23673+void checkasm_check_vf_bwdif(void); 23674 void checkasm_check_vf_eq(void); 23675 void checkasm_check_vf_gblur(void); 23676 void checkasm_check_vf_hflip(void); 23677--- /dev/null 23678+++ b/tests/checkasm/rpi_sand.c 23679@@ -0,0 +1,118 @@ 23680+/* 23681+ * Copyright (c) 2023 John Cox 23682+ * 23683+ * This file is part of FFmpeg. 23684+ * 23685+ * FFmpeg is free software; you can redistribute it and/or modify 23686+ * it under the terms of the GNU General Public License as published by 23687+ * the Free Software Foundation; either version 2 of the License, or 23688+ * (at your option) any later version. 23689+ * 23690+ * FFmpeg is distributed in the hope that it will be useful, 23691+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 23692+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 23693+ * GNU General Public License for more details. 23694+ * 23695+ * You should have received a copy of the GNU General Public License along 23696+ * with FFmpeg; if not, write to the Free Software Foundation, Inc., 23697+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
23698+ */ 23699+ 23700+#include <string.h> 23701+#include "checkasm.h" 23702+#include "libavutil/common.h" 23703+#include "libavutil/rpi_sand_fns.h" 23704+ 23705+#if ARCH_ARM 23706+#include "libavutil/arm/cpu.h" 23707+#include "libavutil/arm/rpi_sand_neon.h" 23708+#elif ARCH_AARCH64 23709+#include "libavutil/aarch64/cpu.h" 23710+#include "libavutil/aarch64/rpi_sand_neon.h" 23711+#endif 23712+ 23713+static inline uint32_t pack30(unsigned int a, unsigned int b, unsigned int c) 23714+{ 23715+ return (a & 0x3ff) | ((b & 0x3ff) << 10) | ((c & 0x3ff) << 20); 23716+} 23717+ 23718+void checkasm_check_rpi_sand(void) 23719+{ 23720+ const unsigned int w = 1280; 23721+ const unsigned int h = 66; 23722+ const unsigned int stride1 = 128; 23723+ const unsigned int stride2 = h*3/2; 23724+ const unsigned int ssize = ((w+95)/96)*128*h*3/2; 23725+ const unsigned int ysize = ((w + 32) * (h + 32) * 2); 23726+ 23727+ uint8_t * sbuf0 = malloc(ssize); 23728+ uint8_t * sbuf1 = malloc(ssize); 23729+ uint8_t * ybuf0 = malloc(ysize); 23730+ uint8_t * ybuf1 = malloc(ysize); 23731+ uint8_t * vbuf0 = malloc(ysize); 23732+ uint8_t * vbuf1 = malloc(ysize); 23733+ uint8_t * yframe0 = (w + 32) * 16 + ybuf0; 23734+ uint8_t * yframe1 = (w + 32) * 16 + ybuf1; 23735+ uint8_t * vframe0 = (w + 32) * 16 + vbuf0; 23736+ uint8_t * vframe1 = (w + 32) * 16 + vbuf1; 23737+ unsigned int i; 23738+ 23739+ for (i = 0; i != ssize; i += 4) 23740+ *(uint32_t*)(sbuf0 + i) = rnd(); 23741+ memcpy(sbuf1, sbuf0, ssize); 23742+ 23743+ if (check_func(have_neon(av_get_cpu_flags()) ? 
ff_rpi_sand30_lines_to_planar_y16 : av_rpi_sand30_to_planar_y16, "rpi_sand30_to_planar_y16")) { 23744+ declare_func(void, uint8_t * dst, const unsigned int dst_stride, 23745+ const uint8_t * src, 23746+ unsigned int stride1, unsigned int stride2, 23747+ unsigned int _x, unsigned int y, 23748+ unsigned int _w, unsigned int h); 23749+ 23750+ memset(ybuf0, 0xbb, ysize); 23751+ memset(ybuf1, 0xbb, ysize); 23752+ 23753+ call_ref(yframe0, (w + 32) * 2, sbuf0, stride1, stride2, 0, 0, w, h); 23754+ call_new(yframe1, (w + 32) * 2, sbuf1, stride1, stride2, 0, 0, w, h); 23755+ 23756+ if (memcmp(sbuf0, sbuf1, ssize) 23757+ || memcmp(ybuf0, ybuf1, ysize)) 23758+ fail(); 23759+ 23760+ bench_new(ybuf1, (w + 32) * 2, sbuf1, stride1, stride2, 0, 0, w, h); 23761+ } 23762+ 23763+ if (check_func(have_neon(av_get_cpu_flags()) ? ff_rpi_sand30_lines_to_planar_c16 : av_rpi_sand30_to_planar_c16, "rpi_sand30_to_planar_c16")) { 23764+ declare_func(void, uint8_t * u_dst, const unsigned int u_stride, 23765+ uint8_t * v_dst, const unsigned int v_stride, 23766+ const uint8_t * src, 23767+ unsigned int stride1, unsigned int stride2, 23768+ unsigned int _x, unsigned int y, 23769+ unsigned int _w, unsigned int h); 23770+ 23771+ memset(ybuf0, 0xbb, ysize); 23772+ memset(ybuf1, 0xbb, ysize); 23773+ memset(vbuf0, 0xbb, ysize); 23774+ memset(vbuf1, 0xbb, ysize); 23775+ 23776+ call_ref(yframe0, (w + 32), vframe0, (w + 32), sbuf0, stride1, stride2, 0, 0, w/2, h/2); 23777+ call_new(yframe1, (w + 32), vframe1, (w + 32), sbuf1, stride1, stride2, 0, 0, w/2, h/2); 23778+ 23779+ if (memcmp(sbuf0, sbuf1, ssize) 23780+ || memcmp(ybuf0, ybuf1, ysize) 23781+ || memcmp(vbuf0, vbuf1, ysize)) 23782+ fail(); 23783+ 23784+ bench_new(yframe1, (w + 32), vframe1, (w + 32), sbuf1, stride1, stride2, 0, 0, w/2, h/2); 23785+ } 23786+ 23787+ 23788+ report("sand30"); 23789+ 23790+ free(sbuf0); 23791+ free(sbuf1); 23792+ free(ybuf0); 23793+ free(ybuf1); 23794+ free(vbuf0); 23795+ free(vbuf1); 23796+} 23797+ 23798--- /dev/null 
23799+++ b/tests/checkasm/vf_bwdif.c 23800@@ -0,0 +1,256 @@ 23801+/* 23802+ * This file is part of FFmpeg. 23803+ * 23804+ * FFmpeg is free software; you can redistribute it and/or modify 23805+ * it under the terms of the GNU General Public License as published by 23806+ * the Free Software Foundation; either version 2 of the License, or 23807+ * (at your option) any later version. 23808+ * 23809+ * FFmpeg is distributed in the hope that it will be useful, 23810+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 23811+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 23812+ * GNU General Public License for more details. 23813+ * 23814+ * You should have received a copy of the GNU General Public License along 23815+ * with FFmpeg; if not, write to the Free Software Foundation, Inc., 23816+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 23817+ */ 23818+ 23819+#include <string.h> 23820+#include "checkasm.h" 23821+#include "libavcodec/internal.h" 23822+#include "libavfilter/bwdif.h" 23823+#include "libavutil/mem_internal.h" 23824+ 23825+#define WIDTH 256 23826+ 23827+#define randomize_buffers(buf0, buf1, mask, count) \ 23828+ for (size_t i = 0; i < count; i++) \ 23829+ buf0[i] = buf1[i] = rnd() & mask 23830+ 23831+#define randomize_overflow_check(buf0, buf1, mask, count) \ 23832+ for (size_t i = 0; i < count; i++) \ 23833+ buf0[i] = buf1[i] = (rnd() & 1) != 0 ? 
mask : 0; 23834+ 23835+#define BODY(type, depth) \ 23836+ do { \ 23837+ type prev0[9*WIDTH], prev1[9*WIDTH]; \ 23838+ type next0[9*WIDTH], next1[9*WIDTH]; \ 23839+ type cur0[9*WIDTH], cur1[9*WIDTH]; \ 23840+ type dst0[WIDTH], dst1[WIDTH]; \ 23841+ const int stride = WIDTH; \ 23842+ const int mask = (1<<depth)-1; \ 23843+ \ 23844+ declare_func(void, void *dst, void *prev, void *cur, void *next, \ 23845+ int w, int prefs, int mrefs, int prefs2, int mrefs2, \ 23846+ int prefs3, int mrefs3, int prefs4, int mrefs4, \ 23847+ int parity, int clip_max); \ 23848+ \ 23849+ randomize_buffers(prev0, prev1, mask, 9*WIDTH); \ 23850+ randomize_buffers(next0, next1, mask, 9*WIDTH); \ 23851+ randomize_buffers( cur0, cur1, mask, 9*WIDTH); \ 23852+ \ 23853+ call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH, \ 23854+ WIDTH, stride, -stride, 2*stride, -2*stride, \ 23855+ 3*stride, -3*stride, 4*stride, -4*stride, \ 23856+ 0, mask); \ 23857+ call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, \ 23858+ WIDTH, stride, -stride, 2*stride, -2*stride, \ 23859+ 3*stride, -3*stride, 4*stride, -4*stride, \ 23860+ 0, mask); \ 23861+ \ 23862+ if (memcmp(dst0, dst1, sizeof dst0) \ 23863+ || memcmp(prev0, prev1, sizeof prev0) \ 23864+ || memcmp(next0, next1, sizeof next0) \ 23865+ || memcmp( cur0, cur1, sizeof cur0)) \ 23866+ fail(); \ 23867+ bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, \ 23868+ WIDTH, stride, -stride, 2*stride, -2*stride, \ 23869+ 3*stride, -3*stride, 4*stride, -4*stride, \ 23870+ 0, mask); \ 23871+ } while (0) 23872+ 23873+void checkasm_check_vf_bwdif(void) 23874+{ 23875+ BWDIFContext ctx_8, ctx_10; 23876+ 23877+ ff_bwdif_init_filter_line(&ctx_8, 8); 23878+ ff_bwdif_init_filter_line(&ctx_10, 10); 23879+ 23880+ if (check_func(ctx_8.filter_line, "bwdif8")) { 23881+ BODY(uint8_t, 8); 23882+ report("bwdif8"); 23883+ } 23884+ 23885+ if (check_func(ctx_10.filter_line, "bwdif10")) { 23886+ BODY(uint16_t, 10); 23887+ report("bwdif10"); 23888+ 
} 23889+ 23890+ if (!ctx_8.filter_line3) 23891+ ctx_8.filter_line3 = ff_bwdif_filter_line3_c; 23892+ 23893+ { 23894+ LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]); 23895+ LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]); 23896+ LOCAL_ALIGNED_16(uint8_t, next0, [11*WIDTH]); 23897+ LOCAL_ALIGNED_16(uint8_t, next1, [11*WIDTH]); 23898+ LOCAL_ALIGNED_16(uint8_t, cur0, [11*WIDTH]); 23899+ LOCAL_ALIGNED_16(uint8_t, cur1, [11*WIDTH]); 23900+ LOCAL_ALIGNED_16(uint8_t, dst0, [WIDTH*3]); 23901+ LOCAL_ALIGNED_16(uint8_t, dst1, [WIDTH*3]); 23902+ const int stride = WIDTH; 23903+ const int mask = (1<<8)-1; 23904+ int parity; 23905+ 23906+ for (parity = 0; parity != 2; ++parity) { 23907+ if (check_func(ctx_8.filter_line3, "bwdif8.line3.rnd.p%d", parity)) { 23908+ 23909+ declare_func(void, void * dst1, int d_stride, 23910+ const void * prev1, const void * cur1, const void * next1, int prefs, 23911+ int w, int parity, int clip_max); 23912+ 23913+ randomize_buffers(prev0, prev1, mask, 11*WIDTH); 23914+ randomize_buffers(next0, next1, mask, 11*WIDTH); 23915+ randomize_buffers( cur0, cur1, mask, 11*WIDTH); 23916+ 23917+ call_ref(dst0, stride, 23918+ prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, stride, 23919+ WIDTH, parity, mask); 23920+ call_new(dst1, stride, 23921+ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride, 23922+ WIDTH, parity, mask); 23923+ 23924+ if (memcmp(dst0, dst1, WIDTH*3) 23925+ || memcmp(prev0, prev1, WIDTH*11) 23926+ || memcmp(next0, next1, WIDTH*11) 23927+ || memcmp( cur0, cur1, WIDTH*11)) 23928+ fail(); 23929+ 23930+ bench_new(dst1, stride, 23931+ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride, 23932+ WIDTH, parity, mask); 23933+ } 23934+ } 23935+ 23936+ // Use just 0s and ~0s to try to provoke bad cropping or overflow 23937+ // Parity makes no difference to this test so just test 0 23938+ if (check_func(ctx_8.filter_line3, "bwdif8.line3.overflow")) { 23939+ 23940+ declare_func(void, void * dst1, int d_stride, 23941+ 
const void * prev1, const void * cur1, const void * next1, int prefs, 23942+ int w, int parity, int clip_max); 23943+ 23944+ randomize_overflow_check(prev0, prev1, mask, 11*WIDTH); 23945+ randomize_overflow_check(next0, next1, mask, 11*WIDTH); 23946+ randomize_overflow_check( cur0, cur1, mask, 11*WIDTH); 23947+ 23948+ call_ref(dst0, stride, 23949+ prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, stride, 23950+ WIDTH, 0, mask); 23951+ call_new(dst1, stride, 23952+ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride, 23953+ WIDTH, 0, mask); 23954+ 23955+ if (memcmp(dst0, dst1, WIDTH*3) 23956+ || memcmp(prev0, prev1, WIDTH*11) 23957+ || memcmp(next0, next1, WIDTH*11) 23958+ || memcmp( cur0, cur1, WIDTH*11)) 23959+ fail(); 23960+ 23961+ // No point to benching 23962+ } 23963+ 23964+ report("bwdif8.line3"); 23965+ } 23966+ 23967+ { 23968+ LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]); 23969+ LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]); 23970+ LOCAL_ALIGNED_16(uint8_t, next0, [11*WIDTH]); 23971+ LOCAL_ALIGNED_16(uint8_t, next1, [11*WIDTH]); 23972+ LOCAL_ALIGNED_16(uint8_t, cur0, [11*WIDTH]); 23973+ LOCAL_ALIGNED_16(uint8_t, cur1, [11*WIDTH]); 23974+ LOCAL_ALIGNED_16(uint8_t, dst0, [WIDTH*3]); 23975+ LOCAL_ALIGNED_16(uint8_t, dst1, [WIDTH*3]); 23976+ const int stride = WIDTH; 23977+ const int mask = (1<<8)-1; 23978+ int spat; 23979+ int parity; 23980+ 23981+ for (spat = 0; spat != 2; ++spat) { 23982+ for (parity = 0; parity != 2; ++parity) { 23983+ if (check_func(ctx_8.filter_edge, "bwdif8.edge.s%d.p%d", spat, parity)) { 23984+ 23985+ declare_func(void, void *dst1, void *prev1, void *cur1, void *next1, 23986+ int w, int prefs, int mrefs, int prefs2, int mrefs2, 23987+ int parity, int clip_max, int spat); 23988+ 23989+ randomize_buffers(prev0, prev1, mask, 11*WIDTH); 23990+ randomize_buffers(next0, next1, mask, 11*WIDTH); 23991+ randomize_buffers( cur0, cur1, mask, 11*WIDTH); 23992+ memset(dst0, 0xba, WIDTH * 3); 23993+ memset(dst1, 0xba, WIDTH * 
3); 23994+ 23995+ call_ref(dst0 + stride, 23996+ prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, WIDTH, 23997+ stride, -stride, stride * 2, -stride * 2, 23998+ parity, mask, spat); 23999+ call_new(dst1 + stride, 24000+ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, WIDTH, 24001+ stride, -stride, stride * 2, -stride * 2, 24002+ parity, mask, spat); 24003+ 24004+ if (memcmp(dst0, dst1, WIDTH*3) 24005+ || memcmp(prev0, prev1, WIDTH*11) 24006+ || memcmp(next0, next1, WIDTH*11) 24007+ || memcmp( cur0, cur1, WIDTH*11)) 24008+ fail(); 24009+ 24010+ bench_new(dst1 + stride, 24011+ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, WIDTH, 24012+ stride, -stride, stride * 2, -stride * 2, 24013+ parity, mask, spat); 24014+ } 24015+ } 24016+ } 24017+ 24018+ report("bwdif8.edge"); 24019+ } 24020+ 24021+ if (check_func(ctx_8.filter_intra, "bwdif8.intra")) { 24022+ LOCAL_ALIGNED_16(uint8_t, cur0, [11*WIDTH]); 24023+ LOCAL_ALIGNED_16(uint8_t, cur1, [11*WIDTH]); 24024+ LOCAL_ALIGNED_16(uint8_t, dst0, [WIDTH*3]); 24025+ LOCAL_ALIGNED_16(uint8_t, dst1, [WIDTH*3]); 24026+ const int stride = WIDTH; 24027+ const int mask = (1<<8)-1; 24028+ 24029+ declare_func(void, void *dst1, void *cur1, int w, int prefs, int mrefs, 24030+ int prefs3, int mrefs3, int parity, int clip_max); 24031+ 24032+ randomize_buffers( cur0, cur1, mask, 11*WIDTH); 24033+ memset(dst0, 0xba, WIDTH * 3); 24034+ memset(dst1, 0xba, WIDTH * 3); 24035+ 24036+ call_ref(dst0 + stride, 24037+ cur0 + stride * 4, WIDTH, 24038+ stride, -stride, stride * 3, -stride * 3, 24039+ 0, mask); 24040+ call_new(dst1 + stride, 24041+ cur0 + stride * 4, WIDTH, 24042+ stride, -stride, stride * 3, -stride * 3, 24043+ 0, mask); 24044+ 24045+ if (memcmp(dst0, dst1, WIDTH*3) 24046+ || memcmp( cur0, cur1, WIDTH*11)) 24047+ fail(); 24048+ 24049+ bench_new(dst1 + stride, 24050+ cur0 + stride * 4, WIDTH, 24051+ stride, -stride, stride * 3, -stride * 3, 24052+ 0, mask); 24053+ 24054+ report("bwdif8.intra"); 24055+ } 
24056+} 24057--- a/tests/fate/checkasm.mak 24058+++ b/tests/fate/checkasm.mak 24059@@ -26,6 +26,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp 24060 fate-checkasm-motion \ 24061 fate-checkasm-opusdsp \ 24062 fate-checkasm-pixblockdsp \ 24063+ fate-checkasm-rpi_sand \ 24064 fate-checkasm-sbrdsp \ 24065 fate-checkasm-synth_filter \ 24066 fate-checkasm-sw_gbrp \ 24067@@ -36,6 +37,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp 24068 fate-checkasm-v210enc \ 24069 fate-checkasm-vc1dsp \ 24070 fate-checkasm-vf_blend \ 24071+ fate-checkasm-vf_bwdif \ 24072 fate-checkasm-vf_colorspace \ 24073 fate-checkasm-vf_eq \ 24074 fate-checkasm-vf_gblur \ 24075