1
2Upstream-Status: Inappropriate
3
4RPI-Distro repo clones original ffmpeg and applies patches to enable
5Raspberry Pi support.
6
7--- a/configure
8+++ b/configure
9@@ -205,6 +205,7 @@ External library support:
10   --disable-bzlib          disable bzlib [autodetect]
11   --disable-coreimage      disable Apple CoreImage framework [autodetect]
12   --enable-chromaprint     enable audio fingerprinting with chromaprint [no]
13+  --disable-epoxy          disable epoxy [autodetect]
14   --enable-frei0r          enable frei0r video filtering [no]
15   --enable-gcrypt          enable gcrypt, needed for rtmp(t)e support
16                            if openssl, librtmp or gmp is not used [no]
17@@ -281,6 +282,7 @@ External library support:
18                            if openssl, gnutls or mbedtls is not used [no]
19   --enable-libtwolame      enable MP2 encoding via libtwolame [no]
20   --enable-libuavs3d       enable AVS3 decoding via libuavs3d [no]
21+  --disable-libudev        disable libudev [autodetect]
22   --enable-libv4l2         enable libv4l2/v4l-utils [no]
23   --enable-libvidstab      enable video stabilization using vid.stab [no]
24   --enable-libvmaf         enable vmaf filter via libvmaf [no]
25@@ -343,12 +345,16 @@ External library support:
26   --enable-libmfx          enable Intel MediaSDK (AKA Quick Sync Video) code via libmfx [no]
27   --enable-libnpp          enable Nvidia Performance Primitives-based code [no]
28   --enable-mmal            enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no]
29+  --enable-sand            enable sand video formats [rpi]
30+  --enable-vout-drm        enable the vout_drm module - for internal testing only [no]
31+  --enable-vout-egl        enable the vout_egl module - for internal testing only [no]
32   --disable-nvdec          disable Nvidia video decoding acceleration (via hwaccel) [autodetect]
33   --disable-nvenc          disable Nvidia video encoding code [autodetect]
34   --enable-omx             enable OpenMAX IL code [no]
35   --enable-omx-rpi         enable OpenMAX IL code for Raspberry Pi [no]
36   --enable-rkmpp           enable Rockchip Media Process Platform code [no]
37   --disable-v4l2-m2m       disable V4L2 mem2mem code [autodetect]
38+  --enable-v4l2-request    enable V4L2 request API code [no]
39   --disable-vaapi          disable Video Acceleration API (mainly Unix/Intel) code [autodetect]
40   --disable-vdpau          disable Nvidia Video Decode and Presentation API for Unix code [autodetect]
41   --disable-videotoolbox   disable VideoToolbox code [autodetect]
42@@ -1754,7 +1760,9 @@ EXTERNAL_AUTODETECT_LIBRARY_LIST="
43     avfoundation
44     bzlib
45     coreimage
46+    epoxy
47     iconv
48+    libudev
49     libxcb
50     libxcb_shm
51     libxcb_shape
52@@ -1924,6 +1932,7 @@ HWACCEL_LIBRARY_LIST="
53     mmal
54     omx
55     opencl
56+    v4l2_request
57 "
58
59 DOCUMENT_LIST="
60@@ -1941,10 +1950,14 @@ FEATURE_LIST="
61     omx_rpi
62     runtime_cpudetect
63     safe_bitstream_reader
64+    sand
65     shared
66     small
67     static
68     swscale_alpha
69+    vout_drm
70+    vout_egl
71+    v4l2_req_hevc_vx
72 "
73
74 # this list should be kept in linking order
75@@ -2501,6 +2514,7 @@ CONFIG_EXTRA="
76     rtpdec
77     rtpenc_chain
78     rv34dsp
79+    sand
80     scene_sad
81     sinewin
82     snappy
83@@ -3011,6 +3025,7 @@ d3d11va_deps="dxva_h ID3D11VideoDecoder
84 dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32"
85 ffnvcodec_deps_any="libdl LoadLibrary"
86 nvdec_deps="ffnvcodec"
87+v4l2_request_deps="linux_videodev2_h linux_media_h v4l2_timeval_to_ns libdrm libudev"
88 vaapi_x11_deps="xlib_x11"
89 videotoolbox_hwaccel_deps="videotoolbox pthreads"
90 videotoolbox_hwaccel_extralibs="-framework QuartzCore"
91@@ -3054,6 +3069,8 @@ hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicP
92 hevc_dxva2_hwaccel_select="hevc_decoder"
93 hevc_nvdec_hwaccel_deps="nvdec"
94 hevc_nvdec_hwaccel_select="hevc_decoder"
95+hevc_v4l2request_hwaccel_deps="v4l2_request"
96+hevc_v4l2request_hwaccel_select="hevc_decoder"
97 hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC"
98 hevc_vaapi_hwaccel_select="hevc_decoder"
99 hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC"
100@@ -3539,8 +3556,11 @@ sndio_indev_deps="sndio"
101 sndio_outdev_deps="sndio"
102 v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h"
103 v4l2_indev_suggest="libv4l2"
104+v4l2_outdev_deps="libdrm"
105 v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h"
106 v4l2_outdev_suggest="libv4l2"
107+vout_drm_outdev_deps="libdrm"
108+vout_egl_outdev_deps="xlib epoxy"
109 vfwcap_indev_deps="vfw32 vfwcap_defines"
110 xcbgrab_indev_deps="libxcb"
111 xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes"
112@@ -3745,6 +3765,7 @@ tonemap_opencl_filter_deps="opencl const
113 transpose_opencl_filter_deps="opencl"
114 transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags"
115 transpose_vulkan_filter_deps="vulkan spirv_compiler"
116+unsand_filter_select="sand"
117 unsharp_opencl_filter_deps="opencl"
118 uspp_filter_deps="gpl avcodec"
119 vaguedenoiser_filter_deps="gpl"
120@@ -6296,6 +6317,12 @@ if enabled xlib; then
121         disable xlib
122 fi
123
124+enabled libudev &&
125+    check_pkg_config libudev libudev libudev.h udev_new
126+
127+enabled epoxy &&
128+    check_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version
129+
130 check_headers direct.h
131 check_headers dirent.h
132 check_headers dxgidebug.h
133@@ -6735,8 +6762,16 @@ enabled rkmpp             && { require_p
134                                { enabled libdrm ||
135                                  die "ERROR: rkmpp requires --enable-libdrm"; }
136                              }
137+enabled v4l2_request      && { enabled libdrm ||
138+                               die "ERROR: v4l2-request requires --enable-libdrm"; } &&
139+                             { enabled libudev ||
140+                               die "ERROR: v4l2-request requires libudev"; }
141 enabled vapoursynth       && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init
142
143+enabled vout_drm && { enabled libdrm || die "ERROR: vout_drm requires --enable-libdrm"; }
144+
145+enabled vout_egl && { enabled epoxy || die "ERROR: vout_egl requires epoxy"; } &&
146+                    { enabled xlib  || die "ERROR: vout_egl requires xlib"; }
147
148 if enabled gcrypt; then
149     GCRYPT_CONFIG="${cross_prefix}libgcrypt-config"
150@@ -6817,6 +6852,10 @@ if enabled v4l2_m2m; then
151     check_cc vp9_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP9;"
152 fi
153
154+check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns
155+check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC_SLICE;"
156+disable v4l2_req_hevc_vx
157+
158 check_headers sys/videoio.h
159 test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
160
161@@ -7305,6 +7344,9 @@ check_deps $CONFIG_LIST       \
162
163 enabled threads && ! enabled pthreads && ! enabled atomics_native && die "non pthread threading without atomics not supported, try adding --enable-pthreads or --cpu=i486 or higher if you are on x86"
164
165+# Sub-feature of hevc_v4l2request_hwaccel - can only be set once deps are done
166+enabled hevc_v4l2request_hwaccel && disabled hevc_v4l2_request && enable v4l2_req_hevc_vx
167+
168 case $target_os in
169 haiku)
170     disable memalign
171--- a/fftools/ffmpeg.c
172+++ b/fftools/ffmpeg.c
173@@ -1953,8 +1953,8 @@ static int ifilter_send_frame(InputFilte
174                        av_channel_layout_compare(&ifilter->ch_layout, &frame->ch_layout);
175         break;
176     case AVMEDIA_TYPE_VIDEO:
177-        need_reinit |= ifilter->width  != frame->width ||
178-                       ifilter->height != frame->height;
179+        need_reinit |= ifilter->width  != av_frame_cropped_width(frame) ||
180+                       ifilter->height != av_frame_cropped_height(frame);
181         break;
182     }
183
184@@ -1965,6 +1965,9 @@ static int ifilter_send_frame(InputFilte
185         (ifilter->hw_frames_ctx && ifilter->hw_frames_ctx->data != frame->hw_frames_ctx->data))
186         need_reinit = 1;
187
188+    if (no_cvt_hw && fg->graph)
189+        need_reinit = 0;
190+
191     if (sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX)) {
192         if (!ifilter->displaymatrix || memcmp(sd->data, ifilter->displaymatrix, sizeof(int32_t) * 9))
193             need_reinit = 1;
194@@ -2220,8 +2223,7 @@ static int decode_video(InputStream *ist
195         decoded_frame->top_field_first = ist->top_field_first;
196
197     ist->frames_decoded++;
198-
199-    if (ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) {
200+    if (!no_cvt_hw && ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) {
201         err = ist->hwaccel_retrieve_data(ist->dec_ctx, decoded_frame);
202         if (err < 0)
203             goto fail;
204@@ -2418,7 +2420,12 @@ static int process_input_packet(InputStr
205         case AVMEDIA_TYPE_VIDEO:
206             ret = decode_video    (ist, repeating ? NULL : avpkt, &got_output, &duration_pts, !pkt,
207                                    &decode_failed);
208-            if (!repeating || !pkt || got_output) {
209+            // Pi: Do not inc dts if no_cvt_hw set
210+            // V4L2 H264 decode has long latency and sometimes spits out a long
211+            // stream of output without input. In this case incrementing DTS is wrong.
212+            // There may be cases where the condition as written is correct so only
213+            // "fix" in the cases which cause problems
214+            if (!repeating || !pkt || (got_output && !no_cvt_hw)) {
215                 if (pkt && pkt->duration) {
216                     duration_dts = av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q);
217                 } else if(ist->dec_ctx->framerate.num != 0 && ist->dec_ctx->framerate.den != 0) {
218@@ -2564,12 +2571,15 @@ static enum AVPixelFormat get_format(AVC
219             break;
220
221         if (ist->hwaccel_id == HWACCEL_GENERIC ||
222-            ist->hwaccel_id == HWACCEL_AUTO) {
223+            ist->hwaccel_id == HWACCEL_AUTO ||
224+            no_cvt_hw) {
225             for (i = 0;; i++) {
226                 config = avcodec_get_hw_config(s->codec, i);
227                 if (!config)
228                     break;
229-                if (!(config->methods &
230+                if (no_cvt_hw && (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL))
231+                    av_log(s, AV_LOG_DEBUG, "no_cvt_hw so trying pix_fmt %d with codec internal hwaccel\n", *p);
232+                else if (!(config->methods &
233                       AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX))
234                     continue;
235                 if (config->pix_fmt == *p)
236--- a/fftools/ffmpeg.h
237+++ b/fftools/ffmpeg.h
238@@ -626,6 +626,7 @@ extern enum VideoSyncMethod video_sync_m
239 extern float frame_drop_threshold;
240 extern int do_benchmark;
241 extern int do_benchmark_all;
242+extern int no_cvt_hw;
243 extern int do_deinterlace;
244 extern int do_hex_dump;
245 extern int do_pkt_dump;
246--- a/fftools/ffmpeg_filter.c
247+++ b/fftools/ffmpeg_filter.c
248@@ -1175,8 +1175,8 @@ int ifilter_parameters_from_frame(InputF
249
250     ifilter->format = frame->format;
251
252-    ifilter->width               = frame->width;
253-    ifilter->height              = frame->height;
254+    ifilter->width               = av_frame_cropped_width(frame);
255+    ifilter->height              = av_frame_cropped_height(frame);
256     ifilter->sample_aspect_ratio = frame->sample_aspect_ratio;
257
258     ifilter->sample_rate         = frame->sample_rate;
259--- a/fftools/ffmpeg_hw.c
260+++ b/fftools/ffmpeg_hw.c
261@@ -75,6 +75,8 @@ static char *hw_device_default_name(enum
262     char *name;
263     size_t index_pos;
264     int index, index_limit = 1000;
265+    if (!type_name)
266+        return NULL;
267     index_pos = strlen(type_name);
268     name = av_malloc(index_pos + 4);
269     if (!name)
270--- a/fftools/ffmpeg_opt.c
271+++ b/fftools/ffmpeg_opt.c
272@@ -162,6 +162,7 @@ enum VideoSyncMethod video_sync_method =
273 float frame_drop_threshold = 0;
274 int do_benchmark      = 0;
275 int do_benchmark_all  = 0;
276+int no_cvt_hw         = 0;
277 int do_hex_dump       = 0;
278 int do_pkt_dump       = 0;
279 int copy_ts           = 0;
280@@ -3724,6 +3725,8 @@ const OptionDef options[] = {
281         "add timings for benchmarking" },
282     { "benchmark_all",  OPT_BOOL | OPT_EXPERT,                       { &do_benchmark_all },
283       "add timings for each task" },
284+    { "no_cvt_hw",      OPT_BOOL | OPT_EXPERT,                       { &no_cvt_hw },
285+      "do not auto-convert hw frames to sw" },
286     { "progress",       HAS_ARG | OPT_EXPERT,                        { .func_arg = opt_progress },
287       "write program-readable progress information", "url" },
288     { "stdin",          OPT_BOOL | OPT_EXPERT,                       { &stdin_interaction },
289--- a/libavcodec/Makefile
290+++ b/libavcodec/Makefile
291@@ -161,7 +161,10 @@ OBJS-$(CONFIG_VIDEODSP)                +
292 OBJS-$(CONFIG_VP3DSP)                  += vp3dsp.o
293 OBJS-$(CONFIG_VP56DSP)                 += vp56dsp.o
294 OBJS-$(CONFIG_VP8DSP)                  += vp8dsp.o
295-OBJS-$(CONFIG_V4L2_M2M)                += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o
296+OBJS-$(CONFIG_V4L2_M2M)                += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o\
297+                                          weak_link.o v4l2_req_dmabufs.o
298+OBJS-$(CONFIG_V4L2_REQUEST)            += v4l2_req_media.o v4l2_req_pollqueue.o v4l2_req_dmabufs.o\
299+					  v4l2_req_devscan.o weak_link.o
300 OBJS-$(CONFIG_WMA_FREQS)               += wma_freqs.o
301 OBJS-$(CONFIG_WMV2DSP)                 += wmv2dsp.o
302
303@@ -972,6 +975,8 @@ OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL)
304 OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL)         += dxva2_hevc.o
305 OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL)         += nvdec_hevc.o
306 OBJS-$(CONFIG_HEVC_QSV_HWACCEL)           += qsvdec.o
307+OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL)   += v4l2_request_hevc.o v4l2_req_decode_q.o v4l2_req_hevc_v4.o
308+OBJS-$(CONFIG_V4L2_REQ_HEVC_VX)           += v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o
309 OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL)         += vaapi_hevc.o h265_profile_level.o
310 OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL)         += vdpau_hevc.o h265_profile_level.o
311 OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL)        += nvdec_mjpeg.o
312--- a/libavcodec/avcodec.h
313+++ b/libavcodec/avcodec.h
314@@ -2212,6 +2212,17 @@ typedef struct AVHWAccel {
315      * that avctx->hwaccel_priv_data is invalid.
316      */
317     int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
318+
319+    /**
320+     * Called if parsing fails
321+     *
322+     * An error has occurred, end_frame will not be called
323+     * start_frame & decode_slice may or may not have been called
324+     * Optional
325+     *
326+     * @param avctx the codec context
327+     */
328+    void (*abort_frame)(AVCodecContext *avctx);
329 } AVHWAccel;
330
331 /**
332--- /dev/null
333+++ b/libavcodec/hevc-ctrls-v1.h
334@@ -0,0 +1,229 @@
335+/* SPDX-License-Identifier: GPL-2.0 */
336+/*
337+ * These are the HEVC state controls for use with stateless HEVC
338+ * codec drivers.
339+ *
340+ * It turns out that these structs are not stable yet and will undergo
341+ * more changes. So keep them private until they are stable and ready to
342+ * become part of the official public API.
343+ */
344+
345+#ifndef _HEVC_CTRLS_H_
346+#define _HEVC_CTRLS_H_
347+
348+#include <linux/videodev2.h>
349+
350+/* The pixel format isn't stable at the moment and will likely be renamed. */
351+#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
352+
353+#define V4L2_CID_MPEG_VIDEO_HEVC_SPS		(V4L2_CID_MPEG_BASE + 1008)
354+#define V4L2_CID_MPEG_VIDEO_HEVC_PPS		(V4L2_CID_MPEG_BASE + 1009)
355+#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS	(V4L2_CID_MPEG_BASE + 1010)
356+#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX	(V4L2_CID_MPEG_BASE + 1011)
357+#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE	(V4L2_CID_MPEG_BASE + 1015)
358+#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE	(V4L2_CID_MPEG_BASE + 1016)
359+
360+/* enum v4l2_ctrl_type type values */
361+#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120
362+#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121
363+#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122
364+#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123
365+
366+enum v4l2_mpeg_video_hevc_decode_mode {
367+	V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
368+	V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED,
369+};
370+
371+enum v4l2_mpeg_video_hevc_start_code {
372+	V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
373+	V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B,
374+};
375+
376+#define V4L2_HEVC_SLICE_TYPE_B	0
377+#define V4L2_HEVC_SLICE_TYPE_P	1
378+#define V4L2_HEVC_SLICE_TYPE_I	2
379+
380+#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE		(1ULL << 0)
381+#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED			(1ULL << 1)
382+#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED				(1ULL << 2)
383+#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET		(1ULL << 3)
384+#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED				(1ULL << 4)
385+#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED		(1ULL << 5)
386+#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT		(1ULL << 6)
387+#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED		(1ULL << 7)
388+#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED	(1ULL << 8)
389+
390+/* The controls are not stable at the moment and will likely be reworked. */
391+struct v4l2_ctrl_hevc_sps {
392+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
393+	__u16	pic_width_in_luma_samples;
394+	__u16	pic_height_in_luma_samples;
395+	__u8	bit_depth_luma_minus8;
396+	__u8	bit_depth_chroma_minus8;
397+	__u8	log2_max_pic_order_cnt_lsb_minus4;
398+	__u8	sps_max_dec_pic_buffering_minus1;
399+	__u8	sps_max_num_reorder_pics;
400+	__u8	sps_max_latency_increase_plus1;
401+	__u8	log2_min_luma_coding_block_size_minus3;
402+	__u8	log2_diff_max_min_luma_coding_block_size;
403+	__u8	log2_min_luma_transform_block_size_minus2;
404+	__u8	log2_diff_max_min_luma_transform_block_size;
405+	__u8	max_transform_hierarchy_depth_inter;
406+	__u8	max_transform_hierarchy_depth_intra;
407+	__u8	pcm_sample_bit_depth_luma_minus1;
408+	__u8	pcm_sample_bit_depth_chroma_minus1;
409+	__u8	log2_min_pcm_luma_coding_block_size_minus3;
410+	__u8	log2_diff_max_min_pcm_luma_coding_block_size;
411+	__u8	num_short_term_ref_pic_sets;
412+	__u8	num_long_term_ref_pics_sps;
413+	__u8	chroma_format_idc;
414+	__u8	sps_max_sub_layers_minus1;
415+
416+	__u64	flags;
417+};
418+
419+#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT		(1ULL << 0)
420+#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT			(1ULL << 1)
421+#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED		(1ULL << 2)
422+#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT			(1ULL << 3)
423+#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED		(1ULL << 4)
424+#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED		(1ULL << 5)
425+#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED			(1ULL << 6)
426+#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT	(1ULL << 7)
427+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED			(1ULL << 8)
428+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED			(1ULL << 9)
429+#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED		(1ULL << 10)
430+#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED			(1ULL << 11)
431+#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED		(1ULL << 12)
432+#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED	(1ULL << 13)
433+#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14)
434+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED	(1ULL << 15)
435+#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER	(1ULL << 16)
436+#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT		(1ULL << 17)
437+#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18)
438+
439+struct v4l2_ctrl_hevc_pps {
440+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
441+	__u8	num_extra_slice_header_bits;
442+	__s8	init_qp_minus26;
443+	__u8	diff_cu_qp_delta_depth;
444+	__s8	pps_cb_qp_offset;
445+	__s8	pps_cr_qp_offset;
446+	__u8	num_tile_columns_minus1;
447+	__u8	num_tile_rows_minus1;
448+	__u8	column_width_minus1[20];
449+	__u8	row_height_minus1[22];
450+	__s8	pps_beta_offset_div2;
451+	__s8	pps_tc_offset_div2;
452+	__u8	log2_parallel_merge_level_minus2;
453+
454+	__u8	padding[4];
455+	__u64	flags;
456+};
457+
458+#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE	0x01
459+#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER	0x02
460+#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR		0x03
461+
462+#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX		16
463+
464+struct v4l2_hevc_dpb_entry {
465+	__u64	timestamp;
466+	__u8	rps;
467+	__u8	field_pic;
468+	__u16	pic_order_cnt[2];
469+	__u8	padding[2];
470+};
471+
472+struct v4l2_hevc_pred_weight_table {
473+	__s8	delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
474+	__s8	luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
475+	__s8	delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
476+	__s8	chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
477+
478+	__s8	delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
479+	__s8	luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
480+	__s8	delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
481+	__s8	chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
482+
483+	__u8	padding[6];
484+
485+	__u8	luma_log2_weight_denom;
486+	__s8	delta_chroma_log2_weight_denom;
487+};
488+
489+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA		(1ULL << 0)
490+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA		(1ULL << 1)
491+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED	(1ULL << 2)
492+#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO			(1ULL << 3)
493+#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT			(1ULL << 4)
494+#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0		(1ULL << 5)
495+#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV		(1ULL << 6)
496+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
497+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
498+#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT		(1ULL << 9)
499+
500+struct v4l2_ctrl_hevc_slice_params {
501+	__u32	bit_size;
502+	__u32	data_bit_offset;
503+
504+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
505+	__u32	slice_segment_addr;
506+	__u32	num_entry_point_offsets;
507+
508+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
509+	__u8	nal_unit_type;
510+	__u8	nuh_temporal_id_plus1;
511+
512+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
513+	__u8	slice_type;
514+	__u8	colour_plane_id;
515+	__u16	slice_pic_order_cnt;
516+	__u8	num_ref_idx_l0_active_minus1;
517+	__u8	num_ref_idx_l1_active_minus1;
518+	__u8	collocated_ref_idx;
519+	__u8	five_minus_max_num_merge_cand;
520+	__s8	slice_qp_delta;
521+	__s8	slice_cb_qp_offset;
522+	__s8	slice_cr_qp_offset;
523+	__s8	slice_act_y_qp_offset;
524+	__s8	slice_act_cb_qp_offset;
525+	__s8	slice_act_cr_qp_offset;
526+	__s8	slice_beta_offset_div2;
527+	__s8	slice_tc_offset_div2;
528+
529+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
530+	__u8	pic_struct;
531+
532+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
533+	__u8	num_active_dpb_entries;
534+	__u8	ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
535+	__u8	ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
536+
537+	__u8	num_rps_poc_st_curr_before;
538+	__u8	num_rps_poc_st_curr_after;
539+	__u8	num_rps_poc_lt_curr;
540+
541+	__u8	padding;
542+
543+	__u32	entry_point_offset_minus1[256];
544+
545+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
546+	struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
547+
548+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
549+	struct v4l2_hevc_pred_weight_table pred_weight_table;
550+
551+	__u64	flags;
552+};
553+
554+struct v4l2_ctrl_hevc_scaling_matrix {
555+	__u8	scaling_list_4x4[6][16];
556+	__u8	scaling_list_8x8[6][64];
557+	__u8	scaling_list_16x16[6][64];
558+	__u8	scaling_list_32x32[2][64];
559+	__u8	scaling_list_dc_coef_16x16[6];
560+	__u8	scaling_list_dc_coef_32x32[2];
561+};
562+
563+#endif
564--- /dev/null
565+++ b/libavcodec/hevc-ctrls-v2.h
566@@ -0,0 +1,257 @@
567+/* SPDX-License-Identifier: GPL-2.0 */
568+/*
569+ * These are the HEVC state controls for use with stateless HEVC
570+ * codec drivers.
571+ *
572+ * It turns out that these structs are not stable yet and will undergo
573+ * more changes. So keep them private until they are stable and ready to
574+ * become part of the official public API.
575+ */
576+
577+#ifndef _HEVC_CTRLS_H_
578+#define _HEVC_CTRLS_H_
579+
580+#include <linux/videodev2.h>
581+
582+/* The pixel format isn't stable at the moment and will likely be renamed. */
583+#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
584+
585+#define V4L2_CID_MPEG_VIDEO_HEVC_SPS		(V4L2_CID_CODEC_BASE + 1008)
586+#define V4L2_CID_MPEG_VIDEO_HEVC_PPS		(V4L2_CID_CODEC_BASE + 1009)
587+#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS	(V4L2_CID_CODEC_BASE + 1010)
588+#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX	(V4L2_CID_CODEC_BASE + 1011)
589+#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS	(V4L2_CID_CODEC_BASE + 1012)
590+#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE	(V4L2_CID_CODEC_BASE + 1015)
591+#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE	(V4L2_CID_CODEC_BASE + 1016)
592+
593+/* enum v4l2_ctrl_type type values */
594+#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120
595+#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121
596+#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122
597+#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123
598+#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124
599+
600+enum v4l2_mpeg_video_hevc_decode_mode {
601+	V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
602+	V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED,
603+};
604+
605+enum v4l2_mpeg_video_hevc_start_code {
606+	V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
607+	V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B,
608+};
609+
610+#define V4L2_HEVC_SLICE_TYPE_B	0
611+#define V4L2_HEVC_SLICE_TYPE_P	1
612+#define V4L2_HEVC_SLICE_TYPE_I	2
613+
614+#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE		(1ULL << 0)
615+#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED			(1ULL << 1)
616+#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED				(1ULL << 2)
617+#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET		(1ULL << 3)
618+#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED				(1ULL << 4)
619+#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED		(1ULL << 5)
620+#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT		(1ULL << 6)
621+#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED		(1ULL << 7)
622+#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED	(1ULL << 8)
623+
624+/* The controls are not stable at the moment and will likely be reworked. */
625+struct v4l2_ctrl_hevc_sps {
626+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
627+	__u16	pic_width_in_luma_samples;
628+	__u16	pic_height_in_luma_samples;
629+	__u8	bit_depth_luma_minus8;
630+	__u8	bit_depth_chroma_minus8;
631+	__u8	log2_max_pic_order_cnt_lsb_minus4;
632+	__u8	sps_max_dec_pic_buffering_minus1;
633+	__u8	sps_max_num_reorder_pics;
634+	__u8	sps_max_latency_increase_plus1;
635+	__u8	log2_min_luma_coding_block_size_minus3;
636+	__u8	log2_diff_max_min_luma_coding_block_size;
637+	__u8	log2_min_luma_transform_block_size_minus2;
638+	__u8	log2_diff_max_min_luma_transform_block_size;
639+	__u8	max_transform_hierarchy_depth_inter;
640+	__u8	max_transform_hierarchy_depth_intra;
641+	__u8	pcm_sample_bit_depth_luma_minus1;
642+	__u8	pcm_sample_bit_depth_chroma_minus1;
643+	__u8	log2_min_pcm_luma_coding_block_size_minus3;
644+	__u8	log2_diff_max_min_pcm_luma_coding_block_size;
645+	__u8	num_short_term_ref_pic_sets;
646+	__u8	num_long_term_ref_pics_sps;
647+	__u8	chroma_format_idc;
648+	__u8	sps_max_sub_layers_minus1;
649+
650+	__u64	flags;
651+};
652+
653+#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED	(1ULL << 0)
654+#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT			(1ULL << 1)
655+#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED		(1ULL << 2)
656+#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT			(1ULL << 3)
657+#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED		(1ULL << 4)
658+#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED		(1ULL << 5)
659+#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED			(1ULL << 6)
660+#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT	(1ULL << 7)
661+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED			(1ULL << 8)
662+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED			(1ULL << 9)
663+#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED		(1ULL << 10)
664+#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED			(1ULL << 11)
665+#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED		(1ULL << 12)
666+#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED	(1ULL << 13)
667+#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14)
668+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED	(1ULL << 15)
669+#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER	(1ULL << 16)
670+#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT		(1ULL << 17)
671+#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18)
672+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT	(1ULL << 19)
673+#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING			(1ULL << 20)
674+
675+struct v4l2_ctrl_hevc_pps {
676+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
677+	__u8	num_extra_slice_header_bits;
678+	__u8	num_ref_idx_l0_default_active_minus1;
679+	__u8	num_ref_idx_l1_default_active_minus1;
680+	__s8	init_qp_minus26;
681+	__u8	diff_cu_qp_delta_depth;
682+	__s8	pps_cb_qp_offset;
683+	__s8	pps_cr_qp_offset;
684+	__u8	num_tile_columns_minus1;
685+	__u8	num_tile_rows_minus1;
686+	__u8	column_width_minus1[20];
687+	__u8	row_height_minus1[22];
688+	__s8	pps_beta_offset_div2;
689+	__s8	pps_tc_offset_div2;
690+	__u8	log2_parallel_merge_level_minus2;
691+
692+	__u8	padding[4];
693+	__u64	flags;
694+};
695+
696+#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE	0x01
697+#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER	0x02
698+#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR		0x03
699+
700+#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX		16
701+
702+struct v4l2_hevc_dpb_entry {
703+	__u64	timestamp;
704+	__u8	rps;
705+	__u8	field_pic;
706+	__u16	pic_order_cnt[2];
707+	__u8	padding[2];
708+};
709+
710+struct v4l2_hevc_pred_weight_table {
711+	__s8	delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
712+	__s8	luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
713+	__s8	delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
714+	__s8	chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
715+
716+	__s8	delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
717+	__s8	luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
718+	__s8	delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
719+	__s8	chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
720+
721+	__u8	padding[6];
722+
723+	__u8	luma_log2_weight_denom;
724+	__s8	delta_chroma_log2_weight_denom;
725+};
726+
727+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA		(1ULL << 0)
728+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA		(1ULL << 1)
729+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED	(1ULL << 2)
730+#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO			(1ULL << 3)
731+#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT			(1ULL << 4)
732+#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0		(1ULL << 5)
733+#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV		(1ULL << 6)
734+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
735+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
736+#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT	(1ULL << 9)
737+
738+struct v4l2_ctrl_hevc_slice_params {
739+	__u32	bit_size;
740+	__u32	data_bit_offset;
741+
742+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
743+	__u32	slice_segment_addr;
744+	__u32	num_entry_point_offsets;
745+
746+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
747+	__u8	nal_unit_type;
748+	__u8	nuh_temporal_id_plus1;
749+
750+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
751+	__u8	slice_type;
752+	__u8	colour_plane_id;
753+	__u16	slice_pic_order_cnt;
754+	__u8	num_ref_idx_l0_active_minus1;
755+	__u8	num_ref_idx_l1_active_minus1;
756+	__u8	collocated_ref_idx;
757+	__u8	five_minus_max_num_merge_cand;
758+	__s8	slice_qp_delta;
759+	__s8	slice_cb_qp_offset;
760+	__s8	slice_cr_qp_offset;
761+	__s8	slice_act_y_qp_offset;
762+	__s8	slice_act_cb_qp_offset;
763+	__s8	slice_act_cr_qp_offset;
764+	__s8	slice_beta_offset_div2;
765+	__s8	slice_tc_offset_div2;
766+
767+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
768+	__u8	pic_struct;
769+
770+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
771+	__u8	ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
772+	__u8	ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
773+
774+	__u8	padding[5];
775+
776+	__u32	entry_point_offset_minus1[256];
777+
778+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
779+	struct v4l2_hevc_pred_weight_table pred_weight_table;
780+
781+	__u64	flags;
782+};
783+
784+#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC		0x1
785+#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC		0x2
786+#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR  0x4
787+
788+struct v4l2_ctrl_hevc_decode_params {
789+	__s32	pic_order_cnt_val;
790+	__u8	num_active_dpb_entries;
791+	struct	v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
792+	__u8	num_poc_st_curr_before;
793+	__u8	num_poc_st_curr_after;
794+	__u8	num_poc_lt_curr;
795+	__u8	poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
796+	__u8	poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
797+	__u8	poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
798+	__u64	flags;
799+};
800+
801+/*  MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */
802+#define V4L2_CID_CODEC_HANTRO_BASE				(V4L2_CTRL_CLASS_CODEC | 0x1200)
803+/*
804+ * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP -
805+ * the number of data (in bits) to skip in the
806+ * slice segment header.
807+ * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag"
808+ * to before syntax element "slice_temporal_mvp_enabled_flag".
809+ * If IDR, the skipped bits are just "pic_output_flag"
810+ * (separate_colour_plane_flag is not supported).
811+ */
812+#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP	(V4L2_CID_CODEC_HANTRO_BASE + 0)
813+
814+struct v4l2_ctrl_hevc_scaling_matrix {
815+	__u8	scaling_list_4x4[6][16];
816+	__u8	scaling_list_8x8[6][64];
817+	__u8	scaling_list_16x16[6][64];
818+	__u8	scaling_list_32x32[2][64];
819+	__u8	scaling_list_dc_coef_16x16[6];
820+	__u8	scaling_list_dc_coef_32x32[2];
821+};
822+
823+#endif
824--- /dev/null
825+++ b/libavcodec/hevc-ctrls-v3.h
826@@ -0,0 +1,255 @@
827+/* SPDX-License-Identifier: GPL-2.0 */
828+/*
829+ * These are the HEVC state controls for use with stateless HEVC
830+ * codec drivers.
831+ *
832+ * It turns out that these structs are not stable yet and will undergo
833+ * more changes. So keep them private until they are stable and ready to
834+ * become part of the official public API.
835+ */
836+
837+#ifndef _HEVC_CTRLS_H_
838+#define _HEVC_CTRLS_H_
839+
840+#include <linux/videodev2.h>
841+
842+/* The pixel format isn't stable at the moment and will likely be renamed. */
843+#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
844+
845+#define V4L2_CID_MPEG_VIDEO_HEVC_SPS		(V4L2_CID_CODEC_BASE + 1008)
846+#define V4L2_CID_MPEG_VIDEO_HEVC_PPS		(V4L2_CID_CODEC_BASE + 1009)
847+#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS	(V4L2_CID_CODEC_BASE + 1010)
848+#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX	(V4L2_CID_CODEC_BASE + 1011)
849+#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS	(V4L2_CID_CODEC_BASE + 1012)
850+#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE	(V4L2_CID_CODEC_BASE + 1015)
851+#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE	(V4L2_CID_CODEC_BASE + 1016)
852+
853+/* enum v4l2_ctrl_type type values */
854+#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120
855+#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121
856+#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122
857+#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123
858+#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124
859+
860+enum v4l2_mpeg_video_hevc_decode_mode {
861+	V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
862+	V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED,
863+};
864+
865+enum v4l2_mpeg_video_hevc_start_code {
866+	V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
867+	V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B,
868+};
869+
870+#define V4L2_HEVC_SLICE_TYPE_B	0
871+#define V4L2_HEVC_SLICE_TYPE_P	1
872+#define V4L2_HEVC_SLICE_TYPE_I	2
873+
874+#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE		(1ULL << 0)
875+#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED			(1ULL << 1)
876+#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED				(1ULL << 2)
877+#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET		(1ULL << 3)
878+#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED				(1ULL << 4)
879+#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED		(1ULL << 5)
880+#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT		(1ULL << 6)
881+#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED		(1ULL << 7)
882+#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED	(1ULL << 8)
883+
884+/* The controls are not stable at the moment and will likely be reworked. */
885+struct v4l2_ctrl_hevc_sps {
886+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
887+	__u16	pic_width_in_luma_samples;
888+	__u16	pic_height_in_luma_samples;
889+	__u8	bit_depth_luma_minus8;
890+	__u8	bit_depth_chroma_minus8;
891+	__u8	log2_max_pic_order_cnt_lsb_minus4;
892+	__u8	sps_max_dec_pic_buffering_minus1;
893+	__u8	sps_max_num_reorder_pics;
894+	__u8	sps_max_latency_increase_plus1;
895+	__u8	log2_min_luma_coding_block_size_minus3;
896+	__u8	log2_diff_max_min_luma_coding_block_size;
897+	__u8	log2_min_luma_transform_block_size_minus2;
898+	__u8	log2_diff_max_min_luma_transform_block_size;
899+	__u8	max_transform_hierarchy_depth_inter;
900+	__u8	max_transform_hierarchy_depth_intra;
901+	__u8	pcm_sample_bit_depth_luma_minus1;
902+	__u8	pcm_sample_bit_depth_chroma_minus1;
903+	__u8	log2_min_pcm_luma_coding_block_size_minus3;
904+	__u8	log2_diff_max_min_pcm_luma_coding_block_size;
905+	__u8	num_short_term_ref_pic_sets;
906+	__u8	num_long_term_ref_pics_sps;
907+	__u8	chroma_format_idc;
908+	__u8	sps_max_sub_layers_minus1;
909+
910+	__u64	flags;
911+};
912+
913+#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED	(1ULL << 0)
914+#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT			(1ULL << 1)
915+#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED		(1ULL << 2)
916+#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT			(1ULL << 3)
917+#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED		(1ULL << 4)
918+#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED		(1ULL << 5)
919+#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED			(1ULL << 6)
920+#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT	(1ULL << 7)
921+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED			(1ULL << 8)
922+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED			(1ULL << 9)
923+#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED		(1ULL << 10)
924+#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED			(1ULL << 11)
925+#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED		(1ULL << 12)
926+#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED	(1ULL << 13)
927+#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14)
928+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED	(1ULL << 15)
929+#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER	(1ULL << 16)
930+#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT		(1ULL << 17)
931+#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18)
932+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT	(1ULL << 19)
933+#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING			(1ULL << 20)
934+
935+struct v4l2_ctrl_hevc_pps {
936+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
937+	__u8	num_extra_slice_header_bits;
938+	__u8	num_ref_idx_l0_default_active_minus1;
939+	__u8	num_ref_idx_l1_default_active_minus1;
940+	__s8	init_qp_minus26;
941+	__u8	diff_cu_qp_delta_depth;
942+	__s8	pps_cb_qp_offset;
943+	__s8	pps_cr_qp_offset;
944+	__u8	num_tile_columns_minus1;
945+	__u8	num_tile_rows_minus1;
946+	__u8	column_width_minus1[20];
947+	__u8	row_height_minus1[22];
948+	__s8	pps_beta_offset_div2;
949+	__s8	pps_tc_offset_div2;
950+	__u8	log2_parallel_merge_level_minus2;
951+
952+	__u8	padding[4];
953+	__u64	flags;
954+};
955+
956+#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE	0x01
957+
958+#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX		16
959+
960+struct v4l2_hevc_dpb_entry {
961+	__u64	timestamp;
962+	__u8	flags;
963+	__u8	field_pic;
964+	__u16	pic_order_cnt[2];
965+	__u8	padding[2];
966+};
967+
968+struct v4l2_hevc_pred_weight_table {
969+	__s8	delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
970+	__s8	luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
971+	__s8	delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
972+	__s8	chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
973+
974+	__s8	delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
975+	__s8	luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
976+	__s8	delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
977+	__s8	chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
978+
979+	__u8	padding[6];
980+
981+	__u8	luma_log2_weight_denom;
982+	__s8	delta_chroma_log2_weight_denom;
983+};
984+
985+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA		(1ULL << 0)
986+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA		(1ULL << 1)
987+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED	(1ULL << 2)
988+#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO			(1ULL << 3)
989+#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT			(1ULL << 4)
990+#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0		(1ULL << 5)
991+#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV		(1ULL << 6)
992+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
993+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
994+#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT	(1ULL << 9)
995+
996+struct v4l2_ctrl_hevc_slice_params {
997+	__u32	bit_size;
998+	__u32	data_bit_offset;
999+
1000+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
1001+	__u32	slice_segment_addr;
1002+	__u32	num_entry_point_offsets;
1003+
1004+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
1005+	__u8	nal_unit_type;
1006+	__u8	nuh_temporal_id_plus1;
1007+
1008+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
1009+	__u8	slice_type;
1010+	__u8	colour_plane_id;
1011+	__u16	slice_pic_order_cnt;
1012+	__u8	num_ref_idx_l0_active_minus1;
1013+	__u8	num_ref_idx_l1_active_minus1;
1014+	__u8	collocated_ref_idx;
1015+	__u8	five_minus_max_num_merge_cand;
1016+	__s8	slice_qp_delta;
1017+	__s8	slice_cb_qp_offset;
1018+	__s8	slice_cr_qp_offset;
1019+	__s8	slice_act_y_qp_offset;
1020+	__s8	slice_act_cb_qp_offset;
1021+	__s8	slice_act_cr_qp_offset;
1022+	__s8	slice_beta_offset_div2;
1023+	__s8	slice_tc_offset_div2;
1024+
1025+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
1026+	__u8	pic_struct;
1027+
1028+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
1029+	__u8	ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1030+	__u8	ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1031+
1032+	__u8	padding[5];
1033+
1034+	__u32	entry_point_offset_minus1[256];
1035+
1036+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
1037+	struct v4l2_hevc_pred_weight_table pred_weight_table;
1038+
1039+	__u64	flags;
1040+};
1041+
1042+#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC		0x1
1043+#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC		0x2
1044+#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR  0x4
1045+
1046+struct v4l2_ctrl_hevc_decode_params {
1047+	__s32	pic_order_cnt_val;
1048+	__u8	num_active_dpb_entries;
1049+	struct	v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1050+	__u8	num_poc_st_curr_before;
1051+	__u8	num_poc_st_curr_after;
1052+	__u8	num_poc_lt_curr;
1053+	__u8	poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1054+	__u8	poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1055+	__u8	poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1056+	__u64	flags;
1057+};
1058+
1059+struct v4l2_ctrl_hevc_scaling_matrix {
1060+	__u8	scaling_list_4x4[6][16];
1061+	__u8	scaling_list_8x8[6][64];
1062+	__u8	scaling_list_16x16[6][64];
1063+	__u8	scaling_list_32x32[2][64];
1064+	__u8	scaling_list_dc_coef_16x16[6];
1065+	__u8	scaling_list_dc_coef_32x32[2];
1066+};
1067+
1068+/*  MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */
1069+#define V4L2_CID_CODEC_HANTRO_BASE				(V4L2_CTRL_CLASS_CODEC | 0x1200)
1070+/*
1071+ * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP -
1072+ * the number of data (in bits) to skip in the
1073+ * slice segment header.
1074+ * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag"
1075+ * to before syntax element "slice_temporal_mvp_enabled_flag".
1076+ * If IDR, the skipped bits are just "pic_output_flag"
1077+ * (separate_colour_plane_flag is not supported).
1078+ */
1079+#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP	(V4L2_CID_CODEC_HANTRO_BASE + 0)
1080+
1081+#endif
1082--- /dev/null
1083+++ b/libavcodec/hevc-ctrls-v4.h
1084@@ -0,0 +1,524 @@
1085+/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */
1086+/*
1087+ *  Video for Linux Two controls header file
1088+ *
1089+ *  Copyright (C) 1999-2012 the contributors
1090+ *
1091+ *  This program is free software; you can redistribute it and/or modify
1092+ *  it under the terms of the GNU General Public License as published by
1093+ *  the Free Software Foundation; either version 2 of the License, or
1094+ *  (at your option) any later version.
1095+ *
1096+ *  This program is distributed in the hope that it will be useful,
1097+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
1098+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1099+ *  GNU General Public License for more details.
1100+ *
1101+ *  Alternatively you can redistribute this file under the terms of the
1102+ *  BSD license as stated below:
1103+ *
1104+ *  Redistribution and use in source and binary forms, with or without
1105+ *  modification, are permitted provided that the following conditions
1106+ *  are met:
1107+ *  1. Redistributions of source code must retain the above copyright
1108+ *     notice, this list of conditions and the following disclaimer.
1109+ *  2. Redistributions in binary form must reproduce the above copyright
1110+ *     notice, this list of conditions and the following disclaimer in
1111+ *     the documentation and/or other materials provided with the
1112+ *     distribution.
1113+ *  3. The names of its contributors may not be used to endorse or promote
1114+ *     products derived from this software without specific prior written
1115+ *     permission.
1116+ *
1117+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1118+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1119+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1120+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1121+ *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1122+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
1123+ *  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
1124+ *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
1125+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
1126+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
1127+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1128+ *
1129+ *  The contents of this header was split off from videodev2.h. All control
1130+ *  definitions should be added to this header, which is included by
1131+ *  videodev2.h.
1132+ */
1133+
1134+#ifndef AVCODEC_HEVC_CTRLS_V4_H
1135+#define AVCODEC_HEVC_CTRLS_V4_H
1136+
1137+#include <linux/const.h>
1138+#include <linux/types.h>
1139+
1140+#ifndef V4L2_CTRL_CLASS_CODEC_STATELESS
1141+#define V4L2_CTRL_CLASS_CODEC_STATELESS 0x00a40000	/* Stateless codecs controls */
1142+#endif
1143+#ifndef V4L2_CID_CODEC_STATELESS_BASE
1144+#define V4L2_CID_CODEC_STATELESS_BASE		(V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900)
1145+#endif
1146+
1147+#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
1148+
1149+#define V4L2_CID_STATELESS_HEVC_SPS		(V4L2_CID_CODEC_STATELESS_BASE + 400)
1150+#define V4L2_CID_STATELESS_HEVC_PPS		(V4L2_CID_CODEC_STATELESS_BASE + 401)
1151+#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS	(V4L2_CID_CODEC_STATELESS_BASE + 402)
1152+#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX	(V4L2_CID_CODEC_STATELESS_BASE + 403)
1153+#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS	(V4L2_CID_CODEC_STATELESS_BASE + 404)
1154+#define V4L2_CID_STATELESS_HEVC_DECODE_MODE	(V4L2_CID_CODEC_STATELESS_BASE + 405)
1155+#define V4L2_CID_STATELESS_HEVC_START_CODE	(V4L2_CID_CODEC_STATELESS_BASE + 406)
1156+#define V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS (V4L2_CID_CODEC_STATELESS_BASE + 407)
1157+
1158+enum v4l2_stateless_hevc_decode_mode {
1159+	V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED,
1160+	V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED,
1161+};
1162+
1163+enum v4l2_stateless_hevc_start_code {
1164+	V4L2_STATELESS_HEVC_START_CODE_NONE,
1165+	V4L2_STATELESS_HEVC_START_CODE_ANNEX_B,
1166+};
1167+
1168+#define V4L2_HEVC_SLICE_TYPE_B	0
1169+#define V4L2_HEVC_SLICE_TYPE_P	1
1170+#define V4L2_HEVC_SLICE_TYPE_I	2
1171+
1172+#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE		(1ULL << 0)
1173+#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED			(1ULL << 1)
1174+#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED				(1ULL << 2)
1175+#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET		(1ULL << 3)
1176+#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED				(1ULL << 4)
1177+#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED		(1ULL << 5)
1178+#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT		(1ULL << 6)
1179+#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED		(1ULL << 7)
1180+#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED	(1ULL << 8)
1181+
1182+/**
1183+ * struct v4l2_ctrl_hevc_sps - ITU-T Rec. H.265: Sequence parameter set
1184+ *
1185+ * @video_parameter_set_id: specifies the value of the
1186+ *			vps_video_parameter_set_id of the active VPS
1187+ * @seq_parameter_set_id: provides an identifier for the SPS for
1188+ *			  reference by other syntax elements
1189+ * @pic_width_in_luma_samples:	specifies the width of each decoded picture
1190+ *				in units of luma samples
1191+ * @pic_height_in_luma_samples: specifies the height of each decoded picture
1192+ *				in units of luma samples
1193+ * @bit_depth_luma_minus8: this value plus 8 specifies the bit depth of the
1194+ *                         samples of the luma array
1195+ * @bit_depth_chroma_minus8: this value plus 8 specifies the bit depth of the
1196+ *                           samples of the chroma arrays
1197+ * @log2_max_pic_order_cnt_lsb_minus4: this value plus 4 specifies the value of
1198+ *                                     the variable MaxPicOrderCntLsb
1199+ * @sps_max_dec_pic_buffering_minus1: this value plus 1 specifies the maximum
1200+ *                                    required size of the decoded picture
1201+ *                                    buffer for the codec video sequence
1202+ * @sps_max_num_reorder_pics: indicates the maximum allowed number of pictures
1203+ * @sps_max_latency_increase_plus1: not equal to 0 is used to compute the
1204+ *				    value of SpsMaxLatencyPictures array
1205+ * @log2_min_luma_coding_block_size_minus3: plus 3 specifies the minimum
1206+ *					    luma coding block size
1207+ * @log2_diff_max_min_luma_coding_block_size: specifies the difference between
1208+ *					      the maximum and minimum luma
1209+ *					      coding block size
1210+ * @log2_min_luma_transform_block_size_minus2: plus 2 specifies the minimum luma
1211+ *					       transform block size
1212+ * @log2_diff_max_min_luma_transform_block_size: specifies the difference between
1213+ *						 the maximum and minimum luma
1214+ *						 transform block size
1215+ * @max_transform_hierarchy_depth_inter: specifies the maximum hierarchy
1216+ *					 depth for transform units of
1217+ *					 coding units coded in inter
1218+ *					 prediction mode
1219+ * @max_transform_hierarchy_depth_intra: specifies the maximum hierarchy
1220+ *					 depth for transform units of
1221+ *					 coding units coded in intra
1222+ *					 prediction mode
1223+ * @pcm_sample_bit_depth_luma_minus1: this value plus 1 specifies the number of
1224+ *                                    bits used to represent each of PCM sample
1225+ *                                    values of the luma component
1226+ * @pcm_sample_bit_depth_chroma_minus1: this value plus 1 specifies the number
1227+ *                                      of bits used to represent each of PCM
1228+ *                                      sample values of the chroma components
1229+ * @log2_min_pcm_luma_coding_block_size_minus3: this value plus 3 specifies the
1230+ *                                              minimum size of coding blocks
1231+ * @log2_diff_max_min_pcm_luma_coding_block_size: specifies the difference between
1232+ *						  the maximum and minimum size of
1233+ *						  coding blocks
1234+ * @num_short_term_ref_pic_sets: specifies the number of st_ref_pic_set()
1235+ *				 syntax structures included in the SPS
1236+ * @num_long_term_ref_pics_sps: specifies the number of candidate long-term
1237+ *				reference pictures that are specified in the SPS
1238+ * @chroma_format_idc: specifies the chroma sampling
1239+ * @sps_max_sub_layers_minus1: this value plus 1 specifies the maximum number
1240+ *                             of temporal sub-layers
1241+ * @reserved: padding field. Should be zeroed by applications.
1242+ * @flags: see V4L2_HEVC_SPS_FLAG_{}
1243+ */
1244+struct v4l2_ctrl_hevc_sps {
1245+	__u8	video_parameter_set_id;
1246+	__u8	seq_parameter_set_id;
1247+	__u16	pic_width_in_luma_samples;
1248+	__u16	pic_height_in_luma_samples;
1249+	__u8	bit_depth_luma_minus8;
1250+	__u8	bit_depth_chroma_minus8;
1251+	__u8	log2_max_pic_order_cnt_lsb_minus4;
1252+	__u8	sps_max_dec_pic_buffering_minus1;
1253+	__u8	sps_max_num_reorder_pics;
1254+	__u8	sps_max_latency_increase_plus1;
1255+	__u8	log2_min_luma_coding_block_size_minus3;
1256+	__u8	log2_diff_max_min_luma_coding_block_size;
1257+	__u8	log2_min_luma_transform_block_size_minus2;
1258+	__u8	log2_diff_max_min_luma_transform_block_size;
1259+	__u8	max_transform_hierarchy_depth_inter;
1260+	__u8	max_transform_hierarchy_depth_intra;
1261+	__u8	pcm_sample_bit_depth_luma_minus1;
1262+	__u8	pcm_sample_bit_depth_chroma_minus1;
1263+	__u8	log2_min_pcm_luma_coding_block_size_minus3;
1264+	__u8	log2_diff_max_min_pcm_luma_coding_block_size;
1265+	__u8	num_short_term_ref_pic_sets;
1266+	__u8	num_long_term_ref_pics_sps;
1267+	__u8	chroma_format_idc;
1268+	__u8	sps_max_sub_layers_minus1;
1269+
1270+	__u8	reserved[6];
1271+	__u64	flags;
1272+};
1273+
1274+#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED	(1ULL << 0)
1275+#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT			(1ULL << 1)
1276+#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED		(1ULL << 2)
1277+#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT			(1ULL << 3)
1278+#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED		(1ULL << 4)
1279+#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED		(1ULL << 5)
1280+#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED			(1ULL << 6)
1281+#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT	(1ULL << 7)
1282+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED			(1ULL << 8)
1283+#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED			(1ULL << 9)
1284+#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED		(1ULL << 10)
1285+#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED			(1ULL << 11)
1286+#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED		(1ULL << 12)
1287+#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED	(1ULL << 13)
1288+#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14)
1289+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED	(1ULL << 15)
1290+#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER	(1ULL << 16)
1291+#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT		(1ULL << 17)
1292+#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18)
1293+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT	(1ULL << 19)
1294+#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING			(1ULL << 20)
1295+
1296+/**
1297+ * struct v4l2_ctrl_hevc_pps - ITU-T Rec. H.265: Picture parameter set
1298+ *
1299+ * @pic_parameter_set_id: identifies the PPS for reference by other
1300+ *			  syntax elements
1301+ * @num_extra_slice_header_bits: specifies the number of extra slice header
1302+ *				 bits that are present in the slice header RBSP
1303+ *				 for coded pictures referring to the PPS.
1304+ * @num_ref_idx_l0_default_active_minus1: this value plus 1 specifies the
1305+ *                                        inferred value of num_ref_idx_l0_active_minus1
1306+ * @num_ref_idx_l1_default_active_minus1: this value plus 1 specifies the
1307+ *                                        inferred value of num_ref_idx_l1_active_minus1
1308+ * @init_qp_minus26: this value plus 26 specifies the initial value of SliceQp Y for
1309+ *		     each slice referring to the PPS
1310+ * @diff_cu_qp_delta_depth: specifies the difference between the luma coding
1311+ *			    tree block size and the minimum luma coding block
1312+ *			    size of coding units that convey cu_qp_delta_abs
1313+ *			    and cu_qp_delta_sign_flag
1314+ * @pps_cb_qp_offset: specify the offsets to the luma quantization parameter Cb
1315+ * @pps_cr_qp_offset: specify the offsets to the luma quantization parameter Cr
1316+ * @num_tile_columns_minus1: this value plus 1 specifies the number of tile columns
1317+ *			     partitioning the picture
1318+ * @num_tile_rows_minus1: this value plus 1 specifies the number of tile rows partitioning
1319+ *			  the picture
1320+ * @column_width_minus1: this value plus 1 specifies the width of each tile column in
1321+ *			 units of coding tree blocks
1322+ * @row_height_minus1: this value plus 1 specifies the height of each tile row in
1323+ *		       units of coding tree blocks
1324+ * @pps_beta_offset_div2: specify the default deblocking parameter offsets for
1325+ *			  beta divided by 2
1326+ * @pps_tc_offset_div2: specify the default deblocking parameter offsets for tC
1327+ *			divided by 2
1328+ * @log2_parallel_merge_level_minus2: this value plus 2 specifies the value of
1329+ *                                    the variable Log2ParMrgLevel
1330+ * @reserved: padding field. Should be zeroed by applications.
1331+ * @flags: see V4L2_HEVC_PPS_FLAG_{}
1332+ */
1333+struct v4l2_ctrl_hevc_pps {
1334+	__u8	pic_parameter_set_id;
1335+	__u8	num_extra_slice_header_bits;
1336+	__u8	num_ref_idx_l0_default_active_minus1;
1337+	__u8	num_ref_idx_l1_default_active_minus1;
1338+	__s8	init_qp_minus26;
1339+	__u8	diff_cu_qp_delta_depth;
1340+	__s8	pps_cb_qp_offset;
1341+	__s8	pps_cr_qp_offset;
1342+	__u8	num_tile_columns_minus1;
1343+	__u8	num_tile_rows_minus1;
1344+	__u8	column_width_minus1[20];
1345+	__u8	row_height_minus1[22];
1346+	__s8	pps_beta_offset_div2;
1347+	__s8	pps_tc_offset_div2;
1348+	__u8	log2_parallel_merge_level_minus2;
1349+	__u8	reserved;
1350+	__u64	flags;
1351+};
1352+
1353+#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE	0x01
1354+
1355+#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME				0
1356+#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_FIELD			1
1357+#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_FIELD			2
1358+#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM			3
1359+#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP			4
1360+#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM_TOP			5
1361+#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM		6
1362+#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING			7
1363+#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING			8
1364+#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_PREVIOUS_BOTTOM	9
1365+#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_PREVIOUS_TOP	10
1366+#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_NEXT_BOTTOM		11
1367+#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_NEXT_TOP		12
1368+
1369+#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX		16
1370+
1371+/**
1372+ * struct v4l2_hevc_dpb_entry - HEVC decoded picture buffer entry
1373+ *
1374+ * @timestamp: timestamp of the V4L2 capture buffer to use as reference.
1375+ * @flags: long term flag for the reference frame
1376+ * @field_pic: whether the reference is a field picture or a frame.
1377+ * @reserved: padding field. Should be zeroed by applications.
1378+ * @pic_order_cnt_val: the picture order count of the reference picture.
1379+ */
1380+struct v4l2_hevc_dpb_entry {
1381+	__u64	timestamp;
1382+	__u8	flags;
1383+	__u8	field_pic;
1384+	__u16	reserved;
1385+	__s32	pic_order_cnt_val;
1386+};
1387+
1388+/**
1389+ * struct v4l2_hevc_pred_weight_table - HEVC weighted prediction parameters
1390+ *
1391+ * @delta_luma_weight_l0: the difference of the weighting factor applied
1392+ *			  to the luma prediction value for list 0
1393+ * @luma_offset_l0: the additive offset applied to the luma prediction value
1394+ *		    for list 0
1395+ * @delta_chroma_weight_l0: the difference of the weighting factor applied
1396+ *			    to the chroma prediction values for list 0
1397+ * @chroma_offset_l0: the difference of the additive offset applied to
1398+ *		      the chroma prediction values for list 0
1399+ * @delta_luma_weight_l1: the difference of the weighting factor applied
1400+ *			  to the luma prediction value for list 1
1401+ * @luma_offset_l1: the additive offset applied to the luma prediction value
1402+ *		    for list 1
1403+ * @delta_chroma_weight_l1: the difference of the weighting factor applied
1404+ *			    to the chroma prediction values for list 1
1405+ * @chroma_offset_l1: the difference of the additive offset applied to
1406+ *		      the chroma prediction values for list 1
1407+ * @luma_log2_weight_denom: the base 2 logarithm of the denominator for
1408+ *			    all luma weighting factors
1409+ * @delta_chroma_log2_weight_denom: the difference of the base 2 logarithm
1410+ *				    of the denominator for all chroma
1411+ *				    weighting factors
1412+ */
1413+struct v4l2_hevc_pred_weight_table {
1414+	__s8	delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1415+	__s8	luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1416+	__s8	delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
1417+	__s8	chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
1418+
1419+	__s8	delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1420+	__s8	luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1421+	__s8	delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
1422+	__s8	chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
1423+
1424+	__u8	luma_log2_weight_denom;
1425+	__s8	delta_chroma_log2_weight_denom;
1426+};
1427+
1428+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA		(1ULL << 0)
1429+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA		(1ULL << 1)
1430+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED	(1ULL << 2)
1431+#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO			(1ULL << 3)
1432+#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT			(1ULL << 4)
1433+#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0		(1ULL << 5)
1434+#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV		(1ULL << 6)
1435+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
1436+#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
1437+#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT	(1ULL << 9)
1438+
1439+/**
1440+ * struct v4l2_ctrl_hevc_slice_params - HEVC slice parameters
1441+ *
1442+ * This control is a dynamically sized 1-dimensional array,
1443+ * V4L2_CTRL_FLAG_DYNAMIC_ARRAY flag must be set when using it.
1444+ *
1445+ * @bit_size: size (in bits) of the current slice data
1446+ * @data_byte_offset: offset (in bytes) to the video data in the current slice data
1447+ * @num_entry_point_offsets: specifies the number of entry point offset syntax
1448+ *			     elements in the slice header.
1449+ * @nal_unit_type: specifies the coding type of the slice (B, P or I)
1450+ * @nuh_temporal_id_plus1: minus 1 specifies a temporal identifier for the NAL unit
1451+ * @slice_type: see V4L2_HEVC_SLICE_TYPE_{}
1452+ * @colour_plane_id: specifies the colour plane associated with the current slice
1453+ * @slice_pic_order_cnt: specifies the picture order count
1454+ * @num_ref_idx_l0_active_minus1: this value plus 1 specifies the maximum
1455+ *                                reference index for reference picture list 0
1456+ *                                that may be used to decode the slice
1457+ * @num_ref_idx_l1_active_minus1: this value plus 1 specifies the maximum
1458+ *                                reference index for reference picture list 1
1459+ *                                that may be used to decode the slice
1460+ * @collocated_ref_idx: specifies the reference index of the collocated picture used
1461+ *			for temporal motion vector prediction
1462+ * @five_minus_max_num_merge_cand: specifies the maximum number of merging
1463+ *				   motion vector prediction candidates supported in
1464+ *				   the slice subtracted from 5
1465+ * @slice_qp_delta: specifies the initial value of QpY to be used for the coding
1466+ *		    blocks in the slice
1467+ * @slice_cb_qp_offset: specifies a difference to be added to the value of pps_cb_qp_offset
1468+ * @slice_cr_qp_offset: specifies a difference to be added to the value of pps_cr_qp_offset
1469+ * @slice_act_y_qp_offset: screen content extension parameters
1470+ * @slice_act_cb_qp_offset: screen content extension parameters
1471+ * @slice_act_cr_qp_offset: screen content extension parameters
1472+ * @slice_beta_offset_div2: specify the deblocking parameter offsets for beta divided by 2
1473+ * @slice_tc_offset_div2: specify the deblocking parameter offsets for tC divided by 2
1474+ * @pic_struct: indicates whether a picture should be displayed as a frame or as one or
1475+ *		more fields
1476+ * @reserved0: padding field. Should be zeroed by applications.
1477+ * @slice_segment_addr: specifies the address of the first coding tree block in
1478+ *			the slice segment
1479+ * @ref_idx_l0: the list of L0 reference elements as indices in the DPB
1480+ * @ref_idx_l1: the list of L1 reference elements as indices in the DPB
1481+ * @short_term_ref_pic_set_size: specifies the size of short-term reference
1482+ *				 pictures set included in the SPS
1483+ * @long_term_ref_pic_set_size: specifies the size of long-term reference
1484+ *				pictures set included in the SPS
1485+ * @pred_weight_table: the prediction weight coefficients for inter-picture
1486+ *		       prediction
1487+ * @reserved1: padding field. Should be zeroed by applications.
1488+ * @flags: see V4L2_HEVC_SLICE_PARAMS_FLAG_{}
1489+ */
1490+struct v4l2_ctrl_hevc_slice_params {
1491+	__u32	bit_size;
1492+	__u32	data_byte_offset;
1493+	__u32	num_entry_point_offsets;
1494+
1495+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
1496+	__u8	nal_unit_type;
1497+	__u8	nuh_temporal_id_plus1;
1498+
1499+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
1500+	__u8	slice_type;
1501+	__u8	colour_plane_id;
1502+	__s32	slice_pic_order_cnt;
1503+	__u8	num_ref_idx_l0_active_minus1;
1504+	__u8	num_ref_idx_l1_active_minus1;
1505+	__u8	collocated_ref_idx;
1506+	__u8	five_minus_max_num_merge_cand;
1507+	__s8	slice_qp_delta;
1508+	__s8	slice_cb_qp_offset;
1509+	__s8	slice_cr_qp_offset;
1510+	__s8	slice_act_y_qp_offset;
1511+	__s8	slice_act_cb_qp_offset;
1512+	__s8	slice_act_cr_qp_offset;
1513+	__s8	slice_beta_offset_div2;
1514+	__s8	slice_tc_offset_div2;
1515+
1516+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
1517+	__u8	pic_struct;
1518+
1519+	__u8	reserved0[3];
1520+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
1521+	__u32	slice_segment_addr;
1522+	__u8	ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1523+	__u8	ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1524+	__u16	short_term_ref_pic_set_size;
1525+	__u16	long_term_ref_pic_set_size;
1526+
1527+	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
1528+	struct v4l2_hevc_pred_weight_table pred_weight_table;
1529+
1530+	__u8	reserved1[2];
1531+	__u64	flags;
1532+};
1533+
1534+#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC		0x1
1535+#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC		0x2
1536+#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR  0x4
1537+
1538+/**
1539+ * struct v4l2_ctrl_hevc_decode_params - HEVC decode parameters
1540+ *
1541+ * @pic_order_cnt_val: picture order count
1542+ * @short_term_ref_pic_set_size: specifies the size of short-term reference
1543+ *				 pictures set included in the SPS of the first slice
1544+ * @long_term_ref_pic_set_size: specifies the size of long-term reference
1545+ *				pictures set included in the SPS of the first slice
1546+ * @num_active_dpb_entries: the number of entries in dpb
1547+ * @num_poc_st_curr_before: the number of reference pictures in the short-term
1548+ *			    set that come before the current frame
1549+ * @num_poc_st_curr_after: the number of reference pictures in the short-term
1550+ *			   set that come after the current frame
1551+ * @num_poc_lt_curr: the number of reference pictures in the long-term set
1552+ * @poc_st_curr_before: provides the index of the short term before references
1553+ *			in DPB array
1554+ * @poc_st_curr_after: provides the index of the short term after references
1555+ *		       in DPB array
1556+ * @poc_lt_curr: provides the index of the long term references in DPB array
1557+ * @reserved: padding field. Should be zeroed by applications.
1558+ * @dpb: the decoded picture buffer, for meta-data about reference frames
1559+ * @flags: see V4L2_HEVC_DECODE_PARAM_FLAG_{}
1560+ */
1561+struct v4l2_ctrl_hevc_decode_params {
1562+	__s32	pic_order_cnt_val;
1563+	__u16	short_term_ref_pic_set_size;
1564+	__u16	long_term_ref_pic_set_size;
1565+	__u8	num_active_dpb_entries;
1566+	__u8	num_poc_st_curr_before;
1567+	__u8	num_poc_st_curr_after;
1568+	__u8	num_poc_lt_curr;
1569+	__u8	poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1570+	__u8	poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1571+	__u8	poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1572+	__u8	reserved[4];
1573+	struct	v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
1574+	__u64	flags;
1575+};
1576+
1577+/**
1578+ * struct v4l2_ctrl_hevc_scaling_matrix - HEVC scaling lists parameters
1579+ *
1580+ * @scaling_list_4x4: scaling list is used for the scaling process for
1581+ *		      transform coefficients. The values on each scaling
1582+ *		      list are expected in raster scan order
1583+ * @scaling_list_8x8: scaling list is used for the scaling process for
1584+ *		      transform coefficients. The values on each scaling
1585+ *		      list are expected in raster scan order
1586+ * @scaling_list_16x16:	scaling list is used for the scaling process for
1587+ *			transform coefficients. The values on each scaling
1588+ *			list are expected in raster scan order
1589+ * @scaling_list_32x32:	scaling list is used for the scaling process for
1590+ *			transform coefficients. The values on each scaling
1591+ *			list are expected in raster scan order
1592+ * @scaling_list_dc_coef_16x16:	scaling list is used for the scaling process
1593+ *				for transform coefficients. The values on each
1594+ *				scaling list are expected in raster scan order.
1595+ * @scaling_list_dc_coef_32x32:	scaling list is used for the scaling process
1596+ *				for transform coefficients. The values on each
1597+ *				scaling list are expected in raster scan order.
1598+ */
1599+struct v4l2_ctrl_hevc_scaling_matrix {
1600+	__u8	scaling_list_4x4[6][16];
1601+	__u8	scaling_list_8x8[6][64];
1602+	__u8	scaling_list_16x16[6][64];
1603+	__u8	scaling_list_32x32[2][64];
1604+	__u8	scaling_list_dc_coef_16x16[6];
1605+	__u8	scaling_list_dc_coef_32x32[2];
1606+};
1607+
1608+#endif
1609--- a/libavcodec/hevc_parser.c
1610+++ b/libavcodec/hevc_parser.c
1611@@ -97,6 +97,19 @@ static int hevc_parse_slice_header(AVCod
1612     avctx->profile  = ps->sps->ptl.general_ptl.profile_idc;
1613     avctx->level    = ps->sps->ptl.general_ptl.level_idc;
1614
1615+    if (ps->sps->chroma_format_idc == 1) {
1616+        avctx->chroma_sample_location = ps->sps->vui.chroma_loc_info_present_flag ?
1617+            ps->sps->vui.chroma_sample_loc_type_top_field + 1 :
1618+            AVCHROMA_LOC_LEFT;
1619+    }
1620+    else if (ps->sps->chroma_format_idc == 2 ||
1621+             ps->sps->chroma_format_idc == 3) {
1622+        avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;
1623+    }
1624+    else {
1625+        avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
1626+    }
1627+
1628     if (ps->vps->vps_timing_info_present_flag) {
1629         num = ps->vps->vps_num_units_in_tick;
1630         den = ps->vps->vps_time_scale;
1631--- a/libavcodec/hevc_refs.c
1632+++ b/libavcodec/hevc_refs.c
1633@@ -98,18 +98,22 @@ static HEVCFrame *alloc_frame(HEVCContex
1634         if (!frame->rpl_buf)
1635             goto fail;
1636
1637-        frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool);
1638-        if (!frame->tab_mvf_buf)
1639-            goto fail;
1640-        frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data;
1641+        if (s->tab_mvf_pool) {
1642+            frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool);
1643+            if (!frame->tab_mvf_buf)
1644+                goto fail;
1645+            frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data;
1646+        }
1647
1648-        frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool);
1649-        if (!frame->rpl_tab_buf)
1650-            goto fail;
1651-        frame->rpl_tab   = (RefPicListTab **)frame->rpl_tab_buf->data;
1652-        frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
1653-        for (j = 0; j < frame->ctb_count; j++)
1654-            frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
1655+        if (s->rpl_tab_pool) {
1656+            frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool);
1657+            if (!frame->rpl_tab_buf)
1658+                goto fail;
1659+            frame->rpl_tab   = (RefPicListTab **)frame->rpl_tab_buf->data;
1660+            frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
1661+            for (j = 0; j < frame->ctb_count; j++)
1662+                frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
1663+        }
1664
1665         frame->frame->top_field_first  = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD;
1666         frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD);
1667@@ -284,14 +288,17 @@ static int init_slice_rpl(HEVCContext *s
1668     int ctb_count    = frame->ctb_count;
1669     int ctb_addr_ts  = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr];
1670     int i;
1671+    RefPicListTab * const tab = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx;
1672
1673     if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab))
1674         return AVERROR_INVALIDDATA;
1675
1676-    for (i = ctb_addr_ts; i < ctb_count; i++)
1677-        frame->rpl_tab[i] = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx;
1678+    if (frame->rpl_tab) {
1679+        for (i = ctb_addr_ts; i < ctb_count; i++)
1680+            frame->rpl_tab[i] = tab;
1681+    }
1682
1683-    frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts];
1684+    frame->refPicList = tab->refPicList;
1685
1686     return 0;
1687 }
1688--- a/libavcodec/hevcdec.c
1689+++ b/libavcodec/hevcdec.c
1690@@ -340,6 +340,19 @@ static void export_stream_params(HEVCCon
1691
1692     ff_set_sar(avctx, sps->vui.sar);
1693
1694+    if (sps->chroma_format_idc == 1) {
1695+        avctx->chroma_sample_location = sps->vui.chroma_loc_info_present_flag ?
1696+            sps->vui.chroma_sample_loc_type_top_field + 1 :
1697+            AVCHROMA_LOC_LEFT;
1698+    }
1699+    else if (sps->chroma_format_idc == 2 ||
1700+             sps->chroma_format_idc == 3) {
1701+        avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;
1702+    }
1703+    else {
1704+        avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
1705+    }
1706+
1707     if (sps->vui.video_signal_type_present_flag)
1708         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
1709                                                             : AVCOL_RANGE_MPEG;
1710@@ -402,6 +415,7 @@ static enum AVPixelFormat get_format(HEV
1711 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
1712                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
1713                      CONFIG_HEVC_NVDEC_HWACCEL + \
1714+                     CONFIG_HEVC_V4L2REQUEST_HWACCEL + \
1715                      CONFIG_HEVC_VAAPI_HWACCEL + \
1716                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
1717                      CONFIG_HEVC_VDPAU_HWACCEL)
1718@@ -429,6 +443,9 @@ static enum AVPixelFormat get_format(HEV
1719 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
1720         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
1721 #endif
1722+#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
1723+        *fmt++ = AV_PIX_FMT_DRM_PRIME;
1724+#endif
1725         break;
1726     case AV_PIX_FMT_YUV420P10:
1727 #if CONFIG_HEVC_DXVA2_HWACCEL
1728@@ -450,6 +467,9 @@ static enum AVPixelFormat get_format(HEV
1729 #if CONFIG_HEVC_NVDEC_HWACCEL
1730         *fmt++ = AV_PIX_FMT_CUDA;
1731 #endif
1732+#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
1733+        *fmt++ = AV_PIX_FMT_DRM_PRIME;
1734+#endif
1735         break;
1736     case AV_PIX_FMT_YUV444P:
1737 #if CONFIG_HEVC_VDPAU_HWACCEL
1738@@ -504,6 +524,16 @@ static int set_sps(HEVCContext *s, const
1739     if (!sps)
1740         return 0;
1741
1742+    // If hwaccel then we don't need all the s/w decode helper arrays
1743+    if (s->avctx->hwaccel) {
1744+        export_stream_params(s, sps);
1745+
1746+        s->avctx->pix_fmt = pix_fmt;
1747+        s->ps.sps = sps;
1748+        s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
1749+        return 0;
1750+    }
1751+
1752     ret = pic_arrays_init(s, sps);
1753     if (ret < 0)
1754         goto fail;
1755@@ -3011,11 +3041,13 @@ static int hevc_frame_start(HEVCContext
1756                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
1757     int ret;
1758
1759-    memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
1760-    memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
1761-    memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
1762-    memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
1763-    memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
1764+    if (s->horizontal_bs) {
1765+        memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
1766+        memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
1767+        memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
1768+        memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
1769+        memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
1770+    }
1771
1772     s->is_decoded        = 0;
1773     s->first_nal_type    = s->nal_unit_type;
1774@@ -3507,8 +3539,13 @@ static int hevc_decode_frame(AVCodecCont
1775
1776     s->ref = NULL;
1777     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
1778-    if (ret < 0)
1779+    if (ret < 0) {
1780+        // Ensure that hwaccel knows this frame is over
1781+        if (s->avctx->hwaccel && s->avctx->hwaccel->abort_frame)
1782+            s->avctx->hwaccel->abort_frame(s->avctx);
1783+
1784         return ret;
1785+    }
1786
1787     if (avctx->hwaccel) {
1788         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
1789@@ -3558,15 +3595,19 @@ static int hevc_ref_frame(HEVCContext *s
1790         dst->needs_fg = 1;
1791     }
1792
1793-    dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
1794-    if (!dst->tab_mvf_buf)
1795-        goto fail;
1796-    dst->tab_mvf = src->tab_mvf;
1797+    if (src->tab_mvf_buf) {
1798+        dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
1799+        if (!dst->tab_mvf_buf)
1800+            goto fail;
1801+        dst->tab_mvf = src->tab_mvf;
1802+    }
1803
1804-    dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
1805-    if (!dst->rpl_tab_buf)
1806-        goto fail;
1807-    dst->rpl_tab = src->rpl_tab;
1808+    if (src->rpl_tab_buf) {
1809+        dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
1810+        if (!dst->rpl_tab_buf)
1811+            goto fail;
1812+        dst->rpl_tab = src->rpl_tab;
1813+    }
1814
1815     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
1816     if (!dst->rpl_buf)
1817@@ -3900,6 +3941,9 @@ const FFCodec ff_hevc_decoder = {
1818 #if CONFIG_HEVC_NVDEC_HWACCEL
1819                                HWACCEL_NVDEC(hevc),
1820 #endif
1821+#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
1822+                               HWACCEL_V4L2REQUEST(hevc),
1823+#endif
1824 #if CONFIG_HEVC_VAAPI_HWACCEL
1825                                HWACCEL_VAAPI(hevc),
1826 #endif
1827--- a/libavcodec/hwaccels.h
1828+++ b/libavcodec/hwaccels.h
1829@@ -40,6 +40,7 @@ extern const AVHWAccel ff_hevc_d3d11va_h
1830 extern const AVHWAccel ff_hevc_d3d11va2_hwaccel;
1831 extern const AVHWAccel ff_hevc_dxva2_hwaccel;
1832 extern const AVHWAccel ff_hevc_nvdec_hwaccel;
1833+extern const AVHWAccel ff_hevc_v4l2request_hwaccel;
1834 extern const AVHWAccel ff_hevc_vaapi_hwaccel;
1835 extern const AVHWAccel ff_hevc_vdpau_hwaccel;
1836 extern const AVHWAccel ff_hevc_videotoolbox_hwaccel;
1837--- a/libavcodec/hwconfig.h
1838+++ b/libavcodec/hwconfig.h
1839@@ -24,6 +24,7 @@
1840
1841
1842 #define HWACCEL_CAP_ASYNC_SAFE      (1 << 0)
1843+#define HWACCEL_CAP_MT_SAFE         (1 << 1)
1844
1845
1846 typedef struct AVCodecHWConfigInternal {
1847@@ -70,6 +71,8 @@ typedef struct AVCodecHWConfigInternal {
1848     HW_CONFIG_HWACCEL(1, 1, 0, D3D11,        D3D11VA,      ff_ ## codec ## _d3d11va2_hwaccel)
1849 #define HWACCEL_NVDEC(codec) \
1850     HW_CONFIG_HWACCEL(1, 1, 0, CUDA,         CUDA,         ff_ ## codec ## _nvdec_hwaccel)
1851+#define HWACCEL_V4L2REQUEST(codec) \
1852+    HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME,    DRM,          ff_ ## codec ## _v4l2request_hwaccel)
1853 #define HWACCEL_VAAPI(codec) \
1854     HW_CONFIG_HWACCEL(1, 1, 1, VAAPI,        VAAPI,        ff_ ## codec ## _vaapi_hwaccel)
1855 #define HWACCEL_VDPAU(codec) \
1856--- a/libavcodec/mmaldec.c
1857+++ b/libavcodec/mmaldec.c
1858@@ -24,6 +24,9 @@
1859  * MMAL Video Decoder
1860  */
1861
1862+#pragma GCC diagnostic push
1863+// Many many redundant decls in the header files
1864+#pragma GCC diagnostic ignored "-Wredundant-decls"
1865 #include <bcm_host.h>
1866 #include <interface/mmal/mmal.h>
1867 #include <interface/mmal/mmal_parameters_video.h>
1868@@ -31,6 +34,7 @@
1869 #include <interface/mmal/util/mmal_util_params.h>
1870 #include <interface/mmal/util/mmal_default_components.h>
1871 #include <interface/mmal/vc/mmal_vc_api.h>
1872+#pragma GCC diagnostic pop
1873 #include <stdatomic.h>
1874
1875 #include "avcodec.h"
1876--- a/libavcodec/pthread_frame.c
1877+++ b/libavcodec/pthread_frame.c
1878@@ -217,7 +217,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
1879
1880         /* if the previous thread uses hwaccel then we take the lock to ensure
1881          * the threads don't run concurrently */
1882-        if (avctx->hwaccel) {
1883+        if (avctx->hwaccel &&
1884+            !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
1885             pthread_mutex_lock(&p->parent->hwaccel_mutex);
1886             p->hwaccel_serializing = 1;
1887         }
1888@@ -243,7 +244,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
1889             p->hwaccel_serializing = 0;
1890             pthread_mutex_unlock(&p->parent->hwaccel_mutex);
1891         }
1892-        av_assert0(!avctx->hwaccel);
1893+        av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE));
1894
1895         if (p->async_serializing) {
1896             p->async_serializing = 0;
1897@@ -331,6 +332,12 @@ FF_ENABLE_DEPRECATION_WARNINGS
1898         }
1899
1900         dst->hwaccel_flags = src->hwaccel_flags;
1901+        if (src->hwaccel &&
1902+            (src->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
1903+            dst->hwaccel = src->hwaccel;
1904+            dst->hwaccel_context = src->hwaccel_context;
1905+            dst->internal->hwaccel_priv_data = src->internal->hwaccel_priv_data;
1906+        }
1907
1908         err = av_buffer_replace(&dst->internal->pool, src->internal->pool);
1909         if (err < 0)
1910@@ -461,10 +468,13 @@ static int submit_packet(PerThreadContex
1911     }
1912
1913     /* transfer the stashed hwaccel state, if any */
1914-    av_assert0(!p->avctx->hwaccel);
1915-    FFSWAP(const AVHWAccel*, p->avctx->hwaccel,                     fctx->stash_hwaccel);
1916-    FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
1917-    FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
1918+    av_assert0(!p->avctx->hwaccel || (p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE));
1919+    /* past the assert hwaccel is NULL or MT-safe: only an empty slot takes over the stash */
1920+    if (!p->avctx->hwaccel) {
1921+        FFSWAP(const AVHWAccel*, p->avctx->hwaccel,                     fctx->stash_hwaccel);
1922+        FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
1923+        FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
1924+    }
1925
1926     av_packet_unref(p->avpkt);
1927     ret = av_packet_ref(p->avpkt, avpkt);
1928@@ -656,7 +666,9 @@ void ff_thread_finish_setup(AVCodecConte
1929
1930     if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return;
1931
1932-    if (avctx->hwaccel && !p->hwaccel_serializing) {
1933+    if (avctx->hwaccel &&
1934+        !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) &&
1935+        !p->hwaccel_serializing) {
1936         pthread_mutex_lock(&p->parent->hwaccel_mutex);
1937         p->hwaccel_serializing = 1;
1938     }
1939@@ -673,9 +685,12 @@ void ff_thread_finish_setup(AVCodecConte
1940      * this is done here so that this worker thread can wipe its own hwaccel
1941      * state after decoding, without requiring synchronization */
1942     av_assert0(!p->parent->stash_hwaccel);
1943-    p->parent->stash_hwaccel         = avctx->hwaccel;
1944-    p->parent->stash_hwaccel_context = avctx->hwaccel_context;
1945-    p->parent->stash_hwaccel_priv    = avctx->internal->hwaccel_priv_data;
1946+    if (avctx->hwaccel &&
1947+        !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
1948+        p->parent->stash_hwaccel         = avctx->hwaccel;
1949+        p->parent->stash_hwaccel_context = avctx->hwaccel_context;
1950+        p->parent->stash_hwaccel_priv    = avctx->internal->hwaccel_priv_data;
1951+    }
1952
1953     pthread_mutex_lock(&p->progress_mutex);
1954     if(atomic_load(&p->state) == STATE_SETUP_FINISHED){
1955@@ -730,6 +745,15 @@ void ff_frame_thread_free(AVCodecContext
1956
1957     park_frame_worker_threads(fctx, thread_count);
1958
1959+     if (fctx->prev_thread &&
1960+         avctx->hwaccel && (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) &&
1961+         avctx->internal->hwaccel_priv_data !=
1962+                             fctx->prev_thread->avctx->internal->hwaccel_priv_data) {
1963+        if (update_context_from_thread(avctx, fctx->prev_thread->avctx, 1) < 0) {
1964+            av_log(avctx, AV_LOG_ERROR, "Failed to update user thread.\n");
1965+        }
1966+    }
1967+
1968     for (i = 0; i < thread_count; i++) {
1969         PerThreadContext *p = &fctx->threads[i];
1970         AVCodecContext *ctx = p->avctx;
1971@@ -778,10 +802,13 @@ void ff_frame_thread_free(AVCodecContext
1972
1973     /* if we have stashed hwaccel state, move it to the user-facing context,
1974      * so it will be freed in avcodec_close() */
1975-    av_assert0(!avctx->hwaccel);
1976-    FFSWAP(const AVHWAccel*, avctx->hwaccel,                     fctx->stash_hwaccel);
1977-    FFSWAP(void*,            avctx->hwaccel_context,             fctx->stash_hwaccel_context);
1978-    FFSWAP(void*,            avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
1979+    av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE));
1980+    /* past the assert hwaccel is NULL or MT-safe: only an empty slot takes over the stash */
1981+    if (!avctx->hwaccel) {
1982+        FFSWAP(const AVHWAccel*, avctx->hwaccel,                     fctx->stash_hwaccel);
1983+        FFSWAP(void*,            avctx->hwaccel_context,             fctx->stash_hwaccel_context);
1984+        FFSWAP(void*,            avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
1985+    }
1986
1987     av_freep(&avctx->internal->thread_ctx);
1988 }
1989--- a/libavcodec/raw.c
1990+++ b/libavcodec/raw.c
1991@@ -294,6 +294,12 @@ static const PixelFormatTag raw_pix_fmt_
1992     { AV_PIX_FMT_RGB565LE,MKTAG( 3 ,  0 ,  0 ,  0 ) }, /* flipped RGB565LE */
1993     { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */
1994
1995+    /* RPI (Might as well define for everything) */
1996+    { AV_PIX_FMT_SAND128,     MKTAG('S', 'A', 'N', 'D') },
1997+    { AV_PIX_FMT_RPI4_8,      MKTAG('S', 'A', 'N', 'D') },
1998+    { AV_PIX_FMT_SAND64_10,   MKTAG('S', 'N', 'D', 'A') },
1999+    { AV_PIX_FMT_RPI4_10,     MKTAG('S', 'N', 'D', 'B') },
2000+
2001     { AV_PIX_FMT_NONE, 0 },
2002 };
2003
2004--- a/libavcodec/rawenc.c
2005+++ b/libavcodec/rawenc.c
2006@@ -24,6 +24,7 @@
2007  * Raw Video Encoder
2008  */
2009
2010+#include "config.h"
2011 #include "avcodec.h"
2012 #include "codec_internal.h"
2013 #include "encode.h"
2014@@ -33,6 +34,10 @@
2015 #include "libavutil/intreadwrite.h"
2016 #include "libavutil/imgutils.h"
2017 #include "libavutil/internal.h"
2018+#include "libavutil/avassert.h"
2019+#if CONFIG_SAND
2020+#include "libavutil/rpi_sand_fns.h"
2021+#endif
2022
2023 static av_cold int raw_encode_init(AVCodecContext *avctx)
2024 {
2025@@ -46,22 +51,114 @@ static av_cold int raw_encode_init(AVCod
2026     return 0;
2027 }
2028
2029+#if CONFIG_SAND
2030+static int raw_sand8_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
2031+                      const AVFrame *frame)
2032+{
2033+    const int width = av_frame_cropped_width(frame);
2034+    const int height = av_frame_cropped_height(frame);
2035+    const int x0 = frame->crop_left;
2036+    const int y0 = frame->crop_top;
2037+    const int size = width * height * 3 / 2;
2038+    uint8_t * dst;
2039+    int ret;
2040+
2041+    if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
2042+        return ret;
2043+
2044+    dst = pkt->data;
2045+
2046+    av_rpi_sand_to_planar_y8(dst, width, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height);
2047+    dst += width * height;
2048+    av_rpi_sand_to_planar_c8(dst, width / 2, dst + width * height / 4, width / 2,
2049+                          frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0 / 2, y0 / 2, width / 2, height / 2);
2050+    return 0;
2051+}
2052+
2053+static int raw_sand16_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
2054+                      const AVFrame *frame)
2055+{
2056+    const int width = av_frame_cropped_width(frame);
2057+    const int height = av_frame_cropped_height(frame);
2058+    const int x0 = frame->crop_left;
2059+    const int y0 = frame->crop_top;
2060+    const int size = width * height * 3;
2061+    uint8_t * dst;
2062+    int ret;
2063+
2064+    if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
2065+        return ret;
2066+
2067+    dst = pkt->data;
2068+
2069+    av_rpi_sand_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0 * 2, y0, width * 2, height);
2070+    dst += width * height * 2;
2071+    av_rpi_sand_to_planar_c16(dst, width, dst + width * height / 2, width,
2072+                          frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0, y0 / 2, width, height / 2);
2073+    return 0;
2074+}
2075+
2076+static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
2077+                      const AVFrame *frame)
2078+{
2079+    const int width = av_frame_cropped_width(frame);
2080+    const int height = av_frame_cropped_height(frame);
2081+    const int x0 = frame->crop_left;
2082+    const int y0 = frame->crop_top;
2083+    const int size = width * height * 3;
2084+    uint8_t * dst;
2085+    int ret;
2086+
2087+    if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
2088+        return ret;
2089+
2090+    dst = pkt->data;
2091+
2092+    av_rpi_sand30_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height);
2093+    dst += width * height * 2;
2094+    av_rpi_sand30_to_planar_c16(dst, width, dst + width * height / 2, width,
2095+                          frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0/2, y0 / 2, width/2, height / 2);
2096+    return 0;
2097+}
2098+#endif
2099+
2100+
2101 static int raw_encode(AVCodecContext *avctx, AVPacket *pkt,
2102-                      const AVFrame *frame, int *got_packet)
2103+                      const AVFrame *src_frame, int *got_packet)
2104 {
2105-    int ret = av_image_get_buffer_size(frame->format,
2106-                                       frame->width, frame->height, 1);
2107+    int ret;
2108+    AVFrame * frame = NULL;
2109
2110-    if (ret < 0)
2111+#if CONFIG_SAND
2112+    if (av_rpi_is_sand_frame(src_frame)) {
2113+        ret = av_rpi_is_sand8_frame(src_frame) ? raw_sand8_as_yuv420(avctx, pkt, src_frame) :
2114+            av_rpi_is_sand16_frame(src_frame) ? raw_sand16_as_yuv420(avctx, pkt, src_frame) :
2115+            av_rpi_is_sand30_frame(src_frame) ? raw_sand30_as_yuv420(avctx, pkt, src_frame) : -1;
2116+        *got_packet = (ret == 0);
2117         return ret;
2118+    }
2119+#endif
2120+
2121+    if ((frame = av_frame_clone(src_frame)) == NULL) {
2122+        ret = AVERROR(ENOMEM);
2123+        goto fail;
2124+    }
2125+
2126+    if ((ret = av_frame_apply_cropping(frame, AV_FRAME_CROP_UNALIGNED)) < 0)
2127+        goto fail;
2128+
2129+    ret = av_image_get_buffer_size(frame->format,
2130+                                       frame->width, frame->height, 1);
2131+    if (ret < 0)
2132+        goto fail;
2133
2134     if ((ret = ff_get_encode_buffer(avctx, pkt, ret, 0)) < 0)
2135-        return ret;
2136+        goto fail;
2137     if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size,
2138                                        (const uint8_t **)frame->data, frame->linesize,
2139                                        frame->format,
2140                                        frame->width, frame->height, 1)) < 0)
2141-        return ret;
2142+        goto fail;
2143
2144     if(avctx->codec_tag == AV_RL32("yuv2") && ret > 0 &&
2145        frame->format   == AV_PIX_FMT_YUYV422) {
2146@@ -77,8 +174,15 @@ static int raw_encode(AVCodecContext *av
2147             AV_WB64(&pkt->data[8 * x], v << 48 | v >> 16);
2148         }
2149     }
2150+    pkt->flags |= AV_PKT_FLAG_KEY;
2151+    av_frame_free(&frame);
2152     *got_packet = 1;
2153     return 0;
2154+
2155+fail:
2156+    av_frame_free(&frame);
2157+    *got_packet = 0;
2158+    return ret;
2159 }
2160
2161 const FFCodec ff_rawvideo_encoder = {
2162--- a/libavcodec/v4l2_buffers.c
2163+++ b/libavcodec/v4l2_buffers.c
2164@@ -21,6 +21,7 @@
2165  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2166  */
2167
2168+#include <drm_fourcc.h>
2169 #include <linux/videodev2.h>
2170 #include <sys/ioctl.h>
2171 #include <sys/mman.h>
2172@@ -28,57 +29,89 @@
2173 #include <fcntl.h>
2174 #include <poll.h>
2175 #include "libavcodec/avcodec.h"
2176+#include "libavcodec/internal.h"
2177+#include "libavutil/avassert.h"
2178 #include "libavutil/pixdesc.h"
2179+#include "libavutil/hwcontext.h"
2180 #include "v4l2_context.h"
2181 #include "v4l2_buffers.h"
2182 #include "v4l2_m2m.h"
2183+#include "v4l2_req_dmabufs.h"
2184+#include "weak_link.h"
2185
2186 #define USEC_PER_SEC 1000000
2187-static AVRational v4l2_timebase = { 1, USEC_PER_SEC };
2188+static const AVRational v4l2_timebase = { 1, USEC_PER_SEC };
2189
2190-static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf)
2191+static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx)
2192 {
2193-    return V4L2_TYPE_IS_OUTPUT(buf->context->type) ?
2194-        container_of(buf->context, V4L2m2mContext, output) :
2195-        container_of(buf->context, V4L2m2mContext, capture);
2196+    return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
2197+        container_of(ctx, V4L2m2mContext, output) :
2198+        container_of(ctx, V4L2m2mContext, capture);
2199 }
2200
2201-static inline AVCodecContext *logger(V4L2Buffer *buf)
2202+static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf)
2203 {
2204-    return buf_to_m2mctx(buf)->avctx;
2205+    return ctx_to_m2mctx(buf->context);
2206 }
2207
2208-static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf)
2209+static inline AVCodecContext *logger(const V4L2Buffer * const buf)
2210 {
2211-    V4L2m2mContext *s = buf_to_m2mctx(avbuf);
2212+    return buf_to_m2mctx(buf)->avctx;
2213+}
2214
2215-    if (s->avctx->pkt_timebase.num)
2216-        return s->avctx->pkt_timebase;
2217-    return s->avctx->time_base;
2218+static inline AVRational v4l2_get_timebase(const V4L2Buffer * const avbuf)
2219+{
2220+    const V4L2m2mContext *s = buf_to_m2mctx(avbuf);
2221+    const AVRational tb = s->avctx->pkt_timebase.num ?
2222+        s->avctx->pkt_timebase :
2223+        s->avctx->time_base;
2224+    return tb.num && tb.den ? tb : v4l2_timebase;
2225 }
2226
2227-static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts)
2228+static inline struct timeval tv_from_int(const int64_t t)
2229 {
2230-    int64_t v4l2_pts;
2231+    return (struct timeval){
2232+        .tv_usec = t % USEC_PER_SEC,
2233+        .tv_sec  = t / USEC_PER_SEC
2234+    };
2235+}
2236
2237-    if (pts == AV_NOPTS_VALUE)
2238-        pts = 0;
2239+static inline int64_t int_from_tv(const struct timeval t)
2240+{
2241+    return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec;
2242+}
2243
2244+static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts)
2245+{
2246     /* convert pts to v4l2 timebase */
2247-    v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
2248-    out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC;
2249-    out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC;
2250+    const int64_t v4l2_pts =
2251+        pts == AV_NOPTS_VALUE ? 0 :
2252+            av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
2253+    out->buf.timestamp = tv_from_int(v4l2_pts);
2254 }
2255
2256-static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf)
2257+static inline int64_t v4l2_get_pts(const V4L2Buffer * const avbuf)
2258 {
2259-    int64_t v4l2_pts;
2260-
2261+    const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp);
2262+    return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE;
2263+#if 0
2264     /* convert pts back to encoder timebase */
2265-    v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC +
2266-                        avbuf->buf.timestamp.tv_usec;
2267+    return
2268+        avbuf->context->no_pts_rescale ? v4l2_pts :
2269+        v4l2_pts == 0 ? AV_NOPTS_VALUE :
2270+            av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
2271+#endif
2272+}
2273
2274-    return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
2275+static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length)
2276+{
2277+    if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
2278+        out->planes[plane].bytesused = bytesused;
2279+        out->planes[plane].length = length;
2280+    } else {
2281+        out->buf.bytesused = bytesused;
2282+        out->buf.length = length;
2283+    }
2284 }
2285
2286 static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf)
2287@@ -115,6 +148,105 @@ static enum AVColorPrimaries v4l2_get_co
2288     return AVCOL_PRI_UNSPECIFIED;
2289 }
2290
2291+static void v4l2_set_color(V4L2Buffer *buf,
2292+                           const enum AVColorPrimaries avcp,
2293+                           const enum AVColorSpace avcs,
2294+                           const enum AVColorTransferCharacteristic avxc)
2295+{   // Map libav primaries (avcp), colourspace (avcs) and transfer (avxc) onto the V4L2 colour fields of buf->context->format
2296+    enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT;
2297+    enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT;
2298+    enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT;
2299+
2300+    switch (avcp) {  // first guess at cs (and sometimes ycbcr) from the primaries
2301+    case AVCOL_PRI_BT709:
2302+        cs = V4L2_COLORSPACE_REC709;
2303+        ycbcr = V4L2_YCBCR_ENC_709;
2304+        break;
2305+    case AVCOL_PRI_BT470M:
2306+        cs = V4L2_COLORSPACE_470_SYSTEM_M;
2307+        ycbcr = V4L2_YCBCR_ENC_601;
2308+        break;
2309+    case AVCOL_PRI_BT470BG:
2310+        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
2311+        break;
2312+    case AVCOL_PRI_SMPTE170M:
2313+        cs = V4L2_COLORSPACE_SMPTE170M;
2314+        break;
2315+    case AVCOL_PRI_SMPTE240M:
2316+        cs = V4L2_COLORSPACE_SMPTE240M;
2317+        break;
2318+    case AVCOL_PRI_BT2020:
2319+        cs = V4L2_COLORSPACE_BT2020;
2320+        break;
2321+    case AVCOL_PRI_SMPTE428:
2322+    case AVCOL_PRI_SMPTE431:
2323+    case AVCOL_PRI_SMPTE432:
2324+    case AVCOL_PRI_EBU3213:
2325+    case AVCOL_PRI_RESERVED:
2326+    case AVCOL_PRI_FILM:
2327+    case AVCOL_PRI_UNSPECIFIED:
2328+    default:  // not mapped here: keep the V4L2 defaults
2329+        break;
2330+    }
2331+
2332+    switch (avcs) {  // a recognised colourspace overrides the cs picked from the primaries above
2333+    case AVCOL_SPC_RGB:
2334+        cs = V4L2_COLORSPACE_SRGB;
2335+        break;
2336+    case AVCOL_SPC_BT709:
2337+        cs = V4L2_COLORSPACE_REC709;
2338+        break;
2339+    case AVCOL_SPC_FCC:
2340+        cs = V4L2_COLORSPACE_470_SYSTEM_M;
2341+        break;
2342+    case AVCOL_SPC_BT470BG:
2343+        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
2344+        break;
2345+    case AVCOL_SPC_SMPTE170M:
2346+        cs = V4L2_COLORSPACE_SMPTE170M;
2347+        break;
2348+    case AVCOL_SPC_SMPTE240M:
2349+        cs = V4L2_COLORSPACE_SMPTE240M;
2350+        break;
2351+    case AVCOL_SPC_BT2020_CL:
2352+        cs = V4L2_COLORSPACE_BT2020;
2353+        ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM;
2354+        break;
2355+    case AVCOL_SPC_BT2020_NCL:
2356+        cs = V4L2_COLORSPACE_BT2020;
2357+        break;
2358+    default:
2359+        break;
2360+    }
2361+
2362+    switch (avxc) {  // select on the caller's transfer characteristic; xfer only holds the V4L2 result
2363+    case AVCOL_TRC_BT709:
2364+        xfer = V4L2_XFER_FUNC_709;
2365+        break;
2366+    case AVCOL_TRC_IEC61966_2_1:
2367+        xfer = V4L2_XFER_FUNC_SRGB;
2368+        break;
2369+    case AVCOL_TRC_SMPTE240M:
2370+        xfer = V4L2_XFER_FUNC_SMPTE240M;
2371+        break;
2372+    case AVCOL_TRC_SMPTE2084:
2373+        xfer = V4L2_XFER_FUNC_SMPTE2084;
2374+        break;
2375+    default:  // anything else leaves V4L2_XFER_FUNC_DEFAULT
2376+        break;
2377+    }
2378+
2379+    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) {  // results are stored in the context's format, not in the v4l2 buffer itself
2380+        buf->context->format.fmt.pix_mp.colorspace = cs;
2381+        buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr;
2382+        buf->context->format.fmt.pix_mp.xfer_func = xfer;
2383+    } else {
2384+        buf->context->format.fmt.pix.colorspace = cs;
2385+        buf->context->format.fmt.pix.ycbcr_enc = ycbcr;
2386+        buf->context->format.fmt.pix.xfer_func = xfer;
2387+    }
2388+}
2389+
2390 static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf)
2391 {
2392     enum v4l2_quantization qt;
2393@@ -133,6 +265,20 @@ static enum AVColorRange v4l2_get_color_
2394      return AVCOL_RANGE_UNSPECIFIED;
2395 }
2396
2397+static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr)
2398+{
2399+    const enum v4l2_quantization q =
2400+        avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE :
2401+        avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE :
2402+            V4L2_QUANTIZATION_DEFAULT;
2403+
2404+    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) {
2405+        buf->context->format.fmt.pix_mp.quantization = q;
2406+    } else {
2407+        buf->context->format.fmt.pix.quantization = q;
2408+    }
2409+}
2410+
2411 static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf)
2412 {
2413     enum v4l2_ycbcr_encoding ycbcr;
2414@@ -209,73 +355,218 @@ static enum AVColorTransferCharacteristi
2415     return AVCOL_TRC_UNSPECIFIED;
2416 }
2417
2418-static void v4l2_free_buffer(void *opaque, uint8_t *unused)
2419+static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf)
2420 {
2421-    V4L2Buffer* avbuf = opaque;
2422-    V4L2m2mContext *s = buf_to_m2mctx(avbuf);
2423+    return V4L2_FIELD_IS_INTERLACED(buf->buf.field);
2424+}
2425
2426-    if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) {
2427-        atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel);
2428+static int v4l2_buf_is_top_first(const V4L2Buffer * const buf)
2429+{
2430+    return buf->buf.field == V4L2_FIELD_INTERLACED_TB;
2431+}
2432
2433-        if (s->reinit) {
2434-            if (!atomic_load(&s->refcount))
2435-                sem_post(&s->refsync);
2436-        } else {
2437-            if (s->draining && V4L2_TYPE_IS_OUTPUT(avbuf->context->type)) {
2438-                /* no need to queue more buffers to the driver */
2439-                avbuf->status = V4L2BUF_AVAILABLE;
2440-            }
2441-            else if (avbuf->context->streamon)
2442-                ff_v4l2_buffer_enqueue(avbuf);
2443-        }
2444+static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff)
2445+{
2446+    buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE :
2447+        is_tff ? V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT;
2448+}
2449
2450-        av_buffer_unref(&avbuf->context_ref);
2451+static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf)
2452+{
2453+    AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame;
2454+    AVDRMLayerDescriptor *layer;
2455+
2456+    /* fill the DRM frame descriptor */
2457+    drm_desc->nb_objects = avbuf->num_planes;
2458+    drm_desc->nb_layers = 1;
2459+
2460+    layer = &drm_desc->layers[0];
2461+    layer->nb_planes = avbuf->num_planes;
2462+
2463+    for (int i = 0; i < avbuf->num_planes; i++) {
2464+        layer->planes[i].object_index = i;
2465+        layer->planes[i].offset = avbuf->plane_info[i].offset;
2466+        layer->planes[i].pitch = avbuf->plane_info[i].bytesperline;
2467+    }
2468+
2469+    switch (avbuf->context->av_pix_fmt) {
2470+    case AV_PIX_FMT_0BGR:
2471+        layer->format = DRM_FORMAT_RGBX8888;
2472+        break;
2473+    case AV_PIX_FMT_RGB0:
2474+        layer->format = DRM_FORMAT_XBGR8888;
2475+        break;
2476+    case AV_PIX_FMT_0RGB:
2477+        layer->format = DRM_FORMAT_BGRX8888;
2478+        break;
2479+    case AV_PIX_FMT_BGR0:
2480+        layer->format = DRM_FORMAT_XRGB8888;
2481+        break;
2482+
2483+    case AV_PIX_FMT_ABGR:
2484+        layer->format = DRM_FORMAT_RGBA8888;
2485+        break;
2486+    case AV_PIX_FMT_RGBA:
2487+        layer->format = DRM_FORMAT_ABGR8888;
2488+        break;
2489+    case AV_PIX_FMT_ARGB:
2490+        layer->format = DRM_FORMAT_BGRA8888;
2491+        break;
2492+    case AV_PIX_FMT_BGRA:
2493+        layer->format = DRM_FORMAT_ARGB8888;
2494+        break;
2495+
2496+    case AV_PIX_FMT_BGR24:
2497+        layer->format = DRM_FORMAT_BGR888;
2498+        break;
2499+    case AV_PIX_FMT_RGB24:
2500+        layer->format = DRM_FORMAT_RGB888;
2501+        break;
2502+
2503+    case AV_PIX_FMT_YUYV422:
2504+
2505+        layer->format = DRM_FORMAT_YUYV;
2506+        layer->nb_planes = 1;
2507+
2508+        break;
2509+
2510+    case AV_PIX_FMT_NV12:
2511+    case AV_PIX_FMT_NV21:
2512+
2513+        layer->format = avbuf->context->av_pix_fmt == AV_PIX_FMT_NV12 ?
2514+            DRM_FORMAT_NV12 : DRM_FORMAT_NV21;
2515+
2516+        if (avbuf->num_planes > 1)
2517+            break;
2518+
2519+        layer->nb_planes = 2;
2520+
2521+        layer->planes[1].object_index = 0;
2522+        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
2523+            avbuf->context->format.fmt.pix.height;
2524+        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline;
2525+        break;
2526+
2527+    case AV_PIX_FMT_YUV420P:
2528+
2529+        layer->format = DRM_FORMAT_YUV420;
2530+
2531+        if (avbuf->num_planes > 1)
2532+            break;
2533+
2534+        layer->nb_planes = 3;
2535+
2536+        layer->planes[1].object_index = 0;
2537+        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
2538+            avbuf->context->format.fmt.pix.height;
2539+        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1;
2540+
2541+        layer->planes[2].object_index = 0;
2542+        layer->planes[2].offset = layer->planes[1].offset +
2543+            ((avbuf->plane_info[0].bytesperline *
2544+              avbuf->context->format.fmt.pix.height) >> 2);
2545+        layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1;
2546+        break;
2547+
2548+    default:
2549+        drm_desc->nb_layers = 0;
2550+        break;
2551     }
2552+
2553+    return (uint8_t *) drm_desc;
2554 }
2555
2556-static int v4l2_buf_increase_ref(V4L2Buffer *in)
2557+static void v4l2_free_bufref(void *opaque, uint8_t *data)
2558 {
2559-    V4L2m2mContext *s = buf_to_m2mctx(in);
2560+    AVBufferRef * bufref = (AVBufferRef *)data;
2561+    V4L2Buffer *avbuf = (V4L2Buffer *)bufref->data;
2562+    struct V4L2Context *ctx = ff_weak_link_lock(&avbuf->context_wl);
2563
2564-    if (in->context_ref)
2565-        atomic_fetch_add(&in->context_refcount, 1);
2566-    else {
2567-        in->context_ref = av_buffer_ref(s->self_ref);
2568-        if (!in->context_ref)
2569-            return AVERROR(ENOMEM);
2570+    if (ctx != NULL) {
2571+        // Buffer still attached to context
2572+        V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
2573+
2574+        if (!s->output_drm && avbuf->dmabuf[0] != NULL) {
2575+            for (unsigned int i = 0; i != avbuf->num_planes; ++i)
2576+                dmabuf_read_end(avbuf->dmabuf[i]);
2577+        }
2578+
2579+        ff_mutex_lock(&ctx->lock);
2580+
2581+        ff_v4l2_buffer_set_avail(avbuf);
2582+        avbuf->buf.timestamp.tv_sec = 0;
2583+        avbuf->buf.timestamp.tv_usec = 0;
2584+
2585+        if (V4L2_TYPE_IS_OUTPUT(ctx->type)) {
2586+            av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name);
2587+        }
2588+        else if (ctx->streamon) {
2589+            av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name);
2590+            ff_v4l2_buffer_enqueue(avbuf);  // will set to IN_DRIVER
2591+        }
2592+        else {
2593+            av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer freed but streamoff\n", ctx->name);
2594+        }
2595
2596-        in->context_refcount = 1;
2597+        ff_mutex_unlock(&ctx->lock);
2598     }
2599
2600-    in->status = V4L2BUF_RET_USER;
2601-    atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed);
2602+    ff_weak_link_unlock(avbuf->context_wl);
2603+    av_buffer_unref(&bufref);
2604+}
2605
2606-    return 0;
2607+static inline uint32_t ff_v4l2_buf_len(const struct v4l2_buffer * b, unsigned int i)
2608+{
2609+    return V4L2_TYPE_IS_MULTIPLANAR(b->type) ? b->m.planes[i].length : b->length;
2610 }
2611
2612-static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf)
2613+static int v4l2_buffer_export_drm(V4L2Buffer* avbuf)
2614 {
2615-    int ret;
2616+    int i, ret;
2617+    const V4L2m2mContext * const s = buf_to_m2mctx(avbuf);
2618
2619-    if (plane >= in->num_planes)
2620-        return AVERROR(EINVAL);
2621+    for (i = 0; i < avbuf->num_planes; i++) {
2622+        int dma_fd = -1;
2623+        const uint32_t blen = ff_v4l2_buf_len(&avbuf->buf, i);
2624
2625-    /* even though most encoders return 0 in data_offset encoding vp8 does require this value */
2626-    *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset,
2627-                            in->plane_info[plane].length, v4l2_free_buffer, in, 0);
2628-    if (!*buf)
2629-        return AVERROR(ENOMEM);
2630+        if (s->db_ctl != NULL) {
2631+            if ((avbuf->dmabuf[i] = dmabuf_alloc(s->db_ctl, blen)) == NULL)
2632+                return AVERROR(ENOMEM);
2633+            dma_fd = dmabuf_fd(avbuf->dmabuf[i]);
2634+            if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type))
2635+                avbuf->buf.m.planes[i].m.fd = dma_fd;
2636+            else
2637+                avbuf->buf.m.fd = dma_fd;
2638
2639-    ret = v4l2_buf_increase_ref(in);
2640-    if (ret)
2641-        av_buffer_unref(buf);
2642+            if (!s->output_drm)
2643+                avbuf->plane_info[i].mm_addr = dmabuf_map(avbuf->dmabuf[i]);
2644+        }
2645+        else {
2646+            struct v4l2_exportbuffer expbuf;
2647+            memset(&expbuf, 0, sizeof(expbuf));
2648+
2649+            expbuf.index = avbuf->buf.index;
2650+            expbuf.type = avbuf->buf.type;
2651+            expbuf.plane = i;
2652+
2653+            ret = ioctl(s->fd, VIDIOC_EXPBUF, &expbuf);
2654+            if (ret < 0)
2655+                return AVERROR(errno);
2656+            dma_fd = expbuf.fd;
2657+        }
2658
2659-    return ret;
2660+        avbuf->drm_frame.objects[i].size = blen;
2661+        avbuf->drm_frame.objects[i].fd = dma_fd;
2662+        avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR;
2663+    }
2664+
2665+    return 0;
2666 }
2667
2668 static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset)
2669 {
2670     unsigned int bytesused, length;
2671+    int rv = 0;
2672
2673     if (plane >= out->num_planes)
2674         return AVERROR(EINVAL);
2675@@ -283,32 +574,61 @@ static int v4l2_bufref_to_buf(V4L2Buffer
2676     length = out->plane_info[plane].length;
2677     bytesused = FFMIN(size+offset, length);
2678
2679-    memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset));
2680-
2681-    if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
2682-        out->planes[plane].bytesused = bytesused;
2683-        out->planes[plane].length = length;
2684-    } else {
2685-        out->buf.bytesused = bytesused;
2686-        out->buf.length = length;
2687+    if (size > length - offset) {
2688+        size = length - offset;
2689+        rv = AVERROR(ENOMEM);
2690     }
2691
2692-    return 0;
2693+    memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, size);
2694+
2695+    set_buf_length(out, plane, bytesused, length);
2696+
2697+    return rv;
2698+}
2699+
2700+static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf)
2701+{   // Wrap a new ref to the context's bufref for this buffer index in an AVBufferRef freed via v4l2_free_bufref; NULL on failure
2702+    AVBufferRef * bufref = av_buffer_ref(avbuf->context->bufrefs[avbuf->buf.index]);
2703+    AVBufferRef * newbuf;
2704+
2705+    if (!bufref)
2706+        return NULL;
2707+
2708+    newbuf = av_buffer_create((uint8_t *)bufref, sizeof(*bufref), v4l2_free_bufref, NULL, 0);
2709+    if (newbuf == NULL)
2710+        av_buffer_unref(&bufref);
2711+    else  // only mark the buffer as held by the user once the wrapper actually exists
2712+        avbuf->status = V4L2BUF_RET_USER;
2713+    return newbuf;
2714 }
2715
2716 static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
2717 {
2718-    int i, ret;
2719+    int i;
2720
2721     frame->format = avbuf->context->av_pix_fmt;
2722
2723-    for (i = 0; i < avbuf->num_planes; i++) {
2724-        ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]);
2725-        if (ret)
2726-            return ret;
2727+    frame->buf[0] = wrap_avbuf(avbuf);
2728+    if (frame->buf[0] == NULL)
2729+        return AVERROR(ENOMEM);
2730+
2731+    if (buf_to_m2mctx(avbuf)->output_drm) {
2732+        /* 1. get references to the actual data */
2733+        const int rv = ff_v4l2_context_frames_set(avbuf->context);
2734+        if (rv != 0)
2735+            return rv;
2736+
2737+        frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf);
2738+        frame->format = AV_PIX_FMT_DRM_PRIME;
2739+        frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref);
2740+        return 0;
2741+    }
2742+
2743
2744+    /* 1. get references to the actual data */
2745+    for (i = 0; i < avbuf->num_planes; i++) {
2746+        frame->data[i] = (uint8_t *)avbuf->plane_info[i].mm_addr + avbuf->planes[i].data_offset;
2747         frame->linesize[i] = avbuf->plane_info[i].bytesperline;
2748-        frame->data[i] = frame->buf[i]->data;
2749     }
2750
2751     /* fixup special cases */
2752@@ -317,88 +637,152 @@ static int v4l2_buffer_buf_to_swframe(AV
2753     case AV_PIX_FMT_NV21:
2754         if (avbuf->num_planes > 1)
2755             break;
2756-        frame->linesize[1] = avbuf->plane_info[0].bytesperline;
2757-        frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height;
2758+        frame->linesize[1] = frame->linesize[0];
2759+        frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format);
2760         break;
2761
2762     case AV_PIX_FMT_YUV420P:
2763         if (avbuf->num_planes > 1)
2764             break;
2765-        frame->linesize[1] = avbuf->plane_info[0].bytesperline >> 1;
2766-        frame->linesize[2] = avbuf->plane_info[0].bytesperline >> 1;
2767-        frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height;
2768-        frame->data[2] = frame->data[1] + ((avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height) >> 2);
2769+        frame->linesize[1] = frame->linesize[0] / 2;
2770+        frame->linesize[2] = frame->linesize[1];
2771+        frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format);
2772+        frame->data[2] = frame->data[1] + frame->linesize[1] * ff_v4l2_get_format_height(&avbuf->context->format) / 2;
2773         break;
2774
2775     default:
2776         break;
2777     }
2778
2779+    if (avbuf->dmabuf[0] != NULL) {
2780+        for (unsigned int i = 0; i != avbuf->num_planes; ++i)
2781+            dmabuf_read_start(avbuf->dmabuf[i]);
2782+    }
2783+
2784+    return 0;
2785+}
2786+
2787+static void cpy_2d(uint8_t * dst, int dst_stride, const uint8_t * src, int src_stride, int w, int h)
2788+{   // Copy h rows of w bytes from src to dst; each pointer advances by its own stride (in bytes) per row
2789+    if (dst_stride == src_stride && w + 32 >= dst_stride) {  // same stride and at most 32 bytes of row padding: one memcpy, padding included
2790+        memcpy(dst, src, dst_stride * h);
2791+    }
2792+    else {
2793+        while (--h >= 0) {  // row by row, copying only the w payload bytes
2794+            memcpy(dst, src, w);
2795+            dst += dst_stride;
2796+            src += src_stride;
2797+        }
2798+    }
2799+}
2800+
2801+static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes)
2802+{   // Non-zero when plane i counts as chroma: not plane 0, and not the last plane of a format flagged AV_PIX_FMT_FLAG_ALPHA
2803+    return i != 0  && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA));
2804+}
2805+
2806+static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
2807+{   // Point 'out' at the dmabuf fd of a single-object DRM_PRIME frame and keep a ref to frame->buf[0]; returns 0 or a negative AVERROR
2808+    const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0];
2809+
2810+    if (frame->format != AV_PIX_FMT_DRM_PRIME || !src)
2811+        return AVERROR(EINVAL);
2812+
2813+    av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF);
2814+
2815+    if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
2816+        // Only currently cope with single buffer types
2817+        if (out->buf.length != 1)
2818+            return AVERROR_PATCHWELCOME;
2819+        if (src->nb_objects != 1)
2820+            return AVERROR(EINVAL);
2821+
2822+        out->planes[0].m.fd = src->objects[0].fd;
2823+    }
2824+    else {
2825+        if (src->nb_objects != 1)
2826+            return AVERROR(EINVAL);
2827+
2828+        out->buf.m.fd      = src->objects[0].fd;
2829+    }
2830+
2831+    // No need to copy src AVDescriptor and if we did then we may confuse
2832+    // fd close on free
2833+    if ((out->ref_buf = av_buffer_ref(frame->buf[0])) == NULL)
2834+        return AVERROR(ENOMEM);  // do not report success when the reference could not be taken
2835     return 0;
2836 }
2837
2838 static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
2839 {
2840-    int i, ret;
2841-    struct v4l2_format fmt = out->context->format;
2842-    int pixel_format = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ?
2843-                       fmt.fmt.pix_mp.pixelformat : fmt.fmt.pix.pixelformat;
2844-    int height       = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ?
2845-                       fmt.fmt.pix_mp.height : fmt.fmt.pix.height;
2846-    int is_planar_format = 0;
2847-
2848-    switch (pixel_format) {
2849-    case V4L2_PIX_FMT_YUV420M:
2850-    case V4L2_PIX_FMT_YVU420M:
2851-#ifdef V4L2_PIX_FMT_YUV422M
2852-    case V4L2_PIX_FMT_YUV422M:
2853-#endif
2854-#ifdef V4L2_PIX_FMT_YVU422M
2855-    case V4L2_PIX_FMT_YVU422M:
2856-#endif
2857-#ifdef V4L2_PIX_FMT_YUV444M
2858-    case V4L2_PIX_FMT_YUV444M:
2859-#endif
2860-#ifdef V4L2_PIX_FMT_YVU444M
2861-    case V4L2_PIX_FMT_YVU444M:
2862-#endif
2863-    case V4L2_PIX_FMT_NV12M:
2864-    case V4L2_PIX_FMT_NV21M:
2865-    case V4L2_PIX_FMT_NV12MT_16X16:
2866-    case V4L2_PIX_FMT_NV12MT:
2867-    case V4L2_PIX_FMT_NV16M:
2868-    case V4L2_PIX_FMT_NV61M:
2869-        is_planar_format = 1;
2870-    }
2871-
2872-    if (!is_planar_format) {
2873-        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2874-        int planes_nb = 0;
2875-        int offset = 0;
2876-
2877-        for (i = 0; i < desc->nb_components; i++)
2878-            planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
2879-
2880-        for (i = 0; i < planes_nb; i++) {
2881-            int size, h = height;
2882-            if (i == 1 || i == 2) {
2883+    int i;
2884+    int num_planes = 0;
2885+    int pel_strides[4] = {0};
2886+
2887+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2888+
2889+    if ((desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0) {
2890+        av_log(NULL, AV_LOG_ERROR, "%s: HWACCEL cannot be copied\n", __func__);
2891+        return -1;
2892+    }
2893+
2894+    for (i = 0; i != desc->nb_components; ++i) {
2895+        if (desc->comp[i].plane >= num_planes)
2896+            num_planes = desc->comp[i].plane + 1;
2897+        pel_strides[desc->comp[i].plane] = desc->comp[i].step;
2898+    }
2899+
2900+    if (out->num_planes > 1) {
2901+        if (num_planes != out->num_planes) {
2902+            av_log(NULL, AV_LOG_ERROR, "%s: Num planes mismatch: %d != %d\n", __func__, num_planes, out->num_planes);
2903+            return -1;
2904+        }
2905+        for (i = 0; i != num_planes; ++i) {
2906+            int w = frame->width;
2907+            int h = frame->height;
2908+            if (is_chroma(desc, i, num_planes)) {
2909+                w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
2910                 h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
2911             }
2912-            size = frame->linesize[i] * h;
2913-            ret = v4l2_bufref_to_buf(out, 0, frame->data[i], size, offset);
2914-            if (ret)
2915-                return ret;
2916-            offset += size;
2917+
2918+            cpy_2d(out->plane_info[i].mm_addr, out->plane_info[i].bytesperline,
2919+                   frame->data[i], frame->linesize[i],
2920+                   w * pel_strides[i], h);
2921+            set_buf_length(out, i, out->plane_info[i].bytesperline * h, out->plane_info[i].length);
2922         }
2923-        return 0;
2924     }
2925+    else
2926+    {
2927+        unsigned int offset = 0;
2928+
2929+        for (i = 0; i != num_planes; ++i) {
2930+            int w = frame->width;
2931+            int h = frame->height;
2932+            int dst_stride = out->plane_info[0].bytesperline;
2933+            uint8_t * const dst = (uint8_t *)out->plane_info[0].mm_addr + offset;
2934+
2935+            if (is_chroma(desc, i, num_planes)) {
2936+                // Is chroma
2937+                dst_stride >>= desc->log2_chroma_w;
2938+                offset += dst_stride * (out->context->height >> desc->log2_chroma_h);
2939+                w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
2940+                h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
2941+            }
2942+            else {
2943+                // Is luma or alpha
2944+                offset += dst_stride * out->context->height;
2945+            }
2946+            if (offset > out->plane_info[0].length) {
2947+                av_log(NULL, AV_LOG_ERROR, "%s: Plane total %u > buffer size %zu\n", __func__, offset, out->plane_info[0].length);
2948+                return -1;
2949+            }
2950
2951-    for (i = 0; i < out->num_planes; i++) {
2952-        ret = v4l2_bufref_to_buf(out, i, frame->buf[i]->data, frame->buf[i]->size, 0);
2953-        if (ret)
2954-            return ret;
2955+            cpy_2d(dst, dst_stride,
2956+                   frame->data[i], frame->linesize[i],
2957+                   w * pel_strides[i], h);
2958+        }
2959+        set_buf_length(out, 0, offset, out->plane_info[0].length);
2960     }
2961-
2962     return 0;
2963 }
2964
2965@@ -408,16 +792,31 @@ static int v4l2_buffer_swframe_to_buf(co
2966  *
2967  ******************************************************************************/
2968
2969-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
2970+int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts)
2971 {
2972-    v4l2_set_pts(out, frame->pts);
2973-
2974-    return v4l2_buffer_swframe_to_buf(frame, out);
2975+    out->buf.flags = frame->key_frame ?
2976+        (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) :
2977+        (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME);
2978+    // Beware that colour info is held in format rather than the actual
2979+    // v4l2 buffer struct so this may not be as useful as you might hope
2980+    v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc);
2981+    v4l2_set_color_range(out, frame->color_range);
2982+    // PTS & interlace are buffer vars
2983+    if (track_ts)
2984+        out->buf.timestamp = tv_from_int(track_ts);
2985+    else
2986+        v4l2_set_pts(out, frame->pts);
2987+    v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first);
2988+
2989+    return frame->format == AV_PIX_FMT_DRM_PRIME ?
2990+        v4l2_buffer_primeframe_to_buf(frame, out) :
2991+        v4l2_buffer_swframe_to_buf(frame, out);
2992 }
2993
2994 int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
2995 {
2996     int ret;
2997+    V4L2Context * const ctx = avbuf->context;
2998
2999     av_frame_unref(frame);
3000
3001@@ -428,17 +827,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFram
3002
3003     /* 2. get frame information */
3004     frame->key_frame = !!(avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME);
3005+    frame->pict_type = frame->key_frame ? AV_PICTURE_TYPE_I :
3006+        (avbuf->buf.flags & V4L2_BUF_FLAG_PFRAME) != 0 ? AV_PICTURE_TYPE_P :
3007+        (avbuf->buf.flags & V4L2_BUF_FLAG_BFRAME) != 0 ? AV_PICTURE_TYPE_B :
3008+            AV_PICTURE_TYPE_NONE;
3009     frame->color_primaries = v4l2_get_color_primaries(avbuf);
3010     frame->colorspace = v4l2_get_color_space(avbuf);
3011     frame->color_range = v4l2_get_color_range(avbuf);
3012     frame->color_trc = v4l2_get_color_trc(avbuf);
3013     frame->pts = v4l2_get_pts(avbuf);
3014     frame->pkt_dts = AV_NOPTS_VALUE;
3015+    frame->interlaced_frame = v4l2_buf_is_interlaced(avbuf);
3016+    frame->top_field_first = v4l2_buf_is_top_first(avbuf);
3017
3018     /* these values are updated also during re-init in v4l2_process_driver_event */
3019-    frame->height = avbuf->context->height;
3020-    frame->width = avbuf->context->width;
3021-    frame->sample_aspect_ratio = avbuf->context->sample_aspect_ratio;
3022+    frame->height = ctx->height;
3023+    frame->width = ctx->width;
3024+    frame->sample_aspect_ratio = ctx->sample_aspect_ratio;
3025+
3026+    if (ctx->selection.height && ctx->selection.width) {
3027+        frame->crop_left = ctx->selection.left < frame->width ? ctx->selection.left : 0;
3028+        frame->crop_top  = ctx->selection.top < frame->height ? ctx->selection.top  : 0;
3029+        frame->crop_right = ctx->selection.left + ctx->selection.width < frame->width ?
3030+            frame->width - (ctx->selection.left + ctx->selection.width) : 0;
3031+        frame->crop_bottom = ctx->selection.top + ctx->selection.height < frame->height ?
3032+            frame->height - (ctx->selection.top + ctx->selection.height) : 0;
3033+    }
3034
3035     /* 3. report errors upstream */
3036     if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) {
3037@@ -451,15 +865,15 @@ int ff_v4l2_buffer_buf_to_avframe(AVFram
3038
3039 int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf)
3040 {
3041-    int ret;
3042-
3043     av_packet_unref(pkt);
3044-    ret = v4l2_buf_to_bufref(avbuf, 0, &pkt->buf);
3045-    if (ret)
3046-        return ret;
3047+
3048+    pkt->buf = wrap_avbuf(avbuf);
3049+    if (pkt->buf == NULL)
3050+        return AVERROR(ENOMEM);
3051
3052     pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused;
3053-    pkt->data = pkt->buf->data;
3054+    pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset;
3055+    pkt->flags = 0;
3056
3057     if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME)
3058         pkt->flags |= AV_PKT_FLAG_KEY;
3059@@ -474,39 +888,108 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket
3060     return 0;
3061 }
3062
3063-int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
3064+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out,
3065+                                    const void *extdata, size_t extlen,
3066+                                    const int64_t timestamp)
3067 {
3068     int ret;
3069
3070-    ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, 0);
3071-    if (ret)
3072+    if (extlen) {
3073+        ret = v4l2_bufref_to_buf(out, 0, extdata, extlen, 0);
3074+        if (ret)
3075+            return ret;
3076+    }
3077+
3078+    ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen);
3079+    if (ret && ret != AVERROR(ENOMEM))
3080         return ret;
3081
3082-    v4l2_set_pts(out, pkt->pts);
3083+    if (timestamp)
3084+        out->buf.timestamp = tv_from_int(timestamp);
3085+    else
3086+        v4l2_set_pts(out, pkt->pts);
3087+
3088+    out->buf.flags = (pkt->flags & AV_PKT_FLAG_KEY) != 0 ?
3089+        (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) :
3090+        (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME);
3091
3092-    if (pkt->flags & AV_PKT_FLAG_KEY)
3093-        out->flags = V4L2_BUF_FLAG_KEYFRAME;
3094+    return ret;
3095+}
3096
3097-    return 0;
3098+int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
3099+{
3100+    return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0);
3101+}
3102+
3103+
3104+static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data)
3105+{
3106+    V4L2Buffer * const b = (V4L2Buffer *)data;
3107+    unsigned int n;
3108+
3109+    // Drop every plane mapping that was actually established
3110+    for (n = 0; n < FF_ARRAY_ELEMS(b->plane_info); n++) {
3111+        if (b->plane_info[n].mm_addr)
3112+            munmap(b->plane_info[n].mm_addr, b->plane_info[n].length);
3113+    }
3114+
3115+    if (b->dmabuf[0] != NULL) {
3116+        // dmabuf handles are present: free each slot
3117+        for (n = 0; n < FF_ARRAY_ELEMS(b->dmabuf); n++)
3118+            dmabuf_free(b->dmabuf[n]);
3119+    }
3120+    else {
3121+        // No dmabuf handles: close any fds held in the DRM descriptor
3122+        for (n = 0; n < FF_ARRAY_ELEMS(b->drm_frame.objects); n++) {
3123+            if (b->drm_frame.objects[n].fd != -1)
3124+                close(b->drm_frame.objects[n].fd);
3125+        }
3126+    }
3127+
3128+    av_buffer_unref(&b->ref_buf);
3129+    ff_weak_link_unref(&b->context_wl);
3130+
3131+    av_free(b);
3132 }
3133
3134-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
3135+
3136+int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem)
3137 {
3138-    V4L2Context *ctx = avbuf->context;
3139     int ret, i;
3140+    V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf));
3141+    AVBufferRef * bufref;
3142+    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
3143+    int want_mmap;
3144
3145-    avbuf->buf.memory = V4L2_MEMORY_MMAP;
3146+    *pbufref = NULL;   // the caller sees NULL on every failure path
3147+    if (avbuf == NULL)
3148+        return AVERROR(ENOMEM);
3149+
3150+    bufref = av_buffer_create((uint8_t*)avbuf, sizeof(*avbuf), v4l2_buffer_buffer_free, NULL, 0);
3151+    if (bufref == NULL) {
3152+        av_free(avbuf);   // no bufref yet, so the free callback will not run for us
3153+        return AVERROR(ENOMEM);
3154+    }
3155+
3156+    avbuf->context = ctx;
3157+    avbuf->buf.memory = mem;
3158     avbuf->buf.type = ctx->type;
3159     avbuf->buf.index = index;
3160
3161+    for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) {
3162+        avbuf->drm_frame.objects[i].fd = -1;   // v4l2_buffer_buffer_free only closes fds that are != -1
3163+    }
3164+
3165+    avbuf->context_wl = ff_weak_link_ref(ctx->wl_master);
3166+
3167     if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
3168         avbuf->buf.length = VIDEO_MAX_PLANES;
3169         avbuf->buf.m.planes = avbuf->planes;
3170     }
3171
3172-    ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf);
3173+    ret = ioctl(s->fd, VIDIOC_QUERYBUF, &avbuf->buf) < 0 ? AVERROR(errno) : 0;   // keep errno: the fail path returns ret as-is
3174     if (ret < 0)
3175-        return AVERROR(errno);
3176+        goto fail;
3177
3178     if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
3179         avbuf->num_planes = 0;
3180@@ -518,33 +1001,41 @@ int ff_v4l2_buffer_initialize(V4L2Buffer
3181     } else
3182         avbuf->num_planes = 1;
3183
3184-    for (i = 0; i < avbuf->num_planes; i++) {
3185+    want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP &&
3186+        (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm);
3187
3188+    for (i = 0; i < avbuf->num_planes; i++) {
3189         avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
3190             ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline :
3191             ctx->format.fmt.pix.bytesperline;
3192
3193         if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
3194             avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length;
3195-            avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length,
3196-                                           PROT_READ | PROT_WRITE, MAP_SHARED,
3197-                                           buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
3198+            avbuf->plane_info[i].offset = avbuf->buf.m.planes[i].data_offset;
3199+
3200+            if (want_mmap)
3201+                avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length,
3202+                                               PROT_READ | PROT_WRITE, MAP_SHARED,
3203+                                               buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
3204         } else {
3205             avbuf->plane_info[i].length = avbuf->buf.length;
3206-            avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
3207-                                          PROT_READ | PROT_WRITE, MAP_SHARED,
3208-                                          buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset);
3209+            avbuf->plane_info[i].offset = 0;
3210+
3211+            if (want_mmap)
3212+                avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
3213+                                               PROT_READ | PROT_WRITE, MAP_SHARED,
3214+                                               buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset);
3215         }
3216
3217-        if (avbuf->plane_info[i].mm_addr == MAP_FAILED)
3218-            return AVERROR(ENOMEM);
3219+        if (avbuf->plane_info[i].mm_addr == MAP_FAILED) {
3220+            avbuf->plane_info[i].mm_addr = NULL;
3221+            ret = AVERROR(ENOMEM);
3222+            goto fail;
3223+        }
3224     }
3225
3226     avbuf->status = V4L2BUF_AVAILABLE;
3227
3228-    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
3229-        return 0;
3230-
3231     if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
3232         avbuf->buf.m.planes = avbuf->planes;
3233         avbuf->buf.length   = avbuf->num_planes;
3234@@ -554,20 +1045,52 @@ int ff_v4l2_buffer_initialize(V4L2Buffer
3235         avbuf->buf.length    = avbuf->planes[0].length;
3236     }
3237
3238-    return ff_v4l2_buffer_enqueue(avbuf);
3239+    if (V4L2_TYPE_IS_CAPTURE(ctx->type) && !want_mmap) {
3240+        // export_drm does dmabuf alloc if we aren't using v4l2 alloc
3241+        ret = v4l2_buffer_export_drm(avbuf);
3242+        if (ret) {
3243+            av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n");
3244+            goto fail;
3245+        }
3246+    }
3247+
3248+    *pbufref = bufref;
3249+    return 0;
3250+
3251+fail:
3252+    av_buffer_unref(&bufref);
3253+    return ret;
3254 }
3255
3256 int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf)
3257 {
3258     int ret;
3259+    int qc;   // queued-buffer count after this enqueue; only used for the debug log
3260
3261-    avbuf->buf.flags = avbuf->flags;
3262+    if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) {
3263+        av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%lld.%06ld count=%d\n",
3264+               avbuf->context->name, avbuf->buf.index,
3265+               (long long)avbuf->buf.timestamp.tv_sec, (long)avbuf->buf.timestamp.tv_usec, // casts: timeval field widths are ABI-dependent
3266+               avbuf->context->q_count);
3267+    }
3268
3269     ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QBUF, &avbuf->buf);
3270-    if (ret < 0)
3271-        return AVERROR(errno);
3272+    if (ret < 0) {
3273+        int err = errno;   // capture before the logging calls below can overwrite errno
3274+        av_log(logger(avbuf), AV_LOG_ERROR, "--- %s VIDIOC_QBUF: index %d FAIL err %d (%s)\n",
3275+               avbuf->context->name, avbuf->buf.index,
3276+               err, strerror(err));
3277+        return AVERROR(err);
3278+    }
3279
3280+    // Lock not wanted - if called from buffer free then lock already obtained
3281+    qc = atomic_fetch_add(&avbuf->context->q_count, 1) + 1;
3282     avbuf->status = V4L2BUF_IN_DRIVER;
3283+    pthread_cond_broadcast(&avbuf->context->cond);
3284+
3285+    av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%lld.%06ld count=%d\n",
3286+           avbuf->context->name, avbuf->buf.index,
3287+           (long long)avbuf->buf.timestamp.tv_sec, (long)avbuf->buf.timestamp.tv_usec, qc);
3288
3289     return 0;
3290 }
3291--- a/libavcodec/v4l2_buffers.h
3292+++ b/libavcodec/v4l2_buffers.h
3293@@ -28,31 +28,47 @@
3294 #include <stddef.h>
3295 #include <linux/videodev2.h>
3296
3297+#include "avcodec.h"
3298 #include "libavutil/buffer.h"
3299 #include "libavutil/frame.h"
3300+#include "libavutil/hwcontext_drm.h"
3301 #include "packet.h"
3302
3303 enum V4L2Buffer_status {
3304     V4L2BUF_AVAILABLE,
3305     V4L2BUF_IN_DRIVER,
3306+    V4L2BUF_IN_USE,
3307     V4L2BUF_RET_USER,
3308 };
3309
3310 /**
3311  * V4L2Buffer (wrapper for v4l2_buffer management)
3312  */
3313+struct V4L2Context;
3314+struct ff_weak_link_client;
3315+struct dmabuf_h;
3316+
3317 typedef struct V4L2Buffer {
3318-    /* each buffer needs to have a reference to its context */
3319+    /* each buffer needs to have a reference to its context
3320+     * The pointer is good enough for most operations but once the buffer has
3321+     * been passed to the user the buffer may become orphaned so for free ops
3322+     * the weak link must be used to ensure that the context is actually
3323+     * there
3324+     */
3325     struct V4L2Context *context;
3326+    struct ff_weak_link_client *context_wl;
3327
3328-    /* This object is refcounted per-plane, so we need to keep track
3329-     * of how many context-refs we are holding. */
3330-    AVBufferRef *context_ref;
3331-    atomic_uint context_refcount;
3332+    /* DRM descriptor */
3333+    AVDRMFrameDescriptor drm_frame;
3334+    /* For DRM_PRIME encode - need to keep a ref to the source buffer till we
3335+     * are done
3336+     */
3337+    AVBufferRef * ref_buf;
3338
3339     /* keep track of the mmap address and mmap length */
3340     struct V4L2Plane_info {
3341-        int bytesperline;
3342+        size_t bytesperline;
3343+        size_t offset;
3344         void * mm_addr;
3345         size_t length;
3346     } plane_info[VIDEO_MAX_PLANES];
3347@@ -63,9 +79,9 @@ typedef struct V4L2Buffer {
3348     struct v4l2_buffer buf;
3349     struct v4l2_plane planes[VIDEO_MAX_PLANES];
3350
3351-    int flags;
3352     enum V4L2Buffer_status status;
3353
3354+    struct dmabuf_h * dmabuf[VIDEO_MAX_PLANES]; // If externally alloced dmabufs - stash other info here
3355 } V4L2Buffer;
3356
3357 /**
3358@@ -101,6 +117,10 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket
3359  */
3360 int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out);
3361
3362+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out,
3363+                                    const void *extdata, size_t extlen,
3364+                                    const int64_t timestamp);
3365+
3366 /**
3367  * Extracts the data from an AVFrame to a V4L2Buffer
3368  *
3369@@ -109,7 +129,7 @@ int ff_v4l2_buffer_avpkt_to_buf(const AV
3370  *
3371  * @returns 0 in case of success, a negative AVERROR code otherwise
3372  */
3373-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out);
3374+int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts);
3375
3376 /**
3377  * Initializes a V4L2Buffer
3378@@ -119,7 +139,7 @@ int ff_v4l2_buffer_avframe_to_buf(const
3379  *
3380  * @returns 0 in case of success, a negative AVERROR code otherwise
3381  */
3382-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index);
3383+int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem);
3384
3385 /**
3386  * Enqueues a V4L2Buffer
3387@@ -130,5 +150,12 @@ int ff_v4l2_buffer_initialize(V4L2Buffer
3388  */
3389 int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf);
3390
3391+static inline void
3392+ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf)
3393+{   // Mark avbuf as available and release the source-buffer ref it may hold
3394+    avbuf->status = V4L2BUF_AVAILABLE;
3395+    av_buffer_unref(&avbuf->ref_buf);   // ref_buf is the DRM_PRIME encode source ref kept "till we are done"
3396+}
3397+
3398
3399 #endif // AVCODEC_V4L2_BUFFERS_H
3400--- a/libavcodec/v4l2_context.c
3401+++ b/libavcodec/v4l2_context.c
3402@@ -27,11 +27,14 @@
3403 #include <unistd.h>
3404 #include <fcntl.h>
3405 #include <poll.h>
3406+#include "libavutil/avassert.h"
3407+#include "libavutil/pixdesc.h"
3408 #include "libavcodec/avcodec.h"
3409 #include "libavcodec/internal.h"
3410 #include "v4l2_buffers.h"
3411 #include "v4l2_fmt.h"
3412 #include "v4l2_m2m.h"
3413+#include "weak_link.h"
3414
3415 struct v4l2_format_update {
3416     uint32_t v4l2_fmt;
3417@@ -41,26 +44,168 @@ struct v4l2_format_update {
3418     int update_avfmt;
3419 };
3420
3421-static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx)
3422+
3423+static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n)
3424 {
3425-    return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
3426-        container_of(ctx, V4L2m2mContext, output) :
3427-        container_of(ctx, V4L2m2mContext, capture);
3428+    return (int64_t)n;
3429 }
3430
3431-static inline AVCodecContext *logger(V4L2Context *ctx)
3432+static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts)
3433 {
3434-    return ctx_to_m2mctx(ctx)->avctx;
3435+    return (unsigned int)pts;
3436+}
3437+
3438+// FFmpeg requires us to propagate a number of vars from the coded pkt into
3439+// the decoded frame. The only thing that tracks like that in V4L2 stateful
3440+// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no
3441+// guarantees about PTS being unique or specified for every frame so replace
3442+// the supplied PTS with a simple incrementing number and keep a circular
3443+// buffer of all the things we want preserved (including the original PTS)
3444+// indexed by the tracking no.
3445+static int64_t
3446+xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt)
3447+{
3448+    V4L2m2mTrackEl *slot;
3449+    int64_t key;
3450+    // Advance the tracking number; 0 is skipped when the counter wraps
3451+    x->track_no += 1;
3452+    if (x->track_no == 0)
3453+        x->track_no = 1;
3454+    key  = track_to_pts(avctx, x->track_no);
3455+    slot = x->track_els + (x->track_no % FF_V4L2_M2M_TRACK_SIZE);
3456+    av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, key, x->track_no);
3457+    *slot = (V4L2m2mTrackEl){
3458+        .discard          = 0,
3459+        .pending          = 1,
3460+        .pkt_size         = avpkt->size,
3461+        .pts              = avpkt->pts,
3462+        .dts              = avpkt->dts,
3463+        .reordered_opaque = avctx->reordered_opaque,
3464+        .pkt_pos          = avpkt->pos,
3465+        .pkt_duration     = avpkt->duration,
3466+        .track_pts        = key
3467+    };
3468+    return key;   // the tracking value that stands in for the packet's PTS
3469+}
3470+
3471+static int64_t
3472+xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame)
3473+{
3474+    V4L2m2mTrackEl *slot;
3475+    int64_t key;
3476+    // Advance the tracking number; 0 is skipped when the counter wraps
3477+    x->track_no += 1;
3478+    if (x->track_no == 0)
3479+        x->track_no = 1;
3480+    key  = track_to_pts(avctx, x->track_no);
3481+    slot = x->track_els + (x->track_no % FF_V4L2_M2M_TRACK_SIZE);
3482+    av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, key, x->track_no);
3483+    *slot = (V4L2m2mTrackEl){
3484+        .discard          = 0,
3485+        .pending          = 1,
3486+        .pkt_size         = 0,
3487+        .pts              = frame->pts,
3488+        .dts              = AV_NOPTS_VALUE,
3489+        .reordered_opaque = frame->reordered_opaque,
3490+        .pkt_pos          = frame->pkt_pos,
3491+        .pkt_duration     = frame->pkt_duration,
3492+        .track_pts        = key
3493+    };
3494+    return key;   // the tracking value that stands in for the frame's PTS
3495+}
3496+
3497+
3498+// Swap a frame's tracking pts for the values saved in its track slot; returns 0, or -1 if we should discard the frame
3499+static int
3500+xlat_pts_frame_out(AVCodecContext *const avctx,
3501+             xlat_track_t * const x,
3502+             AVFrame *const frame)
3503+{
3504+    unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE;
3505+    V4L2m2mTrackEl *const t = x->track_els + n;
3506+    if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts)   // slot does not belong to this frame
3507+    {
3508+        av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING,
3509+               "Frame tracking failure: pts=%" PRId64 ", track[%u]=%" PRId64 "\n", frame->pts, n, t->track_pts);
3510+        frame->pts              = AV_NOPTS_VALUE;
3511+        frame->pkt_dts          = AV_NOPTS_VALUE;
3512+        frame->reordered_opaque = x->last_opaque;
3513+        frame->pkt_pos          = -1;
3514+        frame->pkt_duration     = 0;
3515+        frame->pkt_size         = -1;
3516+    }
3517+    else if (!t->discard)
3518+    {
3519+        frame->pts              = t->pending ? t->pts : AV_NOPTS_VALUE;   // the saved pts is handed out once only
3520+        frame->pkt_dts          = t->dts;
3521+        frame->reordered_opaque = t->reordered_opaque;
3522+        frame->pkt_pos          = t->pkt_pos;
3523+        frame->pkt_duration     = t->pkt_duration;
3524+        frame->pkt_size         = t->pkt_size;
3525+
3526+        x->last_opaque = t->reordered_opaque;
3527+        if (frame->pts != AV_NOPTS_VALUE)
3528+            x->last_pts = frame->pts;
3529+        t->pending = 0;
3530+    }
3531+    else
3532+    {
3533+        av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%u]=%" PRId64 "\n", frame->pts, n, t->track_pts);
3534+        return -1;
3535+    }
3536+
3537+    av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%u\n",
3538+           frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n);
3539+    return 0;
3540+}
3541+
3542+// Swap a packet's tracking pts for the pts saved in its track slot; returns 0, or -1 if we should discard the packet
3543+static int
3544+xlat_pts_pkt_out(AVCodecContext *const avctx,
3545+             xlat_track_t * const x,
3546+             AVPacket *const pkt)
3547+{
3548+    unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE;
3549+    V4L2m2mTrackEl *const t = x->track_els + n;
3550+    if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts)   // slot does not belong to this packet
3551+    {
3552+        av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING,
3553+               "Pkt tracking failure: pts=%" PRId64 ", track[%u]=%" PRId64 "\n", pkt->pts, n, t->track_pts);
3554+        pkt->pts                = AV_NOPTS_VALUE;
3555+    }
3556+    else if (!t->discard)
3557+    {
3558+        pkt->pts                = t->pending ? t->pts : AV_NOPTS_VALUE;   // the saved pts is handed out once only
3559+
3560+        x->last_opaque = t->reordered_opaque;
3561+        if (pkt->pts != AV_NOPTS_VALUE)
3562+            x->last_pts = pkt->pts;
3563+        t->pending = 0;
3564+    }
3565+    else
3566+    {
3567+        av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%u]=%" PRId64 "\n", pkt->pts, n, t->track_pts);
3568+        return -1;
3569+    }
3570+
3571+    // * Would like something much better than this...xlat(offset + out_count)?
3572+    pkt->dts = pkt->pts;
3573+    av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%u\n",
3574+           pkt->pts, t->track_pts, n);
3575+    return 0;
3576+}
3577
3578-static inline unsigned int v4l2_get_width(struct v4l2_format *fmt)
3579+
3580+static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx)
3581 {
3582-    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
3583+    return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
3584+        container_of(ctx, V4L2m2mContext, output) :
3585+        container_of(ctx, V4L2m2mContext, capture);
3586 }
3587
3588-static inline unsigned int v4l2_get_height(struct v4l2_format *fmt)
3589+static inline AVCodecContext *logger(const V4L2Context *ctx)
3590 {
3591-    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
3592+    return ctx_to_m2mctx(ctx)->avctx;
3593 }
3594
3595 static AVRational v4l2_get_sar(V4L2Context *ctx)
3596@@ -81,21 +226,29 @@ static AVRational v4l2_get_sar(V4L2Conte
3597     return sar;
3598 }
3599
3600-static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2)
3601+static inline int ctx_buffers_alloced(const V4L2Context * const ctx)
3602+{
3603+    return ctx->bufrefs != NULL;
3604+}
3605+
3606+// Width/Height changed or we don't have an alloc in the first place?
3607+static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2)
3608 {
3609-    struct v4l2_format *fmt1 = &ctx->format;
3610-    int ret =  V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
3611-        fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width ||
3612-        fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height
3613-        :
3614-        fmt1->fmt.pix.width != fmt2->fmt.pix.width ||
3615-        fmt1->fmt.pix.height != fmt2->fmt.pix.height;
3616+    const struct v4l2_format *fmt1 = &ctx->format;
3617+    int ret = !ctx_buffers_alloced(ctx) ||
3618+        (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
3619+            fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width ||
3620+            fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height
3621+            :
3622+            fmt1->fmt.pix.width != fmt2->fmt.pix.width ||
3623+            fmt1->fmt.pix.height != fmt2->fmt.pix.height);
3624
3625     if (ret)
3626-        av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n",
3627+        av_log(logger(ctx), AV_LOG_DEBUG, "V4L2 %s changed: alloc=%d (%dx%d) -> (%dx%d)\n",
3628             ctx->name,
3629-            v4l2_get_width(fmt1), v4l2_get_height(fmt1),
3630-            v4l2_get_width(fmt2), v4l2_get_height(fmt2));
3631+            ctx_buffers_alloced(ctx),
3632+            ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1),
3633+            ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2));
3634
3635     return ret;
3636 }
3637@@ -153,76 +306,100 @@ static inline void v4l2_save_to_context(
3638     }
3639 }
3640
3641-static int v4l2_start_decode(V4L2Context *ctx)
3642+static int get_default_selection(V4L2Context * const ctx, struct v4l2_rect *r)
3643 {
3644-    struct v4l2_decoder_cmd cmd = {
3645-        .cmd = V4L2_DEC_CMD_START,
3646-        .flags = 0,
3647+    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
3648+    struct v4l2_selection selection = {
3649+        .type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
3650+        .target = V4L2_SEL_TGT_COMPOSE
3651     };
3652-    int ret;
3653
3654-    ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DECODER_CMD, &cmd);
3655-    if (ret)
3656+    memset(r, 0, sizeof(*r));
3657+    if (ioctl(s->fd, VIDIOC_G_SELECTION, &selection))
3658         return AVERROR(errno);
3659
3660+    *r = selection.r;
3661     return 0;
3662 }
3663
3664-/**
3665- * handle resolution change event and end of stream event
3666- * returns 1 if reinit was successful, negative if it failed
3667- * returns 0 if reinit was not executed
3668- */
3669-static int v4l2_handle_event(V4L2Context *ctx)
3670+static int do_source_change(V4L2m2mContext * const s)
3671 {
3672-    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
3673-    struct v4l2_format cap_fmt = s->capture.format;
3674-    struct v4l2_event evt = { 0 };
3675+    AVCodecContext *const avctx = s->avctx;
3676+
3677     int ret;
3678+    int reinit;
3679+    struct v4l2_format cap_fmt = s->capture.format;
3680
3681-    ret = ioctl(s->fd, VIDIOC_DQEVENT, &evt);
3682-    if (ret < 0) {
3683-        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_DQEVENT\n", ctx->name);
3684-        return 0;
3685-    }
3686+    s->capture.done = 0;
3687
3688-    if (evt.type == V4L2_EVENT_EOS) {
3689-        ctx->done = 1;
3690+    ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt);
3691+    if (ret) {
3692+        av_log(avctx, AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", s->capture.name);
3693         return 0;
3694     }
3695
3696-    if (evt.type != V4L2_EVENT_SOURCE_CHANGE)
3697-        return 0;
3698+    get_default_selection(&s->capture, &s->capture.selection);
3699
3700-    ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt);
3701-    if (ret) {
3702-        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT\n", s->capture.name);
3703-        return 0;
3704+    reinit = ctx_resolution_changed(&s->capture, &cap_fmt);
3705+    if ((s->quirks & FF_V4L2_QUIRK_REINIT_ALWAYS) != 0)
3706+        reinit = 1;
3707+
3708+    s->capture.format = cap_fmt;
3709+    if (reinit) {
3710+        s->capture.height = ff_v4l2_get_format_height(&cap_fmt);
3711+        s->capture.width = ff_v4l2_get_format_width(&cap_fmt);
3712     }
3713
3714-    if (v4l2_resolution_changed(&s->capture, &cap_fmt)) {
3715-        s->capture.height = v4l2_get_height(&cap_fmt);
3716-        s->capture.width = v4l2_get_width(&cap_fmt);
3717-        s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
3718-    } else {
3719-        v4l2_start_decode(ctx);
3720-        return 0;
3721+    // If we don't support selection (or it is bust) and we obviously have HD then kludge
3722+    if ((s->capture.selection.width == 0 || s->capture.selection.height == 0) &&
3723+        (s->capture.height == 1088 && s->capture.width == 1920)) {
3724+        s->capture.selection = (struct v4l2_rect){.width = 1920, .height = 1080};
3725     }
3726
3727-    s->reinit = 1;
3728+    s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
3729
3730-    if (s->avctx)
3731-        ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height);
3732-    if (ret < 0)
3733-        av_log(logger(ctx), AV_LOG_WARNING, "update avcodec height and width\n");
3734+    av_log(avctx, AV_LOG_DEBUG, "Source change: Fmt: %s, SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n",
3735+           av_fourcc2str(ff_v4l2_get_format_pixelformat(&cap_fmt)),
3736+           s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den,
3737+           s->capture.width, s->capture.height,
3738+           s->capture.selection.width, s->capture.selection.height,
3739+           s->capture.selection.left, s->capture.selection.top, reinit);
3740
3741-    ret = ff_v4l2_m2m_codec_reinit(s);
3742-    if (ret) {
3743-        av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_reinit\n");
3744-        return AVERROR(EINVAL);
3745+    ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
3746+    if (ret)
3747+        av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n");
3748+    s->draining = 0;
3749+
3750+    if (!reinit) {
3751+        /* Buffers are OK so just stream off to ack */
3752+        av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__);
3753+    }
3754+    else {
3755+        if (avctx)
3756+            ret = ff_set_dimensions(s->avctx,
3757+                                    s->capture.selection.width != 0 ? s->capture.selection.width : s->capture.width,
3758+                                    s->capture.selection.height != 0 ? s->capture.selection.height : s->capture.height);
3759+        if (ret < 0)
3760+            av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n");
3761+
3762+        ff_v4l2_context_release(&s->capture);
3763+
3764+        if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) ||
3765+            s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) {
3766+            av_log(avctx, AV_LOG_ERROR, "Format post reinit too small: wanted %dx%d > got %dx%d\n",
3767+                   s->capture.width, s->capture.height,
3768+                   ff_v4l2_get_format_width(&s->capture.format), ff_v4l2_get_format_height(&s->capture.format));
3769+            return AVERROR(EINVAL);
3770+        }
3771+
3772+        // Update pixel format - should only actually do something on initial change
3773+        s->capture.av_pix_fmt =
3774+            ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO);
3775+        avctx->pix_fmt = s->output_drm ? AV_PIX_FMT_DRM_PRIME : s->capture.av_pix_fmt;
3776+        avctx->sw_pix_fmt = s->capture.av_pix_fmt;
3777     }
3778
3779-    /* reinit executed */
3780+    ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON);
3781     return 1;
3782 }
3783
3784@@ -266,171 +443,293 @@ static int v4l2_stop_encode(V4L2Context
3785     return 0;
3786 }
3787
3788-static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout)
3789-{
3790-    struct v4l2_plane planes[VIDEO_MAX_PLANES];
3791-    struct v4l2_buffer buf = { 0 };
3792-    V4L2Buffer *avbuf;
3793-    struct pollfd pfd = {
3794-        .events =  POLLIN | POLLRDNORM | POLLPRI | POLLOUT | POLLWRNORM, /* default blocking capture */
3795-        .fd = ctx_to_m2mctx(ctx)->fd,
3796+// DQ a buffer
3797+// Amalgamates all the various ways there are of signalling EOS/Event to
3798+// generate a consistent EPIPE.
3799+//
3800+// Sets ctx->flag_last if next dq would produce EPIPE (i.e. stream has stopped)
3801+//
3802+// Returns:
3803+//  0               Success
3804+//  AVERROR(EPIPE)  Nothing more to read
3805+//  AVERROR(ENOSPC) No buffers in Q to put result in
3806+//  *               AVERROR(..)
3807+
3808+ static int
3809+dq_buf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf)
3810+{
3811+    V4L2m2mContext * const m = ctx_to_m2mctx(ctx);
3812+    AVCodecContext * const avctx = m->avctx;
3813+    V4L2Buffer * avbuf;
3814+    const int is_mp = V4L2_TYPE_IS_MULTIPLANAR(ctx->type);
3815+
3816+    struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
3817+
3818+    struct v4l2_buffer buf = {
3819+        .type = ctx->type,
3820+        .memory = V4L2_MEMORY_MMAP,
3821     };
3822-    int i, ret;
3823
3824-    if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx->buffers) {
3825-        for (i = 0; i < ctx->num_buffers; i++) {
3826-            if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER)
3827-                break;
3828-        }
3829-        if (i == ctx->num_buffers)
3830-            av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers returned to "
3831-                                                "userspace. Increase num_capture_buffers "
3832-                                                "to prevent device deadlock or dropped "
3833-                                                "packets/frames.\n");
3834+    *ppavbuf = NULL;
3835+
3836+    if (ctx->flag_last)
3837+        return AVERROR(EPIPE);
3838+
3839+    if (is_mp) {
3840+        buf.length = VIDEO_MAX_PLANES;
3841+        buf.m.planes = planes;
3842     }
3843
3844-    /* if we are draining and there are no more capture buffers queued in the driver we are done */
3845-    if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx_to_m2mctx(ctx)->draining) {
3846-        for (i = 0; i < ctx->num_buffers; i++) {
3847-            /* capture buffer initialization happens during decode hence
3848-             * detection happens at runtime
3849-             */
3850-            if (!ctx->buffers)
3851-                break;
3852+    while (ioctl(m->fd, VIDIOC_DQBUF, &buf) != 0) {
3853+        const int err = errno;
3854+        av_assert0(AVERROR(err) < 0);
3855+        if (err != EINTR) {
3856+            av_log(avctx, AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n",
3857+                ctx->name, av_err2str(AVERROR(err)));
3858+
3859+            if (err == EPIPE)
3860+                ctx->flag_last = 1;
3861
3862-            if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER)
3863-                goto start;
3864+            return AVERROR(err);
3865         }
3866-        ctx->done = 1;
3867-        return NULL;
3868     }
3869+    atomic_fetch_sub(&ctx->q_count, 1);
3870
3871-start:
3872-    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
3873-        pfd.events =  POLLOUT | POLLWRNORM;
3874-    else {
3875-        /* no need to listen to requests for more input while draining */
3876-        if (ctx_to_m2mctx(ctx)->draining)
3877-            pfd.events =  POLLIN | POLLRDNORM | POLLPRI;
3878+    avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data;
3879+    ff_v4l2_buffer_set_avail(avbuf);
3880+    avbuf->buf = buf;
3881+    if (is_mp) {
3882+        memcpy(avbuf->planes, planes, sizeof(planes));
3883+        avbuf->buf.m.planes = avbuf->planes;
3884+    }
3885+    // Done with any attached buffer
3886+    av_buffer_unref(&avbuf->ref_buf);
3887+
3888+    if (V4L2_TYPE_IS_CAPTURE(ctx->type)) {
3889+        // Zero length cap buffer return == EOS
3890+        if ((is_mp ? buf.m.planes[0].bytesused : buf.bytesused) == 0) {
3891+            av_log(avctx, AV_LOG_DEBUG, "Buffer empty - reQ\n");
3892+
3893+            // Must reQ so we don't leak
3894+            // May not matter if the next thing we do is release all the
3895+            // buffers but better to be tidy.
3896+            ff_v4l2_buffer_enqueue(avbuf);
3897+
3898+            ctx->flag_last = 1;
3899+            return AVERROR(EPIPE);
3900+        }
3901+
3902+#ifdef V4L2_BUF_FLAG_LAST
3903+        // If flag_last set then this contains data but is the last frame
3904+        // so remember that but return OK
3905+        if ((buf.flags & V4L2_BUF_FLAG_LAST) != 0)
3906+            ctx->flag_last = 1;
3907+#endif
3908     }
3909
3910-    for (;;) {
3911-        ret = poll(&pfd, 1, timeout);
3912-        if (ret > 0)
3913-            break;
3914-        if (errno == EINTR)
3915+    *ppavbuf = avbuf;
3916+    return 0;
3917+}
3918+
3919+/**
3920+ * handle resolution change event and end of stream event
3921+ * Expects to be called after the stream has stopped
3922+ *
3923+ * returns 1 if reinit was successful, negative if it failed
3924+ * returns 0 if reinit was not executed
3925+ */
3926+static int
3927+get_event(V4L2m2mContext * const m)
3928+{
3929+    AVCodecContext * const avctx = m->avctx;
3930+    struct v4l2_event evt = { 0 };
3931+
3932+    while (ioctl(m->fd, VIDIOC_DQEVENT, &evt) != 0) {
3933+        const int rv = AVERROR(errno);
3934+        if (rv == AVERROR(EINTR))
3935             continue;
3936-        return NULL;
3937+        if (rv == AVERROR(EAGAIN)) {
3938+            av_log(avctx, AV_LOG_WARNING, "V4L2 failed to get expected event - assume EOS\n");
3939+            return AVERROR_EOF;
3940+        }
3941+        av_log(avctx, AV_LOG_ERROR, "V4L2 VIDIOC_DQEVENT: %s\n", av_err2str(rv));
3942+        return rv;
3943     }
3944
3945-    /* 0. handle errors */
3946-    if (pfd.revents & POLLERR) {
3947-        /* if we are trying to get free buffers but none have been queued yet
3948-           no need to raise a warning */
3949-        if (timeout == 0) {
3950-            for (i = 0; i < ctx->num_buffers; i++) {
3951-                if (ctx->buffers[i].status != V4L2BUF_AVAILABLE)
3952-                    av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name);
3953-            }
3954-        }
3955-        else
3956-            av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name);
3957+    av_log(avctx, AV_LOG_DEBUG, "Dq event %d\n", evt.type);
3958
3959-        return NULL;
3960+    if (evt.type == V4L2_EVENT_EOS) {
3961+        av_log(avctx, AV_LOG_TRACE, "V4L2 VIDIOC_EVENT_EOS\n");
3962+        return AVERROR_EOF;
3963     }
3964
3965-    /* 1. handle resolution changes */
3966-    if (pfd.revents & POLLPRI) {
3967-        ret = v4l2_handle_event(ctx);
3968-        if (ret < 0) {
3969-            /* if re-init failed, abort */
3970-            ctx->done = 1;
3971-            return NULL;
3972+    if (evt.type == V4L2_EVENT_SOURCE_CHANGE)
3973+        return do_source_change(m);
3974+
3975+    return 0;
3976+}
3977+
3978+static inline int  // 1 if c is streaming and its q_count is non-zero, else 0
3979+dq_ok(const V4L2Context * const c)
3980+{
3981+    return c->streamon && atomic_load(&c->q_count) != 0;  // get_qbuf uses this to avoid a poll that could only give POLLERR
3982+}
3983+
3984+// Get a buffer
3985+// If output then just gets the buffer in the expected way
3986+// If capture then runs the capture state m/c to deal with res change etc.
3987+// If return value == 0 then *ppavbuf != NULL
3988+
3989+static int
3990+get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout)
3991+{
3992+    V4L2m2mContext * const m = ctx_to_m2mctx(ctx);
3993+    AVCodecContext * const avctx = m->avctx;
3994+    const int is_cap = V4L2_TYPE_IS_CAPTURE(ctx->type);
3995+
3996+    const unsigned int poll_cap = (POLLIN | POLLRDNORM);
3997+    const unsigned int poll_out = (POLLOUT | POLLWRNORM);
3998+    const unsigned int poll_event = POLLPRI;
3999+
4000+    *ppavbuf = NULL;
4001+
4002+    for (;;) {
4003+        struct pollfd pfd = {
4004+            .fd = m->fd,
4005+            // If capture && stream not started then assume we are waiting for the initial event
4006+            .events = !is_cap ? poll_out :
4007+                !ff_v4l2_ctx_eos(ctx) && ctx->streamon ? poll_cap :
4008+                    poll_event,
4009+        };
4010+        int ret;
4011+
4012+        if (ctx->done) {
4013+            av_log(avctx, AV_LOG_TRACE, "V4L2 %s already done\n", ctx->name);
4014+            return AVERROR_EOF;
4015         }
4016-        if (ret) {
4017-            /* if re-init was successful drop the buffer (if there was one)
4018-             * since we had to reconfigure capture (unmap all buffers)
4019-             */
4020-            return NULL;
4021+
4022+        // If capture && timeout == -1 then also wait for rx buffer free
4023+        if (is_cap && timeout == -1 && dq_ok(&m->output) && !m->draining)
4024+            pfd.events |= poll_out;
4025+
4026+        // If nothing Qed all we will get is POLLERR - avoid that
4027+        if ((pfd.events == poll_out && !dq_ok(&m->output)) ||
4028+            (pfd.events == poll_cap && !dq_ok(&m->capture)) ||
4029+            (pfd.events == (poll_cap | poll_out) && !dq_ok(&m->capture) && !dq_ok(&m->output))) {
4030+            av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name);
4031+            return AVERROR(ENOSPC);
4032         }
4033-    }
4034
4035-    /* 2. dequeue the buffer */
4036-    if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) {
4037+        // Timeout kludged s.t. "forever" eventually gives up & produces logging
4038+        // If waiting for an event when we have seen a last_frame then we expect
4039+        //   it to be ready already so force a short timeout
4040+        ret = poll(&pfd, 1,
4041+                   ff_v4l2_ctx_eos(ctx) ? 10 :
4042+                   timeout == -1 ? 3000 : timeout);
4043+        if (ret < 0) {
4044+            ret = AVERROR(errno);  // Remember errno before logging etc.
4045+            av_assert0(ret < 0);
4046+        }
4047+
4048+        av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s ret=%d, timeout=%d, events=%#x, revents=%#x\n",
4049+               ctx->name, ret, timeout, pfd.events, pfd.revents);
4050
4051-        if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) {
4052-            /* there is a capture buffer ready */
4053-            if (pfd.revents & (POLLIN | POLLRDNORM))
4054-                goto dequeue;
4055+        if (ret < 0) {
4056+            if (ret == AVERROR(EINTR))
4057+                continue;
4058+            av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll error %d (%s)\n", ctx->name, AVUNERROR(ret), av_err2str(ret));
4059+            return ret;
4060+        }
4061
4062-            /* the driver is ready to accept more input; instead of waiting for the capture
4063-             * buffer to complete we return NULL so input can proceed (we are single threaded)
4064-             */
4065-            if (pfd.revents & (POLLOUT | POLLWRNORM))
4066-                return NULL;
4067+        if (ret == 0) {
4068+            if (timeout == -1)
4069+                av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll unexpected timeout: events=%#x\n", ctx->name, pfd.events);
4070+            if (ff_v4l2_ctx_eos(ctx)) {
4071+                av_log(avctx, AV_LOG_WARNING, "V4L2 %s poll event timeout\n", ctx->name);
4072+                ret = get_event(m);
4073+                if (ret < 0) {
4074+                    ctx->done = 1;
4075+                    return ret;
4076+                }
4077+            }
4078+            return AVERROR(EAGAIN);
4079         }
4080
4081-dequeue:
4082-        memset(&buf, 0, sizeof(buf));
4083-        buf.memory = V4L2_MEMORY_MMAP;
4084-        buf.type = ctx->type;
4085-        if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
4086-            memset(planes, 0, sizeof(planes));
4087-            buf.length = VIDEO_MAX_PLANES;
4088-            buf.m.planes = planes;
4089+        if ((pfd.revents & POLLERR) != 0) {
4090+            av_log(avctx, AV_LOG_WARNING, "V4L2 %s POLLERR\n", ctx->name);
4091+            return AVERROR_UNKNOWN;
4092         }
4093
4094-        ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf);
4095-        if (ret) {
4096-            if (errno != EAGAIN) {
4097+        if ((pfd.revents & poll_event) != 0) {
4098+            ret = get_event(m);
4099+            if (ret < 0) {
4100                 ctx->done = 1;
4101-                if (errno != EPIPE)
4102-                    av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n",
4103-                        ctx->name, av_err2str(AVERROR(errno)));
4104+                return ret;
4105             }
4106-            return NULL;
4107+            continue;
4108         }
4109
4110-        if (ctx_to_m2mctx(ctx)->draining && !V4L2_TYPE_IS_OUTPUT(ctx->type)) {
4111-            int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ?
4112-                            buf.m.planes[0].bytesused : buf.bytesused;
4113-            if (bytesused == 0) {
4114-                ctx->done = 1;
4115-                return NULL;
4116-            }
4117-#ifdef V4L2_BUF_FLAG_LAST
4118-            if (buf.flags & V4L2_BUF_FLAG_LAST)
4119-                ctx->done = 1;
4120-#endif
4121+        if ((pfd.revents & poll_cap) != 0) {
4122+            ret = dq_buf(ctx, ppavbuf);
4123+            if (ret == AVERROR(EPIPE))
4124+                continue;
4125+            return ret;
4126         }
4127
4128-        avbuf = &ctx->buffers[buf.index];
4129-        avbuf->status = V4L2BUF_AVAILABLE;
4130-        avbuf->buf = buf;
4131-        if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
4132-            memcpy(avbuf->planes, planes, sizeof(planes));
4133-            avbuf->buf.m.planes = avbuf->planes;
4134+        if ((pfd.revents & poll_out) != 0) {
4135+            if (is_cap)
4136+                return AVERROR(EAGAIN);
4137+            return dq_buf(ctx, ppavbuf);
4138         }
4139-        return avbuf;
4140+
4141+        av_log(avctx, AV_LOG_ERROR, "V4L2 poll unexpected events=%#x, revents=%#x\n", pfd.events, pfd.revents);
4142+        return AVERROR_UNKNOWN;
4143     }
4144+}
4145
4146-    return NULL;
4147+// Clear out flags and timestamps that should be set by the user
4148+// Returns the passed avbuf
4149+static V4L2Buffer *
4150+clean_v4l2_buffer(V4L2Buffer * const avbuf)
4151+{
4152+    struct v4l2_buffer *const buf = &avbuf->buf;
4153+    // Reset flags, field, timestamp, timecode and sequence; other members are left as they were
4154+    buf->flags = 0;
4155+    buf->field = V4L2_FIELD_ANY;
4156+    buf->timestamp = (struct timeval){0};
4157+    buf->timecode = (struct v4l2_timecode){0};
4158+    buf->sequence = 0;
4159+
4160+    return avbuf;
4161+}
4162+
4163+int
4164+ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1)
4165+{
4166+    V4L2Buffer * avbuf;
4167+    if (timeout1 != 0) {
4168+        int rv = get_qbuf(ctx, &avbuf, timeout1);
4169+        if (rv != 0)
4170+            return rv;
4171+    }
4172+    do {
4173+        get_qbuf(ctx, &avbuf, 0);
4174+    } while (avbuf);
4175+    return 0;
4176 }
4177
4178 static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
4179 {
4180-    int timeout = 0; /* return when no more buffers to dequeue */
4181     int i;
4182
4183     /* get back as many output buffers as possible */
4184-    if (V4L2_TYPE_IS_OUTPUT(ctx->type)) {
4185-          do {
4186-          } while (v4l2_dequeue_v4l2buf(ctx, timeout));
4187-    }
4188+    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
4189+        ff_v4l2_dq_all(ctx, 0);
4190
4191     for (i = 0; i < ctx->num_buffers; i++) {
4192-        if (ctx->buffers[i].status == V4L2BUF_AVAILABLE)
4193-            return &ctx->buffers[i];
4194+        V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
4195+        if (avbuf->status == V4L2BUF_AVAILABLE)
4196+            return clean_v4l2_buffer(avbuf);
4197     }
4198
4199     return NULL;
4200@@ -438,25 +737,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(
4201
4202 static int v4l2_release_buffers(V4L2Context* ctx)
4203 {
4204-    struct v4l2_requestbuffers req = {
4205-        .memory = V4L2_MEMORY_MMAP,
4206-        .type = ctx->type,
4207-        .count = 0, /* 0 -> unmaps buffers from the driver */
4208-    };
4209-    int i, j;
4210+    int i;
4211+    int ret = 0;
4212+    const int fd = ctx_to_m2mctx(ctx)->fd;
4213
4214-    for (i = 0; i < ctx->num_buffers; i++) {
4215-        V4L2Buffer *buffer = &ctx->buffers[i];
4216+    // Orphan any buffers in the wild
4217+    ff_weak_link_break(&ctx->wl_master);
4218
4219-        for (j = 0; j < buffer->num_planes; j++) {
4220-            struct V4L2Plane_info *p = &buffer->plane_info[j];
4221-            if (p->mm_addr && p->length)
4222-                if (munmap(p->mm_addr, p->length) < 0)
4223-                    av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno)));
4224+    if (ctx->bufrefs) {
4225+        for (i = 0; i < ctx->num_buffers; i++)
4226+            av_buffer_unref(ctx->bufrefs + i);
4227+    }
4228+
4229+    if (fd != -1) {
4230+        struct v4l2_requestbuffers req = {
4231+            .memory = V4L2_MEMORY_MMAP,
4232+            .type = ctx->type,
4233+            .count = 0, /* 0 -> unmap all buffers from the driver */
4234+        };
4235+
4236+        while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1) {
4237+            if (errno == EINTR)
4238+                continue;
4239+
4240+            ret = AVERROR(errno);
4241+
4242+            av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n",
4243+                ctx->name, av_err2str(AVERROR(errno)));
4244+
4245+            if (ctx_to_m2mctx(ctx)->output_drm)
4246+                av_log(logger(ctx), AV_LOG_ERROR,
4247+                    "Make sure the DRM client releases all FB/GEM objects before closing the codec (ie):\n"
4248+                    "for all buffers: \n"
4249+                    "  1. drmModeRmFB(..)\n"
4250+                    "  2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... )\n");
4251         }
4252     }
4253+    atomic_store(&ctx->q_count, 0);
4254
4255-    return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req);
4256+    return ret;
4257 }
4258
4259 static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt)
4260@@ -485,6 +804,8 @@ static inline int v4l2_try_raw_format(V4
4261
4262 static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p)
4263 {
4264+    V4L2m2mContext* s = ctx_to_m2mctx(ctx);
4265+    V4L2m2mPriv *priv = s->avctx->priv_data;
4266     enum AVPixelFormat pixfmt = ctx->av_pix_fmt;
4267     struct v4l2_fmtdesc fdesc;
4268     int ret;
4269@@ -498,21 +819,22 @@ static int v4l2_get_raw_format(V4L2Conte
4270             return 0;
4271     }
4272
4273-    for (;;) {
4274+    for (;; ++fdesc.index) {
4275         ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc);
4276         if (ret)
4277             return AVERROR(EINVAL);
4278
4279+        if (priv->pix_fmt != AV_PIX_FMT_NONE) {
4280+            if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt))
4281+                continue;
4282+        }
4283+
4284         pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO);
4285         ret = v4l2_try_raw_format(ctx, pixfmt);
4286-        if (ret){
4287-            fdesc.index++;
4288-            continue;
4289+        if (ret == 0) {
4290+            *p = pixfmt;
4291+            return 0;
4292         }
4293-
4294-        *p = pixfmt;
4295-
4296-        return 0;
4297     }
4298
4299     return AVERROR(EINVAL);
4300@@ -555,30 +877,131 @@ static int v4l2_get_coded_format(V4L2Con
4301   *
4302   *****************************************************************************/
4303
4304-int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
4305+
4306+static void flush_all_buffers_status(V4L2Context* const ctx)  // called by set_streamoff after a successful VIDIOC_STREAMOFF
4307+{
4308+    int i;
4309+    // Nothing to reset if no buffer references have been allocated
4310+    if (!ctx->bufrefs)
4311+        return;
4312+    // Mark every buffer still flagged V4L2BUF_IN_DRIVER as available again
4313+    for (i = 0; i < ctx->num_buffers; ++i) {
4314+        struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
4315+        if (buf->status == V4L2BUF_IN_DRIVER)
4316+            ff_v4l2_buffer_set_avail(buf);
4317+    }
4318+    atomic_store(&ctx->q_count, 0);  // and zero the queued-buffer counter that dq_ok() tests
4319+}
4320+
4321+static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx)
4322+{
4323+    int i;
4324+    int rv = 0;  // must start at 0: the loop below may not enqueue anything
4325+    // Allocate the buffers on first use; an allocation failure is returned to the caller
4326+    if (!ctx->bufrefs) {
4327+        rv = ff_v4l2_context_init(ctx);
4328+        if (rv) {
4329+            av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n");
4330+            return rv;
4331+        }
4332+    }
4333+    // Enqueue every AVAILABLE buffer under the context lock, stopping at the first negative result
4334+    ff_mutex_lock(&ctx->lock);
4335+    for (i = 0; i < ctx->num_buffers; ++i) {
4336+        struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
4337+        if (buf->status == V4L2BUF_AVAILABLE) {
4338+            rv = ff_v4l2_buffer_enqueue(buf);
4339+            if (rv < 0)
4340+                break;
4341+        }
4342+    }
4343+    ff_mutex_unlock(&ctx->lock);
4344+    return rv;  // last enqueue result, or 0 if nothing needed queueing
4345+}
4346+
4347+static int set_streamon(AVCodecContext * const avctx, V4L2Context*const ctx)
4348 {
4349     int type = ctx->type;
4350-    int ret;
4351+    int ret = 0;
4352
4353-    ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type);
4354-    if (ret < 0)
4355-        return AVERROR(errno);
4356+    if (!V4L2_TYPE_IS_OUTPUT(ctx->type))
4357+        stuff_all_buffers(avctx, ctx);
4358
4359-    ctx->streamon = (cmd == VIDIOC_STREAMON);
4360+    if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMON, &type) < 0) {
4361+        ret = AVERROR(errno);
4362+        av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name,
4363+               av_err2str(ret));
4364+        return ret;
4365+    }
4366
4367-    return 0;
4368+    ctx->first_buf = 1;
4369+    ctx->streamon = 1;
4370+    ctx->flag_last = 0;
4371+    av_log(avctx, AV_LOG_DEBUG, "%s set status ON OK\n", ctx->name);
4372+    return ret;
4373+}
4374+
4375+static int set_streamoff(AVCodecContext * const avctx, V4L2Context*const ctx)
4376+{
4377+    int type = ctx->type;
4378+    int ret = 0;
4379+    const int has_bufs = ctx_buffers_alloced(ctx);
4380+    // Issue VIDIOC_STREAMOFF for ctx; returns 0 or AVERROR(errno) from the ioctl
4381+    // Avoid doing anything if there is nothing we can do
4382+    if (!has_bufs && !ctx->streamon)
4383+        return 0;
4384+    // NOTE(review): lock taken only when buffers exist - presumably ctx->lock is only valid then; confirm
4385+    if (has_bufs)
4386+        ff_mutex_lock(&ctx->lock);
4387+
4388+    if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMOFF, &type) < 0) {
4389+        ret = AVERROR(errno);
4390+        av_log(avctx, AV_LOG_ERROR, "%s set status OFF failed: err=%s\n", ctx->name,
4391+               av_err2str(ret));
4392+    }
4393+    else {
4394+        flush_all_buffers_status(ctx);
4395+        // Stream is stopped: clear the streaming and last-buffer flags
4396+        ctx->streamon = 0;
4397+        ctx->flag_last = 0;
4398+
4399+        av_log(avctx, AV_LOG_DEBUG, "%s set status OFF OK\n", ctx->name);
4400+    }
4401+
4402+    if (has_bufs)
4403+        ff_mutex_unlock(&ctx->lock);
4404+    return ret;
4405+}
4406+
4407+
4408+int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
4409+{
4410+    AVCodecContext * const avctx = logger(ctx);
4411+
4412+    switch (cmd) {
4413+        case VIDIOC_STREAMOFF:
4414+            return set_streamoff(avctx, ctx);
4415+        case VIDIOC_STREAMON:
4416+            return set_streamon(avctx, ctx);
4417+        default:
4418+            av_log(avctx, AV_LOG_ERROR, "%s: Unexpected cmd: %d\n", __func__, cmd);
4419+            break;
4420+    }
4421+    return AVERROR_BUG;
4422 }
4423
4424 int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
4425 {
4426-    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
4427+    V4L2m2mContext *const s = ctx_to_m2mctx(ctx);
4428+    AVCodecContext *const avctx = s->avctx;
4429+    int64_t track_ts;
4430     V4L2Buffer* avbuf;
4431     int ret;
4432
4433     if (!frame) {
4434         ret = v4l2_stop_encode(ctx);
4435         if (ret)
4436-            av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name);
4437+            av_log(avctx, AV_LOG_ERROR, "%s stop_encode\n", ctx->name);
4438         s->draining= 1;
4439         return 0;
4440     }
4441@@ -587,23 +1010,29 @@ int ff_v4l2_context_enqueue_frame(V4L2Co
4442     if (!avbuf)
4443         return AVERROR(EAGAIN);
4444
4445-    ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf);
4446+    track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame);
4447+
4448+    ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts);
4449     if (ret)
4450         return ret;
4451
4452     return ff_v4l2_buffer_enqueue(avbuf);
4453 }
4454
4455-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
4456+int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt,
4457+                                   const void * extdata, size_t extlen)
4458 {
4459     V4L2m2mContext *s = ctx_to_m2mctx(ctx);
4460+    AVCodecContext *const avctx = s->avctx;
4461     V4L2Buffer* avbuf;
4462     int ret;
4463+    int64_t track_ts;
4464
4465     if (!pkt->size) {
4466         ret = v4l2_stop_decode(ctx);
4467+        // Log but otherwise ignore stop failure
4468         if (ret)
4469-            av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name);
4470+            av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret);
4471         s->draining = 1;
4472         return 0;
4473     }
4474@@ -612,8 +1041,13 @@ int ff_v4l2_context_enqueue_packet(V4L2C
4475     if (!avbuf)
4476         return AVERROR(EAGAIN);
4477
4478-    ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf);
4479-    if (ret)
4480+    track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt);
4481+
4482+    ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts);
4483+    if (ret == AVERROR(ENOMEM))
4484+        av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n",
4485+               __func__, pkt->size, avbuf->planes[0].length);
4486+    else if (ret)
4487         return ret;
4488
4489     return ff_v4l2_buffer_enqueue(avbuf);
4490@@ -621,42 +1055,77 @@ int ff_v4l2_context_enqueue_packet(V4L2C
4491
4492 int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout)
4493 {
4494+    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
4495+    AVCodecContext *const avctx = s->avctx;
4496     V4L2Buffer *avbuf;
4497+    int rv;
4498
4499-    /*
4500-     * timeout=-1 blocks until:
4501-     *  1. decoded frame available
4502-     *  2. an input buffer is ready to be dequeued
4503-     */
4504-    avbuf = v4l2_dequeue_v4l2buf(ctx, timeout);
4505-    if (!avbuf) {
4506-        if (ctx->done)
4507-            return AVERROR_EOF;
4508-
4509-        return AVERROR(EAGAIN);
4510-    }
4511+    do {
4512+        if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0)
4513+            return rv;
4514+        if ((rv = ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0)
4515+            return rv;
4516+    } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0);
4517
4518-    return ff_v4l2_buffer_buf_to_avframe(frame, avbuf);
4519+   return 0;
4520 }
4521
4522-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt)
4523+int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout)
4524 {
4525+    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
4526+    AVCodecContext *const avctx = s->avctx;
4527     V4L2Buffer *avbuf;
4528+    int rv;
4529
4530-    /*
4531-     * blocks until:
4532-     *  1. encoded packet available
4533-     *  2. an input buffer ready to be dequeued
4534-     */
4535-    avbuf = v4l2_dequeue_v4l2buf(ctx, -1);
4536-    if (!avbuf) {
4537-        if (ctx->done)
4538-            return AVERROR_EOF;
4539+    do {
4540+        if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0)
4541+            return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv;  // Caller not currently expecting ENOSPC
4542+        if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0)
4543+            return rv;
4544+    } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0);
4545
4546-        return AVERROR(EAGAIN);
4547+    return 0;
4548+}
4549+
4550+// Return 0 terminated list of drm fourcc video formats for this context
4551+// NULL if none found or error
4552+// Returned list is malloced so must be freed
4553+uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN)
4554+{
4555+    unsigned int i;
4556+    unsigned int n = 0;
4557+    unsigned int size = 0;
4558+    uint32_t * e = NULL;
4559+    if (pN) *pN = 0;  // pN is optional (the loop already null-checks it), so guard every write
4560+
4561+    for (i = 0; i < 1024; ++i) {
4562+        struct v4l2_fmtdesc fdesc = {
4563+            .index = i,
4564+            .type = ctx->type
4565+        };
4566+
4567+        if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc))
4568+            return e;  // enumeration finished (or failed): return what has been collected so far
4569+
4570+        if (n + 1 >= size) {  // need room for this entry plus the 0 terminator
4571+            unsigned int newsize = (size == 0) ? 16 : size * 2;
4572+            uint32_t * t = av_realloc(e, newsize * sizeof(*t));
4573+            if (!t)
4574+                return e;  // out of memory: the existing list is still 0 terminated
4575+            e = t;
4576+            size = newsize;
4577+        }
4578+
4579+        e[n] = fdesc.pixelformat;
4580+        e[++n] = 0;
4581+        if (pN)
4582+            *pN = n;
4583     }
4584
4585-    return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf);
4586+    // If we've looped 1024 times we are clearly confused
4587+    if (pN) *pN = 0;
4588+    av_free(e);
4589+    return NULL;
4590 }
4591
4592 int ff_v4l2_context_get_format(V4L2Context* ctx, int probe)
4593@@ -688,78 +1157,194 @@ int ff_v4l2_context_get_format(V4L2Conte
4594
4595 int ff_v4l2_context_set_format(V4L2Context* ctx)
4596 {
4597-    return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format);
4598+    int ret;
4599+
4600+    ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format);
4601+    if (ret != 0)
4602+        return ret;
4603+
4604+    // Check returned size against min size and if smaller have another go
4605+    // Only worry about plane[0] as this is meant to enforce limits for
4606+    // encoded streams where we might know a bit more about the shape
4607+    // than the driver
4608+    if (V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type)) {
4609+        if (ctx->min_buf_size <= ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage)
4610+            return 0;
4611+        ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage = ctx->min_buf_size;
4612+    }
4613+    else {
4614+        if (ctx->min_buf_size <= ctx->format.fmt.pix.sizeimage)
4615+            return 0;
4616+        ctx->format.fmt.pix.sizeimage = ctx->min_buf_size;
4617+    }
4618+
4619+    ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format);
4620+    return ret;
4621 }
4622
4623 void ff_v4l2_context_release(V4L2Context* ctx)
4624 {
4625     int ret;
4626
4627-    if (!ctx->buffers)
4628+    if (!ctx->bufrefs)
4629         return;
4630
4631     ret = v4l2_release_buffers(ctx);
4632     if (ret)
4633         av_log(logger(ctx), AV_LOG_WARNING, "V4L2 failed to unmap the %s buffers\n", ctx->name);
4634
4635-    av_freep(&ctx->buffers);
4636+    av_freep(&ctx->bufrefs);
4637+    av_buffer_unref(&ctx->frames_ref);
4638+
4639+    ff_mutex_destroy(&ctx->lock);
4640+    pthread_cond_destroy(&ctx->cond);
4641 }
4642
4643-int ff_v4l2_context_init(V4L2Context* ctx)
4644+
4645+static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem)
4646 {
4647-    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
4648+    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
4649     struct v4l2_requestbuffers req;
4650-    int ret, i;
4651-
4652-    if (!v4l2_type_supported(ctx)) {
4653-        av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type);
4654-        return AVERROR_PATCHWELCOME;
4655-    }
4656+    int ret;
4657+    int i;
4658
4659-    ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format);
4660-    if (ret)
4661-        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", ctx->name);
4662+    av_assert0(ctx->bufrefs == NULL);
4663
4664     memset(&req, 0, sizeof(req));
4665-    req.count = ctx->num_buffers;
4666-    req.memory = V4L2_MEMORY_MMAP;
4667+    req.count = req_buffers;
4668+    req.memory = mem;
4669     req.type = ctx->type;
4670-    ret = ioctl(s->fd, VIDIOC_REQBUFS, &req);
4671-    if (ret < 0) {
4672-        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, strerror(errno));
4673-        return AVERROR(errno);
4674+    while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) {
4675+        if (errno != EINTR) {
4676+            ret = AVERROR(errno);
4677+            av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, av_err2str(ret));
4678+            return ret;
4679+        }
4680     }
4681
4682     ctx->num_buffers = req.count;
4683-    ctx->buffers = av_mallocz(ctx->num_buffers * sizeof(V4L2Buffer));
4684-    if (!ctx->buffers) {
4685+    ctx->bufrefs = av_mallocz(ctx->num_buffers * sizeof(*ctx->bufrefs));
4686+    if (!ctx->bufrefs) {
4687         av_log(logger(ctx), AV_LOG_ERROR, "%s malloc enomem\n", ctx->name);
4688-        return AVERROR(ENOMEM);
4689+        ret = AVERROR(ENOMEM); // ret still holds the successful REQBUFS result here
4690+        goto fail_release;
4691     }
4692
4693-    for (i = 0; i < req.count; i++) {
4694-        ctx->buffers[i].context = ctx;
4695-        ret = ff_v4l2_buffer_initialize(&ctx->buffers[i], i);
4696-        if (ret < 0) {
4697+    ctx->wl_master = ff_weak_link_new(ctx);
4698+    if (!ctx->wl_master) {
4699+        ret = AVERROR(ENOMEM);
4700+        goto fail_release;
4701+    }
4702+    for (i = 0; i < ctx->num_buffers; i++) {
4703+        ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem);
4704+        if (ret) {
4705             av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret));
4706-            goto error;
4707+            goto fail_release;
4708         }
4709     }
4710
4711     av_log(logger(ctx), AV_LOG_DEBUG, "%s: %s %02d buffers initialized: %04ux%04u, sizeimage %08u, bytesperline %08u\n", ctx->name,
4712         V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? av_fourcc2str(ctx->format.fmt.pix_mp.pixelformat) : av_fourcc2str(ctx->format.fmt.pix.pixelformat),
4713         req.count,
4714-        v4l2_get_width(&ctx->format),
4715-        v4l2_get_height(&ctx->format),
4716+        ff_v4l2_get_format_width(&ctx->format),
4717+        ff_v4l2_get_format_height(&ctx->format),
4718         V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage : ctx->format.fmt.pix.sizeimage,
4719         V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].bytesperline : ctx->format.fmt.pix.bytesperline);
4720
4721     return 0;
4722
4723-error:
4724+fail_release:
4725     v4l2_release_buffers(ctx);
4726+    av_freep(&ctx->bufrefs);
4727+    return ret;
4728+}
4729+
4730+int ff_v4l2_context_frames_set(V4L2Context *const ctx)
4731+{
4732+    AVHWFramesContext *hwframes;
4733+    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
4734+    const int w = ctx->width != 0 ? ctx->width : s->avctx->width;
4735+    const int h = ctx->height != 0 ? ctx->height : s->avctx->height;
4736+    int ret;
4737+
4738+    if (ctx->frames_ref != NULL) {
4739+        const AVHWFramesContext * const hwf = (AVHWFramesContext*)ctx->frames_ref->data;
4740+        if (hwf->sw_format == ctx->av_pix_fmt && hwf->width == w && hwf->height == h)
4741+            return 0;
4742+        av_buffer_unref(&ctx->frames_ref);
4743+    }
4744+
4745+    ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref);
4746+    if (!ctx->frames_ref)
4747+        return AVERROR(ENOMEM);
4748+
4749+    hwframes = (AVHWFramesContext*)ctx->frames_ref->data;
4750+    hwframes->format = AV_PIX_FMT_DRM_PRIME;
4751+    hwframes->sw_format = ctx->av_pix_fmt;
4752+    hwframes->width = w;
4753+    hwframes->height = h;
4754+    ret = av_hwframe_ctx_init(ctx->frames_ref);
4755+    if (ret < 0) {
4756+        av_log(s->avctx, AV_LOG_ERROR, "Failed to create hwframes context: %s\n", av_err2str(ret));
4757+        av_buffer_unref(&ctx->frames_ref);
4758+        return ret;
4759+    }
4760+
4761+    av_log(s->avctx, AV_LOG_DEBUG, "%s: HWFramesContext set to %s, %dx%d\n", __func__,
4762+           av_get_pix_fmt_name(ctx->av_pix_fmt), w, h);
4763+    return 0;
4764+}
4765+
4766+int ff_v4l2_context_init(V4L2Context* ctx)
4767+{
4768+    struct v4l2_queryctrl qctrl;
4769+    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
4770+    int ret;
4771+
4772+    // It is not valid to reinit a context without a previous release
4773+    av_assert0(ctx->bufrefs == NULL);
4774+
4775+    if (!v4l2_type_supported(ctx)) {
4776+        av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type);
4777+        return AVERROR_PATCHWELCOME;
4778+    }
4779+
4780+    ff_mutex_init(&ctx->lock, NULL);
4781+    pthread_cond_init(&ctx->cond, NULL);
4782+    atomic_init(&ctx->q_count, 0);
4783+
4784+    ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format);
4785+    if (ret) {
4786+        ret = AVERROR(errno);
4787+        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret));
4788+        goto fail_unlock;
4789+    }
4790+
4791+    memset(&qctrl, 0, sizeof(qctrl));
4792+    qctrl.id = V4L2_CID_MIN_BUFFERS_FOR_OUTPUT;
4793+    if (ioctl(s->fd, VIDIOC_QUERYCTRL, &qctrl) != 0) {
4794+        ret = AVERROR(errno);
4795+        if (ret != AVERROR(EINVAL)) {
4796+            av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERYCTRL failed: %s\n", ctx->name, av_err2str(ret));
4797+            goto fail_unlock;
4798+        }
4799+        // Control unsupported - set default if wanted
4800+        if (ctx->num_buffers < 2)
4801+            ctx->num_buffers = 4;
4802+    }
4803+    else {
4804+        if (ctx->num_buffers < 2)
4805+            ctx->num_buffers = qctrl.minimum + 2;
4806+        ctx->num_buffers = av_clip(ctx->num_buffers, qctrl.minimum, qctrl.maximum);
4807+    }
4808
4809-    av_freep(&ctx->buffers);
4810+    ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem);
4811+    if (ret < 0)
4812+        goto fail_unlock;
4813+    return 0;
4814
4815+fail_unlock:
4816+    ff_mutex_destroy(&ctx->lock);
4817+    pthread_cond_destroy(&ctx->cond); // initialised together with the lock, so tear down both
4818     return ret;
4819 }
4820--- a/libavcodec/v4l2_context.h
4821+++ b/libavcodec/v4l2_context.h
4822@@ -32,6 +32,8 @@
4823 #include "libavutil/rational.h"
4824 #include "codec_id.h"
4825 #include "packet.h"
4826+#include "libavutil/buffer.h"
4827+#include "libavutil/thread.h"
4828 #include "v4l2_buffers.h"
4829
4830 typedef struct V4L2Context {
4831@@ -71,11 +73,18 @@ typedef struct V4L2Context {
4832      */
4833     int width, height;
4834     AVRational sample_aspect_ratio;
4835+    struct v4l2_rect selection;
4836
4837     /**
4838-     * Indexed array of V4L2Buffers
4839+     * If the default size of buffer is less than this then try to
4840+     * set to this.
4841      */
4842-    V4L2Buffer *buffers;
4843+    uint32_t min_buf_size;
4844+
4845+    /**
4846+     * Indexed array of pointers to V4L2Buffers
4847+     */
4848+    AVBufferRef **bufrefs;
4849
4850     /**
4851      * Readonly after init.
4852@@ -83,16 +92,38 @@ typedef struct V4L2Context {
4853     int num_buffers;
4854
4855     /**
4856+     * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF
4857+     */
4858+    enum v4l2_memory buf_mem;
4859+
4860+    /**
4861      * Whether the stream has been started (VIDIOC_STREAMON has been sent).
4862      */
4863     int streamon;
4864
4865+    /* 1st buffer after stream on */
4866+    int first_buf;
4867+
4868     /**
4869      *  Either no more buffers available or an unrecoverable error was notified
4870      *  by the V4L2 kernel driver: once set the context has to be exited.
4871      */
4872     int done;
4873
4874+    int flag_last;
4875+
4876+    /**
4877+     * If NZ then when Qing frame/pkt use this rather than the
4878+     * "real" PTS
4879+     */
4880+    uint64_t track_ts;
4881+
4882+    AVBufferRef *frames_ref;
4883+    atomic_int q_count;
4884+    struct ff_weak_link_master *wl_master;
4885+
4886+    AVMutex lock;
4887+    pthread_cond_t cond;
4888 } V4L2Context;
4889
4890 /**
4891@@ -104,6 +135,14 @@ typedef struct V4L2Context {
4892 int ff_v4l2_context_init(V4L2Context* ctx);
4893
4894 /**
4895+ * (re)set the hwframecontext from the current v4l2 context
4896+ *
4897+ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context description for required variables.
4898+ * @return 0 in case of success, a negative value representing the error otherwise.
4899+ */
4900+int ff_v4l2_context_frames_set(V4L2Context *const ctx);
4901+
4902+/**
4903  * Sets the V4L2Context format in the v4l2 driver.
4904  *
4905  * @param[in] ctx A pointer to a V4L2Context. See V4L2Context description for required variables.
4906@@ -121,6 +160,19 @@ int ff_v4l2_context_set_format(V4L2Conte
4907 int ff_v4l2_context_get_format(V4L2Context* ctx, int probe);
4908
4909 /**
4910+ * Get the list of drm fourcc pixel formats for this context
4911+ *
4912+ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context
4913+ *       description for required variables.
4914+ * @param[in] pN A pointer to receive the number of formats
4915+ *       found. May be NULL if not wanted.
4916+ * @return Pointer to malloced list of zero terminated formats,
4917+ *         NULL if none or error. As list is malloced it must be
4918+ *         freed.
4919+ */
4920+uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN);
4921+
4922+/**
4923  * Releases a V4L2Context.
4924  *
4925  * @param[in] ctx A pointer to a V4L2Context.
4926@@ -148,7 +200,7 @@ int ff_v4l2_context_set_status(V4L2Conte
4927  * @param[inout] pkt The AVPacket to dequeue to.
4928  * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error.
4929  */
4930-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt);
4931+int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout);
4932
4933 /**
4934  * Dequeues a buffer from a V4L2Context to an AVFrame.
4935@@ -157,7 +209,10 @@ int ff_v4l2_context_dequeue_packet(V4L2C
4936  * @param[in] ctx The V4L2Context to dequeue from.
4937  * @param[inout] f The AVFrame to dequeue to.
4938  * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds)
4939+ *
4940  * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error.
4941+ *                AVERROR(ENOSPC) if no buffer available to put
4942+ *                the frame in
4943  */
4944 int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout);
4945
4946@@ -171,7 +226,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Co
4947  * @param[in] pkt A pointer to an AVPacket.
4948  * @return 0 in case of success, a negative error otherwise.
4949  */
4950-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt);
4951+int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size);
4952
4953 /**
4954  * Enqueues a buffer to a V4L2Context from an AVFrame
4955@@ -184,4 +239,28 @@ int ff_v4l2_context_enqueue_packet(V4L2C
4956  */
4957 int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f);
4958
4959+/**
4960+ * Dequeue all buffers on this queue
4961+ *
4962+ * Used to recycle output buffers
4963+ *
4964+ * @param[in] ctx The V4L2Context to dequeue from.
4965+ * @param[in] timeout1 A timeout on dequeuing the 1st buffer,
4966+ *       all others have a timeout of zero
4967+ * @return AVERROR(EAGAIN) if timeout1 non-zero then the return
4968+ *         of the first dequeue operation, 0 otherwise.
4969+ */
4970+int ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1);
4971+
4972+/**
4973+ * Returns the number of buffers currently queued
4974+ *
4975+ * @param[in] ctx The V4L2Context to evaluate
4976+ */
4977+static inline int
4978+ff_v4l2_context_q_count(const V4L2Context* const ctx)
4979+{
4980+    return atomic_load(&ctx->q_count);
4981+}
4982+
4983 #endif // AVCODEC_V4L2_CONTEXT_H
4984--- a/libavcodec/v4l2_fmt.c
4985+++ b/libavcodec/v4l2_fmt.c
4986@@ -42,6 +42,14 @@ static const struct fmt_conversion {
4987     { AV_FMT(RGB24),       AV_CODEC(RAWVIDEO),    V4L2_FMT(RGB24) },
4988     { AV_FMT(BGR0),        AV_CODEC(RAWVIDEO),    V4L2_FMT(BGR32) },
4989     { AV_FMT(0RGB),        AV_CODEC(RAWVIDEO),    V4L2_FMT(RGB32) },
4990+    { AV_FMT(BGR0),        AV_CODEC(RAWVIDEO),    V4L2_FMT(BGRX32) },
4991+    { AV_FMT(RGB0),        AV_CODEC(RAWVIDEO),    V4L2_FMT(RGBX32) },
4992+    { AV_FMT(0BGR),        AV_CODEC(RAWVIDEO),    V4L2_FMT(XBGR32) },
4993+    { AV_FMT(0RGB),        AV_CODEC(RAWVIDEO),    V4L2_FMT(XRGB32) },
4994+    { AV_FMT(BGRA),        AV_CODEC(RAWVIDEO),    V4L2_FMT(BGRA32) },
4995+    { AV_FMT(RGBA),        AV_CODEC(RAWVIDEO),    V4L2_FMT(RGBA32) },
4996+    { AV_FMT(ABGR),        AV_CODEC(RAWVIDEO),    V4L2_FMT(ABGR32) },
4997+    { AV_FMT(ARGB),        AV_CODEC(RAWVIDEO),    V4L2_FMT(ARGB32) },
4998     { AV_FMT(GRAY8),       AV_CODEC(RAWVIDEO),    V4L2_FMT(GREY) },
4999     { AV_FMT(YUV420P),     AV_CODEC(RAWVIDEO),    V4L2_FMT(YUV420) },
5000     { AV_FMT(YUYV422),     AV_CODEC(RAWVIDEO),    V4L2_FMT(YUYV) },
5001--- a/libavcodec/v4l2_m2m.c
5002+++ b/libavcodec/v4l2_m2m.c
5003@@ -34,6 +34,15 @@
5004 #include "v4l2_context.h"
5005 #include "v4l2_fmt.h"
5006 #include "v4l2_m2m.h"
5007+#include "v4l2_req_dmabufs.h"
5008+
5009+static void
5010+xlat_init(xlat_track_t * const x)
5011+{
5012+    memset(x, 0, sizeof(*x));
5013+    x->last_pts = AV_NOPTS_VALUE;
5014+}
5015+
5016
5017 static inline int v4l2_splane_video(struct v4l2_capability *cap)
5018 {
5019@@ -67,7 +76,9 @@ static int v4l2_prepare_contexts(V4L2m2m
5020
5021     s->capture.done = s->output.done = 0;
5022     s->capture.name = "capture";
5023+    s->capture.buf_mem = s->db_ctl != NULL ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
5024     s->output.name = "output";
5025+    s->output.buf_mem = s->input_drm ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
5026     atomic_init(&s->refcount, 0);
5027     sem_init(&s->refsync, 0, 0);
5028
5029@@ -84,18 +95,58 @@ static int v4l2_prepare_contexts(V4L2m2m
5030     if (v4l2_mplane_video(&cap)) {
5031         s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
5032         s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
5033+        s->output.format.type = s->output.type;
5034         return 0;
5035     }
5036
5037     if (v4l2_splane_video(&cap)) {
5038         s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
5039         s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
5040+        s->output.format.type = s->output.type;
5041         return 0;
5042     }
5043
5044     return AVERROR(EINVAL);
5045 }
5046
5047+static int check_size(AVCodecContext * const avctx, V4L2m2mContext * const s)
5048+{
5049+    struct v4l2_format fmt = {.type = s->output.type};
5050+    int rv;
5051+    uint32_t pixfmt = ff_v4l2_format_avfmt_to_v4l2(avctx->pix_fmt);
5052+    unsigned int w;
5053+    unsigned int h;
5054+
5055+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) {
5056+        fmt.fmt.pix_mp.pixelformat = pixfmt;
5057+        fmt.fmt.pix_mp.width = avctx->width;
5058+        fmt.fmt.pix_mp.height = avctx->height;
5059+    }
5060+    else {
5061+        fmt.fmt.pix.pixelformat = pixfmt;
5062+        fmt.fmt.pix.width = avctx->width;
5063+        fmt.fmt.pix.height = avctx->height;
5064+    }
5065+
5066+    rv = ioctl(s->fd, VIDIOC_TRY_FMT, &fmt);
5067+
5068+    if (rv != 0) {
5069+        rv = AVERROR(errno);
5070+        av_log(avctx, AV_LOG_ERROR, "%s: Tryfmt failed: %s\n", __func__, av_err2str(rv));
5071+        return rv;
5072+    }
5073+
5074+    w = ff_v4l2_get_format_width(&fmt);
5075+    h = ff_v4l2_get_format_height(&fmt);
5076+
5077+    if (w < avctx->width || h < avctx->height) {
5078+        av_log(avctx, AV_LOG_WARNING, "%s: Size check failed: asked for %dx%d, got: %dx%d\n", __func__, avctx->width, avctx->height, w, h);
5079+        return AVERROR(EINVAL);
5080+    }
5081+
5082+    return 0;
5083+}
5084+
5085 static int v4l2_probe_driver(V4L2m2mContext *s)
5086 {
5087     void *log_ctx = s->avctx;
5088@@ -115,6 +166,11 @@ static int v4l2_probe_driver(V4L2m2mCont
5089         goto done;
5090     }
5091
5092+    // If being given frames (encode) check that V4L2 can cope with the size
5093+    if (s->output.av_codec_id == AV_CODEC_ID_RAWVIDEO &&
5094+        (ret = check_size(s->avctx, s)) != 0)
5095+        goto done;
5096+
5097     ret = ff_v4l2_context_get_format(&s->capture, 1);
5098     if (ret) {
5099         av_log(log_ctx, AV_LOG_DEBUG, "v4l2 capture format not supported\n");
5100@@ -214,13 +270,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont
5101         av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n");
5102
5103     /* 2. unmap the capture buffers (v4l2 and ffmpeg):
5104-     *    we must wait for all references to be released before being allowed
5105-     *    to queue new buffers.
5106      */
5107-    av_log(log_ctx, AV_LOG_DEBUG, "waiting for user to release AVBufferRefs\n");
5108-    if (atomic_load(&s->refcount))
5109-        while(sem_wait(&s->refsync) == -1 && errno == EINTR);
5110-
5111     ff_v4l2_context_release(&s->capture);
5112
5113     /* 3. get the new capture format */
5114@@ -239,7 +289,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont
5115
5116     /* 5. complete reinit */
5117     s->draining = 0;
5118-    s->reinit = 0;
5119
5120     return 0;
5121 }
5122@@ -256,6 +305,9 @@ static void v4l2_m2m_destroy_context(voi
5123     av_frame_unref(s->frame);
5124     av_frame_free(&s->frame);
5125     av_packet_unref(&s->buf_pkt);
5126+    av_freep(&s->extdata_data);
5127+
5128+    av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n");
5129
5130     av_free(s);
5131 }
5132@@ -268,6 +320,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *p
5133     if (!s)
5134         return 0;
5135
5136+    av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n");
5137+
5138+    if (s->avctx && av_codec_is_decoder(s->avctx->codec))
5139+        av_packet_unref(&s->buf_pkt);
5140+
5141     if (s->fd >= 0) {
5142         ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF);
5143         if (ret)
5144@@ -279,8 +336,20 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *p
5145     }
5146
5147     ff_v4l2_context_release(&s->output);
5148+    av_buffer_unref(&s->device_ref);
5149+
5150+    dmabufs_ctl_unref(&s->db_ctl);
5151+
5152+    if (s->fd != -1) {
5153+        close(s->fd);
5154+        s->fd = -1;
5155+    }
5156
5157     s->self_ref = NULL;
5158+    // This is only called on avctx close so after this point we don't have that
5159+    // Crash sooner if we find we are using it (can still log with avctx = NULL)
5160+    s->avctx = NULL;
5161+    priv->context = NULL;
5162     av_buffer_unref(&priv->context_ref);
5163
5164     return 0;
5165@@ -324,35 +393,38 @@ int ff_v4l2_m2m_codec_init(V4L2m2mPriv *
5166     return v4l2_configure_contexts(s);
5167 }
5168
5169-int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s)
5170+int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **pps)
5171 {
5172-    *s = av_mallocz(sizeof(V4L2m2mContext));
5173-    if (!*s)
5174+    V4L2m2mContext * const s = av_mallocz(sizeof(V4L2m2mContext));
5175+    *pps = NULL;
5176+    if (!s)
5177         return AVERROR(ENOMEM);
5178
5179-    priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext),
5180+    priv->context_ref = av_buffer_create((uint8_t *)s, sizeof(*s),
5181                                          &v4l2_m2m_destroy_context, NULL, 0);
5182     if (!priv->context_ref) {
5183-        av_freep(s);
5184+        av_free(s);
5185         return AVERROR(ENOMEM);
5186     }
5187
5188     /* assign the context */
5189-    priv->context = *s;
5190-    (*s)->priv = priv;
5191+    priv->context = s;
5192+    s->priv = priv;
5193
5194     /* populate it */
5195-    priv->context->capture.num_buffers = priv->num_capture_buffers;
5196-    priv->context->output.num_buffers  = priv->num_output_buffers;
5197-    priv->context->self_ref = priv->context_ref;
5198-    priv->context->fd = -1;
5199+    s->capture.num_buffers = priv->num_capture_buffers;
5200+    s->output.num_buffers  = priv->num_output_buffers;
5201+    s->self_ref = priv->context_ref;
5202+    s->fd = -1;
5203+    xlat_init(&s->xlat);
5204
5205     priv->context->frame = av_frame_alloc();
5206     if (!priv->context->frame) {
5207         av_buffer_unref(&priv->context_ref);
5208-        *s = NULL; /* freed when unreferencing context_ref */
5209+        priv->context = NULL; /* s was freed with context_ref; do not leave it dangling */
5210         return AVERROR(ENOMEM);
5211     }
5212
5213+    *pps = s;
5214     return 0;
5215 }
5216--- a/libavcodec/v4l2_m2m.h
5217+++ b/libavcodec/v4l2_m2m.h
5218@@ -30,6 +30,7 @@
5219 #include <linux/videodev2.h>
5220
5221 #include "libavcodec/avcodec.h"
5222+#include "libavutil/pixfmt.h"
5223 #include "v4l2_context.h"
5224
5225 #define container_of(ptr, type, member) ({ \
5226@@ -38,7 +39,39 @@
5227
5228 #define V4L_M2M_DEFAULT_OPTS \
5229     { "num_output_buffers", "Number of buffers in the output context",\
5230-        OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 6, INT_MAX, FLAGS }
5231+        OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 2, INT_MAX, FLAGS }
5232+
5233+#define FF_V4L2_M2M_TRACK_SIZE 128
5234+typedef struct V4L2m2mTrackEl {
5235+    int     discard;   // If we see this buffer its been flushed, so discard
5236+    int     pending;
5237+    int     pkt_size;
5238+    int64_t pts;
5239+    int64_t dts;
5240+    int64_t reordered_opaque;
5241+    int64_t pkt_pos;
5242+    int64_t pkt_duration;
5243+    int64_t track_pts;
5244+} V4L2m2mTrackEl;
5245+
5246+typedef struct pts_stats_s
5247+{
5248+    void * logctx;
5249+    const char * name;  // For debug
5250+    unsigned int last_count;
5251+    unsigned int last_interval;
5252+    int64_t last_pts;
5253+    int64_t guess;
5254+} pts_stats_t;
5255+
5256+typedef struct xlat_track_s {
5257+    unsigned int track_no;
5258+    int64_t last_pts;    // Last valid PTS decoded
5259+    int64_t last_opaque;
5260+    V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE];
5261+} xlat_track_t;
5262+
5263+struct dmabufs_ctl;
5264
5265 typedef struct V4L2m2mContext {
5266     char devname[PATH_MAX];
5267@@ -52,10 +85,10 @@ typedef struct V4L2m2mContext {
5268     AVCodecContext *avctx;
5269     sem_t refsync;
5270     atomic_uint refcount;
5271-    int reinit;
5272
5273     /* null frame/packet received */
5274     int draining;
5275+    int running;
5276     AVPacket buf_pkt;
5277
5278     /* Reference to a frame. Only used during encoding */
5279@@ -66,6 +99,36 @@ typedef struct V4L2m2mContext {
5280
5281     /* reference back to V4L2m2mPriv */
5282     void *priv;
5283+
5284+    AVBufferRef *device_ref;
5285+
5286+    /* generate DRM frames */
5287+    int output_drm;
5288+
5289+    /* input frames are drmprime */
5290+    int input_drm;
5291+
5292+    /* Frame tracking */
5293+    xlat_track_t xlat;
5294+
5295+    pts_stats_t pts_stat;
5296+
5297+    /* req pkt */
5298+    int req_pkt;
5299+    int reorder_size;
5300+
5301+    /* Ext data sent */
5302+    int extdata_sent;
5303+    /* Ext data sent in packet - overrides ctx */
5304+    void * extdata_data;
5305+    size_t extdata_size;
5306+
5307+#define FF_V4L2_QUIRK_REINIT_ALWAYS             1
5308+#define FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN    2
5309+    /* Quirks */
5310+    unsigned int quirks;
5311+
5312+    struct dmabufs_ctl * db_ctl;
5313 } V4L2m2mContext;
5314
5315 typedef struct V4L2m2mPriv {
5316@@ -76,6 +139,8 @@ typedef struct V4L2m2mPriv {
5317
5318     int num_output_buffers;
5319     int num_capture_buffers;
5320+    const char * dmabuf_alloc;
5321+    enum AVPixelFormat pix_fmt;
5322 } V4L2m2mPriv;
5323
5324 /**
5325@@ -129,4 +194,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont
5326  */
5327 int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx);
5328
5329+
5330+static inline unsigned int ff_v4l2_get_format_width(const struct v4l2_format * const fmt)
5331+{
5332+    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
5333+}
5334+
5335+static inline unsigned int ff_v4l2_get_format_height(const struct v4l2_format * const fmt)
5336+{
5337+    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
5338+}
5339+
5340+static inline uint32_t ff_v4l2_get_format_pixelformat(const struct v4l2_format * const fmt)
5341+{
5342+    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat;
5343+}
5344+
5345+static inline int ff_v4l2_ctx_eos(const V4L2Context * const ctx)
5346+{
5347+    return ctx->flag_last;
5348+}
5349+
5350+
5351 #endif /* AVCODEC_V4L2_M2M_H */
5352--- a/libavcodec/v4l2_m2m_dec.c
5353+++ b/libavcodec/v4l2_m2m_dec.c
5354@@ -21,8 +21,14 @@
5355  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
5356  */
5357
5358+#include "config_components.h"
5359+
5360 #include <linux/videodev2.h>
5361 #include <sys/ioctl.h>
5362+
5363+#include "libavutil/avassert.h"
5364+#include "libavutil/hwcontext.h"
5365+#include "libavutil/hwcontext_drm.h"
5366 #include "libavutil/pixfmt.h"
5367 #include "libavutil/pixdesc.h"
5368 #include "libavutil/opt.h"
5369@@ -30,75 +36,279 @@
5370 #include "codec_internal.h"
5371 #include "libavcodec/decode.h"
5372
5373+#include "libavcodec/hwaccels.h"
5374+#include "libavcodec/internal.h"
5375+#include "libavcodec/hwconfig.h"
5376+
5377 #include "v4l2_context.h"
5378 #include "v4l2_m2m.h"
5379 #include "v4l2_fmt.h"
5380+#include "v4l2_req_dmabufs.h"
5381
5382-static int v4l2_try_start(AVCodecContext *avctx)
5383+#if CONFIG_H264_DECODER
5384+#include "h264_parse.h"
5385+#endif
5386+#if CONFIG_HEVC_DECODER
5387+#include "hevc_parse.h"
5388+#endif
5389+
5390+// Pick 64 for max last count - that is >1sec at 60fps
5391+#define STATS_LAST_COUNT_MAX 64
5392+#define STATS_INTERVAL_MAX (1 << 30)
5393+
5394+#ifndef FF_API_BUFFER_SIZE_T
5395+#define FF_API_BUFFER_SIZE_T 1
5396+#endif
5397+
5398+#define DUMP_FAILED_EXTRADATA 0
5399+
5400+#if DUMP_FAILED_EXTRADATA
5401+static inline char hex1(unsigned int x)
5402 {
5403-    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
5404-    V4L2Context *const capture = &s->capture;
5405-    V4L2Context *const output = &s->output;
5406-    struct v4l2_selection selection = { 0 };
5407-    int ret;
5408+    x &= 0xf;
5409+    return x <= 9 ? '0' + x : 'a' + x - 10;
5410+}
5411
5412-    /* 1. start the output process */
5413-    if (!output->streamon) {
5414-        ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON);
5415-        if (ret < 0) {
5416-            av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n");
5417-            return ret;
5418-        }
5419+static inline char * hex2(char * s, unsigned int x)
5420+{
5421+    *s++ = hex1(x >> 4);
5422+    *s++ = hex1(x);
5423+    return s;
5424+}
5425+
5426+static inline char * hex4(char * s, unsigned int x)
5427+{
5428+    s = hex2(s, x >> 8);
5429+    s = hex2(s, x);
5430+    return s;
5431+}
5432+
5433+static inline char * dash2(char * s)
5434+{
5435+    *s++ = '-';
5436+    *s++ = '-';
5437+    return s;
5438+}
5439+
5440+static void
5441+data16(char * s, const unsigned int offset, const uint8_t * m, const size_t len)
5442+{
5443+    size_t i;
5444+    s = hex4(s, offset);
5445+    m += offset;
5446+    for (i = 0; i != 8; ++i) {
5447+        *s++ = ' ';
5448+        s = len > i + offset ? hex2(s, *m++) : dash2(s);
5449     }
5450+    *s++ = ' ';
5451+    *s++ = ':';
5452+    for (; i != 16; ++i) {
5453+        *s++ = ' ';
5454+        s = len > i + offset ? hex2(s, *m++) : dash2(s);
5455+    }
5456+    *s++ = 0;
5457+}
5458
5459-    if (capture->streamon)
5460-        return 0;
5461+static void
5462+log_dump(void * logctx, int lvl, const void * const data, const size_t len)
5463+{
5464+    size_t i;
5465+    for (i = 0; i < len; i += 16) {
5466+        char buf[80];
5467+        data16(buf, i, data, len);
5468+        av_log(logctx, lvl, "%s\n", buf);
5469+    }
5470+}
5471+#endif
5472
5473-    /* 2. get the capture format */
5474-    capture->format.type = capture->type;
5475-    ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format);
5476-    if (ret) {
5477-        av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n");
5478-        return ret;
5479+static unsigned int pts_stats_interval(const pts_stats_t * const stats)
5480+{
5481+    return stats->last_interval;
5482+}
5483+
5484+static int64_t pts_stats_guess(const pts_stats_t * const stats, const int fail_bad_guess)
5485+{
5486+    if (stats->last_count <= 1)
5487+        return stats->last_pts;
5488+    if (stats->last_pts == AV_NOPTS_VALUE ||
5489+            fail_bad_guess && (stats->last_interval == 0 ||
5490+                               stats->last_count >= STATS_LAST_COUNT_MAX))
5491+        return AV_NOPTS_VALUE;
5492+    return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval;
5493+}
5494+
5495+static void pts_stats_add(pts_stats_t * const stats, int64_t pts)
5496+{
5497+    if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) {
5498+        if (stats->last_count < STATS_LAST_COUNT_MAX)
5499+            ++stats->last_count;
5500+        return;
5501     }
5502
5503-    /* 2.1 update the AVCodecContext */
5504-    avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO);
5505-    capture->av_pix_fmt = avctx->pix_fmt;
5506+    if (stats->last_pts != AV_NOPTS_VALUE) {
5507+        const int64_t interval = pts - stats->last_pts;
5508
5509-    /* 3. set the crop parameters */
5510-    selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
5511-    selection.r.height = avctx->coded_height;
5512-    selection.r.width = avctx->coded_width;
5513-    ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection);
5514-    if (!ret) {
5515-        ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection);
5516-        if (ret) {
5517-            av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n");
5518-        } else {
5519-            av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height);
5520-            /* update the size of the resulting frame */
5521-            capture->height = selection.r.height;
5522-            capture->width  = selection.r.width;
5523+        if (interval < 0 || interval >= STATS_INTERVAL_MAX ||
5524+            stats->last_count >= STATS_LAST_COUNT_MAX) {
5525+            if (stats->last_interval != 0)
5526+                av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n",
5527+                       __func__, stats->name, interval, stats->last_count);
5528+            stats->last_interval = 0;
5529+        }
5530+        else {
5531+            const int64_t frame_time = interval / (int64_t)stats->last_count;
5532+
5533+            if (frame_time != stats->last_interval)
5534+                av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n",
5535+                       __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time);
5536+            stats->last_interval = frame_time;
5537         }
5538     }
5539
5540-    /* 4. init the capture context now that we have the capture format */
5541-    if (!capture->buffers) {
5542-        ret = ff_v4l2_context_init(capture);
5543-        if (ret) {
5544-            av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n");
5545-            return AVERROR(ENOMEM);
5546+    stats->last_pts = pts;
5547+    stats->last_count = 1;
5548+}
5549+
5550+static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name)  // Reset *stats: no pts seen yet, no interval estimate
5551+{
5552+    *stats = (pts_stats_t){
5553+        .logctx = logctx,  // context passed to av_log() by the stats code
5554+        .name = name,  // stored by pointer (not copied); printed in the stats debug messages
5555+        .last_count = 1,
5556+        .last_interval = 0,  // 0 is also what a bad interval resets this to
5557+        .last_pts = AV_NOPTS_VALUE  // no pts recorded yet
5558+    };
5559+}
5560+
5561+// Copies H.264 extradata to abdata as 00 00 00 01 prefixed units, unpacking avcC if detected
5562+// If abdata == NULL nothing is written and only the space required is returned; -ve AVERROR on bad data
5563+static int
5564+h264_xd_copy(const uint8_t * const extradata, const int extrasize, uint8_t * abdata)
5565+{
5566+    const uint8_t * const xdend = extradata + extrasize;
5567+    const uint8_t * p = extradata;
5568+    uint8_t * d = abdata;
5569+    unsigned int n;
5570+    unsigned int len;
5571+    const unsigned int hdrlen = 4;  // bytes in the 00 00 00 01 prefix written before each unit
5572+    unsigned int need_pps = 1;  // 1 until the second (count byte + units) list has been started
5573+
5574+    if (extrasize < 8)
5575+        return AVERROR(EINVAL);
5576+
5577+    if (p[0] == 0 && p[1] == 0) {
5578+        // Assume a couple of leading zeros are good enough to indicate NAL
5579+        if (abdata)
5580+            memcpy(d, p, extrasize);
5581+        return extrasize;
5582+    }
5583+
5584+    // avcC starts with a 1
5585+    if (p[0] != 1)
5586+        return AVERROR(EINVAL);
5587+
5588+    p += 5;  // skip the 5 bytes ahead of the first unit-count byte
5589+    n = *p++ & 0x1f;  // only the low 5 bits hold the count of the first list
5590+
5591+doxps:
5592+    while (n--) {
5593+        if (xdend - p < 2)
5594+            return AVERROR(EINVAL);
5595+        len = (p[0] << 8) | p[1];  // 16-bit big-endian unit length
5596+        p += 2;
5597+        if (xdend - p < (ptrdiff_t)len)
5598+            return AVERROR(EINVAL);
5599+        if (abdata) {
5600+            d[0] = 0;
5601+            d[1] = 0;
5602+            d[2] = 0;
5603+            d[3] = 1;
5604+            memcpy(d + 4, p, len);
5605         }
5606+        d += len + hdrlen;  // advanced in the count-only pass too, so d tracks the size
5607+        p += len;
5608+    }
5609+    if (need_pps) {
5610+        need_pps = 0;
5611+        if (p >= xdend)
5612+            return AVERROR(EINVAL);
5613+        n = *p++;  // the second list's count uses the whole byte
5614+        goto doxps;
5615     }
5616
5617-    /* 5. start the capture process */
5618-    ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON);
5619-    if (ret) {
5620-        av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n");
5621+    return d - abdata;  // bytes written/required; NOTE(review): with abdata == NULL this is arithmetic on a null pointer - confirm acceptable
5622+}
5623+
5624+static int  // 0 on success (including empty input), -ve AVERROR on failure
5625+copy_extradata(AVCodecContext * const avctx,
5626+               const void * const src_data, const int src_len,
5627+               void ** const pdst_data, size_t * const pdst_len)
5628+{
5629+    int len;
5630+
5631+    *pdst_len = 0;
5632+    av_freep(pdst_data);  // the previous copy is released even if the new copy fails
5633+
5634+    if (avctx->codec_id == AV_CODEC_ID_H264)
5635+        len = h264_xd_copy(src_data, src_len, NULL);  // 1st pass: size only
5636+    else
5637+        len = src_len < 0 ? AVERROR(EINVAL) : src_len;
5638+
5639+    // Zero length is OK but there is nothing to copy; -ve is an error value
5640+    if (len <= 0)
5641+        return len;
5642+
5643+    if ((*pdst_data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL)  // padding is allocated but not initialised here
5644+        return AVERROR(ENOMEM);
5645+
5646+    if (avctx->codec_id == AV_CODEC_ID_H264)
5647+        h264_xd_copy(src_data, src_len, *pdst_data);  // 2nd pass on the same input; return value not rechecked
5648+    else
5649+        memcpy(*pdst_data, src_data, len);
5650+    *pdst_len = len;  // length excludes the padding
5651+
5652+    return 0;
5653+}
5654+
5655+
5656+
5657+static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s)  // Ensure the output queue is streaming; 0 if OK or already on
5658+{
5659+    int ret;
5660+    struct v4l2_decoder_cmd cmd = {
5661+        .cmd = V4L2_DEC_CMD_START,
5662+        .flags = 0,
5663+    };
5664+
5665+    if (s->output.streamon)  // already streaming - nothing to do
5666+        return 0;
5667+
5668+    ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON);
5669+    if (ret != 0) {
5670+        av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context: %s\n", av_err2str(ret));
5671         return ret;
5672     }
5673
5674+    // STREAMON should do an implicit START so this is just for drivers that don't.
5675+    // It is optional so don't worry if it fails
5676+    if (ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd) < 0) {
5677+        ret = AVERROR(errno);  // only used for the log message below
5678+        av_log(avctx, AV_LOG_WARNING, "VIDIOC_DECODER_CMD start error: %s\n", av_err2str(ret));
5679+    }
5680+    else {
5681+        av_log(avctx, AV_LOG_TRACE, "VIDIOC_DECODER_CMD start OK\n");
5682+    }
5683+    return 0;  // a failed START command is deliberately not reported to the caller
5684+}
5685+
5686+static int v4l2_try_start(AVCodecContext *avctx)  // Start the output queue if it is not already streaming; 0 or the error from check_output_streamon()
5687+{
5688+    V4L2m2mContext * const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
5689+    int ret;
5690+
5691+    /* 1. start the output process (the only step performed here) */
5692+    if ((ret = check_output_streamon(avctx, s)) != 0)
5693+        return ret;
5694     return 0;
5695 }
5696
5697@@ -133,51 +343,823 @@ static int v4l2_prepare_decoder(V4L2m2mC
5698     return 0;
5699 }
5700
5701-static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
5702+static void  // Fills in frame->best_effort_timestamp and overwrites frame->pkt_dts
5703+set_best_effort_pts(AVCodecContext *const avctx,
5704+             pts_stats_t * const ps,
5705+             AVFrame *const frame)
5706+{
5707+    pts_stats_add(ps, frame->pts);  // feed this frame's pts into the stats before guessing
5708+
5709+    frame->best_effort_timestamp = pts_stats_guess(ps, 1);
5710+    // If we can't guess from just PTS - try DTS
5711+    if (frame->best_effort_timestamp == AV_NOPTS_VALUE)
5712+        frame->best_effort_timestamp = frame->pkt_dts;
5713+
5714+    // We can't emulate what s/w does in a useful manner and using the
5715+    // "correct" answer seems to just confuse things.
5716+    frame->pkt_dts               = frame->pts;  // done after pkt_dts was used as the fallback above
5717+    av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n",
5718+           frame->pts, frame->best_effort_timestamp, frame->pkt_dts);
5719+}
5720+
5721+static void  // Marks every tracking slot as not pending and to be discarded
5722+xlat_flush(xlat_track_t * const x)
5723+{
5724+    unsigned int i;
5725+    // Do not reset track_no - this ensures that any frames left in the decoder
5726+    // that turn up later get discarded.
5727+
5728+    x->last_pts = AV_NOPTS_VALUE;  // forget the last pts/opaque seen
5729+    x->last_opaque = 0;
5730+    for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) {
5731+        x->track_els[i].pending = 0;
5732+        x->track_els[i].discard = 1;
5733+    }
5734+}
5735+
5736+static void  // One-time initialisation of the tracking state
5737+xlat_init(xlat_track_t * const x)
5738+{
5739+    memset(x, 0, sizeof(*x));  // zero everything, including track_no which xlat_flush() leaves alone
5740+    xlat_flush(x);  // then apply the flushed defaults (discard = 1, last_pts = AV_NOPTS_VALUE)
5741+}
5742+
5743+static int  // Estimate (not exact) of frames still pending in the decoder; may be -ve
5744+xlat_pending(const V4L2m2mContext * const s)
5745+{
5746+    const xlat_track_t *const x = &s->xlat;
5747+    unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE;  // start at the slot for track_no and walk backwards
5748+    int i;
5749+    const int64_t now = pts_stats_guess(&s->pts_stat, 0);
5750+    int64_t first_dts = AV_NOPTS_VALUE;
5751+    int no_dts_count = 0;
5752+    unsigned int interval = pts_stats_interval(&s->pts_stat);
5753+
5754+    for (i = 0; i < FF_V4L2_M2M_TRACK_SIZE; ++i, n = (n - 1) & (FF_V4L2_M2M_TRACK_SIZE - 1)) {  // NOTE(review): the & wrap assumes TRACK_SIZE is a power of 2 - confirm
5755+        const V4L2m2mTrackEl * const t = x->track_els + n;
5756+
5757+        if (first_dts == AV_NOPTS_VALUE)
5758+            if (t->dts == AV_NOPTS_VALUE)
5759+                ++no_dts_count;  // slots visited before the first one that has a dts
5760+            else  // binds to the inner if (the t->dts test)
5761+                first_dts = t->dts;
5762+
5763+        // Discard only set on never-set or flushed entries
5764+        // So if we get here we've never successfully decoded a frame so allow
5765+        // more frames into the buffer before stalling
5766+        if (t->discard)
5767+            return i - 16;  // lowers the count by 16, so it is -ve for small i
5768+
5769+        // If we've got this frame out then everything before this point
5770+        // must have entered the decoder
5771+        if (!t->pending)
5772+            break;
5773+
5774+        // If we've never seen a pts all we can do is count frames
5775+        if (now == AV_NOPTS_VALUE)
5776+            continue;
5777+
5778+        if (t->dts != AV_NOPTS_VALUE && now >= t->dts)
5779+            break;
5780+    }
5781+
5782+    if (first_dts != AV_NOPTS_VALUE && now != AV_NOPTS_VALUE && interval != 0 && s->reorder_size != 0) {
5783+        const int iframes = (first_dts - now) / (int)interval;  // dts distance expressed in frame intervals
5784+        const int t = iframes - s->reorder_size + no_dts_count;
5785+
5786+//        av_log(s->avctx, AV_LOG_DEBUG, "Last:%"PRId64", Now:%"PRId64", First:%"PRId64", delta=%"PRId64", frames=%d, nodts=%d\n",
5787+//               x->last_dts, now, first_dts, first_dts - now, iframes, no_dts_count);
5788+
5789+        if (iframes > 0 && iframes < 64 && t < i) {  // use the dts-based figure only when in range and below the slot count
5790+            return t;
5791+        }
5792+    }
5793+
5794+    return i;  // otherwise the number of slots walked
5795+}
5796+
5797+static inline int stream_started(const V4L2m2mContext * const s) {
5798+    return s->output.streamon;  // "started" means the output queue is streaming
5799+}
5800+
5801+#define NQ_OK        0
5802+#define NQ_Q_FULL    1
5803+#define NQ_SRC_EMPTY 2
5804+#define NQ_NONE      3
5805+#define NQ_DRAINING  4
5806+#define NQ_DEAD      5
5807+
5808+#define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING)
5809+#define RETRY_NQ(nq_status) ((nq_status) == NQ_Q_FULL || (nq_status) == NQ_NONE)
5810+
5811+// do_not_get      If true then no new packet will be got but status will
5812+//                  be set appropriately
5813+
5814+// AVERROR_EOF     Flushing an already flushed stream
5815+// -ve             Error (all errors except EOF are unexpected)
5816+// NQ_OK (0)       OK
5817+// NQ_Q_FULL       Dst full (retry if we think V4L2 Q has space now)
5818+// NQ_SRC_EMPTY    Src empty (do not retry)
5819+// NQ_NONE         Enqueue not attempted
5820+// NQ_DRAINING     At EOS, dQ dest until EOS there too
5821+// NQ_DEAD         Not running (do not retry, do not attempt capture dQ)
5822+
5823+static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s, const int do_not_get)
5824 {
5825-    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
5826-    V4L2Context *const capture = &s->capture;
5827-    V4L2Context *const output = &s->output;
5828     int ret;
5829
5830-    if (!s->buf_pkt.size) {
5831-        ret = ff_decode_get_packet(avctx, &s->buf_pkt);
5832+    // If we don't already have a coded packet - get a new one
5833+    // We will already have a coded pkt if the output Q was full last time we
5834+    // tried to Q it
5835+    if (!s->buf_pkt.size && !do_not_get) {
5836+        unsigned int i;
5837+
5838+        for (i = 0; i < 256; ++i) {  // bounded retry: loops again only for packets that carry just side data
5839+            uint8_t * side_data;
5840+            size_t side_size;
5841+
5842+            ret = ff_decode_get_packet(avctx, &s->buf_pkt);
5843+            if (ret != 0)
5844+                break;
5845+
5846+            // New extradata is the only side-data we understand
5847+            side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
5848+            if (side_data) {
5849+                av_log(avctx, AV_LOG_DEBUG, "New extradata\n");
5850+                if ((ret = copy_extradata(avctx, side_data, (int)side_size, &s->extdata_data, &s->extdata_size)) < 0)
5851+                    av_log(avctx, AV_LOG_WARNING, "Failed to copy new extra data: %s\n", av_err2str(ret));
5852+                s->extdata_sent = 0;  // cleared even if the copy above failed
5853+            }
5854+
5855+            if (s->buf_pkt.size != 0)
5856+                break;
5857+
5858+            if (s->buf_pkt.side_data_elems == 0) {
5859+                av_log(avctx, AV_LOG_WARNING, "Empty pkt from ff_decode_get_packet - treating as EOF\n");
5860+                ret = AVERROR_EOF;
5861+                break;
5862+            }
5863+
5864+            // Retry a side-data only pkt
5865+        }
5866+        // If i >= 256 something has gone wrong
5867+        if (i >= 256) {
5868+            av_log(avctx, AV_LOG_ERROR, "Too many side-data only packets\n");
5869+            return AVERROR(EIO);
5870+        }
5871+
5872+        if (ret == AVERROR(EAGAIN)) {
5873+            if (!stream_started(s)) {
5874+                av_log(avctx, AV_LOG_TRACE, "%s: receive_frame before 1st coded packet\n", __func__);
5875+                return NQ_DEAD;
5876+            }
5877+            return NQ_SRC_EMPTY;
5878+        }
5879+
5880+        if (ret == AVERROR_EOF) {
5881+            // EOF - enter drain mode
5882+            av_log(avctx, AV_LOG_TRACE, "--- EOS req: ret=%d, size=%d, started=%d, drain=%d\n",
5883+                   ret, s->buf_pkt.size, stream_started(s), s->draining);
5884+            if (!stream_started(s)) {
5885+                av_log(avctx, AV_LOG_DEBUG, "EOS on flushed stream\n");
5886+                s->draining = 1;
5887+                s->capture.done = 1;
5888+                return AVERROR_EOF;
5889+            }
5890+
5891+            if (!s->draining) {
5892+                // Calling enqueue with an empty pkt starts drain
5893+                av_assert0(s->buf_pkt.size == 0);
5894+                ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0);
5895+                if (ret) {
5896+                    av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret);
5897+                    return ret;
5898+                }
5899+            }
5900+            return NQ_DRAINING;
5901+        }
5902+
5903         if (ret < 0) {
5904-            if (ret == AVERROR(EAGAIN))
5905-                return ff_v4l2_context_dequeue_frame(capture, frame, 0);
5906-            else if (ret != AVERROR_EOF)
5907-                return ret;
5908+            av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret);
5909+            return ret;
5910         }
5911     }
5912
5913-    if (s->draining)
5914-        goto dequeue;
5915+    if (s->draining) {
5916+        if (s->buf_pkt.size) {
5917+            av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n");
5918+            av_packet_unref(&s->buf_pkt);
5919+        }
5920+        return NQ_DRAINING;
5921+    }
5922+
5923+    if (!s->buf_pkt.size)  // nothing held and nothing fetched (e.g. do_not_get was set)
5924+        return NQ_NONE;
5925
5926-    ret = ff_v4l2_context_enqueue_packet(output, &s->buf_pkt);
5927-    if (ret < 0 && ret != AVERROR(EAGAIN))
5928-        goto fail;
5929+    if ((ret = check_output_streamon(avctx, s)) != 0)
5930+        return ret;
5931
5932-    /* if EAGAIN don't unref packet and try to enqueue in the next iteration */
5933-    if (ret != AVERROR(EAGAIN))
5934+    if (s->extdata_sent)
5935+        ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0);
5936+    else
5937+        ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size);
5938+
5939+    if (ret == AVERROR(EAGAIN)) {
5940+        // Out of input buffers - keep packet
5941+        ret = NQ_Q_FULL;
5942+    }
5943+    else {
5944+        // In all other cases we are done with this packet
5945         av_packet_unref(&s->buf_pkt);
5946+        s->extdata_sent = 1;  // also set when the enqueue failed
5947
5948-    if (!s->draining) {
5949-        ret = v4l2_try_start(avctx);
5950         if (ret) {
5951-            /* cant recover */
5952-            if (ret != AVERROR(ENOMEM))
5953-                ret = 0;
5954-            goto fail;
5955+            av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret);
5956+            return ret;
5957+        }
5958+    }
5959+
5960+    // Start if we haven't
5961+    {
5962+        const int ret2 = v4l2_try_start(avctx);
5963+        if (ret2) {
5964+            av_log(avctx, AV_LOG_DEBUG, "Start failure: err=%d\n", ret2);
5965+            ret = (ret2 == AVERROR(ENOMEM)) ? ret2 : NQ_DEAD;  // only ENOMEM is passed up as an error
5966         }
5967     }
5968
5969-dequeue:
5970-    return ff_v4l2_context_dequeue_frame(capture, frame, -1);
5971-fail:
5972-    av_packet_unref(&s->buf_pkt);
5973     return ret;
5974 }
5975
5976+static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx)  // Block while q_count == 0 and ctx is streaming; 0 or -ve AVERROR
5977+{
5978+    int rv = 0;
5979+
5980+    ff_mutex_lock(&ctx->lock);
5981+
5982+    while (atomic_load(&ctx->q_count) == 0 && ctx->streamon) {  // re-tested after every wakeup
5983+        if ((rv = pthread_cond_wait(&ctx->cond, &ctx->lock)) != 0) {
5984+            rv = AVERROR(rv);  // pthread functions return the error number; they do not set errno
5985+            av_log(avctx, AV_LOG_ERROR, "Cond wait failure: %s\n", av_err2str(rv));
5986+            break;
5987+        }
5988+    }
5989+
5990+    ff_mutex_unlock(&ctx->lock);
5991+    return rv;
5992+}
5993+
5994+static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)  // 0 with a frame, AVERROR(EAGAIN) if more input is wanted, else -ve AVERROR
5995+{
5996+    V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
5997+    int src_rv = -1;
5998+    int dst_rv = 1;  // Neither 0 (frame obtained) nor -ve (error): no dequeue attempted yet
5999+    unsigned int i = 0;
6000+
6001+    do {
6002+        const int pending = xlat_pending(s);
6003+        const int prefer_dq = (pending > 4);
6004+        const int last_src_rv = src_rv;
6005+
6006+        av_log(avctx, AV_LOG_TRACE, "Pending=%d, src_rv=%d, req_pkt=%d\n", pending, src_rv, s->req_pkt);
6007+
6008+        // Enqueue another pkt for decode if
6009+        // (a) We don't have a lot of stuff in the buffer already OR
6010+        // (b) ... we (think we) do but we've failed to get a frame already OR
6011+        // (c) We've dequeued a lot of frames without asking for input
6012+        src_rv = try_enqueue_src(avctx, s, !(!prefer_dq || i != 0 || s->req_pkt > 2));
6013+
6014+        // If we got a frame last time or we've already tried to get a frame and
6015+        // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN)
6016+        // indicating that we want more input.
6017+        // This should mean that once decode starts we enter a stable state where
6018+        // we alternately ask for input and produce output
6019+        if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY)
6020+            break;
6021+
6022+        if (src_rv == NQ_Q_FULL && last_src_rv == NQ_Q_FULL) {
6023+            av_log(avctx, AV_LOG_WARNING, "Poll thinks src Q has space; none found\n");
6024+            break;
6025+        }
6026+
6027+        // Try to get a new frame if
6028+        // (a) we haven't already got one AND
6029+        // (b) enqueue returned a status indicating that decode should be attempted
6030+        if (dst_rv != 0 && TRY_DQ(src_rv)) {
6031+            // Pick a timeout depending on state
6032+            // The pending count isn't completely reliable so it is good enough
6033+            // hint that we want a frame but not good enough to require it in
6034+            // all cases; however if it has got > 31 that exceeds its margin of
6035+            // error so require a frame to prevent ridiculous levels of latency
6036+            const int t =
6037+                src_rv == NQ_Q_FULL ? -1 :
6038+                src_rv == NQ_DRAINING ? 300 :
6039+                prefer_dq ? (s->running && pending > 31 ? 100 : 5) : 0;
6040+
6041+            // Dequeue frame will unref any previous contents of frame
6042+            // if it returns success so we don't need an explicit unref
6043+            // when discarding
6044+            // This returns AVERROR(EAGAIN) on timeout or if
6045+            // there is room in the input Q and timeout == -1
6046+            dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
6047+
6048+            // Failure due to no buffer in Q?
6049+            if (dst_rv == AVERROR(ENOSPC)) {
6050+                // Wait & retry
6051+                if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) {
6052+                    dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
6053+                }
6054+            }
6055+
6056+            if (dst_rv == 0) {
6057+                set_best_effort_pts(avctx, &s->pts_stat, frame);
6058+                if (!s->running) {
6059+                    s->running = 1;
6060+                    av_log(avctx, AV_LOG_VERBOSE, "Decode running\n");
6061+                }
6062+            }
6063+
6064+            if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) {
6065+                av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF\n");
6066+                dst_rv = AVERROR_EOF;
6067+                s->capture.done = 1;
6068+            }
6069+            else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
6070+                av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n",
6071+                       s->draining, s->capture.done);
6072+            else if (dst_rv && dst_rv != AVERROR(EAGAIN))
6073+                av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n",
6074+                       s->draining, s->capture.done, dst_rv);
6075+        }
6076+
6077+        ++i;
6078+        if (i >= 256) {
6079+            av_log(avctx, AV_LOG_ERROR, "Unexpectedly large retry count: %u\n", i);
6080+            src_rv = AVERROR(EIO);  // -ve value fails both loop conditions below, ending the loop
6081+        }
6082+
6083+        // Continue trying to enqueue packets if either
6084+        // (a) we succeeded last time OR
6085+        // (b) we didn't ret a frame and we can retry the input
6086+    } while (src_rv == NQ_OK || (dst_rv == AVERROR(EAGAIN) && RETRY_NQ(src_rv)));
6087+
6088+    // Ensure that the frame contains nothing if we aren't returning a frame
6089+    // (might happen when discarding)
6090+    if (dst_rv)
6091+        av_frame_unref(frame);
6092+
6093+    // If we got a frame this time ask for a pkt next time
6094+    s->req_pkt = (dst_rv == 0) ? s->req_pkt + 1 : 0;
6095+
6096+#if 0
6097+    if (dst_rv == 0)
6098+    {
6099+        static int z = 0;
6100+        if (++z > 50) {
6101+            av_log(avctx, AV_LOG_ERROR, "Streamoff and die?\n");
6102+            ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
6103+            return -1;
6104+        }
6105+    }
6106+#endif
6107+
6108+    return dst_rv == 0 ? 0 :  // a frame wins over any enqueue error
6109+        src_rv < 0 ? src_rv :
6110+        dst_rv < 0 ? dst_rv :
6111+            AVERROR(EAGAIN);
6112+}
6113+
6114+#if 0
6115+#include <time.h>
6116+static int64_t us_time(void)
6117+{
6118+    struct timespec ts;
6119+    clock_gettime(CLOCK_MONOTONIC, &ts);
6120+    return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
6121+}
6122+
6123+static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
6124+{
6125+    int ret;
6126+    const int64_t now = us_time();
6127+    int64_t done;
6128+    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
6129+    ret = v4l2_receive_frame2(avctx, frame);
6130+    done = us_time();
6131+    av_log(avctx, AV_LOG_TRACE, ">>> %s: rx time=%" PRId64 ", rv=%d\n", __func__, done - now, ret);
6132+    return ret;
6133+}
6134+#endif
6135+
6136+static uint32_t  // V4L2 profile menu index for (codec_id, avprofile), or ~0 if there is no mapping
6137+avprofile_to_v4l2(const enum AVCodecID codec_id, const int avprofile)
6138+{
6139+    switch (codec_id) {
6140+        case AV_CODEC_ID_H264:
6141+            switch (avprofile) {
6142+                case FF_PROFILE_H264_BASELINE:
6143+                    return V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE;
6144+                case FF_PROFILE_H264_CONSTRAINED_BASELINE:
6145+                    return V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE;
6146+                case FF_PROFILE_H264_MAIN:
6147+                    return V4L2_MPEG_VIDEO_H264_PROFILE_MAIN;
6148+                case FF_PROFILE_H264_EXTENDED:
6149+                    return V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED;
6150+                case FF_PROFILE_H264_HIGH:
6151+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH;
6152+                case FF_PROFILE_H264_HIGH_10:
6153+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10;
6154+                case FF_PROFILE_H264_HIGH_10_INTRA:
6155+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA;
6156+                case FF_PROFILE_H264_MULTIVIEW_HIGH:  // NOTE(review): shares the HIGH_422 result although MULTIVIEW_HIGH appears in the list below - confirm intended
6157+                case FF_PROFILE_H264_HIGH_422:
6158+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422;
6159+                case FF_PROFILE_H264_HIGH_422_INTRA:
6160+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA;
6161+                case FF_PROFILE_H264_STEREO_HIGH:
6162+                    return V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH;
6163+                case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
6164+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE;
6165+                case FF_PROFILE_H264_HIGH_444_INTRA:
6166+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA;
6167+                case FF_PROFILE_H264_CAVLC_444:
6168+                    return V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA;
6169+                case FF_PROFILE_H264_HIGH_444:  // no mapping: shares the default path and ends up returning ~0
6170+                default:
6171+                    break;
6172+//                    V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE		= 12,
6173+//                    V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH		= 13,
6174+//                    V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA	= 14,
6175+//                    V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH		= 16,
6176+//                    V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH		= 17,
6177+            }
6178+            break;
6179+        case AV_CODEC_ID_MPEG2VIDEO:
6180+        case AV_CODEC_ID_MPEG4:
6181+        case AV_CODEC_ID_VC1:
6182+        case AV_CODEC_ID_VP8:
6183+        case AV_CODEC_ID_VP9:
6184+        case AV_CODEC_ID_AV1:
6185+            // Most profiles are a simple number that matches the V4L2 enum
6186+            return avprofile;
6187+        default:
6188+            break;
6189+    }
6190+    return ~(uint32_t)0;  // "no mapping" marker (all bits set)
6191+}
6192+
6193+// This check mirrors Chrome's profile check by testing to see if the profile
6194+// exists as a possible value for the V4L2 profile control
6195+static int  // 0 if acceptable or not checkable, AVERROR(ENOENT) if the driver does not list the profile
6196+check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s)
6197+{
6198+    struct v4l2_queryctrl query_ctrl;
6199+    struct v4l2_querymenu query_menu;
6200+    uint32_t profile_id;
6201+
6202+    // An unset profile is almost certainly zero or -99 - do not reject
6203+    if (avctx->profile <= 0) {
6204+        av_log(avctx, AV_LOG_VERBOSE, "Profile %d <= 0 - check skipped\n", avctx->profile);
6205+        return 0;
6206+    }
6207+
6208+    memset(&query_ctrl, 0, sizeof(query_ctrl));
6209+    switch (avctx->codec_id) {
6210+        case AV_CODEC_ID_MPEG2VIDEO:
6211+            profile_id = V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE;
6212+            break;
6213+        case AV_CODEC_ID_MPEG4:
6214+            profile_id = V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE;
6215+            break;
6216+        case AV_CODEC_ID_H264:
6217+            profile_id = V4L2_CID_MPEG_VIDEO_H264_PROFILE;
6218+            break;
6219+        case AV_CODEC_ID_VP8:
6220+            profile_id = V4L2_CID_MPEG_VIDEO_VP8_PROFILE;
6221+            break;
6222+        case AV_CODEC_ID_VP9:
6223+            profile_id = V4L2_CID_MPEG_VIDEO_VP9_PROFILE;
6224+            break;
6225+#ifdef V4L2_CID_MPEG_VIDEO_AV1_PROFILE
6226+        case AV_CODEC_ID_AV1:
6227+            profile_id = V4L2_CID_MPEG_VIDEO_AV1_PROFILE;
6228+            break;
6229+#endif
6230+        default:
6231+            av_log(avctx, AV_LOG_VERBOSE, "Can't map profile for codec id %d; profile check skipped\n", avctx->codec_id);
6232+            return 0;
6233+    }
6234+
6235+    query_ctrl = (struct v4l2_queryctrl){.id = profile_id};
6236+    if (ioctl(s->fd, VIDIOC_QUERYCTRL, &query_ctrl) != 0) {
6237+        av_log(avctx, AV_LOG_VERBOSE, "Query profile ctrl (%#x) not supported: assume OK\n", query_ctrl.id);
6238+    }
6239+    else {
6240+        av_log(avctx, AV_LOG_DEBUG, "%s: Control supported: %#x\n", __func__, query_ctrl.id);
6241+
6242+        query_menu = (struct v4l2_querymenu){
6243+            .id = query_ctrl.id,
6244+            .index = avprofile_to_v4l2(avctx->codec_id, avctx->profile),  // ~0 when there is no mapping
6245+        };
6246+
6247+        if (query_menu.index > query_ctrl.maximum ||
6248+            query_menu.index < query_ctrl.minimum ||
6249+            ioctl(s->fd, VIDIOC_QUERYMENU, &query_menu) != 0) {  // the menu entry must exist for this index
6250+            return AVERROR(ENOENT);
6251+        }
6252+    }
6253+
6254+    return 0;
6255+}
6256+
6257+static int  // 0 if WxH is acceptable (or cannot be checked), -ve AVERROR if no enumerated size matches
6258+check_size(AVCodecContext * const avctx, V4L2m2mContext * const s, const uint32_t fcc)
6259+{
6260+    unsigned int i;
6261+    const uint32_t w = avctx->coded_width;
6262+    const uint32_t h = avctx->coded_height;
6263+
6264+    if (w == 0 || h == 0 || fcc == 0) {
6265+        av_log(avctx, AV_LOG_TRACE, "%s: Size %dx%d or fcc %s empty\n", __func__, w, h, av_fourcc2str(fcc));
6266+        return 0;
6267+    }
6268+    if ((s->quirks & FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN) != 0) {
6269+        av_log(avctx, AV_LOG_TRACE, "%s: Skipped (quirk): Size %dx%d, fcc %s\n", __func__, w, h, av_fourcc2str(fcc));
6270+        return 0;
6271+    }
6272+
6273+    for (i = 0;; ++i) {  // ends by return: a match, or the ioctl failing once the entries run out
6274+        struct v4l2_frmsizeenum fs = {
6275+            .index = i,
6276+            .pixel_format = fcc,
6277+        };
6278+
6279+        while (ioctl(s->fd, VIDIOC_ENUM_FRAMESIZES, &fs) != 0) {
6280+            const int err = AVERROR(errno);
6281+            if (err == AVERROR(EINTR))
6282+                continue;
6283+            if (i == 0 && err == AVERROR(ENOTTY)) {
6284+                av_log(avctx, AV_LOG_DEBUG, "Framesize enum not supported\n");
6285+                return 0;
6286+            }
6287+            if (err != AVERROR(EINVAL)) {
6288+                av_log(avctx, AV_LOG_ERROR, "Failed to enum framesizes: %s\n", av_err2str(err));
6289+                return err;
6290+            }
6291+            av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in %u frame size enums\n",
6292+                   w, h, av_fourcc2str(fcc), i);
6293+            return err;  // EINVAL here is treated as the end of the list with no match
6294+        }
6295+
6296+        switch (fs.type) {
6297+            case V4L2_FRMSIZE_TYPE_DISCRETE:
6298+                av_log(avctx, AV_LOG_TRACE, "%s[%d]: Discrete: %dx%d\n", __func__, i,
6299+                       fs.discrete.width,fs.discrete.height);
6300+                if (w == fs.discrete.width && h == fs.discrete.height)
6301+                    return 0;
6302+                break;
6303+            case V4L2_FRMSIZE_TYPE_STEPWISE:
6304+                av_log(avctx, AV_LOG_TRACE, "%s[%d]: Stepwise: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i,
6305+                       fs.stepwise.min_width, fs.stepwise.min_height,
6306+                       fs.stepwise.max_width, fs.stepwise.max_height,
6307+                       fs.stepwise.step_width,fs.stepwise.step_height);
6308+                if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width &&
6309+                    h >= fs.stepwise.min_height && h <= fs.stepwise.max_height &&
6310+                    (fs.stepwise.step_width == 0 || (w - fs.stepwise.min_width) % fs.stepwise.step_width == 0) &&  // a zero step means no step constraint (avoids % 0)
6311+                    (fs.stepwise.step_height == 0 || (h - fs.stepwise.min_height) % fs.stepwise.step_height == 0))
6312+                    return 0;
6313+                break;
6314+            case V4L2_FRMSIZE_TYPE_CONTINUOUS:
6315+                av_log(avctx, AV_LOG_TRACE, "%s[%d]: Continuous: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i,
6316+                       fs.stepwise.min_width, fs.stepwise.min_height,
6317+                       fs.stepwise.max_width, fs.stepwise.max_height,
6318+                       fs.stepwise.step_width,fs.stepwise.step_height);
6319+                if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width &&
6320+                    h >= fs.stepwise.min_height && h <= fs.stepwise.max_height)
6321+                    return 0;
6322+                break;
6323+            default:
6324+                av_log(avctx, AV_LOG_ERROR, "Unexpected framesize enum: %d\n", fs.type);
6325+                return AVERROR(EINVAL);
6326+        }
6327+    }
6328+}
6329+
6330+static int  // Queries the driver name and sets quirk bits for known drivers; 0 or -ve AVERROR
6331+get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s)
6332+{
6333+    struct v4l2_capability cap;
6334+
6335+    memset(&cap, 0, sizeof(cap));
6336+    while (ioctl(s->fd, VIDIOC_QUERYCAP, &cap) != 0) {  // retried only when interrupted (EINTR)
6337+        int err = errno;
6338+        if (err == EINTR)
6339+            continue;
6340+        av_log(avctx, AV_LOG_ERROR, "V4L2: Failed to get capabilities: %s\n", strerror(err));
6341+        return AVERROR(err);
6342+    }
6343+
6344+    // Could be made table driven if we have a few more but right now there
6345+    // seems no point
6346+
6347+    // Meson (amlogic) always gives a resolution changed event after output
6348+    // streamon and userspace must (re)allocate capture buffers and streamon
6349+    // capture to clear the event even if the capture buffers were the right
6350+    // size in the first place.
6351+    if (strcmp(cap.driver, "meson-vdec") == 0)
6352+        s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS | FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN;  // bits are OR-ed in; s->quirks is not cleared here
6353+
6354+    av_log(avctx, AV_LOG_DEBUG, "Driver '%s': Quirks=%#x\n", cap.driver, s->quirks);
6355+    return 0;
6356+}
6357+
6358+// Estimate an upper bound, in bytes, for one coded frame. The heuristic is derived for H264 but is used for every codec
6359+static uint32_t max_coded_size(const AVCodecContext * const avctx)
6360+{
6361+    uint32_t wxh = avctx->coded_width * avctx->coded_height;
6362+    uint32_t size;
6363+
6364+    size = wxh * 3 / 2;  // 1.5 bytes per pixel - presumably the raw 8-bit 4:2:0 frame size
6365+    // H.264 Annex A table A-1 gives minCR which is either 2 or 4
6366+    // unfortunately that doesn't yield an actually useful limit
6367+    // and it should be noted that frame 0 is special cased to allow
6368+    // a bigger number which really isn't helpful for us. So just pick
6369+    // frame_size / 2
6370+    size /= 2;
6371+    // Add 64k to allow for any overheads and/or encoder hopefulness
6372+    // with small WxH
6373+    return size + (1 << 16);
6374+}
6375+
6376+static void
6377+parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s)  // Sets avctx->profile/level and s->reorder_size from the first SPS found in avctx->extradata (H264/HEVC only)
6378+{
6379+    s->reorder_size = 0;  // value kept when there is no extradata or no SPS can be parsed
6380+
6381+    if (!avctx->extradata || !avctx->extradata_size)
6382+        return;
6383+
6384+    switch (avctx->codec_id) {
6385+#if CONFIG_H264_DECODER
6386+        case AV_CODEC_ID_H264:
6387+        {
6388+            H264ParamSets ps;
6389+            int is_avc = 0;
6390+            int nal_length_size = 0;
6391+            int ret;
6392+
6393+            memset(&ps, 0, sizeof(ps));
6394+
6395+            ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size,
6396+                                           &ps, &is_avc, &nal_length_size,
6397+                                           avctx->err_recognition, avctx);
6398+            if (ret > 0) {
6399+                const SPS * sps = NULL;
6400+                unsigned int i;
6401+                for (i = 0; i != MAX_SPS_COUNT; ++i) {  // use the first SPS slot that is populated
6402+                    if (ps.sps_list[i]) {
6403+                        sps = (const SPS *)ps.sps_list[i]->data;
6404+                        break;
6405+                    }
6406+                }
6407+                if (sps) {
6408+                    avctx->profile = ff_h264_get_profile(sps);
6409+                    avctx->level = sps->level_idc;
6410+                    s->reorder_size = sps->num_reorder_frames;
6411+                }
6412+            }
6413+            ff_h264_ps_uninit(&ps);  // always release the parameter sets, parsed or not
6414+            break;
6415+        }
6416+#endif
6417+#if CONFIG_HEVC_DECODER
6418+        case AV_CODEC_ID_HEVC:
6419+        {
6420+            HEVCParamSets ps;
6421+            HEVCSEI sei;
6422+            int is_nalff = 0;
6423+            int nal_length_size = 0;
6424+            int ret;
6425+
6426+            memset(&ps, 0, sizeof(ps));
6427+            memset(&sei, 0, sizeof(sei));
6428+
6429+            ret = ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size,
6430+                                           &ps, &sei, &is_nalff, &nal_length_size,
6431+                                           avctx->err_recognition, 0, avctx);
6432+            if (ret >= 0) {  // only negative is failure; NOTE(review): the HEVC parser appears to return 0 on success, so "> 0" never matched - confirm
6433+                const HEVCSPS * sps = NULL;
6434+                unsigned int i;
6435+                for (i = 0; i != HEVC_MAX_SPS_COUNT; ++i) {  // use the first SPS slot that is populated
6436+                    if (ps.sps_list[i]) {
6437+                        sps = (const HEVCSPS *)ps.sps_list[i]->data;
6438+                        break;
6439+                    }
6440+                }
6441+                if (sps) {
6442+                    avctx->profile = sps->ptl.general_ptl.profile_idc;
6443+                    avctx->level   = sps->ptl.general_ptl.level_idc;
6444+                    s->reorder_size = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering;
6445+                }
6446+            }
6447+            ff_hevc_ps_uninit(&ps);  // always release the parameter sets, parsed or not
6448+            ff_hevc_reset_sei(&sei);
6449+            break;
6450+        }
6451+#endif
6452+        default:
6453+            break;
6454+    }
6455+}
6456+
6457+static int
6458+choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s)  // Builds the candidate pixel format list, asks ff_get_format() to pick one and applies it to s->capture; returns 0 or an AVERROR
6459+{
6460+    const V4L2m2mPriv * const priv = avctx->priv_data;
6461+    unsigned int fmts_n;
6462+    uint32_t *fmts = ff_v4l2_context_enum_drm_formats(&s->capture, &fmts_n);  // fmts_n entries; released with av_free() at 'error:'
6463+    enum AVPixelFormat *fmts2 = NULL;
6464+    enum AVPixelFormat gf_pix_fmt;
6465+    unsigned int i;
6466+    unsigned int n = 0;
6467+    unsigned int pref_n = 1;  // index in fmts2 of the entry equal to priv->pix_fmt; defaults to the first s/w entry
6468+    int rv = AVERROR(ENOENT);
6469+
6470+    if (!fmts)
6471+        return AVERROR(ENOENT);
6472+
6473+    if ((fmts2 = av_malloc(sizeof(*fmts2) * (fmts_n + 3))) == NULL) {  // +3: leading DRM_PRIME, duplicated preferred format, AV_PIX_FMT_NONE terminator
6474+        rv = AVERROR(ENOMEM);
6475+        goto error;
6476+    }
6477+
6478+    // Filter for formats that are supported by ffmpeg and
6479+    // can accommodate the stream size
6480+    fmts2[n++] = AV_PIX_FMT_DRM_PRIME;
6481+    for (i = 0; i != fmts_n; ++i) {
6482+        const enum AVPixelFormat f = ff_v4l2_format_v4l2_to_avfmt(fmts[i], AV_CODEC_ID_RAWVIDEO);
6483+        av_log(avctx, AV_LOG_TRACE, "VLC pix %s -> %s\n", av_fourcc2str(fmts[i]), av_get_pix_fmt_name(f));
6484+        if (f == AV_PIX_FMT_NONE)
6485+            continue;
6486+
6487+        if (check_size(avctx, s, fmts[i]) != 0)
6488+            continue;
6489+
6490+        if (f == priv->pix_fmt)
6491+            pref_n = n;
6492+        fmts2[n++] = f;
6493+    }
6494+
6495+    if (n < 2) {  // only the DRM_PRIME entry added above is present
6496+        av_log(avctx, AV_LOG_DEBUG, "%s: No usable formats found\n", __func__);
6497+        goto error;
6498+    }
6499+
6500+    if (n != 2) {
6501+        // ffmpeg.c really only expects one s/w format. It thinks that the
6502+        // last format in the list is the s/w format of the h/w format but
6503+        // also chooses the first non-h/w format as the preferred s/w format.
6504+        // The only way of reconciling this is to dup our preferred format into
6505+        // both last & first place :-(
6506+        const enum AVPixelFormat t = fmts2[pref_n];
6507+        fmts2[pref_n] = fmts2[1];
6508+        fmts2[1] = t;
6509+        fmts2[n++] = t;
6510+    }
6511+
6512+    fmts2[n] = AV_PIX_FMT_NONE;
6513+
6514+    gf_pix_fmt = ff_get_format(avctx, fmts2);
6515+    av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n",
6516+           avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt),
6517+           avctx->coded_width, avctx->coded_height,
6518+           gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt));
6519+
6520+    if (gf_pix_fmt == AV_PIX_FMT_NONE)
6521+        goto error;
6522+
6523+    if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) {
6524+        avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
6525+        s->capture.av_pix_fmt = avctx->sw_pix_fmt;
6526+        s->output_drm = 1;
6527+    }
6528+    else {
6529+        avctx->pix_fmt = gf_pix_fmt;
6530+        s->capture.av_pix_fmt = gf_pix_fmt;
6531+        s->output_drm = 0;
6532+    }
6533+
6534+    // Get format converts capture.av_pix_fmt back into a V4L2 format in the context
6535+    if ((rv = ff_v4l2_context_get_format(&s->capture, 0)) != 0)
6536+        goto error;
6537+    rv = ff_v4l2_context_set_format(&s->capture);
6538+
6539+error:  // shared exit path, also reached on success; rv holds the final status
6540+    av_free(fmts2);
6541+    av_free(fmts);
6542+    return rv;
6543+}
6544+
6545 static av_cold int v4l2_decode_init(AVCodecContext *avctx)
6546 {
6547     V4L2Context *capture, *output;
6548@@ -185,10 +1167,27 @@ static av_cold int v4l2_decode_init(AVCo
6549     V4L2m2mPriv *priv = avctx->priv_data;
6550     int ret;
6551
6552+    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
6553+
6554+    if (avctx->codec_id == AV_CODEC_ID_H264) {
6555+        if (avctx->ticks_per_frame == 1) {
6556+            if(avctx->time_base.den < INT_MAX/2) {
6557+                avctx->time_base.den *= 2;
6558+            } else
6559+                avctx->time_base.num /= 2;
6560+        }
6561+        avctx->ticks_per_frame = 2;
6562+    }
6563+
6564     ret = ff_v4l2_m2m_create_context(priv, &s);
6565     if (ret < 0)
6566         return ret;
6567
6568+    parse_extradata(avctx, s);
6569+
6570+    xlat_init(&s->xlat);
6571+    pts_stats_init(&s->pts_stat, avctx, "decoder");
6572+
6573     capture = &s->capture;
6574     output = &s->output;
6575
6576@@ -196,14 +1195,45 @@ static av_cold int v4l2_decode_init(AVCo
6577      * by the v4l2 driver; this event will trigger a full pipeline reconfig and
6578      * the proper values will be retrieved from the kernel driver.
6579      */
6580-    output->height = capture->height = avctx->coded_height;
6581-    output->width = capture->width = avctx->coded_width;
6582+//    output->height = capture->height = avctx->coded_height;
6583+//    output->width = capture->width = avctx->coded_width;
6584+    output->height = capture->height = 0;
6585+    output->width = capture->width = 0;
6586
6587     output->av_codec_id = avctx->codec_id;
6588     output->av_pix_fmt  = AV_PIX_FMT_NONE;
6589+    output->min_buf_size = max_coded_size(avctx);
6590
6591     capture->av_codec_id = AV_CODEC_ID_RAWVIDEO;
6592     capture->av_pix_fmt = avctx->pix_fmt;
6593+    capture->min_buf_size = 0;
6594+
6595+    capture->av_pix_fmt = AV_PIX_FMT_NONE;
6596+    s->output_drm = 0;
6597+
6598+    s->db_ctl = NULL;
6599+    if (priv->dmabuf_alloc != NULL && strcmp(priv->dmabuf_alloc, "v4l2") != 0) {
6600+        if (strcmp(priv->dmabuf_alloc, "cma") == 0)
6601+            s->db_ctl = dmabufs_ctl_new();
6602+        else {
6603+            av_log(avctx, AV_LOG_ERROR, "Unknown dmabuf alloc method: '%s'\n", priv->dmabuf_alloc);
6604+            return AVERROR(EINVAL);
6605+        }
6606+        if (!s->db_ctl) {
6607+            av_log(avctx, AV_LOG_ERROR, "Can't open dmabuf provider '%s'\n", priv->dmabuf_alloc);
6608+            return AVERROR(ENOMEM);
6609+        }
6610+    }
6611+
6612+    s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM);
6613+    if (!s->device_ref) {
6614+        ret = AVERROR(ENOMEM);
6615+        return ret;
6616+    }
6617+
6618+    ret = av_hwdevice_ctx_init(s->device_ref);
6619+    if (ret < 0)
6620+        return ret;
6621
6622     s->avctx = avctx;
6623     ret = ff_v4l2_m2m_codec_init(priv);
6624@@ -212,12 +1242,90 @@ static av_cold int v4l2_decode_init(AVCo
6625         return ret;
6626     }
6627
6628-    return v4l2_prepare_decoder(s);
6629+    if (avctx->extradata &&
6630+        (ret = copy_extradata(avctx, avctx->extradata, avctx->extradata_size, &s->extdata_data, &s->extdata_size)) != 0) {
6631+        av_log(avctx, AV_LOG_ERROR, "Failed to copy extradata from context: %s\n", av_err2str(ret));
6632+#if DUMP_FAILED_EXTRADATA
6633+        log_dump(avctx, AV_LOG_INFO, avctx->extradata, avctx->extradata_size);
6634+#endif
6635+        return ret;
6636+    }
6637+
6638+    if ((ret = get_quirks(avctx, s)) != 0)
6639+        return ret;
6640+
6641+    if ((ret = check_profile(avctx, s)) != 0) {
6642+        av_log(avctx, AV_LOG_WARNING, "Profile %d not supported by decode\n", avctx->profile);
6643+        return ret;
6644+    }
6645+
6646+    // Size check done as part of format filtering
6647+    if ((ret = choose_capture_format(avctx, s)) != 0)
6648+        return ret;
6649+
6650+    if ((ret = v4l2_prepare_decoder(s)) < 0)
6651+        return ret;
6652+
6653+    return 0;
6654 }
6655
6656 static av_cold int v4l2_decode_close(AVCodecContext *avctx)
6657 {
6658-    return ff_v4l2_m2m_codec_end(avctx->priv_data);
6659+    int rv;  // result of ff_v4l2_m2m_codec_end(); traced and then returned unchanged
6660+    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
6661+    rv = ff_v4l2_m2m_codec_end(avctx->priv_data);
6662+    av_log(avctx, AV_LOG_TRACE, ">>> %s: rv=%d\n", __func__, rv);
6663+    return rv;
6664+}
6665+
6666+static void v4l2_decode_flush(AVCodecContext *avctx)  // Stops the output queue, drops buffered input and resets decoder state so decoding can restart
6667+{
6668+    // An alternative and more drastic form of flush is to simply do this:
6669+    //    v4l2_decode_close(avctx);
6670+    //    v4l2_decode_init(avctx);
6671+    // The downside is that this keeps a decoder open until all the frames
6672+    // associated with it have been returned.  This is a bit wasteful on
6673+    // possibly limited h/w resources and fails on a Pi for this reason unless
6674+    // more GPU mem is allocated than is the default.
6675+
6676+    V4L2m2mPriv * const priv = avctx->priv_data;
6677+    V4L2m2mContext * const s = priv->context;
6678+    V4L2Context * const output = &s->output;
6679+    V4L2Context * const capture = &s->capture;
6680+
6681+    av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon);
6682+
6683+    // Reflushing everything is benign, quick and avoids having to worry about
6684+    // states like EOS processing so don't try to optimize out (having got it
6685+    // wrong once)
6686+
6687+    ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF);
6688+
6689+    // Clear any buffered input packet
6690+    av_packet_unref(&s->buf_pkt);
6691+
6692+    // Clear a pending EOS
6693+    if (ff_v4l2_ctx_eos(capture)) {
6694+        // Arguably we could delay this but this is easy and doesn't require
6695+        // thought or extra vars
6696+        ff_v4l2_context_set_status(capture, VIDIOC_STREAMOFF);
6697+        ff_v4l2_context_set_status(capture, VIDIOC_STREAMON);
6698+    }
6699+
6700+    // V4L2 makes no guarantees about whether decoded frames are flushed or not
6701+    // so mark all frames we are tracking to be discarded if they appear
6702+    xlat_flush(&s->xlat);
6703+
6704+    // Resend extradata with the next input
6705+    s->extdata_sent = 0;
6706+    // Clear status vars
6707+    s->running = 0;
6708+    s->draining = 0;
6709+    output->done = 0;
6710+    capture->done = 0;
6711+
6712+    // Stream on will occur when we actually submit a new frame
6713+    av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__);
6714 }
6715
6716 #define OFFSET(x) offsetof(V4L2m2mPriv, x)
6717@@ -226,10 +1334,17 @@ static av_cold int v4l2_decode_close(AVC
6718 static const AVOption options[] = {
6719     V4L_M2M_DEFAULT_OPTS,
6720     { "num_capture_buffers", "Number of buffers in the capture context",
6721-        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 20, INT_MAX, FLAGS },
6722+        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS },
6723+    { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS },
6724+    { "dmabuf_alloc", "Dmabuf alloc method", OFFSET(dmabuf_alloc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS },
6725     { NULL},
6726 };
6727
6728+static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = {
6729+    HW_CONFIG_INTERNAL(DRM_PRIME),
6730+    NULL
6731+};
6732+
6733 #define M2MDEC_CLASS(NAME) \
6734     static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \
6735         .class_name = #NAME "_v4l2m2m_decoder", \
6736@@ -250,10 +1365,16 @@ static const AVOption options[] = {
6737         .init           = v4l2_decode_init, \
6738         FF_CODEC_RECEIVE_FRAME_CB(v4l2_receive_frame), \
6739         .close          = v4l2_decode_close, \
6740+        .flush          = v4l2_decode_flush, \
6741         .bsfs           = bsf_name, \
6742         .p.capabilities = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
6743         .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP, \
6744         .p.wrapper_name = "v4l2m2m", \
6745+        .p.pix_fmts     = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \
6746+                                                         AV_PIX_FMT_NV12, \
6747+                                                         AV_PIX_FMT_YUV420P, \
6748+                                                         AV_PIX_FMT_NONE}, \
6749+        .hw_configs     = v4l2_m2m_hw_configs, \
6750     }
6751
6752 M2MDEC(h264,  "H.264", AV_CODEC_ID_H264,       "h264_mp4toannexb");
6753--- a/libavcodec/v4l2_m2m_enc.c
6754+++ b/libavcodec/v4l2_m2m_enc.c
6755@@ -24,6 +24,8 @@
6756 #include <linux/videodev2.h>
6757 #include <sys/ioctl.h>
6758 #include <search.h>
6759+#include <drm_fourcc.h>
6760+
6761 #include "encode.h"
6762 #include "libavcodec/avcodec.h"
6763 #include "libavutil/pixdesc.h"
6764@@ -38,6 +40,34 @@
6765 #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x
6766 #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x
6767
6768+// P030 should be defined in drm_fourcc.h and hopefully will be sometime
6769+// in the future but until then...
6770+#ifndef DRM_FORMAT_P030
6771+#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0')
6772+#endif
6773+
6774+#ifndef DRM_FORMAT_NV15
6775+#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
6776+#endif
6777+
6778+#ifndef DRM_FORMAT_NV20
6779+#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
6780+#endif
6781+
6782+#ifndef V4L2_CID_CODEC_BASE
6783+#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE
6784+#endif
6785+
6786+// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
6787+// in videodev2.h hopefully will be sometime in the future but until then...
6788+#ifndef V4L2_PIX_FMT_NV12_10_COL128
6789+#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
6790+#endif
6791+
6792+#ifndef V4L2_PIX_FMT_NV12_COL128
6793+#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12  Y/CbCr 4:2:0 128 pixel wide column */
6794+#endif
6795+
6796 static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den)
6797 {
6798     struct v4l2_streamparm parm = { 0 };
6799@@ -148,15 +178,14 @@ static inline int v4l2_mpeg4_profile_fro
6800 static int v4l2_check_b_frame_support(V4L2m2mContext *s)
6801 {
6802     if (s->avctx->max_b_frames)
6803-        av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n");
6804+        av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames);
6805
6806-    v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0);
6807+    v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1);  // pass the caller's count; the get below overwrites max_b_frames with the control's value
6808     v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0);
6809     if (s->avctx->max_b_frames == 0)
6810         return 0;
6811
6812     avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding");
6813-
6814     return AVERROR_PATCHWELCOME;
6815 }
6816
6817@@ -271,17 +300,208 @@ static int v4l2_prepare_encoder(V4L2m2mC
6818     return 0;
6819 }
6820
6821+static int avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame)  // Fills format's pixelformat/width/height/bytesperline from the DRM descriptor in frame->data[0]; returns 0 or AVERROR(EINVAL)
6822+{
6823+    const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0];
6824+
6825+    const uint32_t drm_fmt = src->layers[0].format;
6826+    // Treat INVALID as LINEAR
6827+    const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ?
6828+        DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier;
6829+    uint32_t pix_fmt = 0;  // stays 0 unless a supported format/modifier/plane-count combination is matched
6830+    uint32_t w = 0;
6831+    uint32_t h = 0;
6832+    uint32_t bpl = src->layers[0].planes[0].pitch;
6833+
6834+    // We really don't expect multiple layers
6835+    // All formats that we currently cope with are single object
6836+    // A zero pitch would divide by zero in the linear cases below, so reject it too
6837+    if (src->nb_layers != 1 || src->nb_objects != 1 || bpl == 0)
6838+        return AVERROR(EINVAL);
6839+
6840+    switch (drm_fmt) {
6841+        case DRM_FORMAT_YUV420:
6842+            if (mod == DRM_FORMAT_MOD_LINEAR) {
6843+                if (src->layers[0].nb_planes != 3)
6844+                    break;
6845+                pix_fmt = V4L2_PIX_FMT_YUV420;
6846+                h = src->layers[0].planes[1].offset / bpl;  // rows of plane 0 = offset of plane 1 / pitch
6847+                w = bpl;
6848+            }
6849+            break;
6850+
6851+        case DRM_FORMAT_NV12:
6852+            if (mod == DRM_FORMAT_MOD_LINEAR) {
6853+                if (src->layers[0].nb_planes != 2)
6854+                    break;
6855+                pix_fmt = V4L2_PIX_FMT_NV12;
6856+                h = src->layers[0].planes[1].offset / bpl;  // rows of plane 0 = offset of plane 1 / pitch
6857+                w = bpl;
6858+            }
6859+            else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
6860+                if (src->layers[0].nb_planes != 2)
6861+                    break;
6862+                pix_fmt = V4L2_PIX_FMT_NV12_COL128;
6863+                w = bpl;
6864+                h = src->layers[0].planes[1].offset / 128;
6865+                bpl = fourcc_mod_broadcom_param(mod);  // bytesperline is taken from the modifier parameter here
6866+            }
6867+            break;
6868+
6869+        case DRM_FORMAT_P030:
6870+            if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
6871+                if (src->layers[0].nb_planes != 2)
6872+                    break;
6873+                pix_fmt =  V4L2_PIX_FMT_NV12_10_COL128;
6874+                w = bpl / 2;  // Matching lie to how we construct this
6875+                h = src->layers[0].planes[1].offset / 128;
6876+                bpl = fourcc_mod_broadcom_param(mod);  // bytesperline is taken from the modifier parameter here
6877+            }
6878+            break;
6879+
6880+        default:
6881+            break;
6882+    }
6883+
6884+    if (!pix_fmt)
6885+        return AVERROR(EINVAL);
6886+
6887+    if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) {
6888+        struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp;
6889+
6890+        pix->width = w;
6891+        pix->height = h;
6892+        pix->pixelformat = pix_fmt;
6893+        pix->plane_fmt[0].bytesperline = bpl;
6894+        pix->num_planes = 1;
6895+    }
6896+    else {
6897+        struct v4l2_pix_format *const pix = &format->fmt.pix;
6898+
6899+        pix->width = w;
6900+        pix->height = h;
6901+        pix->pixelformat = pix_fmt;
6902+        pix->bytesperline = bpl;
6903+    }
6904+
6905+    return 0;
6906+}
6907+
6908+// Do we have similar enough formats to be usable? Returns 1 if type, pixelformat, plane count and every bytesperline match, else 0
6909+static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b)
6910+{
6911+    if (a->type != b->type)
6912+        return 0;
6913+
6914+    if (V4L2_TYPE_IS_MULTIPLANAR(a->type)) {
6915+        const struct v4l2_pix_format_mplane *const pa = &a->fmt.pix_mp;
6916+        const struct v4l2_pix_format_mplane *const pb = &b->fmt.pix_mp;
6917+        unsigned int i;
6918+        if (pa->pixelformat != pb->pixelformat ||
6919+            pa->num_planes != pb->num_planes)
6920+            return 0;
6921+        for (i = 0; i != pa->num_planes; ++i) {  // width and height are deliberately not compared
6922+            if (pa->plane_fmt[i].bytesperline != pb->plane_fmt[i].bytesperline)
6923+                return 0;
6924+        }
6925+    }
6926+    else {
6927+        const struct v4l2_pix_format *const pa = &a->fmt.pix;
6928+        const struct v4l2_pix_format *const pb = &b->fmt.pix;
6929+        if (pa->pixelformat != pb->pixelformat ||
6930+            pa->bytesperline != pb->bytesperline)
6931+            return 0;
6932+    }
6933+    return 1;
6934+}
6935+
6936+static inline int q_full(const V4L2Context *const output)  // non-zero when the queued count equals the context's buffer count
6937+{
6938+    return ff_v4l2_context_q_count(output) == output->num_buffers;
6939+}
6940+
6941 static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame)
6942 {
6943     V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
6944     V4L2Context *const output = &s->output;
6945+    int rv;
6946+    const int needs_slot = q_full(output);  // every output buffer is queued, so one must come back before we can enqueue
6947+
6948+    av_log(avctx, AV_LOG_TRACE, "<<< %s; needs_slot=%d\n", __func__, needs_slot);
6949+
6950+    // Signal EOF if needed (doesn't need q slot)
6951+    if (!frame) {
6952+        av_log(avctx, AV_LOG_TRACE, "--- %s: EOS\n", __func__);
6953+        return ff_v4l2_context_enqueue_frame(output, frame);
6954+    }
6955+
6956+    if ((rv = ff_v4l2_dq_all(output, needs_slot? 500 : 0)) != 0) {  // timeout 500 only when a slot is needed (units presumably ms - confirm)
6957+        // We should be able to return AVERROR(EAGAIN) to indicate buffer
6958+        // exhaustion, but ffmpeg currently treats that as fatal.
6959+        av_log(avctx, AV_LOG_WARNING, "Failed to get buffer for src frame: %s\n", av_err2str(rv));
6960+        return rv;
6961+    }
6962+
6963+    if (s->input_drm && !output->streamon) {
6964+        struct v4l2_format req_format = {.type = output->format.type};
6965+
6966+        // Set format when we first get a buffer
6967+        if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) {
6968+            av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n");
6969+            return rv;
6970+        }
6971+
6972+        ff_v4l2_context_release(output);
6973+
6974+        output->format = req_format;
6975+
6976+        if ((rv = ff_v4l2_context_set_format(output)) != 0) {
6977+            av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n");
6978+            return rv;
6979+        }
6980+
6981+        if (!fmt_eq(&req_format, &output->format)) {  // output->format after set_format must still match what we asked for
6982+            av_log(avctx, AV_LOG_ERROR, "Format mismatch after setup\n");
6983+            return AVERROR(EINVAL);
6984+        }
6985+
6986+        output->selection.top = frame->crop_top;
6987+        output->selection.left = frame->crop_left;
6988+        output->selection.width = av_frame_cropped_width(frame);
6989+        output->selection.height = av_frame_cropped_height(frame);
6990+
6991+        if ((rv = ff_v4l2_context_init(output)) != 0) {
6992+            av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n");
6993+            return rv;
6994+        }
6995+
6996+        {
6997+            struct v4l2_selection selection = {
6998+                .type = V4L2_BUF_TYPE_VIDEO_OUTPUT,
6999+                .target = V4L2_SEL_TGT_CROP,
7000+                .r = output->selection
7001+            };
7002+            if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0) {  // failure is only warned about; encoding continues
7003+                av_log(avctx, AV_LOG_WARNING, "S_SELECTION (CROP) %dx%d @ %d,%d failed: %s\n",
7004+                       selection.r.width, selection.r.height, selection.r.left, selection.r.top,
7005+                       av_err2str(AVERROR(errno)));
7006+            } else  // report OK only when the ioctl succeeded (it used to be logged after a failure too)
7007+                av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n",
7008+                       selection.r.width, selection.r.height, selection.r.left, selection.r.top);
7009+        }
7010+    }
7011
7012 #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME
7013-    if (frame && frame->pict_type == AV_PICTURE_TYPE_I)
7014+    if (frame->pict_type == AV_PICTURE_TYPE_I)  // frame is known non-NULL here: the EOS case returned earlier
7015         v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1);
7016 #endif
7017
7018-    return ff_v4l2_context_enqueue_frame(output, frame);
7019+    rv = ff_v4l2_context_enqueue_frame(output, frame);
7020+    if (rv) {
7021+        av_log(avctx, AV_LOG_ERROR, "Enqueue frame failed: %s\n", av_err2str(rv));
7022+    }
7023+
7024+    return rv;
7025 }
7026
7027 static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
7028@@ -292,6 +512,11 @@ static int v4l2_receive_packet(AVCodecCo
7029     AVFrame *frame = s->frame;
7030     int ret;
7031
7032+    av_log(avctx, AV_LOG_TRACE, "<<< %s: qlen out %d cap %d\n", __func__,
7033+           ff_v4l2_context_q_count(output), ff_v4l2_context_q_count(capture));
7034+
7035+    ff_v4l2_dq_all(output, 0);
7036+
7037     if (s->draining)
7038         goto dequeue;
7039
7040@@ -328,7 +553,115 @@ static int v4l2_receive_packet(AVCodecCo
7041     }
7042
7043 dequeue:
7044-    return ff_v4l2_context_dequeue_packet(capture, avpkt);
7045+    // Dequeue a frame
7046+    for (;;) {
7047+        int t = q_full(output) ? -1 : s->draining ? 300 : 0;
7048+        int rv2;
7049+
7050+        // If output is full wait for either a packet or output to become not full
7051+        ret = ff_v4l2_context_dequeue_packet(capture, avpkt, t);
7052+
7053+        // If output was full retry packet dequeue
7054+        t = (ret != AVERROR(EAGAIN) || t != -1) ? 0 : 300;
7055+        rv2 = ff_v4l2_dq_all(output, t);
7056+        if (t == 0 || rv2 != 0)
7057+            break;
7058+    }
7059+    if (ret)
7060+        return (s->draining && ret == AVERROR(EAGAIN)) ? AVERROR_EOF : ret;
7061+
7062+    if (capture->first_buf == 1) {
7063+        uint8_t * data;
7064+        const int len = avpkt->size;
7065+
7066+        // 1st buffer after streamon should be SPS/PPS
7067+        capture->first_buf = 2;
7068+
7069+        // Clear both possible stores so there is no chance of confusion
7070+        av_freep(&s->extdata_data);
7071+        s->extdata_size = 0;
7072+        av_freep(&avctx->extradata);
7073+        avctx->extradata_size = 0;
7074+
7075+        if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL)
7076+            goto fail_no_mem;
7077+
7078+        memcpy(data, avpkt->data, len);
7079+        av_packet_unref(avpkt);
7080+
7081+        // We need to copy the header, but keep local if not global
7082+        if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) {
7083+            avctx->extradata = data;
7084+            avctx->extradata_size = len;
7085+        }
7086+        else {
7087+            s->extdata_data = data;
7088+            s->extdata_size = len;
7089+        }
7090+
7091+        ret = ff_v4l2_context_dequeue_packet(capture, avpkt, 0);
7092+        ff_v4l2_dq_all(output, 0);
7093+        if (ret)
7094+            return ret;
7095+    }
7096+
7097+    // First frame must be key so mark as such even if encoder forgot
7098+    if (capture->first_buf == 2) {
7099+        avpkt->flags |= AV_PKT_FLAG_KEY;
7100+
7101+        // Add any extradata to the 1st packet we emit as we cannot create it at init
7102+        if (avctx->extradata_size > 0 && avctx->extradata) {
7103+            void * const side = av_packet_new_side_data(avpkt,
7104+                                           AV_PKT_DATA_NEW_EXTRADATA,
7105+                                           avctx->extradata_size);
7106+            if (!side)
7107+                goto fail_no_mem;
7108+
7109+            memcpy(side, avctx->extradata, avctx->extradata_size);
7110+        }
7111+    }
7112+
7113+    // Add SPS/PPS to the start of every key frame if non-global headers
7114+    if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) {
7115+        const size_t newlen = s->extdata_size + avpkt->size;
7116+        AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE);
7117+
7118+        if (buf == NULL)
7119+            goto fail_no_mem;
7120+
7121+        memcpy(buf->data, s->extdata_data, s->extdata_size);
7122+        memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size);
7123+
7124+        av_buffer_unref(&avpkt->buf);
7125+        avpkt->buf = buf;
7126+        avpkt->data = buf->data;
7127+        avpkt->size = newlen;
7128+    }
7129+    else if (ff_v4l2_context_q_count(capture) < 2) {
7130+        // Avoid running out of capture buffers
7131+        // In most cases the buffers will be returned quickly in which case
7132+        // we don't copy and can use the v4l2 buffers directly but sometimes
7133+        // ffmpeg seems to hold onto all of them for a long time (.mkv
7134+        // creation?) so avoid deadlock in those cases.
7135+        AVBufferRef * const buf = av_buffer_alloc(avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
7136+        if (buf == NULL)
7137+            goto fail_no_mem;
7138+
7139+        memcpy(buf->data, avpkt->data, avpkt->size);
7140+        av_buffer_unref(&avpkt->buf);  // Will recycle the V4L2 buffer
7141+
7142+        avpkt->buf = buf;
7143+        avpkt->data = buf->data;
7144+    }
7145+
7146+    capture->first_buf = 0;
7147+    return 0;
7148+
7149+fail_no_mem:
7150+    av_log(avctx, AV_LOG_ERROR, "Rx pkt failed: No memory\n");
7151+    ret = AVERROR(ENOMEM);
7152+    av_packet_unref(avpkt);
7153+    return ret;
7154 }
7155
7156 static av_cold int v4l2_encode_init(AVCodecContext *avctx)
7157@@ -340,6 +673,8 @@ static av_cold int v4l2_encode_init(AVCo
7158     uint32_t v4l2_fmt_output;
7159     int ret;
7160
7161+    av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt);
7162+
7163     ret = ff_v4l2_m2m_create_context(priv, &s);
7164     if (ret < 0)
7165         return ret;
7166@@ -347,13 +682,17 @@ static av_cold int v4l2_encode_init(AVCo
7167     capture = &s->capture;
7168     output  = &s->output;
7169
7170+    s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME);
7171+
7172     /* common settings output/capture */
7173     output->height = capture->height = avctx->height;
7174     output->width = capture->width = avctx->width;
7175
7176     /* output context */
7177     output->av_codec_id = AV_CODEC_ID_RAWVIDEO;
7178-    output->av_pix_fmt = avctx->pix_fmt;
7179+    output->av_pix_fmt = !s->input_drm ? avctx->pix_fmt :
7180+            avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? avctx->sw_pix_fmt :
7181+            AV_PIX_FMT_YUV420P;
7182
7183     /* capture context */
7184     capture->av_codec_id = avctx->codec_id;
7185@@ -372,7 +711,7 @@ static av_cold int v4l2_encode_init(AVCo
7186         v4l2_fmt_output = output->format.fmt.pix.pixelformat;
7187
7188     pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO);
7189-    if (pix_fmt_output != avctx->pix_fmt) {
7190+    if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) {
7191         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output);
7192         av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name);
7193         return AVERROR(EINVAL);
7194@@ -390,9 +729,10 @@ static av_cold int v4l2_encode_close(AVC
7195 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
7196
7197 #define V4L_M2M_CAPTURE_OPTS \
7198-    V4L_M2M_DEFAULT_OPTS,\
7199+    { "num_output_buffers", "Number of buffers in the output context",\
7200+        OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },\
7201     { "num_capture_buffers", "Number of buffers in the capture context", \
7202-        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS }
7203+        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 8 }, 8, INT_MAX, FLAGS }
7204
7205 static const AVOption mpeg4_options[] = {
7206     V4L_M2M_CAPTURE_OPTS,
7207--- /dev/null
7208+++ b/libavcodec/v4l2_req_decode_q.c
7209@@ -0,0 +1,84 @@
7210+#include <memory.h>
7211+#include <semaphore.h>
7212+#include <pthread.h>
7213+
7214+#include "v4l2_req_decode_q.h"
7215+
7216+int decode_q_in_q(const req_decode_ent * const d)
7217+{   // Reports the entry's in_q flag (set by decode_q_add, cleared by decode_q_remove)
7218+    return d->in_q;  // NOTE(review): read without holding any queue lock
7219+}
7220+
7221+void decode_q_add(req_decode_q * const q, req_decode_ent * const d)
7222+{
7223+    // Append d at the tail of q and flag it as queued, all under q_lock
7224+    pthread_mutex_lock(&q->q_lock);
7225+
7226+    if (q->head != NULL) {
7227+        d->prev = q->tail;      // Non-empty: chain behind the current tail
7228+        q->tail->next = d;
7229+    } else {
7230+        d->prev = NULL;         // Empty: d is head as well as tail
7231+        q->head = d;
7232+    }
7233+    q->tail = d;
7234+    d->next = NULL;
7235+    d->in_q = 1;
7236+    pthread_mutex_unlock(&q->q_lock);
7237+}
7238+
7239+// Remove entry from Q - if head wake-up anything that was waiting
7240+void decode_q_remove(req_decode_q * const q, req_decode_ent * const d)
7241+{
7242+    int try_signal = 0;
7243+
7244+    if (!d->in_q)  // Not queued: nothing to unlink (NOTE(review): tested before q_lock is taken)
7245+        return;
7246+
7247+    pthread_mutex_lock(&q->q_lock);
7248+    if (d->prev)  // Bypass d in the forward chain
7249+        d->prev->next = d->next;
7250+    else {
7251+        try_signal = 1;  // Only need to signal if we were head
7252+        q->head = d->next;
7253+    }
7254+
7255+    if (d->next)  // Bypass d in the backward chain, or pull the tail back
7256+        d->next->prev = d->prev;
7257+    else
7258+        q->tail = d->prev;
7259+
7260+    // Not strictly needed but makes debug easier
7261+    d->next = NULL;
7262+    d->prev = NULL;
7263+    d->in_q = 0;
7264+    pthread_mutex_unlock(&q->q_lock);
7265+
7266+    if (try_signal)  // Broadcast after unlocking; decode_q_wait re-tests head under the lock
7267+        pthread_cond_broadcast(&q->q_cond);
7268+}
7269+
7270+void decode_q_wait(req_decode_q * const q, req_decode_ent * const d)
7271+{   // Block the caller until d is the head entry of q
7272+    pthread_mutex_lock(&q->q_lock);
7273+
7274+    while (q->head != d)  // Re-test after every wake-up; decode_q_remove broadcasts when the head changes
7275+        pthread_cond_wait(&q->q_cond, &q->q_lock);
7276+
7277+    pthread_mutex_unlock(&q->q_lock);
7278+}
7279+
7280+void decode_q_uninit(req_decode_q * const q)
7281+{   // Destroy the lock and condition variable created by decode_q_init
7282+    pthread_mutex_destroy(&q->q_lock);
7283+    pthread_cond_destroy(&q->q_cond);
7284+}
7285+
7286+void decode_q_init(req_decode_q * const q)
7287+{   // Reset q to an empty queue and create its lock and condition variable
7288+    memset(q, 0, sizeof(*q));  // head = tail = NULL
7289+    pthread_mutex_init(&q->q_lock, NULL);  // Default attributes; return value is not checked
7290+    pthread_cond_init(&q->q_cond, NULL);
7291+}
7292+
7293+
7294--- /dev/null
7295+++ b/libavcodec/v4l2_req_decode_q.h
7296@@ -0,0 +1,27 @@
7297+#ifndef AVCODEC_V4L2_REQ_DECODE_Q_H
7298+#define AVCODEC_V4L2_REQ_DECODE_Q_H
7299+
7300+#include <pthread.h>
7301+
7302+typedef struct req_decode_ent {  // Link node for one entry of a req_decode_q
7303+    struct req_decode_ent * next;  // Towards the tail; NULL at the tail or when not queued
7304+    struct req_decode_ent * prev;  // Towards the head; NULL at the head or when not queued
7305+    int in_q;  // 1 between decode_q_add and decode_q_remove, else 0
7306+} req_decode_ent;
7307+
7308+typedef struct req_decode_q {  // Doubly linked queue of req_decode_ent
7309+    pthread_mutex_t q_lock;  // Guards head, tail and the entries' links
7310+    pthread_cond_t q_cond;  // Broadcast by decode_q_remove when the head entry is removed
7311+    req_decode_ent * head;  // Oldest entry (the one decode_q_wait waits to become), NULL if empty
7312+    req_decode_ent * tail;  // Newest entry, NULL if empty
7313+} req_decode_q;
7314+
7315+int decode_q_in_q(const req_decode_ent * const d);
7316+void decode_q_add(req_decode_q * const q, req_decode_ent * const d);
7317+void decode_q_remove(req_decode_q * const q, req_decode_ent * const d);
7318+void decode_q_wait(req_decode_q * const q, req_decode_ent * const d);
7319+void decode_q_uninit(req_decode_q * const q);
7320+void decode_q_init(req_decode_q * const q);
7321+
7322+#endif
7323+
7324--- /dev/null
7325+++ b/libavcodec/v4l2_req_devscan.c
7326@@ -0,0 +1,451 @@
7327+#include <errno.h>
7328+#include <fcntl.h>
7329+#include <libudev.h>
7330+#include <stdlib.h>
7331+#include <string.h>
7332+#include <unistd.h>
7333+
7334+#include <sys/ioctl.h>
7335+#include <sys/sysmacros.h>
7336+
7337+#include <linux/media.h>
7338+#include <linux/videodev2.h>
7339+
7340+#include "v4l2_req_devscan.h"
7341+#include "v4l2_req_utils.h"
7342+
7343+struct decdev {  // One candidate device: a media node / video node pair plus a source format
7344+    enum v4l2_buf_type src_type;  // Queue type the format was enumerated on
7345+    uint32_t src_fmt_v4l2;  // Pixel format value as reported by VIDIOC_ENUM_FMT
7346+    const char * vname;  // Video device node path
7347+    const char * mname;  // Media device node path
7348+};
7349+
7350+struct devscan {  // Result of devscan_build
7351+    struct decdev env;  // Override taken from the environment; used when both paths are set
7352+    unsigned int dev_size;  // Allocated capacity of devs, in entries
7353+    unsigned int dev_count;  // Entries of devs in use
7354+    struct decdev *devs;  // Growable array filled by devscan_add
7355+};
7356+
7357+static int video_src_pixfmt_supported(uint32_t fmt)
7358+{   // Filter hook used by probe_formats; fmt is currently ignored
7359+    return 1;  // Every format is accepted
7360+}
7361+
7362+static void v4l2_setup_format(struct v4l2_format *format, unsigned int type,
7363+                  unsigned int width, unsigned int height,
7364+                  unsigned int pixelformat)
7365+{
7366+    // Output-type queues ask for 4MiB buffers; for any other type the size is left at 0
7367+    const unsigned int buf_size = V4L2_TYPE_IS_OUTPUT(type) ? 4 * 1024 * 1024 : 0;
7368+
7369+    memset(format, 0, sizeof(*format));     // Everything not set below stays zero
7370+    format->type = type;
7371+    if (!V4L2_TYPE_IS_MULTIPLANAR(type)) {
7372+        struct v4l2_pix_format * const pix = &format->fmt.pix;
7373+        pix->width = width;
7374+        pix->height = height;
7375+        pix->sizeimage = buf_size;
7376+        pix->pixelformat = pixelformat;
7377+    } else {
7378+        struct v4l2_pix_format_mplane * const mp = &format->fmt.pix_mp;
7379+        mp->width = width;
7380+        mp->height = height;
7381+        mp->plane_fmt[0].sizeimage = buf_size;  // Only plane 0 gets a size
7382+        mp->pixelformat = pixelformat;
7383+    }
7384+}
7385+
7386+static int v4l2_set_format(int video_fd, unsigned int type, unsigned int pixelformat,
7387+            unsigned int width, unsigned int height)
7388+{   // Build a format request with v4l2_setup_format and apply it with VIDIOC_S_FMT
7389+    struct v4l2_format format;
7390+
7391+    v4l2_setup_format(&format, type, width, height, pixelformat);
7392+
7393+    return ioctl(video_fd, VIDIOC_S_FMT, &format) ? -errno : 0;  // 0 on success, else negated errno
7394+}
7395+
7396+static int v4l2_query_capabilities(int video_fd, unsigned int *capabilities)
7397+{
7398+    // Run VIDIOC_QUERYCAP and hand back the capability bitmask: device_caps
7399+    // when V4L2_CAP_DEVICE_CAPS is flagged, otherwise the plain capabilities
7400+    struct v4l2_capability cap = { 0 };
7401+
7402+    if (ioctl(video_fd, VIDIOC_QUERYCAP, &cap) < 0)
7403+        return -errno;
7404+
7405+    // A NULL output pointer means the caller only wants success / failure
7406+    if (capabilities == NULL)
7407+        return 0;
7408+
7409+    *capabilities = (cap.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ?
7410+        cap.device_caps : cap.capabilities;
7411+
7412+    return 0;
7413+}
7414+
7415+static int devscan_add(struct devscan *const scan,
7416+                       enum v4l2_buf_type src_type,
7417+                       uint32_t src_fmt_v4l2,
7418+                       const char * vname,
7419+                       const char * mname)
7420+{   // Append one device entry (with private copies of both paths); returns 0 or -ENOMEM
7421+    struct decdev *d;
7422+
7423+    if (scan->dev_size <= scan->dev_count) {  // Array full (or not yet allocated): grow it
7424+        unsigned int n = !scan->dev_size ? 4 : scan->dev_size * 2;  // Start at 4 entries, then double
7425+        d = realloc(scan->devs, n * sizeof(*d));
7426+        if (!d)
7427+            return -ENOMEM;
7428+        scan->devs = d;
7429+        scan->dev_size = n;
7430+    }
7431+
7432+    d = scan->devs + scan->dev_count;  // First unused slot
7433+    d->src_type = src_type;
7434+    d->src_fmt_v4l2 = src_fmt_v4l2;
7435+    d->vname = strdup(vname);
7436+    if (!d->vname)
7437+        return -ENOMEM;
7438+    d->mname = strdup(mname);
7439+    if (!d->mname) {
7440+        free((char *)d->vname);  // Slot is not counted on failure, so devscan_delete would not free this
7441+        return -ENOMEM;
7442+    }
7443+    ++scan->dev_count;  // Entry is only counted once fully populated
7444+    return 0;
7445+}
7446+
7447+void devscan_delete(struct devscan **const pScan)
7448+{
7449+    struct devscan * const scan = *pScan;
7450+    unsigned int n;
7451+
7452+    if (scan == NULL)
7453+        return;
7454+    *pScan = NULL;              // Caller's handle is cleared before teardown
7455+    // Release the per-device path strings (newest first), then the containers
7456+    for (n = scan->dev_count; n != 0; --n) {
7457+        free((char*)scan->devs[n - 1].mname);
7458+        free((char*)scan->devs[n - 1].vname);
7459+    }
7460+    free(scan->devs);
7461+    free(scan);
7462+}
7463+
7464+#define REQ_BUF_CAPS (\
7465+    V4L2_BUF_CAP_SUPPORTS_DMABUF |\
7466+    V4L2_BUF_CAP_SUPPORTS_REQUESTS |\
7467+    V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF)
7468+
7469+static void probe_formats(void * const dc,
7470+              struct devscan *const scan,
7471+              const int fd,
7472+              const unsigned int type_v4l2,
7473+              const char *const mpath,
7474+              const char *const vpath)
7475+{
7476+    // Enumerate the formats of queue type_v4l2 on fd; add those whose buffers report all REQ_BUF_CAPS
7477+    unsigned int i;
7478+    for (i = 0;; ++i) {
7479+        int rv;
7480+        struct v4l2_fmtdesc fmtdesc = {
7481+            .index = i,
7482+            .type = type_v4l2
7483+        };
7484+        struct v4l2_requestbuffers rbufs = {
7485+            .count = 0,
7486+            .type = type_v4l2,
7487+            .memory = V4L2_MEMORY_MMAP
7488+        };
7489+        while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) {
7490+            if (errno == EINTR)
7491+                continue;
7492+            if (errno != EINVAL)  // EINVAL is taken as the normal end of the list
7493+                request_err(dc, "Enum[%d] failed for type=%d\n", i, type_v4l2);
7494+            return;
7495+        }
7496+        if (!video_src_pixfmt_supported(fmtdesc.pixelformat))
7497+            continue;
7498+        if (v4l2_set_format(fd, type_v4l2, fmtdesc.pixelformat, 720, 480)) {
7499+            request_debug(dc, "Set failed for type=%d, pf=%.4s\n", type_v4l2, (char*)&fmtdesc.pixelformat);
7500+            continue;
7501+        }
7502+
7503+        // Retry only on EINTR; any other failure skips this format (it must not re-issue the ioctl)
7504+        while ((rv = ioctl(fd, VIDIOC_REQBUFS, &rbufs)) != 0 && errno == EINTR)
7505+            /* retry */;
7506+        if (rv != 0) {
7507+            request_debug(dc, "%s: Reqbufs failed\n", vpath);
7508+            continue;
7509+        }
7510+        if ((rbufs.capabilities & REQ_BUF_CAPS) != REQ_BUF_CAPS) {
7511+            request_debug(dc, "%s: Buf caps %#x insufficient\n", vpath, rbufs.capabilities);
7512+            continue;
7513+        }
7514+        request_debug(dc, "Adding: %s,%s pix=%#x, type=%d\n",
7515+                 mpath, vpath, fmtdesc.pixelformat, type_v4l2);
7516+        devscan_add(scan, type_v4l2, fmtdesc.pixelformat, vpath, mpath);
7517+    }
7518+}
7519+
7520+// Open one video node, check it is a streaming mem2mem device and probe its output-queue formats
7521+static int probe_video_device(void * const dc,
7522+                   struct udev_device *const device,
7523+                   struct devscan *const scan,
7524+                   const char *const mpath)
7525+{
7526+    int ret;
7527+    unsigned int capabilities = 0;
7528+    int video_fd = -1;
7529+
7530+    const char *path = udev_device_get_devnode(device);
7531+    if (!path) {
7532+        request_err(dc, "%s: get video device devnode failed\n", __func__);
7533+        ret = -EINVAL;
7534+        goto fail;
7535+    }
7536+
7537+    video_fd = open(path, O_RDWR, 0);
7538+    if (video_fd == -1) {
7539+        ret = -errno;  // Captured before the logging call below can disturb errno
7540+        request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(errno), errno);
7541+        goto fail;
7542+    }
7543+
7544+    ret = v4l2_query_capabilities(video_fd, &capabilities);
7545+    if (ret < 0) {
7546+        request_err(dc, "%s: get video capability failed, %s (%d)\n", __func__, strerror(-ret), -ret);
7547+        goto fail;
7548+    }
7549+
7550+    request_debug(dc, "%s: path=%s capabilities=%#x\n", __func__, path, capabilities);
7551+
7552+    if (!(capabilities & V4L2_CAP_STREAMING)) {
7553+        request_debug(dc, "%s: missing required streaming capability\n", __func__);
7554+        ret = -EINVAL;
7555+        goto fail;
7556+    }
7557+
7558+    if (!(capabilities & (V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M))) {  // Either m2m flavour will do
7559+        request_debug(dc, "%s: missing required mem2mem capability\n", __func__);
7560+        ret = -EINVAL;
7561+        goto fail;
7562+    }
7563+
7564+    /* Should check capture formats too... */
7565+    if ((capabilities & V4L2_CAP_VIDEO_M2M) != 0)  // Single-planar API
7566+        probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT, mpath, path);
7567+    if ((capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) != 0)  // Multi-planar API
7568+        probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, mpath, path);
7569+
7570+    close(video_fd);  // The fd is only needed while probing; devscan_add stored copies of the paths
7571+    return 0;
7572+
7573+fail:
7574+    if (video_fd >= 0)
7575+        close(video_fd);
7576+    return ret;
7577+}
7578+
7579+static int probe_media_device(void * const dc,
7580+                   struct udev_device *const device,
7581+                   struct devscan *const scan)
7582+{
7583+    // Probe every V4L video interface in this media device's topology; returns 0 or a negative errno
7584+    int ret = 0;  // Defined result when the topology contains no V4L video interface at all
7585+    int rv;
7586+    struct media_device_info device_info = { 0 };
7587+    struct media_v2_topology topology = { 0 };
7588+    struct media_v2_interface *interfaces = NULL;
7589+    struct udev *udev = udev_device_get_udev(device);
7590+    struct udev_device *video_device;
7591+    dev_t devnum;
7592+    int media_fd = -1;
7593+
7594+    const char *path = udev_device_get_devnode(device);
7595+    if (!path) {
7596+        request_err(dc, "%s: get media device devnode failed\n", __func__);
7597+        ret = -EINVAL;
7598+        goto fail;
7599+    }
7600+
7601+    media_fd = open(path, O_RDWR, 0);
7602+    if (media_fd < 0) {
7603+        ret = -errno;
7604+        request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(-ret), -ret);
7605+        goto fail;
7606+    }
7607+
7608+    rv = ioctl(media_fd, MEDIA_IOC_DEVICE_INFO, &device_info);
7609+    if (rv < 0) {
7610+        ret = -errno;
7611+        request_err(dc, "%s: get media device info failed, %s (%d)\n", __func__, strerror(-ret), -ret);
7612+        goto fail;
7613+    }
7614+
7615+    rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology);  // 1st call, no arrays supplied: used for num_interfaces
7616+    if (rv < 0) {
7617+        ret = -errno;
7618+        request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret);
7619+        goto fail;
7620+    }
7621+
7622+    if (topology.num_interfaces <= 0) {
7623+        request_err(dc, "%s: media device has no interfaces\n", __func__);
7624+        ret = -EINVAL;
7625+        goto fail;
7626+    }
7627+
7628+    interfaces = calloc(topology.num_interfaces, sizeof(*interfaces));
7629+    if (!interfaces) {
7630+        request_err(dc, "%s: allocating media interface struct failed\n", __func__);
7631+        ret = -ENOMEM;
7632+        goto fail;
7633+    }
7634+
7635+    topology.ptr_interfaces = (__u64)(uintptr_t)interfaces;
7636+    rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology);  // 2nd call: fills the interface array
7637+    if (rv < 0) {
7638+        ret = -errno;
7639+        request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret);
7640+        goto fail;
7641+    }
7642+
7643+    for (int i = 0; i < topology.num_interfaces; i++) {
7644+        if (interfaces[i].intf_type != MEDIA_INTF_T_V4L_VIDEO)
7645+            continue;
7646+
7647+        devnum = makedev(interfaces[i].devnode.major, interfaces[i].devnode.minor);
7648+        video_device = udev_device_new_from_devnum(udev, 'c', devnum);
7649+        if (!video_device) {
7650+            ret = -errno;
7651+            request_err(dc, "%s: video_device[%d]=%p\n", __func__, i, video_device);
7652+            continue;
7653+        }
7654+
7655+        ret = probe_video_device(dc, video_device, scan, path);
7656+        udev_device_unref(video_device);
7657+        if (ret != 0)  // First failing video node ends the scan of this media device
7658+            goto fail;
7659+    }
7660+
7661+fail:  // Also the normal exit path: ret is 0 when every probe succeeded
7662+    free(interfaces);
7663+    if (media_fd != -1)
7664+        close(media_fd);
7665+    return ret;
7666+}
7667+
7668+const char *decdev_media_path(const struct decdev *const dev)
7669+{   // Media node path of dev; NULL-tolerant
7670+    return !dev ? NULL : dev->mname;
7671+}
7672+
7673+const char *decdev_video_path(const struct decdev *const dev)
7674+{   // Video node path of dev; NULL-tolerant
7675+    return !dev ? NULL : dev->vname;
7676+}
7677+
7678+enum v4l2_buf_type decdev_src_type(const struct decdev *const dev)
7679+{   // Source queue type of dev; 0 when dev is NULL
7680+    return !dev ? 0 : dev->src_type;
7681+}
7682+
7683+uint32_t decdev_src_pixelformat(const struct decdev *const dev)
7684+{   // Source pixel format of dev; 0 when dev is NULL
7685+    return !dev ? 0 : dev->src_fmt_v4l2;
7686+}
7687+
7688+
7689+const struct decdev *devscan_find(struct devscan *const scan,
7690+                  const uint32_t src_fmt_v4l2)
7691+{
7692+    unsigned int idx = 0;
7693+
7694+    // A complete environment override beats anything found by scanning
7695+    if (scan->env.mname != NULL && scan->env.vname != NULL)
7696+        return &scan->env;
7697+    if (scan->dev_count == 0)
7698+        return NULL;
7699+    if (src_fmt_v4l2 == 0)      // No format requested: the first device will do
7700+        return scan->devs;
7701+
7702+    while (idx != scan->dev_count && scan->devs[idx].src_fmt_v4l2 != src_fmt_v4l2)
7703+        ++idx;
7704+    return idx != scan->dev_count ? scan->devs + idx : NULL;
7705+}
7706+
7707+int devscan_build(void * const dc, struct devscan **pscan)
7708+{
7709+    // Build a scan in *pscan from the env override (if complete) or by probing all udev "media" devices
7710+    int ret;
7711+    struct udev *udev = NULL;  // Must start NULL: the fail path tests it before udev_new() may have run
7712+    struct udev_enumerate *enumerate;
7713+    struct udev_list_entry *devices, *entry;
7714+    struct udev_device *device;
7715+    struct devscan * scan;
7716+
7717+    *pscan = NULL;
7718+
7719+    scan = calloc(1, sizeof(*scan));
7720+    if (!scan) {
7721+        ret = -ENOMEM;
7722+        goto fail;
7723+    }
7724+
7725+    scan->env.mname = getenv("LIBVA_V4L2_REQUEST_MEDIA_PATH");
7726+    scan->env.vname = getenv("LIBVA_V4L2_REQUEST_VIDEO_PATH");
7727+    if (scan->env.mname && scan->env.vname) {  // Both set: skip scanning altogether
7728+        request_info(dc, "Media/video device env overrides found: %s,%s\n",
7729+                 scan->env.mname, scan->env.vname);
7730+        *pscan = scan;
7731+        return 0;
7732+    }
7733+
7734+    udev = udev_new();
7735+    if (!udev) {
7736+        request_err(dc, "%s: allocating udev context failed\n", __func__);
7737+        ret = -ENOMEM;
7738+        goto fail;
7739+    }
7740+
7741+    enumerate = udev_enumerate_new(udev);
7742+    if (!enumerate) {
7743+        request_err(dc, "%s: allocating udev enumerator failed\n", __func__);
7744+        ret = -ENOMEM;
7745+        goto fail;
7746+    }
7747+
7748+    udev_enumerate_add_match_subsystem(enumerate, "media");
7749+    udev_enumerate_scan_devices(enumerate);
7750+
7751+    devices = udev_enumerate_get_list_entry(enumerate);
7752+    udev_list_entry_foreach(entry, devices) {
7753+        const char *path = udev_list_entry_get_name(entry);
7754+        if (!path)
7755+            continue;
7756+
7757+        device = udev_device_new_from_syspath(udev, path);
7758+        if (!device)
7759+            continue;
7760+
7761+        probe_media_device(dc, device, scan);  // Best effort: a failing device does not abort the scan
7762+        udev_device_unref(device);
7763+    }
7764+
7765+    udev_enumerate_unref(enumerate);
7766+    udev_unref(udev);
7767+
7768+    *pscan = scan;  // May legitimately hold zero devices
7769+    return 0;
7770+
7771+fail:
7772+    if (udev)
7773+        udev_unref(udev);
7774+    devscan_delete(&scan);  // Accepts a NULL scan
7775+    return ret;
7776+}
7777+
7778--- /dev/null
7779+++ b/libavcodec/v4l2_req_devscan.h
7780@@ -0,0 +1,23 @@
7781+#ifndef _DEVSCAN_H_
7782+#define _DEVSCAN_H_
7783+
7784+#include <stdint.h>
7785+
7786+struct devscan;
7787+struct decdev;
7788+enum v4l2_buf_type;
7789+
7790+/* These return pointers to data in the devscan structure and so are valid
7791+ * for the lifetime of that
7792+ */
7793+const char *decdev_media_path(const struct decdev *const dev);
7794+const char *decdev_video_path(const struct decdev *const dev);
7795+enum v4l2_buf_type decdev_src_type(const struct decdev *const dev);
7796+uint32_t decdev_src_pixelformat(const struct decdev *const dev);
7797+
7798+const struct decdev *devscan_find(struct devscan *const scan, const uint32_t src_fmt_v4l2);
7799+
7800+int devscan_build(void * const dc, struct devscan **pscan);
7801+void devscan_delete(struct devscan **const pScan);
7802+
7803+#endif
7804--- /dev/null
7805+++ b/libavcodec/v4l2_req_dmabufs.c
7806@@ -0,0 +1,409 @@
7807+#include <stdatomic.h>
7808+#include <stdio.h>
7809+#include <stdlib.h>
7810+#include <unistd.h>
7811+#include <inttypes.h>
7812+#include <fcntl.h>
7813+#include <errno.h>
7814+#include <string.h>
7815+#include <sys/ioctl.h>
7816+#include <sys/mman.h>
7817+#include <linux/mman.h>
7818+#include <linux/dma-buf.h>
7819+#include <linux/dma-heap.h>
7820+
7821+#include "v4l2_req_dmabufs.h"
7822+#include "v4l2_req_utils.h"
7823+
7824+#define TRACE_ALLOC 0
7825+
7826+#ifndef __O_CLOEXEC
7827+#define __O_CLOEXEC 0
7828+#endif
7829+
7830+struct dmabufs_ctl;
7831+struct dmabuf_h;
7832+
7833+struct dmabuf_fns {  // Backend hooks used by the generic dmabuf / dmabufs_ctl code in this file
7834+    int (*buf_alloc)(struct dmabufs_ctl * dbsc, struct dmabuf_h * dh, size_t size);  // 0 on success
7835+    void (*buf_free)(struct dmabuf_h * dh);  // Optional: dmabuf_free skips it when NULL
7836+    int (*ctl_new)(struct dmabufs_ctl * dbsc);  // 0 on success; called from dmabufs_ctl_new2
7837+    void (*ctl_free)(struct dmabufs_ctl * dbsc);  // Called from dmabufs_ctl_free before the ctl is freed
7838+};
7839+
7840+struct dmabufs_ctl {
7841+    atomic_int ref_count;  // Count of extra references: starts at 0, ctl is freed when unref finds 0
7842+    int fd;  // Allocator device fd; -1 until the backend's ctl_new opens one
7843+    size_t page_size;  // From sysconf(_SC_PAGE_SIZE); buf_cma_alloc rounds sizes up to it
7844+    void * v;  // Not referenced by the code in this file - presumably backend-private data
7845+    const struct dmabuf_fns * fns;  // Backend in use
7846+};
7847+
7848+struct dmabuf_h {
7849+    int fd;  // Buffer fd, or -1 when the handle has none (e.g. dmabuf_import_mmap)
7850+    size_t size;  // Allocated size in bytes
7851+    size_t len;  // Bytes in use, maintained only through dmabuf_len_set
7852+    void * mapptr;  // CPU mapping, MAP_FAILED while unmapped
7853+    void * v;  // Not referenced by the code in this file - presumably backend-private data
7854+    const struct dmabuf_fns * fns;  // Backend that allocated the buffer; NULL for imported handles
7855+};
7856+
7857+#if TRACE_ALLOC
7858+static unsigned int total_bufs = 0;
7859+static size_t total_size = 0;
7860+#endif
7861+
7862+struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size)
7863+{
7864+    // Wrap an already established mapping of size bytes in a handle that has
7865+    // no fd behind it. Returns NULL for MAP_FAILED input or if out of memory.
7866+    struct dmabuf_h * dh;
7867+
7868+    if (mapptr == MAP_FAILED)
7869+        return NULL;
7870+
7871+    // calloc leaves len, v and fns zeroed
7872+    dh = calloc(1, sizeof(*dh));
7873+    if (dh != NULL) {
7874+        dh->fd = -1;
7875+        dh->size = size;
7876+        dh->mapptr = mapptr;
7877+    }
7878+
7879+    return dh;
7880+}
7881+
7882+struct dmabuf_h * dmabuf_import(int fd, size_t size)
7883+{
7884+    // Wrap a private dup() of fd in a new, not yet mapped, handle of the given size; NULL on failure
7885+    struct dmabuf_h *dh;
7886+    // Test size first so that a rejected import cannot leak the duplicated fd
7887+    if (size == 0 || (fd = dup(fd)) < 0)
7888+        return NULL;
7889+
7890+    dh = malloc(sizeof(*dh));
7891+    if (!dh) {
7892+        close(fd);
7893+        return NULL;
7894+    }
7895+
7896+    *dh = (struct dmabuf_h) {
7897+        .fd = fd,
7898+        .size = size,
7899+        .mapptr = MAP_FAILED
7900+    };
7901+
7902+#if TRACE_ALLOC
7903+    ++total_bufs;
7904+    total_size += dh->size;
7905+    request_log("%s: Import: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
7906+#endif
7907+
7908+    return dh;
7909+}
7910+
7911+struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size)
7912+{   // Return a buffer of at least size bytes, reusing old when it is already big enough
7913+    struct dmabuf_h * dh;
7914+    if (old != NULL) {
7915+        if (old->size >= size) {  // Big enough already: hand it back untouched
7916+            return old;
7917+        }
7918+        dmabuf_free(old);  // Too small: freed before the new allocation is attempted
7919+    }
7920+
7921+    if (size == 0 ||
7922+        (dh = malloc(sizeof(*dh))) == NULL)
7923+        return NULL;
7924+
7925+    *dh = (struct dmabuf_h){
7926+        .fd = -1,
7927+        .mapptr = MAP_FAILED,
7928+        .fns = dbsc->fns
7929+    };
7930+
7931+    if (dh->fns->buf_alloc(dbsc, dh, size) != 0)  // Backend sets dh->fd and dh->size (see buf_cma_alloc)
7932+        goto fail;
7933+
7934+
7935+#if TRACE_ALLOC
7936+    ++total_bufs;
7937+    total_size += dh->size;
7938+    request_log("%s: Alloc: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
7939+#endif
7940+
7941+    return dh;
7942+
7943+fail:
7944+    free(dh);  // Only the handle exists at this point; old (if any) has already been freed
7945+    return NULL;
7946+}
7947+
7948+int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags)
7949+{
7950+    // Issue DMA_BUF_IOCTL_SYNC with flags. Handles without an fd succeed
7951+    // trivially; EINTR is retried; other failures are logged and returned as -errno
7952+    struct dma_buf_sync req = { .flags = flags };
7953+    int err;
7954+
7955+    while (dh->fd != -1 && ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &req) == -1) {
7956+        err = errno;
7957+        if (err != EINTR) {
7958+            request_log("%s: ioctl failed: flags=%#x\n", __func__, flags);
7959+            return -err;
7960+        }
7961+    }
7962+    return 0;
7963+}
7964+
7965+int dmabuf_write_start(struct dmabuf_h * const dh)
7966+{   // Open a write sync bracket; unlike dmabuf_read_start this does not map the buffer first
7967+    return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
7968+}
7969+
7970+int dmabuf_write_end(struct dmabuf_h * const dh)
7971+{   // Close the write sync bracket opened by dmabuf_write_start
7972+    return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
7973+}
7974+
7975+int dmabuf_read_start(struct dmabuf_h * const dh)
7976+{   // Make sure the buffer is mapped, then open a read sync bracket
7977+    if (!dmabuf_map(dh))  // NULL handle or failed mmap
7978+        return -1;
7979+    return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
7980+}
7981+
7982+int dmabuf_read_end(struct dmabuf_h * const dh)
7983+{   // Close the read sync bracket opened by dmabuf_read_start
7984+    return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
7985+}
7986+
7987+
7988+void * dmabuf_map(struct dmabuf_h * const dh)
7989+{
7990+    // Map the buffer read/write on first use and cache the pointer in the handle
7991+    if (dh == NULL)
7992+        return NULL;
7993+    if (dh->mapptr == MAP_FAILED) {
7994+        void * const p = mmap(NULL, dh->size, PROT_READ | PROT_WRITE,
7995+                              MAP_SHARED | MAP_POPULATE, dh->fd, 0);
7996+        if (p == MAP_FAILED) {
7997+            request_log("%s: Map failed\n", __func__);
7998+            return NULL;
7999+        }
8000+        dh->mapptr = p;
8001+    }
8002+    return dh->mapptr;
8003+}
8004+
8005+int dmabuf_fd(const struct dmabuf_h * const dh)
8006+{   // fd held by the handle; -1 for a NULL handle (and for handles that have no fd)
8007+    if (!dh)
8008+        return -1;
8009+    return dh->fd;
8010+}
8011+
8012+size_t dmabuf_size(const struct dmabuf_h * const dh)
8013+{   // Allocated size in bytes; 0 for a NULL handle
8014+    if (!dh)
8015+        return 0;
8016+    return dh->size;
8017+}
8018+
8019+size_t dmabuf_len(const struct dmabuf_h * const dh)
8020+{   // Bytes in use as last recorded by dmabuf_len_set; 0 for a NULL handle
8021+    if (!dh)
8022+        return 0;
8023+    return dh->len;
8024+}
8025+
8026+void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len)
8027+{   // Record the number of bytes in use; NOTE(review): unlike the getters, dh is not NULL-checked
8028+    dh->len = len;  // Not validated against dh->size
8029+}
8030+
8031+void dmabuf_free(struct dmabuf_h * dh)
8032+{   // Release everything the handle holds (backend state, mapping, fd), then the handle itself
8033+    if (!dh)
8034+        return;
8035+
8036+#if TRACE_ALLOC
8037+    --total_bufs;
8038+    total_size -= dh->size;
8039+    request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
8040+#endif
8041+
8042+    if (dh->fns != NULL && dh->fns->buf_free)  // Imported handles carry no fns
8043+        dh->fns->buf_free(dh);
8044+
8045+    if (dh->mapptr != MAP_FAILED && dh->mapptr != NULL)  // Only unmap if a mapping exists
8046+        munmap(dh->mapptr, dh->size);
8047+    if (dh->fd != -1)
8048+        while (close(dh->fd) == -1 && errno == EINTR)
8049+            /* loop */;
8050+    free(dh);
8051+}
8052+
8053+static struct dmabufs_ctl * dmabufs_ctl_new2(const struct dmabuf_fns * const fns)
8054+{
8055+    // Allocate a zeroed control block, fill in the backend-independent fields
8056+    // and let the backend's ctl_new hook complete the setup
8057+    struct dmabufs_ctl * const dbsc = calloc(1, sizeof(*dbsc));
8058+
8059+    if (dbsc == NULL)
8060+        return NULL;
8061+
8062+    dbsc->fd = -1;
8063+    dbsc->fns = fns;
8064+    dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE);
8065+
8066+    if (fns->ctl_new(dbsc) == 0)
8067+        return dbsc;
8068+
8069+    // Backend setup failed: only the block itself needs releasing
8070+    free(dbsc);
8071+    return NULL;
8072+}
8073+
8074+static void dmabufs_ctl_free(struct dmabufs_ctl * const dbsc)
8075+{   // Final teardown, reached from dmabufs_ctl_unref when the last reference goes
8076+    request_debug(NULL, "Free dmabuf ctl\n");
8077+
8078+    dbsc->fns->ctl_free(dbsc);  // Backend releases its own resources first
8079+
8080+    free(dbsc);
8081+}
8082+
8083+void dmabufs_ctl_unref(struct dmabufs_ctl ** const pDbsc)
8084+{   // Drop the caller's reference, always clearing *pDbsc; frees the ctl when it was the last one
8085+    struct dmabufs_ctl * const dbsc = *pDbsc;
8086+
8087+    if (!dbsc)
8088+        return;
8089+    *pDbsc = NULL;
8090+
8091+    if (atomic_fetch_sub(&dbsc->ref_count, 1) != 0)  // Old value non-zero: other references remain
8092+        return;
8093+
8094+    dmabufs_ctl_free(dbsc);
8095+}
8096+
8097+struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc)
8098+{   // Take an additional reference and return the same pointer; dbsc must not be NULL
8099+    atomic_fetch_add(&dbsc->ref_count, 1);
8100+    return dbsc;
8101+}
8102+
8103+//-----------------------------------------------------------------------------
8104+//
8105+// Alloc dmabuf via CMA
8106+
8107+static int ctl_cma_new2(struct dmabufs_ctl * dbsc, const char * const * names)
8108+{   // Open the first device in the NULL-terminated names list that can be opened; 0 or -1
8109+    for (; *names != NULL; ++names)
8110+    {
8111+        while ((dbsc->fd = open(*names, O_RDWR | __O_CLOEXEC)) == -1 &&
8112+               errno == EINTR)
8113+            /* Loop */;
8114+        if (dbsc->fd != -1)
8115+        {
8116+            request_debug(NULL, "%s: Using dma_heap device %s\n", __func__, *names);
8117+            return 0;
8118+        }
8119+        request_debug(NULL, "%s: Not using dma_heap device %s: %s\n", __func__, *names, strerror(errno));
8120+    }
8121+    request_log("Unable to open any dma_heap device\n");
8122+    return -1;  // dbsc->fd is left at -1 by the last failed open
8123+}
8124+
8125+static int ctl_cma_new(struct dmabufs_ctl * dbsc)
8126+{   // ctl_new hook of the plain CMA backend
8127+    static const char * const names[] = {  // Tried in this order
8128+        "/dev/dma_heap/linux,cma",
8129+        "/dev/dma_heap/reserved",
8130+        NULL
8131+    };
8132+
8133+    return ctl_cma_new2(dbsc, names);
8134+}
8135+
8136+static void ctl_cma_free(struct dmabufs_ctl * dbsc)
8137+{   // ctl_free hook: close the heap device fd if one was opened
8138+    if (dbsc->fd != -1)
8139+        while (close(dbsc->fd) == -1 && errno == EINTR)
8140+            /* loop */;
8141+}
8142+
8143+static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, size_t size)
8144+{
8145+    // buf_alloc hook: take size bytes (rounded up to whole pages) from the heap fd; 0 or -errno
8146+    struct dma_heap_allocation_data data = {
8147+        .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1),
8148+        .fd = 0,
8149+        .fd_flags = O_RDWR,
8150+        .heap_flags = 0
8151+    };
8152+    while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) {
8153+        const int err = errno;
8154+        if (err == EINTR)   // Interrupted: retry quietly, this is not a failure worth logging
8155+            continue;
8156+        request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n",
8157+                (uint64_t)data.len,
8158+                dbsc->fd,
8159+                err,
8160+                strerror(err));
8161+        return -err;
8162+    }
8163+
8164+    dh->fd = data.fd;
8165+    dh->size = (size_t)data.len;  // Record the rounded-up size, not the requested one
8166+
8167+//    fprintf(stderr, "%s: size=%#zx, ftell=%#zx\n", __func__,
8168+//            dh->size, (size_t)lseek(dh->fd, 0, SEEK_END));
8169+
8170+    return 0;
8171+}
8172+
8173+static void buf_cma_free(struct dmabuf_h * dh)
8174+{   // buf_free hook: intentionally empty, dmabuf_free itself unmaps and closes the fd
8175+    // Nothing needed
8176+}
8177+
8178+static const struct dmabuf_fns dmabuf_cma_fns = {  // Backend selected by dmabufs_ctl_new
8179+    .buf_alloc  = buf_cma_alloc,
8180+    .buf_free   = buf_cma_free,
8181+    .ctl_new    = ctl_cma_new,
8182+    .ctl_free   = ctl_cma_free,
8183+};
8184+
8185+struct dmabufs_ctl * dmabufs_ctl_new(void)
8186+{   // Create a ctl on the plain CMA backend; NULL if allocation fails or no heap device opens
8187+    request_debug(NULL, "Dmabufs using CMA\n");
8188+    return dmabufs_ctl_new2(&dmabuf_cma_fns);
8189+}
8190+
8191+static int ctl_cma_new_vidbuf_cached(struct dmabufs_ctl * dbsc)
8192+{   // ctl_new hook that prefers the vidbuf_cached heap and falls back to the plain CMA names
8193+    static const char * const names[] = {  // Tried in this order
8194+        "/dev/dma_heap/vidbuf_cached",
8195+        "/dev/dma_heap/linux,cma",
8196+        "/dev/dma_heap/reserved",
8197+        NULL
8198+    };
8199+
8200+    return ctl_cma_new2(dbsc, names);
8201+}
8202+
8203+static const struct dmabuf_fns dmabuf_vidbuf_cached_fns = {  // As dmabuf_cma_fns except for ctl_new
8204+    .buf_alloc  = buf_cma_alloc,
8205+    .buf_free   = buf_cma_free,
8206+    .ctl_new    = ctl_cma_new_vidbuf_cached,
8207+    .ctl_free   = ctl_cma_free,
8208+};
8209+
8210+struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void)
8211+{   // Create a ctl that tries the vidbuf_cached heap first; NULL on failure
8212+    request_debug(NULL, "Dmabufs using Vidbuf\n");
8213+    return dmabufs_ctl_new2(&dmabuf_vidbuf_cached_fns);
8214+}
8215+
8216--- /dev/null
8217+++ b/libavcodec/v4l2_req_dmabufs.h
8218@@ -0,0 +1,45 @@
8219+#ifndef DMABUFS_H
8220+#define DMABUFS_H
8221+
8222+#include <stddef.h>
8223+
8224+struct dmabufs_ctl;
8225+struct dmabuf_h;
8226+
8227+struct dmabufs_ctl * dmabufs_ctl_new(void);
8228+struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void);
8229+void dmabufs_ctl_unref(struct dmabufs_ctl ** const pdbsc);
8230+struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc);
8231+
8232+// Need not preserve old contents
8233+// On NULL return old buffer is freed
8234+struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h *, size_t size);
8235+
8236+static inline struct dmabuf_h * dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t size) {
8237+    return dmabuf_realloc(dbsc, NULL, size);
8238+}
8239+/* Create from existing fd - dups(fd) */
8240+struct dmabuf_h * dmabuf_import(int fd, size_t size);
8241+/* Import an MMAP - return NULL if mapptr = MAP_FAILED */
8242+struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size);
8243+
8244+void * dmabuf_map(struct dmabuf_h * const dh);
8245+
8246+/* flags from linux/dma-buf.h DMA_BUF_SYNC_xxx */
8247+int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags);
8248+
8249+int dmabuf_write_start(struct dmabuf_h * const dh);
8250+int dmabuf_write_end(struct dmabuf_h * const dh);
8251+int dmabuf_read_start(struct dmabuf_h * const dh);
8252+int dmabuf_read_end(struct dmabuf_h * const dh);
8253+
8254+int dmabuf_fd(const struct dmabuf_h * const dh);
8255+/* Allocated size */
8256+size_t dmabuf_size(const struct dmabuf_h * const dh);
8257+/* Bytes in use */
8258+size_t dmabuf_len(const struct dmabuf_h * const dh);
8259+/* Set bytes in use */
8260+void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len);
8261+void dmabuf_free(struct dmabuf_h * dh);
8262+
8263+#endif
8264--- /dev/null
8265+++ b/libavcodec/v4l2_req_hevc_v1.c
8266@@ -0,0 +1,3 @@
8267+#define HEVC_CTRLS_VERSION 1
8268+#include "v4l2_req_hevc_vx.c"
8269+
8270--- /dev/null
8271+++ b/libavcodec/v4l2_req_hevc_v2.c
8272@@ -0,0 +1,3 @@
8273+#define HEVC_CTRLS_VERSION 2
8274+#include "v4l2_req_hevc_vx.c"
8275+
8276--- /dev/null
8277+++ b/libavcodec/v4l2_req_hevc_v3.c
8278@@ -0,0 +1,3 @@
8279+#define HEVC_CTRLS_VERSION 3
8280+#include "v4l2_req_hevc_vx.c"
8281+
8282--- /dev/null
8283+++ b/libavcodec/v4l2_req_hevc_v4.c
8284@@ -0,0 +1,3 @@
8285+#define HEVC_CTRLS_VERSION 4
8286+#include "v4l2_req_hevc_vx.c"
8287+
8288--- /dev/null
8289+++ b/libavcodec/v4l2_req_hevc_vx.c
8290@@ -0,0 +1,1362 @@
8291+// File included by v4l2_req_hevc_v* - not compiled on its own
8292+
8293+#include "decode.h"
8294+#include "hevcdec.h"
8295+#include "hwconfig.h"
8296+#include "internal.h"
8297+#include "thread.h"
8298+
8299+#if HEVC_CTRLS_VERSION == 1
8300+#include "hevc-ctrls-v1.h"
8301+
8302+// Fixup renamed entries
8303+#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT
8304+
8305+#elif HEVC_CTRLS_VERSION == 2
8306+#include "hevc-ctrls-v2.h"
8307+#elif HEVC_CTRLS_VERSION == 3
8308+#include "hevc-ctrls-v3.h"
8309+#elif HEVC_CTRLS_VERSION == 4
8310+#include <linux/v4l2-controls.h>
8311+#if !defined(V4L2_CID_STATELESS_HEVC_SPS)
8312+#include "hevc-ctrls-v4.h"
8313+#endif
8314+#else
8315+#error Unknown HEVC_CTRLS_VERSION
8316+#endif
8317+
8318+#ifndef V4L2_CID_STATELESS_HEVC_SPS
8319+#define V4L2_CID_STATELESS_HEVC_SPS                     V4L2_CID_MPEG_VIDEO_HEVC_SPS
8320+#define V4L2_CID_STATELESS_HEVC_PPS                     V4L2_CID_MPEG_VIDEO_HEVC_PPS
8321+#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS            V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS
8322+#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX          V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX
8323+#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS           V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS
8324+#define V4L2_CID_STATELESS_HEVC_DECODE_MODE             V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE
8325+#define V4L2_CID_STATELESS_HEVC_START_CODE              V4L2_CID_MPEG_VIDEO_HEVC_START_CODE
8326+
8327+#define V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED     V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED
8328+#define V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED     V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED
8329+#define V4L2_STATELESS_HEVC_START_CODE_NONE             V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE
8330+#define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B          V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B
8331+#endif
8332+
8333+#include "v4l2_request_hevc.h"
8334+
8335+#include "libavutil/hwcontext_drm.h"
8336+
8337+#include <semaphore.h>
8338+#include <pthread.h>
8339+
8340+#include "v4l2_req_devscan.h"
8341+#include "v4l2_req_dmabufs.h"
8342+#include "v4l2_req_pollqueue.h"
8343+#include "v4l2_req_media.h"
8344+#include "v4l2_req_utils.h"
8345+
8346+// Attached to buf[0] in frame
8347+// Pooled in hwcontext so generally create once - 1/frame
8348+typedef struct V4L2MediaReqDescriptor {
8349+    AVDRMFrameDescriptor drm;
8350+
8351+    // Media
8352+    uint64_t timestamp;
8353+    struct qent_dst * qe_dst;
8354+
8355+    // Decode only - should be NULL by the time we emit the frame
8356+    struct req_decode_ent decode_ent;
8357+
8358+    struct media_request *req;
8359+    struct qent_src *qe_src;
8360+
8361+#if HEVC_CTRLS_VERSION >= 2
8362+    struct v4l2_ctrl_hevc_decode_params dec;
8363+#endif
8364+
8365+    size_t num_slices;
8366+    size_t alloced_slices;
8367+    struct v4l2_ctrl_hevc_slice_params * slice_params;
8368+    struct slice_info * slices;
8369+
8370+    size_t num_offsets;
8371+    size_t alloced_offsets;
8372+    uint32_t *offsets;
8373+
8374+} V4L2MediaReqDescriptor;
8375+
8376+struct slice_info {
8377+    const uint8_t * ptr;
8378+    size_t len; // bytes
8379+    size_t n_offsets;
8380+};
8381+
8382+// Handy container for accumulating controls before setting
8383+struct req_controls {
8384+    int has_scaling;
8385+    struct timeval tv;
8386+    struct v4l2_ctrl_hevc_sps sps;
8387+    struct v4l2_ctrl_hevc_pps pps;
8388+    struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix;
8389+};
8390+
8391+//static uint8_t nalu_slice_start_code[] = { 0x00, 0x00, 0x01 };
8392+
8393+
8394+// Get an FFmpeg format from the v4l2 format
8395+static enum AVPixelFormat pixel_format_from_format(const struct v4l2_format *const format)
8396+{
8397+    switch (V4L2_TYPE_IS_MULTIPLANAR(format->type) ?
8398+            format->fmt.pix_mp.pixelformat : format->fmt.pix.pixelformat) {
8399+    case V4L2_PIX_FMT_YUV420:
8400+        return AV_PIX_FMT_YUV420P;
8401+    case V4L2_PIX_FMT_NV12:
8402+        return AV_PIX_FMT_NV12;
8403+#if CONFIG_SAND
8404+    case V4L2_PIX_FMT_NV12_COL128:
8405+        return AV_PIX_FMT_RPI4_8;
8406+    case V4L2_PIX_FMT_NV12_10_COL128:
8407+        return AV_PIX_FMT_RPI4_10;
8408+#endif
8409+    default:
8410+        break;
8411+    }
8412+    return AV_PIX_FMT_NONE;
8413+}
8414+
8415+static inline uint64_t frame_capture_dpb(const AVFrame * const frame)
8416+{
8417+    const V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0];
8418+    return rd->timestamp;
8419+}
8420+
8421+static inline void frame_set_capture_dpb(AVFrame * const frame, const uint64_t dpb_stamp)
8422+{
8423+    V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0];
8424+    rd->timestamp = dpb_stamp;
8425+}
8426+
8427+static void fill_pred_table(const HEVCContext *h, struct v4l2_hevc_pred_weight_table *table)
8428+{
8429+    int32_t luma_weight_denom, chroma_weight_denom;
8430+    const SliceHeader *sh = &h->sh;
8431+
8432+    if (sh->slice_type == HEVC_SLICE_I ||
8433+        (sh->slice_type == HEVC_SLICE_P && !h->ps.pps->weighted_pred_flag) ||
8434+        (sh->slice_type == HEVC_SLICE_B && !h->ps.pps->weighted_bipred_flag))
8435+        return;
8436+
8437+    table->luma_log2_weight_denom = sh->luma_log2_weight_denom;
8438+
8439+    if (h->ps.sps->chroma_format_idc)
8440+        table->delta_chroma_log2_weight_denom = sh->chroma_log2_weight_denom - sh->luma_log2_weight_denom;
8441+
8442+    luma_weight_denom = (1 << sh->luma_log2_weight_denom);
8443+    chroma_weight_denom = (1 << sh->chroma_log2_weight_denom);
8444+
8445+    for (int i = 0; i < 15 && i < sh->nb_refs[L0]; i++) {
8446+        table->delta_luma_weight_l0[i] = sh->luma_weight_l0[i] - luma_weight_denom;
8447+        table->luma_offset_l0[i] = sh->luma_offset_l0[i];
8448+        table->delta_chroma_weight_l0[i][0] = sh->chroma_weight_l0[i][0] - chroma_weight_denom;
8449+        table->delta_chroma_weight_l0[i][1] = sh->chroma_weight_l0[i][1] - chroma_weight_denom;
8450+        table->chroma_offset_l0[i][0] = sh->chroma_offset_l0[i][0];
8451+        table->chroma_offset_l0[i][1] = sh->chroma_offset_l0[i][1];
8452+    }
8453+
8454+    if (sh->slice_type != HEVC_SLICE_B)
8455+        return;
8456+
8457+    for (int i = 0; i < 15 && i < sh->nb_refs[L1]; i++) {
8458+        table->delta_luma_weight_l1[i] = sh->luma_weight_l1[i] - luma_weight_denom;
8459+        table->luma_offset_l1[i] = sh->luma_offset_l1[i];
8460+        table->delta_chroma_weight_l1[i][0] = sh->chroma_weight_l1[i][0] - chroma_weight_denom;
8461+        table->delta_chroma_weight_l1[i][1] = sh->chroma_weight_l1[i][1] - chroma_weight_denom;
8462+        table->chroma_offset_l1[i][0] = sh->chroma_offset_l1[i][0];
8463+        table->chroma_offset_l1[i][1] = sh->chroma_offset_l1[i][1];
8464+    }
8465+}
8466+
8467+#if HEVC_CTRLS_VERSION <= 2
8468+static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp)
8469+{
8470+    const HEVCFrame *frame;
8471+    int i;
8472+
8473+    for (i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) {
8474+        frame = h->rps[ST_CURR_BEF].ref[i];
8475+        if (frame && timestamp == frame_capture_dpb(frame->frame))
8476+            return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE;
8477+    }
8478+
8479+    for (i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) {
8480+        frame = h->rps[ST_CURR_AFT].ref[i];
8481+        if (frame && timestamp == frame_capture_dpb(frame->frame))
8482+            return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER;
8483+    }
8484+
8485+    for (i = 0; i < h->rps[LT_CURR].nb_refs; i++) {
8486+        frame = h->rps[LT_CURR].ref[i];
8487+        if (frame && timestamp == frame_capture_dpb(frame->frame))
8488+            return V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR;
8489+    }
8490+
8491+    return 0;
8492+}
8493+#endif
8494+
8495+// Index of the first DPB entry whose timestamp matches that of frame;
8496+// 0 when frame is NULL or when no entry matches.
8497+static unsigned int
8498+get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame,
8499+                  const struct v4l2_hevc_dpb_entry * const entries,
8500+                  const unsigned int num_entries)
8501+{
8502+    unsigned int idx = 0;
8503+
8504+    if (frame != NULL) {
8505+        const uint64_t wanted = frame_capture_dpb(frame->frame);
8506+
8507+        while (idx != num_entries && entries[idx].timestamp != wanted)
8508+            ++idx;
8509+        if (idx == num_entries)
8510+            idx = 0;
8511+    }
8512+    return idx;
8513+}
8514+
8515+// Advance idx bytes through b, also skipping a 0x03 that follows two or more zero bytes
8516+static const uint8_t * ptr_from_index(const uint8_t * b, unsigned int idx)
8517+{
8518+    unsigned int zeros = 0;
8519+    for (; idx != 0; --idx) {
8520+        const uint8_t cur = *b++;
8521+        if (cur != 0) {
8522+            zeros = 0;
8523+            continue;
8524+        }
8525+        if (++zeros >= 2 && *b == 3) {
8526+            ++b;
8527+            zeros = 0;
8528+        }
8529+    }
8530+    return b;
8531+}
8532+
8533+// Claim one more slice slot, doubling both per-slice arrays (initially 8) when full
8534+static int slice_add(V4L2MediaReqDescriptor * const rd)
8535+{
8536+    if (rd->num_slices >= rd->alloced_slices) {
8537+        const size_t new_cap = rd->alloced_slices != 0 ? rd->alloced_slices * 2 : 8;
8538+        struct v4l2_ctrl_hevc_slice_params * new_params;
8539+        struct slice_info * new_slices;
8540+
8541+        // Each array pointer is stored as soon as it grows; capacity only after both
8542+        new_params = av_realloc_array(rd->slice_params, new_cap, sizeof(*new_params));
8543+        if (!new_params)
8544+            return AVERROR(ENOMEM);
8545+        rd->slice_params = new_params;
8546+        new_slices = av_realloc_array(rd->slices, new_cap, sizeof(*new_slices));
8547+        if (!new_slices)
8548+            return AVERROR(ENOMEM);
8549+        rd->slices = new_slices;
8550+        rd->alloced_slices = new_cap;
8551+    }
8552+    rd->num_slices++;
8553+    return 0;
8554+}
8555+
8556+static int offsets_add(V4L2MediaReqDescriptor *const rd, const size_t n, const unsigned * const offsets)
8557+{   // Appends n offsets to rd->offsets (each stored as value - 1); returns 0 or AVERROR(ENOMEM)
8558+    if (rd->num_offsets + n > rd->alloced_offsets) {
8559+        size_t n2 = rd->alloced_offsets == 0 ? 128 : rd->alloced_offsets * 2; // seed from offsets capacity, not slices
8560+        void * p2;
8561+        while (rd->num_offsets + n > n2) // keep doubling until the new entries fit
8562+            n2 *= 2;
8563+        if ((p2 = av_realloc_array(rd->offsets, n2, sizeof(*rd->offsets))) == NULL)
8564+            return AVERROR(ENOMEM);
8565+        rd->offsets = p2;
8566+        rd->alloced_offsets = n2;
8567+    }
8568+    for (size_t i = 0; i != n; ++i)
8569+        rd->offsets[rd->num_offsets++] = offsets[i] - 1;
8570+    return 0;
8571+}
8572+
8573+static unsigned int
8574+fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries)
8575+{
8576+    unsigned int i;
8577+    unsigned int n = 0;
8578+    const HEVCFrame * const pic = h->ref;
8579+
8580+    for (i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) {
8581+        const HEVCFrame * const frame = &h->DPB[i];
8582+        if (frame != pic && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF))) {
8583+            struct v4l2_hevc_dpb_entry * const entry = entries + n++;
8584+
8585+            entry->timestamp = frame_capture_dpb(frame->frame);
8586+#if HEVC_CTRLS_VERSION <= 2
8587+            entry->rps = find_frame_rps_type(h, entry->timestamp);
8588+#else
8589+            entry->flags = (frame->flags & HEVC_FRAME_FLAG_LONG_REF) == 0 ? 0 :
8590+                V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE;
8591+#endif
8592+            entry->field_pic = frame->frame->interlaced_frame;
8593+
8594+#if HEVC_CTRLS_VERSION <= 3
8595+            /* TODO: Interleaved: Get the POC for each field. */
8596+            entry->pic_order_cnt[0] = frame->poc;
8597+            entry->pic_order_cnt[1] = frame->poc;
8598+#else
8599+            entry->pic_order_cnt_val = frame->poc;
8600+#endif
8601+        }
8602+    }
8603+    return n;
8604+}
8605+
8606+static void fill_slice_params(const HEVCContext * const h,
8607+#if HEVC_CTRLS_VERSION >= 2
8608+                              const struct v4l2_ctrl_hevc_decode_params * const dec,
8609+#endif
8610+                              struct v4l2_ctrl_hevc_slice_params *slice_params,
8611+                              uint32_t bit_size, uint32_t bit_offset)
8612+{
8613+    const SliceHeader * const sh = &h->sh;
8614+#if HEVC_CTRLS_VERSION >= 2
8615+    const struct v4l2_hevc_dpb_entry *const dpb = dec->dpb;
8616+    const unsigned int dpb_n = dec->num_active_dpb_entries;
8617+#else
8618+    struct v4l2_hevc_dpb_entry *const dpb = slice_params->dpb;
8619+    unsigned int dpb_n;
8620+#endif
8621+    unsigned int i;
8622+    RefPicList *rpl;
8623+
8624+    *slice_params = (struct v4l2_ctrl_hevc_slice_params) {
8625+        .bit_size = bit_size,
8626+#if HEVC_CTRLS_VERSION <= 3
8627+        .data_bit_offset = bit_offset,
8628+#else
8629+        .data_byte_offset = bit_offset / 8 + 1,
8630+#endif
8631+        /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
8632+        .slice_segment_addr = sh->slice_segment_addr,
8633+
8634+        /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
8635+        .nal_unit_type = h->nal_unit_type,
8636+        .nuh_temporal_id_plus1 = h->temporal_id + 1,
8637+
8638+        /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
8639+        .slice_type = sh->slice_type,
8640+        .colour_plane_id = sh->colour_plane_id,
8641+        .slice_pic_order_cnt = h->ref->poc,
8642+        .num_ref_idx_l0_active_minus1 = sh->nb_refs[L0] ? sh->nb_refs[L0] - 1 : 0,
8643+        .num_ref_idx_l1_active_minus1 = sh->nb_refs[L1] ? sh->nb_refs[L1] - 1 : 0,
8644+        .collocated_ref_idx = sh->slice_temporal_mvp_enabled_flag ? sh->collocated_ref_idx : 0,
8645+        .five_minus_max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? 0 : 5 - sh->max_num_merge_cand,
8646+        .slice_qp_delta = sh->slice_qp_delta,
8647+        .slice_cb_qp_offset = sh->slice_cb_qp_offset,
8648+        .slice_cr_qp_offset = sh->slice_cr_qp_offset,
8649+        .slice_act_y_qp_offset = 0,
8650+        .slice_act_cb_qp_offset = 0,
8651+        .slice_act_cr_qp_offset = 0,
8652+        .slice_beta_offset_div2 = sh->beta_offset / 2,
8653+        .slice_tc_offset_div2 = sh->tc_offset / 2,
8654+
8655+        /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
8656+        .pic_struct = h->sei.picture_timing.picture_struct,
8657+
8658+#if HEVC_CTRLS_VERSION < 2
8659+        /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
8660+        .num_rps_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs,
8661+        .num_rps_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs,
8662+        .num_rps_poc_lt_curr = h->rps[LT_CURR].nb_refs,
8663+#endif
8664+    };
8665+
8666+    if (sh->slice_sample_adaptive_offset_flag[0])
8667+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA;
8668+
8669+    if (sh->slice_sample_adaptive_offset_flag[1])
8670+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA;
8671+
8672+    if (sh->slice_temporal_mvp_enabled_flag)
8673+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED;
8674+
8675+    if (sh->mvd_l1_zero_flag)
8676+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO;
8677+
8678+    if (sh->cabac_init_flag)
8679+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT;
8680+
8681+    if (sh->collocated_list == L0)
8682+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0;
8683+
8684+    if (sh->disable_deblocking_filter_flag)
8685+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED;
8686+
8687+    if (sh->slice_loop_filter_across_slices_enabled_flag)
8688+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED;
8689+
8690+    if (sh->dependent_slice_segment_flag)
8691+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT;
8692+
8693+#if HEVC_CTRLS_VERSION < 2
8694+    dpb_n = fill_dpb_entries(h, dpb);
8695+    slice_params->num_active_dpb_entries = dpb_n;
8696+#endif
8697+
8698+    if (sh->slice_type != HEVC_SLICE_I) {
8699+        rpl = &h->ref->refPicList[0];
8700+        for (i = 0; i < rpl->nb_refs; i++)
8701+            slice_params->ref_idx_l0[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n);
8702+    }
8703+
8704+    if (sh->slice_type == HEVC_SLICE_B) {
8705+        rpl = &h->ref->refPicList[1];
8706+        for (i = 0; i < rpl->nb_refs; i++)
8707+            slice_params->ref_idx_l1[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n);
8708+    }
8709+
8710+    fill_pred_table(h, &slice_params->pred_weight_table);
8711+
8712+    slice_params->num_entry_point_offsets = sh->num_entry_point_offsets;
8713+#if HEVC_CTRLS_VERSION <= 3
8714+    if (slice_params->num_entry_point_offsets > 256) {
8715+        slice_params->num_entry_point_offsets = 256;
8716+        av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets);
8717+    }
8718+
8719+    for (i = 0; i < slice_params->num_entry_point_offsets; i++)
8720+        slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1;
8721+#endif
8722+}
8723+
8724+#if HEVC_CTRLS_VERSION >= 2
8725+static void
8726+fill_decode_params(const HEVCContext * const h,
8727+                   struct v4l2_ctrl_hevc_decode_params * const dec)
8728+{
8729+    unsigned int i;
8730+
8731+    *dec = (struct v4l2_ctrl_hevc_decode_params){
8732+        .pic_order_cnt_val = h->poc,
8733+        .num_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs,
8734+        .num_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs,
8735+        .num_poc_lt_curr = h->rps[LT_CURR].nb_refs,
8736+    };
8737+
8738+    dec->num_active_dpb_entries = fill_dpb_entries(h, dec->dpb);
8739+
8740+    // NOTE(review): the V4L2 stateless HEVC documentation appears to describe
8741+    // these arrays as U8 indices into dpb[], while 32-bit POCs are stored here - confirm.
8742+    // The original author notes that the Pi driver does not use these fields.
8743+    for (i = 0; i != h->rps[ST_CURR_BEF].nb_refs; ++i)
8744+        dec->poc_st_curr_before[i] = h->rps[ST_CURR_BEF].ref[i]->poc;
8745+    for (i = 0; i != h->rps[ST_CURR_AFT].nb_refs; ++i)
8746+        dec->poc_st_curr_after[i] = h->rps[ST_CURR_AFT].ref[i]->poc;
8747+    for (i = 0; i != h->rps[LT_CURR].nb_refs; ++i)
8748+        dec->poc_lt_curr[i] = h->rps[LT_CURR].ref[i]->poc;
8749+
8750+    if (IS_IRAP(h))
8751+        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC;
8752+    if (IS_IDR(h))
8753+        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC;
8754+    if (h->sh.no_output_of_prior_pics_flag)
8755+        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR;
8756+
8757+}
8758+#endif
8759+
8760+// Fill the V4L2 SPS control from the parsed HEVC SPS: scalar fields first, then flag bits
8761+static void fill_sps(struct v4l2_ctrl_hevc_sps *ctrl, const HEVCSPS *sps)
8762+{
8763+    /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
8764+    *ctrl = (struct v4l2_ctrl_hevc_sps) {
8765+        .chroma_format_idc = sps->chroma_format_idc,
8766+        .pic_width_in_luma_samples = sps->width,
8767+        .pic_height_in_luma_samples = sps->height,
8768+        .bit_depth_luma_minus8 = sps->bit_depth - 8,
8769+        .bit_depth_chroma_minus8 = sps->bit_depth - 8,
8770+        .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4,
8771+        .sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1,
8772+        .sps_max_num_reorder_pics = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics,
8773+        .sps_max_latency_increase_plus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_latency_increase + 1,
8774+        .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3,
8775+        .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size,
8776+        .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2,
8777+        .log2_diff_max_min_luma_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size,
8778+        .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter,
8779+        .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra,
8780+        .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1,
8781+        .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1,
8782+        .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3,
8783+        .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size,
8784+        .num_short_term_ref_pic_sets = sps->nb_st_rps,
8785+        .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps,
8786+        .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1,
8787+    };
8788+
8789+    if (sps->separate_colour_plane_flag)
8790+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE;
8791+
8792+    if (sps->scaling_list_enable_flag)
8793+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED;
8794+
8795+    if (sps->amp_enabled_flag)
8796+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_AMP_ENABLED;
8797+
8798+    if (sps->sao_enabled)
8799+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET;
8800+
8801+    if (sps->pcm_enabled_flag)
8802+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_ENABLED;
8803+
8804+    if (sps->pcm.loop_filter_disable_flag)
8805+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED;
8806+
8807+    if (sps->long_term_ref_pics_present_flag)
8808+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT;
8809+
8810+    if (sps->sps_temporal_mvp_enabled_flag)
8811+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED;
8812+
8813+    if (sps->sps_strong_intra_smoothing_enable_flag)
8814+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED;
8815+}
8816+
8817+static void fill_scaling_matrix(const ScalingList * const sl,
8818+                                struct v4l2_ctrl_hevc_scaling_matrix * const sm)
8819+{
8820+    unsigned int i;
8821+
8822+    for (i = 0; i < 6; i++) {
8823+        unsigned int j;
8824+
8825+        for (j = 0; j < 16; j++)
8826+            sm->scaling_list_4x4[i][j] = sl->sl[0][i][j];
8827+        for (j = 0; j < 64; j++) {
8828+            sm->scaling_list_8x8[i][j]   = sl->sl[1][i][j];
8829+            sm->scaling_list_16x16[i][j] = sl->sl[2][i][j];
8830+            if (i < 2)
8831+                sm->scaling_list_32x32[i][j] = sl->sl[3][i * 3][j];
8832+        }
8833+        sm->scaling_list_dc_coef_16x16[i] = sl->sl_dc[0][i];
8834+        if (i < 2)
8835+            sm->scaling_list_dc_coef_32x32[i] = sl->sl_dc[1][i * 3];
8836+    }
8837+}
8838+
8839+static void fill_pps(struct v4l2_ctrl_hevc_pps * const ctrl, const HEVCPPS * const pps)
8840+{
8841+    uint64_t flags = 0;
8842+
8843+    if (pps->dependent_slice_segments_enabled_flag)
8844+        flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED;
8845+
8846+    if (pps->output_flag_present_flag)
8847+        flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT;
8848+
8849+    if (pps->sign_data_hiding_flag)
8850+        flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED;
8851+
8852+    if (pps->cabac_init_present_flag)
8853+        flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT;
8854+
8855+    if (pps->constrained_intra_pred_flag)
8856+        flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED;
8857+
8858+    if (pps->transform_skip_enabled_flag)
8859+        flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED;
8860+
8861+    if (pps->cu_qp_delta_enabled_flag)
8862+        flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED;
8863+
8864+    if (pps->pic_slice_level_chroma_qp_offsets_present_flag)
8865+        flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT;
8866+
8867+    if (pps->weighted_pred_flag)
8868+        flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED;
8869+
8870+    if (pps->weighted_bipred_flag)
8871+        flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED;
8872+
8873+    if (pps->transquant_bypass_enable_flag)
8874+        flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED;
8875+
8876+    if (pps->tiles_enabled_flag)
8877+        flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED;
8878+
8879+    if (pps->entropy_coding_sync_enabled_flag)
8880+        flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED;
8881+
8882+    if (pps->loop_filter_across_tiles_enabled_flag)
8883+        flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED;
8884+
8885+    if (pps->seq_loop_filter_across_slices_enabled_flag)
8886+        flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED;
8887+
8888+    if (pps->deblocking_filter_override_enabled_flag)
8889+        flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED;
8890+
8891+    if (pps->disable_dbf)
8892+        flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER;
8893+
8894+    if (pps->lists_modification_present_flag)
8895+        flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT;
8896+
8897+    if (pps->slice_header_extension_present_flag)
8898+        flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT;
8899+
8900+    /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
8901+    *ctrl = (struct v4l2_ctrl_hevc_pps) {
8902+        .num_extra_slice_header_bits = pps->num_extra_slice_header_bits,
8903+        .init_qp_minus26 = pps->pic_init_qp_minus26,
8904+        .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth,
8905+        .pps_cb_qp_offset = pps->cb_qp_offset,
8906+        .pps_cr_qp_offset = pps->cr_qp_offset,
8907+        .pps_beta_offset_div2 = pps->beta_offset / 2,
8908+        .pps_tc_offset_div2 = pps->tc_offset / 2,
8909+        .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2,
8910+        .flags = flags
8911+    };
8912+
8913+
8914+    if (pps->tiles_enabled_flag) {
8915+        ctrl->num_tile_columns_minus1 = pps->num_tile_columns - 1;
8916+        ctrl->num_tile_rows_minus1 = pps->num_tile_rows - 1;
8917+
8918+        for (int i = 0; i < pps->num_tile_columns; i++)
8919+            ctrl->column_width_minus1[i] = pps->column_width[i] - 1;
8920+
8921+        for (int i = 0; i < pps->num_tile_rows; i++)
8922+            ctrl->row_height_minus1[i] = pps->row_height[i] - 1;
8923+    }
8924+}
8925+
8926+// Called before finally returning the frame to the user
8927+// Set corrupt flag here as this is actually the frame structure that
8928+// is going to the user (in MT land each thread has its own pool)
8929+static int frame_post_process(void *logctx, AVFrame *frame)
8930+{
8931+    V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)frame->data[0];
8932+
8933+//    av_log(NULL, AV_LOG_INFO, "%s\n", __func__);
8934+    frame->flags &= ~AV_FRAME_FLAG_CORRUPT;
8935+    if (rd->qe_dst) {
8936+        MediaBufsStatus stat = qent_dst_wait(rd->qe_dst);
8937+        if (stat != MEDIABUFS_STATUS_SUCCESS) {
8938+            av_log(logctx, AV_LOG_ERROR, "%s: Decode fail\n", __func__);
8939+            frame->flags |= AV_FRAME_FLAG_CORRUPT;
8940+        }
8941+    }
8942+
8943+    return 0;
8944+}
8945+
8946+static inline struct timeval cvt_dpb_to_tv(uint64_t t)
8947+{
8948+    t /= 1000;
8949+    return (struct timeval){
8950+        .tv_usec = t % 1000000,
8951+        .tv_sec = t / 1000000
8952+    };
8953+}
8954+
8955+static inline uint64_t cvt_timestamp_to_dpb(const unsigned int t)
8956+{
8957+    return (uint64_t)t * 1000;
8958+}
8959+
8960+static int v4l2_request_hevc_start_frame(AVCodecContext *avctx,
8961+                                         av_unused const uint8_t *buffer,
8962+                                         av_unused uint32_t size)
8963+{
8964+    const HEVCContext *h = avctx->priv_data;
8965+    V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0];
8966+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
8967+
8968+//    av_log(NULL, AV_LOG_INFO, "%s\n", __func__);
8969+    decode_q_add(&ctx->decode_q, &rd->decode_ent);
8970+
8971+    rd->num_slices = 0;
8972+    ctx->timestamp++;
8973+    rd->timestamp = cvt_timestamp_to_dpb(ctx->timestamp);
8974+
8975+    {
8976+        FrameDecodeData * const fdd = (FrameDecodeData*)h->ref->frame->private_ref->data;
8977+        fdd->post_process = frame_post_process;
8978+    }
8979+
8980+    // qe_dst needs to be bound to the data buffer and only returned when that is
8981+    if (!rd->qe_dst)
8982+    {
8983+        if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) {
8984+            av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__);
8985+            return AVERROR(ENOMEM);
8986+        }
8987+    }
8988+
8989+    ff_thread_finish_setup(avctx); // Allow next thread to enter v4l2_request_hevc_start_frame
8990+
8991+    return 0;
8992+}
8993+
8994+// Object fd & size will be zapped by this & need setting later
8995+static int drm_from_format(AVDRMFrameDescriptor * const desc, const struct v4l2_format * const format)
8996+{
8997+    AVDRMLayerDescriptor *layer = &desc->layers[0];
8998+    unsigned int width;
8999+    unsigned int height;
9000+    unsigned int bpl;
9001+    uint32_t pixelformat;
9002+
9003+    if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) {
9004+        width       = format->fmt.pix_mp.width;
9005+        height      = format->fmt.pix_mp.height;
9006+        pixelformat = format->fmt.pix_mp.pixelformat;
9007+        bpl         = format->fmt.pix_mp.plane_fmt[0].bytesperline;
9008+    }
9009+    else {
9010+        width       = format->fmt.pix.width;
9011+        height      = format->fmt.pix.height;
9012+        pixelformat = format->fmt.pix.pixelformat;
9013+        bpl         = format->fmt.pix.bytesperline;
9014+    }
9015+
9016+    switch (pixelformat) {
9017+    case V4L2_PIX_FMT_NV12:
9018+        layer->format = DRM_FORMAT_NV12;
9019+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
9020+        break;
9021+#if CONFIG_SAND
9022+    case V4L2_PIX_FMT_NV12_COL128:
9023+        layer->format = DRM_FORMAT_NV12;
9024+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl);
9025+        break;
9026+    case V4L2_PIX_FMT_NV12_10_COL128:
9027+        layer->format = DRM_FORMAT_P030;
9028+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl);
9029+        break;
9030+#endif
9031+#ifdef DRM_FORMAT_MOD_ALLWINNER_TILED
9032+    case V4L2_PIX_FMT_SUNXI_TILED_NV12:
9033+        layer->format = DRM_FORMAT_NV12;
9034+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_ALLWINNER_TILED;
9035+        break;
9036+#endif
9037+#if defined(V4L2_PIX_FMT_NV15) && defined(DRM_FORMAT_NV15)
9038+    case V4L2_PIX_FMT_NV15:
9039+        layer->format = DRM_FORMAT_NV15;
9040+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
9041+        break;
9042+#endif
9043+    case V4L2_PIX_FMT_NV16:
9044+        layer->format = DRM_FORMAT_NV16;
9045+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
9046+        break;
9047+#if defined(V4L2_PIX_FMT_NV20) && defined(DRM_FORMAT_NV20)
9048+    case V4L2_PIX_FMT_NV20:
9049+        layer->format = DRM_FORMAT_NV20;
9050+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
9051+        break;
9052+#endif
9053+    default:
9054+        return -1;
9055+    }
9056+
9057+    desc->nb_objects = 1;
9058+    desc->objects[0].fd = -1;
9059+    desc->objects[0].size = 0;
9060+
9061+    desc->nb_layers = 1;
9062+    layer->nb_planes = 2;
9063+
9064+    layer->planes[0].object_index = 0;
9065+    layer->planes[0].offset = 0;
9066+    layer->planes[0].pitch = bpl;
9067+#if CONFIG_SAND
9068+    if (pixelformat == V4L2_PIX_FMT_NV12_COL128) {
9069+        layer->planes[1].object_index = 0;
9070+        layer->planes[1].offset = height * 128;
9071+        layer->planes[0].pitch = width;
9072+        layer->planes[1].pitch = width;
9073+    }
9074+    else if (pixelformat == V4L2_PIX_FMT_NV12_10_COL128) {
9075+        layer->planes[1].object_index = 0;
9076+        layer->planes[1].offset = height * 128;
9077+        layer->planes[0].pitch = width * 2; // Lies but it keeps DRM import happy
9078+        layer->planes[1].pitch = width * 2;
9079+    }
9080+    else
9081+#endif
9082+    {
9083+        layer->planes[1].object_index = 0;
9084+        layer->planes[1].offset = layer->planes[0].pitch * height;
9085+        layer->planes[1].pitch = layer->planes[0].pitch;
9086+    }
9087+
9088+    return 0;
9089+}
9090+
9091+static int
9092+set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq,
9093+    struct req_controls *const controls,
9094+#if HEVC_CTRLS_VERSION >= 2
9095+    struct v4l2_ctrl_hevc_decode_params * const dec,
9096+#endif
9097+    struct v4l2_ctrl_hevc_slice_params * const slices, const unsigned int slice_count,
9098+    void * const offsets, const size_t offset_count)
9099+{
9100+    int rv;
9101+#if HEVC_CTRLS_VERSION >= 2
9102+    unsigned int n = 3;
9103+#else
9104+    unsigned int n = 2;
9105+#endif
9106+
9107+    struct v4l2_ext_control control[6] = {
9108+        {
9109+            .id = V4L2_CID_STATELESS_HEVC_SPS,
9110+            .ptr = &controls->sps,
9111+            .size = sizeof(controls->sps),
9112+        },
9113+        {
9114+            .id = V4L2_CID_STATELESS_HEVC_PPS,
9115+            .ptr = &controls->pps,
9116+            .size = sizeof(controls->pps),
9117+        },
9118+#if HEVC_CTRLS_VERSION >= 2
9119+        {
9120+            .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS,
9121+            .ptr = dec,
9122+            .size = sizeof(*dec),
9123+        },
9124+#endif
9125+    };
9126+
9127+    if (slices)
9128+        control[n++] = (struct v4l2_ext_control) {
9129+            .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS,
9130+            .ptr = slices,
9131+            .size = sizeof(*slices) * slice_count,
9132+        };
9133+
9134+    if (controls->has_scaling)
9135+        control[n++] = (struct v4l2_ext_control) {
9136+            .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX,
9137+            .ptr = &controls->scaling_matrix,
9138+            .size = sizeof(controls->scaling_matrix),
9139+        };
9140+
9141+#if HEVC_CTRLS_VERSION >= 4
9142+    if (offsets)
9143+        control[n++] = (struct v4l2_ext_control) {
9144+            .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS,
9145+            .ptr = offsets,
9146+            .size = sizeof(((struct V4L2MediaReqDescriptor *)0)->offsets[0]) * offset_count,
9147+        };
9148+#endif
9149+
9150+    rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, n);
9151+
9152+    return rv;
9153+}
9154+
9155+// This only works because we started out from a single coded frame buffer
9156+// that will remain intact until after end_frame
9157+static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
9158+{
9159+    const HEVCContext * const h = avctx->priv_data;
9160+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
9161+    V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0];
9162+    int bcount = get_bits_count(&h->HEVClc->gb);
9163+    uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount;
9164+
9165+    const unsigned int n = rd->num_slices;
9166+    const unsigned int block_start = (n / ctx->max_slices) * ctx->max_slices;
9167+
9168+    int rv;
9169+    struct slice_info * si;
9170+
9171+    // This looks dodgy but we know that FFmpeg has parsed this from a buffer
9172+    // that contains the entire frame including the start code
9173+    if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) {
9174+        buffer -= 3;
9175+        size += 3;
9176+        boff += 24;
9177+        if (buffer[0] != 0 || buffer[1] != 0 || buffer[2] != 1) {
9178+            av_log(avctx, AV_LOG_ERROR, "Start code requested but missing %02x:%02x:%02x\n",
9179+                   buffer[0], buffer[1], buffer[2]);
9180+        }
9181+    }
9182+
9183+    if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) {
9184+        if (rd->slices == NULL) {
9185+            if ((rd->slices = av_mallocz(sizeof(*rd->slices))) == NULL)
9186+                return AVERROR(ENOMEM);
9187+            rd->slices->ptr = buffer;
9188+            rd->num_slices = 1;
9189+        }
9190+        rd->slices->len = buffer - rd->slices->ptr + size;
9191+        return 0;
9192+    }
9193+
9194+    if ((rv = slice_add(rd)) != 0)
9195+        return rv;
9196+
9197+    si = rd->slices + n;
9198+    si->ptr = buffer;
9199+    si->len = size;
9200+    si->n_offsets = rd->num_offsets;
9201+
9202+    if (n != block_start) {
9203+        struct slice_info *const si0 = rd->slices + block_start;
9204+        const size_t offset = (buffer - si0->ptr);
9205+        boff += offset * 8;
9206+        size += offset;
9207+        si0->len = si->len + offset;
9208+    }
9209+
9210+#if HEVC_CTRLS_VERSION >= 2
9211+    if (n == 0)
9212+        fill_decode_params(h, &rd->dec);
9213+    fill_slice_params(h, &rd->dec, rd->slice_params + n, size * 8, boff);
9214+#else
9215+    fill_slice_params(h, rd->slice_params + n, size * 8, boff);
9216+#endif
9217+    if (ctx->max_offsets != 0 &&
9218+        (rv = offsets_add(rd, h->sh.num_entry_point_offsets, h->sh.entry_point_offset)) != 0)
9219+        return rv;
9220+
9221+    return 0;
9222+}
9223+
9224+static void v4l2_request_hevc_abort_frame(AVCodecContext * const avctx)
9225+{
9226+    const HEVCContext * const h = avctx->priv_data;
9227+    if (h->ref != NULL) {
9228+        V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0];
9229+        V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
9230+
9231+        media_request_abort(&rd->req);
9232+        mediabufs_src_qent_abort(ctx->mbufs, &rd->qe_src);
9233+
9234+        decode_q_remove(&ctx->decode_q, &rd->decode_ent);
9235+    }
9236+}
9237+
9238+static int send_slice(AVCodecContext * const avctx,
9239+                      V4L2MediaReqDescriptor * const rd,
9240+                      struct req_controls *const controls,
9241+                      const unsigned int i, const unsigned int j)
9242+{
9243+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
9244+
9245+    const int is_last = (j == rd->num_slices);
9246+    struct slice_info *const si = rd->slices + i;
9247+    struct media_request * req = NULL;
9248+    struct qent_src * src = NULL;
9249+    MediaBufsStatus stat;
9250+    void * offsets = rd->offsets + rd->slices[i].n_offsets;
9251+    size_t n_offsets = (is_last ? rd->num_offsets : rd->slices[j].n_offsets) - rd->slices[i].n_offsets;
9252+
9253+    if ((req = media_request_get(ctx->mpool)) == NULL) {
9254+        av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__);
9255+        return AVERROR(ENOMEM);
9256+    }
9257+
9258+    if (set_req_ctls(ctx, req,
9259+                     controls,
9260+#if HEVC_CTRLS_VERSION >= 2
9261+                     &rd->dec,
9262+#endif
9263+                     rd->slice_params + i, j - i,
9264+                     offsets, n_offsets)) {
9265+        av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__);
9266+        goto fail1;
9267+    }
9268+
9269+    if ((src = mediabufs_src_qent_get(ctx->mbufs)) == NULL) {
9270+        av_log(avctx, AV_LOG_ERROR, "%s: Failed to get src buffer\n", __func__);
9271+        goto fail1;
9272+    }
9273+
9274+    if (qent_src_data_copy(src, 0, si->ptr, si->len, ctx->dbufs) != 0) {
9275+        av_log(avctx, AV_LOG_ERROR, "%s: Failed data copy\n", __func__);
9276+        goto fail2;
9277+    }
9278+
9279+    if (qent_src_params_set(src, &controls->tv)) {
9280+        av_log(avctx, AV_LOG_ERROR, "%s: Failed src param set\n", __func__);
9281+        goto fail2;
9282+    }
9283+
9284+    stat = mediabufs_start_request(ctx->mbufs, &req, &src,
9285+                                   i == 0 ? rd->qe_dst : NULL,
9286+                                   is_last);
9287+
9288+    if (stat != MEDIABUFS_STATUS_SUCCESS) {
9289+        av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__);
9290+        return AVERROR_UNKNOWN;
9291+    }
9292+    return 0;
9293+
9294+fail2:
9295+    mediabufs_src_qent_abort(ctx->mbufs, &src);
9296+fail1:
9297+    media_request_abort(&req);
9298+    return AVERROR_UNKNOWN;
9299+}
9300+
9301+static int v4l2_request_hevc_end_frame(AVCodecContext *avctx)
9302+{
9303+    const HEVCContext * const h = avctx->priv_data;
9304+    V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0];
9305+    V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data;
9306+    struct req_controls rc;
9307+    unsigned int i;
9308+    int rv;
9309+
9310+    // It is possible, though maybe a bug, to get an end_frame without
9311+    // a previous start_frame.  If we do then give up.
9312+    if (!decode_q_in_q(&rd->decode_ent)) {
9313+        av_log(avctx, AV_LOG_DEBUG, "%s: Frame not in decode Q\n", __func__);
9314+        return AVERROR_INVALIDDATA;
9315+    }
9316+
9317+    {
9318+        const ScalingList *sl = h->ps.pps->scaling_list_data_present_flag ?
9319+                                    &h->ps.pps->scaling_list :
9320+                                h->ps.sps->scaling_list_enable_flag ?
9321+                                    &h->ps.sps->scaling_list : NULL;
9322+
9323+
9324+        memset(&rc, 0, sizeof(rc));
9325+        rc.tv = cvt_dpb_to_tv(rd->timestamp);
9326+        fill_sps(&rc.sps, h->ps.sps);
9327+        fill_pps(&rc.pps, h->ps.pps);
9328+        if (sl) {
9329+            rc.has_scaling = 1;
9330+            fill_scaling_matrix(sl, &rc.scaling_matrix);
9331+        }
9332+    }
9333+
9334+    decode_q_wait(&ctx->decode_q, &rd->decode_ent);
9335+
9336+    // qe_dst needs to be bound to the data buffer and only returned when that is
9337+    // Alloc almost certainly wants to be serialised if there is any chance of blocking
9338+    // so we get the next frame to be free in the thread that needs it for decode first.
9339+    //
9340+    // In our current world this probably isn't a concern but put it here anyway
9341+    if (!rd->qe_dst)
9342+    {
9343+        if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) {
9344+            av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__);
9345+            rv = AVERROR(ENOMEM);
9346+            goto fail;
9347+        }
9348+    }
9349+
9350+    // Send as slices
9351+    for (i = 0; i < rd->num_slices; i += ctx->max_slices) {
9352+        const unsigned int e = FFMIN(rd->num_slices, i + ctx->max_slices);
9353+        if ((rv = send_slice(avctx, rd, &rc, i, e)) != 0)
9354+            goto fail;
9355+    }
9356+
9357+    // Set the drm_prime desriptor
9358+    drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs));
9359+    rd->drm.objects[0].fd = dmabuf_fd(qent_dst_dmabuf(rd->qe_dst, 0));
9360+    rd->drm.objects[0].size = dmabuf_size(qent_dst_dmabuf(rd->qe_dst, 0));
9361+
9362+    decode_q_remove(&ctx->decode_q, &rd->decode_ent);
9363+    return 0;
9364+
9365+fail:
9366+    decode_q_remove(&ctx->decode_q, &rd->decode_ent);
9367+    return rv;
9368+}
9369+
/* Returns non-zero iff v lies within the control's [minimum, maximum] range */
static inline int
ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v)
{
    return !(v < c->minimum || v > c->maximum);
}
9375+
9376+// Initial check & init
9377+static int
9378+probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
9379+{
9380+    const HEVCContext *h = avctx->priv_data;
9381+    const HEVCSPS * const sps = h->ps.sps;
9382+    struct v4l2_ctrl_hevc_sps ctrl_sps;
9383+    unsigned int i;
9384+
9385+    // Check for var slice array
9386+    struct v4l2_query_ext_ctrl qc[] = {
9387+        { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS },
9388+        { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
9389+        { .id = V4L2_CID_STATELESS_HEVC_SPS },
9390+        { .id = V4L2_CID_STATELESS_HEVC_PPS },
9391+        { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX },
9392+#if HEVC_CTRLS_VERSION >= 2
9393+        { .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS },
9394+#endif
9395+    };
9396+    // Order & size must match!
9397+    static const size_t ctrl_sizes[] = {
9398+        sizeof(struct v4l2_ctrl_hevc_slice_params),
9399+        sizeof(int32_t),
9400+        sizeof(struct v4l2_ctrl_hevc_sps),
9401+        sizeof(struct v4l2_ctrl_hevc_pps),
9402+        sizeof(struct v4l2_ctrl_hevc_scaling_matrix),
9403+#if HEVC_CTRLS_VERSION >= 2
9404+        sizeof(struct v4l2_ctrl_hevc_decode_params),
9405+#endif
9406+    };
9407+    const unsigned int noof_ctrls = FF_ARRAY_ELEMS(qc);
9408+
9409+#if HEVC_CTRLS_VERSION == 2
9410+    if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(5, 18, 0))
9411+        return AVERROR(EINVAL);
9412+#elif HEVC_CTRLS_VERSION == 3
9413+    if (mediabufs_ctl_driver_version(ctx->mbufs) < MEDIABUFS_DRIVER_VERSION(5, 18, 0))
9414+        return AVERROR(EINVAL);
9415+#endif
9416+
9417+    mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls);
9418+    i = 0;
9419+#if HEVC_CTRLS_VERSION >= 4
9420+    // Skip slice check if no slice mode
9421+    if (qc[1].type != 0 && !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
9422+        i = 1;
9423+#else
9424+    // Fail frame mode silently for anything prior to V4
9425+    if (qc[1].type == 0 || !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
9426+        return AVERROR(EINVAL);
9427+#endif
9428+    for (; i != noof_ctrls; ++i) {
9429+        if (qc[i].type == 0) {
9430+            av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %#x missing\n", HEVC_CTRLS_VERSION, qc[i].id);
9431+            return AVERROR(EINVAL);
9432+        }
9433+        if (ctrl_sizes[i] != (size_t)qc[i].elem_size) {
9434+            av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n",
9435+                   HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size);
9436+            return AVERROR(EINVAL);
9437+        }
9438+    }
9439+
9440+    fill_sps(&ctrl_sps, sps);
9441+
9442+    if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_STATELESS_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) {
9443+        av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n");
9444+        return AVERROR(EINVAL);
9445+    }
9446+
9447+    return 0;
9448+}
9449+
9450+// Final init
9451+static int
9452+set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
9453+{
9454+    int ret;
9455+
9456+    struct v4l2_query_ext_ctrl querys[] = {
9457+        { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
9458+        { .id = V4L2_CID_STATELESS_HEVC_START_CODE, },
9459+        { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, },
9460+#if HEVC_CTRLS_VERSION >= 4
9461+        { .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, },
9462+#endif
9463+    };
9464+
9465+    struct v4l2_ext_control ctrls[] = {
9466+        { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
9467+        { .id = V4L2_CID_STATELESS_HEVC_START_CODE, },
9468+    };
9469+
9470+    mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys));
9471+
9472+    ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) ||
9473+                       querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ?
9474+        1 : querys[2].dims[0];
9475+    av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices);
9476+
9477+#if HEVC_CTRLS_VERSION >= 4
9478+    ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ?
9479+        0 : querys[3].dims[0];
9480+    av_log(avctx, AV_LOG_DEBUG, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets);
9481+#else
9482+    ctx->max_offsets = 0;
9483+#endif
9484+
9485+    if (querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED ||
9486+        querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED)
9487+        ctx->decode_mode = querys[0].default_value;
9488+    else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED))
9489+        ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED;
9490+    else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
9491+        ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED;
9492+    else {
9493+        av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__);
9494+        return AVERROR(EINVAL);
9495+    }
9496+
9497+    if (querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_NONE ||
9498+        querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)
9499+        ctx->start_code = querys[1].default_value;
9500+    else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B))
9501+        ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B;
9502+    else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE))
9503+        ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE;
9504+    else {
9505+        av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__);
9506+        return AVERROR(EINVAL);
9507+    }
9508+
9509+    // If we are in slice mode & START_CODE_NONE supported then pick that
9510+    // as it doesn't require the slightly dodgy look backwards in our raw buffer
9511+    if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED &&
9512+        ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE))
9513+        ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE;
9514+
9515+    ctrls[0].value = ctx->decode_mode;
9516+    ctrls[1].value = ctx->start_code;
9517+
9518+    ret = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, NULL, ctrls, FF_ARRAY_ELEMS(ctrls));
9519+    return !ret ? 0 : AVERROR(-ret);
9520+}
9521+
9522+static void v4l2_req_frame_free(void *opaque, uint8_t *data)
9523+{
9524+    AVCodecContext *avctx = opaque;
9525+    V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)data;
9526+
9527+    av_log(NULL, AV_LOG_DEBUG, "%s: avctx=%p data=%p\n", __func__, avctx, data);
9528+
9529+    qent_dst_unref(&rd->qe_dst);
9530+
9531+    // We don't expect req or qe_src to be set
9532+    if (rd->req || rd->qe_src)
9533+        av_log(NULL, AV_LOG_ERROR, "%s: qe_src %p or req %p not NULL\n", __func__, rd->req, rd->qe_src);
9534+
9535+    av_freep(&rd->slices);
9536+    av_freep(&rd->slice_params);
9537+    av_freep(&rd->offsets);
9538+
9539+    av_free(rd);
9540+}
9541+
9542+static AVBufferRef *v4l2_req_frame_alloc(void *opaque, int size)
9543+{
9544+    AVCodecContext *avctx = opaque;
9545+//    V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data;
9546+//    V4L2MediaReqDescriptor *req;
9547+    AVBufferRef *ref;
9548+    uint8_t *data;
9549+//    int ret;
9550+
9551+    data = av_mallocz(size);
9552+    if (!data)
9553+        return NULL;
9554+
9555+    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p size=%d data=%p\n", __func__, avctx, size, data);
9556+    ref = av_buffer_create(data, size, v4l2_req_frame_free, avctx, 0);
9557+    if (!ref) {
9558+        av_freep(&data);
9559+        return NULL;
9560+    }
9561+    return ref;
9562+}
9563+
#if 0
/* Disabled: buffer-pool variant of the frame allocation (not compiled) */

/* Pool free callback - only logs */
static void v4l2_req_pool_free(void *opaque)
{
    av_log(NULL, AV_LOG_DEBUG, "%s: opaque=%p\n", __func__, opaque);
}

/* hwframe context free callback - logs, then uninits the context's pool */
static void v4l2_req_hwframe_ctx_free(AVHWFramesContext *hwfc)
{
    av_log(NULL, AV_LOG_DEBUG, "%s: hwfc=%p pool=%p\n", __func__, hwfc, hwfc->pool);

    av_buffer_pool_uninit(&hwfc->pool);
}
#endif
9577+
9578+static int frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
9579+{
9580+    V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data;
9581+    AVHWFramesContext *hwfc = (AVHWFramesContext*)hw_frames_ctx->data;
9582+    const struct v4l2_format *vfmt = mediabufs_dst_fmt(ctx->mbufs);
9583+
9584+    hwfc->format = AV_PIX_FMT_DRM_PRIME;
9585+    hwfc->sw_format = pixel_format_from_format(vfmt);
9586+    if (V4L2_TYPE_IS_MULTIPLANAR(vfmt->type)) {
9587+        hwfc->width = vfmt->fmt.pix_mp.width;
9588+        hwfc->height = vfmt->fmt.pix_mp.height;
9589+    } else {
9590+        hwfc->width = vfmt->fmt.pix.width;
9591+        hwfc->height = vfmt->fmt.pix.height;
9592+    }
9593+#if 0
9594+    hwfc->pool = av_buffer_pool_init2(sizeof(V4L2MediaReqDescriptor), avctx, v4l2_req_frame_alloc, v4l2_req_pool_free);
9595+    if (!hwfc->pool)
9596+        return AVERROR(ENOMEM);
9597+
9598+    hwfc->free = v4l2_req_hwframe_ctx_free;
9599+
9600+    hwfc->initial_pool_size = 1;
9601+
9602+    switch (avctx->codec_id) {
9603+    case AV_CODEC_ID_VP9:
9604+        hwfc->initial_pool_size += 8;
9605+        break;
9606+    case AV_CODEC_ID_VP8:
9607+        hwfc->initial_pool_size += 3;
9608+        break;
9609+    default:
9610+        hwfc->initial_pool_size += 2;
9611+    }
9612+#endif
9613+    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p ctx=%p hw_frames_ctx=%p hwfc=%p pool=%p width=%d height=%d initial_pool_size=%d\n", __func__, avctx, ctx, hw_frames_ctx, hwfc, hwfc->pool, hwfc->width, hwfc->height, hwfc->initial_pool_size);
9614+
9615+    return 0;
9616+}
9617+
9618+static int alloc_frame(AVCodecContext * avctx, AVFrame *frame)
9619+{
9620+    int rv;
9621+
9622+    frame->buf[0] = v4l2_req_frame_alloc(avctx, sizeof(V4L2MediaReqDescriptor));
9623+    if (!frame->buf[0])
9624+        return AVERROR(ENOMEM);
9625+
9626+    frame->data[0] = frame->buf[0]->data;
9627+
9628+    frame->hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx);
9629+
9630+    if ((rv = ff_attach_decode_data(frame)) != 0) {
9631+        av_log(avctx, AV_LOG_ERROR, "Failed to attach decode data to frame\n");
9632+        av_frame_unref(frame);
9633+        return rv;
9634+    }
9635+
9636+    return 0;
9637+}
9638+
9639+const v4l2_req_decode_fns V(ff_v4l2_req_hevc) = {
9640+    .src_pix_fmt_v4l2 = V4L2_PIX_FMT_HEVC_SLICE,
9641+    .name = "V4L2 HEVC stateless V" STR(HEVC_CTRLS_VERSION),
9642+    .probe = probe,
9643+    .set_controls = set_controls,
9644+
9645+    .start_frame    = v4l2_request_hevc_start_frame,
9646+    .decode_slice   = v4l2_request_hevc_decode_slice,
9647+    .end_frame      = v4l2_request_hevc_end_frame,
9648+    .abort_frame    = v4l2_request_hevc_abort_frame,
9649+    .frame_params   = frame_params,
9650+    .alloc_frame    = alloc_frame,
9651+};
9652+
9653--- /dev/null
9654+++ b/libavcodec/v4l2_req_media.c
9655@@ -0,0 +1,1808 @@
9656+/*
9657+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
9658+ *
9659+ * Permission is hereby granted, free of charge, to any person obtaining a
9660+ * copy of this software and associated documentation files (the
9661+ * "Software"), to deal in the Software without restriction, including
9662+ * without limitation the rights to use, copy, modify, merge, publish,
9663+ * distribute, sub license, and/or sell copies of the Software, and to
9664+ * permit persons to whom the Software is furnished to do so, subject to
9665+ * the following conditions:
9666+ *
9667+ * The above copyright notice and this permission notice (including the
9668+ * next paragraph) shall be included in all copies or substantial portions
9669+ * of the Software.
9670+ *
9671+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
9672+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
9673+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
9674+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
9675+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
9676+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
9677+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
9678+ */
9679+
9680+#include <errno.h>
9681+#include <fcntl.h>
9682+#include <poll.h>
9683+#include <pthread.h>
9684+#include <semaphore.h>
9685+#include <stdatomic.h>
9686+#include <stdbool.h>
9687+#include <stdlib.h>
9688+#include <string.h>
9689+#include <unistd.h>
9690+#include <linux/media.h>
9691+#include <linux/mman.h>
9692+#include <sys/ioctl.h>
9693+#include <sys/select.h>
9694+#include <sys/ioctl.h>
9695+#include <sys/mman.h>
9696+
9697+#include <linux/videodev2.h>
9698+
9699+#include "v4l2_req_dmabufs.h"
9700+#include "v4l2_req_media.h"
9701+#include "v4l2_req_pollqueue.h"
9702+#include "v4l2_req_utils.h"
9703+#include "weak_link.h"
9704+
9705+
/*
 * floor(log2(x)); returns 0 for x == 0.
 *
 * Works for the full width of size_t: the top 32 bits (when size_t has
 * them) are folded in first, then the remaining 32 bits are binary searched.
 */
static unsigned int log2_size(size_t x)
{
    unsigned int n = 0;

    /* Shift in two steps: a single shift by 32 is undefined for 32-bit size_t */
    if (sizeof(x) > 4 && ((x >> 16) >> 16) != 0) {
        n += 32;
        x = (x >> 16) >> 16;
    }
    if (x & ~(size_t)0xffff) {
        n += 16;
        x >>= 16;
    }
    if (x & ~(size_t)0xff) {
        n += 8;
        x >>= 8;
    }
    if (x & ~(size_t)0xf) {
        n += 4;
        x >>= 4;
    }
    if (x & ~(size_t)3) {
        n += 2;
        x >>= 2;
    }
    return (x & ~(size_t)1) ? n + 1 : n;
}

/*
 * Round x up to the next size of the form 3 * 2^n or 4 * 2^n that is
 * strictly greater than x.
 *
 * Any x below 256 yields 768 (behaviour kept as-is).  If the rounded value
 * cannot be represented in size_t, x is returned unchanged.
 */
static size_t round_up_size(const size_t x)
{
    /* Admit no size < 256 */
    const unsigned int n = x < 256 ? 8 : log2_size(x) - 1;
    /* Shift in size_t, not int, so large n cannot overflow a signed int */
    const size_t lo = (size_t)3 << n;
    const size_t hi = (size_t)4 << n;

    if (x < lo)
        return lo;
    /* hi wraps to 0 only when n is the top-but-one bit: nothing to round to */
    return hi != 0 ? hi : x;
}
9737+
struct media_request;

/* A pool of media requests allocated on one media device */
struct media_pool {
    int fd;                            /* fd of the opened media device */
    sem_t sem;                         /* waited on before taking a request, posted when one is returned */
    pthread_mutex_t lock;              /* held while free_reqs is read or modified */
    unsigned int pool_n;               /* number of entries in pool_reqs */
    struct media_request *pool_reqs;   /* array of all requests in the pool */
    struct media_request *free_reqs;   /* head of the list of free requests */
    struct pollqueue *pq;              /* poll queue passed to media_pool_new */
};

/* One media request belonging to a media_pool */
struct media_request {
    struct media_request *next;        /* next entry on the pool's free list */
    struct media_pool *mp;             /* owning pool */
    int fd;                            /* request fd, -1 until allocated */
    struct polltask *pt;               /* poll task that calls media_request_done */
};
9756+
9757+static inline enum v4l2_memory
9758+mediabufs_memory_to_v4l2(const enum mediabufs_memory m)
9759+{
9760+    return (enum v4l2_memory)m;
9761+}
9762+
9763+const char *
9764+mediabufs_memory_name(const enum mediabufs_memory m)
9765+{
9766+    switch (m) {
9767+    case MEDIABUFS_MEMORY_UNSET:
9768+        return "Unset";
9769+    case MEDIABUFS_MEMORY_MMAP:
9770+        return "MMap";
9771+    case MEDIABUFS_MEMORY_USERPTR:
9772+        return "UserPtr";
9773+    case MEDIABUFS_MEMORY_OVERLAY:
9774+        return "Overlay";
9775+    case MEDIABUFS_MEMORY_DMABUF:
9776+        return "DMABuf";
9777+    default:
9778+        break;
9779+    }
9780+    return "Unknown";
9781+}
9782+
9783+
/*
 * sem_trywait() that retries when interrupted by a signal.
 * Returns 0 if the semaphore was taken, otherwise -errno.
 */
static inline int do_trywait(sem_t *const sem)
{
    for (;;) {
        if (sem_trywait(sem) == 0)
            return 0;
        if (errno != EINTR)
            return -errno;
    }
}
9792+
/*
 * sem_wait() that retries when interrupted by a signal.
 * Returns 0 once the semaphore has been taken, otherwise -errno.
 */
static inline int do_wait(sem_t *const sem)
{
    for (;;) {
        if (sem_wait(sem) == 0)
            return 0;
        if (errno != EINTR)
            return -errno;
    }
}
9801+
9802+static int request_buffers(int video_fd, unsigned int type,
9803+                           enum mediabufs_memory memory, unsigned int buffers_count)
9804+{
9805+    struct v4l2_requestbuffers buffers;
9806+    int rc;
9807+
9808+    memset(&buffers, 0, sizeof(buffers));
9809+    buffers.type = type;
9810+    buffers.memory = mediabufs_memory_to_v4l2(memory);
9811+    buffers.count = buffers_count;
9812+
9813+    rc = ioctl(video_fd, VIDIOC_REQBUFS, &buffers);
9814+    if (rc < 0) {
9815+        rc = -errno;
9816+        request_log("Unable to request %d type %d buffers: %s\n", buffers_count, type, strerror(-rc));
9817+        return rc;
9818+    }
9819+
9820+    return 0;
9821+}
9822+
9823+
/*
 * Issue VIDIOC_STREAMON (enable) or VIDIOC_STREAMOFF (!enable) for the
 * given buffer type on video_fd.
 * Returns 0 on success, -errno on failure (after logging).
 */
static int set_stream(int video_fd, unsigned int type, bool enable)
{
    enum v4l2_buf_type buf_type = type;

    if (ioctl(video_fd, enable ? VIDIOC_STREAMON : VIDIOC_STREAMOFF,
              &buf_type) < 0) {
        /* Capture errno before logging */
        const int err = errno;

        request_log("Unable to %sable stream: %s\n",
                    enable ? "en" : "dis", strerror(err));
        return -err;
    }

    return 0;
}
9840+
9841+
9842+
9843+struct media_request * media_request_get(struct media_pool * const mp)
9844+{
9845+    struct media_request *req = NULL;
9846+
9847+    /* Timeout handled by poll code */
9848+    if (do_wait(&mp->sem))
9849+        return NULL;
9850+
9851+    pthread_mutex_lock(&mp->lock);
9852+    req = mp->free_reqs;
9853+    if (req) {
9854+        mp->free_reqs = req->next;
9855+        req->next = NULL;
9856+    }
9857+    pthread_mutex_unlock(&mp->lock);
9858+    return req;
9859+}
9860+
9861+int media_request_fd(const struct media_request * const req)
9862+{
9863+    return req->fd;
9864+}
9865+
9866+int media_request_start(struct media_request * const req)
9867+{
9868+    while (ioctl(req->fd, MEDIA_REQUEST_IOC_QUEUE, NULL) == -1)
9869+    {
9870+        const int err = errno;
9871+        if (err == EINTR)
9872+            continue;
9873+        request_log("%s: Failed to Q media: (%d) %s\n", __func__, err, strerror(err));
9874+        return -err;
9875+    }
9876+
9877+    pollqueue_add_task(req->pt, 2000);
9878+    return 0;
9879+}
9880+
9881+static void media_request_done(void *v, short revents)
9882+{
9883+    struct media_request *const req = v;
9884+    struct media_pool *const mp = req->mp;
9885+
9886+    /* ** Not sure what to do about timeout */
9887+
9888+    if (ioctl(req->fd, MEDIA_REQUEST_IOC_REINIT, NULL) < 0)
9889+        request_log("Unable to reinit media request: %s\n",
9890+                strerror(errno));
9891+
9892+    pthread_mutex_lock(&mp->lock);
9893+    req->next = mp->free_reqs;
9894+    mp->free_reqs = req;
9895+    pthread_mutex_unlock(&mp->lock);
9896+    sem_post(&mp->sem);
9897+}
9898+
9899+int media_request_abort(struct media_request ** const preq)
9900+{
9901+    struct media_request * const req = *preq;
9902+
9903+    if (req == NULL)
9904+        return 0;
9905+    *preq = NULL;
9906+
9907+    media_request_done(req, 0);
9908+    return 0;
9909+}
9910+
9911+static void free_req_pool(struct media_request * const pool, const unsigned int n)
9912+{
9913+    unsigned int i;
9914+    for (i = 0; i != n; ++i) {
9915+        struct media_request * const req = pool + i;
9916+        if (req->pt)
9917+            polltask_delete(&req->pt);
9918+        if (req->fd != -1)
9919+            close(req->fd);
9920+    }
9921+    free(pool);
9922+}
9923+
9924+struct media_pool * media_pool_new(const char * const media_path,
9925+                   struct pollqueue * const pq,
9926+                   const unsigned int n)
9927+{
9928+    struct media_pool * const mp = calloc(1, sizeof(*mp));
9929+    unsigned int i;
9930+
9931+    if (!mp)
9932+        goto fail0;
9933+
9934+    mp->pq = pq;
9935+    pthread_mutex_init(&mp->lock, NULL);
9936+    mp->fd = open(media_path, O_RDWR | O_NONBLOCK);
9937+    if (mp->fd == -1) {
9938+        request_log("Failed to open '%s': %s\n", media_path, strerror(errno));
9939+        goto fail1;
9940+    }
9941+
9942+    if ((mp->pool_reqs = calloc(n, sizeof(*mp->pool_reqs))) == NULL)
9943+        goto fail3;
9944+    mp->pool_n = n;
9945+    for (i = 0; i != n; ++i) {
9946+        mp->pool_reqs[i].mp = mp;
9947+        mp->pool_reqs[i].fd = -1;
9948+    }
9949+
9950+    for (i = 0; i != n; ++i) {
9951+        struct media_request * const req = mp->pool_reqs + i;
9952+
9953+        if (ioctl(mp->fd, MEDIA_IOC_REQUEST_ALLOC, &req->fd) == -1) {
9954+            request_log("Failed to alloc request %d: %s\n", i, strerror(errno));
9955+            goto fail4;
9956+        }
9957+
9958+        req->pt = polltask_new(pq, req->fd, POLLPRI, media_request_done, req);
9959+        if (!req->pt)
9960+            goto fail4;
9961+
9962+        req->next = mp->free_reqs,
9963+        mp->free_reqs = req;
9964+    }
9965+
9966+    sem_init(&mp->sem, 0, n);
9967+
9968+    return mp;
9969+
9970+fail4:
9971+    free_req_pool(mp->pool_reqs, mp->pool_n);
9972+fail3:
9973+    close(mp->fd);
9974+    pthread_mutex_destroy(&mp->lock);
9975+fail1:
9976+    free(mp);
9977+fail0:
9978+    return NULL;
9979+}
9980+
9981+void media_pool_delete(struct media_pool ** pMp)
9982+{
9983+    struct media_pool * const mp = *pMp;
9984+
9985+    if (!mp)
9986+        return;
9987+    *pMp = NULL;
9988+
9989+    free_req_pool(mp->pool_reqs, mp->pool_n);
9990+    close(mp->fd);
9991+    sem_destroy(&mp->sem);
9992+    pthread_mutex_destroy(&mp->lock);
9993+    free(mp);
9994+}
9995+
9996+
+/* Value of qent_base.index before a V4L2 buffer index has been assigned (all bits set) */
+#define INDEX_UNSET (~(uint32_t)0)
9998+
+/* State of a queue entry (struct qent_base.status) */
+enum qent_status {
+    QENT_NEW = 0,       // Initial state - shouldn't last
+    QENT_FREE,          // On free chain
+    QENT_PENDING,       // User has ent
+    QENT_WAITING,       // On inuse
+    QENT_DONE,          // Frame rx
+    QENT_ERROR,         // Error
+    QENT_IMPORT         // State required by qent_dst_import_fd() before it will attach a dmabuf
+};
10008+
+/* Fields shared by source and destination queue entries */
+struct qent_base {
+    atomic_int ref_count;               // Zero-based: qent_dst_unref() releases the entry when the value before decrement was 0
+    struct qent_base *next;             // List links - maintained by ql_add_tail() / ql_extract()
+    struct qent_base *prev;
+    enum qent_status status;
+    enum mediabufs_memory memtype;      // Converted with mediabufs_memory_to_v4l2() when the entry is queued
+    uint32_t index;                     // V4L2 buffer index; INDEX_UNSET until assigned
+    struct dmabuf_h *dh[VIDEO_MAX_PLANES];  // One dmabuf handle per plane; unused slots are NULL
+    struct timeval timestamp;           // Copied to / from v4l2_buffer.timestamp on QBUF / DQBUF
+};
10019+
+/* Source (bitstream) queue entry; base must stay first so base_to_src() can cast */
+struct qent_src {
+    struct qent_base base;
+    int fixed_size;     // Non-zero: qent_src_data_copy() will not grow the buffer
+};
10024+
+/* Destination (frame) queue entry; base must stay first so base_to_dst() can cast */
+struct qent_dst {
+    struct qent_base base;
+    bool waiting;                           // Set by qe_dst_waiting(), cleared by qe_dst_done()
+    pthread_mutex_t lock;                   // Guards waiting; paired with cond
+    pthread_cond_t cond;                    // Broadcast by qe_dst_done(); waited on in qent_dst_wait()
+    struct ff_weak_link_client * mbc_wl;    // Weak link that qent_dst_unref() locks to reach the owning mediabufs_ctl
+};
10032+
+/* Doubly linked list of queue entries; both pointers are NULL when the list is empty */
+struct qe_list_head {
+    struct qent_base *head;     // First entry - where bq_get_free() / bq_get_inuse() extract from
+    struct qent_base *tail;     // Last entry - where ql_add_tail() appends
+};
10037+
+/* One queue's set of entries, split into a free list and an in-use list */
+struct buf_pool {
+    enum mediabufs_memory memtype;  // Memory type used for DQBUF / CREATE_BUFS on this queue
+    pthread_mutex_t lock;           // Taken by the queue_*() helpers around list manipulation
+    sem_t free_sem;                 // Posted once per entry put on the free list by queue_put_free()
+    struct qe_list_head free;
+    struct qe_list_head inuse;
+};
10045+
10046+
10047+static inline struct qent_dst *base_to_dst(struct qent_base *be)
10048+{
10049+    return (struct qent_dst *)be;
10050+}
10051+
10052+static inline struct qent_src *base_to_src(struct qent_base *be)
10053+{
10054+    return (struct qent_src *)be;
10055+}
10056+
10057+
+/*
+ * Designated initializer for struct qent_base: ref count 0, status
+ * QENT_NEW, the given memory type and no V4L2 index yet.  Members not
+ * named here (list links, dh[], timestamp) are zero-initialised.
+ */
+#define QENT_BASE_INITIALIZER(mtype) {\
+    .ref_count = ATOMIC_VAR_INIT(0),\
+    .status = QENT_NEW,\
+    .memtype = (mtype),\
+    .index  = INDEX_UNSET\
+}
10064+
10065+static void qe_base_uninit(struct qent_base *const be)
10066+{
10067+    unsigned int i;
10068+    for (i = 0; i != VIDEO_MAX_PLANES; ++i) {
10069+        dmabuf_free(be->dh[i]);
10070+        be->dh[i] = NULL;
10071+    }
10072+}
10073+
10074+static void qe_src_free(struct qent_src *const be_src)
10075+{
10076+    if (!be_src)
10077+        return;
10078+    qe_base_uninit(&be_src->base);
10079+    free(be_src);
10080+}
10081+
10082+static struct qent_src * qe_src_new(enum mediabufs_memory mtype)
10083+{
10084+    struct qent_src *const be_src = malloc(sizeof(*be_src));
10085+    if (!be_src)
10086+        return NULL;
10087+    *be_src = (struct qent_src){
10088+        .base = QENT_BASE_INITIALIZER(mtype)
10089+    };
10090+    return be_src;
10091+}
10092+
10093+static void qe_dst_free(struct qent_dst *const be_dst)
10094+{
10095+    if (!be_dst)
10096+        return;
10097+
10098+    ff_weak_link_unref(&be_dst->mbc_wl);
10099+    pthread_cond_destroy(&be_dst->cond);
10100+    pthread_mutex_destroy(&be_dst->lock);
10101+    qe_base_uninit(&be_dst->base);
10102+    free(be_dst);
10103+}
10104+
10105+static struct qent_dst* qe_dst_new(struct ff_weak_link_master * const wl, const enum mediabufs_memory memtype)
10106+{
10107+    struct qent_dst *const be_dst = malloc(sizeof(*be_dst));
10108+    if (!be_dst)
10109+        return NULL;
10110+    *be_dst = (struct qent_dst){
10111+        .base = QENT_BASE_INITIALIZER(memtype),
10112+        .lock = PTHREAD_MUTEX_INITIALIZER,
10113+        .cond = PTHREAD_COND_INITIALIZER,
10114+        .mbc_wl = ff_weak_link_ref(wl)
10115+    };
10116+    return be_dst;
10117+}
10118+
10119+static void ql_add_tail(struct qe_list_head * const ql, struct qent_base * be)
10120+{
10121+    if (ql->tail)
10122+        ql->tail->next = be;
10123+    else
10124+        ql->head = be;
10125+    be->prev = ql->tail;
10126+    be->next = NULL;
10127+    ql->tail = be;
10128+}
10129+
10130+static struct qent_base * ql_extract(struct qe_list_head * const ql, struct qent_base * be)
10131+{
10132+    if (!be)
10133+        return NULL;
10134+
10135+    if (be->next)
10136+        be->next->prev = be->prev;
10137+    else
10138+        ql->tail = be->prev;
10139+    if (be->prev)
10140+        be->prev->next = be->next;
10141+    else
10142+        ql->head = be->next;
10143+    be->next = NULL;
10144+    be->prev = NULL;
10145+    return be;
10146+}
10147+
10148+
10149+static void bq_put_free(struct buf_pool *const bp, struct qent_base * be)
10150+{
10151+    ql_add_tail(&bp->free, be);
10152+}
10153+
10154+static struct qent_base * bq_get_free(struct buf_pool *const bp)
10155+{
10156+    return ql_extract(&bp->free, bp->free.head);
10157+}
10158+
10159+static struct qent_base * bq_extract_inuse(struct buf_pool *const bp, struct qent_base *const be)
10160+{
10161+    return ql_extract(&bp->inuse, be);
10162+}
10163+
10164+static struct qent_base * bq_get_inuse(struct buf_pool *const bp)
10165+{
10166+    return ql_extract(&bp->inuse, bp->inuse.head);
10167+}
10168+
10169+static void bq_free_all_free_src(struct buf_pool *const bp)
10170+{
10171+    struct qent_base *be;
10172+    while ((be = bq_get_free(bp)) != NULL)
10173+        qe_src_free(base_to_src(be));
10174+}
10175+
10176+static void bq_free_all_inuse_src(struct buf_pool *const bp)
10177+{
10178+    struct qent_base *be;
10179+    while ((be = bq_get_inuse(bp)) != NULL)
10180+        qe_src_free(base_to_src(be));
10181+}
10182+
10183+static void bq_free_all_free_dst(struct buf_pool *const bp)
10184+{
10185+    struct qent_base *be;
10186+    while ((be = bq_get_free(bp)) != NULL)
10187+        qe_dst_free(base_to_dst(be));
10188+}
10189+
10190+static void queue_put_free(struct buf_pool *const bp, struct qent_base *be)
10191+{
10192+    unsigned int i;
10193+
10194+    pthread_mutex_lock(&bp->lock);
10195+    /* Clear out state vars */
10196+    be->timestamp.tv_sec = 0;
10197+    be->timestamp.tv_usec = 0;
10198+    be->status = QENT_FREE;
10199+    for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i)
10200+        dmabuf_len_set(be->dh[i], 0);
10201+    bq_put_free(bp, be);
10202+    pthread_mutex_unlock(&bp->lock);
10203+    sem_post(&bp->free_sem);
10204+}
10205+
10206+static bool queue_is_inuse(const struct buf_pool *const bp)
10207+{
10208+    return bp->inuse.tail != NULL;
10209+}
10210+
10211+static void queue_put_inuse(struct buf_pool *const bp, struct qent_base *be)
10212+{
10213+    if (!be)
10214+        return;
10215+    pthread_mutex_lock(&bp->lock);
10216+    ql_add_tail(&bp->inuse, be);
10217+    be->status = QENT_WAITING;
10218+    pthread_mutex_unlock(&bp->lock);
10219+}
10220+
10221+static struct qent_base *queue_get_free(struct buf_pool *const bp)
10222+{
10223+    struct qent_base *buf;
10224+
10225+    if (do_wait(&bp->free_sem))
10226+        return NULL;
10227+    pthread_mutex_lock(&bp->lock);
10228+    buf = bq_get_free(bp);
10229+    pthread_mutex_unlock(&bp->lock);
10230+    return buf;
10231+}
10232+
10233+static struct qent_base *queue_tryget_free(struct buf_pool *const bp)
10234+{
10235+    struct qent_base *buf;
10236+
10237+    if (do_trywait(&bp->free_sem))
10238+        return NULL;
10239+    pthread_mutex_lock(&bp->lock);
10240+    buf = bq_get_free(bp);
10241+    pthread_mutex_unlock(&bp->lock);
10242+    return buf;
10243+}
10244+
10245+static struct qent_base * queue_find_extract_index(struct buf_pool *const bp, const unsigned int index)
10246+{
10247+    struct qent_base *be;
10248+
10249+    pthread_mutex_lock(&bp->lock);
10250+    /* Expect 1st in Q, but allow anywhere */
10251+    for (be = bp->inuse.head; be; be = be->next) {
10252+        if (be->index == index) {
10253+            bq_extract_inuse(bp, be);
10254+            break;
10255+        }
10256+    }
10257+    pthread_mutex_unlock(&bp->lock);
10258+
10259+    return be;
10260+}
10261+
10262+static void queue_delete(struct buf_pool *const bp)
10263+{
10264+    sem_destroy(&bp->free_sem);
10265+    pthread_mutex_destroy(&bp->lock);
10266+    free(bp);
10267+}
10268+
10269+static struct buf_pool* queue_new(const int vfd)
10270+{
10271+    struct buf_pool *bp = calloc(1, sizeof(*bp));
10272+    if (!bp)
10273+        return NULL;
10274+    pthread_mutex_init(&bp->lock, NULL);
10275+    sem_init(&bp->free_sem, 0, 0);
10276+    return bp;
10277+}
10278+
10279+
+/* Per-device state tying the source and destination buffer queues to one video fd */
+struct mediabufs_ctl {
+    atomic_int ref_count;  /* 0 is single ref for easier atomics */
+    void * dc;                  // Passed as the context argument to request_err() / request_info()
+    int vfd;                    // Video device fd used for the QBUF / DQBUF / CREATE_BUFS / QUERYBUF ioctls
+    bool stream_on;
+    bool polling;               // True while the poll task pt is scheduled
+    bool dst_fixed;             // Dst Q is fixed size
+    pthread_mutex_t lock;       // Held while queueing in mediabufs_start_request() and dequeueing in mediabufs_poll_cb()
+    struct buf_pool * src;      // Source (bitstream) entries
+    struct buf_pool * dst;      // Destination (frame) entries
+    struct polltask * pt;       // Task (re)armed with pollqueue_add_task() while either queue is in use
+    struct pollqueue * pq;
+    struct ff_weak_link_master * this_wlm;  // presumably the master end of qent_dst.mbc_wl - TODO confirm
+
+    enum mediabufs_memory src_memtype;
+    enum mediabufs_memory dst_memtype;
+    struct v4l2_format src_fmt; // Format handed to qe_v4l2_queue() / qe_dequeue() for source buffers
+    struct v4l2_format dst_fmt; // As src_fmt but for destination buffers; also used by create_dst_bufs()
+    struct v4l2_capability capability;
+};
10300+
10301+static int qe_v4l2_queue(struct qent_base *const be,
10302+               const int vfd, struct media_request *const mreq,
10303+               const struct v4l2_format *const fmt,
10304+               const bool is_dst, const bool hold_flag)
10305+{
10306+    struct v4l2_buffer buffer = {
10307+        .type = fmt->type,
10308+        .memory = mediabufs_memory_to_v4l2(be->memtype),
10309+        .index = be->index
10310+    };
10311+    struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
10312+
10313+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
10314+        unsigned int i;
10315+        for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) {
10316+            if (is_dst)
10317+                dmabuf_len_set(be->dh[i], 0);
10318+
10319+            /* *** Really need a pixdesc rather than a format so we can fill in data_offset */
10320+            planes[i].length = dmabuf_size(be->dh[i]);
10321+            planes[i].bytesused = dmabuf_len(be->dh[i]);
10322+            if (be->memtype == MEDIABUFS_MEMORY_DMABUF)
10323+                planes[i].m.fd = dmabuf_fd(be->dh[i]);
10324+            else
10325+                planes[i].m.mem_offset = 0;
10326+        }
10327+        buffer.m.planes = planes;
10328+        buffer.length = i;
10329+    }
10330+    else {
10331+        if (is_dst)
10332+            dmabuf_len_set(be->dh[0], 0);
10333+
10334+        buffer.bytesused = dmabuf_len(be->dh[0]);
10335+        buffer.length = dmabuf_size(be->dh[0]);
10336+        if (be->memtype == MEDIABUFS_MEMORY_DMABUF)
10337+            buffer.m.fd = dmabuf_fd(be->dh[0]);
10338+        else
10339+            buffer.m.offset = 0;
10340+    }
10341+
10342+    if (!is_dst && mreq) {
10343+        buffer.flags |= V4L2_BUF_FLAG_REQUEST_FD;
10344+        buffer.request_fd = media_request_fd(mreq);
10345+        if (hold_flag)
10346+            buffer.flags |= V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
10347+    }
10348+
10349+    if (is_dst)
10350+        be->timestamp = (struct timeval){0,0};
10351+
10352+    buffer.timestamp = be->timestamp;
10353+
10354+    while (ioctl(vfd, VIDIOC_QBUF, &buffer)) {
10355+        const int err = errno;
10356+        if (err != EINTR) {
10357+            request_log("%s: Failed to Q buffer: err=%d (%s)\n", __func__, err, strerror(err));
10358+            return -err;
10359+        }
10360+    }
10361+    return 0;
10362+}
10363+
10364+static struct qent_base * qe_dequeue(struct buf_pool *const bp,
10365+                     const int vfd,
10366+                     const struct v4l2_format * const f)
10367+{
10368+    struct qent_base *be;
10369+    int rc;
10370+    const bool mp = V4L2_TYPE_IS_MULTIPLANAR(f->type);
10371+    struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
10372+    struct v4l2_buffer buffer = {
10373+        .type =  f->type,
10374+        .memory = mediabufs_memory_to_v4l2(bp->memtype)
10375+    };
10376+    if (mp) {
10377+        buffer.length = f->fmt.pix_mp.num_planes;
10378+        buffer.m.planes = planes;
10379+    }
10380+
10381+    while ((rc = ioctl(vfd, VIDIOC_DQBUF, &buffer)) != 0 &&
10382+           errno == EINTR)
10383+        /* Loop */;
10384+    if (rc) {
10385+        request_log("Error DQing buffer type %d: %s\n", f->type, strerror(errno));
10386+        return NULL;
10387+    }
10388+
10389+    be = queue_find_extract_index(bp, buffer.index);
10390+    if (!be) {
10391+        request_log("Failed to find index %d in Q\n", buffer.index);
10392+        return NULL;
10393+    }
10394+
10395+    if (mp) {
10396+        unsigned int i;
10397+        for (i = 0; i != buffer.length; ++i)
10398+            dmabuf_len_set(be->dh[i], V4L2_TYPE_IS_CAPTURE(f->type) ? planes[i].bytesused : 0);
10399+    }
10400+    else
10401+        dmabuf_len_set(be->dh[0], V4L2_TYPE_IS_CAPTURE(f->type) ? buffer.length : 0);
10402+
10403+    be->timestamp = buffer.timestamp;
10404+    be->status = (buffer.flags & V4L2_BUF_FLAG_ERROR) ? QENT_ERROR : QENT_DONE;
10405+    return be;
10406+}
10407+
10408+static void qe_dst_done(struct qent_dst * dst_be)
10409+{
10410+    pthread_mutex_lock(&dst_be->lock);
10411+    dst_be->waiting = false;
10412+    pthread_cond_broadcast(&dst_be->cond);
10413+    pthread_mutex_unlock(&dst_be->lock);
10414+
10415+    qent_dst_unref(&dst_be);
10416+}
10417+
10418+static bool qe_dst_waiting(struct qent_dst *const dst_be)
10419+{
10420+    bool waiting;
10421+    pthread_mutex_lock(&dst_be->lock);
10422+    waiting = dst_be->waiting;
10423+    dst_be->waiting = true;
10424+    pthread_mutex_unlock(&dst_be->lock);
10425+    return waiting;
10426+}
10427+
10428+
10429+static bool mediabufs_wants_poll(const struct mediabufs_ctl *const mbc)
10430+{
10431+    return queue_is_inuse(mbc->src) || queue_is_inuse(mbc->dst);
10432+}
10433+
10434+static void mediabufs_poll_cb(void * v, short revents)
10435+{
10436+    struct mediabufs_ctl *mbc = v;
10437+    struct qent_src *src_be = NULL;
10438+    struct qent_dst *dst_be = NULL;
10439+
10440+    if (!revents)
10441+        request_err(mbc->dc, "%s: Timeout\n", __func__);
10442+
10443+    pthread_mutex_lock(&mbc->lock);
10444+    mbc->polling = false;
10445+
10446+    if ((revents & POLLOUT) != 0)
10447+        src_be = base_to_src(qe_dequeue(mbc->src, mbc->vfd, &mbc->src_fmt));
10448+    if ((revents & POLLIN) != 0)
10449+        dst_be = base_to_dst(qe_dequeue(mbc->dst, mbc->vfd, &mbc->dst_fmt));
10450+
10451+    /* Reschedule */
10452+    if (mediabufs_wants_poll(mbc)) {
10453+        mbc->polling = true;
10454+        pollqueue_add_task(mbc->pt, 2000);
10455+    }
10456+    pthread_mutex_unlock(&mbc->lock);
10457+
10458+    if (src_be)
10459+        queue_put_free(mbc->src, &src_be->base);
10460+    if (dst_be)
10461+        qe_dst_done(dst_be);
10462+}
10463+
10464+int qent_src_params_set(struct qent_src *const be_src, const struct timeval * timestamp)
10465+{
10466+    struct qent_base *const be = &be_src->base;
10467+
10468+    be->timestamp = *timestamp;
10469+    return 0;
10470+}
10471+
10472+struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst)
10473+{
10474+    return be_dst->base.timestamp;
10475+}
10476+
10477+static int qent_base_realloc(struct qent_base *const be, const size_t len, struct dmabufs_ctl * dbsc)
10478+{
10479+    if (!be->dh[0] || len > dmabuf_size(be->dh[0])) {
10480+        size_t newsize = round_up_size(len);
10481+        request_log("%s: Overrun %zd > %zd; trying %zd\n", __func__, len, dmabuf_size(be->dh[0]), newsize);
10482+        if (!dbsc) {
10483+            request_log("%s: No dmbabuf_ctrl for realloc\n", __func__);
10484+            return -ENOMEM;
10485+        }
10486+        if ((be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], newsize)) == NULL) {
10487+            request_log("%s: Realloc %zd failed\n", __func__, newsize);
10488+            return -ENOMEM;
10489+        }
10490+    }
10491+    return 0;
10492+}
10493+
10494+int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc)
10495+{
10496+    struct qent_base *const be = &be_src->base;
10497+    return qent_base_realloc(be, len, dbsc);
10498+}
10499+
10500+
10501+int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc)
10502+{
10503+    void * dst;
10504+    struct qent_base *const be = &be_src->base;
10505+    int rv;
10506+
10507+    // Realloc doesn't copy so don't alloc if offset != 0
10508+    if ((rv = qent_base_realloc(be, offset + len,
10509+                                be_src->fixed_size || offset ? NULL : dbsc)) != 0)
10510+        return rv;
10511+
10512+    dmabuf_write_start(be->dh[0]);
10513+    dst = dmabuf_map(be->dh[0]);
10514+    if (!dst)
10515+        return -1;
10516+    memcpy((char*)dst + offset, src, len);
10517+    dmabuf_len_set(be->dh[0], len);
10518+    dmabuf_write_end(be->dh[0]);
10519+    return 0;
10520+}
10521+
10522+const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be_dst, unsigned int plane)
10523+{
10524+    const struct qent_base *const be = &be_dst->base;
10525+
10526+    return (plane >= sizeof(be->dh)/sizeof(be->dh[0])) ? NULL : be->dh[plane];
10527+}
10528+
10529+int qent_dst_dup_fd(const struct qent_dst *const be_dst, unsigned int plane)
10530+{
10531+    return dup(dmabuf_fd(qent_dst_dmabuf(be_dst, plane)));
10532+}
10533+
+/*
+ * Queue a destination buffer (optional) and a source buffer on the
+ * video device, then start the media request.
+ *
+ * mbc       buffer control
+ * pmreq     in: request to use; always set to NULL on return
+ * psrc_be   in: source entry to queue; always set to NULL on return
+ * dst_be    destination entry to queue first, or NULL for none
+ * is_final  false sets the "hold capture buffer" flag on the source
+ *           buffer (passed to qe_v4l2_queue() as !is_final)
+ *
+ * Returns MEDIABUFS_STATUS_SUCCESS, or MEDIABUFS_ERROR_OPERATION_FAILED
+ * if anything fails.  On the fail1 path the request is aborted, the
+ * source entry is returned to its free list and the destination entry
+ * is marked QENT_ERROR and completed.
+ *
+ * NOTE(review): if media_request_start() fails after both buffers have
+ * been queued, nothing here unwinds the queued buffers or the request.
+ */
+MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc,
+                struct media_request **const pmreq,
+                struct qent_src **const psrc_be,
+                struct qent_dst *const dst_be,
+                const bool is_final)
+{
+    struct media_request * mreq = *pmreq;
+    struct qent_src *const src_be = *psrc_be;
+
+    // Req & src are always both "consumed"
+    *pmreq = NULL;
+    *psrc_be = NULL;
+
+    pthread_mutex_lock(&mbc->lock);
+
+    if (!src_be)
+        goto fail1;
+
+    if (dst_be) {
+        // Refuse a destination buffer that is already marked as waiting
+        if (qe_dst_waiting(dst_be)) {
+            request_info(mbc->dc, "Request buffer already waiting on start\n");
+            goto fail1;
+        }
+        dst_be->base.timestamp = (struct timeval){0,0};
+        if (qe_v4l2_queue(&dst_be->base, mbc->vfd, NULL, &mbc->dst_fmt, true, false))
+            goto fail1;
+
+        // Extra ref while the buffer is queued; dropped by qe_dst_done()
+        qent_dst_ref(dst_be);
+        queue_put_inuse(mbc->dst, &dst_be->base);
+    }
+
+    if (qe_v4l2_queue(&src_be->base, mbc->vfd, mreq, &mbc->src_fmt, false, !is_final))
+        goto fail1;
+    queue_put_inuse(mbc->src, &src_be->base);
+
+    // Arm the poll task unless it is already scheduled
+    if (!mbc->polling && mediabufs_wants_poll(mbc)) {
+        mbc->polling = true;
+        pollqueue_add_task(mbc->pt, 2000);
+    }
+    pthread_mutex_unlock(&mbc->lock);
+
+    // NOTE(review): mreq is dereferenced by media_request_start() - callers presumably never pass a NULL request; confirm
+    if (media_request_start(mreq))
+        return MEDIABUFS_ERROR_OPERATION_FAILED;
+
+    return MEDIABUFS_STATUS_SUCCESS;
+
+fail1:
+    media_request_abort(&mreq);
+    if (src_be)
+        queue_put_free(mbc->src, &src_be->base);
+
+// *** TODO: If src Q fails this doesnt unwind properly - separate dst Q from src Q
+    if (dst_be) {
+        dst_be->base.status = QENT_ERROR;
+        qe_dst_done(dst_be);
+    }
+    pthread_mutex_unlock(&mbc->lock);
+    return MEDIABUFS_ERROR_OPERATION_FAILED;
+}
10593+
10594+
10595+static int qe_alloc_from_fmt(struct qent_base *const be,
10596+                   struct dmabufs_ctl *const dbsc,
10597+                   const struct v4l2_format *const fmt)
10598+{
10599+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
10600+        unsigned int i;
10601+        for (i = 0; i != fmt->fmt.pix_mp.num_planes; ++i) {
10602+            be->dh[i] = dmabuf_realloc(dbsc, be->dh[i],
10603+                fmt->fmt.pix_mp.plane_fmt[i].sizeimage);
10604+            /* On failure tidy up and die */
10605+            if (!be->dh[i]) {
10606+                while (i--) {
10607+                    dmabuf_free(be->dh[i]);
10608+                    be->dh[i] = NULL;
10609+                }
10610+                return -1;
10611+            }
10612+        }
10613+    }
10614+    else {
10615+//      be->dh[0] = dmabuf_alloc(dbsc, fmt->fmt.pix.sizeimage);
10616+        size_t size = fmt->fmt.pix.sizeimage;
10617+        be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], size);
10618+        if (!be->dh[0])
10619+            return -1;
10620+    }
10621+    return 0;
10622+}
10623+
10624+static MediaBufsStatus fmt_set(struct v4l2_format *const fmt, const int fd,
10625+            const enum v4l2_buf_type buftype,
10626+            uint32_t pixfmt,
10627+            const unsigned int width, const unsigned int height,
10628+                               const size_t bufsize)
10629+{
10630+    *fmt = (struct v4l2_format){.type = buftype};
10631+
10632+    if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) {
10633+        fmt->fmt.pix_mp.width = width;
10634+        fmt->fmt.pix_mp.height = height;
10635+        fmt->fmt.pix_mp.pixelformat = pixfmt;
10636+        if (bufsize) {
10637+            fmt->fmt.pix_mp.num_planes = 1;
10638+            fmt->fmt.pix_mp.plane_fmt[0].sizeimage = bufsize;
10639+        }
10640+    }
10641+    else {
10642+        fmt->fmt.pix.width = width;
10643+        fmt->fmt.pix.height = height;
10644+        fmt->fmt.pix.pixelformat = pixfmt;
10645+        fmt->fmt.pix.sizeimage = bufsize;
10646+    }
10647+
10648+    while (ioctl(fd, VIDIOC_S_FMT, fmt))
10649+        if (errno != EINTR)
10650+            return MEDIABUFS_ERROR_OPERATION_FAILED;
10651+
10652+    // Treat anything where we don't get at least what we asked for as a fail
10653+    if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) {
10654+        if (fmt->fmt.pix_mp.width < width ||
10655+            fmt->fmt.pix_mp.height < height ||
10656+            fmt->fmt.pix_mp.pixelformat != pixfmt) {
10657+            return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
10658+        }
10659+    }
10660+    else {
10661+        if (fmt->fmt.pix.width < width ||
10662+            fmt->fmt.pix.height < height ||
10663+            fmt->fmt.pix.pixelformat != pixfmt) {
10664+            return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
10665+        }
10666+    }
10667+
10668+    return MEDIABUFS_STATUS_SUCCESS;
10669+}
10670+
10671+static MediaBufsStatus find_fmt_flags(struct v4l2_format *const fmt,
10672+                   const int fd,
10673+                   const unsigned int type_v4l2,
10674+                   const uint32_t flags_must,
10675+                   const uint32_t flags_not,
10676+                   const unsigned int width,
10677+                   const unsigned int height,
10678+                   mediabufs_dst_fmt_accept_fn *const accept_fn,
10679+                   void *const accept_v)
10680+{
10681+    unsigned int i;
10682+
10683+    for (i = 0;; ++i) {
10684+        struct v4l2_fmtdesc fmtdesc = {
10685+            .index = i,
10686+            .type = type_v4l2
10687+        };
10688+        while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) {
10689+            if (errno != EINTR)
10690+                return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
10691+        }
10692+        if ((fmtdesc.flags & flags_must) != flags_must ||
10693+            (fmtdesc.flags & flags_not))
10694+            continue;
10695+        if (!accept_fn(accept_v, &fmtdesc))
10696+            continue;
10697+
10698+        if (fmt_set(fmt, fd, fmtdesc.type, fmtdesc.pixelformat,
10699+                width, height, 0) == MEDIABUFS_STATUS_SUCCESS)
10700+            return MEDIABUFS_STATUS_SUCCESS;
10701+    }
10702+    return 0;
10703+}
10704+
10705+
10706+/* Wait for qent done */
10707+
10708+MediaBufsStatus qent_dst_wait(struct qent_dst *const be_dst)
10709+{
10710+    struct qent_base *const be = &be_dst->base;
10711+    enum qent_status estat;
10712+
10713+    pthread_mutex_lock(&be_dst->lock);
10714+    while (be_dst->waiting &&
10715+           !pthread_cond_wait(&be_dst->cond, &be_dst->lock))
10716+        /* Loop */;
10717+    estat = be->status;
10718+    pthread_mutex_unlock(&be_dst->lock);
10719+
10720+    return estat == QENT_DONE ? MEDIABUFS_STATUS_SUCCESS :
10721+        estat == QENT_ERROR ? MEDIABUFS_ERROR_DECODING_ERROR :
10722+            MEDIABUFS_ERROR_OPERATION_FAILED;
10723+}
10724+
10725+const uint8_t * qent_dst_data(struct qent_dst *const be_dst, unsigned int buf_no)
10726+{
10727+    struct qent_base *const be = &be_dst->base;
10728+    return dmabuf_map(be->dh[buf_no]);
10729+}
10730+
10731+MediaBufsStatus qent_dst_read_start(struct qent_dst *const be_dst)
10732+{
10733+    struct qent_base *const be = &be_dst->base;
10734+    unsigned int i;
10735+    for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) {
10736+        if (dmabuf_read_start(be->dh[i])) {
10737+            while (i--)
10738+                dmabuf_read_end(be->dh[i]);
10739+            return MEDIABUFS_ERROR_ALLOCATION_FAILED;
10740+        }
10741+    }
10742+    return MEDIABUFS_STATUS_SUCCESS;
10743+}
10744+
10745+MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be_dst)
10746+{
10747+    struct qent_base *const be = &be_dst->base;
10748+    unsigned int i;
10749+    MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS;
10750+
10751+    for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) {
10752+        if (dmabuf_read_end(be->dh[i]))
10753+            status = MEDIABUFS_ERROR_OPERATION_FAILED;
10754+    }
10755+    return status;
10756+}
10757+
10758+struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst)
10759+{
10760+    if (be_dst)
10761+        atomic_fetch_add(&be_dst->base.ref_count, 1);
10762+    return be_dst;
10763+}
10764+
10765+void qent_dst_unref(struct qent_dst ** const pbe_dst)
10766+{
10767+    struct qent_dst * const be_dst = *pbe_dst;
10768+    struct mediabufs_ctl * mbc;
10769+    if (!be_dst)
10770+        return;
10771+    *pbe_dst = NULL;
10772+
10773+    if (atomic_fetch_sub(&be_dst->base.ref_count, 1) != 0)
10774+        return;
10775+
10776+    if ((mbc = ff_weak_link_lock(&be_dst->mbc_wl)) != NULL) {
10777+        queue_put_free(mbc->dst, &be_dst->base);
10778+        ff_weak_link_unlock(be_dst->mbc_wl);
10779+    }
10780+    else {
10781+        qe_dst_free(be_dst);
10782+    }
10783+}
10784+
10785+MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst,
10786+                unsigned int plane,
10787+                int fd, size_t size)
10788+{
10789+    struct qent_base *const be = &be_dst->base;
10790+    struct dmabuf_h * dh;
10791+
10792+    if (be->status != QENT_IMPORT || be->dh[plane])
10793+        return MEDIABUFS_ERROR_OPERATION_FAILED;
10794+
10795+    dh = dmabuf_import(fd, size);
10796+    if (!dh)
10797+        return MEDIABUFS_ERROR_ALLOCATION_FAILED;
10798+
10799+    be->dh[plane] = dh;
10800+    return MEDIABUFS_STATUS_SUCCESS;
10801+}
10802+
10803+// Returns noof buffers created, -ve for error
10804+static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, struct qent_dst * const qes[])
10805+{
10806+    unsigned int i;
10807+
10808+    struct v4l2_create_buffers cbuf = {
10809+        .count = n,
10810+        .memory = mediabufs_memory_to_v4l2(mbc->dst->memtype),
10811+        .format = mbc->dst_fmt,
10812+    };
10813+
10814+    while (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf)) {
10815+        const int err = -errno;
10816+        if (err != EINTR) {
10817+            request_err(mbc->dc, "%s: Failed to create V4L2 buffer\n", __func__);
10818+            return -err;
10819+        }
10820+    }
10821+
10822+    if (cbuf.count != n)
10823+        request_warn(mbc->dc, "%s: Created %d of %d V4L2 buffers requested\n", __func__, cbuf.count, n);
10824+
10825+    for (i = 0; i != cbuf.count; ++i)
10826+        qes[i]->base.index = cbuf.index + i;
10827+
10828+    return cbuf.count;
10829+}
10830+
10831+/* Attach memory to V4L2 buffer n: each plane is either exported as a dmabuf
10832+ * (x_dmabuf) or mmap()ed, and the handle is stored in be->dh[]. Does nothing
10833+ * if be->dh[0] is already set. Returns 0 or MEDIABUFS_ERROR_OPERATION_FAILED. */
10834+static MediaBufsStatus
10835+qe_import_from_buf(struct mediabufs_ctl *const mbc, struct qent_base * const be, const struct v4l2_format *const fmt,
10836+                   const unsigned int n, const bool x_dmabuf)
10837+{
10838+    struct v4l2_buffer buf = {
10839+        .index = n,
10840+        .type = fmt->type,
10841+    };
10842+    struct v4l2_plane planes[VIDEO_MAX_PLANES];
10843+    const bool mplane = V4L2_TYPE_IS_MULTIPLANAR(fmt->type);
10844+
10845+    if (be->dh[0])
10846+        return 0;
10847+
10848+    if (mplane) {
10849+        memset(planes, 0, sizeof(planes));
10850+        buf.m.planes = planes;
10851+        buf.length = VIDEO_MAX_PLANES;
10852+    }
10853+
10854+    if (ioctl(mbc->vfd, VIDIOC_QUERYBUF, &buf) != 0) {
10855+        request_err(mbc->dc, "VIDIOC_QUERYBUF failed\n");
10856+        return MEDIABUFS_ERROR_OPERATION_FAILED;
10857+    }
10858+
10859+    if (mplane) {
10860+        unsigned int i;
10861+        for (i = 0; i != buf.length; ++i) { // buf.length: plane count after QUERYBUF
10862+            if (x_dmabuf) {
10863+                struct v4l2_exportbuffer xbuf = {
10864+                    .type = buf.type,
10865+                    .index = buf.index,
10866+                    .plane = i,
10867+                    .flags = O_RDWR, // *** Arguably O_RDONLY would be fine
10868+                };
10869+                if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) {
10870+                    be->dh[i] = dmabuf_import(xbuf.fd, planes[i].length);
10871+                    close(xbuf.fd); // dmabuf_import dups the fd so close this one
10872+                }
10873+            }
10874+            else {
10875+                be->dh[i] = dmabuf_import_mmap(
10876+                    mmap(NULL, planes[i].length, PROT_READ | PROT_WRITE,
10877+                         MAP_SHARED | MAP_POPULATE, mbc->vfd, planes[i].m.mem_offset),
10878+                    planes[i].length);
10879+            }
10880+            /* On failure tidy up and die */
10881+            if (!be->dh[i]) {
10882+                while (i--) {
10883+                    dmabuf_free(be->dh[i]);
10884+                    be->dh[i] = NULL;
10885+                }
10886+                return MEDIABUFS_ERROR_OPERATION_FAILED;
10887+            }
10888+        }
10889+    }
10890+    else {
10891+        if (x_dmabuf) {
10892+            struct v4l2_exportbuffer xbuf = {
10893+                .type = buf.type,
10894+                .index = buf.index,
10895+                .flags = O_RDWR, // *** Arguably O_RDONLY would be fine
10896+            };
10897+            if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) {
10898+                be->dh[0] = dmabuf_import(xbuf.fd, buf.length);
10899+                // As in the multiplanar path: the import dups the fd, so
10900+                // ours has to be closed or it leaks once per buffer
10901+                close(xbuf.fd);
10902+            }
10903+        }
10904+        else {
10905+            be->dh[0] = dmabuf_import_mmap(
10906+                mmap(NULL, buf.length, PROT_READ | PROT_WRITE,
10907+                     MAP_SHARED | MAP_POPULATE, mbc->vfd, buf.m.offset),
10908+                buf.length);
10909+        }
10910+        /* Nothing was attached, so there is nothing to tidy */
10911+        if (!be->dh[0])
10912+            return MEDIABUFS_ERROR_OPERATION_FAILED;
10913+    }
10914+
10915+    return 0;
10916+}
10917+
10918+struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struct dmabufs_ctl *const dbsc)
10919+{ // Get (or create) a dst qent with memory attached, left in QENT_PENDING; NULL on failure
10920+    struct qent_dst * be_dst;
10921+
10922+    if (mbc == NULL) { // No controller: return a detached DMABUF qent marked QENT_IMPORT
10923+        be_dst = qe_dst_new(NULL, MEDIABUFS_MEMORY_DMABUF);
10924+        if (be_dst)
10925+            be_dst->base.status = QENT_IMPORT;
10926+        return be_dst;
10927+    }
10928+
10929+    if (mbc->dst_fixed) { // Fixed pool: only ever reuse an existing slot
10930+        be_dst = base_to_dst(queue_get_free(mbc->dst));
10931+        if (!be_dst)
10932+            return NULL;
10933+    }
10934+    else { // Growable pool: reuse a free slot if one is there, else create a new one
10935+        be_dst = base_to_dst(queue_tryget_free(mbc->dst));
10936+        if (!be_dst) {
10937+            be_dst = qe_dst_new(mbc->this_wlm, mbc->dst->memtype);
10938+            if (!be_dst)
10939+                return NULL;
10940+
10941+            if (create_dst_bufs(mbc, 1, &be_dst) != 1) { // anything other than exactly one new V4L2 buffer is a failure
10942+                qe_dst_free(be_dst);
10943+                return NULL;
10944+            }
10945+        }
10946+    }
10947+
10948+    if (mbc->dst->memtype == MEDIABUFS_MEMORY_MMAP) { // MMAP: export the V4L2 buffer's planes as dmabufs (x_dmabuf = true)
10949+        if (qe_import_from_buf(mbc, &be_dst->base, &mbc->dst_fmt, be_dst->base.index, true)) {
10950+            request_err(mbc->dc, "Failed to export as dmabuf\n");
10951+            queue_put_free(mbc->dst, &be_dst->base);
10952+            return NULL;
10953+        }
10954+    }
10955+    else { // any other memtype: memory presumably comes from dbsc - confirm in qe_alloc_from_fmt()
10956+        if (qe_alloc_from_fmt(&be_dst->base, dbsc, &mbc->dst_fmt)) {
10957+            /* Given how create buf works we can't uncreate it on alloc failure;
10958+             * all we can do is put it on the free Q
10959+            */
10960+            queue_put_free(mbc->dst, &be_dst->base);
10961+            return NULL;
10962+        }
10963+    }
10964+
10965+    be_dst->base.status = QENT_PENDING;
10966+    atomic_store(&be_dst->base.ref_count, 0); // count restarts at 0 for every qent handed out
10967+    return be_dst;
10968+}
10969+
10970+const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc)
10971+{
10972+    return &mbc->dst_fmt; // pointer into mbc itself, not a copy
10973+}
10974+
10975+/* Negotiate the dst format for a width x height picture.
10976+ * Two passes are made through find_fmt_flags(): the first with
10977+ * V4L2_FMT_FLAG_EMULATED in flags_not, the second with it in flags_must;
10978+ * accept_fn / accept_v are passed through untouched.
10979+ * Returns the first result that is not MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE,
10980+ * or that error if neither pass produced anything else. */
10981+MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc,
10982+               const unsigned int width,
10983+               const unsigned int height,
10984+               mediabufs_dst_fmt_accept_fn *const accept_fn,
10985+               void *const accept_v)
10986+{
10987+    MediaBufsStatus status = MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
10988+    unsigned int i;
10989+    const enum v4l2_buf_type buf_type = mbc->dst_fmt.type;
10990+    static const struct {
10991+        unsigned int flags_must;
10992+        unsigned int flags_not;
10993+    } trys[] = {
10994+        {0, V4L2_FMT_FLAG_EMULATED},
10995+        {V4L2_FMT_FLAG_EMULATED, 0},
10996+    };
10997+
10998+    for (i = 0; i != sizeof(trys)/sizeof(trys[0]); ++i) {
10999+        status = find_fmt_flags(&mbc->dst_fmt, mbc->vfd, buf_type,
11000+                                trys[i].flags_must, trys[i].flags_not,
11001+                                width, height, accept_fn, accept_v);
11002+        if (status != MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE)
11003+            break;
11004+    }
11005+    return status;
11006+}
11007+
11008+// Creates n dst slots (qent + V4L2 buffer each) on the dst free Q; at most 32 per call.
11009+// ** Partial alloc is a mess: with no way to remove individual V4L2 Q members we are somewhat stuffed
11010+MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype)
11011+{
11012+    unsigned int i;
11013+    int a = 0; // number of V4L2 buffers actually created (or < 0 on error)
11014+    unsigned int qc; // number of qents successfully allocated so far
11015+    struct qent_dst * qes[32];
11016+
11017+    if (n > 32) // must fit in qes[]
11018+        return MEDIABUFS_ERROR_ALLOCATION_FAILED;
11019+
11020+    mbc->dst->memtype = memtype; // recorded even if creation fails below
11021+
11022+    // Create qents first as it is hard to get rid of the V4L2 buffers on error
11023+    for (qc = 0; qc != n; ++qc)
11024+    {
11025+        if ((qes[qc] = qe_dst_new(mbc->this_wlm, mbc->dst->memtype)) == NULL)
11026+            goto fail;
11027+    }
11028+
11029+    if ((a = create_dst_bufs(mbc, n, qes)) < 0) // NOTE(review): the visible tail of create_dst_bufs() returns -err with err = -errno, i.e. a positive value - confirm this test can fire
11030+        goto fail;
11031+
11032+    for (i = 0; i != a; ++i) // qents that got a V4L2 buffer go on the free Q, even on partial success
11033+        queue_put_free(mbc->dst, &qes[i]->base);
11034+
11035+    if (a != n)
11036+        goto fail;
11037+
11038+    mbc->dst_fixed = fixed; // only set once everything has worked
11039+    return MEDIABUFS_STATUS_SUCCESS;
11040+
11041+fail:
11042+    for (i = (a < 0 ? 0 : a); i != qc; ++i) // free only the qents not already queued above
11043+        qe_dst_free(qes[i]);
11044+
11045+    return MEDIABUFS_ERROR_ALLOCATION_FAILED;
11046+}
11047+
11048+struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc)
11049+{ // Take a src qent off the free Q and mark it pending; NULL if none was obtained
11050+    struct qent_base *const buf = queue_get_free(mbc->src); // may be NULL - the dst alloc path checks for that too
11051+    if (buf != NULL) buf->status = QENT_PENDING;
11052+    return buf != NULL ? base_to_src(buf) : NULL;
11053+}
11054+
11055+void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src)
11056+{
11057+    struct qent_src *const victim = *pqe_src;
11058+    if (victim != NULL) { // a NULL handle is quietly ignored
11059+        *pqe_src = NULL; // caller's handle is cleared before the qent goes back
11060+        queue_put_free(mbc->src, &victim->base);
11061+    }
11062+}
11063+
11064+static MediaBufsStatus
11065+chk_memory_type(struct mediabufs_ctl *const mbc,
11066+    const struct v4l2_format * const f,
11067+    const enum mediabufs_memory m)
11068+{
11069+    // Asks for zero buffers, so this is only a probe: what matters is the
11070+    // capabilities word the ioctl fills in
11071+    struct v4l2_create_buffers probe = {
11072+        .count = 0,
11073+        .memory = V4L2_MEMORY_MMAP,
11074+        .format = *f
11075+    };
11076+
11077+    if (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &probe) != 0)
11078+        return MEDIABUFS_ERROR_OPERATION_FAILED;
11079+
11080+    // MMAP is taken as supported once the probe itself has worked
11081+    if (m == MEDIABUFS_MEMORY_MMAP)
11082+        return MEDIABUFS_STATUS_SUCCESS;
11083+
11084+    // Anything other than MMAP / DMABUF is never accepted
11085+    if (m != MEDIABUFS_MEMORY_DMABUF)
11086+        return MEDIABUFS_ERROR_UNSUPPORTED_MEMORY;
11087+
11088+    // DMABUF needs the capability bit; caps == 0 means unknown => assume not
11089+    return (probe.capabilities & V4L2_BUF_CAP_SUPPORTS_DMABUF) != 0 ?
11090+        MEDIABUFS_STATUS_SUCCESS : MEDIABUFS_ERROR_UNSUPPORTED_MEMORY;
11091+}
11092+
11093+MediaBufsStatus
11094+mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype)
11095+{
11096+    return chk_memory_type(mbc, &mbc->src_fmt, memtype); // checked against the current src format
11097+}
11098+
11099+MediaBufsStatus
11100+mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype)
11101+{
11102+    return chk_memory_type(mbc, &mbc->dst_fmt, memtype); // checked against the current dst format
11103+}
11104+
11105+/* Build the src buffer pool; the src format must have been set up before this.
11106+ * Requests n V4L2 buffers (n shrinks to the number granted) and queues one free
11107+ * qent per buffer. On failure the qents are freed and the buffers released. */
11108+MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc,
11109+                  struct dmabufs_ctl * const dbsc,
11110+                  unsigned int n, const enum mediabufs_memory memtype)
11111+{
11112+    unsigned int i;
11113+    struct v4l2_requestbuffers req = {
11114+        .count = n,
11115+        .type = mbc->src_fmt.type,
11116+        .memory = mediabufs_memory_to_v4l2(memtype)
11117+    };
11118+
11119+    bq_free_all_free_src(mbc->src);
11120+
11121+    while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1) {
11122+        if (errno != EINTR) {
11123+            request_err(mbc->dc, "%s: Failed to request src bufs\n", __func__);
11124+            return MEDIABUFS_ERROR_OPERATION_FAILED;
11125+        }
11126+    }
11127+
11128+    if (n > req.count) {
11129+        request_info(mbc->dc, "Only allocated %d of %d src buffers requested\n", req.count, n);
11130+        n = req.count;
11131+    }
11132+
11133+    for (i = 0; i != n; ++i) {
11134+        struct qent_src *const be_src = qe_src_new(memtype);
11135+        if (!be_src) {
11136+            request_err(mbc->dc, "Failed to create src be %d\n", i);
11137+            goto fail;
11138+        }
11139+        switch (memtype) {
11140+        case MEDIABUFS_MEMORY_MMAP:
11141+            if (qe_import_from_buf(mbc, &be_src->base, &mbc->src_fmt, i, false)) {
11142+                qe_src_free(be_src);
11143+                goto fail;
11144+            }
11145+            be_src->fixed_size = 1;
11146+            break;
11147+        case MEDIABUFS_MEMORY_DMABUF:
11148+            if (qe_alloc_from_fmt(&be_src->base, dbsc, &mbc->src_fmt)) {
11149+                qe_src_free(be_src);
11150+                goto fail;
11151+            }
11152+            be_src->fixed_size = !mediabufs_src_resizable(mbc);
11153+            break;
11154+        default:
11155+            request_err(mbc->dc, "Unexpected memory type\n");
11156+            qe_src_free(be_src); // not on any queue yet, so the fail path would never find it
11157+            goto fail;
11158+        }
11159+        be_src->base.index = i;
11160+        queue_put_free(mbc->src, &be_src->base);
11161+    }
11162+
11163+    mbc->src->memtype = memtype;
11164+    return MEDIABUFS_STATUS_SUCCESS;
11165+
11166+fail:
11167+    bq_free_all_free_src(mbc->src);
11168+    req.count = 0; // a count of 0 hands the V4L2 buffers back
11169+    while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1 && errno == EINTR)
11170+        /* Loop */;
11171+    return MEDIABUFS_ERROR_OPERATION_FAILED;
11172+}
11173+
11174+
11175+
11176+/*
11177+ * Set stuff order:
11178+ *  Set src fmt
11179+ *  Set parameters (sps) on vfd
11180+ *  Negotiate dst format (dst_fmt_set)
11181+ *  Create src buffers
11182+ *  Alloc a dst buffer or Create dst slots
11183+*/
11184+MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc)
11185+{
11186+    const int vfd = mbc->vfd;
11187+    if (mbc->stream_on) // already running - nothing to do
11188+        return MEDIABUFS_STATUS_SUCCESS;
11189+
11190+    // src is started first, then dst
11191+    if (set_stream(vfd, mbc->src_fmt.type, true) < 0) {
11192+        request_log("Failed to set stream on src type %d\n", mbc->src_fmt.type);
11193+        return MEDIABUFS_ERROR_OPERATION_FAILED;
11194+    }
11195+    if (set_stream(vfd, mbc->dst_fmt.type, true) < 0) {
11196+        request_log("Failed to set stream on dst type %d\n", mbc->dst_fmt.type);
11197+        set_stream(vfd, mbc->src_fmt.type, false); // undo the src start
11198+        return MEDIABUFS_ERROR_OPERATION_FAILED;
11199+    }
11200+    mbc->stream_on = true;
11201+    return MEDIABUFS_STATUS_SUCCESS;
11202+}
11203+
11204+MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc)
11205+{
11206+    bool failed = false;
11207+
11208+    if (!mbc->stream_on)
11209+        return MEDIABUFS_STATUS_SUCCESS;
11210+
11211+    // dst goes off first, then src; both are attempted whatever happens
11212+    if (set_stream(mbc->vfd, mbc->dst_fmt.type, false) < 0) {
11213+        request_log("Failed to set stream off dst type %d\n", mbc->dst_fmt.type);
11214+        failed = true;
11215+    }
11216+    if (set_stream(mbc->vfd, mbc->src_fmt.type, false) < 0) {
11217+        request_log("Failed to set stream off src type %d\n", mbc->src_fmt.type);
11218+        failed = true;
11219+    }
11220+
11221+    mbc->stream_on = false; // marked off even if an ioctl failed
11222+    return failed ? MEDIABUFS_ERROR_OPERATION_FAILED : MEDIABUFS_STATUS_SUCCESS;
11223+}
11224+
11225+int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, struct v4l2_ext_control control_array[], unsigned int n)
11226+{
11227+    struct v4l2_ext_controls ctrl_set = {
11228+        .controls = control_array,
11229+        .count = n
11230+    };
11231+
11232+    // With a request, the values are tied to it through its fd
11233+    if (mreq != NULL) {
11234+        ctrl_set.which = V4L2_CTRL_WHICH_REQUEST_VAL;
11235+        ctrl_set.request_fd = media_request_fd(mreq);
11236+    }
11237+
11238+    for (;;) {
11239+        int err;
11240+        if (ioctl(mbc->vfd, VIDIOC_S_EXT_CTRLS, &ctrl_set) == 0)
11241+            return 0;
11242+        if ((err = errno) == EINTR)
11243+            continue; // interrupted - simply go round again
11244+        request_err(mbc->dc, "Unable to set controls: %s\n", strerror(err));
11245+        return -err;
11246+    }
11247+}
11248+
11249+MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc,
11250+                struct media_request * const mreq,
11251+                unsigned int id, void *data,
11252+                unsigned int size)
11253+{
11254+    struct v4l2_ext_control single = { // one-element array for the general setter
11255+        .id = id,
11256+        .ptr = data,
11257+        .size = size
11258+    };
11259+    if (mediabufs_ctl_set_ext_ctrls(mbc, mreq, &single, 1) != 0)
11260+        return MEDIABUFS_ERROR_OPERATION_FAILED; // the errno detail is not passed on
11261+    return MEDIABUFS_STATUS_SUCCESS;
11262+}
11263+
11264+MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc,
11265+                                      enum v4l2_buf_type buf_type,
11266+                                      const uint32_t pixfmt,
11267+                                      const uint32_t width, const uint32_t height,
11268+                                      const size_t bufsize)
11269+{
11270+    const MediaBufsStatus status = fmt_set(&mbc->src_fmt, mbc->vfd, buf_type, pixfmt, width, height, bufsize);
11271+    if (status == MEDIABUFS_STATUS_SUCCESS)
11272+        return MEDIABUFS_STATUS_SUCCESS;
11273+    request_err(mbc->dc, "Failed to set src buftype %d, format %#x %dx%d\n", buf_type, pixfmt, width, height);
11274+    return status; // fmt_set()'s own error code goes back unchanged
11275+}
11276+
11277+int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n)
11278+{
11279+    int last_err = 0; // -errno of the most recent failed query, 0 if none failed
11280+    unsigned int i;
11281+    for (i = 0; i != n; ++i) {
11282+        while (ioctl(mbc->vfd, VIDIOC_QUERY_EXT_CTRL, &ctrls[i]) != 0) {
11283+            const int err = errno;
11284+            if (err == EINTR)
11285+                continue;
11286+            // Often used for probing - errors are to be expected
11287+            request_debug(mbc->dc, "Failed to query ext id=%#x, err=%d\n", ctrls[i].id, err);
11288+            ctrls[i].type = 0; // 0 is invalid
11289+            last_err = -err;
11290+            break;
11291+        }
11292+    }
11293+    return last_err;
11294+}
11295+
11296+int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc)
11297+{
11298+#if 1 // resizing is compiled out: src buffers are always reported as not resizable
11299+    return 0;
11300+#else
11301+    // Single planar OUTPUT can only take exact size buffers
11302+    // Multiplanar will take larger than negotiated
11303+    return V4L2_TYPE_IS_MULTIPLANAR(mbc->src_fmt.type);
11304+#endif
11305+}
11306+
11307+static void mediabufs_ctl_delete(struct mediabufs_ctl *const mbc)
11308+{ // Full teardown of a controller; reached from mediabufs_ctl_unref() when the last reference goes
11309+    if (!mbc)
11310+        return;
11311+
11312+    // Break the weak link first
11313+    ff_weak_link_break(&mbc->this_wlm);
11314+
11315+    polltask_delete(&mbc->pt); // waits for the task if it is queued or running
11316+
11317+    mediabufs_stream_off(mbc); // result ignored - nothing useful can be done with it here
11318+
11319+    // Empty v4l2 buffer stash
11320+    request_buffers(mbc->vfd, mbc->src_fmt.type, V4L2_MEMORY_MMAP, 0);
11321+    request_buffers(mbc->vfd, mbc->dst_fmt.type, V4L2_MEMORY_MMAP, 0);
11322+
11323+    bq_free_all_free_src(mbc->src);
11324+    bq_free_all_inuse_src(mbc->src);
11325+    bq_free_all_free_dst(mbc->dst);
11326+
11327+    { // dst qents still in use are not freed here: each is flagged as an error and passed to qe_dst_done()
11328+        struct qent_dst *dst_be;
11329+        while ((dst_be = base_to_dst(bq_get_inuse(mbc->dst))) != NULL) {
11330+            dst_be->base.timestamp = (struct timeval){0};
11331+            dst_be->base.status = QENT_ERROR;
11332+            qe_dst_done(dst_be);
11333+        }
11334+    }
11335+
11336+    queue_delete(mbc->dst);
11337+    queue_delete(mbc->src);
11338+    close(mbc->vfd);
11339+    pthread_mutex_destroy(&mbc->lock);
11340+
11341+    free(mbc);
11342+}
11343+
11344+struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc)
11345+{
11346+    atomic_fetch_add(&mbc->ref_count, 1); // no NULL check: mbc must be valid
11347+    return mbc; // same pointer, returned for call chaining
11348+}
11349+
11350+void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc)
11351+{
11352+    struct mediabufs_ctl *const ctl = *pmbc;
11353+
11354+    if (ctl == NULL)
11355+        return;
11356+    *pmbc = NULL; // the caller's reference is gone whatever happens next
11357+
11358+    // ref_count holds (references - 1): a previous value of 0 means
11359+    // this was the last one
11360+    if (atomic_fetch_sub(&ctl->ref_count, 1) == 0)
11361+        mediabufs_ctl_delete(ctl);
11362+}
11363+
11364+unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc)
11365+{
11366+    return mbc->capability.version; // as filled in by VIDIOC_QUERYCAP in set_capabilities()
11367+}
11368+
11369+static int set_capabilities(struct mediabufs_ctl *const mbc)
11370+{
11371+    uint32_t m2m_caps;
11372+
11373+    if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &mbc->capability) != 0) {
11374+        const int saved_errno = errno;
11375+        request_err(mbc->dc, "Failed to get capabilities: %s\n", strerror(saved_errno));
11376+        return -saved_errno;
11377+    }
11378+
11379+    // Use device_caps when the driver flags them as present
11380+    if ((mbc->capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0)
11381+        m2m_caps = mbc->capability.device_caps;
11382+    else
11383+        m2m_caps = mbc->capability.capabilities;
11384+    // Multiplanar M2M is checked before single planar
11385+    if ((m2m_caps & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) {
11386+        mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
11387+        mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
11388+        return 0;
11389+    }
11390+    if ((m2m_caps & V4L2_CAP_VIDEO_M2M) != 0) {
11391+        mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
11392+        mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
11393+        return 0;
11394+    }
11395+    request_err(mbc->dc, "No M2M capabilities (%#x)\n", m2m_caps);
11396+    return -EINVAL;
11397+}
11398+
11399+/* One of these per context; returns NULL on any failure (everything acquired so far is released) */
11400+struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, const char * vpath, struct pollqueue *const pq)
11401+{
11402+    struct mediabufs_ctl *const mbc = calloc(1, sizeof(*mbc)); // zeroed, so ref_count starts at 0
11403+
11404+    if (!mbc)
11405+        return NULL;
11406+
11407+    mbc->dc = dc;
11408+    // src/dst buffer types are filled in by set_capabilities() below
11409+    mbc->pq = pq;
11410+    pthread_mutex_init(&mbc->lock, NULL);
11411+
11412+    /* Pick a default  - could we scan for this? */
11413+    if (vpath == NULL)
11414+        vpath = "/dev/media0";
11415+
11416+    while ((mbc->vfd = open(vpath, O_RDWR)) == -1) // retried only when interrupted
11417+    {
11418+        const int err = errno;
11419+        if (err != EINTR) {
11420+            request_err(dc, "Failed to open video dev '%s': %s\n", vpath, strerror(err));
11421+            goto fail0;
11422+        }
11423+    }
11424+
11425+    if (set_capabilities(mbc)) {
11426+        request_err(dc, "Bad capabilities for video dev '%s'\n", vpath);
11427+        goto fail1;
11428+    }
11429+
11430+    mbc->src = queue_new(mbc->vfd);
11431+    if (!mbc->src)
11432+        goto fail1;
11433+    mbc->dst = queue_new(mbc->vfd);
11434+    if (!mbc->dst)
11435+        goto fail2;
11436+    mbc->pt = polltask_new(pq, mbc->vfd, POLLIN | POLLOUT, mediabufs_poll_cb, mbc);
11437+    if (!mbc->pt)
11438+        goto fail3;
11439+    mbc->this_wlm = ff_weak_link_new(mbc);
11440+    if (!mbc->this_wlm)
11441+        goto fail4;
11442+
11443+    /* Cannot add polltask now - polling with nothing pending
11444+     * generates infinite error polls
11445+    */
11446+    return mbc;
11447+
11448+fail4: // labels unwind in reverse order of acquisition
11449+    polltask_delete(&mbc->pt);
11450+fail3:
11451+    queue_delete(mbc->dst);
11452+fail2:
11453+    queue_delete(mbc->src);
11454+fail1:
11455+    close(mbc->vfd);
11456+fail0:
11457+    free(mbc); // NOTE(review): mbc->lock is not destroyed on this path - confirm that is intended
11458+    request_info(dc, "%s: FAILED\n", __func__);
11459+    return NULL;
11460+}
11461+
11462+
11463+
11464--- /dev/null
11465+++ b/libavcodec/v4l2_req_media.h
11466@@ -0,0 +1,171 @@
11467+/*
11468+ * v4l2_req_media.h
11469+ *
11470+ * Permission is hereby granted, free of charge, to any person obtaining a
11471+ * copy of this software and associated documentation files (the
11472+ * "Software"), to deal in the Software without restriction, including
11473+ * without limitation the rights to use, copy, modify, merge, publish,
11474+ * distribute, sub license, and/or sell copies of the Software, and to
11475+ * permit persons to whom the Software is furnished to do so, subject to
11476+ * the following conditions:
11477+ *
11478+ * The above copyright notice and this permission notice (including the
11479+ * next paragraph) shall be included in all copies or substantial portions
11480+ * of the Software.
11481+ *
11482+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
11483+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
11484+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
11485+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
11486+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
11487+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
11488+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
11489+ */
11490+
11491+#ifndef _MEDIA_H_
11492+#define _MEDIA_H_
11493+
11494+#include <stdbool.h>
11495+#include <stdint.h>
11496+
11497+struct v4l2_format;
11498+struct v4l2_fmtdesc;
11499+struct v4l2_query_ext_ctrl;
11500+
11501+struct pollqueue;
11502+struct media_request;
11503+struct media_pool;
11504+
11505+typedef enum media_buf_status {
11506+    MEDIABUFS_STATUS_SUCCESS = 0, // the only non-error value
11507+    MEDIABUFS_ERROR_OPERATION_FAILED,
11508+    MEDIABUFS_ERROR_DECODING_ERROR,
11509+    MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE,
11510+    MEDIABUFS_ERROR_UNSUPPORTED_RT_FORMAT,
11511+    MEDIABUFS_ERROR_ALLOCATION_FAILED,
11512+    MEDIABUFS_ERROR_UNSUPPORTED_MEMORY, // memory type rejected by the *_chk_memtype() calls
11513+} MediaBufsStatus;
11514+
11515+struct media_pool * media_pool_new(const char * const media_path,
11516+                   struct pollqueue * const pq,
11517+                   const unsigned int n);
11518+void media_pool_delete(struct media_pool ** pmp);
11519+
11520+// Obtain a media request
11521+// Will block if none available - has a 2sec timeout
11522+struct media_request * media_request_get(struct media_pool * const mp);
11523+int media_request_fd(const struct media_request * const req);
11524+
11525+// Start this request
11526+// Request structure is returned to pool once done
11527+int media_request_start(struct media_request * const req);
11528+
11529+// Return an *unstarted* media_request to the pool
11530+// May later be upgraded to allow for aborting a started req
11531+int media_request_abort(struct media_request ** const preq);
11532+
11533+
11534+struct mediabufs_ctl;
11535+struct qent_src;
11536+struct qent_dst;
11537+struct dmabuf_h;
11538+struct dmabufs_ctl;
11539+
11540+// 1-1 mapping to V4L2 memory type - just defined separately to avoid some include versioning difficulties
11541+enum mediabufs_memory {
11542+   MEDIABUFS_MEMORY_UNSET            = 0,
11543+   MEDIABUFS_MEMORY_MMAP             = 1,
11544+   MEDIABUFS_MEMORY_USERPTR          = 2,
11545+   MEDIABUFS_MEMORY_OVERLAY          = 3,
11546+   MEDIABUFS_MEMORY_DMABUF           = 4,
11547+};
11548+
11549+int qent_src_params_set(struct qent_src *const be, const struct timeval * timestamp);
11550+struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst);
11551+
11552+// prealloc
11553+int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc);
11554+// dbsc may be NULL if realloc not required
11555+int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc);
11556+const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be, unsigned int plane);
11557+int qent_dst_dup_fd(const struct qent_dst *const be, unsigned int plane);
11558+MediaBufsStatus qent_dst_wait(struct qent_dst *const be);
11559+void qent_dst_delete(struct qent_dst *const be);
11560+// Returns a qent_dst to its mbc free Q or deletes it if the mbc is dead
11561+void qent_dst_unref(struct qent_dst ** const pbe_dst);
11562+struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst);
11563+
11564+const uint8_t * qent_dst_data(struct qent_dst *const be, unsigned int buf_no);
11565+MediaBufsStatus qent_dst_read_start(struct qent_dst *const be);
11566+MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be);
11567+/* Import an fd unattached to any mediabuf */
11568+MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst,
11569+                unsigned int plane,
11570+                int fd, size_t size);
11571+
11572+const char * mediabufs_memory_name(const enum mediabufs_memory m);
11573+
11574+MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc,
11575+                struct media_request **const pmreq,
11576+                struct qent_src **const psrc_be,
11577+                struct qent_dst *const dst_be,
11578+                const bool is_final);
11579+// Get / alloc a dst buffer & associate with a slot
11580+// If the dst pool is empty then behaviour depends on the fixed flag passed to
11581+// dst_slots_create.  Default is !fixed = unlimited alloc
11582+struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc,
11583+                           struct dmabufs_ctl *const dbsc);
11584+// Create dst slots without alloc
11585+// If fixed true then qent_alloc will only get slots from this pool and will
11586+// block until a qent has been unrefed
11587+MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype);
11588+
11589+MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc);
11590+MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc);
11591+const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc);
11592+
11593+typedef int mediabufs_dst_fmt_accept_fn(void * v, const struct v4l2_fmtdesc *fmtdesc);
11594+
11595+MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc,
11596+               const unsigned int width,
11597+               const unsigned int height,
11598+               mediabufs_dst_fmt_accept_fn *const accept_fn,
11599+               void *const accept_v);
11600+struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc);
11601+void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src);
11602+
11603+int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq,
11604+                                struct v4l2_ext_control control_array[], unsigned int n);
11605+MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc,
11606+                struct media_request * const mreq,
11607+                unsigned int id, void *data,
11608+                unsigned int size);
11609+int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n);
11610+
11611+int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc);
11612+
11613+MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc,
11614+                                      enum v4l2_buf_type buf_type,
11615+                                      const uint32_t pixfmt,
11616+                                      const uint32_t width, const uint32_t height,
11617+                                      const size_t bufsize);
11618+
11619+MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw,
11620+                  struct dmabufs_ctl * const dbsc,
11621+                  unsigned int n,
11622+                  const enum mediabufs_memory memtype);
11623+
11624+// Want to have appropriate formats set first
11625+MediaBufsStatus mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype);
11626+MediaBufsStatus mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype);
11627+
11628+#define MEDIABUFS_DRIVER_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c))
11629+unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc);
11630+
11631+struct mediabufs_ctl * mediabufs_ctl_new(void * const dc,
11632+                     const char *vpath, struct pollqueue *const pq);
11633+void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc);
11634+struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc);
11635+
11636+
11637+#endif
11638--- /dev/null
11639+++ b/libavcodec/v4l2_req_pollqueue.c
11640@@ -0,0 +1,361 @@
11641+#include <errno.h>
11642+#include <limits.h>
11643+#include <poll.h>
11644+#include <pthread.h>
11645+#include <semaphore.h>
11646+#include <stdatomic.h>
11647+#include <stdbool.h>
11648+#include <stdlib.h>
11649+#include <stdint.h>
11650+#include <stdio.h>
11651+#include <string.h>
11652+#include <unistd.h>
11653+#include <sys/eventfd.h>
11654+
11655+#include "v4l2_req_pollqueue.h"
11656+#include "v4l2_req_utils.h"
11657+
11658+
11659+struct pollqueue;
11660+
11661+enum polltask_state {
11662+    POLLTASK_UNQUEUED = 0, // state of a new task (polltask_new leaves it zeroed)
11663+    POLLTASK_QUEUED,
11664+    POLLTASK_RUNNING,
11665+    POLLTASK_Q_KILL,       // set by polltask_delete() on a task that was not RUNNING
11666+    POLLTASK_RUN_KILL,     // set by polltask_delete() on a task that was RUNNING
11667+};
11668+
11669+struct polltask {
11670+    struct polltask *next; // next/prev: links in the queue's doubly linked list, both NULL once unlinked
11671+    struct polltask *prev;
11672+    struct pollqueue *q; // owning queue; referenced in polltask_new(), unreffed in polltask_delete()
11673+    enum polltask_state state;
11674+
11675+    int fd; // fd and event mask as given to polltask_new()
11676+    short events;
11677+
11678+    void (*fn)(void *v, short revents); // callback and its opaque argument
11679+    void * v;
11680+
11681+    uint64_t timeout; /* CLOCK_MONOTONIC time, 0 => never */
11682+    sem_t kill_sem; // polltask_delete() waits on this unless the task was unqueued
11683+};
11684+
11685+struct pollqueue {
11686+    atomic_int ref_count;
11687+    pthread_mutex_t lock; // held while task state is examined or changed (see polltask_delete)
11688+
11689+    struct polltask *head; // first and last task of the list
11690+    struct polltask *tail;
11691+
11692+    bool kill;
11693+    bool no_prod; // when set, polltask_delete() does not prod the queue
11694+    int prod_fd; // pollqueue_prod() writes a 64-bit 1 here; presumably an eventfd - confirm
11695+    struct polltask *prod_pt;
11696+    pthread_t worker;
11697+};
11698+
11699+struct polltask *polltask_new(struct pollqueue *const pq,
11700+                              const int fd, const short events,
11701+                              void (*const fn)(void *v, short revents),
11702+                              void *const v)
11703+{
11704+    struct polltask *task;
11705+
11706+    // Refuse a task that waits for nothing; also bail if there is no memory
11707+    if (events == 0 || (task = malloc(sizeof(*task))) == NULL)
11708+        return NULL;
11709+
11710+    // Members not named below are zeroed by the compound literal, so the
11711+    // task starts in POLLTASK_UNQUEUED with timeout 0 (never). Note that
11712+    // this is where the task takes its reference on the queue.
11713+    *task = (struct polltask){
11714+        .q = pollqueue_ref(pq),
11715+        .fd = fd,
11716+        .events = events,
11717+        .fn = fn,
11718+        .v = v
11719+    };
11720+
11721+    // Not shared between processes, initial count 0: polltask_delete()
11722+    // blocks on it
11723+    sem_init(&task->kill_sem, 0, 0);
11724+
11725+    return task;
11726+}
11727+
11728+static void pollqueue_rem_task(struct pollqueue *const pq, struct polltask *const pt)
11729+{
11730+    struct polltask *const before = pt->prev;
11731+    struct polltask *const after = pt->next;
11732+
11733+    // Whatever pointed forwards at pt (queue head or predecessor) now skips it
11734+    *(before != NULL ? &before->next : &pq->head) = after;
11735+    // ... and likewise for whatever pointed backwards at it
11736+    *(after != NULL ? &after->prev : &pq->tail) = before;
11737+
11738+    pt->next = NULL;
11739+    pt->prev = NULL;
11740+}
11741+
11742+static void polltask_free(struct polltask * const pt)
11743+{
11744+    sem_destroy(&pt->kill_sem);  /* Matches the sem_init in polltask_new */
11745+    free(pt);                    /* Neither unlinks pt nor drops its queue reference */
11746+}
11747+
11748+static int pollqueue_prod(const struct pollqueue *const pq)
11749+{
11750+    static const uint64_t one = 1;  /* The value written to prod_fd */
11751+    return write(pq->prod_fd, &one, sizeof(one));  /* Makes prod_fd readable so that poll() returns */
11752+}
11753+
11754+void polltask_delete(struct polltask **const ppt)
11755+{
11756+    struct polltask *const pt = *ppt;
11757+    struct pollqueue * pq;
11758+    enum polltask_state state;  /* State of the task before the kill was requested */
11759+    bool prodme;
11760+    /* Frees *ppt and sets it to NULL; a NULL *ppt is a no-op */
11761+    if (!pt)
11762+        return;
11763+
11764+    pq = pt->q;
11765+    pthread_mutex_lock(&pq->lock);
11766+    state = pt->state;
11767+    pt->state = (state == POLLTASK_RUNNING) ? POLLTASK_RUN_KILL : POLLTASK_Q_KILL;
11768+    prodme = !pq->no_prod;
11769+    pthread_mutex_unlock(&pq->lock);
11770+    /* A queued or running task is still in use by the poll thread: wait for its go-ahead */
11771+    if (state != POLLTASK_UNQUEUED) {
11772+        if (prodme)
11773+            pollqueue_prod(pq);
11774+        while (sem_wait(&pt->kill_sem) && errno == EINTR)
11775+            /* loop */;
11776+    }
11777+
11778+    // Leave zapping the ref until we have DQed the PT as might well be
11779+    // legitimately used in it
11780+    *ppt = NULL;
11781+    polltask_free(pt);
11782+    pollqueue_unref(&pq);
11783+}
11784+
11785+static uint64_t pollqueue_now(int timeout)
11786+{
11787+    struct timespec ts;
11788+    uint64_t deadline_ms;
11789+    /* CLOCK_MONOTONIC in ms plus timeout; 0 is returned only if the clock read fails */
11790+    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
11791+        return 0;
11792+    deadline_ms = (uint64_t)ts.tv_sec * 1000 + (ts.tv_nsec / 1000000) + timeout;
11793+    return (deadline_ms == 0) ? (uint64_t)1 : deadline_ms;
11794+}
11795+
11796+void pollqueue_add_task(struct polltask *const pt, const int timeout)
11797+{
11798+    struct pollqueue * const pq = pt->q;
11799+    bool wake = false;
11800+
11801+    /* Append pt to its queue; timeout is in ms from now, < 0 => no timeout.
11802+     * A task that is already being deleted is left alone.
11803+     */
11804+    pthread_mutex_lock(&pq->lock);
11805+    if (!(pt->state == POLLTASK_Q_KILL || pt->state == POLLTASK_RUN_KILL)) {
11806+        pt->timeout = (timeout >= 0) ? pollqueue_now(timeout) : 0;
11807+        pt->state = POLLTASK_QUEUED;
11808+        pt->next = NULL;
11809+        pt->prev = pq->tail;
11810+        *(pq->tail ? &pq->tail->next : &pq->head) = pt;
11811+        pq->tail = pt;
11812+        wake = !pq->no_prod;
11813+    }
11814+    pthread_mutex_unlock(&pq->lock);
11815+    if (wake)
11816+        pollqueue_prod(pq);
11817+}
11818+
11819+static void *poll_thread(void *v)
11820+{
11821+    struct pollqueue *const pq = v;
11822+    struct pollfd *a = NULL;  /* poll() array; kept and grown across iterations */
11823+    size_t asize = 0;         /* Entries allocated in a */
11824+    /* Worker thread body: polls the queued tasks and runs their callbacks until pq->kill */
11825+    pthread_mutex_lock(&pq->lock);
11826+    do {
11827+        unsigned int i;
11828+        unsigned int n = 0;
11829+        struct polltask *pt;
11830+        struct polltask *pt_next;
11831+        uint64_t now = pollqueue_now(0);
11832+        int timeout = -1;
11833+        int rv;
11834+
11835+        for (pt = pq->head; pt; pt = pt_next) {
11836+            int64_t t;
11837+            pt_next = pt->next;
11838+
11839+            if (pt->state == POLLTASK_Q_KILL) {
11840+                pollqueue_rem_task(pq, pt);
11841+                sem_post(&pt->kill_sem);
11842+                continue;
11843+            }
11844+            if (n >= asize) {
11845+                /* Grow via a temporary so that a failed realloc does not leak a */
11846+                const size_t new_size = asize ? asize * 2 : 4;
11847+                struct pollfd *const new_a = realloc(a, new_size * sizeof(*a));
11848+                if (!new_a) {
11849+                    request_log("Failed to realloc poll array to %zu\n", new_size);
11850+                    goto fail_locked;
11851+                }
11852+                a = new_a;
11853+                asize = new_size;
11854+            }
11855+            a[n++] = (struct pollfd){
11856+                .fd = pt->fd,
11857+                .events = pt->events
11858+            };
11859+
11860+            t = (int64_t)(pt->timeout - now);
11861+            if (pt->timeout && t < INT_MAX &&
11862+                (timeout < 0 || (int)t < timeout))
11863+                timeout = (t < 0) ? 0 : (int)t;
11864+        }
11865+        pthread_mutex_unlock(&pq->lock);
11866+        /* Wait with the lock released so that tasks can be added or killed meanwhile */
11867+        if ((rv = poll(a, n, timeout)) == -1) {
11868+            if (errno != EINTR) {
11869+                request_log("Poll error: %s\n", strerror(errno));
11870+                goto fail_unlocked;
11871+            }
11872+        }
11873+
11874+        pthread_mutex_lock(&pq->lock);
11875+        now = pollqueue_now(0);
11876+
11877+        /* Prodding in this loop is pointless and might lead to
11878+         * infinite looping
11879+        */
11880+        pq->no_prod = true;
11881+        for (i = 0, pt = pq->head; i < n; ++i, pt = pt_next) {
11882+            pt_next = pt->next;
11883+
11884+            /* Pending? */
11885+            if (a[i].revents ||
11886+                (pt->timeout && (int64_t)(now - pt->timeout) >= 0)) {
11887+                pollqueue_rem_task(pq, pt);
11888+                if (pt->state == POLLTASK_QUEUED)
11889+                    pt->state = POLLTASK_RUNNING;
11890+                if (pt->state == POLLTASK_Q_KILL)
11891+                    pt->state = POLLTASK_RUN_KILL;
11892+                pthread_mutex_unlock(&pq->lock);
11893+
11894+                /* This can add new entries to the Q but as
11895+                 * those are added to the tail our existing
11896+                 * chain remains intact
11897+                */
11898+                pt->fn(pt->v, a[i].revents);
11899+
11900+                pthread_mutex_lock(&pq->lock);
11901+                if (pt->state == POLLTASK_RUNNING)
11902+                    pt->state = POLLTASK_UNQUEUED;
11903+                if (pt->state == POLLTASK_RUN_KILL)
11904+                    sem_post(&pt->kill_sem);
11905+            }
11906+        }
11907+        pq->no_prod = false;
11908+
11909+    } while (!pq->kill);
11910+
11911+fail_locked:
11912+    pthread_mutex_unlock(&pq->lock);
11913+fail_unlocked:
11914+    free(a);
11915+    return NULL;
11916+}
11917+
11918+static void prod_fn(void *v, short revents)
11919+{
11920+    struct pollqueue *const pq = v;
11921+    char buf[8];  /* Scratch for the 8 bytes read back from prod_fd */
11922+    if (revents)
11923+        read(pq->prod_fd, buf, 8);  /* Drain the wake-up; the value itself is not used */
11924+    if (!pq->kill)
11925+        pollqueue_add_task(pq->prod_pt, -1);  /* Re-queue the wake-up task with no timeout */
11926+}
11927+
11928+struct pollqueue * pollqueue_new(void)
11929+{
11930+    /* Creates a queue together with its worker thread; returns NULL on
11931+     * failure. The caller owns the single initial reference.
11932+     */
11933+    struct pollqueue *pq = malloc(sizeof(*pq));
11934+    if (!pq)
11935+        return NULL;
11936+    *pq = (struct pollqueue){  /* Members not listed start as zero / NULL / false */
11937+        .ref_count = ATOMIC_VAR_INIT(0),
11938+        .lock = PTHREAD_MUTEX_INITIALIZER,
11939+        .prod_fd = -1
11940+    };
11941+
11942+    pq->prod_fd = eventfd(0, EFD_NONBLOCK);
11943+    if (pq->prod_fd == -1)  /* eventfd() reports failure with -1; 1 is a valid fd */
11944+        goto fail1;
11945+    pq->prod_pt = polltask_new(pq, pq->prod_fd, POLLIN, prod_fn, pq);
11946+    if (!pq->prod_pt)
11947+        goto fail2;
11948+    pollqueue_add_task(pq->prod_pt, -1);
11949+    if (pthread_create(&pq->worker, NULL, poll_thread, pq))
11950+        goto fail3;
11951+    // Reset ref count which will have been inced by polltask_new
11952+    atomic_store(&pq->ref_count, 0);
11953+    return pq;
11954+
11955+fail3:
11956+    polltask_free(pq->prod_pt);
11957+fail2:
11958+    close(pq->prod_fd);
11959+fail1:
11960+    free(pq);
11961+    return NULL;
11962+}
11963+
11964+static void pollqueue_free(struct pollqueue *const pq)
11965+{
11966+    void *rv;
11967+
11968+    pthread_mutex_lock(&pq->lock);
11969+    pq->kill = true;     /* Ends the worker's do/while loop */
11970+    pollqueue_prod(pq);  /* Wake the worker so that it notices kill */
11971+    pthread_mutex_unlock(&pq->lock);
11972+
11973+    pthread_join(pq->worker, &rv);  /* The worker must have exited before its state is freed */
11974+    polltask_free(pq->prod_pt);
11975+    pthread_mutex_destroy(&pq->lock);
11976+    close(pq->prod_fd);
11977+    free(pq);
11978+}
11979+
11980+struct pollqueue * pollqueue_ref(struct pollqueue *const pq)
11981+{
11982+    atomic_fetch_add(&pq->ref_count, 1);  /* pq is dereferenced here, so it must not be NULL */
11983+    return pq;                            /* Returns its argument, e.g. for use in an initializer */
11984+}
11985+
11986+void pollqueue_unref(struct pollqueue **const ppq)
11987+{
11988+    struct pollqueue * const pq = *ppq;
11989+
11990+    /* Drop the caller's reference and NULL its pointer; NULL is a no-op */
11991+    if (pq == NULL)
11992+        return;
11993+    *ppq = NULL;
11994+
11995+    /* ref_count is biased by one, so the last reference sees 0 here */
11996+    if (atomic_fetch_sub(&pq->ref_count, 1) == 0)
11997+        pollqueue_free(pq);
11998+}
11999+
12000+
12001+
12002--- /dev/null
12003+++ b/libavcodec/v4l2_req_pollqueue.h
12004@@ -0,0 +1,18 @@
12005+#ifndef POLLQUEUE_H_
12006+#define POLLQUEUE_H_
12007+
12008+struct polltask;
12009+struct pollqueue;
12010+/* Create a task that calls fn(v, revents) from the queue's poll thread; NULL on failure */
12011+struct polltask *polltask_new(struct pollqueue *const pq,
12012+			      const int fd, const short events,
12013+			      void (*const fn)(void *v, short revents),
12014+			      void *const v);
12015+void polltask_delete(struct polltask **const ppt);  /* Frees *ppt and sets it to NULL */
12016+/* Queue pt; timeout is in ms from now, a negative value means no timeout */
12017+void pollqueue_add_task(struct polltask *const pt, const int timeout);
12018+struct pollqueue * pollqueue_new(void);  /* NULL on failure */
12019+void pollqueue_unref(struct pollqueue **const ppq);  /* Sets *ppq to NULL */
12020+struct pollqueue * pollqueue_ref(struct pollqueue *const pq);
12021+
12022+#endif /* POLLQUEUE_H_ */
12023--- /dev/null
12024+++ b/libavcodec/v4l2_req_utils.h
12025@@ -0,0 +1,27 @@
12026+#ifndef AVCODEC_V4L2_REQ_UTILS_H
12027+#define AVCODEC_V4L2_REQ_UTILS_H
12028+
12029+#include <stdint.h>
12030+#include "libavutil/log.h"
12031+
12032+#define request_log(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__)
12033+
12034+#define request_err(_ctx, ...) av_log(_ctx, AV_LOG_ERROR, __VA_ARGS__)
12035+#define request_warn(_ctx, ...) av_log(_ctx, AV_LOG_WARNING, __VA_ARGS__)
12036+#define request_info(_ctx, ...) av_log(_ctx, AV_LOG_INFO, __VA_ARGS__)
12037+#define request_debug(_ctx, ...) av_log(_ctx, AV_LOG_DEBUG, __VA_ARGS__)
12038+
12039+static inline char safechar(char c) {
12040+    return (c <= 0x20 || c >= 0x7f) ? '.' : c;  /* Space, controls and non-ASCII become '.' */
12041+}
12042+
12043+static inline const char * strfourcc(char tbuf[5], uint32_t fcc) {
12044+    unsigned int i;
12045+    /* The four characters are packed least significant byte first */
12046+    for (i = 0; i != 4; ++i, fcc >>= 8)
12047+        tbuf[i] = safechar(fcc & 0xff);
12048+    tbuf[4] = '\0';
12049+    return tbuf;
12050+}
12051+
12052+#endif
12053--- /dev/null
12054+++ b/libavcodec/v4l2_request_hevc.c
12055@@ -0,0 +1,351 @@
12056+/*
12057+ * This file is part of FFmpeg.
12058+ *
12059+ * FFmpeg is free software; you can redistribute it and/or
12060+ * modify it under the terms of the GNU Lesser General Public
12061+ * License as published by the Free Software Foundation; either
12062+ * version 2.1 of the License, or (at your option) any later version.
12063+ *
12064+ * FFmpeg is distributed in the hope that it will be useful,
12065+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12066+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12067+ * Lesser General Public License for more details.
12068+ *
12069+ * You should have received a copy of the GNU Lesser General Public
12070+ * License along with FFmpeg; if not, write to the Free Software
12071+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
12072+ */
12073+
12074+
12075+#include "config.h"
12076+#include "decode.h"
12077+#include "hevcdec.h"
12078+#include "hwconfig.h"
12079+#include "internal.h"
12080+
12081+#include "v4l2_request_hevc.h"
12082+
12083+#include "libavutil/hwcontext_drm.h"
12084+#include "libavutil/pixdesc.h"
12085+
12086+#include "v4l2_req_devscan.h"
12087+#include "v4l2_req_dmabufs.h"
12088+#include "v4l2_req_pollqueue.h"
12089+#include "v4l2_req_media.h"
12090+#include "v4l2_req_utils.h"
12091+
12092+static size_t bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8)
12093+{
12094+    /* Widen before multiplying so that w * h cannot wrap in unsigned int */
12095+    const size_t wxh = (size_t)w * h;
12096+    size_t bits_alloc;
12097+    /* Estimate the bitstream buffer size in bytes for a w x h picture.
12098+     * Annex A gives a min compression of 2 @ lvl 3.1
12099+     * (wxh <= 983040) and min 4 thereafter but avoid
12100+     * the oddity of 983041 having a lower limit than
12101+     * 983040. Multiply by 3/2 for 4:2:0
12102+     */
12103+    bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
12104+        wxh < 983040 * 2 ? 983040 * 3 / 4 :
12105+        wxh * 3 / 8;
12106+    /* Allow for bit depth */
12107+    bits_alloc += (bits_alloc * bits_minus8) / 8;
12108+    /* Add a few bytes (16k) for overhead */
12109+    bits_alloc += 0x4000;
12110+    return bits_alloc;
12111+}
12112+
12113+static int v4l2_req_hevc_start_frame(AVCodecContext *avctx,
12114+                                     av_unused const uint8_t *buffer,
12115+                                     av_unused uint32_t size)
12116+{
12117+    const V4L2RequestContextHEVC * const rctx = avctx->internal->hwaccel_priv_data;
12118+    return (*rctx->fns->start_frame)(avctx, buffer, size);  /* Hook of the API version picked by init */
12119+}
12120+
12121+static int v4l2_req_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
12122+{
12123+    const V4L2RequestContextHEVC * const rctx = avctx->internal->hwaccel_priv_data;
12124+    return (*rctx->fns->decode_slice)(avctx, buffer, size);  /* Hook of the API version picked by init */
12125+}
12126+
12127+static int v4l2_req_hevc_end_frame(AVCodecContext *avctx)
12128+{
12129+    const V4L2RequestContextHEVC * const rctx = avctx->internal->hwaccel_priv_data;
12130+    return (*rctx->fns->end_frame)(avctx);  /* Hook of the API version picked by init */
12131+}
12132+
12133+static void v4l2_req_hevc_abort_frame(AVCodecContext * const avctx)
12134+{
12135+    const V4L2RequestContextHEVC * const rctx = avctx->internal->hwaccel_priv_data;
12136+    (*rctx->fns->abort_frame)(avctx);  /* Hook of the API version picked by init */
12137+}
12138+
12139+static int v4l2_req_hevc_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
12140+{
12141+    const V4L2RequestContextHEVC * const rctx = avctx->internal->hwaccel_priv_data;
12142+    return (*rctx->fns->frame_params)(avctx, hw_frames_ctx);  /* Hook of the API version picked by init */
12143+}
12144+
12145+static int v4l2_req_hevc_alloc_frame(AVCodecContext * avctx, AVFrame *frame)
12146+{
12147+    const V4L2RequestContextHEVC * const rctx = avctx->internal->hwaccel_priv_data;
12148+    return (*rctx->fns->alloc_frame)(avctx, frame);  /* Hook of the API version picked by init */
12149+}
12150+
12151+
12152+static int v4l2_request_hevc_uninit(AVCodecContext *avctx)
12153+{
12154+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
12155+
12156+    av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__);
12157+
12158+    decode_q_wait(&ctx->decode_q, NULL);  // Wait for all other threads to be out of decode
12159+    // Release the helpers in the same order as the failure path of init
12160+    mediabufs_ctl_unref(&ctx->mbufs);
12161+    media_pool_delete(&ctx->mpool);
12162+    pollqueue_unref(&ctx->pq);
12163+    dmabufs_ctl_unref(&ctx->dbufs);
12164+    devscan_delete(&ctx->devscan);
12165+
12166+    decode_q_uninit(&ctx->decode_q);
12167+
12168+//    if (avctx->hw_frames_ctx) {
12169+//        AVHWFramesContext *hwfc = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
12170+//        av_buffer_pool_flush(hwfc->pool);
12171+//    }
12172+    return 0;  // Always reports success
12173+}
12174+
12175+static int dst_fmt_accept_cb(void * v, const struct v4l2_fmtdesc *fmtdesc)
12176+{
12177+    AVCodecContext *const avctx = v;
12178+    const HEVCContext *const hevc = avctx->priv_data;
12179+    const uint32_t fourcc = fmtdesc->pixelformat;
12180+
12181+    /* Destination format filter handed to mediabufs_dst_fmt_set():
12182+     * returns 1 for formats usable at the stream's bit depth, else 0.
12183+     */
12184+    switch (hevc->ps.sps->bit_depth) {
12185+    case 8:
12186+        return fourcc == V4L2_PIX_FMT_NV12_COL128 || fourcc == V4L2_PIX_FMT_NV12;
12187+    case 10:
12188+        return fourcc == V4L2_PIX_FMT_NV12_10_COL128;
12189+    default:
12190+        return 0;
12191+    }
12192+}
12193+
12194+static int v4l2_request_hevc_init(AVCodecContext *avctx)
12195+{
12196+    const HEVCContext *h = avctx->priv_data;
12197+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
12198+    const HEVCSPS * const sps = h->ps.sps;
12199+    int ret;
12200+    const struct decdev * decdev;
12201+    const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 4).src_pix_fmt_v4l2;  // Assuming constant for all APIs but avoiding V4L2 includes
12202+    size_t src_size;
12203+    enum mediabufs_memory src_memtype;
12204+    enum mediabufs_memory dst_memtype;
12205+    /* hwaccel init hook: sets up every helper in ctx, or releases them all again on failure */
12206+    av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__);
12207+
12208+    // Give up immediately if this is something that we have no code to deal with
12209+    if (h->ps.sps->chroma_format_idc != 1) {
12210+        av_log(avctx, AV_LOG_WARNING, "chroma_format_idc(%d) != 1: Not implemented\n", h->ps.sps->chroma_format_idc);
12211+        return AVERROR_PATCHWELCOME;
12212+    }
12213+    if (!(h->ps.sps->bit_depth == 10 || h->ps.sps->bit_depth == 8) ||
12214+        h->ps.sps->bit_depth != h->ps.sps->bit_depth_chroma) {
12215+        av_log(avctx, AV_LOG_WARNING, "Bit depth Y:%d C:%d: Not implemented\n", h->ps.sps->bit_depth, h->ps.sps->bit_depth_chroma);
12216+        return AVERROR_PATCHWELCOME;
12217+    }
12218+
12219+    if ((ret = devscan_build(avctx, &ctx->devscan)) != 0) {
12220+        av_log(avctx, AV_LOG_WARNING, "Failed to find any V4L2 devices\n");
12221+        return (AVERROR(-ret));
12222+    }
12223+    ret = AVERROR(ENOMEM);  // Assume mem fail by default for these
12224+
12225+    if ((decdev = devscan_find(ctx->devscan, src_pix_fmt)) == NULL)
12226+    {
12227+        av_log(avctx, AV_LOG_WARNING, "Failed to find a V4L2 device for H265\n");
12228+        ret = AVERROR(ENODEV);
12229+        goto fail0;
12230+    }
12231+    av_log(avctx, AV_LOG_DEBUG, "Trying V4L2 devices: %s,%s\n",
12232+           decdev_media_path(decdev), decdev_video_path(decdev));
12233+    // Helpers are created in order; the failN labels below release them in reverse
12234+    if ((ctx->pq = pollqueue_new()) == NULL) {
12235+        av_log(avctx, AV_LOG_ERROR, "Unable to create pollqueue\n");
12236+        goto fail1;
12237+    }
12238+
12239+    if ((ctx->mpool = media_pool_new(decdev_media_path(decdev), ctx->pq, 4)) == NULL) {
12240+        av_log(avctx, AV_LOG_ERROR, "Unable to create media pool\n");
12241+        goto fail2;
12242+    }
12243+
12244+    if ((ctx->mbufs = mediabufs_ctl_new(avctx, decdev_video_path(decdev), ctx->pq)) == NULL) {
12245+        av_log(avctx, AV_LOG_ERROR, "Unable to create media controls\n");
12246+        goto fail3;
12247+    }
12248+
12249+    // Version test for functional Pi5 HEVC iommu.
12250+    // rpivid kernel patch was merged in 6.1.57
12251+    // *** Remove when it is unlikely that there are any broken kernels left
12252+    if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(6,1,57))
12253+        ctx->dbufs = dmabufs_ctl_new_vidbuf_cached();
12254+    else
12255+        ctx->dbufs = dmabufs_ctl_new();
12256+
12257+    if (ctx->dbufs == NULL) {
12258+        av_log(avctx, AV_LOG_DEBUG, "Unable to open dmabufs - try mmap buffers\n");
12259+        src_memtype = MEDIABUFS_MEMORY_MMAP;
12260+        dst_memtype = MEDIABUFS_MEMORY_MMAP;
12261+    }
12262+    else {
12263+        av_log(avctx, AV_LOG_DEBUG, "Dmabufs opened - try dmabuf buffers\n");
12264+        src_memtype = MEDIABUFS_MEMORY_DMABUF;
12265+        dst_memtype = MEDIABUFS_MEMORY_DMABUF;
12266+    }
12267+
12268+    // Ask for an initial bitbuf size of max size / 4
12269+    // We will realloc if we need more
12270+    // Must use sps->h/w as avctx contains cropped size
12271+retry_src_memtype:
12272+    src_size = bit_buf_size(sps->width, sps->height, sps->bit_depth - 8);
12273+    if (src_memtype == MEDIABUFS_MEMORY_DMABUF && mediabufs_src_resizable(ctx->mbufs))
12274+        src_size /= 4;
12275+    // Kludge for conformance tests which break Annex A limits
12276+    else if (src_size < 0x40000)
12277+        src_size = 0x40000;
12278+
12279+    if (mediabufs_src_fmt_set(ctx->mbufs, decdev_src_type(decdev), src_pix_fmt,
12280+                              sps->width, sps->height, src_size)) {
12281+        char tbuf1[5];
12282+        av_log(avctx, AV_LOG_ERROR, "Failed to set source format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height);
12283+        goto fail4;
12284+    }
12285+
12286+    if (mediabufs_src_chk_memtype(ctx->mbufs, src_memtype)) {
12287+        if (src_memtype == MEDIABUFS_MEMORY_DMABUF) {
12288+            src_memtype = MEDIABUFS_MEMORY_MMAP;  // Fall back once, recomputing src_size for mmap
12289+            goto retry_src_memtype;
12290+        }
12291+        av_log(avctx, AV_LOG_ERROR, "Failed to get src memory type\n");
12292+        goto fail4;
12293+    }
12294+    // Probe the API versions from the highest number down and keep the first that succeeds
12295+    if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0)
12296+        ctx->fns = &V2(ff_v4l2_req_hevc, 4);
12297+#if CONFIG_V4L2_REQ_HEVC_VX
12298+    else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0)
12299+        ctx->fns = &V2(ff_v4l2_req_hevc, 3);
12300+    else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0)
12301+        ctx->fns = &V2(ff_v4l2_req_hevc, 2);
12302+    else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0)
12303+        ctx->fns = &V2(ff_v4l2_req_hevc, 1);
12304+#endif
12305+    else {
12306+        av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n");
12307+        ret = AVERROR(EINVAL);
12308+        goto fail4;
12309+    }
12310+
12311+    av_log(avctx, AV_LOG_DEBUG, "%s probed successfully: driver v %#x\n",
12312+           ctx->fns->name, mediabufs_ctl_driver_version(ctx->mbufs));
12313+    // NOTE(review): the failure message below prints the source fourcc, not a destination one
12314+    if (mediabufs_dst_fmt_set(ctx->mbufs, sps->width, sps->height, dst_fmt_accept_cb, avctx)) {
12315+        char tbuf1[5];
12316+        av_log(avctx, AV_LOG_ERROR, "Failed to set destination format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height);
12317+        goto fail4;
12318+    }
12319+
12320+    if (mediabufs_src_pool_create(ctx->mbufs, ctx->dbufs, 6, src_memtype)) {
12321+        av_log(avctx, AV_LOG_ERROR, "Failed to create source pool\n");
12322+        goto fail4;
12323+    }
12324+
12325+    {
12326+        unsigned int dst_slots = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering +
12327+            avctx->thread_count + (avctx->extra_hw_frames > 0 ? avctx->extra_hw_frames : 6);
12328+        av_log(avctx, AV_LOG_DEBUG, "Slots=%d: Reordering=%d, threads=%d, hw+=%d\n", dst_slots,
12329+               sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering,
12330+               avctx->thread_count, avctx->extra_hw_frames);
12331+
12332+        if (mediabufs_dst_chk_memtype(ctx->mbufs, dst_memtype)) {
12333+            if (dst_memtype != MEDIABUFS_MEMORY_DMABUF) {
12334+                av_log(avctx, AV_LOG_ERROR, "Failed to get dst memory type\n");
12335+                goto fail4;
12336+            }
12337+            av_log(avctx, AV_LOG_DEBUG, "Dst DMABUF not supported - trying mmap\n");
12338+            dst_memtype = MEDIABUFS_MEMORY_MMAP;
12339+        }
12340+
12341+        // extra_hw_frames is -1 if unset
12342+        if (mediabufs_dst_slots_create(ctx->mbufs, dst_slots, (avctx->extra_hw_frames > 0), dst_memtype)) {
12343+            av_log(avctx, AV_LOG_ERROR, "Failed to create destination slots\n");
12344+            goto fail4;
12345+        }
12346+    }
12347+
12348+    if (mediabufs_stream_on(ctx->mbufs)) {
12349+        av_log(avctx, AV_LOG_ERROR, "Failed stream on\n");
12350+        goto fail4;
12351+    }
12352+
12353+    if ((ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_DRM)) != 0) {
12354+        av_log(avctx, AV_LOG_ERROR, "Failed to create frame ctx\n");
12355+        goto fail4;
12356+    }
12357+
12358+    if ((ret = ctx->fns->set_controls(avctx, ctx)) != 0) {
12359+        av_log(avctx, AV_LOG_ERROR, "Failed set controls\n");
12360+        goto fail5;
12361+    }
12362+
12363+    decode_q_init(&ctx->decode_q);
12364+
12365+    // Set our s/w format
12366+    avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format;
12367+
12368+    av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s; swfmt=%s\n",
12369+           ctx->fns->name,
12370+           decdev_media_path(decdev), decdev_video_path(decdev),
12371+           mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype),
12372+           av_get_pix_fmt_name(avctx->sw_pix_fmt));
12373+
12374+    return 0;
12375+    // Failure unwind: each label releases one helper and falls through to the next
12376+fail5:
12377+    av_buffer_unref(&avctx->hw_frames_ctx);
12378+fail4:
12379+    mediabufs_ctl_unref(&ctx->mbufs);
12380+fail3:
12381+    media_pool_delete(&ctx->mpool);
12382+fail2:
12383+    pollqueue_unref(&ctx->pq);
12384+fail1:
12385+    dmabufs_ctl_unref(&ctx->dbufs);
12386+fail0:
12387+    devscan_delete(&ctx->devscan);
12388+    return ret;
12389+}
12390+
12391+const AVHWAccel ff_hevc_v4l2request_hwaccel = {
12392+    .name           = "hevc_v4l2request",
12393+    .type           = AVMEDIA_TYPE_VIDEO,
12394+    .id             = AV_CODEC_ID_HEVC,
12395+    .pix_fmt        = AV_PIX_FMT_DRM_PRIME,
12396+    .alloc_frame    = v4l2_req_hevc_alloc_frame,   // The per-frame hooks forward to ctx->fns
12397+    .start_frame    = v4l2_req_hevc_start_frame,
12398+    .decode_slice   = v4l2_req_hevc_decode_slice,
12399+    .end_frame      = v4l2_req_hevc_end_frame,
12400+    .abort_frame    = v4l2_req_hevc_abort_frame,
12401+    .init           = v4l2_request_hevc_init,      // Fills in ctx->fns by probing
12402+    .uninit         = v4l2_request_hevc_uninit,
12403+    .priv_data_size = sizeof(V4L2RequestContextHEVC),  // Type read back from hwaccel_priv_data
12404+    .frame_params   = v4l2_req_hevc_frame_params,
12405+    .caps_internal  = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_MT_SAFE,
12406+};
12407--- /dev/null
12408+++ b/libavcodec/v4l2_request_hevc.h
12409@@ -0,0 +1,102 @@
12410+#ifndef AVCODEC_V4L2_REQUEST_HEVC_H
12411+#define AVCODEC_V4L2_REQUEST_HEVC_H
12412+
12413+#include <stdint.h>
12414+#include <drm_fourcc.h>
12415+#include "v4l2_req_decode_q.h"
12416+
12417+#ifndef DRM_FORMAT_NV15
12418+#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
12419+#endif
12420+
12421+#ifndef DRM_FORMAT_NV20
12422+#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
12423+#endif
12424+
12425+// P030 should be defined in drm_fourcc.h and hopefully will be sometime
12426+// in the future but until then...
12427+#ifndef DRM_FORMAT_P030
12428+#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0')
12429+#endif
12430+
12431+#ifndef DRM_FORMAT_NV15
12432+#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
12433+#endif
12434+
12435+#ifndef DRM_FORMAT_NV20
12436+#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
12437+#endif
12438+
12439+#include <linux/videodev2.h>
12440+#ifndef V4L2_CID_CODEC_BASE
12441+#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE
12442+#endif
12443+
12444+// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
12445+// in videodev2.h and hopefully will be sometime in the future but until then...
12446+#ifndef V4L2_PIX_FMT_NV12_10_COL128
12447+#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
12448+#endif
12449+
12450+#ifndef V4L2_PIX_FMT_NV12_COL128
12451+#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12  Y/CbCr 4:2:0 128 pixel wide column */
12452+#endif
12453+
12454+#ifndef V4L2_CTRL_FLAG_DYNAMIC_ARRAY
12455+#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY	0x0800
12456+#endif
12457+
12458+#define VCAT(name, version) name##_v##version
12459+#define V2(n,v) VCAT(n, v)
12460+#define V(n) V2(n, HEVC_CTRLS_VERSION)
12461+
12462+#define S2(x) #x
12463+#define STR(x) S2(x)
12464+
12465+// 1 per decoder
12466+struct v4l2_req_decode_fns;
12467+
12468+typedef struct V4L2RequestContextHEVC {
12469+//    V4L2RequestContext base;
12470+    const struct v4l2_req_decode_fns * fns;  // Function table of the API version that probed OK
12471+
12472+    unsigned int timestamp;  // ?? maybe uint64_t
12473+
12474+    int decode_mode;            // presumably the V4L2 HEVC decode mode in use - TODO confirm
12475+    int start_code;             // presumably the V4L2 HEVC start code setting - TODO confirm
12476+    unsigned int max_slices;    // 0 => not wanted (frame mode)
12477+    unsigned int max_offsets;   // 0 => not wanted
12478+
12479+    req_decode_q decode_q;
12480+
12481+    struct devscan *devscan;      // Built by devscan_build() in init
12482+    struct dmabufs_ctl *dbufs;    // NULL => no dmabufs, init falls back to mmap buffers
12483+    struct pollqueue *pq;         // Shared with mpool and mbufs at creation
12484+    struct media_pool * mpool;    // Created on the decoder's media device path
12485+    struct mediabufs_ctl *mbufs;  // Created on the decoder's video device path
12486+} V4L2RequestContextHEVC;
12487+
12488+typedef struct v4l2_req_decode_fns {
12489+    int src_pix_fmt_v4l2;  // V4L2 fourcc of the source (coded) format
12490+    const char * name;     // Used in log messages
12491+
12492+    // Init setup
12493+    int (*probe)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx);  // 0 => this version is usable
12494+    int (*set_controls)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx);  // 0 => success
12495+
12496+    // Passthrough of hwaccel fns
12497+    int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
12498+    int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
12499+    int (*end_frame)(AVCodecContext *avctx);
12500+    void (*abort_frame)(AVCodecContext *avctx);
12501+    int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
12502+    int (*alloc_frame)(AVCodecContext * avctx, AVFrame *frame);
12503+} v4l2_req_decode_fns;
12504+
12505+
12506+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1);
12507+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2);
12508+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3);
12509+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 4);
12510+
12511+#endif
12512--- /dev/null
12513+++ b/libavcodec/weak_link.c
12514@@ -0,0 +1,103 @@
12515+#include <stdlib.h>
12516+#include <pthread.h>
12517+#include <stdatomic.h>
12518+#include "weak_link.h"
12519+
12520+struct ff_weak_link_master {
12521+    atomic_int ref_count;    /* 0 is single ref for easier atomics */
12522+    pthread_rwlock_t lock;   /* Read-held while a client uses ptr; write-held to break the link */
12523+    void * ptr;              /* The linked object; NULL once the link has been broken */
12524+};
12525+
12526+static inline struct ff_weak_link_master * weak_link_x(struct ff_weak_link_client * c)
12527+{
12528+    return (struct ff_weak_link_master *)c;  /* A client pointer is the master pointer under another type */
12529+}
12530+
12531+struct ff_weak_link_master * ff_weak_link_new(void * p)
12532+{
12533+    struct ff_weak_link_master * const link = malloc(sizeof(*link));
12534+    /* Returns a master link to p holding the single initial reference, or NULL */
12535+    if (link == NULL)
12536+        return NULL;
12537+    link->ptr = p;
12538+    atomic_init(&link->ref_count, 0);
12539+    if (pthread_rwlock_init(&link->lock, NULL) == 0)
12540+        return link;
12541+    free(link);
12542+    return NULL;
12543+}
12544+
12545+static void weak_link_do_unref(struct ff_weak_link_master * const w)
12546+{
12547+    /* ref_count is biased by one: a previous value of 0 means that this
12548+     * was the last reference and the link itself can be destroyed */
12549+    if (atomic_fetch_sub(&w->ref_count, 1) != 0)
12550+        return;
12551+    pthread_rwlock_destroy(&w->lock);
12552+    free(w);
12553+}
12554+
12555+// Unref & break link; sets *ppLink to NULL, a NULL *ppLink is ignored
12556+void ff_weak_link_break(struct ff_weak_link_master ** ppLink)
12557+{
12558+    struct ff_weak_link_master * const w = *ppLink;
12559+    if (!w)
12560+        return;
12561+
12562+    *ppLink = NULL;
12563+    pthread_rwlock_wrlock(&w->lock);  // Blocks while any client still holds the read lock
12564+    w->ptr = NULL;                    // From now on ff_weak_link_lock() returns NULL
12565+    pthread_rwlock_unlock(&w->lock);
12566+
12567+    weak_link_do_unref(w);  // Drop the master's own reference
12568+}
12569+
12570+struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w)
12571+{
12572+    /* A NULL master yields a NULL client; otherwise take one more reference */
12573+    if (w != NULL)
12574+        atomic_fetch_add(&w->ref_count, 1);
12575+    return (struct ff_weak_link_client*)w;
12576+}
12577+
12578+void ff_weak_link_unref(struct ff_weak_link_client ** ppLink)
12579+{
12580+    struct ff_weak_link_master * const w = weak_link_x(*ppLink);
12581+    /* Drop this client's reference (if any) and NULL the caller's pointer */
12582+    if (w != NULL) {
12583+        *ppLink = NULL;
12584+        weak_link_do_unref(w);
12585+    }
12586+}
12587+
12588+void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink)
12589+{
12590+    struct ff_weak_link_master * const w = weak_link_x(*ppLink);
12591+
12592+    if (!w)
12593+        return NULL;
12594+
12595+    if (pthread_rwlock_rdlock(&w->lock))
12596+        goto broken;
12597+
12598+    if (w->ptr)
12599+        return w->ptr;  // Live link: returns with the read lock still held
12600+
12601+    pthread_rwlock_unlock(&w->lock);  // Link was broken: nothing left to protect
12602+
12603+broken:
12604+    *ppLink = NULL;         // A link once broken stays broken for this client
12605+    weak_link_do_unref(w);  // Give up the client's reference
12606+    return NULL;
12607+}
12608+
12609+// Ignores a NULL c (so can be on the return path of both broken & live links)
12610+void ff_weak_link_unlock(struct ff_weak_link_client * c)
12611+{
12612+    struct ff_weak_link_master * const w = weak_link_x(c);
12613+    if (w)
12614+        pthread_rwlock_unlock(&w->lock);  // Releases the read lock kept by ff_weak_link_lock()
12615+}
12616+
12617+
12618--- /dev/null
12619+++ b/libavcodec/weak_link.h
12620@@ -0,0 +1,23 @@
12621+struct ff_weak_link_master;
12622+struct ff_weak_link_client;
12623+
12624+struct ff_weak_link_master * ff_weak_link_new(void * p);
12625+void ff_weak_link_break(struct ff_weak_link_master ** ppLink);
12626+
12627+struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w);
12628+void ff_weak_link_unref(struct ff_weak_link_client ** ppLink);
12629+
12630+// Returns NULL if link broken - in this case it will also zap
12631+//   *ppLink and unref the weak_link.
12632+// Returns NULL if *ppLink is NULL (so a link once broken stays broken)
12633+//
12634+// The above does mean that there is a race if this is called simultaneously
12635+// by two threads using the same weak_link_client (so don't do that)
12636+void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink);
12637+void ff_weak_link_unlock(struct ff_weak_link_client * c);
12638+
12639+
12640+
12641+
12642+
12643+
12644--- a/libavdevice/Makefile
12645+++ b/libavdevice/Makefile
12646@@ -48,6 +48,8 @@ OBJS-$(CONFIG_SNDIO_OUTDEV)
12647 OBJS-$(CONFIG_V4L2_INDEV)                += v4l2.o v4l2-common.o timefilter.o
12648 OBJS-$(CONFIG_V4L2_OUTDEV)               += v4l2enc.o v4l2-common.o
12649 OBJS-$(CONFIG_VFWCAP_INDEV)              += vfwcap.o
12650+OBJS-$(CONFIG_VOUT_DRM_OUTDEV)           += drm_vout.o
12651+OBJS-$(CONFIG_VOUT_EGL_OUTDEV)           += egl_vout.o
12652 OBJS-$(CONFIG_XCBGRAB_INDEV)             += xcbgrab.o
12653 OBJS-$(CONFIG_XV_OUTDEV)                 += xv.o
12654
12655--- a/libavdevice/alldevices.c
12656+++ b/libavdevice/alldevices.c
12657@@ -51,6 +51,8 @@ extern const AVOutputFormat ff_sndio_mux
12658 extern const AVInputFormat  ff_v4l2_demuxer;
12659 extern const AVOutputFormat ff_v4l2_muxer;
12660 extern const AVInputFormat  ff_vfwcap_demuxer;
12661+extern const AVOutputFormat ff_vout_drm_muxer;
12662+extern const AVOutputFormat ff_vout_egl_muxer;
12663 extern const AVInputFormat  ff_xcbgrab_demuxer;
12664 extern const AVOutputFormat ff_xv_muxer;
12665
12666--- /dev/null
12667+++ b/libavdevice/drm_vout.c
12668@@ -0,0 +1,680 @@
12669+/*
12670+ * Copyright (c) 2020 John Cox for Raspberry Pi Trading
12671+ *
12672+ * This file is part of FFmpeg.
12673+ *
12674+ * FFmpeg is free software; you can redistribute it and/or
12675+ * modify it under the terms of the GNU Lesser General Public
12676+ * License as published by the Free Software Foundation; either
12677+ * version 2.1 of the License, or (at your option) any later version.
12678+ *
12679+ * FFmpeg is distributed in the hope that it will be useful,
12680+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12681+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12682+ * Lesser General Public License for more details.
12683+ *
12684+ * You should have received a copy of the GNU Lesser General Public
12685+ * License along with FFmpeg; if not, write to the Free Software
12686+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
12687+ */
12688+
12689+
12690+// *** This module is a work in progress and its utility is strictly
12691+//     limited to testing.
12692+
12693+#include "libavutil/opt.h"
12694+#include "libavutil/pixdesc.h"
12695+#include "libavutil/hwcontext_drm.h"
12696+#include "libavformat/internal.h"
12697+#include "avdevice.h"
12698+
12699+#include "pthread.h"
12700+#include <semaphore.h>
12701+#include <unistd.h>
12702+
12703+#include <xf86drm.h>
12704+#include <xf86drmMode.h>
12705+#include <drm_fourcc.h>
12706+
12707+#define TRACE_ALL 0
12708+
12709+#define DRM_MODULE "vc4"
12710+
12711+#define ERRSTR strerror(errno)
12712+
12713+struct drm_setup {
12714+   int conId;                 // connector id; find_crtc picks one when this is 0
12715+   uint32_t crtcId;           // id of the CRTC found for that connector
12716+   int crtcIdx;               // position of crtcId in the DRM resources' CRTC list
12717+   uint32_t planeId;          // plane chosen by find_plane for out_fourcc
12718+   unsigned int out_fourcc;   // DRM format the current plane was selected for
12719+   struct {
12720+       int x, y, width, height;   // copied from the CRTC's geometry in find_crtc
12721+   } compose;
12722+};
12723+
12724+typedef struct drm_aux_s {
12725+    unsigned int fb_handle;                   // DRM framebuffer id, 0 when none
12726+    uint32_t bo_handles[AV_DRM_MAX_PLANES];   // GEM handles imported for the frame, 0 when unused
12727+    AVFrame * frame;                          // frame kept alive while this slot is in use
12728+} drm_aux_t;
12729+
12730+// Aux size should only need to be 2, but on a few streams (Hobbit) under FKMS
12731+// we get initial flicker probably due to dodgy drm timing
12732+#define AUX_SIZE 3
12733+typedef struct drm_display_env_s
12734+{
12735+    AVClass *class;
12736+
12737+    int drm_fd;                 // fd from drmOpen, negative when not open
12738+    uint32_t con_id;            // connector id reported by find_crtc
12739+    struct drm_setup setup;
12740+    enum AVPixelFormat avfmt;
12741+
12742+    int show_all;               // option: wait for the display thread instead of dropping frames
12743+    const char * drm_module;    // option: module name passed to drmOpen
12744+
12745+    unsigned int ano;           // index of the aux slot the next frame will use
12746+    drm_aux_t aux[AUX_SIZE];
12747+
12748+    pthread_t q_thread;         // display thread
12749+    sem_t q_sem_in;             // posted when q_next holds a frame (or on terminate)
12750+    sem_t q_sem_out;            // posted by the display thread when q_next is free
12751+    int q_terminate;            // set by deinit to stop the display thread
12752+    AVFrame * q_next;           // single-entry hand-off to the display thread
12753+
12754+} drm_display_env_t;
12755+
12756+
12757+static int drm_vout_write_trailer(AVFormatContext *s)
12758+{
12759+#if TRACE_ALL
12760+    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
12761+#endif
12762+
12763+    return 0;   // nothing to finalise here; always reports success
12764+}
12765+
12766+static int drm_vout_write_header(AVFormatContext *s)
12767+{
12768+    const AVCodecParameters * const vpar = s->streams[0]->codecpar;
12769+    int acceptable;
12770+#if TRACE_ALL
12771+    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
12772+#endif
12773+    // All we can show is a lone video stream carrying wrapped AVFrames
12774+    acceptable = s->nb_streams <= 1
12775+        && vpar->codec_type == AVMEDIA_TYPE_VIDEO
12776+        && vpar->codec_id   == AV_CODEC_ID_WRAPPED_AVFRAME;
12777+    if (acceptable)
12778+        return 0;
12779+    av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n");
12780+    return AVERROR(EINVAL);
12781+}
12782+
12783+// Find the first plane that is usable on CRTC index crtcidx and supports
12784+// 'format', and store its id in *pplane_id. If the plane has a "zpos"
12785+// property it is set to prop->values[1] (presumably the range maximum).
12786+// Returns 0 on success, -1 if no plane matched or a DRM query failed.
12787+static int find_plane(struct AVFormatContext * const avctx,
12788+                      const int drmfd, const int crtcidx, const uint32_t format,
12789+                      uint32_t * const pplane_id)
12790+{
12791+   drmModePlaneResPtr planes;
12792+   drmModePlanePtr plane;
12793+   drmModeObjectPropertiesPtr props = NULL;
12794+   drmModePropertyPtr prop = NULL;
12795+   unsigned int i, j;
12796+   int ret = -1;
12797+
12798+   planes = drmModeGetPlaneResources(drmfd);
12799+   if (!planes) {
12800+       av_log(avctx, AV_LOG_WARNING, "drmModeGetPlaneResources failed: %s\n", ERRSTR);
12801+       return -1;
12802+   }
12803+
12804+   for (i = 0; i < planes->count_planes; ++i) {
12805+      plane = drmModeGetPlane(drmfd, planes->planes[i]);
12806+      if (!plane) {
12807+          // Test 'plane', not 'planes': a failed lookup must not be dereferenced
12808+          av_log(avctx, AV_LOG_WARNING, "drmModeGetPlane failed: %s\n", ERRSTR);
12809+          goto fail;
12810+      }
12811+
12812+      if (!(plane->possible_crtcs & (1u << crtcidx))) {
12813+         drmModeFreePlane(plane);
12814+         continue;
12815+      }
12816+
12817+      for (j = 0; j < plane->count_formats; ++j) {
12818+         if (plane->formats[j] == format)
12819+            break;
12820+      }
12821+
12822+      if (j == plane->count_formats) {
12823+         drmModeFreePlane(plane);
12824+         continue;
12825+      }
12826+
12827+      *pplane_id = plane->plane_id;
12828+      drmModeFreePlane(plane);
12829+      break;
12830+   }
12831+
12832+   if (i == planes->count_planes)   // loop ran out: no plane matched
12833+       goto fail;
12834+
12835+    props = drmModeObjectGetProperties(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE);
12836+    if (!props)
12837+        goto fail;
12838+    for (i = 0; i != props->count_props; ++i) {
12839+        if (prop)
12840+            drmModeFreeProperty(prop);
12841+        prop = drmModeGetProperty(drmfd, props->props[i]);
12842+        if (!prop)
12843+            goto fail;
12844+        if (strcmp("zpos", prop->name) == 0) {
12845+            if (drmModeObjectSetProperty(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE, props->props[i], prop->values[1]) == 0)
12846+                av_log(avctx, AV_LOG_DEBUG, "ZPOS set to %d\n", (int)prop->values[1]);
12847+            else
12848+                av_log(avctx, AV_LOG_WARNING, "Failed to set ZPOS on DRM plane\n");
12849+            break;
12850+        }
12851+    }
12852+
12853+    ret = 0;
12854+fail:
12855+    if (props)
12856+        drmModeFreeObjectProperties(props);
12857+    if (prop)
12858+        drmModeFreeProperty(prop);
12859+    drmModeFreePlaneResources(planes);
12860+    return ret;
12861+}
12862+
12863+static void da_uninit(drm_display_env_t * const de, drm_aux_t * da)
12864+{
12865+    // Drop the framebuffer first, then the buffer handles, then the frame
12866+    if (da->fb_handle) {
12867+        drmModeRmFB(de->drm_fd, da->fb_handle);
12868+        da->fb_handle = 0;
12869+    }
12870+    for (unsigned int plane_no = 0; plane_no < AV_DRM_MAX_PLANES; ++plane_no) {
12871+        struct drm_gem_close gem_req = {.handle = da->bo_handles[plane_no]};
12872+        if (gem_req.handle == 0)
12873+            continue;
12874+        drmIoctl(de->drm_fd, DRM_IOCTL_GEM_CLOSE, &gem_req);
12875+        da->bo_handles[plane_no] = 0;
12876+    }
12877+    av_frame_free(&da->frame);
12878+}
12879+
12880+// Show one DRM_PRIME frame (called from the display thread). Takes ownership
12881+// of 'frame': it is parked in an aux slot until the slot is reused, or freed
12882+// at once if no plane supports its format. Returns 0 on success, else non-zero.
12883+static int do_display(AVFormatContext * const s, drm_display_env_t * const de, AVFrame * frame)
12884+{
12885+    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0];
12886+    drm_aux_t * da = de->aux + de->ano;
12887+    const uint32_t format = desc->layers[0].format;
12888+    int ret = 0;
12889+
12890+#if TRACE_ALL
12891+    av_log(s, AV_LOG_DEBUG, "<<< %s: fd=%d\n", __func__, desc->objects[0].fd);
12892+#endif
12893+
12894+    if (de->setup.out_fourcc != format) {
12895+        if (find_plane(s, de->drm_fd, de->setup.crtcIdx, format, &de->setup.planeId)) {
12896+            av_frame_free(&frame);
12897+            av_log(s, AV_LOG_WARNING, "No plane for format: %#x\n", format);
12898+            return -1;
12899+        }
12900+        de->setup.out_fourcc = format;
12901+    }
12902+
12903+    {
12904+        // Retried on EINTR; any other drmWaitVBlank failure is ignored
12905+        drmVBlank vbl = { .request = { .type = DRM_VBLANK_RELATIVE, .sequence = 0 } };
12906+        while (drmWaitVBlank(de->drm_fd, &vbl)) {
12907+            if (errno != EINTR)
12908+                break;
12909+        }
12910+    }
12911+
12912+    da_uninit(de, da);
12913+
12914+    {
12915+        uint32_t pitches[4] = {0};
12916+        uint32_t offsets[4] = {0};
12917+        uint64_t modifiers[4] = {0};
12918+        uint32_t bo_handles[4] = {0};
12919+        int has_mods = 0;
12920+        int i, j, n;
12921+
12922+        da->frame = frame;   // the slot owns the frame from here on, even on failure
12923+
12924+        for (i = 0; i < desc->nb_objects; ++i) {
12925+            if (drmPrimeFDToHandle(de->drm_fd, desc->objects[i].fd, da->bo_handles + i) != 0) {
12926+                av_log(s, AV_LOG_WARNING, "drmPrimeFDToHandle[%d](%d) failed: %s\n", i, desc->objects[i].fd, ERRSTR);
12927+                return -1;
12928+            }
12929+            if (desc->objects[i].format_modifier != DRM_FORMAT_MOD_LINEAR &&
12930+                desc->objects[i].format_modifier != DRM_FORMAT_MOD_INVALID)
12931+                has_mods = 1;
12932+        }
12933+
12934+        n = 0;
12935+        for (i = 0; i < desc->nb_layers; ++i) {
12936+            for (j = 0; j < desc->layers[i].nb_planes; ++j) {
12937+                const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j;
12938+                const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index;
12939+                // The per-plane arrays passed to the framebuffer call hold 4 entries
12940+                if (n >= 4) {
12941+                    av_log(s, AV_LOG_WARNING, "Too many planes in DRM frame descriptor\n");
12942+                    return -1;
12943+                }
12944+                pitches[n] = p->pitch;
12945+                offsets[n] = p->offset;
12946+                modifiers[n] = obj->format_modifier;
12947+                bo_handles[n] = da->bo_handles[p->object_index];
12948+                ++n;
12949+            }
12950+        }
12951+
12952+#if 1 && TRACE_ALL
12953+        av_log(s, AV_LOG_DEBUG, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d,"
12954+               " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n",
12955+               av_frame_cropped_width(frame),
12956+               av_frame_cropped_height(frame),
12957+               desc->layers[0].format,
12958+               bo_handles[0],
12959+               bo_handles[1],
12960+               bo_handles[2],
12961+               bo_handles[3],
12962+               pitches[0],
12963+               pitches[1],
12964+               pitches[2],
12965+               pitches[3],
12966+               offsets[0],
12967+               offsets[1],
12968+               offsets[2],
12969+               offsets[3],
12970+               (long long)modifiers[0],
12971+               (long long)modifiers[1],
12972+               (long long)modifiers[2],
12973+               (long long)modifiers[3]
12974+               );
12975+#endif
12976+
12977+        if (drmModeAddFB2WithModifiers(de->drm_fd,
12978+                                       av_frame_cropped_width(frame),
12979+                                       av_frame_cropped_height(frame),
12980+                                       desc->layers[0].format, bo_handles,
12981+                                       pitches, offsets,
12982+                                       has_mods ? modifiers : NULL,
12983+                                       &da->fb_handle,
12984+                                       has_mods ? DRM_MODE_FB_MODIFIERS : 0) != 0) {
12985+            av_log(s, AV_LOG_WARNING, "drmModeAddFB2WithModifiers failed: %s\n", ERRSTR);
12986+            return -1;
12987+        }
12988+    }
12989+
12990+    ret = drmModeSetPlane(de->drm_fd, de->setup.planeId, de->setup.crtcId,
12991+                              da->fb_handle, 0,
12992+                de->setup.compose.x, de->setup.compose.y,
12993+                de->setup.compose.width,
12994+                de->setup.compose.height,
12995+                0, 0,
12996+                av_frame_cropped_width(frame) << 16,
12997+                av_frame_cropped_height(frame) << 16);
12998+
12999+    if (ret != 0)
13000+        av_log(s, AV_LOG_WARNING, "drmModeSetPlane failed: %s\n", ERRSTR);
13001+
13002+    de->ano = de->ano + 1 >= AUX_SIZE ? 0 : de->ano + 1;
13003+
13004+    return ret;
13005+}
13006+
13007+static int do_sem_wait(sem_t * const sem, const int nowait)
13008+{
13009+    int rc;   // retry for as long as the wait is interrupted by a signal
13010+    do {
13011+        rc = nowait ? sem_trywait(sem) : sem_wait(sem);
13012+    } while (rc != 0 && errno == EINTR);
13013+    return rc == 0 ? 0 : -errno;
13014+}
13015+
13016+static void * display_thread(void * v)
13017+{
13018+    AVFormatContext * const s = v;
13019+    drm_display_env_t * const de = s->priv_data;
13020+    int i;
13021+
13022+#if TRACE_ALL
13023+    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
13024+#endif
13025+
13026+    sem_post(&de->q_sem_out);   // q_next starts empty: let the first frame in
13027+
13028+    for (;;) {
13029+        AVFrame * frame;
13030+
13031+        do_sem_wait(&de->q_sem_in, 0);   // block until a frame or terminate is signalled
13032+
13033+        if (de->q_terminate)
13034+            break;
13035+
13036+        frame = de->q_next;
13037+        de->q_next = NULL;
13038+        sem_post(&de->q_sem_out);   // q_next is free again before the frame is displayed
13039+
13040+        do_display(s, de, frame);   // takes ownership of frame; the result is ignored
13041+    }
13042+
13043+#if TRACE_ALL
13044+    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
13045+#endif
13046+
13047+    for (i = 0; i != AUX_SIZE; ++i)   // release whatever the aux slots still hold
13048+        da_uninit(de, de->aux + i);
13049+
13050+    av_frame_free(&de->q_next);   // drop a frame that was queued but never shown
13051+
13052+    return NULL;
13053+}
13054+
13055+// Queue one wrapped AVFrame for the display thread. DRM_PRIME frames are
13056+// referenced, VAAPI frames are mapped to DRM_PRIME, anything else is rejected.
13057+static int drm_vout_write_packet(AVFormatContext *s, AVPacket *pkt)
13058+{
13059+    const AVFrame * const src_frame = (AVFrame *)pkt->data;
13060+    AVFrame * frame;
13061+    drm_display_env_t * const de = s->priv_data;
13062+    int ret;
13063+
13064+#if TRACE_ALL
13065+    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
13066+#endif
13067+
13068+    if ((src_frame->flags & AV_FRAME_FLAG_CORRUPT) != 0) {
13069+        av_log(s, AV_LOG_WARNING, "Discard corrupt frame: fmt=%d, ts=%" PRId64 "\n", src_frame->format, src_frame->pts);
13070+        return 0;
13071+    }
13072+
13073+    if (src_frame->format != AV_PIX_FMT_DRM_PRIME && src_frame->format != AV_PIX_FMT_VAAPI) {
13074+        av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format);
13075+        return AVERROR(EINVAL);
13076+    }
13077+    if ((frame = av_frame_alloc()) == NULL)   // allocation can fail; do not use it unchecked
13078+        return AVERROR(ENOMEM);
13079+    if (src_frame->format == AV_PIX_FMT_DRM_PRIME) {
13080+        ret = av_frame_ref(frame, src_frame);
13081+    } else {
13082+        frame->format = AV_PIX_FMT_DRM_PRIME;
13083+        if ((ret = av_hwframe_map(frame, src_frame, 0)) != 0) {
13084+            av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format);
13085+            ret = AVERROR(EINVAL);
13086+        }
13087+    }
13088+    if (ret != 0) {
13089+        av_frame_free(&frame);
13090+        return ret;
13091+    }
13092+
13093+    if (do_sem_wait(&de->q_sem_out, !de->show_all) == 0) {
13094+        de->q_next = frame;
13095+        sem_post(&de->q_sem_in);
13096+    } else {
13097+        av_frame_free(&frame);   // display thread busy (or the wait failed): drop the frame
13098+    }
13099+    return 0;
13100+}
13101+
13102+static int drm_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe,
13103+                          unsigned flags)
13104+{
13105+    av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags);
13106+    return AVERROR_PATCHWELCOME;   // the uncoded-frame path is not implemented; frames arrive via write_packet
13107+}
13108+
13109+static int drm_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size)
13110+{
13111+#if TRACE_ALL
13112+    av_log(s, AV_LOG_DEBUG, "%s: %d\n", __func__, type);
13113+#endif
13114+
13115+    // Repaint requests are acknowledged without doing anything;
13116+    // every other message type is reported as unsupported
13117+    if (type == AV_APP_TO_DEV_WINDOW_REPAINT)
13118+        return 0;
13119+
13120+    return AVERROR(ENOSYS);
13121+}
13122+
13123+// Choose the connector and CRTC to display on and record them in *s.
13124+// With s->conId == 0 the first connector that has an encoder with a CRTC is
13125+// taken. On success s->crtcIdx and s->compose are filled in and *pConId (if
13126+// pConId is not NULL) receives the connector id. Returns 0 or -1.
13127+static int find_crtc(struct AVFormatContext * const avctx, int drmfd, struct drm_setup *s, uint32_t * const pConId)
13128+{
13129+   int ret = -1;
13130+   int i;
13131+   drmModeRes *res = drmModeGetResources(drmfd);
13132+   drmModeConnector *c;
13133+   drmModeCrtc *crtc;
13134+
13135+   if (!res) {
13136+      av_log(avctx, AV_LOG_ERROR, "drmModeGetResources failed: %s\n", ERRSTR);
13137+      return -1;
13138+   }
13139+
13140+   if (res->count_crtcs <= 0) {
13141+      av_log(avctx, AV_LOG_ERROR, "drm: no crts\n");
13142+      goto fail_res;
13143+   }
13144+
13145+   if (!s->conId) {
13146+      av_log(avctx, AV_LOG_INFO,
13147+         "No connector ID specified.  Choosing default from list:\n");
13148+
13149+      for (i = 0; i < res->count_connectors; i++) {
13150+         drmModeConnector *con =
13151+            drmModeGetConnector(drmfd, res->connectors[i]);
13152+         drmModeEncoder *enc = NULL;
13153+         drmModeCrtc *con_crtc = NULL;
13154+
13155+         if (!con)   // connector lookup failed: skip it
13156+            continue;
13157+         if (con->encoder_id)
13158+            enc = drmModeGetEncoder(drmfd, con->encoder_id);
13159+         if (enc && enc->crtc_id)   // the encoder lookup may have failed too
13160+            con_crtc = drmModeGetCrtc(drmfd, enc->crtc_id);
13161+
13162+         if (!s->conId && con_crtc) {
13163+            s->conId = con->connector_id;
13164+            s->crtcId = con_crtc->crtc_id;
13165+         }
13166+
13167+         av_log(avctx, AV_LOG_DEBUG, "Connector %d (crtc %d): type %d, %dx%d%s\n",
13168+                con->connector_id,
13169+                con_crtc ? con_crtc->crtc_id : 0,
13170+                con->connector_type,
13171+                con_crtc ? con_crtc->width : 0,
13172+                con_crtc ? con_crtc->height : 0,
13173+                (s->conId == (int)con->connector_id ?
13174+            " (chosen)" : ""));
13175+
13176+         if (con_crtc)
13177+            drmModeFreeCrtc(con_crtc);
13178+         if (enc)
13179+            drmModeFreeEncoder(enc);
13180+         drmModeFreeConnector(con);
13181+      }
13182+
13183+      if (!s->conId) {
13184+         av_log(avctx, AV_LOG_ERROR,
13185+            "No suitable enabled connector found.\n");
13186+         goto fail_res;   // a bare return here would leak res
13187+      }
13188+   }
13189+
13190+   s->crtcIdx = -1;
13191+
13192+   for (i = 0; i < res->count_crtcs; ++i) {
13193+      if (s->crtcId == res->crtcs[i]) {
13194+         s->crtcIdx = i;
13195+         break;
13196+      }
13197+   }
13198+
13199+   if (s->crtcIdx == -1) {
13200+       av_log(avctx, AV_LOG_WARNING, "drm: CRTC %u not found\n", s->crtcId);
13201+       goto fail_res;
13202+   }
13203+
13204+   if (res->count_connectors <= 0) {
13205+       av_log(avctx, AV_LOG_WARNING, "drm: no connectors\n");
13206+       goto fail_res;
13207+   }
13208+
13209+   c = drmModeGetConnector(drmfd, s->conId);
13210+   if (!c) {
13211+       av_log(avctx, AV_LOG_WARNING, "drmModeGetConnector failed: %s\n", ERRSTR);
13212+       goto fail_res;
13213+   }
13214+
13215+   if (!c->count_modes) {
13216+       av_log(avctx, AV_LOG_WARNING, "connector supports no mode\n");
13217+       goto fail_conn;
13218+   }
13219+
13220+   crtc = drmModeGetCrtc(drmfd, s->crtcId);
13221+   if (!crtc) {
13222+       av_log(avctx, AV_LOG_WARNING, "drmModeGetCrtc failed: %s\n", ERRSTR);
13223+       goto fail_conn;
13224+   }
13225+   s->compose.x = crtc->x;
13226+   s->compose.y = crtc->y;
13227+   s->compose.width = crtc->width;
13228+   s->compose.height = crtc->height;
13229+   drmModeFreeCrtc(crtc);
13230+
13231+   if (pConId)
13232+      *pConId = c->connector_id;
13233+   ret = 0;
13234+
13235+fail_conn:
13236+   drmModeFreeConnector(c);
13237+
13238+fail_res:
13239+   drmModeFreeResources(res);
13240+
13241+   return ret;
13242+}
13243+
13244+// Open the DRM device, choose a connector/CRTC and start the display thread.
13245+// deinit is called if init fails so no need to clean up explicitly here
13246+static int drm_vout_init(struct AVFormatContext * s)
13247+{
13248+    drm_display_env_t * const de = s->priv_data;
13249+    int rv;
13250+
13251+    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
13252+
13253+    de->drm_fd = -1;
13254+    de->con_id = 0;
13255+    de->setup = (struct drm_setup){0};
13256+    de->q_terminate = 0;
13257+
13258+    if ((de->drm_fd = drmOpen(de->drm_module, NULL)) < 0) {
13259+        // Never report success here, even if errno happens to be 0
13260+        rv = errno ? AVERROR(errno) : AVERROR(ENODEV);
13261+        av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", de->drm_module, av_err2str(rv));
13262+        return rv;
13263+    }
13264+
13265+    if (find_crtc(s, de->drm_fd, &de->setup, &de->con_id) != 0) {
13266+        av_log(s, AV_LOG_ERROR, "failed to find valid mode\n");
13267+        rv = AVERROR(EINVAL);
13268+        goto fail_close;
13269+    }
13270+
13271+    sem_init(&de->q_sem_in, 0, 0);
13272+    sem_init(&de->q_sem_out, 0, 0);
13273+    // pthread_create reports failure through its return value, not errno
13274+    if ((rv = pthread_create(&de->q_thread, NULL, display_thread, s)) != 0) {
13275+        rv = AVERROR(rv);
13276+        av_log(s, AV_LOG_ERROR, "Failed to create display thread: %s\n", av_err2str(rv));
13277+        goto fail_close;
13278+    }
13279+
13280+    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
13281+    return 0;
13282+
13283+fail_close:
13284+    close(de->drm_fd);
13285+    de->drm_fd = -1;
13286+    av_log(s, AV_LOG_DEBUG, ">>> %s: FAIL\n", __func__);
13287+
13288+    return rv;
13289+}
13290+
13291+// Stop the display thread, if init got far enough to start it, then release
13292+// the aux slots and the DRM fd. init leaves drm_fd < 0 on every failure path.
13293+static void drm_vout_deinit(struct AVFormatContext * s)
13294+{
13295+    drm_display_env_t * const de = s->priv_data;
13296+
13297+    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
13298+
13299+    if (de->drm_fd >= 0) {
13300+        de->q_terminate = 1;
13301+        sem_post(&de->q_sem_in);
13302+        pthread_join(de->q_thread, NULL);
13303+        sem_destroy(&de->q_sem_in);
13304+        sem_destroy(&de->q_sem_out);
13305+    }
13306+    for (unsigned int i = 0; i != AUX_SIZE; ++i)
13307+        da_uninit(de, de->aux + i);
13308+    av_frame_free(&de->q_next);
13309+    if (de->drm_fd >= 0) {
13310+        close(de->drm_fd);
13311+        de->drm_fd = -1;
13312+    }
13313+    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
13314+}
13315+
13316+
13317+#define OFFSET(x) offsetof(drm_display_env_t, x)   // offset of an option's backing field in the private context
13318+static const AVOption options[] = {
13319+    { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
13320+    { "drm_module", "drm_module name to use, default=" DRM_MODULE, OFFSET(drm_module), AV_OPT_TYPE_STRING, { .str = DRM_MODULE }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
13321+    { NULL }   // list terminator
13322+};
13323+
13324+static const AVClass drm_vout_class = {
13325+    .class_name = "drm vid outdev",
13326+    .item_name  = av_default_item_name,
13327+    .option     = options,
13328+    .version    = LIBAVUTIL_VERSION_INT,
13329+    .category   = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT,
13330+};
13331+
13332+const AVOutputFormat ff_vout_drm_muxer = {   // const, matching the extern declaration in alldevices.c
13333+    .name           = "vout_drm",
13334+    .long_name      = NULL_IF_CONFIG_SMALL("Drm video output device"),
13335+    .priv_data_size = sizeof(drm_display_env_t),
13336+    .audio_codec    = AV_CODEC_ID_NONE,
13337+    .video_codec    = AV_CODEC_ID_WRAPPED_AVFRAME,   // frames arrive wrapped in packets
13338+    .write_header   = drm_vout_write_header,
13339+    .write_packet   = drm_vout_write_packet,
13340+    .write_uncoded_frame = drm_vout_write_frame,     // stub: returns AVERROR_PATCHWELCOME
13341+    .write_trailer  = drm_vout_write_trailer,
13342+    .control_message = drm_vout_control_message,
13343+    .flags          = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS,
13344+    .priv_class     = &drm_vout_class,
13345+    .init           = drm_vout_init,
13346+    .deinit         = drm_vout_deinit,
13347+};
13348+
13349--- /dev/null
13350+++ b/libavdevice/egl_vout.c
13351@@ -0,0 +1,781 @@
13352+/*
13353+ * Copyright (c) 2020 John Cox for Raspberry Pi Trading
13354+ *
13355+ * This file is part of FFmpeg.
13356+ *
13357+ * FFmpeg is free software; you can redistribute it and/or
13358+ * modify it under the terms of the GNU Lesser General Public
13359+ * License as published by the Free Software Foundation; either
13360+ * version 2.1 of the License, or (at your option) any later version.
13361+ *
13362+ * FFmpeg is distributed in the hope that it will be useful,
13363+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13364+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13365+ * Lesser General Public License for more details.
13366+ *
13367+ * You should have received a copy of the GNU Lesser General Public
13368+ * License along with FFmpeg; if not, write to the Free Software
13369+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
13370+ */
13371+
13372+
13373+// *** This module is a work in progress and its utility is strictly
13374+//     limited to testing.
13375+//     Amongst other issues it doesn't wait for the pic to be displayed before
13376+//     returning the buffer so flickering does occur.
13377+
13378+#include <epoxy/gl.h>
13379+#include <epoxy/egl.h>
13380+
13381+#include "libavutil/opt.h"
13382+#include "libavutil/avassert.h"
13383+#include "libavutil/pixdesc.h"
13384+#include "libavutil/imgutils.h"
13385+#include "libavutil/hwcontext_drm.h"
13386+#include "libavformat/internal.h"
13387+#include "avdevice.h"
13388+
13389+#include "pthread.h"
13390+#include <semaphore.h>
13391+#include <stdatomic.h>
13392+#include <unistd.h>
13393+
13394+#include <X11/Xlib.h>
13395+#include <X11/Xutil.h>
13396+
13397+#include "libavutil/rpi_sand_fns.h"
13398+
13399+#define TRACE_ALL 0
13400+
13401+struct egl_setup {
13402+    int conId;
13403+
13404+    Display *dpy;         // X11 display connection
13405+    EGLDisplay egl_dpy;   // NOTE(review): presumably the EGL display obtained for dpy - confirm
13406+    EGLContext ctx;       // NOTE(review): ctx/surf/win presumably hold make_window's results - confirm
13407+    EGLSurface surf;
13408+    Window win;
13409+
13410+    uint32_t crtcId;
13411+    int crtcIdx;
13412+    uint32_t planeId;
13413+    struct {
13414+        int x, y, width, height;
13415+    } compose;
13416+};
13417+
13418+typedef struct egl_aux_s {
13419+    int fd;           // NOTE(review): presumably the buffer fd this slot was built for - confirm
13420+    GLuint texture;   // NOTE(review): presumably the GL texture created for that buffer - confirm
13421+
13422+} egl_aux_t;
13423+
13424+typedef struct egl_display_env_s {
13425+    AVClass *class;
13426+
13427+    struct egl_setup setup;
13428+    enum AVPixelFormat avfmt;
13429+
13430+    int show_all;
13431+    int window_width, window_height;   // requested size; make_window uses 1280x720 when these are 0
13432+    int window_x, window_y;            // requested window position
13433+    int fullscreen;                    // non-zero: make_window covers the whole screen and removes the border
13434+
13435+    egl_aux_t aux[32];
13436+
13437+    pthread_t q_thread;
13438+    pthread_mutex_t q_lock;
13439+    sem_t display_start_sem;
13440+    sem_t q_sem;
13441+    int q_terminate;
13442+    AVFrame *q_this;
13443+    AVFrame *q_next;
13444+
13445+} egl_display_env_t;
13446+
13447+
13448+/**
13449+ * Remove window border/decorations by setting _MOTIF_WM_HINTS on the window.
13450+ */
13451+static void
13452+no_border(Display *dpy, Window w)
13453+{
13454+    static const unsigned MWM_HINTS_DECORATIONS = (1 << 1);
13455+    static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5;
13456+
13457+    typedef struct {
13458+        unsigned long       flags;
13459+        unsigned long       functions;
13460+        unsigned long       decorations;
13461+        long                inputMode;
13462+        unsigned long       status;
13463+    } PropMotifWmHints;
13464+
13465+    /* All five fields are sent to the server, so start from a zeroed struct */
13466+    PropMotifWmHints motif_hints = {0};
13467+    Atom prop, proptype;
13468+
13469+    /* setup the property */
13470+    motif_hints.flags = MWM_HINTS_DECORATIONS;
13471+    motif_hints.decorations = 0;   /* no decorations at all */
13472+
13473+    /* get the atom for the property */
13474+    prop = XInternAtom(dpy, "_MOTIF_WM_HINTS", True);
13475+    if (!prop) {
13476+        /* something went wrong! */
13477+        return;
13478+    }
13479+
13480+    /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */
13481+    proptype = prop;
13482+
13483+    XChangeProperty(dpy, w,                         /* display, window */
13484+                    prop, proptype,                 /* property, type */
13485+                    32,                             /* format: 32-bit datums */
13486+                    PropModeReplace,                /* mode */
13487+                    (unsigned char *)&motif_hints, /* data */
13488+                    PROP_MOTIF_WM_HINTS_ELEMENTS    /* nelements */
13489+                   );
13490+}
13491+
13492+
13493+/*
13494+ * Create an X11 window plus a GLES2 EGL context and window surface, and make
13495+ * them current. Geometry comes from de: the whole screen when fullscreen is
13496+ * set, else window_x/y and window_width/height (1280x720 when unset).
13497+ * Returns 0 and fills winRet, ctxRet and surfRet, or -1 on failure.
13498+ */
13499+static int
13500+make_window(struct AVFormatContext *const s,
13501+            egl_display_env_t *const de,
13502+            Display *dpy, EGLDisplay egl_dpy, const char *name,
13503+            Window *winRet, EGLContext *ctxRet, EGLSurface *surfRet)
13504+{
13505+    int scrnum = DefaultScreen(dpy);
13506+    XSetWindowAttributes attr;
13507+    unsigned long mask;
13508+    Window root = RootWindow(dpy, scrnum);
13509+    Window win;
13510+    EGLContext ctx;
13511+    const int fullscreen = de->fullscreen;
13512+    EGLConfig config;
13513+    int x = de->window_x;
13514+    int y = de->window_y;
13515+    int width = de->window_width ? de->window_width : 1280;
13516+    int height = de->window_height ? de->window_height : 720;
13517+
13518+    if (fullscreen) {
13519+        x = 0; y = 0;
13520+        width = DisplayWidth(dpy, scrnum);
13521+        height = DisplayHeight(dpy, scrnum);
13522+    }
13523+
13524+    {
13525+        EGLint num_configs;
13526+        static const EGLint attribs[] = {
13527+            EGL_RED_SIZE, 1,
13528+            EGL_GREEN_SIZE, 1,
13529+            EGL_BLUE_SIZE, 1,
13530+            EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
13531+            EGL_NONE
13532+        };
13533+        // A successful call can still match nothing, so check the count as well
13534+        if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs) || num_configs < 1) {
13535+            av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n");
13536+            return -1;
13537+        }
13538+    }
13539+
13540+    {
13541+        EGLint vid;
13542+        if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) {
13543+            av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n");
13544+            return -1;
13545+        }
13546+
13547+        {
13548+            XVisualInfo visTemplate = { .visualid = vid };
13549+            int num_visuals;
13550+            XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask,
13551+                                                  &visTemplate, &num_visuals);
13552+            if (!visinfo) {   // no X visual matched the EGL config's visual id
13553+                av_log(s, AV_LOG_ERROR, "Error: couldn't get X visual\n");
13554+                return -1;
13555+            }
13556+
13557+            /* window attributes */
13558+            attr.background_pixel = 0;
13559+            attr.border_pixel = 0;
13560+            attr.colormap = XCreateColormap(dpy, root, visinfo->visual, AllocNone);
13561+            attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask;
13562+            /* XXX this is a bad way to get a borderless window! */
13563+            mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask;
13564+
13565+            win = XCreateWindow(dpy, root, x, y, width, height,
13566+                                0, visinfo->depth, InputOutput,
13567+                                visinfo->visual, mask, &attr);
13568+            XFree(visinfo);
13569+        }
13570+    }
13571+
13572+    if (fullscreen)
13573+        no_border(dpy, win);
13574+
13575+    /* set hints and properties */
13576+    {
13577+        XSizeHints sizehints;
13578+        sizehints.x = x;
13579+        sizehints.y = y;
13580+        sizehints.width  = width;
13581+        sizehints.height = height;
13582+        sizehints.flags = USSize | USPosition;
13583+        XSetNormalHints(dpy, win, &sizehints);
13584+        XSetStandardProperties(dpy, win, name, name,
13585+                               None, (char **)NULL, 0, &sizehints);
13586+    }
13587+
13588+    eglBindAPI(EGL_OPENGL_ES_API);
13589+
13590+    {
13591+        static const EGLint ctx_attribs[] = {
13592+            EGL_CONTEXT_CLIENT_VERSION, 2,
13593+            EGL_NONE
13594+        };
13595+        ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs);
13596+        if (!ctx) {
13597+            av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n");
13598+            return -1;
13599+        }
13600+    }
13601+
13602+    XMapWindow(dpy, win);
13603+
13604+    {
13605+        EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL);
13606+        if (!surf) {
13607+            av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n");
13608+            return -1;
13609+        }
13610+
13611+        if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) {
13612+            av_log(s, AV_LOG_ERROR, "Error: eglMakeCurrent failed\n");
13613+            return -1;
13614+        }
13615+
13616+        *winRet = win;
13617+        *ctxRet = ctx;
13618+        *surfRet = surf;
13619+    }
13620+
13621+    return 0;
13622+}
13623+
13624+// Compile GLSL 'source' as a shader of type 'target'.
13625+// Returns the shader name, or 0 after logging the compiler output.
13626+static GLint compile_shader(struct AVFormatContext *const avctx, GLenum target, const char *source)
13627+{
13628+    GLuint s = glCreateShader(target);
13629+    GLint ok = GL_FALSE;
13630+
13631+    if (s == 0) {
13632+        av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n");
13633+        return 0;
13634+    }
13635+
13636+    glShaderSource(s, 1, (const GLchar **)&source, NULL);
13637+    glCompileShader(s);
13638+    glGetShaderiv(s, GL_COMPILE_STATUS, &ok);
13639+    if (!ok) {
13640+        GLint size = 0;
13641+        GLchar *info = NULL;
13642+
13643+        // Only fetch the log if there is one and its buffer could be allocated
13644+        glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size);
13645+        if (size > 1 && (info = malloc(size)) != NULL)
13646+            glGetShaderInfoLog(s, size, NULL, info);
13647+        av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n",
13648+               info != NULL ? info : "<empty log>\n", source);
13649+
13650+        // Neither the log buffer nor the failed shader object is needed any more
13651+        free(info);
13652+        glDeleteShader(s);
13653+        return 0;
13654+    }
13655+    return s;
13656+}
13657+
13658+// Link vertex shader 'vs' and fragment shader 'fs' into a new program.
13659+// Returns the program name, or 0 after logging the linker output.
13660+static GLuint link_program(struct AVFormatContext *const s, GLint vs, GLint fs)
13661+{
13662+    GLuint prog = glCreateProgram();
13663+    GLint ok = GL_FALSE;
13664+
13665+    if (prog == 0) {
13666+        av_log(s, AV_LOG_ERROR, "Failed to create program\n");
13667+        return 0;
13668+    }
13669+
13670+    glAttachShader(prog, vs);
13671+    glAttachShader(prog, fs);
13672+    glLinkProgram(prog);
13673+    glGetProgramiv(prog, GL_LINK_STATUS, &ok);
13674+    if (!ok) {
13675+        /* Some drivers return a size of 1 for an empty log.  This is the size
13676+         * of a log that contains only a terminating NUL character.
13677+         */
13678+        GLint size = 0;
13679+        GLchar *info = NULL;
13680+
13681+        glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size);
13682+        if (size > 1 && (info = malloc(size)) != NULL)
13683+            glGetProgramInfoLog(prog, size, NULL, info);
13684+
13685+        av_log(s, AV_LOG_ERROR, "Failed to link: %s\n",
13686+               (info != NULL) ? info : "<empty log>");
13687+        // Release the log buffer and the unusable program object
13688+        free(info);
13689+        glDeleteProgram(prog);
13690+        return 0;
13691+    }
13692+    return prog;
13693+}
13694+
13695+// Build the program that draws the video texture over a full-window quad and
13696+// make it current. Returns 0 on success, -1 if compiling or linking fails.
13697+static int gl_setup(struct AVFormatContext *const s)
13698+{
13699+    const char *vs =
13700+        "attribute vec4 pos;\n"
13701+        "varying vec2 texcoord;\n"
13702+        "\n"
13703+        "void main() {\n"
13704+        "  gl_Position = pos;\n"
13705+        "  texcoord.x = (pos.x + 1.0) / 2.0;\n"
13706+        "  texcoord.y = (-pos.y + 1.0) / 2.0;\n"
13707+        "}\n";
13708+    const char *fs =
13709+        "#extension GL_OES_EGL_image_external : enable\n"
13710+        "precision mediump float;\n"
13711+        "uniform samplerExternalOES s;\n"
13712+        "varying vec2 texcoord;\n"
13713+        "void main() {\n"
13714+        "  gl_FragColor = texture2D(s, texcoord);\n"
13715+        "}\n";
13716+    // Kept static: the pointer handed to GL below must outlive this function
13717+    static const float verts[] = {
13718+        -1, -1,
13719+        1, -1,
13720+        1,  1,
13721+        -1,  1,
13722+    };
13723+    GLuint vs_s;
13724+    GLuint fs_s;
13725+    GLuint prog;
13726+    GLint pos;
13727+
13728+    if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) ||
13729+        !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) ||
13730+        !(prog = link_program(s, vs_s, fs_s)))
13731+        return -1;
13732+    glUseProgram(prog);
13733+    // Ask the linker where "pos" landed instead of assuming location 0
13734+    if ((pos = glGetAttribLocation(prog, "pos")) < 0)
13735+        pos = 0;    // lookup failed: fall back to the previously assumed slot
13736+    glVertexAttribPointer(pos, 2, GL_FLOAT, GL_FALSE, 0, verts);
13737+    glEnableVertexAttribArray(pos);
13738+    return 0;
13739+}
13740+// write_trailer callback of the vout_egl muxer: nothing to finalise, always returns 0.
13741+static int egl_vout_write_trailer(AVFormatContext *s)
13742+{
13743+#if TRACE_ALL
13744+    av_log(s, AV_LOG_INFO, "%s\n", __func__);
13745+#endif
13746+
13747+    return 0;
13748+}
13749+
13750+// write_header callback: accept exactly one wrapped-AVFrame video stream, else EINVAL
13751+static int egl_vout_write_header(AVFormatContext *s)
13752+{
13753+#if TRACE_ALL
13754+    av_log(s, AV_LOG_INFO, "%s\n", __func__);
13755+#endif
13756+    // Test the stream count first: streams[0] must not be touched when there
13757+    // are no streams (short-circuit evaluation guards the dereferences below)
13758+    if (s->nb_streams != 1
13759+        || s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO
13760+        || s->streams[0]->codecpar->codec_id   != AV_CODEC_ID_WRAPPED_AVFRAME) {
13761+        av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n");
13762+        return AVERROR(EINVAL);
13763+    }
13764+    return 0;
13765+}
13766+
13767+
13768+// Import the frame's DRM PRIME buffers as an external-OES texture, draw it over
13769+// the whole surface and swap buffers. Returns 0, or an error if import fails.
13770+static int do_display(AVFormatContext *const s, egl_display_env_t *const de, AVFrame *const frame)
13771+{
13772+    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)frame->data[0];
13773+    egl_aux_t *da = NULL;
13774+    unsigned int slot;
13775+
13776+#if TRACE_ALL
13777+    av_log(s, AV_LOG_INFO, "<<< %s\n", __func__);
13778+#endif
13779+    // Pick the first slot that is free or already tracks this buffer's fd
13780+    for (slot = 0; slot != 32; ++slot) {
13781+        if (de->aux[slot].fd == -1 || de->aux[slot].fd == desc->objects[0].fd) {
13782+            da = de->aux + slot;
13783+            break;
13784+        }
13785+    }
13786+    if (da == NULL) {
13787+        av_log(s, AV_LOG_INFO, "%s: Out of handles\n", __func__);
13788+        return AVERROR(EINVAL);
13789+    }
13790+
13791+    if (da->texture == 0) {
13792+        EGLint attribs[50];
13793+        EGLint *a = attribs;
13794+        int i, j;
13795+        static const EGLint anames[] = {
13796+            EGL_DMA_BUF_PLANE0_FD_EXT,
13797+            EGL_DMA_BUF_PLANE0_OFFSET_EXT,
13798+            EGL_DMA_BUF_PLANE0_PITCH_EXT,
13799+            EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
13800+            EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
13801+            EGL_DMA_BUF_PLANE1_FD_EXT,
13802+            EGL_DMA_BUF_PLANE1_OFFSET_EXT,
13803+            EGL_DMA_BUF_PLANE1_PITCH_EXT,
13804+            EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT,
13805+            EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT,
13806+            EGL_DMA_BUF_PLANE2_FD_EXT,
13807+            EGL_DMA_BUF_PLANE2_OFFSET_EXT,
13808+            EGL_DMA_BUF_PLANE2_PITCH_EXT,
13809+            EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT,
13810+            EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT,
13811+        };
13812+        const EGLint *b = anames;
13813+        const EGLint *const b_end = anames + sizeof(anames) / sizeof(anames[0]);
13814+
13815+        *a++ = EGL_WIDTH;
13816+        *a++ = av_frame_cropped_width(frame);
13817+        *a++ = EGL_HEIGHT;
13818+        *a++ = av_frame_cropped_height(frame);
13819+        *a++ = EGL_LINUX_DRM_FOURCC_EXT;
13820+        *a++ = desc->layers[0].format;
13821+
13822+        for (i = 0; i < desc->nb_layers; ++i) {
13823+            for (j = 0; j < desc->layers[i].nb_planes; ++j) {
13824+                const AVDRMPlaneDescriptor *const p = desc->layers[i].planes + j;
13825+                const AVDRMObjectDescriptor *const obj = desc->objects + p->object_index;
13826+                // anames only names three planes: refuse descriptors with more
13827+                if (b == b_end) {
13828+                    av_log(s, AV_LOG_ERROR, "%s: Too many planes\n", __func__);
13829+                    return AVERROR(EINVAL);
13830+                }
13831+                *a++ = *b++;
13832+                *a++ = obj->fd;
13833+                *a++ = *b++;
13834+                *a++ = p->offset;
13835+                *a++ = *b++;
13836+                *a++ = p->pitch;
13837+                if (obj->format_modifier == 0) {
13838+                    b += 2;     // no modifier: skip both MODIFIER names
13839+                } else {
13840+                    *a++ = *b++;
13841+                    *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF);
13842+                    *a++ = *b++;
13843+                    *a++ = (EGLint)(obj->format_modifier >> 32);
13844+                }
13845+            }
13846+        }
13847+        *a = EGL_NONE;
13848+#if TRACE_ALL
13849+        for (a = attribs, i = 0; *a != EGL_NONE; a += 2, ++i) {
13850+            av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]);
13851+        }
13852+#endif
13853+        {
13854+            const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy,
13855+                                                     EGL_NO_CONTEXT,
13856+                                                     EGL_LINUX_DMA_BUF_EXT,
13857+                                                     NULL, attribs);
13858+            if (!image) {
13859+                av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd);
13860+                return -1;
13861+            }
13862+
13863+            glGenTextures(1, &da->texture);
13864+            glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture);
13865+            glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
13866+            glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
13867+            glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image);
13868+            eglDestroyImageKHR(de->setup.egl_dpy, image);
13869+        }
13870+        da->fd = desc->objects[0].fd;
13871+    }
13872+
13873+    glClearColor(0.5, 0.5, 0.5, 0.5);
13874+    glClear(GL_COLOR_BUFFER_BIT);
13875+    glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture);
13876+    glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
13877+    eglSwapBuffers(de->setup.egl_dpy, de->setup.surf);
13878+
13879+    // The texture is dropped after every frame, which frees the slot again
13880+    glDeleteTextures(1, &da->texture);
13881+    da->texture = 0;
13882+    da->fd = -1;
13883+    return 0;
13884+}
13884+// Display thread entry: opens X11/EGL, creates the window and GL program, then shows queued frames until q_terminate is set.
13885+static void* display_thread(void *v)
13886+{
13887+    AVFormatContext *const s = v;
13888+    egl_display_env_t *const de = s->priv_data;
13889+
13890+#if TRACE_ALL
13891+    av_log(s, AV_LOG_INFO, "<<< %s\n", __func__);
13892+#endif
13893+    {
13894+        EGLint egl_major, egl_minor;
13895+
13896+        de->setup.dpy = XOpenDisplay(NULL);
13897+        if (!de->setup.dpy) {
13898+            av_log(s, AV_LOG_ERROR, "Couldn't open X display\n");
13899+            goto fail;
13900+        }
13901+
13902+        de->setup.egl_dpy = eglGetDisplay(de->setup.dpy);
13903+        if (!de->setup.egl_dpy) {
13904+            av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n");
13905+            goto fail;
13906+        }
13907+
13908+        if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) {
13909+            av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n");
13910+            goto fail;
13911+        }
13912+
13913+        av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor);
13914+
13915+        if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) {
13916+            av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n");
13917+            goto fail;
13918+        }
13919+    }
13920+    // Fall back to 1280x720 when either window dimension is unset (zero)
13921+    if (!de->window_width || !de->window_height) {
13922+        de->window_width = 1280;
13923+        de->window_height = 720;
13924+    }
13925+    if (make_window(s, de, de->setup.dpy, de->setup.egl_dpy, "ffmpeg-vout",
13926+                    &de->setup.win, &de->setup.ctx, &de->setup.surf)) {
13927+        av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__);
13928+        goto fail;
13929+    }
13930+
13931+    if (gl_setup(s)) {
13932+        av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__);
13933+        goto fail;
13934+    }
13935+
13936+#if TRACE_ALL
13937+    av_log(s, AV_LOG_INFO, "--- %s: Start done\n", __func__);
13938+#endif
13939+    sem_post(&de->display_start_sem);   // releases the sem_wait in egl_vout_init
13940+    // Display loop: sleep on q_sem, then show whatever frame is in q_next
13941+    for (;;) {
13942+        AVFrame *frame;
13943+
13944+        while (sem_wait(&de->q_sem) != 0) {
13945+            av_assert0(errno == EINTR);
13946+        }
13947+        // q_sem is also posted by egl_vout_deinit after it sets q_terminate
13948+        if (de->q_terminate)
13949+            break;
13950+
13951+        pthread_mutex_lock(&de->q_lock);
13952+        frame = de->q_next;
13953+        de->q_next = NULL;
13954+        pthread_mutex_unlock(&de->q_lock);
13955+
13956+        do_display(s, de, frame);
13957+        // Keep the frame just shown referenced in q_this; release its predecessor
13958+        av_frame_free(&de->q_this);
13959+        de->q_this = frame;
13960+    }
13961+
13962+#if TRACE_ALL
13963+    av_log(s, AV_LOG_INFO, ">>> %s\n", __func__);
13964+#endif
13965+
13966+    return NULL;
13967+    // NOTE(review): neither exit path here closes the X display or tears down EGL - confirm whether that is done elsewhere
13968+fail:
13969+#if TRACE_ALL
13970+    av_log(s, AV_LOG_INFO, ">>> %s: FAIL\n", __func__);
13971+#endif
13972+    de->q_terminate = 1;                // egl_vout_init reads this as start-up failure
13973+    sem_post(&de->display_start_sem);
13974+
13975+    return NULL;
13976+}
13977+
13978+// write_packet callback: queue a reference to the packet's wrapped AVFrame for
13979+// the display thread. Accepts DRM_PRIME frames, or VAAPI frames mapped to it.
13980+static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt)
13981+{
13982+    const AVFrame *const src_frame = (AVFrame *)pkt->data;
13983+    egl_display_env_t *const de = s->priv_data;
13984+    AVFrame *frame, *old;
13985+    int ret;
13986+
13987+#if TRACE_ALL
13988+    av_log(s, AV_LOG_INFO, "%s\n", __func__);
13989+#endif
13990+    if (src_frame->format != AV_PIX_FMT_DRM_PRIME && src_frame->format != AV_PIX_FMT_VAAPI) {
13991+        av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format);
13992+        return AVERROR(EINVAL);
13993+    }
13994+    if ((frame = av_frame_alloc()) == NULL)
13995+        return AVERROR(ENOMEM);
13996+
13997+    if (src_frame->format == AV_PIX_FMT_DRM_PRIME) {
13998+        if ((ret = av_frame_ref(frame, src_frame)) < 0) {
13999+            av_frame_free(&frame);
14000+            return ret;
14001+        }
14002+    } else {
14003+        frame->format = AV_PIX_FMT_DRM_PRIME;
14004+        if (av_hwframe_map(frame, src_frame, 0) != 0) {
14005+            av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format);
14006+            av_frame_free(&frame);
14007+            return AVERROR(EINVAL);
14008+        }
14009+    }
14010+
14011+    // Really hacky sync
14012+    while (de->show_all && de->q_next)
14013+        usleep(3000);
14014+
14015+    // Swap the new frame in under the lock; old is whatever was still pending
14016+    pthread_mutex_lock(&de->q_lock);
14017+    old = de->q_next;
14018+    de->q_next = frame;
14019+    pthread_mutex_unlock(&de->q_lock);
14020+    if (old == NULL)
14021+        sem_post(&de->q_sem);       // slot was empty: wake the display thread
14022+    else
14023+        av_frame_free(&old);        // replaced an undisplayed frame: drop it
14024+    return 0;
14025+}
14026+// write_uncoded_frame callback: not implemented; logs the request and returns AVERROR_PATCHWELCOME.
14027+static int egl_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe,
14028+                                unsigned flags)
14029+{
14030+    av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags);
14031+    return AVERROR_PATCHWELCOME;
14032+}
14033+
14034+// control_message callback: only AV_APP_TO_DEV_WINDOW_REPAINT is recognised
14035+static int egl_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size)
14036+{
14037+#if TRACE_ALL
14038+    av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type);
14039+#endif
14040+    // A repaint request needs no action here and is simply acknowledged
14041+    if (type == AV_APP_TO_DEV_WINDOW_REPAINT)
14042+        return 0;
14043+
14044+    // Every other message type is reported as unsupported
14045+    return AVERROR(ENOSYS);
14046+}
14047+
14048+// Muxer init: reset state, start the display thread and wait for its start-up
14049+// result. deinit is called if init fails so no need to clean up explicitly here
14050+static int egl_vout_init(struct AVFormatContext *s)
14051+{
14052+    egl_display_env_t *const de = s->priv_data;
14053+    unsigned int i;
14054+
14055+    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
14056+
14057+    de->setup = (struct egl_setup) { 0 };
14058+    for (i = 0; i != 32; ++i)
14059+        de->aux[i].fd = -1;     // -1 is what do_display treats as a free slot
14060+
14061+    de->q_terminate = 0;
14062+    pthread_mutex_init(&de->q_lock, NULL);
14063+    sem_init(&de->q_sem, 0, 0);
14064+    sem_init(&de->display_start_sem, 0, 0);
14065+    av_assert0(pthread_create(&de->q_thread, NULL, display_thread, s) == 0);
14066+
14067+    // Retry on EINTR so q_terminate is not read before the thread has posted
14068+    while (sem_wait(&de->display_start_sem) != 0)
14069+        av_assert0(errno == EINTR);
14070+    if (de->q_terminate) {
14071+        av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__);
14072+        return -1;
14073+    }
14074+
14075+    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
14076+    return 0;
14077+}
14078+
14079+// Muxer deinit: stop and join the display thread, then free sync objects and frames
14080+static void egl_vout_deinit(struct AVFormatContext *s)
14081+{
14082+    egl_display_env_t *const de = s->priv_data;
14083+
14084+    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
14085+    de->q_terminate = 1;
14086+    sem_post(&de->q_sem);                   // wake the thread so it sees q_terminate
14087+    pthread_join(de->q_thread, NULL);
14088+    sem_destroy(&de->q_sem);
14089+    sem_destroy(&de->display_start_sem);    // also created by egl_vout_init
14090+    pthread_mutex_destroy(&de->q_lock);
14091+
14092+    av_frame_free(&de->q_next);
14093+    av_frame_free(&de->q_this);
14094+    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
14095+}
14096+// AVOptions of the device. window_size stores via OFFSET(window_width) - presumably window_height follows it directly in egl_display_env_t (TODO confirm).
14097+#define OFFSET(x) offsetof(egl_display_env_t, x)
14098+static const AVOption options[] = {
14099+    { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
14100+    { "window_size",  "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
14101+    { "window_x",     "set window x offset",    OFFSET(window_x),     AV_OPT_TYPE_INT,    { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
14102+    { "window_y",     "set window y offset",    OFFSET(window_y),     AV_OPT_TYPE_INT,    { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
14103+    { "fullscreen",   "set fullscreen display", OFFSET(fullscreen),   AV_OPT_TYPE_BOOL,   { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
14104+    { NULL }
14105+
14106+};
14107+// AVClass exposing the options table above; categorised as a video output device.
14108+static const AVClass egl_vout_class = {
14109+    .class_name = "egl vid outdev",
14110+    .item_name  = av_default_item_name,
14111+    .option     = options,
14112+    .version    = LIBAVUTIL_VERSION_INT,
14113+    .category   = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT,
14114+};
14115+// Muxer entry for the "vout_egl" output device: wrapped-AVFrame video only, no audio, no output file (AVFMT_NOFILE).
14116+AVOutputFormat ff_vout_egl_muxer = {
14117+    .name           = "vout_egl",
14118+    .long_name      = NULL_IF_CONFIG_SMALL("Egl video output device"),
14119+    .priv_data_size = sizeof(egl_display_env_t),
14120+    .audio_codec    = AV_CODEC_ID_NONE,
14121+    .video_codec    = AV_CODEC_ID_WRAPPED_AVFRAME,
14122+    .write_header   = egl_vout_write_header,
14123+    .write_packet   = egl_vout_write_packet,
14124+    .write_uncoded_frame = egl_vout_write_frame,
14125+    .write_trailer  = egl_vout_write_trailer,
14126+    .control_message = egl_vout_control_message,
14127+    .flags          = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS,
14128+    .priv_class     = &egl_vout_class,
14129+    .init           = egl_vout_init,
14130+    .deinit         = egl_vout_deinit,
14131+};
14132+
14133--- a/libavfilter/Makefile
14134+++ b/libavfilter/Makefile
14135@@ -254,6 +254,7 @@ OBJS-$(CONFIG_DEFLATE_FILTER)
14136 OBJS-$(CONFIG_DEFLICKER_FILTER)              += vf_deflicker.o
14137 OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER)        += vf_deinterlace_qsv.o
14138 OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER)      += vf_deinterlace_vaapi.o vaapi_vpp.o
14139+OBJS-$(CONFIG_DEINTERLACE_V4L2M2M_FILTER)    += vf_deinterlace_v4l2m2m.o
14140 OBJS-$(CONFIG_DEJUDDER_FILTER)               += vf_dejudder.o
14141 OBJS-$(CONFIG_DELOGO_FILTER)                 += vf_delogo.o
14142 OBJS-$(CONFIG_DENOISE_VAAPI_FILTER)          += vf_misc_vaapi.o vaapi_vpp.o
14143@@ -509,6 +510,7 @@ OBJS-$(CONFIG_TRANSPOSE_VAAPI_FILTER)
14144 OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER)       += vf_transpose_vulkan.o vulkan.o vulkan_filter.o
14145 OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o
14146 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)          += vf_premultiply.o framesync.o
14147+OBJS-$(CONFIG_UNSAND_FILTER)                 += vf_unsand.o
14148 OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
14149 OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER)         += vf_unsharp_opencl.o opencl.o \
14150                                                 opencl/unsharp.o
14151--- a/libavfilter/aarch64/Makefile
14152+++ b/libavfilter/aarch64/Makefile
14153@@ -1,3 +1,5 @@
14154+OBJS-$(CONFIG_BWDIF_FILTER)                  += aarch64/vf_bwdif_init_aarch64.o
14155 OBJS-$(CONFIG_NLMEANS_FILTER)                += aarch64/vf_nlmeans_init.o
14156
14157+NEON-OBJS-$(CONFIG_BWDIF_FILTER)             += aarch64/vf_bwdif_neon.o
14158 NEON-OBJS-$(CONFIG_NLMEANS_FILTER)           += aarch64/vf_nlmeans_neon.o
14159--- /dev/null
14160+++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
14161@@ -0,0 +1,125 @@
14162+/*
14163+ * bwdif aarch64 NEON optimisations
14164+ *
14165+ * Copyright (c) 2023 John Cox <jc@kynesim.co.uk>
14166+ *
14167+ * This file is part of FFmpeg.
14168+ *
14169+ * FFmpeg is free software; you can redistribute it and/or
14170+ * modify it under the terms of the GNU Lesser General Public
14171+ * License as published by the Free Software Foundation; either
14172+ * version 2.1 of the License, or (at your option) any later version.
14173+ *
14174+ * FFmpeg is distributed in the hope that it will be useful,
14175+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14176+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14177+ * Lesser General Public License for more details.
14178+ *
14179+ * You should have received a copy of the GNU Lesser General Public
14180+ * License along with FFmpeg; if not, write to the Free Software
14181+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
14182+ */
14183+
14184+#include "libavutil/common.h"
14185+#include "libavfilter/bwdif.h"
14186+#include "libavutil/aarch64/cpu.h"
14187+// Entry points wrapped by the helpers below; the _neon suffix suggests they live in vf_bwdif_neon.S (definitions not visible here).
14188+void ff_bwdif_filter_edge_neon(void *dst1, void *prev1, void *cur1, void *next1,
14189+                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
14190+                               int parity, int clip_max, int spat);
14191+
14192+void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int mrefs,
14193+                                int prefs3, int mrefs3, int parity, int clip_max);
14194+
14195+void ff_bwdif_filter_line_neon(void *dst1, void *prev1, void *cur1, void *next1,
14196+                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
14197+                               int prefs3, int mrefs3, int prefs4, int mrefs4,
14198+                               int parity, int clip_max);
14199+
14200+void ff_bwdif_filter_line3_neon(void * dst1, int d_stride,
14201+                                const void * prev1, const void * cur1, const void * next1, int s_stride,
14202+                                int w, int parity, int clip_max);
14203+
14204+
14205+static void filter_line3_helper(void * dst1, int d_stride,
14206+                                const void * prev1, const void * cur1, const void * next1, int s_stride,
14207+                                int w, int parity, int clip_max)
14208+{
14209+    // Asm works on 16 byte chunks and is only used when clip_max is 255.
14210+    // If w is a multiple of 16 then all is good - if not, and w rounded up to
14211+    // 16 still fits inside both the src & dst strides, let the asm run over
14212+    // the padding bytes: almost certainly faster than invoking the C version
14213+    // for the tail. Otherwise the asm stops at w & ~15 and C finishes the line.
14214+    const int w1 = FFALIGN(w, 16);
14215+    const int w0 = clip_max != 255 ? 0 :
14216+                   w1 <= d_stride && w1 <= s_stride ? w : w & ~15;
14217+
14218+    ff_bwdif_filter_line3_neon(dst1, d_stride,
14219+                               prev1, cur1, next1, s_stride,
14220+                               w0, parity, clip_max);
14221+
14222+    if (w0 < w)
14223+        ff_bwdif_filter_line3_c((char *)dst1 + w0, d_stride,
14224+                                (const char *)prev1 + w0, (const char *)cur1 + w0, (const char *)next1 + w0, s_stride,
14225+                                w - w0, parity, clip_max);
14226+}
14227+
14228+// NEON for the leading multiple-of-16 pixels (only when clip_max is 255), C for the rest
14229+static void filter_line_helper(void *dst1, void *prev1, void *cur1, void *next1,
14230+                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
14231+                               int prefs3, int mrefs3, int prefs4, int mrefs4,
14232+                               int parity, int clip_max)
14233+{
14234+    const int simd_w = clip_max == 255 ? w & ~15 : 0;
14235+    const int tail_w = w - simd_w;
14236+    ff_bwdif_filter_line_neon(dst1, prev1, cur1, next1, simd_w,
14237+                              prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max);
14238+    if (tail_w > 0)
14239+        ff_bwdif_filter_line_c((char *)dst1 + simd_w, (char *)prev1 + simd_w, (char *)cur1 + simd_w, (char *)next1 + simd_w,
14240+                               tail_w, prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max);
14241+}
14242+
14243+// NEON for the leading multiple-of-16 pixels (only when clip_max is 255), C for the rest
14244+static void filter_edge_helper(void *dst1, void *prev1, void *cur1, void *next1,
14245+                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
14246+                               int parity, int clip_max, int spat)
14247+{
14248+    const int simd_w = clip_max == 255 ? w & ~15 : 0;
14249+    const int tail_w = w - simd_w;
14250+    ff_bwdif_filter_edge_neon(dst1, prev1, cur1, next1, simd_w,
14251+                              prefs, mrefs, prefs2, mrefs2, parity, clip_max, spat);
14252+    if (tail_w > 0)
14253+        ff_bwdif_filter_edge_c((char *)dst1 + simd_w, (char *)prev1 + simd_w,
14254+                               (char *)cur1 + simd_w, (char *)next1 + simd_w, tail_w,
14255+                               prefs, mrefs, prefs2, mrefs2, parity, clip_max, spat);
14256+}
14257+
14258+// NEON for the leading multiple-of-16 pixels (only when clip_max is 255), C for the rest
14259+static void filter_intra_helper(void *dst1, void *cur1, int w, int prefs, int mrefs,
14260+                                int prefs3, int mrefs3, int parity, int clip_max)
14261+{
14262+    const int simd_w = clip_max == 255 ? w & ~15 : 0;
14263+    const int tail_w = w - simd_w;
14264+    ff_bwdif_filter_intra_neon(dst1, cur1, simd_w, prefs, mrefs, prefs3, mrefs3, parity, clip_max);
14265+    if (tail_w > 0)
14266+        ff_bwdif_filter_intra_c((char *)dst1 + simd_w, (char *)cur1 + simd_w,
14267+                                tail_w, prefs, mrefs, prefs3, mrefs3, parity, clip_max);
14268+}
14269+
14270+// Point the bwdif context at the NEON-backed helpers. Nothing is changed
14271+// unless the content is 8-bit and the CPU flags report NEON.
14272+void ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth)
14273+{
14274+    const int cpu_flags = av_get_cpu_flags();
14275+    const int usable = bit_depth == 8 && have_neon(cpu_flags);
14276+
14277+    if (!usable)
14278+        return;
14279+
14280+    // Each helper splits a line between the asm routine and its C counterpart
14281+    s->filter_intra = filter_intra_helper;
14282+    s->filter_line  = filter_line_helper;
14283+    s->filter_edge  = filter_edge_helper;
14284+    s->filter_line3 = filter_line3_helper;
14285+}
14286+
14287--- /dev/null
14288+++ b/libavfilter/aarch64/vf_bwdif_neon.S
14289@@ -0,0 +1,788 @@
14290+/*
14291+ * bwdif aarch64 NEON optimisations
14292+ *
14293+ * Copyright (c) 2023 John Cox <jc@kynesim.co.uk>
14294+ *
14295+ * This file is part of FFmpeg.
14296+ *
14297+ * FFmpeg is free software; you can redistribute it and/or
14298+ * modify it under the terms of the GNU Lesser General Public
14299+ * License as published by the Free Software Foundation; either
14300+ * version 2.1 of the License, or (at your option) any later version.
14301+ *
14302+ * FFmpeg is distributed in the hope that it will be useful,
14303+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14304+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14305+ * Lesser General Public License for more details.
14306+ *
14307+ * You should have received a copy of the GNU Lesser General Public
14308+ * License along with FFmpeg; if not, write to the Free Software
14309+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
14310+ */
14311+
14312+
14313+#include "libavutil/aarch64/asm.S"
14314+
14315+// Space taken on the stack by an int (32-bit)
14316+#ifdef __APPLE__
14317+.set    SP_INT, 4
14318+#else
14319+.set    SP_INT, 8
14320+#endif
14321+// b = s0..s3 (.4s each) narrowed to 16 bytes with unsigned saturation: shift right by n - 8 to halfwords, then uzp2 keeps each upper byte. Overwrites s0, s1.
14322+.macro SQSHRUNN b, s0, s1, s2, s3, n
14323+        sqshrun         \s0\().4h, \s0\().4s, #\n - 8
14324+        sqshrun2        \s0\().8h, \s1\().4s, #\n - 8
14325+        sqshrun         \s1\().4h, \s2\().4s, #\n - 8
14326+        sqshrun2        \s1\().8h, \s3\().4s, #\n - 8
14327+        uzp2            \b\().16b, \s0\().16b, \s1\().16b
14328+.endm
14329+// a0..a3 (.4s) = signed widening multiply of the halfword lanes of s0 (a0, a1) and s1 (a2, a3) by operand k
14330+.macro SMULL4K a0, a1, a2, a3, s0, s1, k
14331+        smull           \a0\().4s, \s0\().4h, \k
14332+        smull2          \a1\().4s, \s0\().8h, \k
14333+        smull           \a2\().4s, \s1\().4h, \k
14334+        smull2          \a3\().4s, \s1\().8h, \k
14335+.endm
14336+// a0..a3 (.4s) = unsigned widening multiply of the halfword lanes of s0 (a0, a1) and s1 (a2, a3) by operand k
14337+.macro UMULL4K a0, a1, a2, a3, s0, s1, k
14338+        umull           \a0\().4s, \s0\().4h, \k
14339+        umull2          \a1\().4s, \s0\().8h, \k
14340+        umull           \a2\().4s, \s1\().4h, \k
14341+        umull2          \a3\().4s, \s1\().8h, \k
14342+.endm
14343+// a0..a3 (.4s) += unsigned widening multiply of the halfword lanes of s0 (a0, a1) and s1 (a2, a3) by operand k
14344+.macro UMLAL4K a0, a1, a2, a3, s0, s1, k
14345+        umlal           \a0\().4s, \s0\().4h, \k
14346+        umlal2          \a1\().4s, \s0\().8h, \k
14347+        umlal           \a2\().4s, \s1\().4h, \k
14348+        umlal2          \a3\().4s, \s1\().8h, \k
14349+.endm
14350+// a0..a3 (.4s) -= unsigned widening multiply of the halfword lanes of s0 (a0, a1) and s1 (a2, a3) by operand k
14351+.macro UMLSL4K a0, a1, a2, a3, s0, s1, k
14352+        umlsl           \a0\().4s, \s0\().4h, \k
14353+        umlsl2          \a1\().4s, \s0\().8h, \k
14354+        umlsl           \a2\().4s, \s1\().4h, \k
14355+        umlsl2          \a3\().4s, \s1\().8h, \k
14356+.endm
14357+// Spatial check on 16 unsigned bytes: diff is updated in place (kept 0 where it was 0); uqsub clamps negative differences to 0; t0-t3 are scratch.
14358+//      int b = m2s1 - m1;
14359+//      int f = p2s1 - p1;
14360+//      int dc = c0s1 - m1;
14361+//      int de = c0s1 - p1;
14362+//      int sp_max = FFMIN(p1 - c0s1, m1 - c0s1);
14363+//      sp_max = FFMIN(sp_max, FFMAX(-b,-f));
14364+//      int sp_min = FFMIN(c0s1 - p1, c0s1 - m1);
14365+//      sp_min = FFMIN(sp_min, FFMAX(b,f));
14366+//      diff = diff == 0 ? 0 : FFMAX3(diff, sp_min, sp_max);
14367+.macro SPAT_CHECK diff, m2s1, m1, c0s1, p1, p2s1, t0, t1, t2, t3
14368+        uqsub           \t0\().16b, \p1\().16b, \c0s1\().16b
14369+        uqsub           \t2\().16b, \m1\().16b, \c0s1\().16b
14370+        umin            \t2\().16b, \t0\().16b, \t2\().16b
14371+
14372+        uqsub           \t1\().16b, \m1\().16b, \m2s1\().16b
14373+        uqsub           \t3\().16b, \p1\().16b, \p2s1\().16b
14374+        umax            \t3\().16b, \t3\().16b, \t1\().16b
14375+        umin            \t3\().16b, \t3\().16b, \t2\().16b
14376+
14377+        uqsub           \t0\().16b, \c0s1\().16b, \p1\().16b
14378+        uqsub           \t2\().16b, \c0s1\().16b, \m1\().16b
14379+        umin            \t2\().16b, \t0\().16b, \t2\().16b
14380+
14381+        uqsub           \t1\().16b, \m2s1\().16b, \m1\().16b
14382+        uqsub           \t0\().16b, \p2s1\().16b, \p1\().16b
14383+        umax            \t0\().16b, \t0\().16b, \t1\().16b
14384+        umin            \t2\().16b, \t2\().16b, \t0\().16b
14385+
14386+        cmeq            \t1\().16b, \diff\().16b, #0
14387+        umax            \diff\().16b, \diff\().16b, \t3\().16b
14388+        umax            \diff\().16b, \diff\().16b, \t2\().16b
14389+        bic             \diff\().16b, \diff\().16b, \t1\().16b
14390+.endm
14391+
14392+//      i0 = s0;
14393+//      if (i0 > d0 + diff0)
14394+//          i0 = d0 + diff0;
14395+//      else if (i0 < d0 - diff0)
14396+//          i0 = d0 - diff0;
14397+//
14398+// i0 = s0 is safe
14399+.macro DIFF_CLIP i0, s0, d0, diff, t0, t1
14400+        uqadd           \t0\().16b, \d0\().16b, \diff\().16b    // t0 = upper bound d0 + diff, saturating at 255
14401+        uqsub           \t1\().16b, \d0\().16b, \diff\().16b    // t1 = lower bound d0 - diff, saturating at 0
14402+        umin            \i0\().16b, \s0\().16b, \t0\().16b      // i0 = min(s0, upper); s0 is read only here, so i0 may alias it
14403+        umax            \i0\().16b, \i0\().16b, \t1\().16b      // i0 = max(i0, lower)
14404+.endm
14405+
14406+//      i0 = FFABS(m1 - p1) > td0 ? i1 : i2;
14407+//      DIFF_CLIP
14408+//
14409+// i0 = i1 is safe
14410+.macro INTERPOL i0, i1, i2, m1, d0, p1, td0, diff, t0, t1, t2
14411+        uabd            \t0\().16b, \m1\().16b, \p1\().16b      // t0 = |m1 - p1|
14412+        cmhi            \t0\().16b, \t0\().16b, \td0\().16b     // t0 = per-lane mask of (|m1 - p1| > td0), unsigned compare
14413+        bsl             \t0\().16b, \i1\().16b, \i2\().16b      // t0 = mask ? i1 : i2
14414+        DIFF_CLIP       \i0, \t0, \d0, \diff, \t1, \t2          // i0 = selected value clipped to [d0 - diff, d0 + diff]
14415+.endm
14416+
14417+.macro PUSH_VREGS
14418+        stp             d8,  d9,  [sp, #-64]!           // reserve 64 bytes (pre-index writeback) and store d8/d9 at the new sp
14419+        stp             d10, d11, [sp, #16]
14420+        stp             d12, d13, [sp, #32]
14421+        stp             d14, d15, [sp, #48]             // d8-d15 = low halves of v8-v15, which AAPCS64 makes callee-saved
14422+.endm
14423+
14424+.macro POP_VREGS
14425+        ldp             d14, d15, [sp, #48]             // reload in the reverse order of PUSH_VREGS
14426+        ldp             d12, d13, [sp, #32]
14427+        ldp             d10, d11, [sp, #16]
14428+        ldp             d8,  d9,  [sp], #64             // post-index: load d8/d9, then release the 64-byte save area
14429+.endm
14430+
14431+.macro LDR_COEFFS d, t0
14432+        movrel          \t0, coeffs, 0                  // t0 = address of the coeffs table (movrel is defined outside this view)
14433+        ld1             {\d\().8h}, [\t0]               // d = the eight 16-bit coefficients; t0 is clobbered
14434+.endm
14435+
14436+// static const uint16_t coef_lf[2] = { 4309, 213 };
14437+// static const uint16_t coef_hf[3] = { 5570, 3801, 1016 };
14438+// static const uint16_t coef_sp[2] = { 5077, 981 };
14439+
14440+const coeffs, align=4   // align 4 means align on 2^4 boundary
14441+        .hword          4309 * 4, 213 * 4               // lf[0]*4 = v0.h[0], lf[1]*4 = v0.h[1]
14442+        .hword          5570, 3801, 1016, -3801         // hf[0] = v0.h[2], hf[1] = v0.h[3], hf[2] = v0.h[4], -hf[1] = v0.h[5]
14443+        .hword          5077, 981                       // sp[0] = v0.h[6], sp[1] = v0.h[7]
14444+endconst
14445+
14446+// ===========================================================================
14447+//
14448+// void ff_bwdif_filter_line3_neon(
14449+//         void * dst1,         // x0
14450+//         int d_stride,        // w1
14451+//         const void * prev1,  // x2
14452+//         const void * cur1,   // x3
14453+//         const void * next1,  // x4
14454+//         int s_stride,        // w5
14455+//         int w,               // w6
14456+//         int parity,          // w7
14457+//         int clip_max);       // [sp, #0] (Ignored)
14458+
14459+function ff_bwdif_filter_line3_neon, export=1
14460+        // Sanity check w
14461+        cmp             w6, #0
14462+        ble             99f                             // w <= 0: return before anything is pushed
14463+
14464+        LDR_COEFFS      v0, x17                         // v0 = coefficient table; x17 is only scratch here
14465+
14466+// #define prev2 cur
14467+//        const uint8_t * restrict next2 = parity ? prev : next;
14468+        cmp             w7, #0
14469+        csel            x17, x2, x4, ne                 // x17 = next2; w7 (parity) is free for reuse after this
14470+
14471+        // We want all the V registers - save all the ones we must
14472+        PUSH_VREGS
14473+
14474+        // Some rearrangement of initial values for nice layout of refs in regs
14475+        mov             w10, w6                         // w10 = loop count
14476+        neg             w9,  w5                         // w9  = mref
14477+        lsl             w8,  w9,  #1                    // w8 =  mref2
14478+        add             w7,  w9,  w9, LSL #1            // w7  = mref3
14479+        lsl             w6,  w9,  #2                    // w6  = mref4
14480+        mov             w11, w5                         // w11 = pref
14481+        lsl             w12, w5,  #1                    // w12 = pref2
14482+        add             w13, w5,  w5, LSL #1            // w13 = pref3
14483+        lsl             w14, w5,  #2                    // w14 = pref4
14484+        add             w15, w5,  w5, LSL #2            // w15 = pref5
14485+        add             w16, w14, w12                   // w16 = pref6
14486+
14487+        lsl             w5,  w1,  #1                    // w5 = d_stride * 2
14488+
14489+//         for (x = 0; x < w; x++) {
14490+//             int diff0, diff2;
14491+//             int d0, d2;
14492+//             int temporal_diff0, temporal_diff2;
14493+//
14494+//             int i1, i2;
14495+//             int j1, j2;
14496+//             int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4;
14497+
14498+10:
14499+//             c0 = prev2[0] + next2[0];                // c0 = v20, v21
14500+//             d0  = c0 >> 1;                           // d0 = v10
14501+//             temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11
14502+        ldr             q31, [x3]                       // prev2[0] (prev2 is cur)
14503+        ldr             q21, [x17]                      // next2[0]
14504+        uhadd           v10.16b, v31.16b, v21.16b
14505+        uabd            v11.16b, v31.16b, v21.16b
14506+        uaddl           v20.8h,  v21.8b,  v31.8b
14507+        uaddl2          v21.8h,  v21.16b, v31.16b
14508+
14509+        ldr             q31, [x3, w6, sxtw]             // prev2[mrefs4]
14510+        ldr             q23, [x17, w6, sxtw]            // next2[mrefs4]
14511+
14512+//             i1 = coef_hf[0] * c0;                    // i1 = v2-v5
14513+        UMULL4K         v2, v3, v4, v5, v20, v21, v0.h[2]
14514+
14515+        ldr             q30, [x3, w14, sxtw]            // prev2[prefs4]
14516+        ldr             q25, [x17, w14, sxtw]           // next2[prefs4]
14517+
14518+//             m4 = prev2[mrefs4] + next2[mrefs4];      // m4 = v22,v23
14519+        uaddl           v22.8h,  v23.8b,  v31.8b
14520+        uaddl2          v23.8h,  v23.16b, v31.16b
14521+
14522+//             p4 = prev2[prefs4] + next2[prefs4];      // p4 = v24,v25, (p4 >> 1) = v12
14523+        uhadd           v12.16b, v25.16b, v30.16b
14524+        uaddl           v24.8h,  v25.8b,  v30.8b
14525+        uaddl2          v25.8h,  v25.16b, v30.16b
14526+
14527+//             j1 = -coef_hf[1] * (c0 + p4);            // j1 = v6-v9  (-c0:v20,v21)
14528+        add             v20.8h,  v20.8h,  v24.8h
14529+        add             v21.8h,  v21.8h,  v25.8h
14530+        SMULL4K         v6, v7, v8, v9, v20, v21, v0.h[5]
14531+
14532+//             m3 = cur[mrefs3];                        // m3 = v20
14533+        ldr             q20, [x3, w7, sxtw]
14534+
14535+//             p3 = cur[prefs3];                        // p3 = v21
14536+        ldr             q21, [x3, w13, sxtw]
14537+
14538+//             i1 += coef_hf[2] * (m4 + p4);            // (-m4:v22,v23) (-p4:v24,v25)
14539+        add             v22.8h,  v22.8h,  v24.8h
14540+        add             v23.8h,  v23.8h,  v25.8h
14541+        UMLAL4K         v2, v3, v4, v5, v22, v23, v0.h[4]
14542+
14543+        ldr             q29, [x3, w8, sxtw]             // prev2[mrefs2]
14544+        ldr             q23, [x17, w8, sxtw]            // next2[mrefs2]
14545+
14546+//             i1 -= coef_lf[1] * 4 * (m3 + p3);        // -
14547+        uaddl           v30.8h,  v20.8b,  v21.8b
14548+        uaddl2          v31.8h,  v20.16b, v21.16b
14549+
14550+        ldr             q28, [x3, w16, sxtw]            // prev2[prefs6]
14551+        ldr             q25, [x17, w16, sxtw]           // next2[prefs6]
14552+
14553+        UMLSL4K         v2, v3, v4, v5, v30, v31, v0.h[1]
14554+
14555+//             m2 = prev2[mrefs2] + next2[mrefs2];      // m2 = v22,v23, (m2 >> 1) = v13
14556+        uhadd           v13.16b, v23.16b, v29.16b
14557+        uaddl           v22.8h,  v23.8b,  v29.8b
14558+        uaddl2          v23.8h,  v23.16b, v29.16b
14559+
14560+        ldr             q31, [x3, w12, sxtw]            // prev2[prefs2]
14561+        ldr             q27, [x17, w12, sxtw]           // next2[prefs2]
14562+
14563+//             p6 = prev2[prefs6] + next2[prefs6];      // p6 = v24,v25
14564+        uaddl           v24.8h,  v25.8b,  v28.8b
14565+        uaddl2          v25.8h,  v25.16b, v28.16b
14566+
14567+//             j1 += coef_hf[2] * (m2 + p6);            // (-p6:v24,v25)
14568+        add             v24.8h,  v24.8h,  v22.8h
14569+        add             v25.8h,  v25.8h,  v23.8h
14570+        UMLAL4K         v6, v7, v8, v9, v24, v25, v0.h[4]
14571+
14572+//             m1 = cur[mrefs];                         // m1 = v24
14573+        ldr             q24, [x3, w9, sxtw]
14574+
14575+//             p5 = cur[prefs5];                        // p5 = v25
14576+        ldr             q25, [x3, w15, sxtw]
14577+
14578+//             p2 = prev2[prefs2] + next2[prefs2];      // p2 = v26, v27
14579+//             temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14
14580+//             d2  = p2 >> 1;                           // d2 = v15
14581+        uabd            v14.16b, v31.16b, v27.16b
14582+        uhadd           v15.16b, v31.16b, v27.16b
14583+        uaddl           v26.8h,  v27.8b,  v31.8b
14584+        uaddl2          v27.8h,  v27.16b, v31.16b
14585+
14586+//             j1 += coef_hf[0] * p2;                   // -
14587+        UMLAL4K         v6, v7, v8, v9, v26, v27, v0.h[2]
14588+
14589+//             i1 -= coef_hf[1] * (m2 + p2);            // (-m2:v22,v23*) (-p2:v26*,v27*)
14590+        add             v22.8h,  v22.8h,  v26.8h
14591+        add             v23.8h,  v23.8h,  v27.8h
14592+        UMLSL4K         v2, v3, v4, v5, v22, v23, v0.h[3]
14593+
14594+//             p1 = cur[prefs];                         // p1 = v22
14595+        ldr             q22, [x3, w11, sxtw]
14596+
14597+//             j1 -= coef_lf[1] * 4 * (m1 + p5);        // -
14598+        uaddl           v26.8h,  v24.8b,  v25.8b
14599+        uaddl2          v27.8h,  v24.16b, v25.16b
14600+        UMLSL4K         v6, v7, v8, v9, v26, v27, v0.h[1]
14601+
14602+//             j2 = (coef_sp[0] * (p1 + p3) - coef_sp[1]  * (m1 + p5)) >> 13; // (-p5:v25*) j2=v16
14603+        uaddl           v18.8h,  v22.8b,  v21.8b
14604+        uaddl2          v19.8h,  v22.16b, v21.16b
14605+        UMULL4K         v28, v29, v30, v31, v18, v19, v0.h[6]
14606+
14607+        uaddl           v18.8h,  v24.8b,  v25.8b
14608+        uaddl2          v19.8h,  v24.16b, v25.16b
14609+        UMLSL4K         v28, v29, v30, v31, v18, v19, v0.h[7]
14610+
14611+        SQSHRUNN        v16, v28, v29, v30, v31, 13
14612+
14613+//             i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17
14614+        uaddl           v18.8h,  v22.8b,  v24.8b
14615+        uaddl2          v19.8h,  v22.16b, v24.16b
14616+        UMULL4K         v28, v29, v30, v31, v18, v19, v0.h[6]
14617+
14618+        uaddl           v18.8h,  v20.8b,  v21.8b
14619+        uaddl2          v19.8h,  v20.16b, v21.16b
14620+        UMLSL4K         v28, v29, v30, v31, v18, v19, v0.h[7]
14621+
14622+        SQSHRUNN        v17, v28, v29, v30, v31, 13
14623+
14624+//             i1 += coef_lf[0] * 4 * (m1 + p1);        // p1 = v22, m1 = v24
14625+        uaddl           v26.8h,  v24.8b,  v22.8b
14626+        uaddl2          v27.8h,  v24.16b, v22.16b
14627+        UMLAL4K         v2, v3, v4, v5, v26, v27, v0.h[0]
14628+
14629+        ldr             q31, [x2, w9, sxtw]             // prev[mrefs]
14630+        ldr             q29, [x4, w9, sxtw]             // next[mrefs]
14631+
14632+//             j1 += coef_lf[0] * 4 * (p1 + p3);        // p1 = v22, p3 = v21
14633+        uaddl           v26.8h,  v21.8b,  v22.8b
14634+        uaddl2          v27.8h,  v21.16b, v22.16b
14635+        UMLAL4K         v6, v7, v8, v9, v26, v27, v0.h[0]
14636+
14637+        ldr             q30, [x2, w11, sxtw]            // prev[prefs]
14638+        ldr             q28, [x4, w11, sxtw]            // next[prefs]
14639+
14640+//             i1 >>= 15;                               // i1 = v2, -v3, -v4*, -v5*
14641+        SQSHRUNN        v2, v2, v3, v4, v5, 15
14642+
14643+//             j1 >>= 15;                               // j1 = v3, -v6*, -v7*, -v8*, -v9*
14644+        SQSHRUNN        v3, v6, v7, v8, v9, 15
14645+
14646+//             {
14647+//                 int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1;
14648+//                 int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1;
14649+        uabd            v30.16b, v22.16b, v30.16b
14650+        uabd            v31.16b, v24.16b, v31.16b
14651+        uabd            v28.16b, v22.16b, v28.16b
14652+        uabd            v29.16b, v24.16b, v29.16b
14653+        uhadd           v31.16b, v31.16b, v30.16b
14654+        uhadd           v29.16b, v29.16b, v28.16b
14655+
14656+        ldr             q27, [x2, w13, sxtw]            // prev[prefs3]
14657+        ldr             q26, [x4, w13, sxtw]            // next[prefs3]
14658+
14659+//                 diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18
14660+        ushr            v18.16b, v11.16b, #1
14661+        umax            v18.16b, v18.16b, v31.16b
14662+        umax            v18.16b, v18.16b, v29.16b
14663+//             }                                        // v28, v30 preserved for next block
14664+//             {  // tdiff2 = v14
14665+//                 int t1 =(FFABS(prev[prefs] - p1) + FFABS(prev[prefs3] - p3)) >> 1;
14666+//                 int t2 =(FFABS(next[prefs] - p1) + FFABS(next[prefs3] - p3)) >> 1;
14667+        uabd            v31.16b, v21.16b, v27.16b
14668+        uabd            v29.16b, v21.16b, v26.16b
14669+        uhadd           v31.16b, v31.16b, v30.16b
14670+        uhadd           v29.16b, v29.16b, v28.16b
14671+
14672+//                 diff2 = FFMAX3(temporal_diff2 >> 1, t1, t2); // diff2=v19
14673+        ushr            v19.16b, v14.16b, #1
14674+        umax            v19.16b, v19.16b, v31.16b
14675+        umax            v19.16b, v19.16b, v29.16b
14676+//             }
14677+
14678+        // diff0 = v18, (m2 >> 1) = v13, m1 = v24, d0 = v10, p1 = v22, d2 = v15
14679+        SPAT_CHECK      v18, v13, v24, v10, v22, v15, v31, v30, v29, v28
14680+
14681+        //  diff2 = v19, d0 = v10, p1 = v22, d2 = v15, p3 = v21, (p4 >> 1) = v12
14682+        SPAT_CHECK      v19, v10, v22, v15, v21, v12, v31, v30, v29, v28
14683+
14684+        // j1 = v3, j2 = v16, p1 = v22, d2 = v15, p3 = v21, td2 = v14, diff2 = v19
14685+        INTERPOL        v3, v3, v16, v22, v15, v21, v14, v19, v31, v30, v29
14686+
14687+//                 dst[d_stride * 2] = av_clip_uint8(interpol);
14688+        str             q3,  [x0, w5, sxtw]
14689+
14690+//             dst[d_stride] = p1;
14691+        str             q22, [x0, w1, sxtw]
14692+
14693+        // i1 = v2, i2 = v17, m1 = v24, d0 = v10, p1 = v22, td0 = v11, diff0 = v18
14694+        INTERPOL        v2, v2, v17, v24, v10, v22, v11, v18, v31, v30, v29
14695+
14696+//                 dst[0] = av_clip_uint8(interpol);
14697+        str             q2,  [x0], #16
14698+//             }
14699+//
14700+//             dst++;
14701+//             cur++;
14702+//             prev++;
14703+//             prev2++;
14704+//             next++;
14705+//         }
14706+        subs            w10, w10, #16                   // 16 pixels are produced per iteration
14707+        add             x2,  x2,  #16
14708+        add             x3,  x3,  #16
14709+        add             x4,  x4,  #16
14710+        add             x17, x17, #16
14711+        bgt             10b                             // flags are still those of the subs (add does not set flags)
14712+
14713+        POP_VREGS
14714+99:
14715+        ret
14716+endfunc
14717+
14718+// ===========================================================================
14719+//
14720+// void ff_bwdif_filter_line_neon(
14721+//      void *dst1,     // x0
14722+//      void *prev1,    // x1
14723+//      void *cur1,     // x2
14724+//      void *next1,    // x3
14725+//      int w,          // w4
14726+//      int prefs,      // w5
14727+//      int mrefs,      // w6
14728+//      int prefs2,     // w7
14729+//      int mrefs2,     // [sp, #0]
14730+//      int prefs3,     // [sp, #SP_INT]
14731+//      int mrefs3,     // [sp, #SP_INT*2]
14732+//      int prefs4,     // [sp, #SP_INT*3]
14733+//      int mrefs4,     // [sp, #SP_INT*4]
14734+//      int parity,     // [sp, #SP_INT*5]
14735+//      int clip_max)   // [sp, #SP_INT*6]
14736+
14737+function ff_bwdif_filter_line_neon, export=1
14738+        // Sanity check w
14739+        cmp             w4, #0
14740+        ble             99f                             // w <= 0: return before anything is pushed
14741+
14742+        // Rearrange regs to be the same as line3 for ease of debug!
14743+        mov             w10, w4                         // w10 = loop count
14744+        mov             w9,  w6                         // w9  = mref
14745+        mov             w12, w7                         // w12 = pref2
14746+        mov             w11, w5                         // w11 = pref
14747+        ldr             w8,  [sp, #0]                   // w8 =  mref2
14748+        ldr             w7,  [sp, #SP_INT*2]            // w7  = mref3
14749+        ldr             w6,  [sp, #SP_INT*4]            // w6  = mref4
14750+        ldr             w13, [sp, #SP_INT]              // w13 = pref3
14751+        ldr             w14, [sp, #SP_INT*3]            // w14 = pref4
14752+
14753+        mov             x4,  x3                         // x4 = next
14754+        mov             x3,  x2                         // x3 = cur
14755+        mov             x2,  x1                         // x2 = prev
14756+
14757+        LDR_COEFFS      v0, x17                         // v0 = coefficient table; x17 is only scratch here
14758+
14759+// #define prev2 cur
14760+//        const uint8_t * restrict next2 = parity ? prev : next;
14761+        ldr             w17, [sp, #SP_INT*5]            // parity
14762+        cmp             w17, #0
14763+        csel            x17, x2, x4, ne                 // x17 = next2
14764+
14765+        PUSH_VREGS                                      // moves sp, so every stack argument is read before this point
14766+
14767+//         for (x = 0; x < w; x++) {
14768+//             int diff0, diff2;
14769+//             int d0, d2;
14770+//             int temporal_diff0, temporal_diff2;
14771+//
14772+//             int i1, i2;
14773+//             int j1, j2;
14774+//             int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4;
14775+
14776+10:
14777+//             c0 = prev2[0] + next2[0];            // c0 = v20, v21
14778+//             d0  = c0 >> 1;                       // d0 = v10
14779+//             temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11
14780+        ldr             q31, [x3]                       // prev2[0] (prev2 is cur)
14781+        ldr             q21, [x17]                      // next2[0]
14782+        uhadd           v10.16b, v31.16b, v21.16b
14783+        uabd            v11.16b, v31.16b, v21.16b
14784+        uaddl           v20.8h,  v21.8b,  v31.8b
14785+        uaddl2          v21.8h,  v21.16b, v31.16b
14786+
14787+        ldr             q31, [x3, w6, sxtw]             // prev2[mrefs4]
14788+        ldr             q23, [x17, w6, sxtw]            // next2[mrefs4]
14789+
14790+//             i1 = coef_hf[0] * c0;                // i1 = v2-v5
14791+        UMULL4K         v2, v3, v4, v5, v20, v21, v0.h[2]
14792+
14793+        ldr             q30, [x3, w14, sxtw]            // prev2[prefs4]
14794+        ldr             q25, [x17, w14, sxtw]           // next2[prefs4]
14795+
14796+//             m4 = prev2[mrefs4] + next2[mrefs4];  // m4 = v22,v23
14797+        uaddl           v22.8h,  v23.8b,  v31.8b
14798+        uaddl2          v23.8h,  v23.16b, v31.16b
14799+
14800+//             p4 = prev2[prefs4] + next2[prefs4];  // p4 = v24,v25, (p4 >> 1) = v12
14801+        uhadd           v12.16b, v25.16b, v30.16b
14802+        uaddl           v24.8h,  v25.8b,  v30.8b
14803+        uaddl2          v25.8h,  v25.16b, v30.16b
14804+
14805+//             m3 = cur[mrefs3];                    // m3 = v20
14806+        ldr             q20, [x3, w7, sxtw]
14807+
14808+//             p3 = cur[prefs3];                    // p3 = v21
14809+        ldr             q21, [x3, w13, sxtw]
14810+
14811+//             i1 += coef_hf[2] * (m4 + p4);        // (-m4:v22,v23) (-p4:v24,v25)
14812+        add             v22.8h,  v22.8h,  v24.8h
14813+        add             v23.8h,  v23.8h,  v25.8h
14814+        UMLAL4K         v2, v3, v4, v5, v22, v23, v0.h[4]
14815+
14816+        ldr             q29, [x3, w8, sxtw]             // prev2[mrefs2]
14817+        ldr             q23, [x17, w8, sxtw]            // next2[mrefs2]
14818+
14819+//             i1 -= coef_lf[1] * 4 * (m3 + p3);    // -
14820+        uaddl           v30.8h,  v20.8b,  v21.8b
14821+        uaddl2          v31.8h,  v20.16b, v21.16b
14822+
14823+        UMLSL4K         v2, v3, v4, v5, v30, v31, v0.h[1]
14824+
14825+        ldr             q31, [x3, w12, sxtw]            // prev2[prefs2]
14826+        ldr             q27, [x17, w12, sxtw]           // next2[prefs2]
14827+
14828+//             m2 = prev2[mrefs2] + next2[mrefs2];  // m2 = v22,v23, (m2 >> 1) = v13
14829+        uhadd           v13.16b, v23.16b, v29.16b
14830+        uaddl           v22.8h,  v23.8b,  v29.8b
14831+        uaddl2          v23.8h,  v23.16b, v29.16b
14832+
14833+//             m1 = cur[mrefs];                     // m1 = v24
14834+        ldr             q24, [x3, w9, sxtw]
14835+
14836+//             p2 = prev2[prefs2] + next2[prefs2];  // p2 = v26, v27
14837+//             temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14
14838+//             d2  = p2 >> 1;                       // d2 = v15
14839+        uabd            v14.16b, v31.16b, v27.16b
14840+        uhadd           v15.16b, v31.16b, v27.16b
14841+        uaddl           v26.8h,  v27.8b,  v31.8b
14842+        uaddl2          v27.8h,  v27.16b, v31.16b
14843+
14844+//             i1 -= coef_hf[1] * (m2 + p2);        // (-m2:v22,v23*) (-p2:v26*,v27*)
14845+        add             v22.8h,  v22.8h,  v26.8h
14846+        add             v23.8h,  v23.8h,  v27.8h
14847+        UMLSL4K         v2, v3, v4, v5, v22, v23, v0.h[3]
14848+
14849+//             p1 = cur[prefs];                     // p1 = v22
14850+        ldr             q22, [x3, w11, sxtw]
14851+
14852+//             i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17
14853+        uaddl           v18.8h,  v22.8b,  v24.8b
14854+        uaddl2          v19.8h,  v22.16b, v24.16b
14855+        UMULL4K         v28, v29, v30, v31, v18, v19, v0.h[6]
14856+
14857+        uaddl           v18.8h,  v20.8b,  v21.8b
14858+        uaddl2          v19.8h,  v20.16b, v21.16b
14859+        UMLSL4K         v28, v29, v30, v31, v18, v19, v0.h[7]
14860+
14861+        SQSHRUNN        v17, v28, v29, v30, v31, 13
14862+
14863+//             i1 += coef_lf[0] * 4 * (m1 + p1);    // p1 = v22, m1 = v24
14864+        uaddl           v26.8h,  v24.8b,  v22.8b
14865+        uaddl2          v27.8h,  v24.16b, v22.16b
14866+        UMLAL4K         v2, v3, v4, v5, v26, v27, v0.h[0]
14867+
14868+        ldr             q31, [x2, w9, sxtw]             // prev[mrefs]
14869+        ldr             q29, [x4, w9, sxtw]             // next[mrefs]
14870+
14871+        ldr             q30, [x2, w11, sxtw]            // prev[prefs]
14872+        ldr             q28, [x4, w11, sxtw]            // next[prefs]
14873+
14874+//             i1 >>= 15;                            // i1 = v2, -v3, -v4*, -v5*
14875+        SQSHRUNN        v2, v2, v3, v4, v5, 15
14876+
14877+//             {
14878+//                 int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1;
14879+//                 int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1;
14880+        uabd            v30.16b, v22.16b, v30.16b
14881+        uabd            v31.16b, v24.16b, v31.16b
14882+        uabd            v28.16b, v22.16b, v28.16b
14883+        uabd            v29.16b, v24.16b, v29.16b
14884+        uhadd           v31.16b, v31.16b, v30.16b
14885+        uhadd           v29.16b, v29.16b, v28.16b
14886+
14887+//                 diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18
14888+        ushr            v18.16b, v11.16b, #1
14889+        umax            v18.16b, v18.16b, v31.16b
14890+        umax            v18.16b, v18.16b, v29.16b
14891+
14892+        // diff0 = v18, (m2 >> 1) = v13, m1 = v24, d0 = v10, p1 = v22, d2 = v15
14893+        SPAT_CHECK      v18, v13, v24, v10, v22, v15, v31, v30, v29, v28
14894+
14895+        // i1 = v2, i2 = v17, m1 = v24, d0 = v10, p1 = v22, td0 = v11, diff0 = v18
14896+        INTERPOL        v2, v2, v17, v24, v10, v22, v11, v18, v31, v30, v29
14897+
14898+//                 dst[0] = av_clip_uint8(interpol);
14899+        str             q2,  [x0], #16
14900+//             }
14901+//
14902+//             dst++;
14903+//             cur++;
14904+//             prev++;
14905+//             prev2++;
14906+//             next++;
14907+//         }
14908+
14909+        subs            w10, w10, #16                   // 16 pixels are produced per iteration
14910+        add             x2,  x2,  #16
14911+        add             x3,  x3,  #16
14912+        add             x4,  x4,  #16
14913+        add             x17, x17, #16
14914+        bgt             10b                             // flags are still those of the subs (add does not set flags)
14915+
14916+        POP_VREGS
14917+99:
14918+        ret
14919+endfunc
14920+
14921+// ============================================================================
14922+//
14923+// void ff_bwdif_filter_edge_neon(
14924+//      void *dst1,     // x0
14925+//      void *prev1,    // x1
14926+//      void *cur1,     // x2
14927+//      void *next1,    // x3
14928+//      int w,          // w4
14929+//      int prefs,      // w5
14930+//      int mrefs,      // w6
14931+//      int prefs2,     // w7
14932+//      int mrefs2,     // [sp, #0]
14933+//      int parity,     // [sp, #SP_INT]
14934+//      int clip_max,   // [sp, #SP_INT*2]  unused
14935+//      int spat);      // [sp, #SP_INT*3]
14936+
14937+function ff_bwdif_filter_edge_neon, export=1
14938+        // Sanity check w
14939+        cmp             w4, #0
14940+        ble             99f                             // w <= 0: nothing to do
14941+
14942+// #define prev2 cur
14943+//     const uint8_t * restrict next2 = parity ? prev : next;
14944+
14945+        ldr             w8,  [sp, #0]                   // mrefs2
14946+
14947+        ldr             w17, [sp, #SP_INT]              // parity
14948+        ldr             w16, [sp, #SP_INT*3]            // spat
14949+        cmp             w17, #0
14950+        csel            x17, x1, x3, ne                 // x17 = next2
14951+
14952+//     for (x = 0; x < w; x++) {
14953+
14954+10:
14955+//        int m1 = cur[mrefs];
14956+//        int d = (prev2[0] + next2[0]) >> 1;
14957+//        int p1 = cur[prefs];
14958+//        int temporal_diff0 = FFABS(prev2[0] - next2[0]);
14959+//        int temporal_diff1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1;
14960+//        int temporal_diff2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1;
14961+//        int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2);
14962+        ldr             q31, [x2]                       // prev2[0] (prev2 is cur)
14963+        ldr             q21, [x17]                      // next2[0]
14964+        uhadd           v16.16b, v31.16b, v21.16b       // d0 = v16
14965+        uabd            v17.16b, v31.16b, v21.16b       // td0 = v17
14966+        ldr             q24, [x2, w6, sxtw]             // m1 = v24
14967+        ldr             q22, [x2, w5, sxtw]             // p1 = v22
14968+
14969+        ldr             q0,  [x1, w6, sxtw]             // prev[mrefs]
14970+        ldr             q2,  [x1, w5, sxtw]             // prev[prefs]
14971+        ldr             q1,  [x3, w6, sxtw]             // next[mrefs]
14972+        ldr             q3,  [x3, w5, sxtw]             // next[prefs]
14973+
14974+        ushr            v29.16b, v17.16b, #1            // v29 = temporal_diff0 >> 1
14975+
14976+        uabd            v31.16b, v0.16b,  v24.16b
14977+        uabd            v30.16b, v2.16b,  v22.16b
14978+        uhadd           v0.16b,  v31.16b, v30.16b       // td1 = q0
14979+
14980+        uabd            v31.16b, v1.16b,  v24.16b
14981+        uabd            v30.16b, v3.16b,  v22.16b
14982+        uhadd           v1.16b,  v31.16b, v30.16b       // td2 = q1
14983+
14984+        umax            v0.16b,  v0.16b,  v29.16b
14985+        umax            v0.16b,  v0.16b,  v1.16b        // diff = v0
14986+
14987+//        if (spat) {
14988+//            SPAT_CHECK()
14989+//        }
14990+//        i0 = (m1 + p1) >> 1;
14991+        cbz             w16, 1f                         // spat == 0: skip the spatial check
14992+
14993+        ldr             q31, [x2,  w8, sxtw]            // prev2[mrefs2]
14994+        ldr             q18, [x17, w8, sxtw]            // next2[mrefs2]
14995+        ldr             q30, [x2,  w7, sxtw]            // prev2[prefs2]
14996+        ldr             q19, [x17, w7, sxtw]            // next2[prefs2]
14997+        uhadd           v18.16b, v18.16b, v31.16b       // v18 = (prev2[mrefs2] + next2[mrefs2]) >> 1
14998+        uhadd           v19.16b, v19.16b, v30.16b       // v19 = (prev2[prefs2] + next2[prefs2]) >> 1
14999+
15000+        SPAT_CHECK      v0, v18, v24, v16, v22, v19, v31, v30, v29, v28
15001+
15002+1:
15003+        uhadd           v2.16b,  v22.16b, v24.16b       // i0 = (m1 + p1) >> 1 = v2
15004+
15005+        // i0 = v2, s0 = v2, d0 = v16, diff = v0, t0 = v31, t1 = v30
15006+        DIFF_CLIP       v2, v2, v16, v0, v31, v30
15007+
15008+//        dst[0] = av_clip(interpol, 0, clip_max);
15009+        str             q2, [x0], #16                   // clip_max is never loaded; results are stored as bytes
15010+
15011+//        dst++;
15012+//        cur++;
15013+//    }
15014+        subs            w4,  w4,  #16                   // 16 pixels are produced per iteration
15015+        add             x1,  x1,  #16
15016+        add             x2,  x2,  #16
15017+        add             x3,  x3,  #16
15018+        add             x17, x17, #16
15019+        bgt             10b                             // flags are still those of the subs (add does not set flags)
15020+
15021+99:
15022+        ret
15023+endfunc
15024+
15025+// ============================================================================
15026+//
15027+// void ff_bwdif_filter_intra_neon(
15028+//      void *dst1,     // x0
15029+//      void *cur1,     // x1
15030+//      int w,          // w2
15031+//      int prefs,      // w3
15032+//      int mrefs,      // w4
15033+//      int prefs3,     // w5
15034+//      int mrefs3,     // w6
15035+//      int parity,     // w7       unused
15036+//      int clip_max)   // [sp, #0] unused
15037+
15038+function ff_bwdif_filter_intra_neon, export=1
15039+        cmp             w2, #0
15040+        ble             99f                             // w <= 0: nothing to do
15041+
15042+        LDR_COEFFS      v0, x17                         // v0 = coefficient table; x17 is scratch
15043+
15044+//    for (x = 0; x < w; x++) {
15045+10:
15046+
15047+//        interpol = (coef_sp[0] * (cur[mrefs] + cur[prefs]) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13;
15048+        ldr             q31, [x1, w4, sxtw]             // cur[mrefs]
15049+        ldr             q30, [x1, w3, sxtw]             // cur[prefs]
15050+        ldr             q29, [x1, w6, sxtw]             // cur[mrefs3]
15051+        ldr             q28, [x1, w5, sxtw]             // cur[prefs3]
15052+
15053+        uaddl           v20.8h,  v31.8b,  v30.8b        // v20/v21 = cur[mrefs] + cur[prefs], widened to 16 bits
15054+        uaddl2          v21.8h,  v31.16b, v30.16b
15055+
15056+        UMULL4K         v2, v3, v4, v5, v20, v21, v0.h[6]
15057+
15058+        uaddl           v20.8h,  v29.8b,  v28.8b        // v20/v21 = cur[mrefs3] + cur[prefs3], widened to 16 bits
15059+        uaddl2          v21.8h,  v29.16b, v28.16b
15060+
15061+        UMLSL4K         v2, v3, v4, v5, v20, v21, v0.h[7]
15062+
15063+//        dst[0] = av_clip(interpol, 0, clip_max);
15064+        SQSHRUNN        v2, v2, v3, v4, v5, 13          // macro defined outside this view; presumably >> 13 with saturating narrow to bytes
15065+        str             q2, [x0], #16
15066+
15067+//        dst++;
15068+//        cur++;
15069+//    }
15070+
15071+        subs            w2,  w2,  #16                   // 16 pixels are produced per iteration
15072+        add             x1,  x1,  #16
15073+        bgt             10b                             // flags are still those of the subs (add does not set flags)
15074+
15075+99:
15076+        ret
15077+endfunc
15078--- a/libavfilter/allfilters.c
15079+++ b/libavfilter/allfilters.c
15080@@ -242,6 +242,7 @@ extern const AVFilter ff_vf_derain;
15081 extern const AVFilter ff_vf_deshake;
15082 extern const AVFilter ff_vf_deshake_opencl;
15083 extern const AVFilter ff_vf_despill;
15084+extern const AVFilter ff_vf_deinterlace_v4l2m2m;
15085 extern const AVFilter ff_vf_detelecine;
15086 extern const AVFilter ff_vf_dilation;
15087 extern const AVFilter ff_vf_dilation_opencl;
15088@@ -414,6 +415,7 @@ extern const AVFilter ff_vf_scale;
15089 extern const AVFilter ff_vf_scale_cuda;
15090 extern const AVFilter ff_vf_scale_npp;
15091 extern const AVFilter ff_vf_scale_qsv;
15092+extern const AVFilter ff_vf_scale_v4l2m2m;
15093 extern const AVFilter ff_vf_scale_vaapi;
15094 extern const AVFilter ff_vf_scale_vulkan;
15095 extern const AVFilter ff_vf_scale2ref;
15096@@ -483,6 +485,7 @@ extern const AVFilter ff_vf_trim;
15097 extern const AVFilter ff_vf_unpremultiply;
15098 extern const AVFilter ff_vf_unsharp;
15099 extern const AVFilter ff_vf_unsharp_opencl;
15100+extern const AVFilter ff_vf_unsand;
15101 extern const AVFilter ff_vf_untile;
15102 extern const AVFilter ff_vf_uspp;
15103 extern const AVFilter ff_vf_v360;
15104--- a/libavfilter/buffersink.c
15105+++ b/libavfilter/buffersink.c
15106@@ -62,6 +62,11 @@ typedef struct BufferSinkContext {
15107     int sample_rates_size;
15108
15109     AVFrame *peeked_frame;
15110+
15111+    union {
15112+        av_buffersink_alloc_video_frame * video;
15113+    } alloc_cb;
15114+    void * alloc_v;
15115 } BufferSinkContext;
15116
15117 #define NB_ITEMS(list) (list ## _size / sizeof(*list))
15118@@ -154,6 +159,22 @@ int attribute_align_arg av_buffersink_ge
15119     return get_frame_internal(ctx, frame, 0, nb_samples);
15120 }
15121
15122+static AVFrame * alloc_video_buffer(AVFilterLink *link, int w, int h)
15123+{
15124+    BufferSinkContext * const sink = link->dst->priv;
15125+    if (sink->alloc_cb.video != NULL)   // user allocator installed: it takes priority
15126+        return sink->alloc_cb.video(link->dst, sink->alloc_v, w, h);
15127+    return ff_default_get_video_buffer(link, w, h);
15128+}
15129+
15130+int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v)
15131+{
15132+    BufferSinkContext * const sink = ctx->priv;
15133+    sink->alloc_v        = v;   // opaque pointer handed back to cb on every call
15134+    sink->alloc_cb.video = cb;  // NULL cb => alloc_video_buffer() uses the default allocator
15135+    return 0;
15136+}
15137+
15138 #if FF_API_BUFFERSINK_ALLOC
15139 AVBufferSinkParams *av_buffersink_params_alloc(void)
15140 {
15141@@ -403,6 +424,7 @@ static const AVFilterPad avfilter_vsink_
15142     {
15143         .name = "default",
15144         .type = AVMEDIA_TYPE_VIDEO,
15145+        .get_buffer = {.video = alloc_video_buffer},
15146     },
15147 };
15148
15149--- a/libavfilter/buffersink.h
15150+++ b/libavfilter/buffersink.h
15151@@ -202,6 +202,9 @@ int av_buffersink_get_frame(AVFilterCont
15152  */
15153 int av_buffersink_get_samples(AVFilterContext *ctx, AVFrame *frame, int nb_samples);
15154
15155+typedef AVFrame * av_buffersink_alloc_video_frame(AVFilterContext * ctx, void * v, int w, int h);
15156+int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v);
15157+
15158 /**
15159  * @}
15160  */
15161--- a/libavfilter/buffersrc.c
15162+++ b/libavfilter/buffersrc.c
15163@@ -204,7 +204,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
15164
15165         switch (ctx->outputs[0]->type) {
15166         case AVMEDIA_TYPE_VIDEO:
15167-            CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height,
15168+            CHECK_VIDEO_PARAM_CHANGE(ctx, s, av_frame_cropped_width(frame), av_frame_cropped_height(frame),
15169                                      frame->format, frame->pts);
15170             break;
15171         case AVMEDIA_TYPE_AUDIO:
15172--- a/libavfilter/bwdif.h
15173+++ b/libavfilter/bwdif.h
15174@@ -35,8 +35,29 @@ typedef struct BWDIFContext {
15175     void (*filter_edge)(void *dst, void *prev, void *cur, void *next,
15176                         int w, int prefs, int mrefs, int prefs2, int mrefs2,
15177                         int parity, int clip_max, int spat);
15178+    void (*filter_line3)(void *dst, int dstride,
15179+                         const void *prev, const void *cur, const void *next, int prefs,
15180+                         int w, int parity, int clip_max);
15181 } BWDIFContext;
15182
15183-void ff_bwdif_init_x86(BWDIFContext *bwdif);
15184+void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth);
15185+void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth);
15186+void ff_bwdif_init_aarch64(BWDIFContext *bwdif, int bit_depth);
15187+
15188+void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1,
15189+                            int w, int prefs, int mrefs, int prefs2, int mrefs2,
15190+                            int parity, int clip_max, int spat);
15191+
15192+void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs,
15193+                             int prefs3, int mrefs3, int parity, int clip_max);
15194+
15195+void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
15196+                            int w, int prefs, int mrefs, int prefs2, int mrefs2,
15197+                            int prefs3, int mrefs3, int prefs4, int mrefs4,
15198+                            int parity, int clip_max);
15199+
15200+void ff_bwdif_filter_line3_c(void * dst1, int d_stride,
15201+                             const void * prev1, const void * cur1, const void * next1, int s_stride,
15202+                             int w, int parity, int clip_max);
15203
15204 #endif /* AVFILTER_BWDIF_H */
15205--- a/libavfilter/vf_bwdif.c
15206+++ b/libavfilter/vf_bwdif.c
15207@@ -122,8 +122,8 @@ typedef struct ThreadData {
15208         next2++; \
15209     }
15210
15211-static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs,
15212-                         int prefs3, int mrefs3, int parity, int clip_max)
15213+void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs,
15214+                             int prefs3, int mrefs3, int parity, int clip_max)
15215 {
15216     uint8_t *dst = dst1;
15217     uint8_t *cur = cur1;
15218@@ -132,10 +132,10 @@ static void filter_intra(void *dst1, voi
15219     FILTER_INTRA()
15220 }
15221
15222-static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
15223-                          int w, int prefs, int mrefs, int prefs2, int mrefs2,
15224-                          int prefs3, int mrefs3, int prefs4, int mrefs4,
15225-                          int parity, int clip_max)
15226+void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
15227+                            int w, int prefs, int mrefs, int prefs2, int mrefs2,
15228+                            int prefs3, int mrefs3, int prefs4, int mrefs4,
15229+                            int parity, int clip_max)
15230 {
15231     uint8_t *dst   = dst1;
15232     uint8_t *prev  = prev1;
15233@@ -150,9 +150,34 @@ static void filter_line_c(void *dst1, vo
15234     FILTER2()
15235 }
15236
15237-static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1,
15238-                        int w, int prefs, int mrefs, int prefs2, int mrefs2,
15239-                        int parity, int clip_max, int spat)
15240+#define NEXT_LINE()\
15241+    dst += d_stride; \
15242+    prev += prefs; \
15243+    cur  += prefs; \
15244+    next += prefs;
15245+// C version of filter_line3: writes 3 output rows - filtered, copied, filtered
15246+void ff_bwdif_filter_line3_c(void * dst1, int d_stride,
15247+                             const void * prev1, const void * cur1, const void * next1, int s_stride,
15248+                             int w, int parity, int clip_max)
15249+{
15250+    const int prefs = s_stride;     // per-row step for prev/cur/next (byte pointers)
15251+    uint8_t * dst  = dst1;
15252+    const uint8_t * prev = prev1;
15253+    const uint8_t * cur  = cur1;
15254+    const uint8_t * next = next1;
15255+    // Row 0: full line filter, with reference rows up to +/-4 source rows away
15256+    ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w,
15257+                           prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max);
15258+    NEXT_LINE();
15259+    memcpy(dst, cur, w);            // row 1: straight copy of w bytes from cur
15260+    NEXT_LINE();
15261+    ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w,
15262+                           prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max);
15263+}
15264+
15265+void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1,
15266+                            int w, int prefs, int mrefs, int prefs2, int mrefs2,
15267+                            int parity, int clip_max, int spat)
15268 {
15269     uint8_t *dst   = dst1;
15270     uint8_t *prev  = prev1;
15271@@ -212,6 +237,13 @@ static void filter_edge_16bit(void *dst1
15272     FILTER2()
15273 }
15274
15275+// Job start row rounded down to x4: with filter_line3 and h % 4 == 0, filter_line is never called
15276+static inline int job_start(const int jobnr, const int nb_jobs, const int h)
15277+{
15278+    if (jobnr >= nb_jobs) return h;      // one past the last job: end of the frame
15279+    return ((h * jobnr) / nb_jobs) & ~3;
15280+}
15281+
15282 static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
15283 {
15284     BWDIFContext *s = ctx->priv;
15285@@ -221,8 +253,8 @@ static int filter_slice(AVFilterContext
15286     int clip_max = (1 << (yadif->csp->comp[td->plane].depth)) - 1;
15287     int df = (yadif->csp->comp[td->plane].depth + 7) / 8;
15288     int refs = linesize / df;
15289-    int slice_start = (td->h *  jobnr   ) / nb_jobs;
15290-    int slice_end   = (td->h * (jobnr+1)) / nb_jobs;
15291+    int slice_start = job_start(jobnr, nb_jobs, td->h);
15292+    int slice_end   = job_start(jobnr + 1, nb_jobs, td->h);
15293     int y;
15294
15295     for (y = slice_start; y < slice_end; y++) {
15296@@ -244,6 +276,11 @@ static int filter_slice(AVFilterContext
15297                                refs << 1, -(refs << 1),
15298                                td->parity ^ td->tff, clip_max,
15299                                (y < 2) || ((y + 3) > td->h) ? 0 : 1);
15300+            } else if (s->filter_line3 && y + 2 < slice_end && y + 6 < td->h) {
15301+                s->filter_line3(dst, td->frame->linesize[td->plane],
15302+                                prev, cur, next, linesize, td->w,
15303+                                td->parity ^ td->tff, clip_max);
15304+                y += 2;
15305             } else {
15306                 s->filter_line(dst, prev, cur, next, td->w,
15307                                refs, -refs, refs << 1, -(refs << 1),
15308@@ -265,22 +302,31 @@ static void filter(AVFilterContext *ctx,
15309     YADIFContext *yadif = &bwdif->yadif;
15310     ThreadData td = { .frame = dstpic, .parity = parity, .tff = tff };
15311     int i;
15312+    int last_plane = -1;
15313
15314     for (i = 0; i < yadif->csp->nb_components; i++) {
15315         int w = dstpic->width;
15316         int h = dstpic->height;
15317+        const AVComponentDescriptor * const comp = yadif->csp->comp + i;
15318+
15319+        // If the last plane was the same as this plane assume we've dealt
15320+        // with all the pels already
15321+        if (last_plane == comp->plane)
15322+            continue;
15323+        last_plane = comp->plane;
15324
15325         if (i == 1 || i == 2) {
15326             w = AV_CEIL_RSHIFT(w, yadif->csp->log2_chroma_w);
15327             h = AV_CEIL_RSHIFT(h, yadif->csp->log2_chroma_h);
15328         }
15329
15330-        td.w     = w;
15331-        td.h     = h;
15332-        td.plane = i;
15333+        // comp step is in bytes but td.w is in pels
15334+        td.w       = w * comp->step / ((comp->depth + 7) / 8);
15335+        td.h       = h;
15336+        td.plane   = comp->plane;
15337
15338         ff_filter_execute(ctx, filter_slice, &td, NULL,
15339-                          FFMIN(h, ff_filter_get_nb_threads(ctx)));
15340+                          FFMIN((h+3)/4, ff_filter_get_nb_threads(ctx)));
15341     }
15342     if (yadif->current_field == YADIF_FIELD_END) {
15343         yadif->current_field = YADIF_FIELD_NORMAL;
15344@@ -313,6 +359,7 @@ static const enum AVPixelFormat pix_fmts
15345     AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
15346     AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
15347     AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
15348+    AV_PIX_FMT_NV12,
15349     AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
15350     AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
15351     AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16,
15352@@ -340,21 +387,29 @@ static int config_props(AVFilterLink *li
15353
15354     yadif->csp = av_pix_fmt_desc_get(link->format);
15355     yadif->filter = filter;
15356-    if (yadif->csp->comp[0].depth > 8) {
15357+    ff_bwdif_init_filter_line(s, yadif->csp->comp[0].depth);
15358+
15359+    return 0;
15360+}
15361+
15362+av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth)
15363+{
15364+    s->filter_line3 = 0;
15365+    if (bit_depth > 8) {
15366         s->filter_intra = filter_intra_16bit;
15367         s->filter_line  = filter_line_c_16bit;
15368         s->filter_edge  = filter_edge_16bit;
15369     } else {
15370-        s->filter_intra = filter_intra;
15371-        s->filter_line  = filter_line_c;
15372-        s->filter_edge  = filter_edge;
15373+        s->filter_intra = ff_bwdif_filter_intra_c;
15374+        s->filter_line  = ff_bwdif_filter_line_c;
15375+        s->filter_edge  = ff_bwdif_filter_edge_c;
15376     }
15377
15378 #if ARCH_X86
15379-    ff_bwdif_init_x86(s);
15380+    ff_bwdif_init_x86(s, bit_depth);
15381+#elif ARCH_AARCH64
15382+    ff_bwdif_init_aarch64(s, bit_depth);
15383 #endif
15384-
15385-    return 0;
15386 }
15387
15388
15389--- /dev/null
15390+++ b/libavfilter/vf_deinterlace_v4l2m2m.c
15391@@ -0,0 +1,2102 @@
15392+/*
15393+ * This file is part of FFmpeg.
15394+ *
15395+ * FFmpeg is free software; you can redistribute it and/or
15396+ * modify it under the terms of the GNU Lesser General Public
15397+ * License as published by the Free Software Foundation; either
15398+ * version 2.1 of the License, or (at your option) any later version.
15399+ *
15400+ * FFmpeg is distributed in the hope that it will be useful,
15401+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15402+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15403+ * Lesser General Public License for more details.
15404+ *
15405+ * You should have received a copy of the GNU Lesser General Public
15406+ * License along with FFmpeg; if not, write to the Free Software
15407+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
15408+ */
15409+
15410+/**
15411+ * @file
15412+ * deinterlace video filter - V4L2 M2M
15413+ */
15414+
15415+#include <drm_fourcc.h>
15416+
15417+#include <linux/videodev2.h>
15418+
15419+#include <dirent.h>
15420+#include <fcntl.h>
15421+#include <poll.h>
15422+#include <stdatomic.h>
15423+#include <stdio.h>
15424+#include <string.h>
15425+#include <sys/ioctl.h>
15426+#include <sys/mman.h>
15427+#include <unistd.h>
15428+
15429+#include "config.h"
15430+
15431+#include "libavutil/avassert.h"
15432+#include "libavutil/avstring.h"
15433+#include "libavutil/common.h"
15434+#include "libavutil/hwcontext.h"
15435+#include "libavutil/hwcontext_drm.h"
15436+#include "libavutil/internal.h"
15437+#include "libavutil/mathematics.h"
15438+#include "libavutil/opt.h"
15439+#include "libavutil/pixdesc.h"
15440+#include "libavutil/time.h"
15441+
15442+#define FF_INTERNAL_FIELDS 1
15443+#include "framequeue.h"
15444+#include "filters.h"
15445+#include "avfilter.h"
15446+#include "formats.h"
15447+#include "internal.h"
15448+#include "scale_eval.h"
15449+#include "video.h"
15450+
15451+#ifndef DRM_FORMAT_P030
15452+#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */
15453+#endif
15454+
15455+// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
15456+// in drm_fourcc.h; hopefully they will be sometime in the future, but until then...
15457+#ifndef V4L2_PIX_FMT_NV12_10_COL128
15458+#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
15459+#endif
15460+
15461+#ifndef V4L2_PIX_FMT_NV12_COL128
15462+#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12  Y/CbCr 4:2:0 128 pixel wide column */
15463+#endif
15464+
15465+typedef struct V4L2Queue V4L2Queue;
15466+typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared;
15467+
15468+typedef enum filter_type_v4l2_e
15469+{
15470+    FILTER_V4L2_DEINTERLACE = 1,
15471+    FILTER_V4L2_SCALE,
15472+} filter_type_v4l2_t;
15473+
15474+typedef struct V4L2Buffer {
15475+    int enqueued;
15476+    int reenqueue;
15477+    struct v4l2_buffer buffer;
15478+    AVFrame frame;
15479+    struct v4l2_plane planes[VIDEO_MAX_PLANES];
15480+    int num_planes;
15481+    AVDRMFrameDescriptor drm_frame;
15482+    V4L2Queue *q;
15483+} V4L2Buffer;
15484+
15485+typedef struct V4L2Queue {
15486+    struct v4l2_format format;
15487+    struct v4l2_selection sel;
15488+    int eos;
15489+    int num_buffers;
15490+    V4L2Buffer *buffers;
15491+    const char * name;
15492+    DeintV4L2M2MContextShared *ctx;
15493+} V4L2Queue;
15494+
15495+typedef struct pts_stats_s
15496+{
15497+    void * logctx;               // av_log context used by pts_stats_add()
15498+    const char * name;  // For debug
15499+    unsigned int last_count;     // frames counted since last_pts was recorded (saturates)
15500+    unsigned int last_interval;  // current per-frame interval estimate; 0 = unknown
15501+    int64_t last_pts;            // last distinct PTS seen; AV_NOPTS_VALUE until the first
15502+} pts_stats_t;
15503+
15504+#define PTS_TRACK_SIZE 32
15505+typedef struct pts_track_el_s
15506+{
15507+    uint32_t n;             // sequence number of the frame held here; 0 = slot empty
15508+    unsigned int interval;  // guessed frame interval; 0 = no guess available
15509+    AVFrame * props;        // frame used only as a holder for copied properties
15510+} pts_track_el_t;
15511+
15512+typedef struct pts_track_s
15513+{
15514+    uint32_t n;         // most recently issued sequence number
15515+    uint32_t last_n;    // sequence number last matched by pts_track_get_frame(); 0 after a failure
15516+    int got_2;          // set once a second frame has been returned for last_n
15517+    void * logctx;      // av_log context for track errors
15518+    pts_stats_t stats;  // running PTS interval estimate
15519+    pts_track_el_t a[PTS_TRACK_SIZE];   // ring of slots indexed by n & (PTS_TRACK_SIZE - 1)
15520+} pts_track_t;
15521+
15522+typedef enum drain_state_e
15523+{
15524+    DRAIN_NONE = 0,     // Not draining
15525+    DRAIN_TIMEOUT,      // Drain until normal timeout setup yields no frame
15526+    DRAIN_LAST,         // Drain with long timeout; last frame expected on output (NOTE(review): original wording garbled - confirm)
15527+    DRAIN_EOS,          // Drain with long timeout EOS expected
15528+    DRAIN_DONE          // Drained
15529+} drain_state_t;
15530+
15531+typedef struct DeintV4L2M2MContextShared {
15532+    void * logctx;  // For logging - will be NULL when done
15533+    filter_type_v4l2_t filter_type;
15534+
15535+    int fd;
15536+    int done;   // fd closed - awaiting all refs dropped
15537+    int width;
15538+    int height;
15539+
15540+    int drain;          // EOS received (inlink status)
15541+    drain_state_t drain_state;
15542+    int64_t drain_pts;  // PTS associated with inlink status
15543+
15544+    unsigned int frames_rx;
15545+    unsigned int frames_tx;
15546+
15547+    // from options
15548+    int output_width;
15549+    int output_height;
15550+    enum AVPixelFormat output_format;
15551+
15552+    int has_enc_stop;
15553+    // We expect to get exactly the same number of frames out as we put in
15554+    // We can drain by matching input to output
15555+    int one_to_one;
15556+
15557+    int orig_width;
15558+    int orig_height;
15559+    atomic_uint refcount;
15560+
15561+    AVBufferRef *hw_frames_ctx;
15562+
15563+    unsigned int field_order;
15564+
15565+    pts_track_t track;
15566+
15567+    V4L2Queue output;
15568+    V4L2Queue capture;
15569+} DeintV4L2M2MContextShared;
15570+
15571+typedef struct DeintV4L2M2MContext {
15572+    const AVClass *class;
15573+
15574+    DeintV4L2M2MContextShared *shared;
15575+    // Option strings - NOTE(review): presumably filled in via AVOptions; confirm
15576+    char * w_expr;
15577+    char * h_expr;
15578+    char * output_format_string;
15579+
15580+    int force_original_aspect_ratio;
15581+    int force_divisible_by;
15582+
15583+    char *colour_primaries_string;
15584+    char *colour_transfer_string;
15585+    char *colour_matrix_string;
15586+    int   colour_range;
15587+    char *chroma_location_string;
15588+    // Enum forms of the colour strings above - TODO confirm where they are parsed
15589+    enum AVColorPrimaries colour_primaries;
15590+    enum AVColorTransferCharacteristic colour_transfer;
15591+    enum AVColorSpace colour_matrix;
15592+    enum AVChromaLocation chroma_location;
15593+} DeintV4L2M2MContext;
15594+
15595+
15596+static inline int drain_frame_expected(const drain_state_t d)
15597+{
15598+    return d == DRAIN_LAST || d == DRAIN_EOS;   // the two "long timeout" drain states
15599+}
15600+
15601+// These just list the ones we know we can cope with
15602+static uint32_t
15603+fmt_av_to_v4l2(const enum AVPixelFormat avfmt)
15604+{
15605+    if (avfmt == AV_PIX_FMT_YUV420P)
15606+        return V4L2_PIX_FMT_YUV420;
15607+
15608+    if (avfmt == AV_PIX_FMT_NV12)
15609+        return V4L2_PIX_FMT_NV12;
15610+
15611+#if CONFIG_SAND
15612+    // Both of these AV formats map to the same V4L2 fourcc
15613+    if (avfmt == AV_PIX_FMT_RPI4_8 || avfmt == AV_PIX_FMT_SAND128)
15614+        return V4L2_PIX_FMT_NV12_COL128;
15615+#endif
15616+
15617+    // Anything else has no mapping
15618+    return 0;
15619+}
15620+
15621+static enum AVPixelFormat
15622+fmt_v4l2_to_av(const uint32_t pixfmt)
15623+{
15624+    // Reverse lookup of the V4L2 fourccs listed in fmt_av_to_v4l2()
15625+    if (pixfmt == V4L2_PIX_FMT_YUV420)
15626+        return AV_PIX_FMT_YUV420P;
15627+
15628+    if (pixfmt == V4L2_PIX_FMT_NV12)
15629+        return AV_PIX_FMT_NV12;
15630+
15631+#if CONFIG_SAND
15632+    if (pixfmt == V4L2_PIX_FMT_NV12_COL128)
15633+        return AV_PIX_FMT_RPI4_8;
15634+#endif
15635+
15636+    return AV_PIX_FMT_NONE;    // no known equivalent
15637+}
15638+
15639+static unsigned int pts_stats_interval(const pts_stats_t * const stats)
15640+{
15641+    return stats->last_interval;    // current interval estimate; 0 when pts_stats_add() has none
15642+}
15643+
15644+// Pick 64 for max last count - that is >1sec at 60fps
15645+#define STATS_LAST_COUNT_MAX 64
15646+#define STATS_INTERVAL_MAX (1 << 30)
15647+static void pts_stats_add(pts_stats_t * const stats, int64_t pts)
15648+{
15649+    if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) {  // no new timestamp: just count the frame
15650+        if (stats->last_count < STATS_LAST_COUNT_MAX)
15651+            ++stats->last_count;
15652+        return;
15653+    }
15654+    // From here on pts is a valid timestamp that differs from last_pts
15655+    if (stats->last_pts != AV_NOPTS_VALUE) {
15656+        const int64_t interval = pts - stats->last_pts;
15657+        // Backwards, huge or too-many-frames gaps reset the estimate to unknown (0)
15658+        if (interval < 0 || interval >= STATS_INTERVAL_MAX ||
15659+            stats->last_count >= STATS_LAST_COUNT_MAX) {
15660+            if (stats->last_interval != 0)
15661+                av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n",
15662+                       __func__, stats->name, interval, stats->last_count);
15663+            stats->last_interval = 0;
15664+        }
15665+        else {
15666+            const int64_t frame_time = interval / (int64_t)stats->last_count;
15667+            // The PTS delta is spread evenly over the frames counted since last_pts
15668+            if (frame_time != stats->last_interval)
15669+                av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n",
15670+                       __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time);
15671+            stats->last_interval = frame_time;
15672+        }
15673+    }
15674+    // Restart counting from this timestamp
15675+    stats->last_pts = pts;
15676+    stats->last_count = 1;
15677+}
15678+
15679+static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name)
15680+{
15681+    // Every field is set explicitly: no PTS seen yet, so no interval
15682+    // estimate, and the frame count starts at 1
15683+    stats->logctx        = logctx;
15684+    stats->name          = name;
15685+    stats->last_count    = 1;
15686+    stats->last_interval = 0;
15687+    stats->last_pts      = AV_NOPTS_VALUE;
15688+}
15689+
15690+static inline uint32_t pts_track_next_n(pts_track_t * const trk)
15691+{
15692+    const uint32_t next = trk->n + 1;
15693+    trk->n = (next != 0) ? next : 1;    // skip 0 on wrap-around: slots use n == 0 as "empty"
15694+    return trk->n;
15695+}
15696+// Fill dst with the props stored for the frame identified by tv (as issued by pts_track_add_frame)
15697+static int pts_track_get_frame(pts_track_t * const trk, const struct timeval tv, AVFrame * const dst)
15698+{
15699+    uint32_t n = (uint32_t)(tv.tv_usec / 2 + tv.tv_sec * 500000);
15700+    pts_track_el_t * t;
15701+    int rv;
15702+    // As a first guess assume that n==0 means last frame
15703+    if (n == 0) {
15704+        n = trk->last_n;
15705+        if (n == 0)
15706+            goto fail;
15707+    }
15708+    // The slot is selected by the low bits of n and must still hold that n
15709+    t = trk->a + (n & (PTS_TRACK_SIZE - 1));
15710+
15711+    if (t->n != n) {
15712+        av_log(trk->logctx, AV_LOG_ERROR, "%s: track failure: got %u, expected %u\n", __func__, n, trk->n);
15713+        goto fail;
15714+    }
15715+
15716+    // 1st frame is simple - just believe it
15717+    if (n != trk->last_n) {
15718+        trk->last_n = n;
15719+        trk->got_2 = 0;
15720+        return av_frame_copy_props(dst, t->props);
15721+    }
15722+
15723+    // Only believe in a single interpolated frame
15724+    if (trk->got_2)
15725+        goto fail;
15726+    trk->got_2 = 1;
15727+    // A copy failure is reported to the caller, as on the first-frame path
15728+    if ((rv = av_frame_copy_props(dst, t->props)) < 0)
15729+        return rv;
15730+
15731+    // If we can't guess - don't
15732+    if (t->interval == 0) {
15733+        dst->best_effort_timestamp = AV_NOPTS_VALUE;
15734+        dst->pts = AV_NOPTS_VALUE;
15735+        dst->pkt_dts = AV_NOPTS_VALUE;
15736+    }
15737+    else {
15738+        if (dst->best_effort_timestamp != AV_NOPTS_VALUE)
15739+            dst->best_effort_timestamp += t->interval / 2;
15740+        if (dst->pts != AV_NOPTS_VALUE)
15741+            dst->pts += t->interval / 2;
15742+        if (dst->pkt_dts != AV_NOPTS_VALUE)
15743+            dst->pkt_dts += t->interval / 2;
15744+    }
15745+
15746+    return 0;
15747+
15748+fail:
15749+    trk->last_n = 0;
15750+    trk->got_2 = 0;
15751+    dst->pts = AV_NOPTS_VALUE;
15752+    dst->pkt_dts = AV_NOPTS_VALUE;
15753+    return 0;
15754+}
15755+
15756+// We are only ever expecting in-order frames so nothing more clever is required
15757+static unsigned int pts_track_count(const pts_track_t * const trk)
15758+{
15759+    const uint32_t delta = trk->n - trk->last_n;   // unsigned subtraction, wraps modulo 2^32
15760+    return delta % PTS_TRACK_SIZE;
15761+}
15762+
15763+static struct timeval pts_track_add_frame(pts_track_t * const trk, const AVFrame * const src)
15764+{
15765+    const uint32_t seq = pts_track_next_n(trk);
15766+    pts_track_el_t * const slot = &trk->a[seq % PTS_TRACK_SIZE];
15767+    pts_track_el_t * const prev_slot = &trk->a[(seq - 1) % PTS_TRACK_SIZE];
15768+
15769+    pts_stats_add(&trk->stats, src->pts);
15770+
15771+    // Record the frame in its slot; the next interval is guessed to be the
15772+    // same as the last measured one
15773+    slot->n = seq;
15774+    slot->interval = pts_stats_interval(&trk->stats);
15775+    av_frame_unref(slot->props);
15776+    av_frame_copy_props(slot->props, src);
15777+
15778+    // The interval just measured really belongs to the previous frame, so
15779+    // correct that slot too - there is a decent chance it is not yet used
15780+    if (slot->interval != 0)
15781+        prev_slot->interval = slot->interval;
15782+
15783+    // In case deinterlace interpolates frames use every other usec
15784+    return (struct timeval){.tv_sec = seq / 500000, .tv_usec = (seq % 500000) * 2};
15785+}
15786+
15787+static void pts_track_uninit(pts_track_t * const trk)
15788+{
15789+    pts_track_el_t * el;
15790+    for (el = trk->a; el != trk->a + PTS_TRACK_SIZE; ++el) {
15791+        el->n = 0;                  // mark the slot empty
15792+        av_frame_free(&el->props);  // frees the props holder for this slot
15793+    }
15794+}
15795+
15796+static int pts_track_init(pts_track_t * const trk, void *logctx)
15797+{
15798+    trk->logctx = logctx;   // needed by the error log in pts_track_get_frame()
15799+    trk->n = 1;
15800+    pts_stats_init(&trk->stats, logctx, "track");
15801+    for (unsigned int i = 0; i != PTS_TRACK_SIZE; ++i) {
15802+        trk->a[i].n = 0;
15803+        if ((trk->a[i].props = av_frame_alloc()) == NULL) {
15804+            pts_track_uninit(trk);  // NOTE(review): frees all slots - assumes trk was zeroed by the caller
15805+            return AVERROR(ENOMEM);
15806+        }
15807+    }
15808+    return 0;
15809+}
15810+
15811+// Bytes per line of plane plane_n (plane_n is ignored for single-planar formats)
15812+static inline uint32_t fmt_bpl(const struct v4l2_format * const fmt, const unsigned int plane_n)
15813+{
15814+    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.plane_fmt[plane_n].bytesperline : fmt->fmt.pix.bytesperline;
15815+}
15816+
15817+// Height field of the format, from whichever of pix / pix_mp matches fmt->type
15818+static inline uint32_t fmt_height(const struct v4l2_format * const fmt)
15819+{
15820+    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
15821+}
15822+
15823+// Width field of the format, from whichever of pix / pix_mp matches fmt->type
15824+static inline uint32_t fmt_width(const struct v4l2_format * const fmt)
15825+{
15826+    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
15827+}
15828+
15829+// Pixel format fourcc, from whichever of pix / pix_mp matches fmt->type
15830+static inline uint32_t fmt_pixelformat(const struct v4l2_format * const fmt)
15831+{
15832+    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat;
15833+}
15834+
15835+// bytesused of plane 0 for multi-planar buffers, of the buffer itself otherwise
15836+static inline uint32_t buf_bytesused0(const struct v4l2_buffer * const buf)
15837+{
15838+    return V4L2_TYPE_IS_MULTIPLANAR(buf->type) ? buf->m.planes[0].bytesused : buf->bytesused;
15839+}
15840+
15841+static void
15842+init_format(V4L2Queue * const q, const uint32_t format_type)
15843+{
15844+    memset(&q->format, 0, sizeof(q->format));   // clear, then tag with the buffer type
15845+    q->format.type = format_type;
15846+    memset(&q->sel,    0, sizeof(q->sel));      // the selection gets the same buffer type
15847+    q->sel.type    = format_type;
15848+}
15849+
15850+static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx)
15851+{
15852+    struct v4l2_capability cap;
15853+    int ret;
15854+    // Query the device and set both queues' buffer types from its M2M capability
15855+    memset(&cap, 0, sizeof(cap));
15856+    ret = ioctl(ctx->fd, VIDIOC_QUERYCAP, &cap);
15857+    if (ret < 0)
15858+        return AVERROR(errno);  // ioctl only returns -1; the real cause is in errno
15859+
15860+    if (ctx->filter_type == FILTER_V4L2_SCALE &&
15861+        strcmp("bcm2835-codec-isp", cap.card) != 0)
15862+    {
15863+        av_log(ctx->logctx, AV_LOG_DEBUG, "Not ISP\n");
15864+        return AVERROR(EINVAL);
15865+    }
15866+
15867+    if (!(cap.capabilities & V4L2_CAP_STREAMING)) {
15868+        av_log(ctx->logctx, AV_LOG_DEBUG, "No streaming\n");
15869+        return AVERROR(EINVAL);
15870+    }
15871+    // Multi-planar M2M is preferred when the device offers it
15872+    if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) {
15873+        init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
15874+        init_format(&ctx->output,  V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
15875+    }
15876+    else if (cap.capabilities & V4L2_CAP_VIDEO_M2M) {
15877+        init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE);
15878+        init_format(&ctx->output,  V4L2_BUF_TYPE_VIDEO_OUTPUT);
15879+    }
15880+    else {
15881+        av_log(ctx->logctx, AV_LOG_DEBUG, "Not M2M\n");
15882+        return AVERROR(EINVAL);
15883+    }
15884+
15885+    return 0;
15886+}
15887+
15888+// Just use for probe - doesn't modify q format
15889+static int deint_v4l2m2m_try_format(V4L2Queue *queue, const uint32_t width, const uint32_t height, const enum AVPixelFormat avfmt)
15890+{
15891+    struct v4l2_format fmt         = {.type = queue->format.type};
15892+    DeintV4L2M2MContextShared *ctx = queue->ctx;
15893+    int ret, field;
15894+    // Pick YUV to test with if not otherwise specified
15895+    uint32_t pixelformat = avfmt == AV_PIX_FMT_NONE ? V4L2_PIX_FMT_YUV420 : fmt_av_to_v4l2(avfmt);
15896+    enum AVPixelFormat r_avfmt;
15897+    // A G_FMT failure is logged (with its errno cause) but is not fatal to the probe
15898+
15899+    ret = ioctl(ctx->fd, VIDIOC_G_FMT, &fmt);
15900+    if (ret)
15901+        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %s\n", av_err2str(AVERROR(errno)));
15902+    // Only the source side of the deinterlacer is probed as interlaced
15903+    if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && V4L2_TYPE_IS_OUTPUT(fmt.type))
15904+        field = V4L2_FIELD_INTERLACED_TB;
15905+    else
15906+        field = V4L2_FIELD_NONE;
15907+
15908+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) {
15909+        fmt.fmt.pix_mp.pixelformat = pixelformat;
15910+        fmt.fmt.pix_mp.field = field;
15911+        fmt.fmt.pix_mp.width = width;
15912+        fmt.fmt.pix_mp.height = height;
15913+    } else {
15914+        fmt.fmt.pix.pixelformat = pixelformat;
15915+        fmt.fmt.pix.field = field;
15916+        fmt.fmt.pix.width = width;
15917+        fmt.fmt.pix.height = height;
15918+    }
15919+
15920+    av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__,
15921+         fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height,
15922+         fmt.fmt.pix_mp.pixelformat,
15923+         fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline);
15924+
15925+    ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, &fmt);
15926+    if (ret)
15927+        return AVERROR(EINVAL);
15928+
15929+    av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__,
15930+         fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height,
15931+         fmt.fmt.pix_mp.pixelformat,
15932+         fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline);
15933+    // TRY_FMT may hand back a different pixel format - check what came back
15934+    r_avfmt = fmt_v4l2_to_av(fmt_pixelformat(&fmt));
15935+    if (r_avfmt != avfmt && avfmt != AV_PIX_FMT_NONE) {
15936+        av_log(ctx->logctx, AV_LOG_DEBUG, "Unable to set format %s on %s port\n", av_get_pix_fmt_name(avfmt), V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src");
15937+        return AVERROR(EINVAL);
15938+    }
15939+    if (r_avfmt == AV_PIX_FMT_NONE) {
15940+        av_log(ctx->logctx, AV_LOG_DEBUG, "No supported format on %s port\n", V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src");
15941+        return AVERROR(EINVAL);
15942+    }
15943+    // The requested field order must have come back unchanged
15944+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) {
15945+        if (fmt.fmt.pix_mp.field != field) {
15946+            av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type);
15947+
15948+            return AVERROR(EINVAL);
15949+        }
15950+    } else {
15951+        if (fmt.fmt.pix.field != field) {
15952+            av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type);
15953+
15954+            return AVERROR(EINVAL);
15955+        }
15956+    }
15957+
15958+    return 0;
15959+}
15960+
15961+static int
15962+do_s_fmt(V4L2Queue * const q)
15963+{
15964+    DeintV4L2M2MContextShared * const ctx = q->ctx;
15965+    const uint32_t pixelformat = fmt_pixelformat(&q->format);
15966+    int ret;
15967+
15968+    ret = ioctl(ctx->fd, VIDIOC_S_FMT, &q->format);
15969+    if (ret) {
15970+        ret = AVERROR(errno);
15971+        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %s\n", av_err2str(ret));
15972+        return ret;
15973+    }
15974+
15975+    if (pixelformat != fmt_pixelformat(&q->format)) {
15976+        av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt_pixelformat(&q->format)));
15977+        return AVERROR(EINVAL);
15978+    }
15979+
15980+    q->sel.target = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE,
15981+    q->sel.flags  = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_FLAG_LE : V4L2_SEL_FLAG_GE;
15982+
15983+    ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &q->sel);
15984+    if (ret) {
15985+        ret = AVERROR(errno);
15986+        av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %s\n", av_err2str(ret));
15987+    }
15988+
15989+    return 0;
15990+}
15991+/* Translate FFmpeg primaries / matrix / transfer values into the colorspace, ycbcr_enc and xfer_func fields of fmt. */
15992+static void
15993+set_fmt_color(struct v4l2_format *const fmt,
15994+               const enum AVColorPrimaries avcp,
15995+               const enum AVColorSpace avcs,
15996+               const enum AVColorTransferCharacteristic avxc)
15997+{   // Anything without a mapping below keeps the V4L2 *_DEFAULT value
15998+    enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT;
15999+    enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT;
16000+    enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT;
16001+    // Step 1: the primaries pick a first guess for cs (and for BT.709 / BT.470M also ycbcr)
16002+    switch (avcp) {
16003+    case AVCOL_PRI_BT709:
16004+        cs = V4L2_COLORSPACE_REC709;
16005+        ycbcr = V4L2_YCBCR_ENC_709;
16006+        break;
16007+    case AVCOL_PRI_BT470M:
16008+        cs = V4L2_COLORSPACE_470_SYSTEM_M;
16009+        ycbcr = V4L2_YCBCR_ENC_601;
16010+        break;
16011+    case AVCOL_PRI_BT470BG:
16012+        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
16013+        break;
16014+    case AVCOL_PRI_SMPTE170M:
16015+        cs = V4L2_COLORSPACE_SMPTE170M;
16016+        break;
16017+    case AVCOL_PRI_SMPTE240M:
16018+        cs = V4L2_COLORSPACE_SMPTE240M;
16019+        break;
16020+    case AVCOL_PRI_BT2020:
16021+        cs = V4L2_COLORSPACE_BT2020;
16022+        break;
16023+    case AVCOL_PRI_SMPTE428:
16024+    case AVCOL_PRI_SMPTE431:
16025+    case AVCOL_PRI_SMPTE432:
16026+    case AVCOL_PRI_EBU3213:
16027+    case AVCOL_PRI_RESERVED:
16028+    case AVCOL_PRI_FILM:
16029+    case AVCOL_PRI_UNSPECIFIED:
16030+    default:
16031+        break;
16032+    }
16033+    // Step 2: a recognised matrix overrides the cs chosen from the primaries
16034+    switch (avcs) {
16035+    case AVCOL_SPC_RGB:
16036+        cs = V4L2_COLORSPACE_SRGB;
16037+        break;
16038+    case AVCOL_SPC_BT709:
16039+        cs = V4L2_COLORSPACE_REC709;
16040+        break;
16041+    case AVCOL_SPC_FCC:
16042+        cs = V4L2_COLORSPACE_470_SYSTEM_M;
16043+        break;
16044+    case AVCOL_SPC_BT470BG:
16045+        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
16046+        break;
16047+    case AVCOL_SPC_SMPTE170M:
16048+        cs = V4L2_COLORSPACE_SMPTE170M;
16049+        break;
16050+    case AVCOL_SPC_SMPTE240M:
16051+        cs = V4L2_COLORSPACE_SMPTE240M;
16052+        break;
16053+    case AVCOL_SPC_BT2020_CL:
16054+        cs = V4L2_COLORSPACE_BT2020;
16055+        ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM;
16056+        break;
16057+    case AVCOL_SPC_BT2020_NCL:
16058+        cs = V4L2_COLORSPACE_BT2020;
16059+        break;
16060+    default:
16061+        break;
16062+    }
16063+    // Step 3: dispatch on the avxc argument (not on the local xfer, which still holds DEFAULT at this point)
16064+    switch (avxc) {
16065+    case AVCOL_TRC_BT709:
16066+        xfer = V4L2_XFER_FUNC_709;
16067+        break;
16068+    case AVCOL_TRC_IEC61966_2_1:
16069+        xfer = V4L2_XFER_FUNC_SRGB;
16070+        break;
16071+    case AVCOL_TRC_SMPTE240M:
16072+        xfer = V4L2_XFER_FUNC_SMPTE240M;
16073+        break;
16074+    case AVCOL_TRC_SMPTE2084:
16075+        xfer = V4L2_XFER_FUNC_SMPTE2084;
16076+        break;
16077+    default:
16078+        break;
16079+    }
16080+    // Write into whichever union member matches the buffer type
16081+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
16082+        fmt->fmt.pix_mp.colorspace = cs;
16083+        fmt->fmt.pix_mp.ycbcr_enc = ycbcr;
16084+        fmt->fmt.pix_mp.xfer_func = xfer;
16085+    } else {
16086+        fmt->fmt.pix.colorspace = cs;
16087+        fmt->fmt.pix.ycbcr_enc = ycbcr;
16088+        fmt->fmt.pix.xfer_func = xfer;
16089+    }
16090+}
16091+/* Store the V4L2 quantization that corresponds to avcr in fmt, using the pix_mp or pix member according to fmt->type. */
16092+static void
16093+set_fmt_color_range(struct v4l2_format *const fmt, const enum AVColorRange avcr)
16094+{
16095+    const enum v4l2_quantization q =
16096+        avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE :
16097+        avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE :
16098+            V4L2_QUANTIZATION_DEFAULT;  // every other range value maps to DEFAULT
16099+
16100+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
16101+        fmt->fmt.pix_mp.quantization = q;
16102+    } else {
16103+        fmt->fmt.pix.quantization = q;
16104+    }
16105+}
16106+/* Map the colorspace / ycbcr_enc stored in fmt back to FFmpeg colour primaries; AVCOL_PRI_UNSPECIFIED when nothing matches. */
16107+static enum AVColorPrimaries get_color_primaries(const struct v4l2_format *const fmt)
16108+{
16109+    enum v4l2_ycbcr_encoding ycbcr;
16110+    enum v4l2_colorspace cs;
16111+    // Both values are read from the union member selected by the buffer type
16112+    cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ?
16113+        fmt->fmt.pix_mp.colorspace :
16114+        fmt->fmt.pix.colorspace;
16115+
16116+    ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ?
16117+        fmt->fmt.pix_mp.ycbcr_enc:
16118+        fmt->fmt.pix.ycbcr_enc;
16119+    // The YCbCr encoding is consulted first; a 709 / 601 family encoding decides the answer on its own
16120+    switch(ycbcr) {
16121+    case V4L2_YCBCR_ENC_XV709:
16122+    case V4L2_YCBCR_ENC_709: return AVCOL_PRI_BT709;
16123+    case V4L2_YCBCR_ENC_XV601:
16124+    case V4L2_YCBCR_ENC_601:return AVCOL_PRI_BT470M;
16125+    default:
16126+        break;
16127+    }
16128+    // Otherwise fall back to the colorspace
16129+    switch(cs) {
16130+    case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_PRI_BT470BG;
16131+    case V4L2_COLORSPACE_SMPTE170M: return AVCOL_PRI_SMPTE170M;
16132+    case V4L2_COLORSPACE_SMPTE240M: return AVCOL_PRI_SMPTE240M;
16133+    case V4L2_COLORSPACE_BT2020: return AVCOL_PRI_BT2020;
16134+    default:
16135+        break;
16136+    }
16137+
16138+    return AVCOL_PRI_UNSPECIFIED;
16139+}
16140+/* Map the colorspace stored in fmt to an FFmpeg AVColorSpace; AVCOL_SPC_UNSPECIFIED when the colorspace has no mapping. */
16141+static enum AVColorSpace get_color_space(const struct v4l2_format *const fmt)
16142+{
16143+    enum v4l2_ycbcr_encoding ycbcr;
16144+    enum v4l2_colorspace cs;
16145+    // Both values are read from the union member selected by the buffer type
16146+    cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ?
16147+        fmt->fmt.pix_mp.colorspace :
16148+        fmt->fmt.pix.colorspace;
16149+
16150+    ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ?
16151+        fmt->fmt.pix_mp.ycbcr_enc:
16152+        fmt->fmt.pix.ycbcr_enc;
16153+    // ycbcr is only used to tell the two BT.2020 variants apart
16154+    switch(cs) {
16155+    case V4L2_COLORSPACE_SRGB: return AVCOL_SPC_RGB;
16156+    case V4L2_COLORSPACE_REC709: return AVCOL_SPC_BT709;
16157+    case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_SPC_FCC;
16158+    case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_SPC_BT470BG;
16159+    case V4L2_COLORSPACE_SMPTE170M: return AVCOL_SPC_SMPTE170M;
16160+    case V4L2_COLORSPACE_SMPTE240M: return AVCOL_SPC_SMPTE240M;
16161+    case V4L2_COLORSPACE_BT2020:
16162+        if (ycbcr == V4L2_YCBCR_ENC_BT2020_CONST_LUM)
16163+            return AVCOL_SPC_BT2020_CL;
16164+        else
16165+             return AVCOL_SPC_BT2020_NCL;
16166+    default:
16167+        break;
16168+    }
16169+
16170+    return AVCOL_SPC_UNSPECIFIED;
16171+}
16172+/* Derive an FFmpeg transfer characteristic from fmt, trying xfer_func, then colorspace, then ycbcr_enc; UNSPECIFIED if none match. */
16173+static enum AVColorTransferCharacteristic get_color_trc(const struct v4l2_format *const fmt)
16174+{
16175+    enum v4l2_ycbcr_encoding ycbcr;
16176+    enum v4l2_xfer_func xfer;
16177+    enum v4l2_colorspace cs;
16178+    // All three values are read from the union member selected by the buffer type
16179+    cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ?
16180+        fmt->fmt.pix_mp.colorspace :
16181+        fmt->fmt.pix.colorspace;
16182+
16183+    ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ?
16184+        fmt->fmt.pix_mp.ycbcr_enc:
16185+        fmt->fmt.pix.ycbcr_enc;
16186+
16187+    xfer = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ?
16188+        fmt->fmt.pix_mp.xfer_func:
16189+        fmt->fmt.pix.xfer_func;
16190+    // First choice: the transfer function itself (only the 709 and sRGB values are mapped here)
16191+    switch (xfer) {
16192+    case V4L2_XFER_FUNC_709: return AVCOL_TRC_BT709;
16193+    case V4L2_XFER_FUNC_SRGB: return AVCOL_TRC_IEC61966_2_1;
16194+    default:
16195+        break;
16196+    }
16197+    // Second choice: a transfer characteristic paired with the colorspace
16198+    switch (cs) {
16199+    case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_TRC_GAMMA22;
16200+    case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_TRC_GAMMA28;
16201+    case V4L2_COLORSPACE_SMPTE170M: return AVCOL_TRC_SMPTE170M;
16202+    case V4L2_COLORSPACE_SMPTE240M: return AVCOL_TRC_SMPTE240M;
16203+    default:
16204+        break;
16205+    }
16206+    // Last choice: the XV encodings map to BT1361_ECG
16207+    switch (ycbcr) {
16208+    case V4L2_YCBCR_ENC_XV709:
16209+    case V4L2_YCBCR_ENC_XV601: return AVCOL_TRC_BT1361_ECG;
16210+    default:
16211+        break;
16212+    }
16213+
16214+    return AVCOL_TRC_UNSPECIFIED;
16215+}
16216+/* Map the quantization stored in fmt to an FFmpeg colour range; values other than LIM/FULL give AVCOL_RANGE_UNSPECIFIED. */
16217+static enum AVColorRange get_color_range(const struct v4l2_format *const fmt)
16218+{
16219+    enum v4l2_quantization qt;
16220+    // Read from the union member selected by the buffer type
16221+    qt = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ?
16222+        fmt->fmt.pix_mp.quantization :
16223+        fmt->fmt.pix.quantization;
16224+
16225+    switch (qt) {
16226+    case V4L2_QUANTIZATION_LIM_RANGE: return AVCOL_RANGE_MPEG;
16227+    case V4L2_QUANTIZATION_FULL_RANGE: return AVCOL_RANGE_JPEG;
16228+    default:
16229+        break;
16230+    }
16231+
16232+     return AVCOL_RANGE_UNSPECIFIED;
16233+}
16234+/* Fill q->format and the q->sel rectangle from the DRM PRIME descriptor in frame->data[0]; no ioctl is issued here. */
16235+static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame)
16236+{   // Returns 0, or AVERROR(EINVAL) when the descriptor layout, pitch or format is not one handled below
16237+    struct v4l2_format *const format = &q->format;
16238+    const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0];
16239+
16240+    const uint32_t drm_fmt = src->layers[0].format;
16241+    // Treat INVALID as LINEAR
16242+    const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ?
16243+        DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier;
16244+    uint32_t pix_fmt = 0;  // stays 0 unless a case below recognises the format / modifier pair
16245+    uint32_t w = 0;
16246+    uint32_t h = 0;
16247+    uint32_t bpl = src->layers[0].planes[0].pitch;
16248+
16249+    // We really don't expect multiple layers
16250+    // All formats that we currently cope with are single object
16251+    // A zero pitch is refused as well: the linear cases below divide by it
16252+    if (src->nb_layers != 1 || src->nb_objects != 1 || bpl == 0)
16253+        return AVERROR(EINVAL);
16254+
16255+    switch (drm_fmt) {
16256+        case DRM_FORMAT_YUV420:
16257+            if (mod == DRM_FORMAT_MOD_LINEAR) {
16258+                if (src->layers[0].nb_planes != 3)
16259+                    break;
16260+                pix_fmt = V4L2_PIX_FMT_YUV420;
16261+                h = src->layers[0].planes[1].offset / bpl;  // rows in front of plane 1
16262+                w = bpl;                                     // width is given as the full pitch
16263+            }
16264+            break;
16265+
16266+        case DRM_FORMAT_NV12:
16267+            if (mod == DRM_FORMAT_MOD_LINEAR) {
16268+                if (src->layers[0].nb_planes != 2)
16269+                    break;
16270+                pix_fmt = V4L2_PIX_FMT_NV12;
16271+                h = src->layers[0].planes[1].offset / bpl;  // rows in front of plane 1
16272+                w = bpl;                                     // width is given as the full pitch
16273+            }
16274+#if CONFIG_SAND
16275+            else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
16276+                if (src->layers[0].nb_planes != 2)
16277+                    break;
16278+                pix_fmt = V4L2_PIX_FMT_NV12_COL128;
16279+                w = bpl;
16280+                h = src->layers[0].planes[1].offset / 128;
16281+                bpl = fourcc_mod_broadcom_param(mod);  // for the column format, bytesperline is taken from the modifier parameter
16282+            }
16283+#endif
16284+            break;
16285+
16286+        case DRM_FORMAT_P030:
16287+#if CONFIG_SAND
16288+            if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
16289+                if (src->layers[0].nb_planes != 2)
16290+                    break;
16291+                pix_fmt =  V4L2_PIX_FMT_NV12_10_COL128;
16292+                w = bpl / 2;  // Matching lie to how we construct this
16293+                h = src->layers[0].planes[1].offset / 128;
16294+                bpl = fourcc_mod_broadcom_param(mod);
16295+            }
16296+#endif
16297+            break;
16298+
16299+        default:
16300+            break;
16301+    }
16302+    // No case accepted the frame
16303+    if (!pix_fmt)
16304+        return AVERROR(EINVAL);
16305+
16306+    if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) {
16307+        struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp;
16308+
16309+        pix->width = w;
16310+        pix->height = h;
16311+        pix->pixelformat = pix_fmt;
16312+        pix->plane_fmt[0].bytesperline = bpl;
16313+        pix->num_planes = 1;  // the source is always described to V4L2 as a single plane
16314+    }
16315+    else {
16316+        struct v4l2_pix_format *const pix = &format->fmt.pix;
16317+
16318+        pix->width = w;
16319+        pix->height = h;
16320+        pix->pixelformat = pix_fmt;
16321+        pix->bytesperline = bpl;
16322+    }
16323+
16324+    set_fmt_color(format, frame->color_primaries, frame->colorspace, frame->color_trc);
16325+    set_fmt_color_range(format, frame->color_range);
16326+    // Selection rectangle = frame size minus the frame's crop margins
16327+    q->sel.r.width = frame->width - (frame->crop_left + frame->crop_right);
16328+    q->sel.r.height = frame->height - (frame->crop_top + frame->crop_bottom);
16329+    q->sel.r.left = frame->crop_left;
16330+    q->sel.r.top = frame->crop_top;
16331+
16332+    return 0;
16333+}
16334+/* Build the destination format on queue (dimensions rounded up to 16, colour info taken from priv) and apply it with do_s_fmt(). */
16335+/* The selection rectangle keeps the unaligned width x height at the origin. Returns whatever do_s_fmt() returns. */
16336+static int set_dst_format(DeintV4L2M2MContext * const priv, V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height)
16337+{
16338+    struct v4l2_format * const fmt   = &queue->format;
16339+    struct v4l2_selection *const sel = &queue->sel;
16340+    // Only the fmt union is cleared; fmt->type is still read below
16341+    memset(&fmt->fmt, 0, sizeof(fmt->fmt));
16342+
16343+    // Align w/h to 16 here in case there are alignment requirements at the next
16344+    // stage of the filter chain (also RPi deinterlace setup is bust and this
16345+    // fixes it)
16346+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
16347+        fmt->fmt.pix_mp.pixelformat = pixelformat;
16348+        fmt->fmt.pix_mp.field = field;
16349+        fmt->fmt.pix_mp.width = FFALIGN(width, 16);
16350+        fmt->fmt.pix_mp.height = FFALIGN(height, 16);
16351+    } else {
16352+        fmt->fmt.pix.pixelformat = pixelformat;
16353+        fmt->fmt.pix.field = field;
16354+        fmt->fmt.pix.width = FFALIGN(width, 16);
16355+        fmt->fmt.pix.height = FFALIGN(height, 16);
16356+    }
16357+
16358+    set_fmt_color(fmt, priv->colour_primaries, priv->colour_matrix, priv->colour_transfer);
16359+    set_fmt_color_range(fmt, priv->colour_range);
16360+    // The visible picture is the requested, unaligned size
16361+    sel->r.width = width;
16362+    sel->r.height = height;
16363+    sel->r.left = 0;
16364+    sel->r.top = 0;
16365+
16366+    return do_s_fmt(queue);
16367+}
16368+/* Open node and check that it accepts the wanted capture and output formats. On failure the fd is closed and ctx->fd set to -1. */
16369+static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node)
16370+{   // Returns 0 with ctx->fd left open, or a negative AVERROR code
16371+    int ret;
16372+
16373+    ctx->fd = open(node, O_RDWR | O_NONBLOCK, 0);
16374+    if (ctx->fd < 0)
16375+        return AVERROR(errno);
16376+
16377+    ret = deint_v4l2m2m_prepare_context(ctx);
16378+    if (ret) {
16379+        av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to prepare context\n");
16380+        goto fail;
16381+    }
16382+    // The capture side is tried with the configured output size and format
16383+    ret = deint_v4l2m2m_try_format(&ctx->capture, ctx->output_width, ctx->output_height, ctx->output_format);
16384+    if (ret) {
16385+        av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try dst format\n");
16386+        goto fail;
16387+    }
16388+    // The output (source) side is tried with the input size and AV_PIX_FMT_NONE as the format
16389+    ret = deint_v4l2m2m_try_format(&ctx->output, ctx->width, ctx->height, AV_PIX_FMT_NONE);
16390+    if (ret) {
16391+        av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try src format\n");
16392+        goto fail;
16393+    }
16394+
16395+    return 0;
16396+
16397+fail:
16398+    close(ctx->fd);
16399+    ctx->fd = -1;
16400+
16401+    return ret;
16402+}
16403+/* Probe every /dev entry whose name starts with "video" and keep the first one that deint_v4l2m2m_probe_device() accepts. */
16404+static int deint_v4l2m2m_find_device(DeintV4L2M2MContextShared *ctx)
16405+{   // Returns 0 on success, otherwise a negative AVERROR code with ctx->fd set to -1
16406+    int ret = AVERROR(EINVAL);  // result when no candidate entry is found at all
16407+    struct dirent *entry;
16408+    char node[PATH_MAX];
16409+    DIR *dirp;
16410+
16411+    dirp = opendir("/dev");
16412+    if (!dirp)
16413+        return AVERROR(errno);
16414+
16415+    for (entry = readdir(dirp); entry; entry = readdir(dirp)) {
16416+        // strncmp() is non-zero when the first five characters differ from "video"
16417+        if (strncmp(entry->d_name, "video", 5))
16418+            continue;
16419+
16420+        snprintf(node, sizeof(node), "/dev/%s", entry->d_name);
16421+        av_log(ctx->logctx, AV_LOG_DEBUG, "probing device %s\n", node);
16422+        ret = deint_v4l2m2m_probe_device(ctx, node);
16423+        if (!ret)
16424+            break;
16425+    }
16426+
16427+    closedir(dirp);
16428+    // ret holds the result of the last probe, or the initial EINVAL
16429+    if (ret) {
16430+        av_log(ctx->logctx, AV_LOG_ERROR, "Could not find a valid device\n");
16431+        ctx->fd = -1;
16432+
16433+        return ret;
16434+    }
16435+
16436+    av_log(ctx->logctx, AV_LOG_INFO, "Using device %s\n", node);
16437+
16438+    return 0;
16439+}
16440+/* Queue buf on its device with VIDIOC_QBUF and mark it as enqueued. Returns 0 or AVERROR(errno). */
16441+static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf)
16442+{
16443+    int ret;
16444+
16445+    ret = ioctl(buf->q->ctx->fd, VIDIOC_QBUF, &buf->buffer);
16446+    if (ret < 0)
16447+        return AVERROR(errno);
16448+    // The flag is only set once the ioctl has succeeded
16449+    buf->enqueued = 1;
16450+
16451+    return 0;
16452+}
16453+/* Set the fd of the first AV_DRM_MAX_PLANES objects in d to -1, the value drm_frame_uninit() treats as "nothing to close". */
16454+static void
16455+drm_frame_init(AVDRMFrameDescriptor * const d)
16456+{
16457+    unsigned int i;
16458+    for (i = 0; i != AV_DRM_MAX_PLANES; ++i) {
16459+        d->objects[i].fd = -1;
16460+    }
16461+}
16462+/* Close every object fd in d that is not -1 and reset it to -1; only the first d->nb_objects entries are visited. */
16463+static void
16464+drm_frame_uninit(AVDRMFrameDescriptor * const d)
16465+{
16466+    unsigned int i;
16467+    for (i = 0; i != d->nb_objects; ++i) {
16468+        if (d->objects[i].fd != -1) {
16469+            close(d->objects[i].fd);
16470+            d->objects[i].fd = -1;  // prevents a second close if called again
16471+        }
16472+    }
16473+}
16474+/* Release an array of n V4L2Buffers: close their DRM object fds, free the array and set *ppavbufs to NULL. No-op when already NULL. */
16475+static void
16476+avbufs_delete(V4L2Buffer** ppavbufs, const unsigned int n)
16477+{
16478+    unsigned int i;
16479+    V4L2Buffer* const avbufs = *ppavbufs;
16480+
16481+    if (avbufs == NULL)
16482+        return;
16483+    *ppavbufs = NULL;  // the caller's pointer is cleared before anything is freed
16484+
16485+    for (i = 0; i != n; ++i) {
16486+        V4L2Buffer* const avbuf = avbufs + i;
16487+        drm_frame_uninit(&avbuf->drm_frame);
16488+    }
16489+
16490+    av_free(avbufs);
16491+}
16492+/* Describe avbuf as a DRM frame (layer, planes, modifier) from q->format and export each V4L2 plane as a dmabuf fd. */
16493+static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf)
16494+{   // Returns 0, AVERROR(EINVAL) for an unhandled pixel format, or AVERROR(errno) when VIDIOC_EXPBUF fails
16495+    struct v4l2_exportbuffer expbuf;
16496+    int i, ret;
16497+    uint64_t mod = DRM_FORMAT_MOD_LINEAR;
16498+
16499+    AVDRMFrameDescriptor * const drm_desc = &avbuf->drm_frame;
16500+    AVDRMLayerDescriptor * const layer = &drm_desc->layers[0];
16501+    const struct v4l2_format *const fmt = &q->format;
16502+    const uint32_t height = fmt_height(fmt);
16503+    ptrdiff_t bpl0;
16504+
16505+    /* fill the DRM frame descriptor */
16506+    drm_desc->nb_layers = 1;
16507+    layer->nb_planes = avbuf->num_planes;
16508+    // Default layout: one DRM plane per V4L2 plane, each at offset 0 of its own object
16509+    for (i = 0; i < avbuf->num_planes; i++) {
16510+        layer->planes[i].object_index = i;
16511+        layer->planes[i].offset = 0;
16512+        layer->planes[i].pitch = fmt_bpl(fmt, i);
16513+    }
16514+    bpl0 = layer->planes[0].pitch;
16515+    // The switch value is a V4L2 fourcc; layer->format receives the DRM fourcc. When the buffer has a single V4L2 plane, the extra DRM planes are laid out inside object 0.
16516+    switch (fmt_pixelformat(fmt)) {
16517+#if CONFIG_SAND
16518+        case V4L2_PIX_FMT_NV12_COL128:
16519+            mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl0);  // bytesperline goes into the modifier parameter
16520+            layer->format = DRM_FORMAT_NV12;
16521+
16522+            if (avbuf->num_planes > 1)
16523+                break;
16524+
16525+            layer->nb_planes = 2;
16526+            layer->planes[1].object_index = 0;
16527+            layer->planes[1].offset = height * 128;
16528+            layer->planes[0].pitch = fmt_width(fmt);
16529+            layer->planes[1].pitch = layer->planes[0].pitch;
16530+            break;
16531+#endif
16532+
16533+        case V4L2_PIX_FMT_NV12:
16534+            layer->format = DRM_FORMAT_NV12;
16535+
16536+            if (avbuf->num_planes > 1)
16537+                break;
16538+
16539+            layer->nb_planes = 2;
16540+            layer->planes[1].object_index = 0;
16541+            layer->planes[1].offset = bpl0 * height;  // second plane placed right after bpl0 * height bytes
16542+            layer->planes[1].pitch = bpl0;
16543+            break;
16544+
16545+        case V4L2_PIX_FMT_YUV420:
16546+            layer->format = DRM_FORMAT_YUV420;
16547+
16548+            if (avbuf->num_planes > 1)
16549+                break;
16550+
16551+            layer->nb_planes = 3;
16552+            layer->planes[1].object_index = 0;
16553+            layer->planes[1].offset = bpl0 * height;
16554+            layer->planes[1].pitch = bpl0 / 2;
16555+            layer->planes[2].object_index = 0;
16556+            layer->planes[2].offset = layer->planes[1].offset + ((bpl0 * height) / 4);  // third plane follows a quarter-size second plane
16557+            layer->planes[2].pitch = bpl0 / 2;
16558+            break;
16559+
16560+        default:
16561+            drm_desc->nb_layers = 0;
16562+            return AVERROR(EINVAL);
16563+    }
16564+    // nb_objects grows with each successful export, so a partial failure leaves an accurate count behind
16565+    drm_desc->nb_objects = 0;
16566+    for (i = 0; i < avbuf->num_planes; i++) {
16567+        memset(&expbuf, 0, sizeof(expbuf));
16568+
16569+        expbuf.index = avbuf->buffer.index;
16570+        expbuf.type = avbuf->buffer.type;
16571+        expbuf.plane = i;
16572+
16573+        ret = ioctl(avbuf->q->ctx->fd, VIDIOC_EXPBUF, &expbuf);
16574+        if (ret < 0)
16575+            return AVERROR(errno);
16576+
16577+        drm_desc->objects[i].size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type) ?
16578+            avbuf->buffer.m.planes[i].length : avbuf->buffer.length;
16579+        drm_desc->objects[i].fd = expbuf.fd;
16580+        drm_desc->objects[i].format_modifier = mod;
16581+        drm_desc->nb_objects = i + 1;
16582+    }
16583+
16584+    return 0;
16585+}
16586+/* Request queue->num_buffers buffers from the driver, create the matching V4L2Buffer array and, on capture queues, queue and export each one. */
16587+static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue)
16588+{   // Returns 0 or a negative AVERROR code; after a failure in the per-buffer loop the array is freed and num_buffers reset to 0
16589+    struct v4l2_format *fmt = &queue->format;
16590+    DeintV4L2M2MContextShared *ctx = queue->ctx;
16591+    struct v4l2_requestbuffers req;
16592+    int ret, i, multiplanar;
16593+    uint32_t memory;
16594+    // OUTPUT queues import dmabufs; every other queue uses driver-allocated MMAP buffers
16595+    memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ?
16596+        V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
16597+
16598+    multiplanar = V4L2_TYPE_IS_MULTIPLANAR(fmt->type);
16599+
16600+    memset(&req, 0, sizeof(req));
16601+    req.count = queue->num_buffers;
16602+    req.memory = memory;
16603+    req.type = fmt->type;
16604+
16605+    ret = ioctl(ctx->fd, VIDIOC_REQBUFS, &req);
16606+    if (ret < 0) {
16607+        ret = AVERROR(errno);  // latch errno first: the logging call below may disturb it
16608+        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_REQBUFS failed: %s\n", av_err2str(ret));
16609+        return ret;
16610+    }
16611+    // Continue with the count written back by the ioctl, which replaces the requested one
16612+    queue->num_buffers = req.count;
16613+    queue->buffers = av_mallocz(queue->num_buffers * sizeof(V4L2Buffer));
16614+    if (!queue->buffers) {
16615+        av_log(ctx->logctx, AV_LOG_ERROR, "malloc enomem\n");
16616+
16617+        return AVERROR(ENOMEM);
16618+    }
16619+    // First pass: initialise every entry so that the failure path below can release any of them
16620+    for (i = 0; i < queue->num_buffers; i++) {
16621+        V4L2Buffer * const buf = &queue->buffers[i];
16622+
16623+        buf->enqueued = 0;
16624+        buf->q = queue;
16625+
16626+        buf->buffer.type = fmt->type;
16627+        buf->buffer.memory = memory;
16628+        buf->buffer.index = i;
16629+
16630+        if (multiplanar) {
16631+            buf->buffer.length = VIDEO_MAX_PLANES;
16632+            buf->buffer.m.planes = buf->planes;
16633+        }
16634+
16635+        drm_frame_init(&buf->drm_frame);
16636+    }
16637+    // Second pass: query each buffer; on non-OUTPUT queues also queue it and export it as DRM
16638+    for (i = 0; i < queue->num_buffers; i++) {
16639+        V4L2Buffer * const buf = &queue->buffers[i];
16640+
16641+        ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer);
16642+        if (ret < 0) {
16643+            ret = AVERROR(errno);
16644+
16645+            goto fail;
16646+        }
16647+        // For multiplanar types the plane count is read from buffer.length after QUERYBUF
16648+        buf->num_planes = multiplanar ? buf->buffer.length : 1;
16649+
16650+        if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) {
16651+            ret = deint_v4l2m2m_enqueue_buffer(buf);
16652+            if (ret)
16653+                goto fail;
16654+
16655+            ret = v4l2_buffer_export_drm(queue, buf);
16656+            if (ret)
16657+                goto fail;
16658+        }
16659+    }
16660+
16661+    return 0;
16662+
16663+fail:
16664+    avbufs_delete(&queue->buffers, queue->num_buffers);
16665+    queue->num_buffers = 0;
16666+    return ret;
16667+}
16668+/* Issue VIDIOC_STREAMON for the queue's buffer type. Returns 0 or AVERROR(errno). */
16669+static int deint_v4l2m2m_streamon(V4L2Queue *queue)
16670+{
16671+    DeintV4L2M2MContextShared * const ctx = queue->ctx;
16672+    int type = queue->format.type;
16673+    int ret, err;
16674+
16675+    ret = ioctl(ctx->fd, VIDIOC_STREAMON, &type);
16676+    // Latch errno straight after the ioctl: logging may overwrite it, and after a success it is stale
16677+    err = ret < 0 ? AVERROR(errno) : 0;
16678+    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, err);
16679+
16680+    return err;
16681+}
16682+/* Issue VIDIOC_STREAMOFF for the queue's buffer type. Returns 0 or AVERROR(errno). */
16683+static int deint_v4l2m2m_streamoff(V4L2Queue *queue)
16684+{
16685+    DeintV4L2M2MContextShared * const ctx = queue->ctx;
16686+    int type = queue->format.type;
16687+    int ret, err;
16688+
16689+    ret = ioctl(ctx->fd, VIDIOC_STREAMOFF, &type);
16690+    // Latch errno straight after the ioctl: logging may overwrite it, and after a success it is stale
16691+    err = ret < 0 ? AVERROR(errno) : 0;
16692+    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, err);
16693+
16694+    return err;
16695+}
16696+
16697+// timeout in ms
16698+static V4L2Buffer* deint_v4l2m2m_dequeue_buffer(V4L2Queue *queue, int timeout)
16699+{
16700+    struct v4l2_plane planes[VIDEO_MAX_PLANES];
16701+    DeintV4L2M2MContextShared *ctx = queue->ctx;
16702+    struct v4l2_buffer buf = { 0 };
16703+    V4L2Buffer* avbuf = NULL;
16704+    struct pollfd pfd;
16705+    short events;
16706+    int ret;
16707+
16708+    if (V4L2_TYPE_IS_OUTPUT(queue->format.type))
16709+        events =  POLLOUT | POLLWRNORM;
16710+    else
16711+        events = POLLIN | POLLRDNORM;
16712+
16713+    pfd.events = events;
16714+    pfd.fd = ctx->fd;
16715+
16716+    for (;;) {
16717+        ret = poll(&pfd, 1, timeout);
16718+        if (ret > 0)
16719+            break;
16720+        if (errno == EINTR)
16721+            continue;
16722+        return NULL;
16723+    }
16724+
16725+    if (pfd.revents & POLLERR)
16726+        return NULL;
16727+
16728+    if (pfd.revents & events) {
16729+        memset(&buf, 0, sizeof(buf));
16730+        buf.memory = V4L2_MEMORY_MMAP;
16731+        buf.type = queue->format.type;
16732+        if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) {
16733+            memset(planes, 0, sizeof(planes));
16734+            buf.length = VIDEO_MAX_PLANES;
16735+            buf.m.planes = planes;
16736+        }
16737+
16738+        ret = ioctl(ctx->fd, VIDIOC_DQBUF, &buf);
16739+        if (ret) {
16740+            if (errno != EAGAIN)
16741+                av_log(ctx->logctx, AV_LOG_DEBUG, "VIDIOC_DQBUF, errno (%s)\n",
16742+                       av_err2str(AVERROR(errno)));
16743+            return NULL;
16744+        }
16745+
16746+        avbuf = &queue->buffers[buf.index];
16747+        avbuf->enqueued = 0;
16748+        avbuf->buffer = buf;
16749+        if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) {
16750+            memcpy(avbuf->planes, planes, sizeof(planes));
16751+            avbuf->buffer.m.planes = avbuf->planes;
16752+        }
16753+        return avbuf;
16754+    }
16755+
16756+    return NULL;
16757+}
16758+/* Return the first buffer of queue whose enqueued flag is clear, or NULL when every buffer is enqueued. */
16759+static V4L2Buffer *deint_v4l2m2m_find_free_buf(V4L2Queue *queue)
16760+{
16761+    int i;
16762+    V4L2Buffer *buf = NULL;
16763+
16764+    for (i = 0; i < queue->num_buffers; i++)
16765+        if (!queue->buffers[i].enqueued) {
16766+            buf = &queue->buffers[i];
16767+            break;
16768+        }
16769+    return buf;
16770+}
16771+/* Unreference the frame held by every buffer of queue that is still marked enqueued. Accepts a NULL queue or a NULL buffer array. */
16772+static void deint_v4l2m2m_unref_queued(V4L2Queue *queue)
16773+{
16774+    int i;
16775+    V4L2Buffer *buf = NULL;
16776+
16777+    if (!queue || !queue->buffers)
16778+        return;
16779+    for (i = 0; i < queue->num_buffers; i++) {
16780+        buf = &queue->buffers[i];
16781+        if (queue->buffers[i].enqueued)  // the enqueued flag itself is left unchanged
16782+            av_frame_unref(&buf->frame);
16783+    }
16784+}
16785+/* Dequeue, with a zero timeout, every buffer that is ready on queue and unreference the frame attached to each. */
16786+static void recycle_q(V4L2Queue * const queue)
16787+{
16788+    V4L2Buffer* avbuf;
16789+    while (avbuf = deint_v4l2m2m_dequeue_buffer(queue, 0), avbuf) {  // comma expression: assign, then test for NULL
16790+        av_frame_unref(&avbuf->frame);
16791+    }
16792+}
16793+/* Count the buffers of queue whose enqueued flag is set; 0 when the queue has no buffer array. */
16794+static int count_enqueued(V4L2Queue *queue)
16795+{
16796+    int i;
16797+    int n = 0;
16798+
16799+    if (queue->buffers == NULL)
16800+        return 0;
16801+
16802+    for (i = 0; i < queue->num_buffers; i++)
16803+        if (queue->buffers[i].enqueued)
16804+            ++n;
16805+    return n;
16806+}
16807+/* Attach the DRM PRIME frame to a free buffer of queue (fds, field order, timestamp) and queue it; the frame's references move into the buffer. */
16808+static int deint_v4l2m2m_enqueue_frame(V4L2Queue * const queue, AVFrame * const frame)
16809+{   // Returns 0, AVERROR(EAGAIN) when no buffer is free, or the QBUF error (in which case the moved frame is released again)
16810+    DeintV4L2M2MContextShared *const ctx = queue->ctx;
16811+    AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)frame->data[0];
16812+    V4L2Buffer *buf;
16813+    int i, ret;
16814+    // On OUTPUT queues, first reclaim buffers the device has finished with
16815+    if (V4L2_TYPE_IS_OUTPUT(queue->format.type))
16816+        recycle_q(queue);
16817+
16818+    buf = deint_v4l2m2m_find_free_buf(queue);
16819+    if (!buf) {
16820+        av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d finding free buf\n", __func__, 0);
16821+        return AVERROR(EAGAIN);
16822+    }
16823+    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buffer.type))
16824+        for (i = 0; i < drm_desc->nb_objects; i++)
16825+            buf->buffer.m.planes[i].m.fd = drm_desc->objects[i].fd;
16826+    else
16827+        buf->buffer.m.fd = drm_desc->objects[0].fd;
16828+    // Field order comes from the frame's interlacing flags
16829+    buf->buffer.field = !frame->interlaced_frame ? V4L2_FIELD_NONE :
16830+        frame->top_field_first ? V4L2_FIELD_INTERLACED_TB :
16831+            V4L2_FIELD_INTERLACED_BT;
16832+
16833+    if (ctx->field_order != buf->buffer.field) {
16834+        av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Field changed: %d->%d\n", __func__, ctx->field_order, buf->buffer.field);
16835+        ctx->field_order = buf->buffer.field;
16836+    }
16837+
16838+    buf->buffer.timestamp = pts_track_add_frame(&ctx->track, frame);
16839+    buf->drm_frame.objects[0].fd = drm_desc->objects[0].fd;
16840+    av_frame_move_ref(&buf->frame, frame);  // the buffer now owns the caller's references
16841+    ret = deint_v4l2m2m_enqueue_buffer(buf);
16842+    if (ret)  // never queued: release the references now so the next user of this slot does not overwrite (and leak) them
16843+        av_frame_unref(&buf->frame);
16844+    return ret;
16845+}
16846+/* Drop one reference on the shared context; whoever releases the last one stops streaming and frees buffers, fd and the context itself. */
16847+static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx)
16848+{
16849+    if (atomic_fetch_sub(&ctx->refcount, 1) == 1) {  // fetch_sub yields the previous count, so 1 means this was the last reference
16850+        V4L2Queue *capture = &ctx->capture;
16851+        V4L2Queue *output  = &ctx->output;
16852+
16853+        av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__);
16854+
16855+        if (ctx->fd >= 0) {
16856+            deint_v4l2m2m_streamoff(capture);
16857+            deint_v4l2m2m_streamoff(output);
16858+        }
16859+        // Closes the DRM object fds of the capture buffers, frees the array and sets capture->buffers to NULL
16860+        avbufs_delete(&capture->buffers, capture->num_buffers);
16861+        // Source frames still marked as enqueued are released here
16862+        deint_v4l2m2m_unref_queued(output);
16863+
16864+        av_buffer_unref(&ctx->hw_frames_ctx);
16865+        // NOTE(review): avbufs_delete() above has already cleared capture->buffers, so this test cannot succeed
16866+        if (capture->buffers)
16867+            av_free(capture->buffers);
16868+
16869+        if (output->buffers)
16870+            av_free(output->buffers);
16871+
16872+        if (ctx->fd >= 0) {
16873+            close(ctx->fd);
16874+            ctx->fd = -1;
16875+        }
16876+
16877+        av_free(ctx);
16878+    }
16879+}
16880+/* Buffer free callback: queue the V4L2Buffer passed as opaque again unless ctx->done is set, then drop one context reference. */
16881+static void v4l2_free_buffer(void *opaque, uint8_t *unused)
16882+{
16883+    V4L2Buffer *buf                = opaque;
16884+    DeintV4L2M2MContextShared *ctx = buf->q->ctx;
16885+    // The result of the re-queue is not checked
16886+    if (!ctx->done)
16887+        deint_v4l2m2m_enqueue_buffer(buf);
16888+    // May free ctx if this was the last reference
16889+    deint_v4l2m2m_destroy_context(ctx);
16890+}
16891+
16892+// Dequeue one buffer from queue and wrap it in frame (timeout in ms). Returns 0, AVERROR(EAGAIN) if nothing was dequeued, AVERROR_EOF at end of stream, or another AVERROR code
16893+static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout)
16894+{
16895+    DeintV4L2M2MContextShared *ctx = queue->ctx;
16896+    V4L2Buffer* avbuf;
16897+    enum AVColorPrimaries color_primaries;
16898+    enum AVColorSpace colorspace;
16899+    enum AVColorTransferCharacteristic color_trc;
16900+    enum AVColorRange color_range;
16901+
16902+    av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__);
16903+
16904+    if (queue->eos) {
16905+        av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: EOS\n", __func__);
16906+        return AVERROR_EOF;
16907+    }
16908+
16909+    avbuf = deint_v4l2m2m_dequeue_buffer(queue, timeout);
16910+    if (!avbuf) {
16911+        av_log(ctx->logctx, AV_LOG_DEBUG, "%s: No buffer to dequeue (timeout=%d)\n", __func__, timeout);
16912+        return AVERROR(EAGAIN);
16913+    }
16914+
16915+    if (V4L2_TYPE_IS_CAPTURE(avbuf->buffer.type)) {
16916+        if ((avbuf->buffer.flags & V4L2_BUF_FLAG_LAST) != 0)
16917+            queue->eos = 1;
16918+        if (buf_bytesused0(&avbuf->buffer) == 0)
16919+            return queue->eos ? AVERROR_EOF : AVERROR(EINVAL);  // NOTE(review): avbuf is not re-queued on this path - confirm intended
16920+    }
16921+
16922+    // Fill in PTS and ancillary info from src frame
16923+    pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame);
16924+
16925+    frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame,
16926+                            sizeof(avbuf->drm_frame), v4l2_free_buffer,
16927+                            avbuf, AV_BUFFER_FLAG_READONLY);
16928+    if (!frame->buf[0]) {
16929+        av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d creating buffer\n", __func__, 0);
16930+        return AVERROR(ENOMEM);
16931+    }
16932+
16933+    atomic_fetch_add(&ctx->refcount, 1);  // released by v4l2_free_buffer when buf[0] is freed
16934+
16935+    frame->data[0] = (uint8_t *)&avbuf->drm_frame;
16936+    frame->format = AV_PIX_FMT_DRM_PRIME;
16937+    if (ctx->hw_frames_ctx && !(frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx)))
16938+        return AVERROR(ENOMEM);  // ref failed: freeing frame runs v4l2_free_buffer on buf[0], recycling avbuf
16939+    frame->height = ctx->output_height;
16940+    frame->width = ctx->output_width;
16941+
16942+    color_primaries = get_color_primaries(&ctx->capture.format);
16943+    colorspace      = get_color_space(&ctx->capture.format);
16944+    color_trc       = get_color_trc(&ctx->capture.format);
16945+    color_range     = get_color_range(&ctx->capture.format);
16946+
16947+    // If the color parameters are unspecified by V4L2 then leave alone as they
16948+    // will have been copied from src
16949+    if (color_primaries != AVCOL_PRI_UNSPECIFIED)
16950+        frame->color_primaries = color_primaries;
16951+    if (colorspace != AVCOL_SPC_UNSPECIFIED)
16952+        frame->colorspace = colorspace;
16953+    if (color_trc != AVCOL_TRC_UNSPECIFIED)
16954+        frame->color_trc = color_trc;
16955+    if (color_range != AVCOL_RANGE_UNSPECIFIED)
16956+        frame->color_range = color_range;
16957+
16958+    if (ctx->filter_type == FILTER_V4L2_DEINTERLACE) {
16959+        // Not interlaced now
16960+        frame->interlaced_frame = 0;   // *** Fill in from dst buffer?
16961+        frame->top_field_first = 0;
16962+        // Pkt duration halved
16963+        frame->pkt_duration /= 2;
16964+    }
16965+
16966+    if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) {
16967+        av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n");
16968+        frame->decode_error_flags |= FF_DECODE_ERROR_INVALID_BITSTREAM;
16969+    }
16970+
16971+    av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: PTS=%"PRId64"\n", __func__, frame->pts);
16972+    return 0;
16973+}
16974+// Output link config: derive output size, time base and aspect ratio from the input link, then find the V4L2 device
16975+static int deint_v4l2m2m_config_props(AVFilterLink *outlink)
16976+{
16977+    AVFilterLink *inlink           = outlink->src->inputs[0];
16978+    AVFilterContext *avctx         = outlink->src;
16979+    DeintV4L2M2MContext *priv      = avctx->priv;
16980+    DeintV4L2M2MContextShared *ctx = priv->shared;
16981+    int ret;
16982+
16983+    ctx->height = avctx->inputs[0]->h;
16984+    ctx->width = avctx->inputs[0]->w;
16985+
16986+    if (ctx->filter_type == FILTER_V4L2_SCALE) {  // scaler: output size comes from the w/h option expressions
16987+        if ((ret = ff_scale_eval_dimensions(priv,
16988+                                            priv->w_expr, priv->h_expr,
16989+                                            inlink, outlink,
16990+                                            &ctx->output_width, &ctx->output_height)) < 0)
16991+            return ret;
16992+
16993+        ff_scale_adjust_dimensions(inlink, &ctx->output_width, &ctx->output_height,
16994+                                   priv->force_original_aspect_ratio, priv->force_divisible_by);
16995+    }
16996+    else {  // any other filter type: output size equals input size
16997+        ctx->output_width  = ctx->width;
16998+        ctx->output_height = ctx->height;
16999+    }
17000+    // NOTE: outlink->frame_rate is logged here before this function assigns it below
17001+    av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d FR: %d/%d->%d/%d\n", __func__,
17002+           ctx->width, ctx->height, ctx->output_width, ctx->output_height,
17003+           inlink->frame_rate.num, inlink->frame_rate.den, outlink->frame_rate.num, outlink->frame_rate.den);
17004+
17005+    outlink->time_base           = inlink->time_base;
17006+    outlink->w                   = ctx->output_width;
17007+    outlink->h                   = ctx->output_height;
17008+    outlink->format              = inlink->format;
17009+    if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && inlink->frame_rate.den != 0)
17010+        outlink->frame_rate = (AVRational){inlink->frame_rate.num * 2, inlink->frame_rate.den};  // deinterlacer doubles the rate
17011+    // Scale the SAR by (in_w*out_h)/(out_w*in_h) so that w*SAR/h is the same before and after the resize
17012+    if (inlink->sample_aspect_ratio.num)
17013+        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio);
17014+    else
17015+        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
17016+
17017+    ret = deint_v4l2m2m_find_device(ctx);
17018+    if (ret)
17019+        return ret;
17020+
17021+    if (inlink->hw_frames_ctx) {  // keep our own reference; attached to output frames in deint_v4l2m2m_dequeue_frame
17022+        ctx->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);
17023+        if (!ctx->hw_frames_ctx)
17024+            return AVERROR(ENOMEM);
17025+    }
17026+    return 0;
17027+}
17028+// Map a DRM frame descriptor to the matching V4L2 pixel format; returns 0 if the layout is not supported
17029+static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc)
17030+{
17031+    const uint64_t mod = drm_desc->objects[0].format_modifier;
17032+    const int is_linear = (mod == DRM_FORMAT_MOD_LINEAR || mod == DRM_FORMAT_MOD_INVALID);  // an INVALID modifier is treated as linear
17033+
17034+    // Only currently support single object things
17035+    if (drm_desc->nb_objects != 1)
17036+        return 0;
17037+
17038+    switch (drm_desc->layers[0].format) {
17039+    case DRM_FORMAT_YUV420:
17040+        return is_linear ? V4L2_PIX_FMT_YUV420 : 0;
17041+    case DRM_FORMAT_NV12:
17042+        return is_linear ? V4L2_PIX_FMT_NV12 :
17043+#if CONFIG_SAND
17044+            fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? V4L2_PIX_FMT_NV12_COL128 :
17045+#endif
17046+            0;
17047+    default:
17048+        break;
17049+    }
17050+    return 0;
17051+}
17052+// Queue one input frame on the V4L2 OUTPUT queue; the first frame also configures both queues and starts streaming
17053+static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in)
17054+{
17055+    AVFilterContext *avctx         = link->dst;
17056+    DeintV4L2M2MContext *priv      = avctx->priv;
17057+    DeintV4L2M2MContextShared *ctx = priv->shared;
17058+    V4L2Queue *capture             = &ctx->capture;
17059+    V4L2Queue *output              = &ctx->output;
17060+    int ret;
17061+
17062+    av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" dts: %"PRId64" field :%d interlaced: %d aspect:%d/%d\n",
17063+           __func__, in->pts, in->pkt_dts, in->top_field_first, in->interlaced_frame, in->sample_aspect_ratio.num, in->sample_aspect_ratio.den);
17064+    av_log(priv, AV_LOG_DEBUG, "--- %s: in status in %d/ot %d; out status in %d/out %d\n", __func__,
17065+           avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out);
17066+
17067+    if (ctx->field_order == V4L2_FIELD_ANY) {  // still the value set at init => one-time setup not done yet
17068+        const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0];
17069+        uint32_t pixelformat = desc_pixelformat(drm_desc);
17070+
17071+        if (pixelformat == 0) {
17072+            av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n",
17073+                   av_fourcc2str(drm_desc->layers[0].format),
17074+                   drm_desc->nb_objects, drm_desc->objects[0].format_modifier);
17075+            return AVERROR(EINVAL);
17076+        }
17077+        // Source geometry from the descriptor: plane 0 pitch, and how many pitch-sized rows precede plane 1
17078+        ctx->orig_width = drm_desc->layers[0].planes[0].pitch;
17079+        ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width;
17080+
17081+        av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height,
17082+           drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset);
17083+
17084+        if ((ret = set_src_fmt(output, in)) != 0) {
17085+            av_log(avctx, AV_LOG_WARNING, "Unknown input DRM format: %s mod: %#" PRIx64 "\n",
17086+                   av_fourcc2str(drm_desc->layers[0].format), drm_desc->objects[0].format_modifier);
17087+            return ret;
17088+        }
17089+
17090+        ret = do_s_fmt(output);
17091+        if (ret) {
17092+            av_log(avctx, AV_LOG_WARNING, "Failed to set source format\n");
17093+            return ret;
17094+        }
17095+        // Destination uses the source pixel format unless an explicit output format was requested
17096+        if (ctx->output_format != AV_PIX_FMT_NONE)
17097+           pixelformat = fmt_av_to_v4l2(ctx->output_format);
17098+        ret = set_dst_format(priv, capture, pixelformat, V4L2_FIELD_NONE, ctx->output_width, ctx->output_height);
17099+        if (ret) {
17100+            av_log(avctx, AV_LOG_WARNING, "Failed to set destination format\n");
17101+            return ret;
17102+        }
17103+
17104+        ret = deint_v4l2m2m_allocate_buffers(capture);
17105+        if (ret) {
17106+            av_log(avctx, AV_LOG_WARNING, "Failed to allocate destination buffers\n");
17107+            return ret;
17108+        }
17109+
17110+        ret = deint_v4l2m2m_streamon(capture);
17111+        if (ret) {
17112+            av_log(avctx, AV_LOG_WARNING, "Failed set destination streamon: %s\n", av_err2str(ret));
17113+            return ret;
17114+        }
17115+
17116+        ret = deint_v4l2m2m_allocate_buffers(output);
17117+        if (ret) {
17118+            av_log(avctx, AV_LOG_WARNING, "Failed to allocate src buffers\n");
17119+            return ret;
17120+        }
17121+
17122+        ret = deint_v4l2m2m_streamon(output);
17123+        if (ret) {
17124+            av_log(avctx, AV_LOG_WARNING, "Failed set src streamon: %s\n", av_err2str(ret));
17125+            return ret;
17126+        }
17127+        // Setting field_order away from V4L2_FIELD_ANY also marks the setup as done
17128+        if (in->top_field_first)
17129+            ctx->field_order = V4L2_FIELD_INTERLACED_TB;
17130+        else
17131+            ctx->field_order = V4L2_FIELD_INTERLACED_BT;
17132+        // Probe whether the device accepts V4L2_ENC_CMD_STOP; ack_inlink() uses has_enc_stop when draining
17133+        {
17134+            struct v4l2_encoder_cmd ecmd = {
17135+                .cmd = V4L2_ENC_CMD_STOP
17136+            };
17137+            ctx->has_enc_stop = 0;
17138+            if (ioctl(ctx->fd, VIDIOC_TRY_ENCODER_CMD, &ecmd) == 0) {
17139+                av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop succeeded\n");
17140+                ctx->has_enc_stop = 1;
17141+            }
17142+            else {
17143+                av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop fail: %s\n", av_err2str(AVERROR(errno)));
17144+            }
17145+
17146+        }
17147+    }
17148+
17149+    ret = deint_v4l2m2m_enqueue_frame(output, in);
17150+
17151+    av_log(priv, AV_LOG_TRACE, ">>> %s: %s\n", __func__, av_err2str(ret));
17152+    return ret;
17153+}
17154+// Acknowledge a pending status on the input link and choose how to drain; returns 1 if a status was taken, else 0
17155+static int
17156+ack_inlink(AVFilterContext * const avctx, DeintV4L2M2MContextShared *const s,
17157+           AVFilterLink * const inlink)
17158+{
17159+    int instatus;
17160+    int64_t inpts;
17161+
17162+    if (ff_inlink_acknowledge_status(inlink, &instatus, &inpts) <= 0)
17163+        return 0;
17164+
17165+    s->drain      = instatus;
17166+    s->drain_pts  = inpts;
17167+    s->drain_state = DRAIN_TIMEOUT;  // default; refined by the cases below
17168+
17169+    if (s->field_order == V4L2_FIELD_ANY) {  // Not yet started
17170+        s->drain_state = DRAIN_DONE;
17171+    }
17172+    else if (s->one_to_one) {
17173+        s->drain_state = DRAIN_LAST;
17174+    }
17175+    else if (s->has_enc_stop) {  // device accepted the V4L2_ENC_CMD_STOP probe during setup
17176+        struct v4l2_encoder_cmd ecmd = {
17177+            .cmd = V4L2_ENC_CMD_STOP
17178+        };
17179+        if (ioctl(s->fd, VIDIOC_ENCODER_CMD, &ecmd) == 0) {
17180+            av_log(avctx->priv, AV_LOG_DEBUG, "Do Encode stop\n");
17181+            s->drain_state = DRAIN_EOS;
17182+        }
17183+        else {  // stop command failed: drain_state stays DRAIN_TIMEOUT
17184+            av_log(avctx->priv, AV_LOG_WARNING, "Encode stop fail: %s\n", av_err2str(AVERROR(errno)));
17185+        }
17186+    }
17187+    return 1;
17188+}
17189+// Filter activate callback: dequeue at most one processed frame and send it downstream, then feed input frames to the device
17190+static int deint_v4l2m2m_activate(AVFilterContext *avctx)
17191+{
17192+    DeintV4L2M2MContext * const priv = avctx->priv;
17193+    DeintV4L2M2MContextShared *const s = priv->shared;
17194+    AVFilterLink * const outlink = avctx->outputs[0];
17195+    AVFilterLink * const inlink = avctx->inputs[0];
17196+    int n = 0;
17197+    int cn = 99;  // capture queue count, only used in the final trace log; stays 99 until the device is set up
17198+    int did_something = 0;
17199+
17200+    av_log(priv, AV_LOG_TRACE, "<<< %s\n", __func__);
17201+
17202+    FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx);
17203+
17204+    ack_inlink(avctx, s, inlink);
17205+
17206+    if (s->field_order != V4L2_FIELD_ANY)  // Can't DQ if no setup!
17207+    {
17208+        AVFrame * frame = av_frame_alloc();
17209+        int rv;
17210+
17211+        recycle_q(&s->output);
17212+        n = count_enqueued(&s->output);
17213+
17214+        if (frame == NULL) {
17215+            av_log(priv, AV_LOG_ERROR, "%s: error allocating frame\n", __func__);
17216+            return AVERROR(ENOMEM);
17217+        }
17218+        // Timeout is 300ms if drain_frame_expected() is true or more than 4 frames are queued, otherwise 0
17219+        rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame,
17220+                                         drain_frame_expected(s->drain_state) || n > 4 ? 300 : 0);
17221+        if (rv != 0) {
17222+            av_frame_free(&frame);
17223+            if (rv == AVERROR_EOF) {
17224+                av_log(priv, AV_LOG_DEBUG, "%s: --- DQ EOF\n", __func__);
17225+                s->drain_state = DRAIN_DONE;
17226+            }
17227+            else if (rv == AVERROR(EAGAIN)) {
17228+                if (s->drain_state != DRAIN_NONE) {
17229+                    av_log(priv, AV_LOG_DEBUG, "%s: --- DQ empty - drain done\n", __func__);
17230+                    s->drain_state = DRAIN_DONE;
17231+                }
17232+            }
17233+            else {
17234+                av_log(priv, AV_LOG_ERROR, ">>> %s: DQ fail: %s\n", __func__, av_err2str(rv));
17235+                return rv;
17236+            }
17237+        }
17238+        else {
17239+            frame->interlaced_frame = 0;
17240+            // frame is always consumed by filter_frame - even on error despite
17241+            // a somewhat confusing comment in the header
17242+            rv = ff_filter_frame(outlink, frame);  // NOTE(review): rv is only logged below, not returned
17243+            ++s->frames_tx;
17244+
17245+            av_log(priv, AV_LOG_TRACE, "%s: Filtered: %s\n", __func__, av_err2str(rv));
17246+            did_something = 1;
17247+
17248+            if (s->drain_state != DRAIN_NONE && pts_track_count(&s->track) == 0) {
17249+                av_log(priv, AV_LOG_DEBUG, "%s: --- DQ last - drain done\n", __func__);
17250+                s->drain_state = DRAIN_DONE;
17251+            }
17252+        }
17253+
17254+        cn = count_enqueued(&s->capture);
17255+    }
17256+
17257+    if (s->drain_state == DRAIN_DONE) {  // draining finished: pass the recorded input status downstream
17258+        ff_outlink_set_status(outlink, s->drain, s->drain_pts);
17259+        av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(s->drain));
17260+        return 0;
17261+    }
17262+
17263+    recycle_q(&s->output);
17264+    n = count_enqueued(&s->output);
17265+
17266+    while (n < 6 && !s->drain) {  // queue input until 6 are enqueued or an input status has been acknowledged
17267+        AVFrame * frame;
17268+        int rv;
17269+
17270+        if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) {
17271+            av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv));
17272+            return rv;
17273+        }
17274+
17275+        if (frame == NULL) {
17276+            av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__);
17277+            if (!ack_inlink(avctx, s, inlink)) {  // no status change either: ask upstream for more input
17278+                ff_inlink_request_frame(inlink);
17279+                av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__);
17280+            }
17281+            break;
17282+        }
17283+        ++s->frames_rx;
17284+
17285+        rv = deint_v4l2m2m_filter_frame(inlink, frame);
17286+        av_frame_free(&frame);
17287+
17288+        if (rv != 0)
17289+            return rv;
17290+
17291+        av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__);
17292+        did_something = 1;
17293+        ++n;
17294+    }
17295+    // Reschedule ourselves when output is wanted and either more than 4 frames are queued or we are draining
17296+    if ((n > 4 || s->drain) && ff_outlink_frame_wanted(outlink)) {
17297+        ff_filter_set_ready(avctx, 1);
17298+        did_something = 1;
17299+        av_log(priv, AV_LOG_TRACE, "%s: ready\n", __func__);
17300+    }
17301+
17302+    av_log(priv, AV_LOG_TRACE, ">>> %s: OK (n=%d, cn=%d)\n", __func__, n, cn);
17303+    return did_something ? 0 : FFERROR_NOT_READY;
17304+}
17305+// Shared init for both filters: allocate the refcounted shared context, set queue defaults and parse string options
17306+static av_cold int common_v4l2m2m_init(AVFilterContext * const avctx, const filter_type_v4l2_t filter_type)
17307+{
17308+    DeintV4L2M2MContext * const priv = avctx->priv;
17309+    DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared));
17310+
17311+    if (!ctx) {
17312+        av_log(priv, AV_LOG_ERROR, "%s: error %d allocating context\n", __func__, 0);
17313+        return AVERROR(ENOMEM);
17314+    }
17315+    priv->shared = ctx;  // stored before any later error return in this function
17316+    ctx->logctx = priv;
17317+    ctx->filter_type = filter_type;
17318+    ctx->fd = -1;  // no device open yet
17319+    ctx->output.ctx = ctx;
17320+    ctx->output.num_buffers = 8;
17321+    ctx->output.name = "OUTPUT";
17322+    ctx->capture.ctx = ctx;
17323+    ctx->capture.num_buffers = 12;
17324+    ctx->capture.name = "CAPTURE";
17325+    ctx->done = 0;
17326+    ctx->field_order = V4L2_FIELD_ANY;  // doubles as the "device not yet set up" marker
17327+
17328+    pts_track_init(&ctx->track, priv);
17329+
17330+    atomic_init(&ctx->refcount, 1);  // the filter's own reference, dropped in deint_v4l2m2m_uninit
17331+
17332+    if (priv->output_format_string) {
17333+        ctx->output_format = av_get_pix_fmt(priv->output_format_string);
17334+        if (ctx->output_format == AV_PIX_FMT_NONE) {
17335+            av_log(avctx, AV_LOG_ERROR, "Invalid ffmpeg output format '%s'.\n", priv->output_format_string);
17336+            return AVERROR(EINVAL);
17337+        }
17338+        if (fmt_av_to_v4l2(ctx->output_format) == 0) {
17339+            av_log(avctx, AV_LOG_ERROR, "Unsupported output format for V4L2: %s.\n", av_get_pix_fmt_name(ctx->output_format));
17340+            return AVERROR(EINVAL);
17341+        }
17342+    } else {
17343+        // Use the input format once that is configured.
17344+        ctx->output_format = AV_PIX_FMT_NONE;
17345+    }
17346+    // STRING_OPTION: turn priv-><name>_string into priv-><name> via av_<func>_from_name(), or use the default if unset
17347+#define STRING_OPTION(var_name, func_name, default_value) do { \
17348+        if (priv->var_name ## _string) { \
17349+            int var = av_ ## func_name ## _from_name(priv->var_name ## _string); \
17350+            if (var < 0) { \
17351+                av_log(avctx, AV_LOG_ERROR, "Invalid %s.\n", #var_name); \
17352+                return AVERROR(EINVAL); \
17353+            } \
17354+            priv->var_name = var; \
17355+        } else { \
17356+            priv->var_name = default_value; \
17357+        } \
17358+    } while (0)
17359+
17360+    STRING_OPTION(colour_primaries, color_primaries, AVCOL_PRI_UNSPECIFIED);
17361+    STRING_OPTION(colour_transfer,  color_transfer,  AVCOL_TRC_UNSPECIFIED);
17362+    STRING_OPTION(colour_matrix,    color_space,     AVCOL_SPC_UNSPECIFIED);
17363+    STRING_OPTION(chroma_location,  chroma_location, AVCHROMA_LOC_UNSPECIFIED);
17364+
17365+    return 0;
17366+}
17367+// init callback for deinterlace_v4l2m2m: common init with the deinterlace filter type
17368+static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx)
17369+{
17370+    return common_v4l2m2m_init(avctx, FILTER_V4L2_DEINTERLACE);
17371+}
17372+// init callback for scale_v4l2m2m: common init with the scale filter type, then flag the context as one_to_one
17373+static av_cold int scale_v4l2m2m_init(AVFilterContext *avctx)
17374+{
17375+    int rv;
17376+    DeintV4L2M2MContext * priv;
17377+    DeintV4L2M2MContextShared * ctx;
17378+
17379+    if ((rv = common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE)) != 0)
17380+        return rv;
17381+
17382+    priv = avctx->priv;
17383+    ctx = priv->shared;
17384+
17385+    ctx->one_to_one = 1;  // makes ack_inlink() choose DRAIN_LAST when the input ends
17386+    return 0;
17387+}
17388+
17389+// Filter uninit. libavfilter may call this even when init failed, so cope with a missing shared context
17390+static void deint_v4l2m2m_uninit(AVFilterContext *avctx)
17391+{
17392+    DeintV4L2M2MContext *priv = avctx->priv;
17393+    DeintV4L2M2MContextShared *ctx = priv->shared;
17394+    if (ctx == NULL) return;  // av_mallocz failed in common_v4l2m2m_init: nothing to release
17395+    av_log(priv, AV_LOG_VERBOSE, "Frames Rx: %u, Frames Tx: %u\n", ctx->frames_rx, ctx->frames_tx);
17396+    ctx->done = 1;       // v4l2_free_buffer stops re-queuing buffers from here on
17397+    ctx->logctx = NULL;  // Log to NULL works, log to missing crashes
17398+    pts_track_uninit(&ctx->track);
17399+    deint_v4l2m2m_destroy_context(ctx);  // drops the filter's reference; frames still in flight keep ctx alive
17400+}
17401+
17402+static const AVOption deinterlace_v4l2m2m_options[] = {
17403+    { NULL },
17404+};
17405+
17406+AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m);
17407+
17408+#define OFFSET(x) offsetof(DeintV4L2M2MContext, x)
17409+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
17410+
17411+static const AVOption scale_v4l2m2m_options[] = {
17412+    { "w", "Output video width",
17413+      OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS },
17414+    { "h", "Output video height",
17415+      OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS },
17416+    { "format", "Output video format (software format of hardware frames)",
17417+      OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS },
17418+      // These colour properties match the ones of the same name in vf_scale.
17419+      { "out_color_matrix", "Output colour matrix coefficient set",
17420+      OFFSET(colour_matrix_string), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
17421+    { "out_range", "Output colour range",
17422+      OFFSET(colour_range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_UNSPECIFIED },
17423+      AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, FLAGS, "range" },
17424+        { "full",    "Full range",
17425+          0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" },
17426+        { "limited", "Limited range",
17427+          0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" },
17428+        { "jpeg",    "Full range",
17429+          0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" },
17430+        { "mpeg",    "Limited range",
17431+          0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" },
17432+        { "tv",      "Limited range",
17433+          0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" },
17434+        { "pc",      "Full range",
17435+          0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" },
17436+    // These colour properties match the ones in the VAAPI scaler
17437+    { "out_color_primaries", "Output colour primaries",
17438+      OFFSET(colour_primaries_string), AV_OPT_TYPE_STRING,
17439+      { .str = NULL }, .flags = FLAGS },
17440+    { "out_color_transfer", "Output colour transfer characteristics",
17441+      OFFSET(colour_transfer_string),  AV_OPT_TYPE_STRING,
17442+      { .str = NULL }, .flags = FLAGS },
17443+    { "out_chroma_location", "Output chroma sample location",
17444+      OFFSET(chroma_location_string),  AV_OPT_TYPE_STRING,
17445+      { .str = NULL }, .flags = FLAGS },
17446+    { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, "force_oar" },
17447+    { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS },
17448+    { NULL },
17449+};
17450+
17451+AVFILTER_DEFINE_CLASS(scale_v4l2m2m);
17452+
17453+static const AVFilterPad deint_v4l2m2m_inputs[] = {
17454+    {
17455+        .name         = "default",
17456+        .type         = AVMEDIA_TYPE_VIDEO,
17457+    },
17458+};
17459+
17460+static const AVFilterPad deint_v4l2m2m_outputs[] = {
17461+    {
17462+        .name          = "default",
17463+        .type          = AVMEDIA_TYPE_VIDEO,
17464+        .config_props  = deint_v4l2m2m_config_props,
17465+    },
17466+};
17467+// Filter definition: V4L2 M2M deinterlacer operating on DRM_PRIME frames
17468+AVFilter ff_vf_deinterlace_v4l2m2m = {
17469+    .name           = "deinterlace_v4l2m2m",
17470+    .description    = NULL_IF_CONFIG_SMALL("V4L2 M2M deinterlacer"),
17471+    .priv_size      = sizeof(DeintV4L2M2MContext),
17472+    .init           = &deint_v4l2m2m_init,
17473+    .uninit         = &deint_v4l2m2m_uninit,
17474+    FILTER_INPUTS(deint_v4l2m2m_inputs),
17475+    FILTER_OUTPUTS(deint_v4l2m2m_outputs),
17476+    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME),  // video filter: declare a pixel format, not a sample format
17477+    .priv_class     = &deinterlace_v4l2m2m_class,
17478+    .activate       = deint_v4l2m2m_activate,
17479+};
17480+// Filter definition: V4L2 M2M scaler operating on DRM_PRIME frames (shares uninit/activate with the deinterlacer)
17481+AVFilter ff_vf_scale_v4l2m2m = {
17482+    .name           = "scale_v4l2m2m",
17483+    .description    = NULL_IF_CONFIG_SMALL("V4L2 M2M scaler"),
17484+    .priv_size      = sizeof(DeintV4L2M2MContext),
17485+    .init           = &scale_v4l2m2m_init,
17486+    .uninit         = &deint_v4l2m2m_uninit,
17487+    FILTER_INPUTS(deint_v4l2m2m_inputs),
17488+    FILTER_OUTPUTS(deint_v4l2m2m_outputs),
17489+    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME),  // video filter: declare a pixel format, not a sample format
17490+    .priv_class     = &scale_v4l2m2m_class,
17491+    .activate       = deint_v4l2m2m_activate,
17492+};
17493+
17494--- /dev/null
17495+++ b/libavfilter/vf_unsand.c
17496@@ -0,0 +1,228 @@
17497+/*
17498+ * Copyright (c) 2007 Bobby Bingham
17499+ *
17500+ * This file is part of FFmpeg.
17501+ *
17502+ * FFmpeg is free software; you can redistribute it and/or
17503+ * modify it under the terms of the GNU Lesser General Public
17504+ * License as published by the Free Software Foundation; either
17505+ * version 2.1 of the License, or (at your option) any later version.
17506+ *
17507+ * FFmpeg is distributed in the hope that it will be useful,
17508+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17509+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17510+ * Lesser General Public License for more details.
17511+ *
17512+ * You should have received a copy of the GNU Lesser General Public
17513+ * License along with FFmpeg; if not, write to the Free Software
17514+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17515+ */
17516+
17517+/**
17518+ * @file
17519+ * unsand video filter: converts sand pixel formats to planar YUV
17520+ */
17521+
17522+#include <string.h>
17523+
17524+#include "libavutil/internal.h"
17525+#include "libavutil/mem.h"
17526+#include "libavutil/pixdesc.h"
17527+#include "libavutil/opt.h"
17528+#include "libavutil/rpi_sand_fns.h"
17529+
17530+#include "avfilter.h"
17531+#include "formats.h"
17532+#include "internal.h"
17533+#include "video.h"
17534+
17535+typedef struct UnsandContext {
17536+    const AVClass *class;
17537+} UnsandContext;
17538+
17539+static av_cold void uninit(AVFilterContext *ctx)
17540+{
17541+//    UnsandContext *s = ctx->priv;
17542+}
17543+
17544+static av_cold int init(AVFilterContext *ctx)
17545+{
17546+//    UnsandContext *s = ctx->priv;
17547+
17548+    return 0;
17549+}
17550+
17551+// Input pad callback: convert a frame to the output link's format (or pass it through) and send it on; always consumes in
17552+static int filter_frame(AVFilterLink *link, AVFrame *in)
17553+{
17554+    AVFilterLink * const outlink = link->dst->outputs[0];
17555+    AVFrame *out = NULL;
17556+    int rv = 0;
17557+
17558+    if (outlink->format == in->format) {
17559+        // If nothing to do then do nothing
17560+        out = in;
17561+    }
17562+    else
17563+    {
17564+        if ((out = ff_get_video_buffer(outlink, av_frame_cropped_width(in), av_frame_cropped_height(in))) == NULL)
17565+        {
17566+            rv = AVERROR(ENOMEM);
17567+            goto fail;
17568+        }
17569+        if (av_rpi_sand_to_planar_frame(out, in) != 0)
17570+        {
17571+            rv = -1;  // NOTE(review): bare -1 rather than the converter's return value or an AVERROR() code
17572+            goto fail;
17573+        }
17574+
17575+        av_frame_free(&in);  // converted copy made: the source frame is no longer needed
17576+    }
17577+
17578+    return ff_filter_frame(outlink, out);
17579+
17580+fail:
17581+    av_frame_free(&out);  // only reached from the conversion branch, where out is NULL or distinct from in
17582+    av_frame_free(&in);
17583+    return rv;
17584+}
17585+
17586+#if 0
17587+static void dump_fmts(const AVFilterFormats * fmts)
17588+{
17589+    int i;
17590+    if (fmts== NULL) {
17591+        printf("NULL\n");
17592+        return;
17593+    }
17594+    for (i = 0; i < fmts->nb_formats; ++i) {
17595+        printf(" %d", fmts->formats[i]);
17596+    }
17597+    printf("\n");
17598+}
17599+#endif
17600+
17601+static int query_formats(AVFilterContext *ctx)
17602+{
17603+    // Format negotiation: offer on the output the input's formats with sand variants replaced by planar YUV. Returns 0 or an AVERROR code
17604+    int ret;
17605+
17606+    // If we aren't connected at both ends then just do nothing
17607+    if (ctx->inputs[0] == NULL || ctx->outputs[0] == NULL)
17608+        return 0;
17609+
17610+    // Our output formats depend on our input formats and we can't/don't
17611+    // want to convert between bit depths so we need to wait for the source
17612+    // to have an opinion before we do
17613+    if (ctx->inputs[0]->incfg.formats == NULL)
17614+        return AVERROR(EAGAIN);
17615+
17616+    // Accept anything
17617+    if (ctx->inputs[0]->outcfg.formats == NULL &&
17618+        (ret = ff_formats_ref(ctx->inputs[0]->incfg.formats, &ctx->inputs[0]->outcfg.formats)) < 0)
17619+        return ret;
17620+
17621+    // Filter out sand formats
17622+
17623+    // Generate a container if we don't already have one
17624+    if (ctx->outputs[0]->incfg.formats == NULL)
17625+    {
17626+        // Somewhat rubbish way of ensuring we have a good structure
17627+        static const enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE};
17628+        AVFilterFormats *formats = ff_make_format_list(out_fmts);
17629+
17630+        if (formats == NULL)
17631+            return AVERROR(ENOMEM);
17632+        if ((ret = ff_formats_ref(formats, &ctx->outputs[0]->incfg.formats)) < 0)
17633+            return ret;
17634+    }
17635+
17636+    // Replace old format list with new filtered list derived from what our
17637+    // input says it can do
17638+    {
17639+        const AVFilterFormats * const src_ff = ctx->inputs[0]->outcfg.formats;
17640+        AVFilterFormats * const dst_ff = ctx->outputs[0]->incfg.formats;
17641+        enum AVPixelFormat *dst_fmts = av_malloc_array(src_ff->nb_formats, sizeof(*dst_fmts));
17642+        int i, n = 0;
17643+        int seen_420p = 0, seen_420p10 = 0;
17644+        // Allocation can fail: bail out before the loop below writes through dst_fmts
17645+        if (dst_fmts == NULL)
17646+            return AVERROR(ENOMEM);
17647+        for (i = 0; i < src_ff->nb_formats; ++i) {
17648+            const enum AVPixelFormat f = src_ff->formats[i];
17649+
17650+            switch (f){
17651+                case AV_PIX_FMT_YUV420P:
17652+                case AV_PIX_FMT_SAND128:
17653+                case AV_PIX_FMT_RPI4_8:
17654+                    if (!seen_420p) {
17655+                        seen_420p = 1;
17656+                        dst_fmts[n++] = AV_PIX_FMT_YUV420P;
17657+                    }
17658+                    break;
17659+                case AV_PIX_FMT_SAND64_10:
17660+                case AV_PIX_FMT_YUV420P10:
17661+                case AV_PIX_FMT_RPI4_10:
17662+                    if (!seen_420p10) {
17663+                        seen_420p10 = 1;
17664+                        dst_fmts[n++] = AV_PIX_FMT_YUV420P10;
17665+                    }
17666+                    break;
17667+                default:
17668+                    dst_fmts[n++] = f;
17669+                    break;
17670+            }
17671+        }
17672+
17673+        av_freep(&dst_ff->formats);
17674+        dst_ff->formats = dst_fmts;
17675+        dst_ff->nb_formats = n;
17676+    }
17677+
17678+//    printf("Unsand: %s calc: ", __func__);
17679+//    dump_fmts(ctx->outputs[0]->incfg.formats);
17680+
17681+    return 0;
17682+}
17683+
17684+
17685+#define OFFSET(x) offsetof(UnsandContext, x)    // byte offset of member x inside UnsandContext; not referenced by the table below
17686+static const AVOption unsand_options[] = {      // option table for the unsand filter: holds only the terminating entry
17687+    { NULL }
17688+};
17689+
17690+
17691+AVFILTER_DEFINE_CLASS(unsand);                  // NOTE(review): presumably defines unsand_class (used by ff_vf_unsand) from unsand_options - confirm
17692+
17693+static const AVFilterPad avfilter_vf_unsand_inputs[] = {    // input pad table handed to FILTER_INPUTS() in ff_vf_unsand
17694+    {
17695+        .name             = "default",
17696+        .type             = AVMEDIA_TYPE_VIDEO,
17697+        .filter_frame = filter_frame,                           // per-frame callback for this pad
17698+    },
17699+    // No { NULL } sentinel: FILTER_INPUTS() sizes the table by element count, so a sentinel would count as a second pad
17700+};
17701+
17702+static const AVFilterPad avfilter_vf_unsand_outputs[] = {   // output pad table handed to FILTER_OUTPUTS() in ff_vf_unsand
17703+    {
17704+        .name = "default",
17705+        .type = AVMEDIA_TYPE_VIDEO                              // single video pad; no callbacks are set on it
17706+    },
17707+};
17708+
17709+AVFilter ff_vf_unsand = {                                    // filter definition object for the "unsand" filter
17710+    .name          = "unsand",
17711+    .description   = NULL_IF_CONFIG_SMALL("Convert sand pix fmt to yuv"),
17712+
17713+    .init          = init,
17714+    .uninit        = uninit,
17715+
17716+    FILTER_QUERY_FUNC(query_formats),                        // format negotiation goes through query_formats()
17717+
17718+    .priv_size     = sizeof(UnsandContext),                  // private context is an UnsandContext
17719+    .priv_class    = &unsand_class,
17720+
17721+    FILTER_INPUTS(avfilter_vf_unsand_inputs),
17722+    FILTER_OUTPUTS(avfilter_vf_unsand_outputs),
17723+};
17724+
17725--- a/libavfilter/x86/vf_bwdif_init.c
17726+++ b/libavfilter/x86/vf_bwdif_init.c
17727@@ -42,11 +42,9 @@ void ff_bwdif_filter_line_12bit_ssse3(vo
17728                                       int mrefs2, int prefs3, int mrefs3, int prefs4,
17729                                       int mrefs4, int parity, int clip_max);
17730
17731-av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif)
17732+av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
17733 {
17734-    YADIFContext *yadif = &bwdif->yadif;
17735     int cpu_flags = av_get_cpu_flags();
17736-    int bit_depth = (!yadif->csp) ? 8 : yadif->csp->comp[0].depth;
17737
17738     if (bit_depth <= 8) {
17739         if (EXTERNAL_SSE2(cpu_flags))
17740--- a/libavformat/matroskaenc.c
17741+++ b/libavformat/matroskaenc.c
17742@@ -75,6 +75,10 @@
17743
17744 #define IS_WEBM(mkv) (CONFIG_WEBM_MUXER && CONFIG_MATROSKA_MUXER ? \
17745                       ((mkv)->mode == MODE_WEBM) : CONFIG_WEBM_MUXER)
17746+
17747+/* Reserved size for H264 headers if not extant at init time */
17748+#define MAX_H264_HEADER_SIZE 1024
17749+
17750 #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \
17751                               !(mkv)->is_live)
17752
17753@@ -1119,8 +1123,12 @@ static int mkv_assemble_native_codecpriv
17754     case AV_CODEC_ID_WAVPACK:
17755         return put_wv_codecpriv(dyn_cp, extradata, extradata_size);
17756     case AV_CODEC_ID_H264:
17757-        return ff_isom_write_avcc(dyn_cp, extradata,
17758-                                  extradata_size);
17759+        if (extradata_size)
17760+            return ff_isom_write_avcc(dyn_cp, extradata,
17761+                                      extradata_size);
17762+        else
17763+            *size_to_reserve = MAX_H264_HEADER_SIZE;
17764+        break;
17765     case AV_CODEC_ID_HEVC:
17766         return ff_isom_write_hvcc(dyn_cp, extradata,
17767                                   extradata_size, 0);
17768@@ -2726,8 +2734,8 @@ static int mkv_check_new_extra_data(AVFo
17769         }
17770         break;
17771 #endif
17772-    // FIXME: Remove the following once libaom starts propagating proper extradata during init()
17773-    //        See https://bugs.chromium.org/p/aomedia/issues/detail?id=2208
17774+    // FIXME: Remove the following once libaom starts propagating extradata during init()
17775+    //        See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012
17776     case AV_CODEC_ID_AV1:
17777         if (side_data_size && mkv->track.bc && !par->extradata_size) {
17778             // If the reserved space doesn't suffice, only write
17779@@ -2739,6 +2747,16 @@ static int mkv_check_new_extra_data(AVFo
17780         } else if (!par->extradata_size)
17781             return AVERROR_INVALIDDATA;
17782         break;
17783+    // H264 V4L2 has a similar issue
17784+    case AV_CODEC_ID_H264:
17785+        if (side_data_size && mkv->track.bc && !par->extradata_size) {
17786+            ret = mkv_update_codecprivate(s, mkv, side_data, side_data_size,
17787+                                          par, mkv->track.bc, track, 0);
17788+            if (ret < 0)
17789+                return ret;
17790+        } else if (!par->extradata_size)
17791+            return AVERROR_INVALIDDATA;
17792+        break;
17793     default:
17794         if (side_data_size)
17795             av_log(s, AV_LOG_DEBUG, "Ignoring new extradata in a packet for stream %d.\n", pkt->stream_index);
17796@@ -3171,9 +3189,15 @@ static int mkv_init(struct AVFormatConte
17797             track->reformat = mkv_reformat_wavpack;
17798             break;
17799         case AV_CODEC_ID_H264:
17800+            // Default to reformat if no extradata as the only current
17801+            // encoder which does this is v4l2m2m which needs reformat
17802+            if (par->extradata_size == 0 ||
17803+                (par->extradata_size > 3 &&
17804+                 (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1)))
17805+                track->reformat = mkv_reformat_h2645;
17806+            break;
17807         case AV_CODEC_ID_HEVC:
17808-            if ((par->codec_id == AV_CODEC_ID_H264 && par->extradata_size > 0 ||
17809-                 par->codec_id == AV_CODEC_ID_HEVC && par->extradata_size > 6) &&
17810+            if (par->extradata_size > 6 &&
17811                 (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1))
17812                 track->reformat = mkv_reformat_h2645;
17813             break;
17814--- a/libavformat/movenc.c
17815+++ b/libavformat/movenc.c
17816@@ -6318,6 +6318,7 @@ static int mov_write_single_packet(AVFor
17817     if (trk->par->codec_id == AV_CODEC_ID_MP4ALS ||
17818             trk->par->codec_id == AV_CODEC_ID_AAC ||
17819             trk->par->codec_id == AV_CODEC_ID_AV1 ||
17820+            trk->par->codec_id == AV_CODEC_ID_H264 ||
17821             trk->par->codec_id == AV_CODEC_ID_FLAC) {
17822         size_t side_size;
17823         uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
17824--- a/libavformat/rtpenc.c
17825+++ b/libavformat/rtpenc.c
17826@@ -19,6 +19,7 @@
17827  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17828  */
17829
17830+#include "avc.h"
17831 #include "avformat.h"
17832 #include "mpegts.h"
17833 #include "internal.h"
17834@@ -584,8 +585,25 @@ static int rtp_write_packet(AVFormatCont
17835         ff_rtp_send_vc2hq(s1, pkt->data, size, st->codecpar->field_order != AV_FIELD_PROGRESSIVE ? 1 : 0);
17836         break;
17837     case AV_CODEC_ID_H264:
17838+    {
17839+        uint8_t *side_data;
17840+        size_t side_data_size = 0;
17841+
17842+        side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA,
17843+                                            &side_data_size);
17844+
17845+        if (side_data_size != 0) {
17846+            int ps_size = side_data_size;
17847+            uint8_t * ps_buf = NULL;
17848+
17849+            ff_avc_write_annexb_extradata(side_data, &ps_buf, &ps_size);
17850+            av_log(s1, AV_LOG_TRACE, "H264: write side data=%d\n", ps_size);
17851+            ff_rtp_send_h264_hevc(s1, ps_buf ? ps_buf : side_data, ps_size);
17852+            av_free(ps_buf);
17853+        }
17854         ff_rtp_send_h264_hevc(s1, pkt->data, size);
17855         break;
17856+    }
17857     case AV_CODEC_ID_H261:
17858         ff_rtp_send_h261(s1, pkt->data, size);
17859         break;
17860--- a/libavutil/Makefile
17861+++ b/libavutil/Makefile
17862@@ -72,6 +72,7 @@ HEADERS = adler32.h
17863           rational.h                                                    \
17864           replaygain.h                                                  \
17865           ripemd.h                                                      \
17866+	  rpi_sand_fns.h                                                \
17867           samplefmt.h                                                   \
17868           sha.h                                                         \
17869           sha512.h                                                      \
17870@@ -191,6 +192,7 @@ OBJS-$(CONFIG_MACOS_KPERF)
17871 OBJS-$(CONFIG_MEDIACODEC)               += hwcontext_mediacodec.o
17872 OBJS-$(CONFIG_OPENCL)                   += hwcontext_opencl.o
17873 OBJS-$(CONFIG_QSV)                      += hwcontext_qsv.o
17874+OBJS-$(CONFIG_SAND)                     += rpi_sand_fns.o
17875 OBJS-$(CONFIG_VAAPI)                    += hwcontext_vaapi.o
17876 OBJS-$(CONFIG_VIDEOTOOLBOX)             += hwcontext_videotoolbox.o
17877 OBJS-$(CONFIG_VDPAU)                    += hwcontext_vdpau.o
17878@@ -211,6 +213,7 @@ SKIPHEADERS-$(CONFIG_D3D11VA)          +
17879 SKIPHEADERS-$(CONFIG_DXVA2)            += hwcontext_dxva2.h
17880 SKIPHEADERS-$(CONFIG_QSV)              += hwcontext_qsv.h
17881 SKIPHEADERS-$(CONFIG_OPENCL)           += hwcontext_opencl.h
17882+SKIPHEADERS-$(CONFIG-RPI)              += rpi_sand_fn_pw.h
17883 SKIPHEADERS-$(CONFIG_VAAPI)            += hwcontext_vaapi.h
17884 SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += hwcontext_videotoolbox.h
17885 SKIPHEADERS-$(CONFIG_VDPAU)            += hwcontext_vdpau.h
17886--- a/libavutil/aarch64/Makefile
17887+++ b/libavutil/aarch64/Makefile
17888@@ -1,4 +1,6 @@
17889 OBJS += aarch64/cpu.o                                                 \
17890         aarch64/float_dsp_init.o                                      \
17891
17892-NEON-OBJS += aarch64/float_dsp_neon.o
17893+NEON-OBJS += aarch64/float_dsp_neon.o                                 \
17894+             aarch64/rpi_sand_neon.o                                  \
17895+
17896--- /dev/null
17897+++ b/libavutil/aarch64/rpi_sand_neon.S
17898@@ -0,0 +1,672 @@
17899+/*
17900+Copyright (c) 2021 Michael Eiler
17901+
17902+Redistribution and use in source and binary forms, with or without
17903+modification, are permitted provided that the following conditions are met:
17904+    * Redistributions of source code must retain the above copyright
17905+      notice, this list of conditions and the following disclaimer.
17906+    * Redistributions in binary form must reproduce the above copyright
17907+      notice, this list of conditions and the following disclaimer in the
17908+      documentation and/or other materials provided with the distribution.
17909+    * Neither the name of the copyright holder nor the
17910+      names of its contributors may be used to endorse or promote products
17911+      derived from this software without specific prior written permission.
17912+
17913+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17914+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17915+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17916+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
17917+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17918+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
17919+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
17920+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
17921+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
17922+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
17923+
17924+Authors: Michael Eiler <eiler.mike@gmail.com>
17925+*/
17926+
17927+#include "asm.S"
17928+
17929+// void ff_rpi_sand8_lines_to_planar_y8(
17930+//   uint8_t * dest,            : x0, advanced only by the bytes written (dst_stride is never added)
17931+//   unsigned int dst_stride,   : w1, only used to derive the full-block count (w1 >> 7)
17932+//   const uint8_t * src,       : x2
17933+//   unsigned int src_stride1,  : w3, always 128 (not read; 128 is hard-coded)
17934+//   unsigned int src_stride2,  : w4
17935+//   unsigned int _x,           : w5, argument ignored (w5 is reused as the tail byte counter)
17936+//   unsigned int y,            : w6, argument ignored (w6 is reused as a byte temporary)
17937+//   unsigned int _w,           : w7
17938+//   unsigned int h);           : [sp, #0]
17939+// Copies rows of bytes from 128-byte-wide source blocks into dest; the row loop runs at least once.
17940+function ff_rpi_sand8_lines_to_planar_y8, export=1
17941+    // w15 contains the number of rows we need to process
17942+    ldr w15, [sp, #0]
17943+
17944+    // w8 will contain the number of full blocks per row
17945+    // w8 = floor(dst_stride/128); NOTE(review): read from w1 (dst_stride), not _w (w7) - confirm intended
17946+    // stride1 is assumed to always be 128
17947+    mov w8, w1
17948+    lsr w8, w8, #7
17949+
17950+    // in case the width of the image is not a multiple of 128, there will
17951+    // be an incomplete block at the end of every row
17952+    // w9 contains the number of pixels stored within this block
17953+    // w9 = _w - w8 * 128 (compared as signed below, so a negative value means no tail)
17954+    lsl w9, w8, #7
17955+    sub w9, w7, w9
17956+
17957+    // this is the value we have to add to the src pointer after reading a complete block
17958+    // it will move the address to the start of the next block
17959+    // w10 = stride2 * stride1 - stride1
17960+    mov w10, w4
17961+    lsl w10, w10, #7
17962+    sub w10, w10, #128
17963+
17964+    // w11 is the row offset, meaning the start offset of the first block of every column
17965+    // this will be increased with stride1 within every iteration of the row_loop
17966+    eor w11, w11, w11
17967+
17968+    // w12 = 0, processed row count
17969+    eor w12, w12, w12
17970+row_loop:
17971+    // start of the first block within the current row
17972+    // x13 = row offset + src
17973+    mov x13, x2
17974+    add x13, x13, x11
17975+
17976+    // w14 = 0, processed block count
17977+    eor w14, w14, w14
17978+
17979+    // skip the vector loop entirely when there is no full block
17980+    cmp w8, #0
17981+    beq no_main_y8
17982+block_loop:
17983+    // copy 128 bytes (a full block) into the vector registers v0-v7 and increase the src address by 128
17984+    // fortunately these aren't callee saved ones, meaning we don't need to backup them
17985+    ld1 { v0.16b,  v1.16b,  v2.16b,  v3.16b}, [x13], #64
17986+    ld1 { v4.16b,  v5.16b,  v6.16b,  v7.16b}, [x13], #64
17987+
17988+    // write these registers back to the destination vector and increase the dst address by 128
17989+    st1 { v0.16b,  v1.16b,  v2.16b,  v3.16b }, [x0], #64
17990+    st1 { v4.16b,  v5.16b,  v6.16b,  v7.16b }, [x0], #64
17991+
17992+    // move the source register to the beginning of the next block (x13 = src + block offset)
17993+    add x13, x13, x10
17994+    // increase the block counter
17995+    add w14, w14, #1
17996+
17997+    // continue with the block_loop if we haven't copied all full blocks yet
17998+    cmp w8, w14
17999+    bgt block_loop
18000+
18001+    // handle the last block at the end of each row
18002+    // at most 127 byte values copied from src to dst
18003+no_main_y8:
18004+    eor w5, w5, w5 // i = 0, bytes copied so far in the tail
18005+incomplete_block_loop_y8:
18006+    cmp w5, w9
18007+    bge incomplete_block_loop_end_y8 // signed compare: done once i >= w9
18008+
18009+    ldrb w6, [x13] // copy one byte, then step both pointers
18010+    strb w6, [x0]
18011+    add x13, x13, #1
18012+    add x0, x0, #1
18013+
18014+    add w5, w5, #1
18015+    b incomplete_block_loop_y8
18016+incomplete_block_loop_end_y8:
18017+
18018+
18019+    // increase the row offset by 128 (stride1)
18020+    add w11, w11, #128
18021+    // increment the row counter
18022+    add w12, w12, #1
18023+
18024+    // process the next row if we haven't finished yet (signed: loop while h > rows done)
18025+    cmp w15, w12
18026+    bgt row_loop
18027+
18028+    ret
18029+endfunc
18030+
18031+
18032+
18033+// void ff_rpi_sand8_lines_to_planar_c8(
18034+//   uint8_t * dst_u,           : x0
18035+//   unsigned int dst_stride_u, : w1, row-end skip is w1 - _w and is applied to both planes
18036+//   uint8_t * dst_v,           : x2
18037+//   unsigned int dst_stride_v, : w3, not read
18038+//   const uint8_t * src,       : x4
18039+//   unsigned int stride1,      : w5 == 128 (not read; w5 is reused as the tail counter)
18040+//   unsigned int stride2,      : w6
18041+//   unsigned int _x,           : w7, argument ignored (w7 is overwritten with _w)
18042+//   unsigned int y,            : [sp, #0], not read
18043+//   unsigned int _w,           : [sp, #8]
18044+//   unsigned int h);           : [sp, #16]
18045+// Splits interleaved U/V bytes from 128-byte source blocks into the dst_u and dst_v planes.
18046+function ff_rpi_sand8_lines_to_planar_c8, export=1
18047+    // w7 = width
18048+    ldr w7, [sp, #8]
18049+
18050+    // w15 contains the number of rows we need to process
18051+    // counts down
18052+    ldr w15, [sp, #16]
18053+
18054+    // number of full blocks, w8 = _w / (stride1 >> 1) == _w / 64 == _w >> 6
18055+    mov w8, w7
18056+    lsr w8, w8, #6
18057+
18058+    // number of pixels in block at the end of every row
18059+    // w9 = _w - (w8 * 64)
18060+    lsl w9, w8, #6
18061+    sub w9, w7, w9
18062+
18063+    // Skip at the end of the line to account for stride (w12 = dst_stride_u - _w)
18064+    sub w12, w1, w7
18065+
18066+    // address delta to the beginning of the next block
18067+    // w10 = (stride2 * stride1 - stride1) = stride2 * 128 - 128
18068+    lsl w10, w6, #7
18069+    sub w10, w10, #128
18070+
18071+    // w11 = row address start offset = 0
18072+    eor w11, w11, w11
18073+
18074+row_loop_c8:
18075+    // start of the first block within the current row
18076+    // x13 = row offset + src
18077+    mov x13, x4
18078+    add x13, x13, x11
18079+
18080+    // w14 = 0, processed block count
18081+    eor w14, w14, w14
18082+
18083+    cmp w8, #0
18084+    beq no_main_c8 // no full block in this row: go straight to the tail
18085+
18086+block_loop_c8:
18087+    // load the full block -> 128 bytes, the block contains 64 interleaved U and V values
18088+    ld2 { v0.16b,  v1.16b }, [x13], #32
18089+    ld2 { v2.16b,  v3.16b }, [x13], #32
18090+    ld2 { v4.16b,  v5.16b }, [x13], #32
18091+    ld2 { v6.16b,  v7.16b }, [x13], #32
18092+
18093+    // reorder so that v0-v3 hold the even-indexed bytes and v4-v7 the odd-indexed bytes of the block
18094+    mov v16.16b, v1.16b
18095+    mov v17.16b, v3.16b
18096+    mov v18.16b, v5.16b
18097+    mov v1.16b, v2.16b
18098+    mov v2.16b, v4.16b
18099+    mov v3.16b, v6.16b
18100+    mov v4.16b, v16.16b
18101+    mov v5.16b, v17.16b
18102+    mov v6.16b, v18.16b
18103+
18104+    st1 { v0.16b,  v1.16b,  v2.16b,  v3.16b }, [x0], #64
18105+    st1 { v4.16b,  v5.16b,  v6.16b,  v7.16b }, [x2], #64
18106+
18107+    // increment block counter and move src to the beginning of the next block
18108+    add w14, w14, #1
18109+    add x13, x13, x10
18110+
18111+    // jump to block_loop_c8 iff the block count is smaller than the number of full blocks
18112+    cmp w8, w14
18113+    bgt block_loop_c8
18114+
18115+no_main_c8:
18116+    // handle incomplete block at the end of every row
18117+    eor w5, w5, w5 // w5 = 0, number of U/V pairs copied so far in the tail
18118+incomplete_block_loop_c8:
18119+    cmp w5, w9
18120+    bge incomplete_block_loop_end_c8
18121+
18122+    ldrb w1, [x13] // w1 is free as scratch here: the stride was already folded into w12
18123+    strb w1, [x0]
18124+    add x13, x13, #1
18125+
18126+    ldrb w1, [x13]
18127+    strb w1, [x2]
18128+    add x13, x13, #1
18129+
18130+    add x0, x0, #1
18131+    add x2, x2, #1
18132+
18133+    add w5, w5, #1
18134+    b incomplete_block_loop_c8
18135+incomplete_block_loop_end_c8:
18136+
18137+    // increase row_offset by stride1 and step both dest pointers over the row-end skip
18138+    add w11, w11, #128
18139+    add x0, x0, w12, sxtw
18140+    add x2, x2, w12, sxtw
18141+
18142+    // jump to row_loop_c8 while the remaining row count (w15) is still greater than zero
18143+    subs w15, w15, #1
18144+    bgt row_loop_c8
18145+
18146+    ret
18147+endfunc
18148+
18149+// Unzip chroma
18150+//
18151+// On entry:
18152+// a0 = V0, U2,  ...
18153+// a1 = U0, V1,  ...
18154+// a2 = U1, V2,  ...
18155+// b0 = V8, U10, ...
18156+// b1 = U8, V9,  ...
18157+// b2 = U9, V10, ...
18158+//
18159+// On exit:
18160+// d0 = U0, U3, ...
18161+// ...
18162+// a0 = V0, V3, ..
18163+// ...
18164+// (d0-d2 receive the U halfwords; a0-a2 are overwritten with the V halfwords; b0-b2 are only read)
18165+// Reg order for USAND is a1, a0, a2 (i.e. swap natural order of 1st 2 dest regs)
18166+
18167+.macro UZPH_C d0, d1, d2, a0, a1, a2, b0, b1, b2
18168+                uzp1            \d0\().8h, \a1\().8h, \b1\().8h    // even lanes of a1:b1
18169+                uzp1            \d1\().8h, \a2\().8h, \b2\().8h    // even lanes of a2:b2
18170+                uzp2            \d2\().8h, \a0\().8h, \b0\().8h    // odd lanes of a0:b0
18171+
18172+                uzp1            \a0\().8h, \a0\().8h, \b0\().8h    // even lanes of a0:b0
18173+                uzp2            \a1\().8h, \a1\().8h, \b1\().8h    // odd lanes of a1:b1
18174+                uzp2            \a2\().8h, \a2\().8h, \b2\().8h    // odd lanes of a2:b2
18175+.endm
18176+
18177+// SAND30 -> 10bit: splits the 8 32-bit words in a0:a1 into d0 = bits 0-9, d1 = bits 10-19, d2 = bits 20-29 (16-bit lanes)
18178+.macro USAND10 d0, d1, d2, a0, a1
18179+                shrn            \d2\().4h, \a0\().4s, #14          // keeps bits 14-29 of each word
18180+                shrn            \d1\().4h, \a0\().4s, #10          // keeps bits 10-25 of each word
18181+
18182+                shrn2           \d2\().8h, \a1\().4s, #14
18183+                shrn2           \d1\().8h, \a1\().4s, #10
18184+                uzp1            \d0\().8h, \a0\().8h, \a1\().8h    // low halfword (bits 0-15) of each word
18185+
18186+                ushr            \d2\().8h, \d2\().8h, #6           // leaves bits 20-29
18187+                bic             \d0\().8h, #0xfc,     lsl #8       // clear the top 6 bits -> 10-bit value
18188+                bic             \d1\().8h, #0xfc,     lsl #8
18189+.endm
18190+
18191+// SAND30 -> 8bit: from the 16 words in a0-a3, d0 = bits 2-9, d1 = bits 12-19, d2 = bits 22-29 (byte lanes); t0-t2 are clobbered
18192+.macro USAND8 d0, d1, d2, a0, a1, a2, a3, t0, t1, t2
18193+                shrn            \d1\().4h,  \a0\().4s,  #12        // bits 12-27 of each word in a0
18194+                shrn2           \d1\().8h,  \a1\().4s,  #12
18195+                uzp1            \d0\().8h,  \a0\().8h,  \a1\().8h  // low halfwords of a0:a1
18196+                uzp2            \d2\().8h,  \a0\().8h,  \a1\().8h  // high halfwords of a0:a1
18197+
18198+                shrn            \t1\().4h,  \a2\().4s,  #12        // same three steps for a2:a3 into t0-t2
18199+                shrn2           \t1\().8h,  \a3\().4s,  #12
18200+                uzp1            \t0\().8h,  \a2\().8h,  \a3\().8h
18201+                uzp2            \t2\().8h,  \a2\().8h,  \a3\().8h
18202+
18203+                shrn            \d0\().8b,  \d0\().8h,  #2         // narrow to bytes: bits 2-9 of each word
18204+                shrn2           \d0\().16b, \t0\().8h,  #2
18205+                shrn            \d2\().8b,  \d2\().8h,  #6         // narrow to bytes: bits 22-29 of each word
18206+                shrn2           \d2\().16b, \t2\().8h,  #6
18207+                uzp1            \d1\().16b, \d1\().16b, \t1\().16b // low byte of each halfword: bits 12-19
18208+.endm
18209+
18210+
18211+// void ff_rpi_sand30_lines_to_planar_c16(
18212+//   uint8_t * dst_u,            // [x0]
18213+//   unsigned int dst_stride_u,  // [w1]      2 * _w is subtracted to get the row-end skip
18214+//   uint8_t * dst_v,            // [x2]
18215+//   unsigned int dst_stride_v,  // [w3]      2 * _w is subtracted to get the row-end skip
18216+//   const uint8_t * src,        // [x4]
18217+//   unsigned int stride1,       // [w5]      128 (not read; x5 is reused as the source cursor)
18218+//   unsigned int stride2,       // [w6]
18219+//   unsigned int _x,            // [w7]      0 (argument ignored; w7 is overwritten with y)
18220+//   unsigned int y,             // [sp, #0]
18221+//   unsigned int _w,            // [sp, #8]  w8, copied to w9 for each row
18222+//   unsigned int h);            // [sp, #16] w10
18223+// Unpacks packed 10-bit chroma into separate 16-bit U and V planes, 48 U + 48 V samples per 128-byte read.
18224+function ff_rpi_sand30_lines_to_planar_c16, export=1
18225+                ldr             w7,  [sp, #0]                   // y
18226+                ldr             w8,  [sp, #8]                   // _w
18227+                ldr             w10, [sp, #16]                  // h
18228+                lsl             w6,  w6,  #7                    // Fixup stride2
18229+                sub             w6,  w6,  #64                   // x6 = stride2 * 128 - 64, post-increment of the 2nd load
18230+                uxtw            x6,  w6
18231+                sub             w1,  w1,  w8,  LSL #1           // Fixup chroma strides
18232+                sub             w3,  w3,  w8,  LSL #1
18233+                lsl             w7,  w7,  #7                    // Add y to src
18234+                add             x4,  x4,  w7,  UXTW
18235+10:                                                             // row loop
18236+                mov             w13, #0                         // w13 is not read again in this function
18237+                mov             x5,  x4                         // x5 = source cursor for this row
18238+                mov             w9,  w8                         // w9 = samples per plane still to write
18239+1:                                                              // one 128-byte read per iteration
18240+                ld1             {v0.4s-v3.4s}, [x5], #64
18241+                ld1             {v4.4s-v7.4s}, [x5], x6
18242+                subs            w9,  w9,  #48                   // flags are consumed by blt 2f / bne 1b below
18243+
18244+                USAND10         v17, v16, v18, v0, v1
18245+                USAND10         v20, v19, v21, v2, v3
18246+                UZPH_C          v0, v1, v2, v16, v17, v18, v19, v20, v21
18247+                USAND10         v23, v22, v24, v4, v5
18248+                USAND10         v26, v25, v27, v6, v7
18249+                UZPH_C          v4, v5, v6, v22, v23, v24, v25, v26, v27
18250+
18251+                blt             2f                              // fewer than 48 samples were left: partial write
18252+
18253+                st3             {v0.8h-v2.8h},   [x0], #48
18254+                st3             {v4.8h-v6.8h},   [x0], #48
18255+                st3             {v16.8h-v18.8h}, [x2], #48
18256+                st3             {v22.8h-v24.8h}, [x2], #48
18257+
18258+                bne             1b                              // more samples remain in this row
18259+11:                                                             // end of row
18260+                subs            w10, w10, #1
18261+                add             x4,  x4,  #128                  // next source line
18262+                add             x0,  x0,  w1,  UXTW             // skip to the start of the next dest rows
18263+                add             x2,  x2,  w3,  UXTW
18264+                bne             10b
18265+99:
18266+                ret
18267+
18268+// Partial final write: w9 holds (samples left - 48), i.e. a negative value, on entry
18269+2:
18270+                cmp             w9,  #24-48                     // at least 24 samples left?
18271+                blt             1f
18272+                st3             {v0.8h  - v2.8h},  [x0], #48
18273+                st3             {v16.8h - v18.8h}, [x2], #48
18274+                beq             11b                             // exactly 24 were left (flags from the cmp)
18275+                mov             v0.16b,  v4.16b                 // move the second half of the data down
18276+                mov             v1.16b,  v5.16b
18277+                sub             w9,  w9,  #24
18278+                mov             v2.16b,  v6.16b
18279+                mov             v16.16b, v22.16b
18280+                mov             v17.16b, v23.16b
18281+                mov             v18.16b, v24.16b
18282+1:
18283+                cmp             w9,  #12-48                     // at least 12 samples left?
18284+                blt             1f
18285+                st3             {v0.4h  - v2.4h},  [x0], #24
18286+                st3             {v16.4h - v18.4h}, [x2], #24
18287+                beq             11b
18288+                mov             v0.d[0],  v0.d[1]               // move the upper halves down
18289+                sub             w9,  w9,  #12
18290+                mov             v1.d[0],  v1.d[1]
18291+                mov             v2.d[0],  v2.d[1]
18292+                mov             v16.d[0], v16.d[1]
18293+                mov             v17.d[0], v17.d[1]
18294+                mov             v18.d[0], v18.d[1]
18295+1:
18296+                cmp             w9,  #6-48                      // at least 6 samples left?
18297+                blt             1f
18298+                st3             {v0.h  - v2.h}[0],  [x0], #6
18299+                st3             {v0.h  - v2.h}[1],  [x0], #6
18300+                st3             {v16.h - v18.h}[0], [x2], #6
18301+                st3             {v16.h - v18.h}[1], [x2], #6
18302+                beq             11b
18303+                mov             v0.s[0],  v0.s[1]               // move lanes 2-3 down to lanes 0-1
18304+                sub             w9,  w9,  #6
18305+                mov             v1.s[0],  v1.s[1]
18306+                mov             v2.s[0],  v2.s[1]
18307+                mov             v16.s[0], v16.s[1]
18308+                mov             v17.s[0], v17.s[1]
18309+                mov             v18.s[0], v18.s[1]
18310+1:
18311+                cmp             w9,  #3-48                      // at least 3 samples left?
18312+                blt             1f
18313+                st3             {v0.h  - v2.h}[0],  [x0], #6
18314+                st3             {v16.h - v18.h}[0], [x2], #6
18315+                beq             11b
18316+                mov             v0.h[0],  v0.h[1]               // at most 2 samples remain, so v2 / v18 are not needed
18317+                sub             w9,  w9,  #3
18318+                mov             v1.h[0],  v1.h[1]
18319+                mov             v16.h[0], v16.h[1]
18320+                mov             v17.h[0], v17.h[1]
18321+1:
18322+                cmp             w9,  #2-48                      // 2 samples left?
18323+                blt             1f
18324+                st2             {v0.h  - v1.h}[0],  [x0], #4
18325+                st2             {v16.h - v17.h}[0], [x2], #4
18326+                b               11b
18327+1:                                                              // single remaining sample
18328+                st1             {v0.h}[0],  [x0], #2
18329+                st1             {v16.h}[0], [x2], #2
18330+                b               11b
18331+endfunc
18332+
18333+
18334+//void ff_rpi_sand30_lines_to_planar_p010(
18335+//  uint8_t * dest,
18336+//  unsigned int dst_stride,
18337+//  const uint8_t * src,
18338+//  unsigned int src_stride1,
18339+//  unsigned int src_stride2,
18340+//  unsigned int _x,
18341+//  unsigned int y,
18342+//  unsigned int _w,
18343+//  unsigned int h);
18344+
18345+// void ff_rpi_sand30_lines_to_planar_y16(
18346+//   uint8_t * dest,            : x0
18347+//   unsigned int dst_stride,   : w1, 2 * _w is subtracted to get the row-end skip
18348+//   const uint8_t * src,       : x2
18349+//   unsigned int src_stride1,  : w3, always 128 (not read)
18350+//   unsigned int src_stride2,  : w4
18351+//   unsigned int _x,           : w5, argument ignored (w5 is reused as the per-row sample counter)
18352+//   unsigned int y,            : w6
18353+//   unsigned int _w,           : w7
18354+//   unsigned int h);           : [sp, #0]
18355+//
18356+// Assumes that we are starting on a stripe boundary and that overreading
18357+// within the stripe is OK. However it does respect the dest size for writes
18358+
18359+function ff_rpi_sand30_lines_to_planar_y16, export=1
18360+                lsl             w4,  w4,  #7
18361+                sub             w4,  w4,  #64                   // x4 = stride2 * 128 - 64, post-increment of the 2nd load
18362+                uxtw            x4,  w4
18363+                sub             w1,  w1,  w7, lsl #1            // w1 = dst_stride - 2 * _w
18364+                uxtw            x6,  w6
18365+                add             x8,  x2,  x6, lsl #7            // x8 = src + y * 128
18366+                ldr             w6,  [sp, #0]                   // w6 = h (y is no longer needed)
18367+
18368+10:                                                             // row loop
18369+                mov             x2,  x8                         // x2 = source cursor for this row
18370+                mov             w5,  w7                         // w5 = samples still to write
18371+1:                                                              // one 128-byte read (96 samples) per iteration
18372+                ld1             {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64
18373+                ld1             {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4
18374+
18375+                subs            w5,  w5,  #96                   // flags are consumed by blt 2f / bne 1b below
18376+
18377+                USAND10         v16, v17, v18, v0, v1
18378+                USAND10         v19, v20, v21, v2, v3
18379+                USAND10         v22, v23, v24, v4, v5
18380+                USAND10         v25, v26, v27, v6, v7
18381+
18382+                blt             2f                              // fewer than 96 samples were left: partial write
18383+
18384+                st3             {v16.8h, v17.8h, v18.8h}, [x0], #48
18385+                st3             {v19.8h, v20.8h, v21.8h}, [x0], #48
18386+                st3             {v22.8h, v23.8h, v24.8h}, [x0], #48
18387+                st3             {v25.8h, v26.8h, v27.8h}, [x0], #48
18388+
18389+                bne             1b                              // more samples remain in this row
18390+
18391+11:                                                             // end of row
18392+                subs            w6,  w6,  #1
18393+                add             x0,  x0,  w1,  uxtw             // skip to the start of the next dest row
18394+                add             x8,  x8,  #128                  // next source line
18395+                bne             10b
18396+
18397+                ret
18398+
18399+// Partial final write: w5 holds (samples left - 96), i.e. a negative value, on entry
18400+2:
18401+                cmp             w5,  #48-96                     // at least 48 samples left?
18402+                blt             1f
18403+                st3             {v16.8h, v17.8h, v18.8h}, [x0], #48
18404+                st3             {v19.8h, v20.8h, v21.8h}, [x0], #48
18405+                beq             11b                             // exactly 48 were left (flags from the cmp)
18406+                mov             v16.16b, v22.16b                // move the second half of the data down
18407+                mov             v17.16b, v23.16b
18408+                sub             w5,  w5,  #48
18409+                mov             v18.16b, v24.16b
18410+                mov             v19.16b, v25.16b
18411+                mov             v20.16b, v26.16b
18412+                mov             v21.16b, v27.16b
18413+1:
18414+                cmp             w5,  #24-96                     // at least 24 samples left?
18415+                blt             1f
18416+                st3             {v16.8h, v17.8h, v18.8h}, [x0], #48
18417+                beq             11b
18418+                mov             v16.16b, v19.16b
18419+                mov             v17.16b, v20.16b
18420+                sub             w5,  w5,  #24
18421+                mov             v18.16b, v21.16b
18422+1:
18423+                cmp             w5,  #12-96                     // at least 12 samples left?
18424+                blt             1f
18425+                st3             {v16.4h, v17.4h, v18.4h}, [x0], #24
18426+                beq             11b
18427+                mov             v16.d[0], v16.d[1]              // move the upper halves down
18428+                sub             w5,  w5,  #12
18429+                mov             v17.d[0], v17.d[1]
18430+                mov             v18.d[0], v18.d[1]
18431+1:
18432+                cmp             w5,  #6-96                      // at least 6 samples left?
18433+                blt             1f
18434+                st3             {v16.h, v17.h, v18.h}[0], [x0], #6
18435+                st3             {v16.h, v17.h, v18.h}[1], [x0], #6
18436+                beq             11b
18437+                mov             v16.s[0], v16.s[1]              // move lanes 2-3 down to lanes 0-1
18438+                sub             w5,  w5,  #6
18439+                mov             v17.s[0], v17.s[1]
18440+                mov             v18.s[0], v18.s[1]
18441+1:
18442+                cmp             w5,  #3-96                      // at least 3 samples left?
18443+                blt             1f
18444+                st3             {v16.h, v17.h, v18.h}[0], [x0], #6
18445+                beq             11b
18446+                mov             v16.h[0], v16.h[1]              // at most 2 samples remain, so v18 is not needed
18447+                sub             w5,  w5,  #3
18448+                mov             v17.h[0], v17.h[1]
18449+1:
18450+                cmp             w5,  #2-96                      // 2 samples left?
18451+                blt             1f
18452+                st2             {v16.h, v17.h}[0], [x0], #4
18453+                b               11b
18454+1:                                                              // single remaining sample
18455+                st1             {v16.h}[0], [x0], #2
18456+                b               11b
18457+
18458+endfunc
18459+
18460+// void ff_rpi_sand30_lines_to_planar_y8(
18461+//   uint8_t * dest,            : x0
18462+//   unsigned int dst_stride,   : w1
18463+//   const uint8_t * src,       : x2
18464+//   unsigned int src_stride1,  : w3, always 128
18465+//   unsigned int src_stride2,  : w4
18466+//   unsigned int _x,           : w5
18467+//   unsigned int y,            : w6
18468+//   unsigned int _w,           : w7
18469+//   unsigned int h);           : [sp, #0]
18470+//
18471+// Assumes that we are starting on a stripe boundary and that overreading
18472+// within the stripe is OK. However it does respect the dest size for writing
18473+
18474+function ff_rpi_sand30_lines_to_planar_y8, export=1
18475+                lsl             w4,  w4,  #7                    // src_stride2 * 128
18476+                sub             w4,  w4,  #64                   // less the 64 bytes stepped by the 1st ld1 below
18477+                uxtw            x4,  w4                         // x4 = post-increment for the 2nd ld1
18478+                sub             w1,  w1,  w7                    // w1 = dst_stride - _w, added to x0 after each line
18479+                uxtw            x6,  w6
18480+                add             x8,  x2,  x6, lsl #7            // x8 = src + y * 128 = first source line
18481+                ldr             w6,  [sp, #0]                   // w6 = h, the line counter
18482+
18483+10:
18484+                mov             x2,  x8                         // x2 = read pointer for this line
18485+                mov             w5,  w7                         // w5 = output bytes still to write on this line
18486+1:
18487+                ld1             {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64
18488+                ld1             {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4
18489+
18490+                subs            w5,  w5,  #96                   // 96 output bytes per pass; flags feed blt/bne below
18491+
18492+                // Each 64-byte half becomes 48 output bytes: v16-v18, then v19-v21 (v22-v24 are scratch)
18493+                USAND8          v16, v17, v18, v0, v1, v2, v3, v22, v23, v24
18494+                USAND8          v19, v20, v21, v4, v5, v6, v7, v22, v23, v24
18495+
18496+                blt             2f
18497+
18498+                st3             {v16.16b, v17.16b, v18.16b}, [x0], #48
18499+                st3             {v19.16b, v20.16b, v21.16b}, [x0], #48
18500+
18501+                bne             1b
18502+
18503+11:
18504+                subs            w6,  w6,  #1
18505+                add             x0,  x0,  w1,  uxtw             // step dest to the start of the next line
18506+                add             x8,  x8,  #128                  // next source line is 128 bytes on
18507+                bne             10b
18508+
18509+                ret
18510+
18511+// Partial final write: fewer than 96 bytes wanted, peel off 48/24/12/6/3/2/1
18512+2:
18513+                cmp             w5,  #48-96                     // w5 = remaining - 96 here, so each test is biased by -96
18514+                blt             1f
18515+                st3             {v16.16b, v17.16b, v18.16b}, [x0], #48
18516+                beq             11b
18517+                mov             v16.16b, v19.16b                // second 48 bytes live in v19-v21 (see main loop stores)
18518+                mov             v17.16b, v20.16b
18519+                sub             w5,  w5,  #48
18520+                mov             v18.16b, v21.16b
18521+1:
18522+                cmp             w5,  #24-96
18523+                blt             1f
18524+                st3             {v16.8b, v17.8b, v18.8b}, [x0], #24
18525+                beq             11b
18526+                mov             v16.d[0], v16.d[1]              // bring the upper 8 lanes down
18527+                sub             w5,  w5,  #24
18528+                mov             v17.d[0], v17.d[1]
18529+                mov             v18.d[0], v18.d[1]
18530+1:
18531+                cmp             w5,  #12-96
18532+                blt             1f
18533+                st3             {v16.b, v17.b, v18.b}[0], [x0], #3
18534+                st3             {v16.b, v17.b, v18.b}[1], [x0], #3
18535+                st3             {v16.b, v17.b, v18.b}[2], [x0], #3
18536+                st3             {v16.b, v17.b, v18.b}[3], [x0], #3
18537+                beq             11b
18538+                mov             v16.s[0], v16.s[1]              // lanes 4-7 -> 0-3
18539+                sub             w5,  w5,  #12
18540+                mov             v17.s[0], v17.s[1]
18541+                mov             v18.s[0], v18.s[1]
18542+1:
18543+                cmp             w5,  #6-96
18544+                blt             1f
18545+                st3             {v16.b, v17.b, v18.b}[0], [x0], #3
18546+                st3             {v16.b, v17.b, v18.b}[1], [x0], #3
18547+                beq             11b
18548+                mov             v16.h[0], v16.h[1]              // lanes 2-3 -> 0-1
18549+                sub             w5,  w5,  #6
18550+                mov             v17.h[0], v17.h[1]
18551+                mov             v18.h[0], v18.h[1]
18552+1:
18553+                cmp             w5,  #3-96
18554+                blt             1f
18555+                st3             {v16.b, v17.b, v18.b}[0], [x0], #3
18556+                beq             11b
18557+                mov             v16.b[0], v16.b[1]              // at most 2 bytes remain, so v18 is no longer needed
18558+                sub             w5,  w5,  #3
18559+                mov             v17.b[0], v17.b[1]
18560+1:
18561+                cmp             w5,  #2-96
18562+                blt             1f
18563+                st2             {v16.b, v17.b}[0], [x0], #2     // exactly 2 bytes left
18564+                b               11b
18565+1:
18566+                st1             {v16.b}[0], [x0], #1            // exactly 1 byte left
18567+                b               11b
18568+
18569+endfunc
18570+
18571--- /dev/null
18572+++ b/libavutil/aarch64/rpi_sand_neon.h
18573@@ -0,0 +1,59 @@
18574+/*
18575+Copyright (c) 2021 Michael Eiler
18576+
18577+Redistribution and use in source and binary forms, with or without
18578+modification, are permitted provided that the following conditions are met:
18579+    * Redistributions of source code must retain the above copyright
18580+      notice, this list of conditions and the following disclaimer.
18581+    * Redistributions in binary form must reproduce the above copyright
18582+      notice, this list of conditions and the following disclaimer in the
18583+      documentation and/or other materials provided with the distribution.
18584+    * Neither the name of the copyright holder nor the
18585+      names of its contributors may be used to endorse or promote products
18586+      derived from this software without specific prior written permission.
18587+
18588+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18589+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18590+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18591+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
18592+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18593+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18594+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
18595+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
18596+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
18597+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
18598+
18599+Authors: Michael Eiler <eiler.mike@gmail.com>
18600+*/
18601+
18602+#pragma once
18603+#include <stdint.h>   /* uint8_t is used by every prototype below */
18604+#ifdef __cplusplus
18605+extern "C" {
18606+#endif   /* __cplusplus */
18607+
18608+void ff_rpi_sand8_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride,
18609+  const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
18610+  unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
18611+
18612+void ff_rpi_sand8_lines_to_planar_c8(uint8_t * dst_u, unsigned int dst_stride_u,
18613+  uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src,
18614+  unsigned int stride1, unsigned int stride2, unsigned int _x, unsigned int y,
18615+  unsigned int _w, unsigned int h);
18616+
18617+void ff_rpi_sand30_lines_to_planar_y16(uint8_t * dest, unsigned int dst_stride,
18618+  const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
18619+  unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
18620+
18621+void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_u,
18622+  uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1,
18623+  unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
18624+
18625+void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride,
18626+  const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
18627+  unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);   /* asm never reads _x or src_stride1 (documented as always 128) */
18628+
18629+#ifdef __cplusplus
18630+}
18631+#endif   /* __cplusplus */
18632+
18633--- a/libavutil/arm/Makefile
18634+++ b/libavutil/arm/Makefile
18635@@ -6,3 +6,4 @@ VFP-OBJS += arm/float_dsp_init_vfp.o
18636
18637 NEON-OBJS += arm/float_dsp_init_neon.o                                  \
18638              arm/float_dsp_neon.o                                       \
18639+             arm/rpi_sand_neon.o                                        \
18640--- /dev/null
18641+++ b/libavutil/arm/rpi_sand_neon.S
18642@@ -0,0 +1,925 @@
18643+/*
18644+Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
18645+All rights reserved.
18646+
18647+Redistribution and use in source and binary forms, with or without
18648+modification, are permitted provided that the following conditions are met:
18649+    * Redistributions of source code must retain the above copyright
18650+      notice, this list of conditions and the following disclaimer.
18651+    * Redistributions in binary form must reproduce the above copyright
18652+      notice, this list of conditions and the following disclaimer in the
18653+      documentation and/or other materials provided with the distribution.
18654+    * Neither the name of the copyright holder nor the
18655+      names of its contributors may be used to endorse or promote products
18656+      derived from this software without specific prior written permission.
18657+
18658+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18659+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18660+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18661+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
18662+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18663+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18664+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
18665+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
18666+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
18667+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
18668+
18669+Authors: John Cox
18670+*/
18671+
18672+#include "libavutil/arm/asm.S"
18673+
18674+
18675+@ General notes:
18676+@ Having done some timing on this in sand8->y8 (Pi4)
18677+@  vst1 (680fps) is a bit faster than vstm (660fps)
18678+@  vldm (680fps) is noticeably faster than vld1 (480fps)
18679+@  (or it might be that a mix is what is required)
18680+@
18681+@ At least on a Pi4 it is no more expensive to have a single auto-inc register
18682+@ for dest address than it is to have 2 used alternately (On Pi3 Ben asserted
18683+@ the latter was better)
18684+@
18685+@ vstm will bus error on unaligned access (so will vldm), vst1 is safe unless
18686+@ the memory is uncached.
18687+@ As these are Sand -> planar we can assume that src is going to be aligned but
18688+@ it is possible that dest isn't (converting to .yuv or other packed format).
18689+@ Luckily vst1 is faster than vstm :-) so all is well
18690+@ vst1 has alignment requirements of el size so maybe splitting vst1.32 into 4
18691+@ .8 stores would let us do non-word aligned stores into uncached but it
18692+@ probably isn't worth it.
18693+
18694+
18695+
18696+
18697+@ void ff_rpi_sand128b_stripe_to_8_10(
18698+@   uint8_t * dest,             // [r0]
18699+@   const uint8_t * src1,       // [r1]
18700+@   const uint8_t * src2,       // [r2]
18701+@   unsigned int lines);        // [r3]
18702+
18703+.macro  stripe2_to_8, bit_depth
18704+        vpush    {q4-q7}                        @ q4-q7 are overwritten below; restored before return
18705+1:
18706+        vldm     r1!, {q0-q7}                   @ 128 bytes from src1, treated as u16 lanes below
18707+        subs     r3, #1                         @ count this line; flags are consumed by the bne at the end
18708+        vldm     r2!, {q8-q15}                  @ 128 bytes from src2
18709+        vqrshrn.u16 d0,  q0,  #\bit_depth - 8   @ u16 -> u8: rounding, saturating shift right by (bit_depth - 8)
18710+        vqrshrn.u16 d1,  q1,  #\bit_depth - 8
18711+        vqrshrn.u16 d2,  q2,  #\bit_depth - 8
18712+        vqrshrn.u16 d3,  q3,  #\bit_depth - 8
18713+        vqrshrn.u16 d4,  q4,  #\bit_depth - 8
18714+        vqrshrn.u16 d5,  q5,  #\bit_depth - 8
18715+        vqrshrn.u16 d6,  q6,  #\bit_depth - 8
18716+        vqrshrn.u16 d7,  q7,  #\bit_depth - 8   @ d0-d7 now hold the 64 narrowed src1 bytes
18717+        vqrshrn.u16 d8,  q8,  #\bit_depth - 8   @ d8 upward reuse q4-q7, whose inputs were consumed above
18718+        vqrshrn.u16 d9,  q9,  #\bit_depth - 8
18719+        vqrshrn.u16 d10, q10, #\bit_depth - 8
18720+        vqrshrn.u16 d11, q11, #\bit_depth - 8
18721+        vqrshrn.u16 d12, q12, #\bit_depth - 8
18722+        vqrshrn.u16 d13, q13, #\bit_depth - 8
18723+        vqrshrn.u16 d14, q14, #\bit_depth - 8
18724+        vqrshrn.u16 d15, q15, #\bit_depth - 8   @ d8-d15 now hold the 64 narrowed src2 bytes
18725+        vstm     r0!, {q0-q7}                   @ 128 output bytes: src1 part then src2 part (vstm: see alignment note above)
18726+        bne      1b                             @ loop until r3 (lines) reaches zero
18727+        vpop     {q4-q7}
18728+        bx       lr
18729+.endm
18730+
18731+function ff_rpi_sand128b_stripe_to_8_10, export=1
18732+        stripe2_to_8     10
18733+endfunc
18734+
18735+@ void ff_rpi_sand8_lines_to_planar_y8(
18736+@   uint8_t * dest,             // [r0]
18737+@   unsigned int dst_stride,    // [r1]
18738+@   const uint8_t * src,        // [r2]
18739+@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
18740+@   unsigned int src_stride2,   // [sp, #0]  -> r3
18741+@   unsigned int _x,            // [sp, #4]  Ignored - 0
18742+@   unsigned int y,             // [sp, #8]  (r7 in prefix)
18743+@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
18744+@   unsigned int h);            // [sp, #16] -> r7
18745+@
18746+@ Assumes that we are starting on a stripe boundary and that overreading
18747+@ within the stripe is OK. However it does respect the dest size for writing
18748+
18749+function ff_rpi_sand8_lines_to_planar_y8, export=1
18750+                push            {r4-r8, lr}     @ +24: six words pushed, so stack args now start at [sp, #24]
18751+                ldr             r3,  [sp, #24]  @ r3 = src_stride2
18752+                ldr             r6,  [sp, #36]  @ r6 = _w
18753+                ldr             r7,  [sp, #32]  @ y
18754+                lsl             r3,  #7         @ r3 = src_stride2 * 128, the step added to r2 after each read
18755+                sub             r1,  r6         @ r1 = dst_stride - _w, added to r0 at the end of each line
18756+                add             r8,  r2,  r7,  lsl #7   @ r8 = src + y * 128 = first source line
18757+                ldr             r7,  [sp, #40]  @ r7 = h, the line counter
18758+
18759+10:
18760+                mov             r2,  r8         @ r2 = read pointer for this line
18761+                add             r4,  r0,  #24   @ NOTE(review): r4 is never read in this function
18762+                mov             r5,  r6         @ r5 = bytes still to write on this line
18763+                mov             lr,  #0         @ NOTE(review): lr is never read in this function
18764+1:
18765+                vldm            r2,  {q8-q15}   @ 128 source bytes
18766+                add             r2,  r3
18767+                subs            r5,  #128       @ flags feed both the blt and the bne below
18768+                blt             2f              @ fewer than 128 wanted: write a partial block
18769+                vst1.8          {d16, d17, d18, d19}, [r0]!
18770+                vst1.8          {d20, d21, d22, d23}, [r0]!
18771+                vst1.8          {d24, d25, d26, d27}, [r0]!
18772+                vst1.8          {d28, d29, d30, d31}, [r0]!
18773+                bne             1b
18774+11:
18775+                subs            r7,  #1
18776+                add             r0,  r1         @ step dest to the start of the next line
18777+                add             r8,  #128       @ next source line is 128 bytes on
18778+                bne             10b
18779+
18780+                pop             {r4-r8, pc}
18781+
18782+@ Partial final write: peel off 64/32/16/8/4/2/1 bytes in turn
18783+2:
18784+                cmp             r5,  #64-128    @ r5 = remaining - 128 here, hence the -128 bias on each test
18785+                blt             1f
18786+                vst1.8          {d16, d17, d18, d19}, [r0]!
18787+                vst1.8          {d20, d21, d22, d23}, [r0]!
18788+                beq             11b
18789+                vmov            q8,  q12        @ bring the upper 64 bytes down to q8-q11
18790+                vmov            q9,  q13
18791+                sub             r5,  #64
18792+                vmov            q10, q14
18793+                vmov            q11, q15
18794+1:
18795+                cmp             r5,  #32-128
18796+                blt             1f
18797+                vst1.8          {d16, d17, d18, d19}, [r0]!
18798+                beq             11b
18799+                vmov            q8,  q10
18800+                sub             r5,  #32
18801+                vmov            q9,  q11
18802+1:
18803+                cmp             r5,  #16-128
18804+                blt             1f
18805+                vst1.8          {d16, d17}, [r0]!
18806+                beq             11b
18807+                sub             r5,  #16
18808+                vmov            q8,  q9
18809+1:
18810+                cmp             r5,  #8-128
18811+                blt             1f
18812+                vst1.8          {d16}, [r0]!
18813+                beq             11b
18814+                sub             r5,  #8
18815+                vmov            d16, d17
18816+1:
18817+                cmp             r5,  #4-128
18818+                blt             1f
18819+                vst1.32         {d16[0]}, [r0]!
18820+                beq             11b
18821+                sub             r5,  #4
18822+                vshr.u64        d16, #32        @ bring bytes 4-7 down to bytes 0-3
18823+1:
18824+                cmp             r5,  #2-128
18825+                blt             1f
18826+                vst1.16         {d16[0]}, [r0]!
18827+                beq             11b             @ exactly 2 bytes were left
18828+                vst1.8          {d16[2]}, [r0]! @ otherwise 3 were left: write the third byte
18829+                b               11b
18830+1:
18831+                vst1.8          {d16[0]}, [r0]! @ exactly 1 byte was left
18832+                b               11b
18833+endfunc
18834+
18835+@ void ff_rpi_sand8_lines_to_planar_c8(
18836+@   uint8_t * dst_u,            // [r0]
18837+@   unsigned int dst_stride_u,  // [r1]
18838+@   uint8_t * dst_v,            // [r2]
18839+@   unsigned int dst_stride_v,  // [r3]
18840+@   const uint8_t * src,        // [sp, #0]  -> r4, r5
18841+@   unsigned int stride1,       // [sp, #4]  128
18842+@   unsigned int stride2,       // [sp, #8]  -> r8
18843+@   unsigned int _x,            // [sp, #12] 0
18844+@   unsigned int y,             // [sp, #16] (r7 in prefix)
18845+@   unsigned int _w,            // [sp, #20] -> r12, r6
18846+@   unsigned int h);            // [sp, #24] -> r7
18847+@
18848+@ Assumes that we are starting on a stripe boundary and that overreading
18849+@ within the stripe is OK. However it does respect the dest size for writing
18850+
18851+function ff_rpi_sand8_lines_to_planar_c8, export=1
18852+                push            {r4-r8, lr}     @ +24
18853+
18854+                ldr             r5,  [sp, #24]  @ r5 = src
18855+                ldr             r8,  [sp, #32]  @ r8 = stride2
18856+                ldr             r7,  [sp, #40]  @ r7 = y
18857+                ldr             r6,  [sp, #44]  @ r6 = _w
18858+                lsl             r8,  #7         @ r8 = stride2 * 128, the step added to r4 after each read
18859+                add             r5,  r5,  r7,  lsl #7   @ r5 = src + y * 128 = first source line
18860+                sub             r1,  r1,  r6    @ r1 = dst_stride_u - _w
18861+                sub             r3,  r3,  r6    @ r3 = dst_stride_v - _w
18862+                ldr             r7,  [sp, #48]  @ r7 = h, the line counter
18863+                vpush           {q4-q7}         @ done after the stack-arg loads so the offsets above stay valid
18864+
18865+10:
18866+                mov             r4,  r5         @ r4 = read pointer for this line
18867+                mov             r12, r6         @ r12 = bytes still to write per plane on this line
18868+1:
18869+                subs            r12, #64        @ the first 128 source bytes give 64 bytes per plane
18870+                vldm            r4,  {q0-q7}
18871+                add             r4,  r8
18872+                it              gt
18873+                vldmgt          r4,  {q8-q15}   @ second 128 bytes are fetched only if more than 64 are wanted
18874+                add             r4,  r8
18875+
18876+                vuzp.8          q0,  q1         @ de-interleave: even bytes to the first reg, odd bytes to the second
18877+                vuzp.8          q2,  q3
18878+                vuzp.8          q4,  q5
18879+                vuzp.8          q6,  q7
18880+
18881+                vuzp.8          q8,  q9
18882+                vuzp.8          q10, q11
18883+                vuzp.8          q12, q13
18884+                vuzp.8          q14, q15
18885+                subs            r12, #64        @ r12 = remaining - 128 from here; flags feed blt/bne below
18886+
18887+                @ Rearrange regs so we can use vst1 with 4 regs
18888+                vswp            q1,  q2
18889+                vswp            q5,  q6
18890+                vswp            q9,  q10
18891+                vswp            q13, q14
18892+                blt             2f
18893+
18894+                vst1.8          {d0,  d1,  d2,  d3 }, [r0]!     @ even source bytes go to dst_u
18895+                vst1.8          {d8,  d9,  d10, d11}, [r0]!
18896+                vst1.8          {d16, d17, d18, d19}, [r0]!
18897+                vst1.8          {d24, d25, d26, d27}, [r0]!
18898+
18899+                vst1.8          {d4,  d5,  d6,  d7 }, [r2]!     @ odd source bytes go to dst_v
18900+                vst1.8          {d12, d13, d14, d15}, [r2]!
18901+                vst1.8          {d20, d21, d22, d23}, [r2]!
18902+                vst1.8          {d28, d29, d30, d31}, [r2]!
18903+                bne             1b
18904+11:
18905+                subs            r7,  #1
18906+                add             r5,  #128       @ next source line is 128 bytes on
18907+                add             r0,  r1         @ step both dests to the start of their next line
18908+                add             r2,  r3
18909+                bne             10b
18910+                vpop            {q4-q7}
18911+                pop             {r4-r8,pc}
18912+
18913+2:
18914+                cmp             r12, #64-128    @ partial write: r12 = remaining - 128, hence the -128 bias
18915+                blt             1f
18916+                vst1.8          {d0,  d1,  d2,  d3 }, [r0]!
18917+                vst1.8          {d8,  d9,  d10, d11}, [r0]!
18918+                vst1.8          {d4,  d5,  d6,  d7 }, [r2]!
18919+                vst1.8          {d12, d13, d14, d15}, [r2]!
18920+                beq             11b
18921+                sub             r12, #64
18922+                vmov            q0,  q8         @ bring the data from the second read down to q0-q7
18923+                vmov            q1,  q9
18924+                vmov            q2,  q10
18925+                vmov            q3,  q11
18926+                vmov            q4,  q12
18927+                vmov            q5,  q13
18928+                vmov            q6,  q14
18929+                vmov            q7,  q15
18930+1:
18931+                cmp             r12, #32-128
18932+                blt             1f
18933+                vst1.8          {d0,  d1,  d2,  d3 }, [r0]!
18934+                vst1.8          {d4,  d5,  d6,  d7 }, [r2]!
18935+                beq             11b
18936+                sub             r12, #32
18937+                vmov            q0,  q4
18938+                vmov            q1,  q5
18939+                vmov            q2,  q6
18940+                vmov            q3,  q7
18941+1:
18942+                cmp             r12, #16-128
18943+                blt             1f
18944+                vst1.8          {d0,  d1 }, [r0]!
18945+                vst1.8          {d4,  d5 }, [r2]!
18946+                beq             11b
18947+                sub             r12, #16
18948+                vmov            q0,  q1
18949+                vmov            q2,  q3
18950+1:
18951+                cmp             r12, #8-128
18952+                blt             1f
18953+                vst1.8          {d0}, [r0]!
18954+                vst1.8          {d4}, [r2]!
18955+                beq             11b
18956+                sub             r12, #8
18957+                vmov            d0,  d1
18958+                vmov            d4,  d5
18959+1:
18960+                cmp             r12, #4-128
18961+                blt             1f
18962+                vst1.32         {d0[0]}, [r0]!
18963+                vst1.32         {d4[0]}, [r2]!
18964+                beq             11b
18965+                sub             r12, #4
18966+                vmov            s0,  s1         @ upper word of d0 down to the lower word
18967+                vmov            s8,  s9         @ same for d4 (s8/s9 are the two halves of d4)
18968+1:
18969+                cmp             r12, #2-128
18970+                blt             1f
18971+                vst1.16         {d0[0]}, [r0]!
18972+                vst1.16         {d4[0]}, [r2]!
18973+                beq             11b             @ exactly 2 bytes per plane were left
18974+                vst1.8          {d0[2]}, [r0]!  @ otherwise 3 were left: write the third byte
18975+                vst1.8          {d4[2]}, [r2]!
18976+                b               11b
18977+1:
18978+                vst1.8          {d0[0]}, [r0]!  @ exactly 1 byte per plane was left
18979+                vst1.8          {d4[0]}, [r2]!
18980+                b               11b
18981+endfunc
18982+
18983+
18984+
18985+@ void ff_rpi_sand30_lines_to_planar_y16(
18986+@   uint8_t * dest,             // [r0]
18987+@   unsigned int dst_stride,    // [r1]
18988+@   const uint8_t * src,        // [r2]
18989+@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
18990+@   unsigned int src_stride2,   // [sp, #0]  -> r3
18991+@   unsigned int _x,            // [sp, #4]  Ignored - 0
18992+@   unsigned int y,             // [sp, #8]  (r7 in prefix)
18993+@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
18994+@   unsigned int h);            // [sp, #16] -> r7
18995+@
18996+@ Assumes that we are starting on a stripe boundary and that overreading
18997+@ within the stripe is OK. However it does respect the dest size for writing
18998+
18999+function ff_rpi_sand30_lines_to_planar_y16, export=1
19000+                push            {r4-r8, lr}     @ +24
19001+                ldr             r3,  [sp, #24]  @ r3 = src_stride2
19002+                ldr             r6,  [sp, #36]  @ r6 = _w
19003+                ldr             r7,  [sp, #32]  @ y
19004+                mov             r12, #48        @ r12 = pointer step used by the paired vst3 stores below
19005+                sub             r3,  #1
19006+                lsl             r3,  #7         @ r3 = (src_stride2 - 1) * 128, extra step taken once per 128 bytes read
19007+                sub             r1,  r1,  r6,  lsl #1   @ r1 = dst_stride - _w * 2 (each output sample is 2 bytes)
19008+                add             r8,  r2,  r7,  lsl #7   @ r8 = src + y * 128 = first source line
19009+                ldr             r7,  [sp, #40]  @ r7 = h, the line counter
19010+
19011+10:
19012+                mov             r2,  r8         @ r2 = read pointer for this line
19013+                add             r4,  r0,  #24   @ r4 = second store pointer, 24 bytes ahead of r0
19014+                mov             r5,  r6         @ r5 = samples still to write on this line
19015+                mov             lr,  #0         @ lr = bytes read so far, modulo 128
19016+1:
19017+                vldm            r2!, {q10-q13}  @ 64 bytes = 16 words, three 10-bit samples unpacked from each
19018+                add             lr,  #64
19019+
19020+                vshrn.u32       d4 , q10, #14    @ Cannot vshrn.u32 #20!
19021+                ands            lr,  #127        @ eq once a full 128 bytes have been read
19022+                vshrn.u32       d2,  q10, #10    @ bits 10-25 of each word; masked to 10 bits below
19023+                vmovn.u32       d0,  q10         @ bits 0-15 of each word; masked to 10 bits below
19024+
19025+                vshrn.u32       d5,  q11, #14
19026+                it              eq
19027+                addeq           r2,  r3          @ after 128 bytes, add the extra step so r2 has moved src_stride2 * 128
19028+                vshrn.u32       d3,  q11, #10
19029+                vmovn.u32       d1,  q11
19030+
19031+                subs            r5,  #48         @ 48 samples per 64 source bytes; flags feed blt/bne below
19032+                vshr.u16        q2,  #6          @ (bits 14-29) >> 6 leaves bits 20-29
19033+                vbic.u16        q0,  #0xfc00     @ keep the low 10 bits
19034+                vbic.u16        q1,  #0xfc00
19035+
19036+                vshrn.u32       d20, q12, #14    @ q10 is free for reuse: its source words were consumed above
19037+                vshrn.u32       d18, q12, #10
19038+                vmovn.u32       d16, q12
19039+
19040+                vshrn.u32       d21, q13, #14
19041+                vshrn.u32       d19, q13, #10
19042+                vmovn.u32       d17, q13
19043+
19044+                vshr.u16        q10, #6
19045+                vbic.u16        q8,  #0xfc00
19046+                vbic.u16        q9 , #0xfc00
19047+                blt             2f
19048+
19049+                vst3.16         {d0,  d2,  d4},  [r0], r12      @ each vst3 writes 24 bytes; r0 and r4 leapfrog by 48
19050+                vst3.16         {d1,  d3,  d5},  [r4], r12
19051+                vst3.16         {d16, d18, d20}, [r0], r12
19052+                vst3.16         {d17, d19, d21}, [r4], r12
19053+
19054+                bne             1b
19055+
19056+11:
19057+                subs            r7,  #1
19058+                add             r0,  r1          @ step dest to the start of the next line
19059+                add             r8,  #128        @ next source line is 128 bytes on
19060+                bne             10b
19061+
19062+                pop             {r4-r8, pc}
19063+
19064+@ Partial final write: peel off 24/12/6/3/2/1 samples in turn
19065+2:
19066+                cmp             r5,  #24-48      @ r5 = remaining - 48 here, hence the -48 bias on each test
19067+                blt             1f
19068+                vst3.16         {d0,  d2,  d4},  [r0], r12      @ r0 += 48 also covers the 24 bytes written via r4
19069+                vst3.16         {d1,  d3,  d5},  [r4]
19070+                beq             11b
19071+                vmov            q0,  q8          @ bring the second 24 samples down to q0-q2
19072+                sub             r5,  #24
19073+                vmov            q1,  q9
19074+                vmov            q2,  q10
19075+1:
19076+                cmp             r5,  #12-48
19077+                blt             1f
19078+                vst3.16         {d0,  d2,  d4},  [r0]!
19079+                beq             11b
19080+                vmov            d0, d1
19081+                sub             r5, #12
19082+                vmov            d2, d3
19083+                vmov            d4, d5
19084+1:
19085+                cmp             r5,  #6-48
19086+                add             r4,  r0,  #6    @ avoid [r0]! on sequential instructions
19087+                blt             1f
19088+                vst3.16         {d0[0], d2[0], d4[0]}, [r0]
19089+                vst3.16         {d0[1], d2[1], d4[1]}, [r4]
19090+                add             r0,  #12
19091+                beq             11b
19092+                vmov            s0,  s1          @ upper word down to lower word of d0
19093+                sub             r5,  #6
19094+                vmov            s4,  s5          @ same for d2
19095+                vmov            s8,  s9          @ same for d4
19096+1:
19097+                cmp             r5, #3-48
19098+                blt             1f
19099+                vst3.16         {d0[0], d2[0], d4[0]}, [r0]!
19100+                beq             11b
19101+                sub             r5, #3
19102+                vshr.u32        d0, #16          @ 16-bit lane 1 down to lane 0
19103+                vshr.u32        d2, #16          @ d4 is not shifted: at most 2 samples remain
19104+1:
19105+                cmp             r5, #2-48
19106+                blt             1f
19107+                vst2.16         {d0[0], d2[0]}, [r0]!           @ exactly 2 samples left
19108+                b               11b
19109+1:
19110+                vst1.16         {d0[0]}, [r0]!                  @ exactly 1 sample left
19111+                b               11b
19112+
19113+endfunc
19114+
19115+
19116+@ void ff_rpi_sand30_lines_to_planar_c16(
19117+@   uint8_t * dst_u,            // [r0]
19118+@   unsigned int dst_stride_u,  // [r1]
19119+@   uint8_t * dst_v,            // [r2]
19120+@   unsigned int dst_stride_v,  // [r3]
19121+@   const uint8_t * src,        // [sp, #0]  -> r4, r5
19122+@   unsigned int stride1,       // [sp, #4]  128
19123+@   unsigned int stride2,       // [sp, #8]  -> r8
19124+@   unsigned int _x,            // [sp, #12] 0
19125+@   unsigned int y,             // [sp, #16] (r7 in prefix)
19126+@   unsigned int _w,            // [sp, #20] -> r6, r9
19127+@   unsigned int h);            // [sp, #24] -> r7
19128+@
19129+@ Assumes that we are starting on a stripe boundary and that overreading
19130+@ within the stripe is OK. However it does respect the dest size for writing
19131+
19132+function ff_rpi_sand30_lines_to_planar_c16, export=1
19133+                push            {r4-r10, lr}    @ +32
19134+                ldr             r5,  [sp, #32]  @ r5 = src
19135+                ldr             r8,  [sp, #40]  @ r8 = stride2
19136+                ldr             r7,  [sp, #48]  @ r7 = y (only until h is loaded below)
19137+                ldr             r9,  [sp, #52]  @ r9 = _w
19138+                mov             r12, #48        @ post-increment used by the paired vst3 stores
19139+                sub             r8,  #1
19140+                lsl             r8,  #7         @ r8 = (stride2 - 1) * 128
19141+                add             r5,  r5,  r7,  lsl #7   @ r5 = src + y * 128
19142+                sub             r1,  r1,  r9,  lsl #1   @ r1 = dst_stride_u - _w * 2
19143+                sub             r3,  r3,  r9,  lsl #1   @ r3 = dst_stride_v - _w * 2
19144+                ldr             r7,  [sp, #56]  @ r7 = h (line counter)
19145+10:
19146+                mov             lr,  #0         @ lr = bytes consumed, modulo 128
19147+                mov             r4,  r5         @ r4 = source read pointer for this line
19148+                mov             r6,  r9         @ r6 = _w countdown (24 per iteration)
19149+1:
19150+                vldm            r4!, {q0-q3}    @ load 64 bytes, r4 += 64
19151+                add             lr,  #64
19152+
19153+                @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2
19154+                vshrn.u32       d20, q0,  #14   @ bits 14-29 of each word
19155+                vmovn.u32       d18, q0         @ bits 0-15 of each word
19156+                vshrn.u32       d0,  q0,  #10   @ bits 10-25 of each word
19157+                ands            lr,  #127       @ Z set once 128 bytes have been consumed (used by addeq below)
19158+
19159+                vshrn.u32       d21, q1,  #14
19160+                vmovn.u32       d19, q1
19161+                vshrn.u32       d1,  q1,  #10
19162+
19163+                vshrn.u32       d22, q2,  #10
19164+                vmovn.u32       d2,  q2
19165+                vshrn.u32       d4,  q2,  #14
19166+
19167+                add             r10, r0,  #24   @ r10 = address for the second dst_u store
19168+                vshrn.u32       d23, q3,  #10
19169+                vmovn.u32       d3,  q3
19170+                vshrn.u32       d5,  q3,  #14
19171+
19172+                it              eq
19173+                addeq           r4,  r8         @ after 128 bytes skip a further (stride2 - 1) * 128
19174+                vuzp.16         q0,  q11
19175+                vuzp.16         q9,  q1
19176+                vuzp.16         q10, q2
19177+
19178+                @ q0   V0, V3,..
19179+                @ q9   U0, U3...
19180+                @ q10  U1, U4...
19181+                @ q11  U2, U5,..
19182+                @ q1   V1, V4,
19183+                @ q2   V2, V5,..
19184+
19185+                subs            r6,  #24        @ 24 samples per plane per iteration; flags consumed by blt/bne below
19186+                vbic.u16        q11, #0xfc00    @ keep the low 10 bits of each lane
19187+                vbic.u16        q9,  #0xfc00
19188+                vshr.u16        q10, #6         @ 16-bit lane >> 6 leaves the top 10 bits
19189+                vshr.u16        q2,  #6
19190+                vbic.u16        q0,  #0xfc00
19191+                vbic.u16        q1,  #0xfc00
19192+
19193+                blt             2f              @ r6 < 0: fewer than 24 left, do a partial write
19194+
19195+                vst3.16         {d18, d20, d22}, [r0],  r12   @ 12 samples to dst_u, r0 += 48
19196+                vst3.16         {d19, d21, d23}, [r10]        @ next 12 samples at old r0 + 24
19197+                add             r10, r2,  #24
19198+                vst3.16         {d0,  d2,  d4},  [r2],  r12   @ same pattern for dst_v
19199+                vst3.16         {d1,  d3,  d5},  [r10]
19200+
19201+                bne             1b              @ r6 != 0: more to do on this line
19202+
19203+11:
19204+                subs            r7,  #1         @ one line done
19205+                add             r5,  #128       @ source: next 128-byte row
19206+                add             r0,  r1         @ dest pointers to the start of the next line
19207+                add             r2,  r3
19208+                bne             10b
19209+
19210+                pop             {r4-r10, pc}
19211+
19212+@ Partial final write
19213+2:
19214+                cmp             r6,  #-12       @ r6 = remaining - 24: at least 12 left?
19215+                blt             1f
19216+                vst3.16         {d18, d20, d22}, [r0]!
19217+                vst3.16         {d0,  d2,  d4},  [r2]!
19218+                beq             11b             @ exactly 12: line complete
19219+                vmov            d18, d19        @ move the unwritten upper halves down
19220+                vmov            d20, d21
19221+                vmov            d22, d23
19222+                sub             r6,  #12
19223+                vmov            d0,  d1
19224+                vmov            d2,  d3
19225+                vmov            d4,  d5
19226+1:
19227+                cmp             r6,  #-18       @ at least 6 left? (flags survive the vzips)
19228+                @ Rezip here as it makes the remaining tail handling easier
19229+                vzip.16         d0,  d18
19230+                vzip.16         d2,  d20
19231+                vzip.16         d4,  d22
19232+                blt             1f
19233+                vst3.16         {d0[1],  d2[1],  d4[1]},  [r0]!   @ odd lanes go to dst_u
19234+                vst3.16         {d0[0],  d2[0],  d4[0]},  [r2]!   @ even lanes go to dst_v
19235+                vst3.16         {d0[3],  d2[3],  d4[3]},  [r0]!
19236+                vst3.16         {d0[2],  d2[2],  d4[2]},  [r2]!
19237+                beq             11b
19238+                vmov            d0,  d18
19239+                vmov            d2,  d20
19240+                sub             r6,  #6
19241+                vmov            d4,  d22
19242+1:
19243+                cmp             r6,  #-21       @ at least 3 left?
19244+                blt             1f
19245+                vst3.16         {d0[1], d2[1], d4[1]}, [r0]!
19246+                vst3.16         {d0[0], d2[0], d4[0]}, [r2]!
19247+                beq             11b
19248+                vmov            s4,  s5         @ d2: upper 32 bits -> lower 32 bits
19249+                sub             r6,  #3
19250+                vmov            s0,  s1         @ d0: upper 32 bits -> lower 32 bits
19251+1:
19252+                cmp             r6,  #-22       @ 2 left, otherwise 1
19253+                blt             1f
19254+                vst2.16         {d0[1], d2[1]}, [r0]!
19255+                vst2.16         {d0[0], d2[0]}, [r2]!
19256+                b               11b
19257+1:
19258+                vst1.16         {d0[1]}, [r0]!
19259+                vst1.16         {d0[0]}, [r2]!
19260+                b               11b
19261+
19262+endfunc
19263+
19264+@ void ff_rpi_sand30_lines_to_planar_p010(
19265+@   uint8_t * dest,             // [r0]
19266+@   unsigned int dst_stride,    // [r1]
19267+@   const uint8_t * src,        // [r2]
19268+@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
19269+@   unsigned int src_stride2,   // [sp, #0]  -> r3
19270+@   unsigned int _x,            // [sp, #4]  Ignored - 0
19271+@   unsigned int y,             // [sp, #8]  (r7 in prefix)
19272+@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
19273+@   unsigned int h);            // [sp, #16] -> r7
19274+@
19275+@ Assumes that we are starting on a stripe boundary and that overreading
19276+@ within the stripe is OK. However it does respect the dest size for writing
19277+
19278+function ff_rpi_sand30_lines_to_planar_p010, export=1
19279+                push            {r4-r8, lr}     @ +24
19280+                ldr             r3,  [sp, #24]  @ r3 = src_stride2
19281+                ldr             r6,  [sp, #36]  @ r6 = _w
19282+                ldr             r7,  [sp, #32]  @ y
19283+                mov             r12, #48        @ post-increment used by the paired vst3 stores
19284+                vmov.u16        q15, #0xffc0    @ mask keeping the top 10 bits of each 16-bit lane
19285+                sub             r3,  #1
19286+                lsl             r3,  #7         @ r3 = (src_stride2 - 1) * 128
19287+                sub             r1,  r1,  r6,  lsl #1   @ r1 = dst_stride - _w * 2
19288+                add             r8,  r2,  r7,  lsl #7   @ r8 = src + y * 128
19289+                ldr             r7,  [sp, #40]  @ r7 = h (line counter)
19290+
19291+10:
19292+                mov             r2,  r8         @ r2 = source read pointer for this line
19293+                add             r4,  r0,  #24   @ r4 = second store pointer, 24 bytes ahead of r0
19294+                mov             r5,  r6         @ r5 = _w countdown (48 per iteration)
19295+                mov             lr,  #0         @ lr = bytes consumed, modulo 128
19296+1:
19297+                vldm            r2!, {q10-q13}  @ load 64 bytes, r2 += 64
19298+                add             lr,  #64
19299+
19300+                vshl.u32        q14, q10, #6    @ word bits 0-9 moved up to bits 6-15
19301+                ands            lr,  #127       @ Z set once 128 bytes have been consumed (used by addeq below)
19302+                vshrn.u32       d4,  q10, #14   @ word bits 20-29 land in lane bits 6-15
19303+                vshrn.u32       d2,  q10, #4    @ word bits 10-19 land in lane bits 6-15
19304+                vmovn.u32       d0,  q14
19305+
19306+                vshl.u32        q14, q11, #6
19307+                it              eq
19308+                addeq           r2,  r3         @ after 128 bytes skip a further (src_stride2 - 1) * 128
19309+                vshrn.u32       d5,  q11, #14
19310+                vshrn.u32       d3,  q11, #4
19311+                vmovn.u32       d1,  q14
19312+
19313+                subs            r5,  #48        @ 48 samples per iteration; flags consumed by blt/bne below
19314+                vand            q2,  q15        @ clear the 6 low bits of every lane
19315+                vand            q1,  q15
19316+                vand            q0,  q15
19317+
19318+                vshl.u32        q14, q12, #6
19319+                vshrn.u32       d20, q12, #14
19320+                vshrn.u32       d18, q12, #4
19321+                vmovn.u32       d16, q14
19322+
19323+                vshl.u32        q14, q13, #6
19324+                vshrn.u32       d21, q13, #14
19325+                vshrn.u32       d19, q13, #4
19326+                vmovn.u32       d17, q14
19327+
19328+                vand            q10, q15
19329+                vand            q9,  q15
19330+                vand            q8,  q15
19331+                blt             2f              @ r5 < 0: fewer than 48 left, do a partial write
19332+
19333+                vst3.16         {d0,  d2,  d4},  [r0], r12   @ 12 samples each, r0 and r4 leapfrog by 48 bytes
19334+                vst3.16         {d1,  d3,  d5},  [r4], r12
19335+                vst3.16         {d16, d18, d20}, [r0], r12
19336+                vst3.16         {d17, d19, d21}, [r4], r12
19337+
19338+                bne             1b              @ r5 != 0: more to do on this line
19339+
19340+11:
19341+                subs            r7,  #1         @ one line done
19342+                add             r0,  r1         @ dest pointer to the start of the next line
19343+                add             r8,  #128       @ source: next 128-byte row
19344+                bne             10b
19345+
19346+                pop             {r4-r8, pc}
19347+
19348+@ Partial final write
19349+2:
19350+                cmp             r5,  #24-48     @ r5 = remaining - 48: at least 24 left?
19351+                blt             1f
19352+                vst3.16         {d0,  d2,  d4},  [r0], r12
19353+                vst3.16         {d1,  d3,  d5},  [r4]
19354+                beq             11b             @ exactly 24: line complete
19355+                vmov            q0,  q8         @ continue with the second, unwritten group
19356+                sub             r5,  #24
19357+                vmov            q1,  q9
19358+                vmov            q2,  q10
19359+1:
19360+                cmp             r5,  #12-48     @ at least 12 left?
19361+                blt             1f
19362+                vst3.16         {d0,  d2,  d4},  [r0]!
19363+                beq             11b
19364+                vmov            d0, d1
19365+                sub             r5, #12
19366+                vmov            d2, d3
19367+                vmov            d4, d5
19368+1:
19369+                cmp             r5,  #6-48      @ at least 6 left?
19370+                add             r4,  r0,  #6    @ avoid [r0]! on sequential instructions
19371+                blt             1f
19372+                vst3.16         {d0[0], d2[0], d4[0]}, [r0]
19373+                vst3.16         {d0[1], d2[1], d4[1]}, [r4]
19374+                add             r0,  #12        @ the two stores above have no writeback: step over 6 samples
19375+                beq             11b
19376+                vmov            s0,  s1         @ upper 32 bits of d0/d2/d4 -> lower 32 bits
19377+                sub             r5,  #6
19378+                vmov            s4,  s5
19379+                vmov            s8,  s9
19380+1:
19381+                cmp             r5, #3-48       @ at least 3 left?
19382+                blt             1f
19383+                vst3.16         {d0[0], d2[0], d4[0]}, [r0]!
19384+                beq             11b
19385+                sub             r5, #3
19386+                vshr.u32        d0, #16         @ lane 1 -> lane 0
19387+                vshr.u32        d2, #16
19388+1:
19389+                cmp             r5, #2-48       @ 2 left, otherwise 1
19390+                blt             1f
19391+                vst2.16         {d0[0], d2[0]}, [r0]!
19392+                b               11b
19393+1:
19394+                vst1.16         {d0[0]}, [r0]!
19395+                b               11b
19396+
19397+endfunc
19398+
19399+
19400+@ void ff_rpi_sand30_lines_to_planar_y8(
19401+@   uint8_t * dest,             // [r0]
19402+@   unsigned int dst_stride,    // [r1]
19403+@   const uint8_t * src,        // [r2]
19404+@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
19405+@   unsigned int src_stride2,   // [sp, #0]  -> r3
19406+@   unsigned int _x,            // [sp, #4]  Ignored - 0
19407+@   unsigned int y,             // [sp, #8]  (r7 in prefix)
19408+@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
19409+@   unsigned int h);            // [sp, #16] -> r7
19410+@
19411+@ Assumes that we are starting on a stripe boundary and that overreading
19412+@ within the stripe is OK. However it does respect the dest size for writing
19413+
19414+function ff_rpi_sand30_lines_to_planar_y8, export=1
19415+                push            {r4-r8, lr}     @ +24
19416+                ldr             r3,  [sp, #24]  @ r3 = src_stride2
19417+                ldr             r6,  [sp, #36]  @ r6 = _w
19418+                ldr             r7,  [sp, #32]  @ y
19419+                mov             r12, #48        @ post-increment used by the paired vst3 stores
19420+                lsl             r3,  #7         @ r3 = src_stride2 * 128
19421+                sub             r1,  r1,  r6    @ r1 = dst_stride - _w (one byte per pixel)
19422+                add             r8,  r2,  r7,  lsl #7   @ r8 = src + y * 128
19423+                ldr             r7,  [sp, #40]  @ r7 = h (line counter)
19424+
19425+10:
19426+                mov             r2,  r8         @ r2 = source read pointer for this line
19427+                add             r4,  r0,  #24   @ r4 = second store pointer, 24 bytes ahead of r0
19428+                mov             r5,  r6         @ r5 = _w countdown (96 per iteration)
19429+1:
19430+                vldm            r2,  {q8-q15}   @ load 128 bytes, no writeback
19431+
19432+                subs            r5,  #96        @ 96 pixels per iteration; flags consumed by blt/bne below
19433+
19434+                vmovn.u32       d0,  q8         @ bits 0-15 of each word
19435+                vshrn.u32       d2,  q8,  #12   @ bits 12-27 of each word
19436+                vshrn.u32       d4,  q8,  #16    @ Cannot vshrn.u32 #20!
19437+
19438+                add             r2,  r3         @ advance source by src_stride2 * 128
19439+
19440+                vmovn.u32       d1,  q9
19441+                vshrn.u32       d3,  q9,  #12
19442+                vshrn.u32       d5,  q9,  #16
19443+
19444+                pld             [r2, #0]
19445+
19446+                vshrn.u16       d0,  q0,  #2    @ word bits 2-9
19447+                vmovn.u16       d1,  q1         @ word bits 12-19
19448+                vshrn.u16       d2,  q2,  #6    @ word bits 22-29
19449+
19450+                vmovn.u32       d16, q10
19451+                vshrn.u32       d18, q10, #12
19452+                vshrn.u32       d20, q10, #16
19453+
19454+                vmovn.u32       d17, q11
19455+                vshrn.u32       d19, q11, #12
19456+                vshrn.u32       d21, q11, #16
19457+
19458+                pld             [r2, #64]
19459+
19460+                vshrn.u16       d4,  q8,  #2
19461+                vmovn.u16       d5,  q9
19462+                vshrn.u16       d6,  q10, #6
19463+
19464+                vmovn.u32       d16, q12
19465+                vshrn.u32       d18, q12, #12
19466+                vshrn.u32       d20, q12, #16
19467+
19468+                vmovn.u32       d17, q13
19469+                vshrn.u32       d19, q13, #12
19470+                vshrn.u32       d21, q13, #16
19471+
19472+                vshrn.u16       d16, q8,  #2
19473+                vmovn.u16       d17, q9
19474+                vshrn.u16       d18, q10, #6
19475+
19476+                vmovn.u32       d20, q14
19477+                vshrn.u32       d22, q14, #12
19478+                vshrn.u32       d24, q14, #16
19479+
19480+                vmovn.u32       d21, q15
19481+                vshrn.u32       d23, q15, #12
19482+                vshrn.u32       d25, q15, #16
19483+
19484+                vshrn.u16       d20, q10, #2
19485+                vmovn.u16       d21, q11
19486+                vshrn.u16       d22, q12, #6
19487+
19488+                blt             2f              @ r5 < 0: fewer than 96 left, do a partial write
19489+
19490+                vst3.8          {d0,  d1,  d2},  [r0], r12   @ 24 pixels each, r0 and r4 leapfrog by 48 bytes
19491+                vst3.8          {d4,  d5,  d6},  [r4], r12
19492+                vst3.8          {d16, d17, d18}, [r0], r12
19493+                vst3.8          {d20, d21, d22}, [r4], r12
19494+
19495+                bne             1b              @ r5 != 0: more to do on this line
19496+
19497+11:
19498+                subs            r7,  #1         @ one line done
19499+                add             r0,  r1         @ dest pointer to the start of the next line
19500+                add             r8,  #128       @ source: next 128-byte row
19501+                bne             10b
19502+
19503+                pop             {r4-r8, pc}
19504+
19505+@ Partial final write
19506+2:
19507+                cmp             r5,  #48-96     @ r5 = remaining - 96: at least 48 left?
19508+                blt             1f
19509+                vst3.8          {d0,  d1,  d2},  [r0], r12
19510+                vst3.8          {d4,  d5,  d6},  [r4], r12
19511+                beq             11b             @ exactly 48: line complete
19512+                vmov            q0,  q8         @ continue with the two unwritten groups
19513+                vmov            q2,  q10
19514+                sub             r5,  #48
19515+                vmov            d2,  d18
19516+                vmov            d6,  d22
19517+1:
19518+                cmp             r5,  #24-96     @ at least 24 left?
19519+                blt             1f
19520+                vst3.8          {d0,  d1,  d2},  [r0]!
19521+                beq             11b
19522+                vmov            q0,  q2
19523+                sub             r5,  #24
19524+                vmov            d2,  d6
19525+1:
19526+                cmp             r5,  #12-96     @ at least 12 left?
19527+                blt             1f
19528+                vst3.8          {d0[0], d1[0], d2[0]}, [r0]!
19529+                vst3.8          {d0[1], d1[1], d2[1]}, [r0]!
19530+                vst3.8          {d0[2], d1[2], d2[2]}, [r0]!
19531+                vst3.8          {d0[3], d1[3], d2[3]}, [r0]!
19532+                beq             11b
19533+                vmov            s0,  s1         @ upper 32 bits of d0/d1/d2 -> lower 32 bits
19534+                sub             r5,  #12
19535+                vmov            s2,  s3
19536+                vmov            s4,  s5
19537+1:
19538+                cmp             r5,  #6-96      @ at least 6 left?
19539+                blt             1f
19540+                vst3.8          {d0[0], d1[0], d2[0]}, [r0]!
19541+                vst3.8          {d0[1], d1[1], d2[1]}, [r0]!
19542+                @ No extra add here: the two writeback stores above already moved r0 on by 6 bytes
19543+                beq             11b
19544+                vshr.u32        d0,  #16        @ lanes 2-3 -> lanes 0-1
19545+                sub             r5,  #6
19546+                vshr.u32        d1,  #16
19547+                vshr.u32        d2,  #16
19548+1:
19549+                cmp             r5, #3-96       @ at least 3 left?
19550+                blt             1f
19551+                vst3.8          {d0[0], d1[0], d2[0]}, [r0]!
19552+                beq             11b
19553+                sub             r5, #3
19554+                vshr.u32        d0, #8          @ lane 1 -> lane 0
19555+                vshr.u32        d1, #8
19556+1:
19557+                cmp             r5, #2-96       @ 2 left, otherwise 1
19558+                blt             1f
19559+                vst2.8          {d0[0], d1[0]}, [r0]!
19560+                b               11b
19561+1:
19562+                vst1.8          {d0[0]}, [r0]!
19563+                b               11b
19564+
19565+endfunc
19566+
19567+
19568--- /dev/null
19569+++ b/libavutil/arm/rpi_sand_neon.h
19570@@ -0,0 +1,110 @@
19571+/*
19572+Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
19573+All rights reserved.
19574+
19575+Redistribution and use in source and binary forms, with or without
19576+modification, are permitted provided that the following conditions are met:
19577+    * Redistributions of source code must retain the above copyright
19578+      notice, this list of conditions and the following disclaimer.
19579+    * Redistributions in binary form must reproduce the above copyright
19580+      notice, this list of conditions and the following disclaimer in the
19581+      documentation and/or other materials provided with the distribution.
19582+    * Neither the name of the copyright holder nor the
19583+      names of its contributors may be used to endorse or promote products
19584+      derived from this software without specific prior written permission.
19585+
19586+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19587+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19588+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19589+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
19590+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19591+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19592+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19593+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
19594+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
19595+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19596+
19597+Authors: John Cox
19598+*/
19599+
19600+#ifndef AVUTIL_ARM_SAND_NEON_H
19601+#define AVUTIL_ARM_SAND_NEON_H
19602+
19603+void ff_rpi_sand128b_stripe_to_8_10(
19604+  uint8_t * dest,             // [r0]
19605+  const uint8_t * src1,       // [r1]
19606+  const uint8_t * src2,       // [r2]
19607+  unsigned int lines);        // [r3]
19608+
19609+void ff_rpi_sand8_lines_to_planar_y8(
19610+  uint8_t * dest,             // [r0]
19611+  unsigned int dst_stride,    // [r1]
19612+  const uint8_t * src,        // [r2]
19613+  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
19614+  unsigned int src_stride2,   // [sp, #0]  -> r3
19615+  unsigned int _x,            // [sp, #4]  Ignored - 0
19616+  unsigned int y,             // [sp, #8]  (r7 in prefix)
19617+  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
19618+  unsigned int h);            // [sp, #16] -> r7
19619+
19620+void ff_rpi_sand8_lines_to_planar_c8(
19621+  uint8_t * dst_u,            // [r0]
19622+  unsigned int dst_stride_u,  // [r1]
19623+  uint8_t * dst_v,            // [r2]
19624+  unsigned int dst_stride_v,  // [r3]
19625+  const uint8_t * src,        // [sp, #0]  -> r4, r5
19626+  unsigned int stride1,       // [sp, #4]  128
19627+  unsigned int stride2,       // [sp, #8]  -> r8
19628+  unsigned int _x,            // [sp, #12] 0
19629+  unsigned int y,             // [sp, #16] (r7 in prefix)
19630+  unsigned int _w,            // [sp, #20] -> r12, r6
19631+  unsigned int h);            // [sp, #24] -> r7
19632+
19633+void ff_rpi_sand30_lines_to_planar_y16(
19634+  uint8_t * dest,             // [r0]
19635+  unsigned int dst_stride,    // [r1]
19636+  const uint8_t * src,        // [r2]
19637+  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
19638+  unsigned int src_stride2,   // [sp, #0]  -> r3
19639+  unsigned int _x,            // [sp, #4]  Ignored - 0
19640+  unsigned int y,             // [sp, #8]  (r7 in prefix)
19641+  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
19642+  unsigned int h);            // [sp, #16] -> r7
19643+
19644+void ff_rpi_sand30_lines_to_planar_c16(
19645+  uint8_t * dst_u,            // [r0]
19646+  unsigned int dst_stride_u,  // [r1]
19647+  uint8_t * dst_v,            // [r2]
19648+  unsigned int dst_stride_v,  // [r3]
19649+  const uint8_t * src,        // [sp, #0]  -> r4, r5
19650+  unsigned int stride1,       // [sp, #4]  128
19651+  unsigned int stride2,       // [sp, #8]  -> r8
19652+  unsigned int _x,            // [sp, #12] 0
19653+  unsigned int y,             // [sp, #16] (r7 in prefix)
19654+  unsigned int _w,            // [sp, #20] -> r6, r9
19655+  unsigned int h);            // [sp, #24] -> r7
19656+
19657+void ff_rpi_sand30_lines_to_planar_p010(
19658+  uint8_t * dest,             // [r0]
19659+  unsigned int dst_stride,    // [r1]
19660+  const uint8_t * src,        // [r2]
19661+  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
19662+  unsigned int src_stride2,   // [sp, #0]  -> r3
19663+  unsigned int _x,            // [sp, #4]  Ignored - 0
19664+  unsigned int y,             // [sp, #8]  (r7 in prefix)
19665+  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
19666+  unsigned int h);            // [sp, #16] -> r7
19667+
19668+void ff_rpi_sand30_lines_to_planar_y8(
19669+  uint8_t * dest,             // [r0]
19670+  unsigned int dst_stride,    // [r1]
19671+  const uint8_t * src,        // [r2]
19672+  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
19673+  unsigned int src_stride2,   // [sp, #0]  -> r3
19674+  unsigned int _x,            // [sp, #4]  Ignored - 0
19675+  unsigned int y,             // [sp, #8]  (r7 in prefix)
19676+  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
19677+  unsigned int h);            // [sp, #16] -> r7
19678+
19679+#endif // AVUTIL_ARM_SAND_NEON_H
19680+
19681--- a/libavutil/frame.c
19682+++ b/libavutil/frame.c
19683@@ -16,6 +16,8 @@
19684  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19685  */
19686
19687+#include "config.h"
19688+
19689 #include "channel_layout.h"
19690 #include "avassert.h"
19691 #include "buffer.h"
19692@@ -27,6 +29,9 @@
19693 #include "mem.h"
19694 #include "samplefmt.h"
19695 #include "hwcontext.h"
19696+#if CONFIG_SAND
19697+#include "rpi_sand_fns.h"
19698+#endif
19699
19700 #if FF_API_OLD_CHANNEL_LAYOUT
19701 #define CHECK_CHANNELS_CONSISTENCY(frame) \
19702@@ -875,6 +880,12 @@ int av_frame_apply_cropping(AVFrame *fra
19703         (frame->crop_top + frame->crop_bottom) >= frame->height)
19704         return AVERROR(ERANGE);
19705
19706+#if CONFIG_SAND
19707+    // Sand cannot be cropped - do not try
19708+    if (av_rpi_is_sand_format(frame->format))
19709+        return 0;
19710+#endif
19711+
19712     desc = av_pix_fmt_desc_get(frame->format);
19713     if (!desc)
19714         return AVERROR_BUG;
19715--- a/libavutil/frame.h
19716+++ b/libavutil/frame.h
19717@@ -940,6 +940,16 @@ int av_frame_apply_cropping(AVFrame *fra
19718  */
19719 const char *av_frame_side_data_name(enum AVFrameSideDataType type);
19720
19721+
19722+static inline int av_frame_cropped_width(const AVFrame * const frame)
19723+{
19724+    return frame->width - (frame->crop_left + frame->crop_right);   /* width less the left and right crop */
19725+}
19726+static inline int av_frame_cropped_height(const AVFrame * const frame)
19727+{
19728+    return frame->height - (frame->crop_top + frame->crop_bottom);  /* height less the top and bottom crop */
19729+}
19730+
19731 /**
19732  * @}
19733  */
19734--- a/libavutil/hwcontext_drm.c
19735+++ b/libavutil/hwcontext_drm.c
19736@@ -21,6 +21,7 @@
19737 #include <fcntl.h>
19738 #include <sys/mman.h>
19739 #include <unistd.h>
19740+#include <sys/ioctl.h>
19741
19742 /* This was introduced in version 4.6. And may not exist all without an
19743  * optional package. So to prevent a hard dependency on needing the Linux
19744@@ -31,6 +32,7 @@
19745 #endif
19746
19747 #include <drm.h>
19748+#include <libdrm/drm_fourcc.h>
19749 #include <xf86drm.h>
19750
19751 #include "avassert.h"
19752@@ -38,7 +40,9 @@
19753 #include "hwcontext_drm.h"
19754 #include "hwcontext_internal.h"
19755 #include "imgutils.h"
19756-
19757+#if CONFIG_SAND
19758+#include "libavutil/rpi_sand_fns.h"
19759+#endif
19760
19761 static void drm_device_free(AVHWDeviceContext *hwdev)
19762 {
19763@@ -53,6 +57,11 @@ static int drm_device_create(AVHWDeviceC
19764     AVDRMDeviceContext *hwctx = hwdev->hwctx;
19765     drmVersionPtr version;
19766
19767+    if (device == NULL) {
19768+        hwctx->fd = -1;
19769+        return 0;
19770+    }
19771+
19772     hwctx->fd = open(device, O_RDWR);
19773     if (hwctx->fd < 0)
19774         return AVERROR(errno);
19775@@ -139,6 +148,8 @@ static int drm_map_frame(AVHWFramesConte
19776     if (flags & AV_HWFRAME_MAP_WRITE)
19777         mmap_prot |= PROT_WRITE;
19778
19779+    if (dst->format == AV_PIX_FMT_NONE)
19780+        dst->format = hwfc->sw_format;
19781 #if HAVE_LINUX_DMA_BUF_H
19782     if (flags & AV_HWFRAME_MAP_READ)
19783         map->sync_flags |= DMA_BUF_SYNC_READ;
19784@@ -185,6 +196,23 @@ static int drm_map_frame(AVHWFramesConte
19785
19786     dst->width  = src->width;
19787     dst->height = src->height;
19788+    dst->crop_top    = src->crop_top;
19789+    dst->crop_bottom = src->crop_bottom;
19790+    dst->crop_left   = src->crop_left;
19791+    dst->crop_right  = src->crop_right;
19792+
19793+#if CONFIG_SAND
19794+    // Rework for sand frames
19795+    if (av_rpi_is_sand_frame(dst)) {
19796+        // As it stands the sand formats hold stride2 in linesize[3]
19797+        // linesize[0] & [1] contain stride1 which is always 128 for everything we do
19798+        // * Arguably this should be reworked s.t. stride2 is in linesize[0] & [1]
19799+        dst->linesize[3] = fourcc_mod_broadcom_param(desc->objects[0].format_modifier);
19800+        dst->linesize[0] = 128;
19801+        dst->linesize[1] = 128;
19802+        // *** Are we sure src->height is actually what we want ???
19803+    }
19804+#endif
19805
19806     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
19807                                 &drm_unmap_frame, map);
19808@@ -206,16 +234,29 @@ static int drm_transfer_get_formats(AVHW
19809                                     enum AVHWFrameTransferDirection dir,
19810                                     enum AVPixelFormat **formats)
19811 {
19812-    enum AVPixelFormat *pix_fmts;
19813+    enum AVPixelFormat *p;
19814
19815-    pix_fmts = av_malloc_array(2, sizeof(*pix_fmts));
19816-    if (!pix_fmts)
19817+    p = *formats = av_malloc_array(3, sizeof(*p));
19818+    if (!p)
19819         return AVERROR(ENOMEM);
19820
19821-    pix_fmts[0] = ctx->sw_format;
19822-    pix_fmts[1] = AV_PIX_FMT_NONE;
19823+    // **** Offer native sand too ????
19824+    *p++ =
19825+#if CONFIG_SAND
19826+        ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ?
19827+            AV_PIX_FMT_YUV420P :
19828+        ctx->sw_format == AV_PIX_FMT_RPI4_10 ?
19829+            AV_PIX_FMT_YUV420P10LE :
19830+#endif
19831+            ctx->sw_format;
19832+
19833+#if CONFIG_SAND
19834+    if (ctx->sw_format == AV_PIX_FMT_RPI4_10 ||
19835+        ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128)
19836+        *p++ = AV_PIX_FMT_NV12;
19837+#endif
19838
19839-    *formats = pix_fmts;
19840+    *p = AV_PIX_FMT_NONE;
19841     return 0;
19842 }
19843
19844@@ -231,18 +272,62 @@ static int drm_transfer_data_from(AVHWFr
19845     map = av_frame_alloc();
19846     if (!map)
19847         return AVERROR(ENOMEM);
19848-    map->format = dst->format;
19849
19850+    // Map to default
19851+    map->format = AV_PIX_FMT_NONE;
19852     err = drm_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ);
19853     if (err)
19854         goto fail;
19855
19856-    map->width  = dst->width;
19857-    map->height = dst->height;
19858+#if 0
19859+    av_log(hwfc, AV_LOG_INFO, "%s: src fmt=%d (%d), dst fmt=%d (%d) s=%dx%d l=%d/%d/%d/%d, d=%dx%d l=%d/%d/%d\n", __func__,
19860+           hwfc->sw_format, AV_PIX_FMT_RPI4_8, dst->format, AV_PIX_FMT_YUV420P10LE,
19861+           map->width, map->height,
19862+           map->linesize[0],
19863+           map->linesize[1],
19864+           map->linesize[2],
19865+           map->linesize[3],
19866+           dst->width, dst->height,
19867+           dst->linesize[0],
19868+           dst->linesize[1],
19869+           dst->linesize[2]);
19870+#endif
19871+#if CONFIG_SAND
19872+    if (av_rpi_is_sand_frame(map)) {
19873+        // Preserve crop - later ffmpeg code assumes that we have in that it
19874+        // overwrites any crop that we create with the old values
19875+        const unsigned int w = FFMIN(dst->width, map->width);
19876+        const unsigned int h = FFMIN(dst->height, map->height);
19877+
19878+        map->crop_top = 0;
19879+        map->crop_bottom = 0;
19880+        map->crop_left = 0;
19881+        map->crop_right = 0;
19882+
19883+        if (av_rpi_sand_to_planar_frame(dst, map) != 0)
19884+        {
19885+            av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__);
19886+            err = AVERROR(EINVAL);
19887+            goto fail;
19888+        }
19889+
19890+        dst->width = w;
19891+        dst->height = h;
19892+    }
19893+    else
19894+#endif
19895+    {
19896+        // Kludge mapped h/w s.t. frame_copy works
19897+        map->width  = dst->width;
19898+        map->height = dst->height;
19899+        err = av_frame_copy(dst, map);
19900+    }
19901
19902-    err = av_frame_copy(dst, map);
19903     if (err)
19904+    {
19905+        av_log(hwfc, AV_LOG_ERROR, "%s: Copy fail\n", __func__);
19906         goto fail;
19907+    }
19908
19909     err = 0;
19910 fail:
19911@@ -257,7 +342,10 @@ static int drm_transfer_data_to(AVHWFram
19912     int err;
19913
19914     if (src->width > hwfc->width || src->height > hwfc->height)
19915+    {   // Log the source dimensions, since those are what the check above rejected
19916+        av_log(hwfc, AV_LOG_ERROR, "%s: H/w mismatch: %d/%d, %d/%d\n", __func__, src->width, hwfc->width, src->height, hwfc->height);
19917         return AVERROR(EINVAL);
19918+    }
19919
19920     map = av_frame_alloc();
19921     if (!map)
19922--- a/libavutil/hwcontext_vulkan.c
19923+++ b/libavutil/hwcontext_vulkan.c
19924@@ -57,6 +57,14 @@
19925 #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
19926 #endif
19927
19928+// Sometimes missing definitions
19929+#ifndef VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME
19930+#define VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME "VK_EXT_video_decode_h264"
19931+#endif
19932+#ifndef VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME
19933+#define VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME "VK_EXT_video_decode_h265"
19934+#endif
19935+
19936 typedef struct VulkanQueueCtx {
19937     VkFence fence;
19938     VkQueue queue;
19939--- a/libavutil/pixdesc.c
19940+++ b/libavutil/pixdesc.c
19941@@ -2491,6 +2491,50 @@ static const AVPixFmtDescriptor av_pix_f
19942         },
19943         .flags = AV_PIX_FMT_FLAG_PLANAR,
19944     },
19945+    [AV_PIX_FMT_SAND128] = {
19946+        .name = "sand128",
19947+        .nb_components = 3,
19948+        .log2_chroma_w = 1,
19949+        .log2_chroma_h = 1,
19950+        .comp = {
19951+            { 0, 1, 0, 0, 8 },        /* Y */
19952+            { 1, 2, 0, 0, 8 },        /* U */
19953+            { 1, 2, 1, 0, 8 },        /* V */
19954+        },
19955+        .flags = 0,
19956+    },
19957+    [AV_PIX_FMT_SAND64_10] = {
19958+        .name = "sand64_10",
19959+        .nb_components = 3,
19960+        .log2_chroma_w = 1,
19961+        .log2_chroma_h = 1,
19962+        .comp = {
19963+            { 0, 2, 0, 0, 10 },        /* Y */
19964+            { 1, 4, 0, 0, 10 },        /* U */
19965+            { 1, 4, 2, 0, 10 },        /* V */
19966+        },
19967+        .flags = 0,
19968+    },
19969+    [AV_PIX_FMT_SAND64_16] = {
19970+        .name = "sand64_16",
19971+        .nb_components = 3,
19972+        .log2_chroma_w = 1,
19973+        .log2_chroma_h = 1,
19974+        .comp = {
19975+            { 0, 2, 0, 0, 16 },        /* Y */
19976+            { 1, 4, 0, 0, 16 },        /* U */
19977+            { 1, 4, 2, 0, 16 },        /* V */
19978+        },
19979+        .flags = 0,
19980+    },
19981+    [AV_PIX_FMT_RPI4_8] = {
19982+        .name = "rpi4_8",
19983+        .flags = AV_PIX_FMT_FLAG_HWACCEL,
19984+    },
19985+    [AV_PIX_FMT_RPI4_10] = {
19986+        .name = "rpi4_10",
19987+        .flags = AV_PIX_FMT_FLAG_HWACCEL,
19988+    },
19989 };
19990
19991 static const char * const color_range_names[] = {
19992--- a/libavutil/pixfmt.h
19993+++ b/libavutil/pixfmt.h
19994@@ -349,6 +349,14 @@ enum AVPixelFormat {
19995
19996     AV_PIX_FMT_Y210BE,    ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, big-endian
19997     AV_PIX_FMT_Y210LE,    ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian
19998+// RPI - not on ifdef so can be got at by calling progs
19999+// #define so code that uses this can know it is there
20000+#define AVUTIL_HAVE_PIX_FMT_SAND 1
20001+    AV_PIX_FMT_SAND128,    ///< 4:2:0  8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding
20002+    AV_PIX_FMT_SAND64_10,  ///< 4:2:0 10-bit  64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding
20003+    AV_PIX_FMT_SAND64_16,  ///< 4:2:0 16-bit  64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding
20004+    AV_PIX_FMT_RPI4_8,
20005+    AV_PIX_FMT_RPI4_10,
20006
20007     AV_PIX_FMT_X2RGB10LE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), little-endian, X=unused/undefined
20008     AV_PIX_FMT_X2RGB10BE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), big-endian, X=unused/undefined
20009--- /dev/null
20010+++ b/libavutil/rpi_sand_fn_pw.h
20011@@ -0,0 +1,227 @@
20012+/*
20013+Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
20014+All rights reserved.
20015+
20016+Redistribution and use in source and binary forms, with or without
20017+modification, are permitted provided that the following conditions are met:
20018+    * Redistributions of source code must retain the above copyright
20019+      notice, this list of conditions and the following disclaimer.
20020+    * Redistributions in binary form must reproduce the above copyright
20021+      notice, this list of conditions and the following disclaimer in the
20022+      documentation and/or other materials provided with the distribution.
20023+    * Neither the name of the copyright holder nor the
20024+      names of its contributors may be used to endorse or promote products
20025+      derived from this software without specific prior written permission.
20026+
20027+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20028+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20029+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20030+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
20031+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20032+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20033+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20034+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20035+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
20036+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20037+
20038+Authors: John Cox
20039+*/
20040+
20041+// * Included twice from rpi_sand_fn with different PW
20042+
20043+#define STRCAT(x,y) x##y
20044+
20045+#if PW == 1
20046+#define pixel uint8_t
20047+#define FUNC(f) STRCAT(f, 8)
20048+#elif PW == 2
20049+#define pixel uint16_t
20050+#define FUNC(f) STRCAT(f, 16)
20051+#else
20052+#error Unexpected PW
20053+#endif
20054+
20055+// Fetches a single patch - offscreen fixup not done here
20056+// w <= stride1
20057+// unclipped
20058+void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride,
20059+                             const uint8_t * src,
20060+                             unsigned int stride1, unsigned int stride2,
20061+                             unsigned int _x, unsigned int y,
20062+                             unsigned int _w, unsigned int h)
20063+{
20064+    const unsigned int x = _x;
20065+    const unsigned int w = _w;
20066+    const unsigned int mask = stride1 - 1;  // stride1 is used as a bit mask - assumed to be a power of 2
20067+
20068+#if PW == 1 && HAVE_SAND_ASM
20069+    if (_x == 0 && have_neon(av_get_cpu_flags())) {  // asm path only if NEON is present at runtime
20070+        ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride,
20071+                                     src, stride1, stride2, _x, y, _w, h);
20072+        return;
20073+    }
20074+#endif
20075+
20076+    if ((x & ~mask) == ((x + w) & ~mask)) {
20077+        // All in one sand stripe
20078+        const uint8_t * p = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
20079+        for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p += stride1) {
20080+            memcpy(dst, p, w);
20081+        }
20082+    }
20083+    else
20084+    {
20085+        // Two+ stripe
20086+        const unsigned int sstride = stride1 * stride2;  // byte distance between adjacent stripes
20087+        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
20088+        const uint8_t * p2 = p1 + sstride - (x & mask);  // start of the row in the next stripe
20089+        const unsigned int w1 = stride1 - (x & mask);    // bytes left in the first stripe
20090+        const unsigned int w3 = (x + w) & mask;          // bytes used in the last stripe
20091+        const unsigned int w2 = w - (w1 + w3);           // bytes in whole stripes in between
20092+
20093+        for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p1 += stride1, p2 += stride1) {
20094+            unsigned int j;
20095+            const uint8_t * p = p2;
20096+            uint8_t * d = dst;
20097+            memcpy(d, p1, w1);
20098+            d += w1;
20099+            for (j = 0; j < w2; j += stride1, d += stride1, p += sstride) {
20100+                memcpy(d, p, stride1);
20101+            }
20102+            memcpy(d, p, w3);
20103+        }
20104+    }
20105+}
20106+
20107+// x & w in bytes but not of interleave (i.e. offset = x*2 for U&V)
20108+// De-interleaves the UV pairs of a sand chroma patch into separate U and V buffers
20109+void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_stride_u,
20110+                             uint8_t * dst_v, const unsigned int dst_stride_v,
20111+                             const uint8_t * src,
20112+                             unsigned int stride1, unsigned int stride2,
20113+                             unsigned int _x, unsigned int y,
20114+                             unsigned int _w, unsigned int h)
20115+{
20116+    const unsigned int x = _x * 2;
20117+    const unsigned int w = _w * 2;
20118+    const unsigned int mask = stride1 - 1;
20119+
20120+#if PW == 1 && HAVE_SAND_ASM
20121+    if (_x == 0 && have_neon(av_get_cpu_flags())) {  // asm path only if NEON is present at runtime
20122+        ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v,
20123+                                     src, stride1, stride2, _x, y, _w, h);
20124+        return;
20125+    }
20126+#endif
20127+
20128+    if ((x & ~mask) == ((x + w) & ~mask)) {
20129+        // All in one sand stripe
20130+        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
20131+        for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1) {
20132+            pixel * du = (pixel *)dst_u;
20133+            pixel * dv = (pixel *)dst_v;
20134+            const pixel * p = (const pixel *)p1;
20135+            for (unsigned int k = 0; k < w; k += 2 * PW) {
20136+                *du++ = *p++;
20137+                *dv++ = *p++;
20138+            }
20139+        }
20140+    }
20141+    else
20142+    {
20143+        // Two+ stripe
20144+        const unsigned int sstride = stride1 * stride2;
20145+        const unsigned int sstride_p = (sstride - stride1) / PW;  // end of a stripe row to start of the next stripe, in pixels
20146+
20147+        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
20148+        const uint8_t * p2 = p1 + sstride - (x & mask);
20149+        const unsigned int w1 = stride1 - (x & mask);
20150+        const unsigned int w3 = (x + w) & mask;
20151+        const unsigned int w2 = w - (w1 + w3);
20152+
20153+        for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1, p2 += stride1) {
20154+            unsigned int j;
20155+            const pixel * p = (const pixel *)p1;
20156+            pixel * du = (pixel *)dst_u;
20157+            pixel * dv = (pixel *)dst_v;
20158+            for (unsigned int k = 0; k < w1; k += 2 * PW) {
20159+                *du++ = *p++;
20160+                *dv++ = *p++;
20161+            }
20162+            for (j = 0, p = (const pixel *)p2; j < w2; j += stride1, p += sstride_p) {
20163+                for (unsigned int k = 0; k < stride1; k += 2 * PW) {
20164+                    *du++ = *p++;
20165+                    *dv++ = *p++;
20166+                }
20167+            }
20168+            for (unsigned int k = 0; k < w3; k += 2 * PW) {
20169+                *du++ = *p++;
20170+                *dv++ = *p++;
20171+            }
20172+        }
20173+    }
20174+}
20175+// Interleaves separate U and V buffers into the UV pairs of a sand chroma patch; _x & _w are doubled to get byte offsets
20176+void FUNC(av_rpi_planar_to_sand_c)(uint8_t * dst_c,
20177+                             unsigned int stride1, unsigned int stride2,
20178+                             const uint8_t * src_u, const unsigned int src_stride_u,
20179+                             const uint8_t * src_v, const unsigned int src_stride_v,
20180+                             unsigned int _x, unsigned int y,
20181+                             unsigned int _w, unsigned int h)
20182+{
20183+    const unsigned int x = _x * 2;
20184+    const unsigned int w = _w * 2;
20185+    const unsigned int mask = stride1 - 1;
20186+    if ((x & ~mask) == ((x + w) & ~mask)) {
20187+        // All in one sand stripe
20188+        uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2;
20189+        for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1) {
20190+            const pixel * su = (const pixel *)src_u;
20191+            const pixel * sv = (const pixel *)src_v;
20192+            pixel * p = (pixel *)p1;
20193+            for (unsigned int k = 0; k < w; k += 2 * PW) {
20194+                *p++ = *su++;
20195+                *p++ = *sv++;
20196+            }
20197+        }
20198+    }
20199+    else
20200+    {
20201+        // Two+ stripe
20202+        const unsigned int sstride = stride1 * stride2;
20203+        const unsigned int sstride_p = (sstride - stride1) / PW;
20204+
20205+        uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2;  // written through: must not be const
20206+        uint8_t * p2 = p1 + sstride - (x & mask);
20207+        const unsigned int w1 = stride1 - (x & mask);
20208+        const unsigned int w3 = (x + w) & mask;
20209+        const unsigned int w2 = w - (w1 + w3);
20210+
20211+        for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1, p2 += stride1) {
20212+            unsigned int j;
20213+            const pixel * su = (const pixel *)src_u;
20214+            const pixel * sv = (const pixel *)src_v;
20215+            pixel * p = (pixel *)p1;
20216+            for (unsigned int k = 0; k < w1; k += 2 * PW) {
20217+                *p++ = *su++;
20218+                *p++ = *sv++;
20219+            }
20220+            for (j = 0, p = (pixel *)p2; j < w2; j += stride1, p += sstride_p) {
20221+                for (unsigned int k = 0; k < stride1; k += 2 * PW) {
20222+                    *p++ = *su++;
20223+                    *p++ = *sv++;
20224+                }
20225+            }
20226+            for (unsigned int k = 0; k < w3; k += 2 * PW) {
20227+                *p++ = *su++;
20228+                *p++ = *sv++;
20229+            }
20230+        }
20231+    }
20232+}
20233+
20234+
20235+#undef pixel
20236+#undef STRCAT
20237+#undef FUNC
20238+
20239--- /dev/null
20240+++ b/libavutil/rpi_sand_fns.c
20241@@ -0,0 +1,447 @@
20242+/*
20243+Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
20244+All rights reserved.
20245+
20246+Redistribution and use in source and binary forms, with or without
20247+modification, are permitted provided that the following conditions are met:
20248+    * Redistributions of source code must retain the above copyright
20249+      notice, this list of conditions and the following disclaimer.
20250+    * Redistributions in binary form must reproduce the above copyright
20251+      notice, this list of conditions and the following disclaimer in the
20252+      documentation and/or other materials provided with the distribution.
20253+    * Neither the name of the copyright holder nor the
20254+      names of its contributors may be used to endorse or promote products
20255+      derived from this software without specific prior written permission.
20256+
20257+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20258+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20259+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20260+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
20261+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20262+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20263+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20264+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20265+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
20266+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20267+
20268+Authors: John Cox
20269+*/
20270+
20271+#include "config.h"
20272+#include <stdint.h>
20273+#include <string.h>
20274+#include "rpi_sand_fns.h"
20275+#include "avassert.h"
20276+#include "frame.h"
20277+
20278+#if ARCH_ARM && HAVE_NEON
20279+#include "libavutil/arm/cpu.h"
20280+#include "libavutil/arm/rpi_sand_neon.h"
20281+#define HAVE_SAND_ASM 1
20282+#elif ARCH_AARCH64 && HAVE_NEON
20283+#include "libavutil/aarch64/cpu.h"
20284+#include "libavutil/aarch64/rpi_sand_neon.h"
20285+#define HAVE_SAND_ASM 1
20286+#else
20287+#define HAVE_SAND_ASM 0
20288+#endif
20289+
20290+#define PW 1
20291+#include "rpi_sand_fn_pw.h"
20292+#undef PW
20293+
20294+#define PW 2
20295+#include "rpi_sand_fn_pw.h"
20296+#undef PW
20297+
20298+#if 1
20299+// Simple round: narrows n 16-bit samples to 8 bits with a round-to-nearest shift by shr, saturating at 255
20300+static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr)
20301+{
20302+    const unsigned int rnd = (1 << shr) >> 1;
20303+    const uint16_t * src = (const uint16_t *)_src;
20304+    for (; n != 0; --n) {
20305+        const unsigned int v = (*src++ + rnd) >> shr;
20306+        *dst++ = v > 255 ? 255 : v;  // rounding can carry past 8 bits, e.g. (1023 + 2) >> 2 == 256
20307+    }
20308+}
20309+#else
20310+// Dithered variation
20311+static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr)
20312+{
20313+    unsigned int rnd = (1 << shr) >> 1;
20314+    const unsigned int mask = ((1 << shr) - 1);
20315+    const uint16_t * src = (const uint16_t *)_src;
20316+
20317+    for (; n != 0; --n) {
20318+        rnd = *src++ + (rnd & mask);
20319+        *dst++ = rnd >> shr;
20320+    }
20321+}
20322+#endif
20323+
20324+// Fetches a single patch - offscreen fixup not done here
20325+// w <= stride1
20326+// unclipped
20327+// _x & _w in pixels, strides in bytes
20328+void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
20329+                             const uint8_t * src,
20330+                             unsigned int stride1, unsigned int stride2,
20331+                             unsigned int _x, unsigned int y,
20332+                             unsigned int _w, unsigned int h)
20333+{
20334+    const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word
20335+    const unsigned int xskip0 = _x - (x0 >> 2) * 3;
20336+    const unsigned int x1 = ((_x + _w) / 3) * 4;
20337+    const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3;
20338+    const unsigned int mask = stride1 - 1;
20339+    const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
20340+    const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
20341+
20342+#if HAVE_SAND_ASM
20343+    if (_x == 0 && have_neon(av_get_cpu_flags())) {
20344+        ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h);
20345+        return;
20346+    }
20347+#endif
20348+
20349+    if (x0 == x1) {
20350+        // *******************
20351+        // Partial single word xfer
20352+        return;
20353+    }
20354+
20355+    for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1)
20356+    {
20357+        unsigned int x = x0;
20358+        const uint32_t * p = (const uint32_t *)p0;
20359+        uint16_t * d = (uint16_t *)dst;
20360+
20361+        if (xskip0 != 0) {
20362+            const uint32_t p3 = *p++;
20363+
20364+            if (xskip0 == 1)
20365+                *d++ = (p3 >> 10) & 0x3ff;
20366+            *d++ = (p3 >> 20) & 0x3ff;
20367+
20368+            if (((x += 4) & mask) == 0)
20369+                p += slice_inc;
20370+        }
20371+
20372+        while (x != x1) {
20373+            const uint32_t p3 = *p++;
20374+            *d++ = p3 & 0x3ff;
20375+            *d++ = (p3 >> 10) & 0x3ff;
20376+            *d++ = (p3 >> 20) & 0x3ff;
20377+
20378+            if (((x += 4) & mask) == 0)
20379+                p += slice_inc;
20380+        }
20381+
20382+        if (xrem1 != 0) {
20383+            const uint32_t p3 = *p;
20384+
20385+            *d++ = p3 & 0x3ff;
20386+            if (xrem1 == 2)
20387+                *d++ = (p3 >> 10) & 0x3ff;
20388+        }
20389+    }
20390+}
20391+
20392+
20393+void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
20394+                             uint8_t * dst_v, const unsigned int dst_stride_v,
20395+                             const uint8_t * src,
20396+                             unsigned int stride1, unsigned int stride2,
20397+                             unsigned int _x, unsigned int y,
20398+                             unsigned int _w, unsigned int h)
20399+{
20400+    const unsigned int x0 = (_x / 3) * 8; // Byte offset of the word
20401+    const unsigned int xskip0 = _x - (x0 >> 3) * 3;
20402+    const unsigned int x1 = ((_x + _w) / 3) * 8;
20403+    const unsigned int xrem1 = _x + _w - (x1 >> 3) * 3;
20404+    const unsigned int mask = stride1 - 1;
20405+    const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
20406+    const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
20407+
20408+#if HAVE_SAND_ASM
20409+    if (_x == 0 && have_neon(av_get_cpu_flags())) {
20410+        ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v,
20411+                                       src, stride1, stride2, _x, y, _w, h);
20412+        return;
20413+    }
20414+#endif
20415+
20416+    if (x0 == x1) {
20417+        // *******************
20418+        // Partial single word xfer
20419+        return;
20420+    }
20421+
20422+    for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p0 += stride1)
20423+    {
20424+        unsigned int x = x0;
20425+        const uint32_t * p = (const uint32_t *)p0;
20426+        uint16_t * du = (uint16_t *)dst_u;
20427+        uint16_t * dv = (uint16_t *)dst_v;
20428+
20429+        if (xskip0 != 0) {
20430+            const uint32_t p3a = *p++;
20431+            const uint32_t p3b = *p++;
20432+
20433+            if (xskip0 == 1)
20434+            {
20435+                *du++ = (p3a >> 20) & 0x3ff;
20436+                *dv++ = (p3b >>  0) & 0x3ff;
20437+            }
20438+            *du++ = (p3b >> 10) & 0x3ff;
20439+            *dv++ = (p3b >> 20) & 0x3ff;
20440+
20441+            if (((x += 8) & mask) == 0)
20442+                p += slice_inc;
20443+        }
20444+
20445+        while (x != x1) {
20446+            const uint32_t p3a = *p++;
20447+            const uint32_t p3b = *p++;
20448+
20449+            *du++ = p3a & 0x3ff;
20450+            *dv++ = (p3a >> 10) & 0x3ff;
20451+            *du++ = (p3a >> 20) & 0x3ff;
20452+            *dv++ = p3b & 0x3ff;
20453+            *du++ = (p3b >> 10) & 0x3ff;
20454+            *dv++ = (p3b >> 20) & 0x3ff;
20455+
20456+            if (((x += 8) & mask) == 0)
20457+                p += slice_inc;
20458+        }
20459+
20460+        if (xrem1 != 0) {
20461+            const uint32_t p3a = *p++;
20462+            const uint32_t p3b = *p++;
20463+
20464+            *du++ = p3a & 0x3ff;
20465+            *dv++ = (p3a >> 10) & 0x3ff;
20466+            if (xrem1 == 2)
20467+            {
20468+                *du++ = (p3a >> 20) & 0x3ff;
20469+                *dv++ = p3b & 0x3ff;
20470+            }
20471+        }
20472+    }
20473+}
20474+
20475+// Fetches a single patch - offscreen fixup not done here
20476+// w <= stride1
20477+// 10 -> 8 bit conversion simply drops the bottom 2 bits (truncation, no rounding)
20478+// _x & _w in pixels, strides in bytes
20479+void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
20480+                             const uint8_t * src,
20481+                             unsigned int stride1, unsigned int stride2,
20482+                             unsigned int _x, unsigned int y,
20483+                             unsigned int _w, unsigned int h)
20484+{
20485+    const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word
20486+    const unsigned int xskip0 = _x - (x0 >> 2) * 3;
20487+    const unsigned int x1 = ((_x + _w) / 3) * 4;
20488+    const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3;
20489+    const unsigned int mask = stride1 - 1;
20490+    const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
20491+    const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
20492+
20493+#if HAVE_SAND_ASM
20494+    if (_x == 0 && have_neon(av_get_cpu_flags())) {  // same runtime check as the y16/c16 variants
20495+        ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h);
20496+        return;
20497+    }
20498+#endif
20499+
20500+    if (x0 == x1) {
20501+        // *******************
20502+        // Partial single word xfer
20503+        return;
20504+    }
20505+
20506+    for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1)
20507+    {
20508+        unsigned int x = x0;
20509+        const uint32_t * p = (const uint32_t *)p0;
20510+        uint8_t * d = dst;
20511+
20512+        if (xskip0 != 0) {
20513+            const uint32_t p3 = *p++;
20514+
20515+            if (xskip0 == 1)
20516+                *d++ = (p3 >> 12) & 0xff;
20517+            *d++ = (p3 >> 22) & 0xff;
20518+
20519+            if (((x += 4) & mask) == 0)
20520+                p += slice_inc;
20521+        }
20522+
20523+        while (x != x1) {
20524+            const uint32_t p3 = *p++;
20525+            *d++ = (p3 >> 2) & 0xff;
20526+            *d++ = (p3 >> 12) & 0xff;
20527+            *d++ = (p3 >> 22) & 0xff;
20528+
20529+            if (((x += 4) & mask) == 0)
20530+                p += slice_inc;
20531+        }
20532+
20533+        if (xrem1 != 0) {
20534+            const uint32_t p3 = *p;
20535+
20536+            *d++ = (p3 >> 2) & 0xff;
20537+            if (xrem1 == 2)
20538+                *d++ = (p3 >> 12) & 0xff;
20539+        }
20540+    }
20541+}
20542+
20543+
20544+
20545+// w/h in pixels
20546+void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
20547+                         const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2,
20548+                         unsigned int w, unsigned int h, const unsigned int shr)
20549+{
20550+    const unsigned int n = dst_stride1 / 2;
20551+    unsigned int j;
20552+
20553+    // This is true for our current layouts
20554+    av_assert0(dst_stride1 == src_stride1);
20555+
20556+    // As we have the same stride1 for src & dest and src is wider than dest
20557+    // then if we loop on src we can always write contiguously to dest
20558+    // We make no effort to copy an exact width - round up to nearest src stripe
20559+    // as we will always have storage in dest for that
20560+
20561+#if ARCH_ARM && HAVE_NEON
20562+    if (shr == 3 && src_stride1 == 128 && have_neon(av_get_cpu_flags())) {  // asm only if NEON is present at runtime
20563+        for (j = 0; j + n < w; j += dst_stride1) {
20564+            uint8_t * d = dst + j * dst_stride2;
20565+            const uint8_t * s1 = src + j * 2 * src_stride2;
20566+            const uint8_t * s2 = s1 + src_stride1 * src_stride2;
20567+
20568+            ff_rpi_sand128b_stripe_to_8_10(d, s1, s2, h);
20569+        }
20570+    }
20571+    else
20572+#endif
20573+    {
20574+        for (j = 0; j + n < w; j += dst_stride1) {
20575+            uint8_t * d = dst + j * dst_stride2;
20576+            const uint8_t * s1 = src + j * 2 * src_stride2;
20577+            const uint8_t * s2 = s1 + src_stride1 * src_stride2;
20578+
20579+            for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, s2 += src_stride1, d += dst_stride1) {
20580+                cpy16_to_8(d, s1, n, shr);
20581+                cpy16_to_8(d + n, s2, n, shr);
20582+            }
20583+        }
20584+    }
20585+
20586+    // Fix up a trailing dest half stripe
20587+    if (j < w) {
20588+        uint8_t * d = dst + j * dst_stride2;
20589+        const uint8_t * s1 = src + j * 2 * src_stride2;
20590+
20591+        for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, d += dst_stride1) {
20592+            cpy16_to_8(d, s1, n, shr);
20593+        }
20594+    }
20595+}
20596+
20597+int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src)
20598+{
20599+    const int w = av_frame_cropped_width(src);
20600+    const int h = av_frame_cropped_height(src);
20601+    const int x = src->crop_left;
20602+    const int y = src->crop_top;
20603+
20604+    // We will crop as part of the conversion
20605+    dst->crop_top = 0;
20606+    dst->crop_left = 0;
20607+    dst->crop_bottom = 0;
20608+    dst->crop_right = 0;
20609+
20610+    switch (src->format){
20611+        case AV_PIX_FMT_SAND128:
20612+        case AV_PIX_FMT_RPI4_8:
20613+            switch (dst->format){
20614+                case AV_PIX_FMT_YUV420P:
20615+                    av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
20616+                                             src->data[0],
20617+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
20618+                                             x, y, w, h);
20619+                    av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1],
20620+                                             dst->data[2], dst->linesize[2],
20621+                                             src->data[1],
20622+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
20623+                                             x/2, y/2,  w/2, h/2);
20624+                    break;
20625+                case AV_PIX_FMT_NV12:
20626+                    av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
20627+                                             src->data[0],
20628+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
20629+                                             x, y, w, h);
20630+                    av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1],
20631+                                             src->data[1],
20632+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
20633+                                             x & ~1, y/2, w, h/2);  // interleaved UV copied as bytes: offset is 2 * (x/2)
20634+                    break;
20635+                default:
20636+                    return -1;
20637+            }
20638+            break;
20639+        case AV_PIX_FMT_SAND64_10:
20640+            switch (dst->format){
20641+                case AV_PIX_FMT_YUV420P10:
20642+                    av_rpi_sand_to_planar_y16(dst->data[0], dst->linesize[0],
20643+                                             src->data[0],
20644+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
20645+                                             x*2, y, w*2, h);
20646+                    av_rpi_sand_to_planar_c16(dst->data[1], dst->linesize[1],
20647+                                             dst->data[2], dst->linesize[2],
20648+                                             src->data[1],
20649+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
20650+                                             x, y/2,  w, h/2);
20651+                    break;
20652+                default:
20653+                    return -1;
20654+            }
20655+            break;
20656+        case AV_PIX_FMT_RPI4_10:
20657+            switch (dst->format){
20658+                case AV_PIX_FMT_YUV420P10:
20659+                    av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0],
20660+                                             src->data[0],
20661+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
20662+                                             x, y, w, h);
20663+                    av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1],
20664+                                             dst->data[2], dst->linesize[2],
20665+                                             src->data[1],
20666+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
20667+                                             x/2, y/2, w/2, h/2);
20668+                    break;
20669+                case AV_PIX_FMT_NV12:
20670+                    av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0],
20671+                                             src->data[0],
20672+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
20673+                                             x, y, w, h);
20674+                    av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1],
20675+                                             src->data[1],
20676+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
20677+                                             x & ~1, y/2, w, h/2);  // interleaved UV samples: offset is 2 * (x/2)
20678+                    break;
20679+                default:
20680+                    return -1;
20681+            }
20682+            break;
20683+        default:
20684+            return -1;
20685+    }
20686+
20687+    return av_frame_copy_props(dst, src);
20688+}
20689--- /dev/null
20690+++ b/libavutil/rpi_sand_fns.h
20691@@ -0,0 +1,188 @@
20692+/*
20693+Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
20694+All rights reserved.
20695+
20696+Redistribution and use in source and binary forms, with or without
20697+modification, are permitted provided that the following conditions are met:
20698+    * Redistributions of source code must retain the above copyright
20699+      notice, this list of conditions and the following disclaimer.
20700+    * Redistributions in binary form must reproduce the above copyright
20701+      notice, this list of conditions and the following disclaimer in the
20702+      documentation and/or other materials provided with the distribution.
20703+    * Neither the name of the copyright holder nor the
20704+      names of its contributors may be used to endorse or promote products
20705+      derived from this software without specific prior written permission.
20706+
20707+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20708+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20709+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20710+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
20711+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20712+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20713+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20714+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20715+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
20716+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20717+
20718+Authors: John Cox
20719+*/
20720+
20721+#ifndef AVUTIL_RPI_SAND_FNS
20722+#define AVUTIL_RPI_SAND_FNS
20723+
20724+#include "libavutil/frame.h"
20725+
20726+// For all these fns _x & _w are measured as coord * PW
20727+// For the C fns coords are in chroma pels (so luma / 2)
20728+// Strides are in bytes
20729+
20730+void av_rpi_sand_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
20731+                             const uint8_t * src,
20732+                             unsigned int stride1, unsigned int stride2,
20733+                             unsigned int _x, unsigned int y,
20734+                             unsigned int _w, unsigned int h);
20735+void av_rpi_sand_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
20736+                             const uint8_t * src,
20737+                             unsigned int stride1, unsigned int stride2,
20738+                             unsigned int _x, unsigned int y,
20739+                             unsigned int _w, unsigned int h);
20740+
20741+void av_rpi_sand_to_planar_c8(uint8_t * dst_u, const unsigned int dst_stride_u,
20742+                             uint8_t * dst_v, const unsigned int dst_stride_v,
20743+                             const uint8_t * src,
20744+                             unsigned int stride1, unsigned int stride2,
20745+                             unsigned int _x, unsigned int y,
20746+                             unsigned int _w, unsigned int h);
20747+void av_rpi_sand_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
20748+                             uint8_t * dst_v, const unsigned int dst_stride_v,
20749+                             const uint8_t * src,
20750+                             unsigned int stride1, unsigned int stride2,
20751+                             unsigned int _x, unsigned int y,
20752+                             unsigned int _w, unsigned int h);
20753+
20754+void av_rpi_planar_to_sand_c8(uint8_t * dst_c,
20755+                             unsigned int stride1, unsigned int stride2,
20756+                             const uint8_t * src_u, const unsigned int src_stride_u,
20757+                             const uint8_t * src_v, const unsigned int src_stride_v,
20758+                             unsigned int _x, unsigned int y,
20759+                             unsigned int _w, unsigned int h);
20760+void av_rpi_planar_to_sand_c16(uint8_t * dst_c,
20761+                             unsigned int stride1, unsigned int stride2,
20762+                             const uint8_t * src_u, const unsigned int src_stride_u,
20763+                             const uint8_t * src_v, const unsigned int src_stride_v,
20764+                             unsigned int _x, unsigned int y,
20765+                             unsigned int _w, unsigned int h);
20766+
20767+void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
20768+                             const uint8_t * src,
20769+                             unsigned int stride1, unsigned int stride2,
20770+                             unsigned int _x, unsigned int y,
20771+                             unsigned int _w, unsigned int h);
20772+void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
20773+                             uint8_t * dst_v, const unsigned int dst_stride_v,
20774+                             const uint8_t * src,
20775+                             unsigned int stride1, unsigned int stride2,
20776+                             unsigned int _x, unsigned int y,
20777+                             unsigned int _w, unsigned int h);
20778+
20779+void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
20780+                             const uint8_t * src,
20781+                             unsigned int stride1, unsigned int stride2,
20782+                             unsigned int _x, unsigned int y,
20783+                             unsigned int _w, unsigned int h);
20784+
20785+// w/h in pixels
20786+void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
20787+                         const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2,
20788+                         unsigned int w, unsigned int h, const unsigned int shr);
20789+
20790+
20791+// dst must contain required pixel format & allocated data buffers
20792+// Cropping on the src buffer will be honoured and dst crop will be set to zero
20793+int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src);
20794+
20795+
20796+static inline unsigned int av_rpi_sand_frame_stride1(const AVFrame * const frame)
20797+{
20798+#ifdef RPI_ZC_SAND128_ONLY
20799+    // If we are sure we only support 128 byte sand formats, replace the
20800+    // var with a constant which should allow for better optimisation
20801+    return 128;
20802+#else
20803+    return frame->linesize[0];
20804+#endif
20805+}
20806+
20807+static inline unsigned int av_rpi_sand_frame_stride2(const AVFrame * const frame)
20808+{
20809+    return frame->linesize[3];
20810+}
20811+
20812+
20813+static inline int av_rpi_is_sand_format(const int format)
20814+{
20815+    return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_RPI4_10);
20816+}
20817+
20818+static inline int av_rpi_is_sand_frame(const AVFrame * const frame)
20819+{
20820+    return av_rpi_is_sand_format(frame->format);
20821+}
20822+
20823+static inline int av_rpi_is_sand8_frame(const AVFrame * const frame)
20824+{
20825+    return (frame->format == AV_PIX_FMT_SAND128 || frame->format == AV_PIX_FMT_RPI4_8);
20826+}
20827+
20828+static inline int av_rpi_is_sand16_frame(const AVFrame * const frame)
20829+{
20830+    return (frame->format >= AV_PIX_FMT_SAND64_10 && frame->format <= AV_PIX_FMT_SAND64_16);
20831+}
20832+
20833+static inline int av_rpi_is_sand30_frame(const AVFrame * const frame)
20834+{
20835+    return (frame->format == AV_PIX_FMT_RPI4_10);
20836+}
20837+
20838+static inline int av_rpi_sand_frame_xshl(const AVFrame * const frame)
20839+{
20840+    return av_rpi_is_sand8_frame(frame) ? 0 : 1;
20841+}
20842+
20843+// If x is measured in bytes (not pixels) then this works for sand64_16 as
20844+// well as sand128 - but in the general case we work that out
20845+
20846+static inline unsigned int av_rpi_sand_frame_off_y(const AVFrame * const frame, const unsigned int x_y, const unsigned int y)
20847+{
20848+    const unsigned int stride1 = av_rpi_sand_frame_stride1(frame);
20849+    const unsigned int stride2 = av_rpi_sand_frame_stride2(frame);
20850+    const unsigned int x = x_y << av_rpi_sand_frame_xshl(frame);  // x_y doubled unless the frame is 8-bit sand
20851+    const unsigned int x1 = x & (stride1 - 1);  // low bits of x; the mask assumes stride1 is a power of 2 - TODO confirm
20852+    const unsigned int x2 = x ^ x1;             // x with those low bits cleared
20853+
20854+    return x1 + stride1 * y + stride2 * x2;     // luma offset; av_rpi_sand_frame_pos_y() adds it to data[0]
20855+}
20856+
20857+static inline unsigned int av_rpi_sand_frame_off_c(const AVFrame * const frame, const unsigned int x_c, const unsigned int y_c)
20858+{
20859+    const unsigned int stride1 = av_rpi_sand_frame_stride1(frame);
20860+    const unsigned int stride2 = av_rpi_sand_frame_stride2(frame);
20861+    const unsigned int x = x_c << (av_rpi_sand_frame_xshl(frame) + 1);  // extra +1: presumably U/V are interleaved - TODO confirm
20862+    const unsigned int x1 = x & (stride1 - 1);  // low bits of x; the mask assumes stride1 is a power of 2 - TODO confirm
20863+    const unsigned int x2 = x ^ x1;             // x with those low bits cleared
20864+
20865+    return x1 + stride1 * y_c + stride2 * x2;   // chroma offset; av_rpi_sand_frame_pos_c() adds it to data[1]
20866+}
20867+
20868+static inline uint8_t * av_rpi_sand_frame_pos_y(const AVFrame * const frame, const unsigned int x, const unsigned int y)
20869+{
20870+    return frame->data[0] + av_rpi_sand_frame_off_y(frame, x, y);
20871+}
20872+
20873+static inline uint8_t * av_rpi_sand_frame_pos_c(const AVFrame * const frame, const unsigned int x, const unsigned int y)
20874+{
20875+    return frame->data[1] + av_rpi_sand_frame_off_c(frame, x, y);
20876+}
20877+
20878+#endif
20879+
20880--- a/libswscale/aarch64/rgb2rgb.c
20881+++ b/libswscale/aarch64/rgb2rgb.c
20882@@ -30,6 +30,12 @@
20883 void ff_interleave_bytes_neon(const uint8_t *src1, const uint8_t *src2,
20884                               uint8_t *dest, int width, int height,
20885                               int src1Stride, int src2Stride, int dstStride);
20886+void ff_bgr24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
20887+                   uint8_t *vdst, int width, int height, int lumStride,
20888+                   int chromStride, int srcStride, int32_t *rgb2yuv);
20889+void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
20890+                   uint8_t *vdst, int width, int height, int lumStride,
20891+                   int chromStride, int srcStride, int32_t *rgb2yuv);
20892
20893 av_cold void rgb2rgb_init_aarch64(void)
20894 {
20895@@ -37,5 +43,7 @@ av_cold void rgb2rgb_init_aarch64(void)
20896
20897     if (have_neon(cpu_flags)) {
20898         interleaveBytes = ff_interleave_bytes_neon;
20899+        ff_rgb24toyv12 = ff_rgb24toyv12_aarch64;
20900+        ff_bgr24toyv12 = ff_bgr24toyv12_aarch64;
20901     }
20902 }
20903--- a/libswscale/aarch64/rgb2rgb_neon.S
20904+++ b/libswscale/aarch64/rgb2rgb_neon.S
20905@@ -77,3 +77,359 @@ function ff_interleave_bytes_neon, expor
20906 0:
20907         ret
20908 endfunc
20909+
20910+// Expand rgb2 into r0+r1/g0+g1/b0+b1
20911+.macro XRGB3Y r0, g0, b0, r1, g1, b1, r2, g2, b2
20912+        uxtl            \r0\().8h, \r2\().8b
20913+        uxtl            \g0\().8h, \g2\().8b
20914+        uxtl            \b0\().8h, \b2\().8b
20915+
20916+        uxtl2           \r1\().8h, \r2\().16b
20917+        uxtl2           \g1\().8h, \g2\().16b
20918+        uxtl2           \b1\().8h, \b2\().16b
20919+.endm
20920+
20921+// Expand rgb2 into r0+r1/g0+g1/b0+b1
20922+// and pick every other el to put back into rgb2 for chroma
20923+.macro XRGB3YC r0, g0, b0, r1, g1, b1, r2, g2, b2
20924+        XRGB3Y          \r0, \g0, \b0, \r1, \g1, \b1, \r2, \g2, \b2
20925+
20926+        bic             \r2\().8h, #0xff, LSL #8
20927+        bic             \g2\().8h, #0xff, LSL #8
20928+        bic             \b2\().8h, #0xff, LSL #8
20929+.endm
20930+
20931+.macro SMLAL3 d0, d1, s0, s1, s2, c0, c1, c2
20932+        smull           \d0\().4s, \s0\().4h, \c0
20933+        smlal           \d0\().4s, \s1\().4h, \c1
20934+        smlal           \d0\().4s, \s2\().4h, \c2
20935+        smull2          \d1\().4s, \s0\().8h, \c0
20936+        smlal2          \d1\().4s, \s1\().8h, \c1
20937+        smlal2          \d1\().4s, \s2\().8h, \c2
20938+.endm
20939+
20940+// Narrow Y sums s0,s1 (.4s) -> d0 low 8 bytes, s2,s3 (.4s) -> d0 high 8 bytes; d0 may be s0
20941+// s0, s2 corrupted; k128h is added to every .8h lane before the final rounding >> 3
20942+.macro SHRN_Y d0, s0, s1, s2, s3, k128h
20943+        shrn            \s0\().4h, \s0\().4s, #12
20944+        shrn2           \s0\().8h, \s1\().4s, #12
20945+        add             \s0\().8h, \s0\().8h, \k128h\().8h     // +128 (>> 3 = 16)
20946+        sqrshrun        \d0\().8b, \s0\().8h, #3
20947+        shrn            \s2\().4h, \s2\().4s, #12
20948+        shrn2           \s2\().8h, \s3\().4s, #12
20949+        add             \s2\().8h, \s2\().8h, \k128h\().8h
20950+        sqrshrun2       \d0\().16b, \s2\().8h, #3              // use the s2 parameter, not a hard-coded v28
20951+.endm
20952+
20953+.macro SHRN_C d0, s0, s1, k128b
20954+        shrn            \s0\().4h, \s0\().4s, #14
20955+        shrn2           \s0\().8h, \s1\().4s, #14
20956+        sqrshrn         \s0\().8b, \s0\().8h, #1
20957+        add             \d0\().8b, \s0\().8b, \k128b\().8b     // +128
20958+.endm
20959+
20960+.macro STB2V s0, n, a
20961+        st1             {\s0\().b}[(\n+0)], [\a], #1
20962+        st1             {\s0\().b}[(\n+1)], [\a], #1
20963+.endm
20964+
20965+.macro STB4V s0, n, a
20966+        STB2V           \s0, (\n+0), \a
20967+        STB2V           \s0, (\n+2), \a
20968+.endm
20969+
20970+
20971+// void ff_rgb24toyv12_aarch64(
20972+//              const uint8_t *src,             // x0
20973+//              uint8_t *ydst,                  // x1
20974+//              uint8_t *udst,                  // x2
20975+//              uint8_t *vdst,                  // x3
20976+//              int width,                      // w4
20977+//              int height,                     // w5
20978+//              int lumStride,                  // w6
20979+//              int chromStride,                // w7
20980+//              int srcStr,                     // [sp, #0]
20981+//              int32_t *rgb2yuv);              // [sp, #8]
20982+
20983+function ff_rgb24toyv12_aarch64, export=1
20984+        ldr             x15, [sp, #8]
20985+        ld3             {v3.s, v4.s, v5.s}[0], [x15], #12
20986+        ld3             {v3.s, v4.s, v5.s}[1], [x15], #12
20987+        ld3             {v3.s, v4.s, v5.s}[2], [x15]
20988+        mov             v6.16b, v3.16b
20989+        mov             v3.16b, v5.16b
20990+        mov             v5.16b, v6.16b
20991+        b               99f
20992+endfunc
20993+
20994+// void ff_bgr24toyv12_aarch64(
20995+//              const uint8_t *src,             // x0
20996+//              uint8_t *ydst,                  // x1
20997+//              uint8_t *udst,                  // x2
20998+//              uint8_t *vdst,                  // x3
20999+//              int width,                      // w4
21000+//              int height,                     // w5
21001+//              int lumStride,                  // w6
21002+//              int chromStride,                // w7
21003+//              int srcStr,                     // [sp, #0]
21004+//              int32_t *rgb2yuv);              // [sp, #8] (including Mac)
21005+
21006+// regs
21007+// v0-2         Src bytes - reused as chroma src
21008+// v3-5         Coeffs (packed very inefficiently - could be squashed)
21009+// v6           128h
21010+// v7           128b
21011+// v8-15        Reserved
21012+// v16-18       Lo Src expanded as H
21013+// v19          -
21014+// v20-22       Hi Src expanded as H
21015+// v23          -
21016+// v24          U out
21017+// v25          U tmp
21018+// v26          Y out
21019+// v27-29       Y tmp
21020+// v30          V out
21021+// v31          V tmp
21022+
21023+function ff_bgr24toyv12_aarch64, export=1
21024+        ldr             x15, [sp, #8]                   // x15 = rgb2yuv
21025+        ld3             {v3.s, v4.s, v5.s}[0], [x15], #12
21026+        ld3             {v3.s, v4.s, v5.s}[1], [x15], #12
21027+        ld3             {v3.s, v4.s, v5.s}[2], [x15]
21028+
21029+99:                                                     // ff_rgb24toyv12_aarch64 branches here with v3/v5 swapped
21030+        ldr             w14, [sp, #0]                   // w14 = srcStride
21031+        movi            v7.8b, #128                     // v7 = 128 in each byte (chroma bias)
21032+        uxtl            v6.8h, v7.8b                    // v6 = 128 in each halfword (Y bias before >> 3)
21033+        // Ensure if nothing to do then we do nothing
21034+        cmp             w4, #0
21035+        b.le            90f
21036+        cmp             w5, #0
21037+        b.le            90f
21038+        // If w % 16 != 0 then -16 so we do main loop 1 fewer times with
21039+        // the remainder done in the tail
21040+        tst             w4, #15
21041+        b.eq            1f
21042+        sub             w4, w4, #16
21043+1:
21044+
21045+// -------------------- Even line body - YUV
21046+11:
21047+        subs            w9,  w4, #0
21048+        mov             x10, x0
21049+        mov             x11, x1
21050+        mov             x12, x2
21051+        mov             x13, x3
21052+        b.lt            12f
21053+
21054+        ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
21055+        subs            w9, w9, #16
21056+        b.le            13f
21057+
21058+10:
21059+        XRGB3YC         v16, v17, v18,  v20, v21, v22,  v0, v1, v2
21060+
21061+        // Testing shows it is faster to stack the smull/smlal ops together
21062+        // rather than interleave them between channels and indeed even the
21063+        // shift/add sections seem happier not interleaved
21064+
21065+        // Y0
21066+        SMLAL3          v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
21067+        // Y1
21068+        SMLAL3          v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
21069+        SHRN_Y          v26, v26, v27, v28, v29, v6
21070+
21071+        // U
21072+        // Vector subscript *2 as we loaded into S but are only using H
21073+        SMLAL3          v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2]
21074+
21075+        // V
21076+        SMLAL3          v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4]
21077+
21078+        ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
21079+
21080+        SHRN_C          v24, v24, v25, v7
21081+        SHRN_C          v30, v30, v31, v7
21082+
21083+        subs            w9, w9, #16
21084+
21085+        st1             {v26.16b}, [x11], #16
21086+        st1             {v24.8b}, [x12], #8
21087+        st1             {v30.8b}, [x13], #8
21088+
21089+        b.gt            10b
21090+
21091+// -------------------- Even line tail - YUV
21092+// If width % 16 == 0 then simply runs once with preloaded RGB
21093+// If other then deals with preload & then does remaining tail
21094+
21095+13:
21096+        // Body is simple copy of main loop body minus preload
21097+
21098+        XRGB3YC         v16, v17, v18,  v20, v21, v22,  v0, v1, v2
21099+        // Y0
21100+        SMLAL3          v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
21101+        // Y1
21102+        SMLAL3          v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
21103+        SHRN_Y          v26, v26, v27, v28, v29, v6
21104+        // U
21105+        SMLAL3          v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2]
21106+        // V
21107+        SMLAL3          v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4]
21108+
21109+        cmp             w9, #-16
21110+
21111+        SHRN_C          v24, v24, v25, v7
21112+        SHRN_C          v30, v30, v31, v7
21113+
21114+        // Here:
21115+        // w9 == 0      width % 16 == 0, tail done
21116+        // w9 > -16     1st tail done (16 pels), remainder still to go
21117+        // w9 == -16    shouldn't happen
21118+        // w9 > -32     2nd tail done
21119+        // w9 <= -32    shouldn't happen
21120+
21121+        b.lt            2f
21122+        st1             {v26.16b}, [x11], #16
21123+        st1             {v24.8b}, [x12], #8
21124+        st1             {v30.8b}, [x13], #8
21125+        cbz             w9, 3f
21126+
21127+12:
21128+        sub             w9, w9, #16
21129+
21130+        tbz             w9, #3, 1f
21131+        ld3             {v0.8b, v1.8b, v2.8b},  [x10], #24
21132+1:      tbz             w9, #2, 1f
21133+        ld3             {v0.b, v1.b, v2.b}[8],  [x10], #3
21134+        ld3             {v0.b, v1.b, v2.b}[9],  [x10], #3
21135+        ld3             {v0.b, v1.b, v2.b}[10], [x10], #3
21136+        ld3             {v0.b, v1.b, v2.b}[11], [x10], #3
21137+1:      tbz             w9, #1, 1f
21138+        ld3             {v0.b, v1.b, v2.b}[12], [x10], #3
21139+        ld3             {v0.b, v1.b, v2.b}[13], [x10], #3
21140+1:      tbz             w9, #0, 13b
21141+        ld3             {v0.b, v1.b, v2.b}[14], [x10], #3
21142+        b               13b
21143+
21144+2:
21145+        tbz             w9, #3, 1f
21146+        st1             {v26.8b},    [x11], #8
21147+        STB4V           v24, 0, x12
21148+        STB4V           v30, 0, x13
21149+1:      tbz             w9, #2, 1f
21150+        STB4V           v26, 8, x11
21151+        STB2V           v24, 4, x12
21152+        STB2V           v30, 4, x13
21153+1:      tbz             w9, #1, 1f
21154+        STB2V           v26, 12, x11
21155+        st1             {v24.b}[6],  [x12], #1
21156+        st1             {v30.b}[6],  [x13], #1
21157+1:      tbz             w9, #0, 1f
21158+        st1             {v26.b}[14], [x11]
21159+        st1             {v24.b}[7],  [x12]
21160+        st1             {v30.b}[7],  [x13]
21161+1:
21162+3:
21163+
21164+// -------------------- Odd line body - Y only
21165+
21166+        subs            w5, w5, #1
21167+        b.eq            90f
21168+
21169+        subs            w9,  w4, #0
21170+        add             x0, x0, w14, sxtw
21171+        add             x1, x1, w6, sxtw
21172+        mov             x10, x0
21173+        mov             x11, x1
21174+        b.lt            12f
21175+
21176+        ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
21177+        subs            w9, w9, #16
21178+        b.le            13f
21179+
21180+10:
21181+        XRGB3Y          v16, v17, v18,  v20, v21, v22,  v0, v1, v2
21182+        // Y0
21183+        SMLAL3          v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
21184+        // Y1
21185+        SMLAL3          v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
21186+
21187+        ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
21188+
21189+        SHRN_Y          v26, v26, v27, v28, v29, v6
21190+
21191+        subs            w9, w9, #16
21192+
21193+        st1             {v26.16b}, [x11], #16
21194+
21195+        b.gt            10b
21196+
21197+// -------------------- Odd line tail - Y
21198+// If width % 16 == 0 then simply runs once with preloaded RGB
21199+// If other then deals with preload & then does remaining tail
21200+
21201+13:
21202+        // Body is simple copy of main loop body minus preload
21203+
21204+        XRGB3Y          v16, v17, v18,  v20, v21, v22,  v0, v1, v2
21205+        // Y0
21206+        SMLAL3          v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
21207+        // Y1
21208+        SMLAL3          v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
21209+
21210+        cmp             w9, #-16
21211+
21212+        SHRN_Y          v26, v26, v27, v28, v29, v6
21213+
21214+        // Here:
21215+        // w9 == 0      width % 16 == 0, tail done
21216+        // w9 > -16     1st tail done (16 pels), remainder still to go
21217+        // w9 == -16    shouldn't happen
21218+        // w9 > -32     2nd tail done
21219+        // w9 <= -32    shouldn't happen
21220+
21221+        b.lt            2f
21222+        st1             {v26.16b}, [x11], #16
21223+        cbz             w9, 3f
21224+
21225+12:
21226+        sub             w9, w9, #16
21227+
21228+        tbz             w9, #3, 1f
21229+        ld3             {v0.8b, v1.8b, v2.8b},  [x10], #24
21230+1:      tbz             w9, #2, 1f
21231+        ld3             {v0.b, v1.b, v2.b}[8],  [x10], #3
21232+        ld3             {v0.b, v1.b, v2.b}[9],  [x10], #3
21233+        ld3             {v0.b, v1.b, v2.b}[10], [x10], #3
21234+        ld3             {v0.b, v1.b, v2.b}[11], [x10], #3
21235+1:      tbz             w9, #1, 1f
21236+        ld3             {v0.b, v1.b, v2.b}[12], [x10], #3
21237+        ld3             {v0.b, v1.b, v2.b}[13], [x10], #3
21238+1:      tbz             w9, #0, 13b
21239+        ld3             {v0.b, v1.b, v2.b}[14], [x10], #3
21240+        b               13b
21241+
21242+2:
21243+        tbz             w9, #3, 1f
21244+        st1             {v26.8b},    [x11], #8
21245+1:      tbz             w9, #2, 1f
21246+        STB4V           v26, 8,  x11
21247+1:      tbz             w9, #1, 1f
21248+        STB2V           v26, 12, x11
21249+1:      tbz             w9, #0, 1f
21250+        st1             {v26.b}[14], [x11]
21251+1:
21252+3:
21253+
21254+// ------------------- Loop to start
21255+
21256+        add             x0, x0, w14, sxtw
21257+        add             x1, x1, w6, sxtw
21258+        add             x2, x2, w7, sxtw
21259+        add             x3, x3, w7, sxtw
21260+        subs            w5, w5, #1
21261+        b.gt            11b
21262+90:
21263+        ret
21264+endfunc
21265--- a/libswscale/rgb2rgb.c
21266+++ b/libswscale/rgb2rgb.c
21267@@ -83,6 +83,31 @@ void (*ff_rgb24toyv12)(const uint8_t *sr
21268                        int width, int height,
21269                        int lumStride, int chromStride, int srcStride,
21270                        int32_t *rgb2yuv);
21271+void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst,
21272+                       uint8_t *udst, uint8_t *vdst,
21273+                       int width, int height,
21274+                       int lumStride, int chromStride, int srcStride,
21275+                       int32_t *rgb2yuv);
21276+void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst,
21277+					  uint8_t *udst, uint8_t *vdst,
21278+					  int width, int height,
21279+					  int lumStride, int chromStride, int srcStride,
21280+					  int32_t *rgb2yuv);
21281+void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst,
21282+					  uint8_t *udst, uint8_t *vdst,
21283+					  int width, int height,
21284+					  int lumStride, int chromStride, int srcStride,
21285+					  int32_t *rgb2yuv);
21286+void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst,
21287+					  uint8_t *udst, uint8_t *vdst,
21288+					  int width, int height,
21289+					  int lumStride, int chromStride, int srcStride,
21290+					  int32_t *rgb2yuv);
21291+void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst,
21292+					  uint8_t *udst, uint8_t *vdst,
21293+					  int width, int height,
21294+					  int lumStride, int chromStride, int srcStride,
21295+					  int32_t *rgb2yuv);
21296 void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
21297                  int srcStride, int dstStride);
21298 void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
21299--- a/libswscale/rgb2rgb.h
21300+++ b/libswscale/rgb2rgb.h
21301@@ -79,6 +79,9 @@ void    rgb12to15(const uint8_t *src, ui
21302 void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
21303                       uint8_t *vdst, int width, int height, int lumStride,
21304                       int chromStride, int srcStride, int32_t *rgb2yuv);
21305+void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
21306+                      uint8_t *vdst, int width, int height, int lumStride,
21307+                      int chromStride, int srcStride, int32_t *rgb2yuv);
21308
21309 /**
21310  * Height should be a multiple of 2 and width should be a multiple of 16.
21311@@ -128,6 +131,26 @@ extern void (*ff_rgb24toyv12)(const uint
21312                               int width, int height,
21313                               int lumStride, int chromStride, int srcStride,
21314                               int32_t *rgb2yuv);
21315+extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
21316+                              int width, int height,
21317+                              int lumStride, int chromStride, int srcStride,
21318+                              int32_t *rgb2yuv);
21319+extern void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
21320+                             int width, int height,
21321+                             int lumStride, int chromStride, int srcStride,
21322+                             int32_t *rgb2yuv);
21323+extern void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
21324+                             int width, int height,
21325+                             int lumStride, int chromStride, int srcStride,
21326+                             int32_t *rgb2yuv);
21327+extern void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
21328+                             int width, int height,
21329+                             int lumStride, int chromStride, int srcStride,
21330+                             int32_t *rgb2yuv);
21331+extern void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
21332+                             int width, int height,
21333+                             int lumStride, int chromStride, int srcStride,
21334+                             int32_t *rgb2yuv);
21335 extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
21336                         int srcStride, int dstStride);
21337
21338--- a/libswscale/rgb2rgb_template.c
21339+++ b/libswscale/rgb2rgb_template.c
21340@@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const ui
21341  * others are ignored in the C version.
21342  * FIXME: Write HQ version.
21343  */
21344-void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
21345+static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
21346                    uint8_t *vdst, int width, int height, int lumStride,
21347-                   int chromStride, int srcStride, int32_t *rgb2yuv)
21348+                   int chromStride, int srcStride, int32_t *rgb2yuv,
21349+                   const uint8_t x[9])
21350 {
21351-    int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
21352-    int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
21353-    int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
21354+    int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]];
21355+    int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]];
21356+    int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]];
21357     int y;
21358     const int chromWidth = width >> 1;
21359
21360@@ -678,6 +679,19 @@ void ff_rgb24toyv12_c(const uint8_t *src
21361             Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
21362             ydst[2 * i + 1] = Y;
21363         }
21364+        if ((width & 1) != 0) {
21365+            unsigned int b = src[6 * i + 0];
21366+            unsigned int g = src[6 * i + 1];
21367+            unsigned int r = src[6 * i + 2];
21368+
21369+            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
21370+            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
21371+            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
21372+
21373+            udst[i]     = U;
21374+            vdst[i]     = V;
21375+            ydst[2 * i] = Y;
21376+        }
21377         ydst += lumStride;
21378         src  += srcStride;
21379
21380@@ -700,6 +714,15 @@ void ff_rgb24toyv12_c(const uint8_t *src
21381             Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
21382             ydst[2 * i + 1] = Y;
21383         }
21384+        if ((width & 1) != 0) {
21385+            unsigned int b = src[6 * i + 0];
21386+            unsigned int g = src[6 * i + 1];
21387+            unsigned int r = src[6 * i + 2];
21388+
21389+            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
21390+
21391+            ydst[2 * i] = Y;
21392+        }
21393         udst += chromStride;
21394         vdst += chromStride;
21395         ydst += lumStride;
21396@@ -707,6 +730,147 @@ void ff_rgb24toyv12_c(const uint8_t *src
21397     }
21398 }
21399
21400+static const uint8_t x_rgb[9] = {   // rgb2yuv[] indices in the order the converters load them
21401+    RY_IDX, GY_IDX, BY_IDX,          // x[0..2] -> ry, gy, by
21402+    RU_IDX, GU_IDX, BU_IDX,          // x[3..5] -> ru, gu, bu
21403+    RV_IDX, GV_IDX, BV_IDX,          // x[6..8] -> rv, gv, bv
21404+};
21405+
21406+static const uint8_t x_bgr[9] = {   // R and B indices exchanged in every row
21407+     BY_IDX, GY_IDX, RY_IDX,         // the converters' "r" sample gets the blue weight and "b" the red one
21408+     BU_IDX, GU_IDX, RU_IDX,
21409+     BV_IDX, GV_IDX, RV_IDX,
21410+};
21411+
21412+void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
21413+                   uint8_t *vdst, int width, int height, int lumStride,
21414+                   int chromStride, int srcStride, int32_t *rgb2yuv)
21415+{   // same signature as before the refactor: forwards every argument and selects x_rgb
21416+    rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
21417+}
21418+
21419+void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
21420+                   uint8_t *vdst, int width, int height, int lumStride,
21421+                   int chromStride, int srcStride, int32_t *rgb2yuv)
21422+{   // R/B-swapped variant: identical call to rgb24toyv12_x but with the x_bgr table
21423+    rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
21424+}
21425+
21426+static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
21427+                   uint8_t *vdst, int width, int height, int lumStride,
21428+                   int chromStride, int srcStride, int32_t *rgb2yuv,
21429+                   const uint8_t x[9])
21430+{   // 4-byte pixels -> planar Y plus U/V written once per 2x2 block; bytes 3 and 7 of each pair are never read
21431+    int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]];   // x[] picks which table entry weights each sample
21432+    int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]];
21433+    int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]];
21434+    int y;
21435+    const int chromWidth = width >> 1;   // number of complete horizontal pixel pairs
21436+
21437+    for (y = 0; y < height; y += 2) {   // two source rows per iteration
21438+        int i;
21439+        for (i = 0; i < chromWidth; i++) {   // first row of the pair: luma and chroma
21440+            unsigned int b = src[8 * i + 2];   // left pixel of the pair: bytes 0,1,2 are r,g,b
21441+            unsigned int g = src[8 * i + 1];
21442+            unsigned int r = src[8 * i + 0];
21443+
21444+            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
21445+            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
21446+            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
21447+
21448+            udst[i]     = U;   // chroma is taken from this one pixel only, no averaging over the 2x2 block
21449+            vdst[i]     = V;
21450+            ydst[2 * i] = Y;
21451+
21452+            b = src[8 * i + 6];   // right pixel of the pair: bytes 4,5,6; luma only
21453+            g = src[8 * i + 5];
21454+            r = src[8 * i + 4];
21455+
21456+            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
21457+            ydst[2 * i + 1] = Y;
21458+        }
21459+        if ((width & 1) != 0) {   // odd width: convert the final unpaired pixel (i == chromWidth here)
21460+            unsigned int b = src[8 * i + 2];
21461+            unsigned int g = src[8 * i + 1];
21462+            unsigned int r = src[8 * i + 0];
21463+
21464+            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
21465+            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
21466+            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
21467+
21468+            udst[i]     = U;   // writes chroma index chromWidth, so each chroma row needs (width + 1) / 2 samples
21469+            vdst[i]     = V;
21470+            ydst[2 * i] = Y;
21471+        }
21472+        ydst += lumStride;
21473+        src  += srcStride;
21474+
21475+        if (y+1 == height)   // odd height: there is no second row in this pair
21476+            break;
21477+
21478+        for (i = 0; i < chromWidth; i++) {   // second row of the pair: luma only
21479+            unsigned int b = src[8 * i + 2];
21480+            unsigned int g = src[8 * i + 1];
21481+            unsigned int r = src[8 * i + 0];
21482+
21483+            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
21484+
21485+            ydst[2 * i] = Y;
21486+
21487+            b = src[8 * i + 6];
21488+            g = src[8 * i + 5];
21489+            r = src[8 * i + 4];
21490+
21491+            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
21492+            ydst[2 * i + 1] = Y;
21493+        }
21494+        if ((width & 1) != 0) {   // odd width, second row: luma for the final unpaired pixel
21495+            unsigned int b = src[8 * i + 2];
21496+            unsigned int g = src[8 * i + 1];
21497+            unsigned int r = src[8 * i + 0];
21498+
21499+            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
21500+
21501+            ydst[2 * i] = Y;
21502+        }
21503+        udst += chromStride;   // chroma pointers advance once per two source rows
21504+        vdst += chromStride;
21505+        ydst += lumStride;
21506+        src  += srcStride;
21507+    }
21508+}
21509+
21510+static void ff_rgbxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
21511+                   uint8_t *vdst, int width, int height, int lumStride,
21512+                   int chromStride, int srcStride, int32_t *rgb2yuv)
21513+{   // bytes 0,1,2 of each pixel weighted as R,G,B (x_rgb); byte 3 ignored by rgbxtoyv12_x
21514+    rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
21515+}
21516+
21517+static void ff_bgrxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
21518+                   uint8_t *vdst, int width, int height, int lumStride,
21519+                   int chromStride, int srcStride, int32_t *rgb2yuv)
21520+{   // bytes 0,1,2 of each pixel weighted as B,G,R (x_bgr swaps the R/B weights); byte 3 ignored
21521+    rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
21522+}
21523+
21524+// The generic converter reads individual bytes (no SIMD-style wide loads), so
21525+// advancing src by 1 is enough to skip the leading, ignored alpha/padding byte
21526+static void ff_xrgbtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
21527+                   uint8_t *vdst, int width, int height, int lumStride,
21528+                   int chromStride, int srcStride, int32_t *rgb2yuv)
21529+{   // after the +1 offset bytes 1,2,3 of each pixel are weighted as R,G,B (x_rgb)
21530+    rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
21531+}
21532+
21533+static void ff_xbgrtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
21534+                   uint8_t *vdst, int width, int height, int lumStride,
21535+                   int chromStride, int srcStride, int32_t *rgb2yuv)
21536+{   // src + 1 skips the leading ignored byte; bytes 1,2,3 are then weighted as B,G,R (x_bgr)
21537+    rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
21538+}
21539+
21540+
21541 static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
21542                               uint8_t *dest, int width, int height,
21543                               int src1Stride, int src2Stride, int dstStride)
21544@@ -980,6 +1144,11 @@ static av_cold void rgb2rgb_init_c(void)
21545     yuy2toyv12         = yuy2toyv12_c;
21546     planar2x           = planar2x_c;
21547     ff_rgb24toyv12     = ff_rgb24toyv12_c;
21548+    ff_bgr24toyv12     = ff_bgr24toyv12_c;
21549+    ff_rgbxtoyv12      = ff_rgbxtoyv12_c;
21550+    ff_bgrxtoyv12      = ff_bgrxtoyv12_c;
21551+    ff_xrgbtoyv12      = ff_xrgbtoyv12_c;
21552+    ff_xbgrtoyv12      = ff_xbgrtoyv12_c;
21553     interleaveBytes    = interleaveBytes_c;
21554     deinterleaveBytes  = deinterleaveBytes_c;
21555     vu9_to_vu12        = vu9_to_vu12_c;
21556--- a/libswscale/swscale_unscaled.c
21557+++ b/libswscale/swscale_unscaled.c
21558@@ -1654,6 +1654,91 @@ static int bgr24ToYv12Wrapper(SwsContext
21559     return srcSliceH;
21560 }
21561
21562+static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
21563+                              int srcStride[], int srcSliceY, int srcSliceH,
21564+                              uint8_t *dst[], int dstStride[])
21565+{   // unscaled conversion of one source slice; returns the slice height it was given
21566+    ff_bgr24toyv12(   // NOTE(review): "bgr24" is intentional - the R/B-swapped converter is the one wired to RGB24 input
21567+        src[0],
21568+        dst[0] +  srcSliceY       * dstStride[0],   // first luma row of this slice
21569+        dst[1] + (srcSliceY >> 1) * dstStride[1],   // chroma planes hold one row per two source rows
21570+        dst[2] + (srcSliceY >> 1) * dstStride[2],
21571+        c->srcW, srcSliceH,
21572+        dstStride[0], dstStride[1], srcStride[0],   // lumStride, chromStride (dstStride[1] serves both U and V), srcStride
21573+        c->input_rgb2yuv_table);
21574+    if (dst[3])   // a fourth destination plane exists: fill it with the constant 255
21575+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
21576+    return srcSliceH;
21577+}
21578+
21579+static int bgrxToYv12Wrapper(SwsContext *c, const uint8_t *src[],
21580+                             int srcStride[], int srcSliceY, int srcSliceH,
21581+                             uint8_t *dst[], int dstStride[])
21582+{   // unscaled conversion of one source slice through the ff_bgrxtoyv12 function pointer
21583+    ff_bgrxtoyv12(
21584+        src[0],
21585+        dst[0] +  srcSliceY       * dstStride[0],   // first luma row of this slice
21586+        dst[1] + (srcSliceY >> 1) * dstStride[1],   // chroma planes hold one row per two source rows
21587+        dst[2] + (srcSliceY >> 1) * dstStride[2],
21588+        c->srcW, srcSliceH,
21589+        dstStride[0], dstStride[1], srcStride[0],   // dstStride[1] is the chroma stride for both U and V
21590+        c->input_rgb2yuv_table);
21591+    if (dst[3])   // a fourth destination plane exists: fill it with the constant 255
21592+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
21593+    return srcSliceH;   // reports the whole input slice as consumed
21594+}
21595+
21596+static int rgbxToYv12Wrapper(SwsContext *c, const uint8_t *src[],
21597+                             int srcStride[], int srcSliceY, int srcSliceH,
21598+                             uint8_t *dst[], int dstStride[])
21599+{   // unscaled conversion of one source slice through the ff_rgbxtoyv12 function pointer
21600+    ff_rgbxtoyv12(
21601+        src[0],
21602+        dst[0] +  srcSliceY       * dstStride[0],   // first luma row of this slice
21603+        dst[1] + (srcSliceY >> 1) * dstStride[1],   // chroma planes hold one row per two source rows
21604+        dst[2] + (srcSliceY >> 1) * dstStride[2],
21605+        c->srcW, srcSliceH,
21606+        dstStride[0], dstStride[1], srcStride[0],   // dstStride[1] is the chroma stride for both U and V
21607+        c->input_rgb2yuv_table);
21608+    if (dst[3])   // a fourth destination plane exists: fill it with the constant 255
21609+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
21610+    return srcSliceH;   // reports the whole input slice as consumed
21611+}
21612+
21613+static int xbgrToYv12Wrapper(SwsContext *c, const uint8_t *src[],
21614+                             int srcStride[], int srcSliceY, int srcSliceH,
21615+                             uint8_t *dst[], int dstStride[])
21616+{   // unscaled conversion of one source slice through the ff_xbgrtoyv12 function pointer
21617+    ff_xbgrtoyv12(
21618+        src[0],
21619+        dst[0] +  srcSliceY       * dstStride[0],   // first luma row of this slice
21620+        dst[1] + (srcSliceY >> 1) * dstStride[1],   // chroma planes hold one row per two source rows
21621+        dst[2] + (srcSliceY >> 1) * dstStride[2],
21622+        c->srcW, srcSliceH,
21623+        dstStride[0], dstStride[1], srcStride[0],   // dstStride[1] is the chroma stride for both U and V
21624+        c->input_rgb2yuv_table);
21625+    if (dst[3])   // a fourth destination plane exists: fill it with the constant 255
21626+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
21627+    return srcSliceH;   // reports the whole input slice as consumed
21628+}
21629+
21630+static int xrgbToYv12Wrapper(SwsContext *c, const uint8_t *src[],
21631+                             int srcStride[], int srcSliceY, int srcSliceH,
21632+                             uint8_t *dst[], int dstStride[])
21633+{   // unscaled conversion of one source slice through the ff_xrgbtoyv12 function pointer
21634+    ff_xrgbtoyv12(
21635+        src[0],
21636+        dst[0] +  srcSliceY       * dstStride[0],   // first luma row of this slice
21637+        dst[1] + (srcSliceY >> 1) * dstStride[1],   // chroma planes hold one row per two source rows
21638+        dst[2] + (srcSliceY >> 1) * dstStride[2],
21639+        c->srcW, srcSliceH,
21640+        dstStride[0], dstStride[1], srcStride[0],   // dstStride[1] is the chroma stride for both U and V
21641+        c->input_rgb2yuv_table);
21642+    if (dst[3])   // a fourth destination plane exists: fill it with the constant 255
21643+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
21644+    return srcSliceH;   // reports the whole input slice as consumed
21645+}
21646+
21647 static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
21648                              int srcStride[], int srcSliceY, int srcSliceH,
21649                              uint8_t *dst[], int dstStride[])
21650@@ -1977,7 +2062,6 @@ void ff_get_unscaled_swscale(SwsContext
21651     const enum AVPixelFormat dstFormat = c->dstFormat;
21652     const int flags = c->flags;
21653     const int dstH = c->dstH;
21654-    const int dstW = c->dstW;
21655     int needsDither;
21656
21657     needsDither = isAnyRGB(dstFormat) &&
21658@@ -2035,8 +2119,34 @@ void ff_get_unscaled_swscale(SwsContext
21659     /* bgr24toYV12 */
21660     if (srcFormat == AV_PIX_FMT_BGR24 &&
21661         (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
21662-        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
21663+        !(flags & SWS_ACCURATE_RND))
21664         c->convert_unscaled = bgr24ToYv12Wrapper;
21665+    /* rgb24toYV12 */
21666+    if (srcFormat == AV_PIX_FMT_RGB24 &&
21667+        (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
21668+        !(flags & SWS_ACCURATE_RND))
21669+        c->convert_unscaled = rgb24ToYv12Wrapper;
21670+
21671+    /* bgrxtoYV12 */
21672+    if (((srcFormat == AV_PIX_FMT_BGRA && dstFormat == AV_PIX_FMT_YUV420P) ||
21673+         (srcFormat == AV_PIX_FMT_BGR0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
21674+        !(flags & SWS_ACCURATE_RND))
21675+        c->convert_unscaled = bgrxToYv12Wrapper;
21676+    /* rgbx24toYV12 */
21677+    if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) ||
21678+         (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
21679+        !(flags & SWS_ACCURATE_RND))
21680+        c->convert_unscaled = rgbxToYv12Wrapper;
21681+    /* xbgrtoYV12 */
21682+    if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == AV_PIX_FMT_YUV420P) ||
21683+         (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
21684+        !(flags & SWS_ACCURATE_RND))
21685+        c->convert_unscaled = xbgrToYv12Wrapper;
21686+    /* xrgb24toYV12 */
21687+    if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) ||
21688+         (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
21689+        !(flags & SWS_ACCURATE_RND))
21690+        c->convert_unscaled = xrgbToYv12Wrapper;
21691
21692     /* RGB/BGR -> RGB/BGR (no dither needed forms) */
21693     if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c)
21694--- a/libswscale/tests/swscale.c
21695+++ b/libswscale/tests/swscale.c
21696@@ -23,6 +23,7 @@
21697 #include <string.h>
21698 #include <inttypes.h>
21699 #include <stdarg.h>
21700+#include <time.h>
21701
21702 #undef HAVE_AV_CONFIG_H
21703 #include "libavutil/cpu.h"
21704@@ -78,6 +79,15 @@ struct Results {
21705     uint32_t crc;
21706 };
21707
21708+static int time_rep = 0;   // set from the -t option; 0 disables the timing run in doTest
21709+
21710+static uint64_t utime(void)   // current CLOCK_MONOTONIC reading in microseconds
21711+{
21712+    struct timespec ts;
21713+    clock_gettime(CLOCK_MONOTONIC, &ts);   // return value is not checked
21714+    return ts.tv_nsec / 1000 + (uint64_t)ts.tv_sec * 1000000;   // ns -> us plus s -> us; cast keeps the product 64-bit
21715+}
21716+
21717 // test by ref -> src -> dst -> out & compare out against ref
21718 // ref & out are YV12
21719 static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h,
21720@@ -174,7 +184,7 @@ static int doTest(const uint8_t * const
21721         goto end;
21722     }
21723
21724-    printf(" %s %dx%d -> %s %3dx%3d flags=%2d",
21725+    printf(" %s %4dx%4d -> %s %4dx%4d flags=%2d",
21726            desc_src->name, srcW, srcH,
21727            desc_dst->name, dstW, dstH,
21728            flags);
21729@@ -182,6 +192,17 @@ static int doTest(const uint8_t * const
21730
21731     sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride);
21732
21733+    if (time_rep != 0)
21734+    {
21735+        const uint64_t now = utime();
21736+        uint64_t done;
21737+        for (i = 1; i != time_rep; ++i) {
21738+            sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride);
21739+        }
21740+        done = utime();
21741+        printf(" T=%7"PRId64"us ", done-now);
21742+    }
21743+
21744     for (i = 0; i < 4 && dstStride[i]; i++)
21745         crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i],
21746                      dstStride[i] * dstH);
21747@@ -355,56 +376,78 @@ static int fileTest(const uint8_t * cons
21748     return 0;
21749 }
21750
21751-#define W 96
21752-#define H 96
21753-
21754 int main(int argc, char **argv)
21755 {
21756+    unsigned int W = 96;
21757+    unsigned int H = 96;
21758+    unsigned int W2;
21759+    unsigned int H2;
21760+    unsigned int S;
21761     enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE;
21762     enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE;
21763-    uint8_t *rgb_data   = av_malloc(W * H * 4);
21764-    const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL };
21765-    int rgb_stride[4]   = { 4 * W, 0, 0, 0 };
21766-    uint8_t *data       = av_malloc(4 * W * H);
21767-    const uint8_t * const src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 };
21768-    int stride[4]       = { W, W, W, W };
21769     int x, y;
21770     struct SwsContext *sws;
21771     AVLFG rand;
21772     int res = -1;
21773     int i;
21774     FILE *fp = NULL;
21775-
21776-    if (!rgb_data || !data)
21777-        return -1;
21778+    uint8_t *rgb_data;
21779+    uint8_t * rgb_src[4] = { NULL };
21780+    int rgb_stride[4]   = { 0 };
21781+    uint8_t *data;
21782+    uint8_t * src[4] = { NULL };
21783+    int stride[4]       = { 0 };
21784
21785     for (i = 1; i < argc; i += 2) {
21786+        const char * const arg2 = argv[i+1];
21787+
21788         if (argv[i][0] != '-' || i + 1 == argc)
21789             goto bad_option;
21790         if (!strcmp(argv[i], "-ref")) {
21791-            fp = fopen(argv[i + 1], "r");
21792+            fp = fopen(arg2, "r");
21793             if (!fp) {
21794-                fprintf(stderr, "could not open '%s'\n", argv[i + 1]);
21795+                fprintf(stderr, "could not open '%s'\n", arg2);
21796                 goto error;
21797             }
21798         } else if (!strcmp(argv[i], "-cpuflags")) {
21799             unsigned flags = av_get_cpu_flags();
21800-            int ret = av_parse_cpu_caps(&flags, argv[i + 1]);
21801+            int ret = av_parse_cpu_caps(&flags, arg2);
21802             if (ret < 0) {
21803-                fprintf(stderr, "invalid cpu flags %s\n", argv[i + 1]);
21804+                fprintf(stderr, "invalid cpu flags %s\n", arg2);
21805                 return ret;
21806             }
21807             av_force_cpu_flags(flags);
21808         } else if (!strcmp(argv[i], "-src")) {
21809-            srcFormat = av_get_pix_fmt(argv[i + 1]);
21810+            srcFormat = av_get_pix_fmt(arg2);
21811             if (srcFormat == AV_PIX_FMT_NONE) {
21812-                fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
21813+                fprintf(stderr, "invalid pixel format %s\n", arg2);
21814                 return -1;
21815             }
21816         } else if (!strcmp(argv[i], "-dst")) {
21817-            dstFormat = av_get_pix_fmt(argv[i + 1]);
21818+            dstFormat = av_get_pix_fmt(arg2);
21819             if (dstFormat == AV_PIX_FMT_NONE) {
21820-                fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
21821+                fprintf(stderr, "invalid pixel format %s\n", arg2);
21822+                return -1;
21823+            }
21824+        } else if (!strcmp(argv[i], "-w")) {
21825+            char * p = NULL;
21826+            W = strtoul(arg2, &p, 0);
21827+            if (!W || *p) {
21828+                fprintf(stderr, "bad width %s\n", arg2);
21829+                return -1;
21830+            }
21831+        } else if (!strcmp(argv[i], "-h")) {
21832+            char * p = NULL;
21833+            H = strtoul(arg2, &p, 0);
21834+            if (!H || *p) {
21835+                fprintf(stderr, "bad height '%s'\n", arg2);
21836+                return -1;
21837+            }
21838+        } else if (!strcmp(argv[i], "-t")) {
21839+            char * p = NULL;
21840+            time_rep = (int)strtol(arg2, &p, 0);
21841+            if (*p) {
21842+                fprintf(stderr, "bad time repetitions '%s'\n", arg2);
21843                 return -1;
21844             }
21845         } else {
21846@@ -414,15 +457,34 @@ bad_option:
21847         }
21848     }
21849
21850-    sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H,
21851+    S = (W + 15) & ~15;
21852+    rgb_data   = av_mallocz(S * H * 4);
21853+    rgb_src[0] = rgb_data;
21854+    rgb_stride[0]   = 4 * S;
21855+    data       = av_mallocz(4 * S * H);
21856+    src[0] = data;
21857+    src[1] = data + S * H;
21858+    src[2] = data + S * H * 2;
21859+    src[3] = data + S * H * 3;
21860+    stride[0] = S;
21861+    stride[1] = S;
21862+    stride[2] = S;
21863+    stride[3] = S;
21864+    H2 = H < 96 ? 8 : H / 12;
21865+    W2 = W < 96 ? 8 : W / 12;
21866+
21867+    if (!rgb_data || !data)
21868+        return -1;
21869+
21870+    sws = sws_getContext(W2, H2, AV_PIX_FMT_RGB32, W, H,
21871                          AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL);
21872
21873     av_lfg_init(&rand, 1);
21874
21875     for (y = 0; y < H; y++)
21876         for (x = 0; x < W * 4; x++)
21877-            rgb_data[ x + y * 4 * W] = av_lfg_get(&rand);
21878-    res = sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride);
21879+            rgb_data[ x + y * 4 * S] = av_lfg_get(&rand);
21880+    res = sws_scale(sws, (const uint8_t * const *)rgb_src, rgb_stride, 0, H2, (uint8_t * const *) src, stride);
21881     if (res < 0 || res != H) {
21882         res = -1;
21883         goto error;
21884@@ -431,10 +493,10 @@ bad_option:
21885     av_free(rgb_data);
21886
21887     if(fp) {
21888-        res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat);
21889+        res = fileTest((const uint8_t * const *)src, stride, W, H, fp, srcFormat, dstFormat);
21890         fclose(fp);
21891     } else {
21892-        selfTest(src, stride, W, H, srcFormat, dstFormat);
21893+        selfTest((const uint8_t * const *)src, stride, W, H, srcFormat, dstFormat);
21894         res = 0;
21895     }
21896 error:
21897--- /dev/null
21898+++ b/pi-util/BUILD.txt
21899@@ -0,0 +1,67 @@
21900+Building Pi FFmpeg
21901+==================
21902+
21903+Currently only building on a Pi is supported.
21904+This builds ffmpeg the way I've tested it.
21905+
21906+Get all dependencies - the current package dependencies are good enough
21907+
21908+$ sudo apt-get build-dep ffmpeg
21909+
21910+Configure using the pi-util/conf_native.sh script
21911+-------------------------------------------------
21912+
21913+This sets the normal release options and creates an output dir to build into.
21914+The directory name will depend on system and options but will be under out/
21915+
21916+There are a few choices here
21917+ --mmal  build including the legacy mmal-based decoders and zero-copy code
21918+         this requires appropriate libraries which currently will exist for
21919+         armv7 but not arm64
21920+ --noshared
21921+         Build a static image rather than a shared library one.  Static is
21922+         easier for testing as there is no need to worry about library
21923+         paths being confused and therefore running the wrong code,  Shared
21924+         is what is needed, in most cases, when building for use by other
21925+         programs.
21926+ --usr   Set install dir to /usr (i.e. system default) rather than in
21927+         <builddir>/install
21928+
21929+So for a static build
21930+---------------------
21931+
21932+$ pi-util/conf_native.sh --noshared
21933+
21934+$ make -j8 -C out/<wherever the script said it was building to>
21935+
21936+You can now run ffmpeg directly from where it was built
21937+
21938+For a shared build
21939+------------------
21940+
21941+There are two choices here
21942+
21943+$ pi-util/conf_native.sh
21944+$ make -j8 -C out/<builddir> install
21945+
21946+This sets the install prefix to <builddir>/install and is probably what you
21947+want if you don't want to overwrite the system files.
21948+
21949+You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was
21950+built. You can copy the contents of <build dir>/install to /usr and that mostly
21951+works. The only downside is that paths in pkgconfig end up being set to the
21952+install directory in your build directory which may be less than ideal when
21953+building other packages.
21954+
21955+The alternative if you just want to replace the system libs is:
21956+
21957+$ pi-util/conf_native.sh --usr
21958+$ make -j8 -C out/<builddir>
21959+$ sudo pi-util/clean_usr_libs.sh
21960+$ sudo make -j8 -C out/<builddir> install
21961+
21962+The clean_usr_libs.sh step wipes any existing libs & includes (for all
21963+architectures) from the system which helps avoid confusion when running other
21964+progs as you can be sure you're not running old code which is unfortunately
21965+easy to do otherwise.
21966+
21967--- /dev/null
21968+++ b/pi-util/NOTES.txt
21969@@ -0,0 +1,69 @@
21970+Notes on the hevc_rpi decoder & associated support code
21971+-------------------------------------------------------
21972+
21973+There are 3 main parts to the existing code:
21974+
21975+1) The decoder - this is all in libavcodec as rpi_hevc*.
21976+
21977+2) A few filters to deal with Sand frames and a small patch to
21978+automatically select the sand->i420 converter when required.
21979+
21980+3) A kludge in ffmpeg.c to display the decoded video. This could & should
21981+be converted into a proper ffmpeg display module.
21982+
21983+
21984+Decoder
21985+-------
21986+
21987+The decoder is a modified version of the existing ffmpeg hevc decoder.
21988+Generally it is ~100% faster than the existing ffmpeg hevc s/w decoder.
21989+More complex bitstreams can be up to ~200% faster but particularly easy
21990+streams can cut its advantage down to ~50%.  This means that a Pi3+ can
21991+display nearly all 8-bit 1080p30 streams and with some overclocking it can
21992+display most lower bitrate 10-bit 1080p30 streams - this latter case is
21993+not helped by the requirement to downsample to 8-bit before display on a
21994+Pi.
21995+
21996+It has had co-processor offload added for inter-pred and large block
21997+residual transform.  Various parts have had optimized ARM NEON assembler
21998+added and the existing ARM asm sections have been profiled and
21999+re-optimized for A53. The main C code has been substantially reworked at
22000+its lower levels in an attempt to optimize it and minimize memory
22001+bandwidth. To some extent code paths that deal with frame types that it
22002+doesn't support have been pruned.
22003+
22004+It outputs frames in Broadcom Sand format. This is a somewhat annoying
22005+layout that doesn't fit into ffmpeg's standard frame descriptions. It has
22006+vertical stripes of 128 horizontal pixels (64 in 10 bit forms) with Y for
22007+the stripe followed by interleaved U & V, that is then followed by the Y
22008+for the next stripe, etc. The final stripe is always padded to
22009+stripe-width. This is used in an attempt to help with cache locality and
22010+cut down on the number of dram bank switches. It is annoying to use for
22011+inter-pred with conventional processing but the way the Pi QPU (which is
22012+used for inter-pred) works means that it has negligible downsides here and
22013+the improved memory performance exceeds the overhead of the increased
22014+complexity in the rest of the code.
22015+
22016+Frames must be allocated out of GPU memory (as otherwise they can't be
22017+accessed by the co-processors). Utility functions (in rpi_zc.c) have been
22018+written to make this easier. As the frames are already in GPU memory they
22019+can be displayed by the Pi h/w without any further copying.
22020+
22021+
22022+Known non-features
22023+------------------
22024+
22025+Frame allocation should probably be done in some other way in order to fit
22026+into the standard framework better.
22027+
22028+Sand frames are currently declared as software frames, there is an
22029+argument that they should be hardware frames but they aren't really.
22030+
22031+There must be a better way of auto-selecting the hevc_rpi decoder over the
22032+normal s/w hevc decoder, but I became confused by the existing h/w
22033+acceleration framework and what I wanted to do didn't seem to fit in
22034+neatly.
22035+
22036+Display should be a proper device rather than a kludge in ffmpeg.c
22037+
22038+
22039--- /dev/null
22040+++ b/pi-util/TESTMESA.txt
22041@@ -0,0 +1,82 @@
22042+# Setup & Build instructions for testing Argon30 mesa support (on Pi4)
22043+
22044+# These assume that the drm_mmal test for Sand8 has been built on this Pi
22045+# as build relies on many of the same files
22046+
22047+# 1st get everything required to build ffmpeg
22048+# If sources aren't already enabled on your Pi then enable them
22049+sudo su
22050+sed "s/#deb-src/deb-src/" /etc/apt/sources.list > /tmp/sources.list
22051+sed "s/#deb-src/deb-src/" /etc/apt/sources.list.d/raspi.list > /tmp/raspi.list
22052+mv /tmp/sources.list /etc/apt/
22053+mv /tmp/raspi.list /etc/apt/sources.list.d/
22054+apt update
22055+
22056+# Get dependencies
22057+sudo apt build-dep ffmpeg
22058+
22059+sudo apt install meson libepoxy-dev libxcb-dri3-dev libxcb1-dev libx11-dev libx11-xcb-dev libdrm-dev
22060+
22061+# Enable H265 V4L2 request decoder
22062+sudo su
22063+echo dtoverlay=rpivid-v4l2 >> /boot/config.txt
22064+# You may also want to add more CMA if you are going to try 4k videos
22065+# Change the dtoverlay=vc4-fkms-v3d line in config.txt to read
22066+# dtoverlay=vc4-fkms-v3d,cma-512
22067+reboot
22068+# Check it has turned up
22069+ls -la /dev/video*
22070+# This should include video19
22071+# crw-rw----+ 1 root video 81, 7 Aug  4 17:25 /dev/video19
22072+
22073+# Currently on the Pi the linux headers from the debian distro don't match
22074+# the kernel that we ship and we need to update them - hopefully this step
22075+# will be unneeded in the future
22076+sudo apt install git bc bison flex libssl-dev make
22077+git clone --depth=1 https://github.com/raspberrypi/linux --branch rpi-5.10.y
22078+cd linux
22079+KERNEL=kernel7l
22080+make bcm2711_defconfig
22081+make headers_install
22082+sudo cp -r usr/include/linux /usr/include
22083+cd ..
22084+
22085+# Config - this builds a statically linked ffmpeg which is easier for testing
22086+pi-util/conf_native.sh --noshared
22087+
22088+# Build (this is a bit dull)
22089+# If you want to poke the source, libavdevice/egl_vout.c contains the
22090+# output code
22091+cd out/armv7-static-rel
22092+
22093+# Check that you have actually configured V4L2 request
22094+grep HEVC_V4L2REQUEST config.h
22095+# You are hoping for
22096+# #define CONFIG_HEVC_V4L2REQUEST_HWACCEL 1
22097+# if you get 0 then the config has failed
22098+
22099+make -j6
22100+
22101+# Grab test streams
22102+wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-h264.mkv
22103+wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc.mkv
22104+wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc-10bit.mkv
22105+
22106+# Test i420 output (works currently)
22107+./ffmpeg -no_cvt_hw -vcodec h264_v4l2m2m -i jellyfish-3-mbps-hd-h264.mkv -f vout_egl -
22108+
22109+# Test Sand8 output - doesn't currently work but should once you have
22110+# Sand8 working in drm_mmal. I can't guarantee that this will work as
22111+# I can't test this path with a known working format, but the debug looks
22112+# good.  If this doesn't work & drm_mmal does with sand8 then come back to me
22113+# The "show_all 1" forces vout to display every frame otherwise it drops any
22114+# frame that would cause it to block
22115+./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc.mkv -show_all 1 -f vout_egl -
22116+
22117+# Test Sand30 - doesn't currently work
22118+# (Beware that when FFmpeg errors out it often leaves your terminal window
22119+# in a state where you need to reset it)
22120+./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc-10bit.mkv -f vout_egl -
22121+
22122+
22123+
22124--- /dev/null
22125+++ b/pi-util/clean_usr_libs.sh
22126@@ -0,0 +1,42 @@
22127+set -e  # Purge installed FFmpeg headers/libraries from the system dirs below; abort on the first failing command
22128+U=/usr/include/arm-linux-gnueabihf  # header directories under the arm-linux-gnueabihf include path
22129+rm -rf $U/libavcodec  # -rf: recursive, and a missing directory is not an error
22130+rm -rf $U/libavdevice
22131+rm -rf $U/libavfilter
22132+rm -rf $U/libavformat
22133+rm -rf $U/libavutil
22134+rm -rf $U/libswresample
22135+rm -rf $U/libswscale
22136+U=/usr/include/aarch64-linux-gnu  # same header directories under the aarch64-linux-gnu include path
22137+rm -rf $U/libavcodec
22138+rm -rf $U/libavdevice
22139+rm -rf $U/libavfilter
22140+rm -rf $U/libavformat
22141+rm -rf $U/libavutil
22142+rm -rf $U/libswresample
22143+rm -rf $U/libswscale
22144+U=/usr/lib/arm-linux-gnueabihf  # library files (any suffix after the dot) under the arm-linux-gnueabihf lib path
22145+rm -f $U/libavcodec.*  # -f: with default glob settings an unmatched pattern / missing file is not an error
22146+rm -f $U/libavdevice.*
22147+rm -f $U/libavfilter.*
22148+rm -f $U/libavformat.*
22149+rm -f $U/libavutil.*
22150+rm -f $U/libswresample.*
22151+rm -f $U/libswscale.*
22152+U=/usr/lib/arm-linux-gnueabihf/neon/vfp  # same library files in the neon/vfp subdirectory
22153+rm -f $U/libavcodec.*
22154+rm -f $U/libavdevice.*
22155+rm -f $U/libavfilter.*
22156+rm -f $U/libavformat.*
22157+rm -f $U/libavutil.*
22158+rm -f $U/libswresample.*
22159+rm -f $U/libswscale.*
22160+U=/usr/lib/aarch64-linux-gnu  # same library files under the aarch64-linux-gnu lib path
22161+rm -f $U/libavcodec.*
22162+rm -f $U/libavdevice.*
22163+rm -f $U/libavfilter.*
22164+rm -f $U/libavformat.*
22165+rm -f $U/libavutil.*
22166+rm -f $U/libswresample.*
22167+rm -f $U/libswscale.*
22168+
22169--- /dev/null
22170+++ b/pi-util/conf_arm64_native.sh
22171@@ -0,0 +1,45 @@
22172+echo "Configure for ARM64 native build"
22173+
22174+#RPI_KEEPS="-save-temps=obj"
22175+
22176+SHARED_LIBS="--enable-shared"
22177+if [ "$1" == "--noshared" ]; then
22178+  SHARED_LIBS="--disable-shared"
22179+  echo Static libs
22180+  OUT=out/arm64-static-rel
22181+else
22182+  echo Shared libs
22183+  OUT=out/arm64-shared-rel
22184+fi
22185+
22186+mkdir -p $OUT
22187+cd $OUT
22188+
22189+A=aarch64-linux-gnu
22190+USR_PREFIX=`pwd`/install
22191+LIB_PREFIX=$USR_PREFIX/lib/$A
22192+INC_PREFIX=$USR_PREFIX/include/$A
22193+
22194+../../configure \
22195+ --prefix=$USR_PREFIX\
22196+ --libdir=$LIB_PREFIX\
22197+ --incdir=$INC_PREFIX\
22198+ --disable-stripping\
22199+ --disable-thumb\
22200+ --disable-mmal\
22201+ --enable-sand\
22202+ --enable-v4l2-request\
22203+ --enable-libdrm\
22204+ --enable-epoxy\
22205+ --enable-libudev\
22206+ --enable-vout-drm\
22207+ --enable-vout-egl\
22208+ $SHARED_LIBS\
22209+ --extra-cflags="-ggdb"
22210+
22211+# --enable-decoder=hevc_rpi\
22212+# --enable-extra-warnings\
22213+# --arch=armv71\
22214+
22215+# gcc option for getting asm listing
22216+# -Wa,-ahls
22217--- /dev/null
22218+++ b/pi-util/conf_h265.2016.csv
22219@@ -0,0 +1,195 @@
22220+1,HEVC_v1/AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5,8
22221+1,HEVC_v1/AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5,8
22222+1,HEVC_v1/AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5,8
22223+1,HEVC_v1/AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5,8
22224+1,HEVC_v1/AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5,8
22225+1,HEVC_v1/AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5,8
22226+1,HEVC_v1/AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5,8
22227+1,HEVC_v1/AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5,8
22228+1,HEVC_v1/BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5,8
22229+1,HEVC_v1/CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5,8
22230+1,HEVC_v1/CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5,8
22231+1,HEVC_v1/CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5,8
22232+1,HEVC_v1/CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5,8
22233+1,HEVC_v1/CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5,8
22234+1,HEVC_v1/CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5,8
22235+1,HEVC_v1/CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5,8
22236+1,HEVC_v1/CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5,8
22237+1,HEVC_v1/CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5,8
22238+1,HEVC_v1/cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5,8
22239+1,HEVC_v1/CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5,8
22240+1,HEVC_v1/CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5,8
22241+1,HEVC_v1/DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5,10
22242+1,HEVC_v1/DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5,8
22243+1,HEVC_v1/DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5,8
22244+1,HEVC_v1/DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5,8
22245+1,HEVC_v1/DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5,8
22246+1,HEVC_v1/DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5,8
22247+1,HEVC_v1/DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5,8
22248+1,HEVC_v1/DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5,8
22249+1,HEVC_v1/DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5,8
22250+1,HEVC_v1/DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5,8
22251+1,HEVC_v1/DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5,8
22252+1,HEVC_v1/DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5,8
22253+1,HEVC_v1/DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5,8
22254+1,HEVC_v1/DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5,8
22255+1,HEVC_v1/ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5,8
22256+1,HEVC_v1/ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5,8
22257+1,HEVC_v1/ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5,8
22258+1,HEVC_v1/EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5,8
22259+1,HEVC_v1/FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5,8
22260+1,HEVC_v1/HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5,8
22261+1,HEVC_v1/INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5,8
22262+1,HEVC_v1/INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5,10
22263+1,HEVC_v1/ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5,8
22264+1,HEVC_v1/ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5,8
22265+1,HEVC_v1/ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5,8
22266+1,HEVC_v1/ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5,8
22267+1,HEVC_v1/ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5,8
22268+1,HEVC_v1/IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5,8
22269+1,HEVC_v1/IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5,8
22270+1,HEVC_v1/IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5,8
22271+1,HEVC_v1/LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5,8
22272+1,HEVC_v1/LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5,8
22273+1,HEVC_v1/LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5,8
22274+1,HEVC_v1/MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5,8
22275+1,HEVC_v1/MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5,8
22276+1,HEVC_v1/MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5,8
22277+1,HEVC_v1/MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5,8
22278+1,HEVC_v1/MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5,8
22279+1,HEVC_v1/MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5,8
22280+1,HEVC_v1/MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5,8
22281+1,HEVC_v1/MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5,8
22282+1,HEVC_v1/MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5,8
22283+1,HEVC_v1/MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5,8
22284+1,HEVC_v1/MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5,8
22285+1,HEVC_v1/MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5,8
22286+1,HEVC_v1/MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5,8
22287+1,HEVC_v1/NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5,8
22288+1,HEVC_v1/NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5,8
22289+1,HEVC_v1/NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5,8
22290+1,HEVC_v1/OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5,8
22291+1,HEVC_v1/OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5,8
22292+1,HEVC_v1/OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5,8
22293+1,HEVC_v1/PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5,8
22294+1,HEVC_v1/PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5,8
22295+1,HEVC_v1/PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5,8
22296+1,HEVC_v1/PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5,8
22297+1,HEVC_v1/PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5,8
22298+1,HEVC_v1/PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5,8
22299+1,HEVC_v1/PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5,8
22300+1,HEVC_v1/PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5,8
22301+1,HEVC_v1/PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5,8
22302+1,HEVC_v1/POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5,8
22303+1,HEVC_v1/PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5,8
22304+1,HEVC_v1/PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5,8
22305+1,HEVC_v1/RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5,8
22306+1,HEVC_v1/RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5,8
22307+1,HEVC_v1/RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5,8
22308+1,HEVC_v1/RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5,8
22309+1,HEVC_v1/RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5,8
22310+1,HEVC_v1/RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5,8
22311+1,HEVC_v1/RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5,8
22312+1,HEVC_v1/RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5,8
22313+1,HEVC_v1/RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5,8
22314+1,HEVC_v1/RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5,8
22315+1,HEVC_v1/RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5,8
22316+1,HEVC_v1/RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5,8
22317+1,HEVC_v1/RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5,8
22318+1,HEVC_v1/RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5,8
22319+1,HEVC_v1/RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5,8
22320+1,HEVC_v1/RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5,8
22321+1,HEVC_v1/RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5,8
22322+1,HEVC_v1/SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5,8
22323+1,HEVC_v1/SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5,8
22324+1,HEVC_v1/SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5,8
22325+1,HEVC_v1/SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5,8
22326+1,HEVC_v1/SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5,8
22327+1,HEVC_v1/SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5,8
22328+1,HEVC_v1/SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5,8
22329+1,HEVC_v1/SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5,8
22330+1,HEVC_v1/SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt,8
22331+1,HEVC_v1/SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt,8
22332+1,HEVC_v1/SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5,8
22333+1,HEVC_v1/SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5,8
22334+1,HEVC_v1/SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5,8
22335+1,HEVC_v1/SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5,8
22336+1,HEVC_v1/SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5,8
22337+1,HEVC_v1/SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5,8
22338+1,HEVC_v1/SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5,8
22339+1,HEVC_v1/STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5,8
22340+1,HEVC_v1/STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5,8
22341+1,HEVC_v1/TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5,8
22342+1,HEVC_v1/TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5,8
22343+1,HEVC_v1/TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5,8
22344+1,HEVC_v1/TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5,8
22345+1,HEVC_v1/TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5,8
22346+1,HEVC_v1/TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5,8
22347+3,HEVC_v1/TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth,10
22348+1,HEVC_v1/TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5,8
22349+1,HEVC_v1/VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5,8
22350+3,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8
22351+1,HEVC_v1/WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5,10
22352+1,HEVC_v1/WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5,8
22353+1,HEVC_v1/WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5,8
22354+1,HEVC_v1/WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5,10
22355+1,HEVC_v1/WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5,10
22356+1,HEVC_v1/WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5,8
22357+1,HEVC_v1/WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5,10
22358+1,HEVC_v1/WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5,8
22359+1,HEVC_v1/WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5,10
22360+1,HEVC_v1/WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5,8
22361+1,HEVC_v1/WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5,10
22362+1,HEVC_v1/WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5,8
22363+1,HEVC_v1/WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5,10
22364+1,HEVC_v1/WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5,8
22365+1,HEVC_v1/WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5,10
22366+1,HEVC_v1/WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5,8
22367+1,RExt/ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_yuv_2.md5,0
22368+0,RExt/Bitdepth_A_RExt_Sony_1,Bitdepth_A_RExt_Sony_1.bin,md5sum.txt,8
22369+0,RExt/Bitdepth_B_RExt_Sony_1,Bitdepth_B_RExt_Sony_1.bin,md5sum.txt,8
22370+0,RExt/CCP_10bit_RExt_QCOM,CCP_10bit_RExt_QCOM.bin,CCP_10bit_RExt_QCOM_md5sum.txt,10
22371+0,RExt/CCP_12bit_RExt_QCOM,CCP_12bit_RExt_QCOM.bin,CCP_12bit_RExt_QCOM_md5sum.txt,8
22372+0,RExt/CCP_8bit_RExt_QCOM,CCP_8bit_RExt_QCOM.bin,CCP_8bit_RExt_QCOM_md5sum.txt,8
22373+1,RExt/ExplicitRdpcm_A_BBC_1,ExplicitRdpcm_A_BBC_1.bit,md5sum.txt,0
22374+0,RExt/ExplicitRdpcm_B_BBC_2,ExplicitRdpcm_B_BBC_1.bit,md5sum.txt,8
22375+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.md5,10
22376+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.md5,8
22377+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.md5,8
22378+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.md5,8
22379+0,RExt/EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.md5,10
22380+0,RExt/EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.md5,8
22381+0,RExt/EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.md5,8
22382+0,RExt/EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.md5,8
22383+1,RExt/GENERAL_10b_420_RExt_Sony_1,GENERAL_10b_420_RExt_Sony_1.bit,GENERAL_10b_420_RExt_Sony_1.md5,10
22384+1,RExt/GENERAL_10b_422_RExt_Sony_1,GENERAL_10b_422_RExt_Sony_1.bit,GENERAL_10b_422_RExt_Sony_1.md5,0
22385+1,RExt/GENERAL_10b_444_RExt_Sony_2,GENERAL_10b_444_RExt_Sony_2.bit,GENERAL_10b_444_RExt_Sony_2.md5,0
22386+1,RExt/GENERAL_12b_400_RExt_Sony_1,GENERAL_12b_400_RExt_Sony_1.bit,GENERAL_12b_400_RExt_Sony_1.md5,0
22387+1,RExt/GENERAL_12b_420_RExt_Sony_1,GENERAL_12b_420_RExt_Sony_1.bit,GENERAL_12b_420_RExt_Sony_1.md5,0
22388+1,RExt/GENERAL_12b_422_RExt_Sony_1,GENERAL_12b_422_RExt_Sony_1.bit,GENERAL_12b_422_RExt_Sony_1.md5,0
22389+1,RExt/GENERAL_12b_444_RExt_Sony_2,GENERAL_12b_444_RExt_Sony_2.bit,GENERAL_12b_444_RExt_Sony_2.md5,0
22390+0,RExt/GENERAL_16b_400_RExt_Sony_1,GENERAL_16b_400_RExt_Sony_1.bit,GENERAL_16b_400_RExt_Sony_1.md5,0
22391+0,RExt/GENERAL_16b_444_highThroughput_RExt_Sony_2,GENERAL_16b_444_highThroughput_RExt_Sony_2.bit,GENERAL_16b_444_highThroughput_RExt_Sony_2.md5,8
22392+0,RExt/GENERAL_16b_444_RExt_Sony_2,GENERAL_16b_444_RExt_Sony_2.bit,GENERAL_16b_444_RExt_Sony_2.md5,8
22393+1,RExt/GENERAL_8b_400_RExt_Sony_1,GENERAL_8b_400_RExt_Sony_1.bit,GENERAL_8b_400_RExt_Sony_1.md5,0
22394+1,RExt/GENERAL_8b_420_RExt_Sony_1,GENERAL_8b_420_RExt_Sony_1.bit,GENERAL_8b_420_RExt_Sony_1.md5,8
22395+1,RExt/GENERAL_8b_444_RExt_Sony_2,GENERAL_8b_444_RExt_Sony_2.bit,GENERAL_8b_444_RExt_Sony_2.md5,0
22396+1,RExt/IPCM_A_RExt_NEC_2,IPCM_A_RExt_NEC_2.bit,IPCM_A_RExt_NEC_2_yuv.md5,0
22397+1,RExt/IPCM_B_RExt_NEC,IPCM_B_RExt_NEC.bit,IPCM_B_RExt_NEC_yuv.md5,0
22398+1,RExt/Main_422_10_A_RExt_Sony_2,Main_422_10_A_RExt_Sony_2.bin,md5sum.txt,0
22399+1,RExt/Main_422_10_B_RExt_Sony_2,Main_422_10_B_RExt_Sony_2.bin,md5sum.txt,0
22400+1,RExt/PERSIST_RPARAM_A_RExt_Sony_3,PERSIST_RPARAM_A_RExt_Sony_3.bit,PERSIST_RPARAM_A_RExt_Sony_3.md5,0
22401+1,RExt/QMATRIX_A_RExt_Sony_1,QMATRIX_A_RExt_Sony_1.bit,QMATRIX_A_RExt_Sony_1.md5,0
22402+0,RExt/SAO_A_RExt_MediaTek_1,SAO_A_RExt_MediaTek_1.bit,SAO_A_RExt_MediaTek_1.md5, # Runs out of memory - could be fixed,8
22403+0,RExt/TSCTX_10bit_I_RExt_SHARP_1,TSCTX_10bit_I_RExt_SHARP_1.bin,TSCTX_10bit_I_RExt_SHARP_1.md5,10
22404+0,RExt/TSCTX_10bit_RExt_SHARP_1,TSCTX_10bit_RExt_SHARP_1.bin,TSCTX_10bit_RExt_SHARP_1.md5,10
22405+0,RExt/TSCTX_12bit_I_RExt_SHARP_1,TSCTX_12bit_I_RExt_SHARP_1.bin,TSCTX_12bit_I_RExt_SHARP_1.md5,8
22406+0,RExt/TSCTX_12bit_RExt_SHARP_1,TSCTX_12bit_RExt_SHARP_1.bin,TSCTX_12bit_RExt_SHARP_1.md5,8
22407+0,RExt/TSCTX_8bit_I_RExt_SHARP_1,TSCTX_8bit_I_RExt_SHARP_1.bin,TSCTX_8bit_I_RExt_SHARP_1.md5,8
22408+0,RExt/TSCTX_8bit_RExt_SHARP_1,TSCTX_8bit_RExt_SHARP_1.bin,TSCTX_8bit_RExt_SHARP_1.md5,8
22409+0,RExt/WAVETILES_RExt_Sony_2,WAVETILES_RExt_Sony_2.bit,WAVETILES_RExt_Sony_2.md5,8
22410+1,local/sao_cu16_mobile_344x280,sao_cu16_mobile_344x280.265,sao_cu16_mobile_344x280.md5,8
22411+1,local/dblk_cu16_mobile_344x280,dblk_cu16_mobile_344x280.265,dblk_cu16_mobile_344x280.md5,8
22412+1,local/dblksao_cu16_mobile_344x280,dblksao_cu16_mobile_344x280.265,dblksao_cu16_mobile_344x280.md5,8
22413+1,local/dblk_pu32_horses_832x448,dblk_pu32_horses_832x448.265,dblk_pu32_horses_832x448.md5,8
22414+1,local/intra_pred_21_laps,intra_pred_21_laps.265,intra_pred_21_laps.md5,8
22415--- /dev/null
22416+++ b/pi-util/conf_h265.2016_HEVC_v1.csv
22417@@ -0,0 +1,147 @@
22418+1,AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5
22419+1,AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5
22420+1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5
22421+1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5
22422+1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5
22423+1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5
22424+1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5
22425+1,AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5
22426+1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5
22427+1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5
22428+1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5
22429+1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5
22430+1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5
22431+1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5
22432+1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5
22433+1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5
22434+1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5
22435+1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5
22436+1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5
22437+1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5
22438+1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5
22439+1,DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5
22440+1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5
22441+1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5
22442+1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5
22443+1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5
22444+1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5
22445+1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5
22446+1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5
22447+1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5
22448+1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5
22449+1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5
22450+1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5
22451+1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5
22452+1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5
22453+1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5
22454+1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5
22455+1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5
22456+1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5
22457+1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5
22458+1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5
22459+1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5
22460+1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5
22461+1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5
22462+1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5
22463+1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5
22464+1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5
22465+1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5
22466+1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5
22467+1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5
22468+1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5
22469+1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5
22470+1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5
22471+1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5
22472+1,MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5
22473+1,MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5
22474+1,MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5
22475+1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5
22476+1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5
22477+1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5
22478+1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5
22479+1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5
22480+1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5
22481+1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5
22482+1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5
22483+1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5
22484+1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5
22485+1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5
22486+1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5
22487+1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5
22488+1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5
22489+1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5
22490+1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5
22491+1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5
22492+1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5
22493+1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5
22494+1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5
22495+1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5
22496+1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5
22497+1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5
22498+1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5
22499+1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5
22500+1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5
22501+1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5
22502+1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5
22503+1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5
22504+1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5
22505+1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5
22506+1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5
22507+1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5
22508+1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5
22509+1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5
22510+1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5
22511+1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5
22512+1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5
22513+1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5
22514+1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5
22515+1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5
22516+1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5
22517+1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5
22518+1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5
22519+1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5
22520+1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5
22521+1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5
22522+1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5
22523+1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5
22524+1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5
22525+1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5
22526+1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5
22527+1,SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5
22528+2,SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt
22529+2,SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt
22530+1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5
22531+1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5
22532+1,SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5
22533+1,SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5
22534+1,SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5
22535+1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5
22536+1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5
22537+1,STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5
22538+1,STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5
22539+1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5
22540+1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5
22541+1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5
22542+1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5
22543+1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5
22544+1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5
22545+3,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth
22546+1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5
22547+1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5
22548+3,VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???
22549+1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5
22550+1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5
22551+1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5
22552+1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5
22553+1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5
22554+1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5
22555+1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5
22556+1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5
22557+1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5
22558+1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5
22559+1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5
22560+1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5
22561+1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5
22562+1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5
22563+1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5
22564+1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5
22565--- /dev/null
22566+++ b/pi-util/conf_h265.csv
22567@@ -0,0 +1,144 @@
22568+1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.md5
22569+1,AMP_A_Samsung_6,AMP_A_Samsung_6.bin,AMP_A_Samsung_6.md5
22570+1,AMP_B_Samsung_6,AMP_B_Samsung_6.bin,AMP_B_Samsung_6.md5
22571+1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5
22572+1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5
22573+1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5
22574+1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5
22575+1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5
22576+1,AMVP_C_Samsung_6,AMVP_C_Samsung_6.bin,AMVP_C_Samsung_6.md5
22577+1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5
22578+1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5
22579+1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5
22580+1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5
22581+1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5
22582+1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5
22583+1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5
22584+1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5
22585+1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5
22586+1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5
22587+1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5
22588+1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5
22589+1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5
22590+1,DBLK_A_MAIN10_VIXS_3,DBLK_A_MAIN10_VIXS_3.bit,DBLK_A_MAIN10_VIXS_3.md5
22591+1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5
22592+1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5
22593+1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5
22594+1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5
22595+1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5
22596+1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5
22597+1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5
22598+1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5
22599+1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5
22600+1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5
22601+1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5
22602+1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5
22603+1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5
22604+1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5
22605+1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5
22606+1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5
22607+1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5
22608+1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5
22609+1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5
22610+1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5
22611+1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5
22612+1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5
22613+1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5
22614+1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5
22615+1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5
22616+1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5
22617+1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5
22618+1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5
22619+1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5
22620+1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5
22621+1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5
22622+1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5
22623+1,MAXBINS_A_TI_4,MAXBINS_A_TI_4.bit,MAXBINS_A_TI_4.md5
22624+1,MAXBINS_B_TI_4,MAXBINS_B_TI_4.bit,MAXBINS_B_TI_4.md5
22625+1,MAXBINS_C_TI_4,MAXBINS_C_TI_4.bit,MAXBINS_C_TI_4.md5
22626+1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5
22627+1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5
22628+1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5
22629+1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5
22630+1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5
22631+1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5
22632+1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5
22633+1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5
22634+1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5
22635+1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5
22636+1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5
22637+1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5
22638+1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5
22639+1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5
22640+1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5
22641+1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5
22642+1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5
22643+1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5
22644+1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5
22645+1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5
22646+1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5
22647+1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5
22648+1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5
22649+1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5
22650+1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5
22651+1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5
22652+1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5
22653+1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5
22654+1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5
22655+1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5
22656+1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5
22657+1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5
22658+1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5
22659+1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5
22660+1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5
22661+1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5
22662+1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5
22663+1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5
22664+1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5
22665+1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5
22666+1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5
22667+1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5
22668+1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5
22669+1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5
22670+1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5
22671+1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5
22672+1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5
22673+1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5
22674+1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5
22675+1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5
22676+1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5
22677+1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5
22678+1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5
22679+1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5
22680+1,SLIST_A_Sony_4,str.bin,SLIST_A_Sony_4_yuv.md5
22681+1,SLIST_B_Sony_8,str.bin,SLIST_B_Sony_8_yuv.md5
22682+1,SLIST_C_Sony_3,str.bin,SLIST_C_Sony_3_yuv.md5
22683+1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5
22684+1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5
22685+1,STRUCT_A_Samsung_6,STRUCT_A_Samsung_6.bin,STRUCT_A_Samsung_6.md5
22686+1,STRUCT_B_Samsung_6,STRUCT_B_Samsung_6.bin,STRUCT_B_Samsung_6.md5
22687+1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5
22688+1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5
22689+1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5
22690+1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5
22691+1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5
22692+1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5
22693+0,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # Y/C bit depth unmatched
22694+1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5
22695+1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5
22696+1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5
22697+1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5
22698+1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5
22699+1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5
22700+1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5
22701+1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5
22702+1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5
22703+1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5
22704+1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5
22705+1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5
22706+1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5
22707+1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5
22708+1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5
22709+1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5
22710+1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5
22711+1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5
22712--- /dev/null
22713+++ b/pi-util/conf_native.sh
22714@@ -0,0 +1,135 @@
22715+echo "Configure for native build"
22716+
22717+FFSRC=`pwd`
22718+MC=`dpkg --print-architecture`
22719+BUILDBASE=$FFSRC/out
22720+
22721+#RPI_KEEPS="-save-temps=obj"
22722+RPI_KEEPS=""
22723+
22724+NOSHARED=
22725+MMAL=
22726+USR_PREFIX=
22727+TOOLCHAIN=
22728+R=rel
22729+
22730+while [ "$1" != "" ] ; do    # consume one switch per iteration
22731+    case $1 in
22732+	--noshared)
22733+	    NOSHARED=1
22734+	    ;;
22735+	--mmal)
22736+	    MMAL=1
22737+	    ;;
22738+	--usr)
22739+	    USR_PREFIX=/usr
22740+	    ;;
22741+	--tsan)
22742+	    TOOLCHAIN="--toolchain=gcc-tsan"; R=tsan
22743+	    ;;
22744+	*)
22745+	    echo "Usage $0: [--noshared] [--mmal] [--usr] [--tsan]"
22746+	    echo "  noshared  Build static libs and executable - good for testing"
22747+	    echo "  mmal      Build mmal decoders"
22748+	    echo "  usr       Set install prefix to /usr [default=<build-dir>/install]"
22749+	    echo "  tsan      Configure with --toolchain=gcc-tsan; build dir gets a -tsan suffix"
22750+	    exit 1
22751+	    ;;
22752+    esac
22753+    shift
22754+done
22755+
22756+
22757+MCOPTS=
22758+RPI_INCLUDES=
22759+RPI_LIBDIRS=
22760+RPI_DEFINES=
22761+RPI_EXTRALIBS=
22762+
22763+# uname -m gives kernel type which may not have the same
22764+# 32/64bitness as userspace :-( getconf should provide the answer
22765+# but use uname to check we are on the right processor
22766+MC=`uname -m`
22767+LB=`getconf LONG_BIT`
22768+if [ "$MC" == "armv7l" ] || [ "$MC" == "aarch64" ]; then
22769+  if [ "$LB" == "32" ]; then
22770+    echo "M/C armv7"
22771+    A=arm-linux-gnueabihf
22772+    B=armv7
22773+    MCOPTS="--arch=armv6t2 --cpu=cortex-a7"
22774+    RPI_DEFINES=-mfpu=neon-vfpv4
22775+  elif [ "$LB" == "64" ]; then
22776+    echo "M/C aarch64"
22777+    A=aarch64-linux-gnu
22778+    B=arm64
22779+  else
22780+    echo "Unknown LONG_BIT name: $LB"
22781+    exit 1
22782+  fi
22783+else
22784+  echo "Unknown machine name: $MC"
22785+  exit 1
22786+fi
22787+
22788+if [ $MMAL ]; then
22789+  RPI_OPT_VC=/opt/vc
22790+  RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux"
22791+  RPI_LIBDIRS="-L$RPI_OPT_VC/lib"
22792+  RPI_DEFINES="$RPI_DEFINES -D__VCCOREVER__=0x4000000"
22793+  RPI_EXTRALIBS="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm -Wl,--end-group"
22794+  RPIOPTS="--enable-mmal"
22795+else
22796+  RPIOPTS="--disable-mmal"
22797+fi
22798+
22799+C=`lsb_release -sc`
22800+V=`cat RELEASE`
22801+
22802+SHARED_LIBS="--enable-shared"
22803+if [ $NOSHARED ]; then
22804+  SHARED_LIBS="--disable-shared"
22805+  OUT=$BUILDBASE/$B-$C-$V-static-$R
22806+  echo Static libs
22807+else
22808+  echo Shared libs
22809+  OUT=$BUILDBASE/$B-$C-$V-shared-$R
22810+fi
22811+
22812+if [ ! $USR_PREFIX ]; then
22813+  USR_PREFIX=$OUT/install
22814+fi
22815+LIB_PREFIX=$USR_PREFIX/lib/$A
22816+INC_PREFIX=$USR_PREFIX/include/$A
22817+
22818+echo Destination directory: $OUT
22819+mkdir -p $OUT
22820+# Nothing under here needs to be tracked by git - including this .gitignore!
22821+echo "**" > $BUILDBASE/.gitignore
22822+cd $OUT
22823+
22824+$FFSRC/configure \
22825+ --prefix=$USR_PREFIX\
22826+ --libdir=$LIB_PREFIX\
22827+ --incdir=$INC_PREFIX\
22828+ $MCOPTS\
22829+ $TOOLCHAIN\
22830+ --disable-stripping\
22831+ --disable-thumb\
22832+ --enable-sand\
22833+ --enable-v4l2-request\
22834+ --enable-libdrm\
22835+ --enable-vout-egl\
22836+ --enable-vout-drm\
22837+ --enable-gpl\
22838+ $SHARED_LIBS\
22839+ $RPIOPTS\
22840+ --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES"\
22841+ --extra-cxxflags="$RPI_DEFINES $RPI_INCLUDES"\
22842+ --extra-ldflags="$RPI_LIBDIRS"\
22843+ --extra-libs="$RPI_EXTRALIBS"\
22844+ --extra-version="rpi"
22845+
22846+echo "Configured into $OUT"
22847+
22848+# gcc option for getting asm listing
22849+# -Wa,-ahls
22850--- /dev/null
22851+++ b/pi-util/ffconf.py
22852@@ -0,0 +1,215 @@
22853+#!/usr/bin/env python3
22854+
22855+import string
22856+import os
22857+import subprocess
22858+import re
22859+import argparse
22860+import sys
22861+import csv
22862+from stat import *
22863+
22864+CODEC_HEVC_RPI  = 1
22865+HWACCEL_RPI     = 2
22866+HWACCEL_DRM     = 3
22867+HWACCEL_VAAPI   = 4
22868+
22869+def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_exec):
22870+    hwaccel = ""
22871+    if dectype == HWACCEL_RPI:
22872+        hwaccel = "rpi"
22873+    elif dectype == HWACCEL_DRM:
22874+        hwaccel = "drm"
22875+    elif dectype == HWACCEL_VAAPI:
22876+        hwaccel = "vaapi"
22877+
22878+    pix_fmt = []
22879+    if pix == "8":
22880+        pix_fmt = ["-pix_fmt", "yuv420p"]
22881+    elif pix == "10":
22882+        pix_fmt = ["-pix_fmt", "yuv420p10le"]
22883+    elif pix == "12":
22884+        pix_fmt = ["-pix_fmt", "yuv420p12le"]
22885+
22886+    tmp_root = "/tmp"
22887+
22888+    names = srcname.split('/')
22889+    while len(names) > 1:
22890+        tmp_root = os.path.join(tmp_root, names[0])
22891+        del names[0]
22892+    name = names[0]
22893+
22894+    if not os.path.exists(tmp_root):
22895+        os.makedirs(tmp_root)
22896+
22897+    dec_file = os.path.join(tmp_root, name + ".dec.md5")
22898+    try:
22899+        os.remove(dec_file)
22900+    except:
22901+        pass
22902+
22903+    flog = open(os.path.join(tmp_root, name + ".log"), "wt")
22904+
22905+    ffargs = [ffmpeg_exec, "-flags", "unaligned", "-hwaccel", hwaccel, "-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] + pix_fmt + ["-f", "md5", dec_file]
22906+
22907+    # Unaligned needed for cropping conformance
22908+    if hwaccel:
22909+        rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT)
22910+    else:
22911+        rstr = subprocess.call(
22912+            [ffmpeg_exec, "-flags", "unaligned", "-vcodec", vcodec, "-i", os.path.join(fileroot, es_file), "-f", "md5", dec_file],
22913+            stdout=flog, stderr=subprocess.STDOUT)
22914+
22915+    try:
22916+        m1 = None
22917+        m2 = None
22918+        with open(os.path.join(fileroot, md5_file)) as f:
22919+            for line in f:
22920+                m1 = re.search("[0-9a-f]{32}", line.lower())
22921+                if m1:
22922+                    break
22923+
22924+        with open(dec_file) as f:
22925+            m2 = re.search("[0-9a-f]{32}", f.readline())
22926+    except:
22927+        pass
22928+
22929+    if  m1 and m2 and m1.group() == m2.group():
22930+        print("Match: " + m1.group(), file=flog)
22931+        rv = 0
22932+    elif not m1:
22933+        print("****** Cannot find m1", file=flog)
22934+        rv = 3
22935+    elif not m2:
22936+        print("****** Cannot find m2", file=flog)
22937+        rv = 2
22938+    else:
22939+        print("****** Mismatch: " + m1.group() + " != " + m2.group(), file=flog)
22940+        rv = 1
22941+    flog.close()
22942+    return rv
22943+
22944+def scandir(root):
22945+    aconf = []
22946+    ents = os.listdir(root)
22947+    ents.sort(key=str.lower)
22948+    for name in ents:
22949+        test_path = os.path.join(root, name)
22950+        if S_ISDIR(os.stat(test_path).st_mode):
22951+            files = os.listdir(test_path)
22952+            es_file = "?"
22953+            md5_file = "?"
22954+            for f in files:
22955+                (base, ext) = os.path.splitext(f)
22956+                if base[0] == '.':
22957+                    pass
22958+                elif ext == ".bit" or ext == ".bin":
22959+                    es_file = f
22960+                elif ext == ".md5" or (ext == ".txt" and (base[-4:] == "_md5" or base[-6:] == "md5sum")):
22961+                    if md5_file == "?":
22962+                        md5_file = f
22963+                    elif base[-3:] == "yuv":
22964+                        md5_file = f
22965+            aconf.append((1, name, es_file, md5_file))
22966+    return aconf
22967+
22968+def runtest(name, tests):
22969+    if not tests:
22970+        return True
22971+    for t in tests:
22972+        if name[0:len(t)] == t or name.find("/" + t) != -1:
22973+            return True
22974+    return False
22975+
22976+def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec):
22977+    unx_failures = []
22978+    unx_success = []
22979+    failures = 0
22980+    successes = 0
22981+    for a in csva:
22982+        exp_test = int(a[0])
22983+        if (exp_test and runtest(a[1], tests)):
22984+            name = a[1]
22985+            print ("==== ", name, end="")
22986+            sys.stdout.flush()
22987+
22988+            rv = testone(os.path.join(test_root, name), name, a[2], a[3], a[4], dectype=dectype, vcodec=vcodec, ffmpeg_exec=ffmpeg_exec)
22989+            if (rv == 0):
22990+                successes += 1
22991+            else:
22992+                failures += 1
22993+
22994+            if (rv == 0):
22995+                if exp_test == 2:
22996+                    print(": * OK *")
22997+                    unx_success.append(name)
22998+                else:
22999+                    print(": ok")
23000+            elif exp_test == 2 and rv == 1:
23001+                print(": fail")
23002+            elif exp_test == 3 and rv == 2:
23003+                # Call an expected "crash" an abort
23004+                print(": abort")
23005+            else:
23006+                unx_failures.append(name)
23007+                if rv == 1:
23008+                    print(": * FAIL *")
23009+                elif (rv == 2) :
23010+                    print(": * CRASH *")
23011+                elif (rv == 3) :
23012+                    print(": * MD5 MISSING *")
23013+                else :
23014+                    print(": * BANG *")
23015+
23016+    if unx_failures or unx_success:
23017+        print("Unexpected Failures:", unx_failures)
23018+        print("Unexpected Success: ", unx_success)
23019+    else:
23020+        print("All tests normal:", successes, "ok,", failures, "failed")
23021+
23022+
23023+class ConfCSVDialect(csv.Dialect):
23024+    delimiter = ','
23025+    doublequote = True
23026+    lineterminator = '\n'
23027+    quotechar='"'
23028+    quoting = csv.QUOTE_MINIMAL
23029+    skipinitialspace = True
23030+    strict = True
23031+
23032+if __name__ == '__main__':
23033+
23034+    argp = argparse.ArgumentParser(description="FFmpeg h265 conformance tester")
23035+    argp.add_argument("tests", nargs='*')
23036+    argp.add_argument("--pi4", action='store_true', help="Force pi4 cmd line")
23037+    argp.add_argument("--drm", action='store_true', help="Force v4l2 drm cmd line")
23038+    argp.add_argument("--vaapi", action='store_true', help="Force vaapi cmd line")
23039+    argp.add_argument("--test_root", default="/opt/conform/h265.2016", help="Root dir for test")
23040+    argp.add_argument("--csvgen", action='store_true', help="Generate CSV file for dir")
23041+    argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename")
23042+    argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use")
23043+    argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name")
23044+    args = argp.parse_args()
23045+
23046+    if args.csvgen:
23047+        csv.writer(sys.stdout).writerows(scandir(args.test_root))
23048+        exit(0)
23049+
23050+    with open(args.csv, 'rt') as csvfile:
23051+        csva = [a for a in csv.reader(csvfile, ConfCSVDialect())]
23052+
23053+    dectype = CODEC_HEVC_RPI
23054+    if os.path.exists("/dev/rpivid-hevcmem"):
23055+        dectype = HWACCEL_RPI
23056+    if args.drm or os.path.exists("/sys/module/rpivid_hevc"):
23057+        dectype = HWACCEL_DRM
23058+
23059+    if args.pi4:
23060+        dectype = HWACCEL_RPI
23061+    elif args.drm:
23062+        dectype = HWACCEL_DRM
23063+    elif args.vaapi:
23064+        dectype = HWACCEL_VAAPI
23065+
23066+    doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args.ffmpeg)
23067+
23068--- /dev/null
23069+++ b/pi-util/ffperf.py
23070@@ -0,0 +1,128 @@
23071+#!/usr/bin/env python3
23072+
23073+import time
23074+import string
23075+import os
23076+import tempfile
23077+import subprocess
23078+import re
23079+import argparse
23080+import sys
23081+import csv
23082+from stat import *
23083+
23084+class tstats:
23085+    close_threshold = 0.01
23086+
23087+    def __init__(self, stats_dict=None):
23088+        if stats_dict != None:
23089+            self.name = stats_dict["name"]
23090+            self.elapsed = float(stats_dict["elapsed"])
23091+            self.user = float(stats_dict["user"])
23092+            self.sys = float(stats_dict["sys"])
23093+
23094+    def times_str(self):
23095+        ctime = self.sys + self.user
23096+        return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed)
23097+
23098+    def dict(self):
23099+        return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys}
23100+
23101+    def is_close(self, other):
23102+        return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold
23103+
23104+    def __lt__(self, other):
23105+        return self.elapsed < other.elapsed
23106+    def __gt__(self, other):
23107+        return self.elapsed > other.elapsed
23108+
23109+    def time_file(name, prefix, ffmpeg="./ffmpeg"):
23110+        stats = tstats()
23111+        stats.name = name
23112+        start_time = time.clock_gettime(time.CLOCK_MONOTONIC);
23113+        cproc = subprocess.Popen([ffmpeg, "-no_cvt_hw",
23114+                                  "-vcodec", "hevc_rpi",
23115+                                  "-t", "30", "-i", prefix + name,
23116+                                  "-f", "vout_rpi", os.devnull], bufsize=-1, stdout=flog, stderr=flog);
23117+        pinfo = os.wait4(cproc.pid, 0)
23118+        end_time = time.clock_gettime(time.CLOCK_MONOTONIC);
23119+        stats.elapsed = end_time - start_time
23120+        stats.user = pinfo[2].ru_utime
23121+        stats.sys = pinfo[2].ru_stime
23122+        return stats
23123+
23124+
23125+def common_prefix(s1, s2):
23126+    for i in range(min(len(s1),len(s2))):
23127+        if s1[i] != s2[i]:
23128+            return s1[:i]
23129+    return s1[:i+1]
23130+
23131+def main():
23132+    global flog
23133+
23134+    argp = argparse.ArgumentParser(description="FFmpeg performance tester", epilog="""
23135+To blank the screen before starting use "xdg-screensaver activate"
23136+(For some reason this doesn't seem to work from within python).
23137+""")
23138+
23139+    argp.add_argument("streams", nargs='*')
23140+    argp.add_argument("--csv_out", default="ffperf_out.csv", help="CSV output filename")
23141+    argp.add_argument("--csv_in", help="CSV input filename")
23142+    argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).")
23143+    argp.add_argument("--repeat", default=3, type=int, help="Run repeat count")
23144+    argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable")
23145+
23146+    args = argp.parse_args()
23147+
23148+    csv_out = csv.DictWriter(open(args.csv_out, 'w', newline=''), ["name", "elapsed", "user", "sys"])
23149+    csv_out.writeheader()
23150+
23151+    stats_in = {}
23152+    if args.csv_in != None:
23153+        with open(args.csv_in, 'r', newline='') as f_in:
23154+            stats_in = {x["name"]:tstats(x) for x in csv.DictReader(f_in)}
23155+
23156+    flog = open(os.path.join(tempfile.gettempdir(), "ffperf.log"), "wt")
23157+
23158+    streams = args.streams
23159+    if not streams:
23160+        if not stats_in:
23161+            print ("No source streams specified")
23162+            return 1
23163+        prefix = "" if args.prefix == None else args.prefix
23164+        streams = [k for k in stats_in]
23165+    elif args.prefix != None:
23166+        prefix = args.prefix
23167+    else:
23168+        prefix = streams[0]
23169+        for f in streams[1:]:
23170+            prefix = common_prefix(prefix, f)
23171+        pp = prefix.rpartition(os.sep)
23172+        prefix = pp[0] + pp[1]
23173+        streams = [s[len(prefix):] for s in streams]
23174+
23175+    for f in sorted(streams, key=lambda x : "~" * x.count(os.sep) + x.lower()):
23176+        print ("====", f)
23177+
23178+        t0 = tstats({"name":f, "elapsed":999, "user":999, "sys":999})
23179+        for i in range(args.repeat):
23180+            t = tstats.time_file(f, prefix, args.ffmpeg)
23181+            print ("...", t.times_str())
23182+            if t0 > t:
23183+                t0 = t
23184+
23185+        if t0.name in stats_in:
23186+            pstat = stats_in[t0.name]
23187+            print("---" if pstat.is_close(t0) else "<<<" if t0 < pstat else ">>>", pstat.times_str())
23188+
23189+        csv_out.writerow(t0.dict())
23190+
23191+        print ()
23192+
23193+    return 0
23194+
23195+
23196+if __name__ == '__main__':
23197+    exit(main())
23198+
23199--- /dev/null
23200+++ b/pi-util/genpatch.sh
23201@@ -0,0 +1,35 @@
23202+set -e
23203+
23204+NOPATCH=
23205+if [ "$1" == "--notag" ]; then
23206+  shift
23207+  NOPATCH=1
23208+fi
23209+
23210+if [ "$1" == "" ]; then
23211+  echo Usage: $0 [--notag] \<patch_tag\>
23212+  echo e.g.: $0 mmal_4
23213+  exit 1
23214+fi
23215+
23216+VERSION=`cat RELEASE`
23217+if [ "$VERSION" == "" ]; then
23218+  echo Can\'t find version RELEASE
23219+  exit 1
23220+fi
23221+
23222+PATCHFILE=../ffmpeg-$VERSION-$1.patch
23223+
23224+if [ $NOPATCH ]; then
23225+  echo Not tagged
23226+else
23227+  # Only continue if everything is committed
23228+  git diff --name-status --exit-code
23229+
23230+  PATCHTAG=pi/$VERSION/$1
23231+  echo Tagging: $PATCHTAG
23232+
23233+  git tag $PATCHTAG
23234+fi
23235+echo Generating patch: $PATCHFILE
23236+git diff n$VERSION -- > $PATCHFILE
23237--- /dev/null
23238+++ b/pi-util/make_array.py
23239@@ -0,0 +1,23 @@
23240+#!/usr/bin/env python
23241+
23242+# Usage
23243+#   make_array file.bin
23244+#   Produces file.h with array of bytes.
23245+#
23246+import sys
23247+for file in sys.argv[1:]:
23248+  prefix,suffix = file.split('.')
23249+  assert suffix=='bin'
23250+  name=prefix.split('/')[-1]
23251+  print 'Converting',file
23252+  with open(prefix+'.h','wb') as out:
23253+    print >>out, 'static const unsigned char',name,'[] = {'
23254+    with open(file,'rb') as fd:
23255+      i = 0
23256+      for byte in fd.read():
23257+        print >>out, '0x%02x, ' % ord(byte),
23258+        i = i + 1
23259+        if i % 8 == 0:
23260+          print >>out, ' // %04x' % (i - 8)
23261+    print >>out,'};'
23262+
23263--- /dev/null
23264+++ b/pi-util/mkinst.sh
23265@@ -0,0 +1,5 @@
23266+set -e
23267+
23268+make install
23269+
23270+cp -r install/* ../vlc/sysroot/raspian_stretch_pi1-sysroot/usr
23271--- /dev/null
23272+++ b/pi-util/patkodi.sh
23273@@ -0,0 +1,9 @@
23274+set -e
23275+KODIBASE=/home/jc/rpi/kodi/xbmc
23276+JOBS=-j20
23277+make $JOBS
23278+git diff xbmc/release/4.3-kodi > $KODIBASE/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch
23279+make -C $KODIBASE/tools/depends/target/ffmpeg $JOBS
23280+make -C $KODIBASE/build install
23281+
23282+
23283--- /dev/null
23284+++ b/pi-util/perfcmp.py
23285@@ -0,0 +1,101 @@
23286+#!/usr/bin/env python3
23287+
23288+import time
23289+import string
23290+import os
23291+import tempfile
23292+import subprocess
23293+import re
23294+import argparse
23295+import sys
23296+import csv
23297+from stat import *
23298+
23299+class tstats:
23300+    close_threshold = 0.01
23301+
23302+    def __init__(self, stats_dict=None):
23303+        if stats_dict != None:
23304+            self.name = stats_dict["name"]
23305+            self.elapsed = float(stats_dict["elapsed"])
23306+            self.user = float(stats_dict["user"])
23307+            self.sys = float(stats_dict["sys"])
23308+
23309+    def times_str(self):
23310+        ctime = self.sys + self.user
23311+        return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed)
23312+
23313+    def dict(self):
23314+        return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys}
23315+
23316+    def is_close(self, other):
23317+        return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold
23318+
23319+    def __lt__(self, other):
23320+        return self.elapsed < other.elapsed
23321+    def __gt__(self, other):
23322+        return self.elapsed > other.elapsed
23323+
23324+    def time_file(name, prefix):
23325+        stats = tstats()
23326+        stats.name = name
23327+        start_time = time.clock_gettime(time.CLOCK_MONOTONIC);
23328+        cproc = subprocess.Popen(["./ffmpeg", "-t", "30", "-i", prefix + name,
23329+                                  "-f", "null", os.devnull], bufsize=-1, stdout=flog, stderr=flog);
23330+        pinfo = os.wait4(cproc.pid, 0)
23331+        end_time = time.clock_gettime(time.CLOCK_MONOTONIC);
23332+        stats.elapsed = end_time - start_time
23333+        stats.user = pinfo[2].ru_utime
23334+        stats.sys = pinfo[2].ru_stime
23335+        return stats
23336+
23337+
23338+def common_prefix(s1, s2):
23339+    for i in range(min(len(s1),len(s2))):
23340+        if s1[i] != s2[i]:
23341+            return s1[:i]
23342+    return s1[:i+1]
23343+
23344+def main():
23345+    argp = argparse.ArgumentParser(description="FFmpeg performance compare")
23346+
23347+    argp.add_argument("stream0", help="CSV to compare")
23348+    argp.add_argument("stream1", nargs='?', default="ffperf_out.csv", help="CSV to compare")
23349+
23350+    args = argp.parse_args()
23351+
23352+    with open(args.stream0, 'r', newline='') as f_in:
23353+        stats0 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)}
23354+    with open(args.stream1, 'r', newline='') as f_in:
23355+        stats1 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)}
23356+
23357+    print (args.stream0, "<<-->>", args.stream1)
23358+    print ()
23359+
23360+    for f in sorted(stats0.keys() | stats1.keys(), key=lambda x : "~" * x.count(os.sep) + x.lower()):
23361+       if not (f in stats0) :
23362+           print ("           XX               :", f)
23363+           continue
23364+       if not (f in stats1) :
23365+           print ("       XX                   :", f)
23366+           continue
23367+
23368+       s0 = stats0[f]
23369+       s1 = stats1[f]
23370+
23371+       pcent = ((s0.elapsed - s1.elapsed) / s0.elapsed) * 100.0
23372+       thresh = 0.3
23373+       tc = 6
23374+
23375+       nchar = min(tc - 1, int(abs(pcent) / thresh))
23376+       cc = "  --  " if nchar == 0 else "<" * nchar + " " * (tc - nchar) if pcent < 0 else " " * (tc - nchar) + ">" * nchar
23377+
23378+       print ("%6.2f %s%6.2f (%+5.2f) : %s" %
23379+           (s0.elapsed, cc, s1.elapsed, pcent, f))
23380+
23381+    return 0
23382+
23383+
23384+if __name__ == '__main__':
23385+    exit(main())
23386+
23387--- /dev/null
23388+++ b/pi-util/qem.sh
23389@@ -0,0 +1,9 @@
23390+TARGET_DIR=../src/eupton_vc4dev_2012a/software/vc4/DEV/applications/tutorials/user_shader_example_tex
23391+QASM=python\ ../local/bin/qasm.py
23392+SRC_FILE=libavcodec/rpi_hevc_shader.qasm
23393+DST_BASE=shader
23394+
23395+cp libavcodec/rpi_hevc_shader_cmd.h $TARGET_DIR
23396+$QASM -mc_c:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.c
23397+$QASM -mc_h:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.h
23398+
23399--- /dev/null
23400+++ b/pi-util/testfilt.py
23401@@ -0,0 +1,83 @@
23402+#!/usr/bin/env python3
23403+
23404+import string
23405+import os
23406+import subprocess
23407+import re
23408+import argparse
23409+import sys
23410+import csv
23411+from stat import *
23412+
23413+class validator:
23414+    def __init__(self):
23415+        self.ok = False
23416+
23417+    def isok(self):
23418+        return self.ok
23419+
23420+    def setok(self):
23421+        self.ok = True
23422+
23423+class valid_regex(validator):
23424+    def __init__(self, regex):
23425+        super().__init__()
23426+        self.regex = re.compile(regex)
23427+
23428+    def scanline(self, line):
23429+        if self.isok() or self.regex.search(line):
23430+            self.setok()
23431+
23432+
23433+def validate(validators, flog):  # feed every line of flog to every validator; return True only if all passed
23434+    for line in flog:
23435+        for v in validators:
23436+            v.scanline(line)
23437+
23438+    ok = True
23439+    for v in validators:
23440+        if not v.isok():
23441+            ok = False
23442+            # complain once for each validator that was never satisfied
23443+            print("Test failed")
23444+
23445+    if ok:
23446+        print("OK")
23447+    return ok
23448+
23449+def runtest(name, ffmpeg, args, suffix, validators):  # run ffmpeg once, log its output, return validate()'s verdict
23450+    log_root = os.path.join("/tmp", "testfilt", name)
23451+    ofilename = os.path.join(log_root, name + suffix)  # output file is passed as ffmpeg's last argument
23452+
23453+    if not os.path.exists(log_root):
23454+        os.makedirs(log_root)
23455+
23456+    try:
23457+        os.remove(ofilename)  # best effort: drop any output left by a previous run
23458+    except OSError:
23459+        pass
23460+
23461+    ffargs = [ffmpeg] + args + [ofilename]
23462+    # Capture ffmpeg's stdout and stderr together in <log_root>/<name>.log.
23463+    with open(os.path.join(log_root, name + ".log"), "wb") as flog:
23464+        subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT, text=False)
23465+    # The log is closed at this point, before it is reopened as text for validation.
23466+
23467+    with open(os.path.join(log_root, name + ".log"), "rt") as flog:
23468+        return validate(validators, flog)
23469+
23470+def sayok(log_root, flog):  # always-pass stub: both parameters are ignored
23471+    print("Woohoo")
23472+    return True
23473+
23474+if __name__ == '__main__':
23475+    # Entry point: runs one hard-coded h264_v4l2m2m transcode ("ATest") and checks the log for the expected frame count.
23476+    argp = argparse.ArgumentParser(description="FFmpeg filter tester")
23477+    argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name")
23478+    args = argp.parse_args()
23479+
23480+    runtest("ATest", args.ffmpeg, ["-v", "verbose", "-no_cvt_hw", "-an", "-c:v", "h264_v4l2m2m", "-i",
23481+                                   "/home/johncox/server/TestMedia/Sony/jellyfish-10-mbps-hd-h264.mkv",
23482+#                                    "/home/jc/rpi/streams/jellyfish-3-mbps-hd-h264.mkv",
23483+                                   "-c:v", "h264_v4l2m2m", "-b:v", "2M"], ".mkv",
23484+            [valid_regex(r'Output stream #0:0 \(video\): 900 frames encoded; 900 packets muxed')])
23485--- /dev/null
23486+++ b/pi-util/v3dusage.py
23487@@ -0,0 +1,128 @@
23488+#!/usr/bin/env python
23489+
23490+import sys
23491+import argparse
23492+import re
23493+
23494+def do_logparse(logname):
23495+    """Parse the log file `logname` and print per-unit busy/idle time, QPU cycle, TMU stall and L2 cache totals."""
23496+    rmatch = re.compile(r'^([0-9]+\.[0-9]{3}): (done )?((vpu0)|(vpu1)|(qpu1)) ([A-Z_]+) cb:([0-9a-f]+) ')
23497+    rqcycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs doing vertex/coordinate shading +([0-9]+)$')
23498+    rqtscycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs stalled waiting for TMUs +([0-9]+)$')
23499+    rl2hits = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: L2C Total Level 2 cache ([a-z]+) +([0-9]+)$')
23500+
23501+    ttotal = {'idle':0.0}
23502+    tstart = {}
23503+    qctotal = {}
23504+    qtstotal = {}
23505+    l2hits = {}
23506+    l2total = {}
23507+    time0 = None
23508+    idle_start = None
23509+    qpu_op_no = 0
23510+    op_count = 0
23511+
23512+    with open(logname, "rt") as infile:
23513+        for line in infile:
23514+            match = rmatch.match(line)
23515+            if match:
23516+#                print match.group(1), ":", match.group(2), ":", match.group(3), ":", match.group(7), ":"
23517+                time = float(match.group(1))
23518+                unit = match.group(3)
23519+                opstart = not match.group(2)  # a record without the "done " prefix starts an operation
23520+                optype = match.group(7)
23521+                hascb = match.group(8) != "0"
23522+
23523+                if unit == 'qpu1':
23524+                    unit = unit + "." + str(qpu_op_no)  # qpu1 ops are tracked as qpu1.<n>
23525+                    if not opstart:
23526+                        if hascb or optype == 'EXECUTE_SYNC':
23527+                            qpu_op_no = 0
23528+                        else:
23529+                            qpu_op_no += 1
23530+
23531+                # Ignore sync type
23532+                if optype == 'EXECUTE_SYNC':
23533+                    continue
23534+
23535+                if not time0:
23536+                    time0 = time
23537+
23538+                if opstart:
23539+                    tstart[unit] = time
23540+                elif unit in tstart:
23541+                    op_count += 1
23542+                    if not unit in ttotal:
23543+                        ttotal[unit] = 0.0
23544+                    ttotal[unit] += time - tstart[unit]
23545+                    del tstart[unit]
23546+
23547+                if not idle_start and not tstart:  # no unit has an open operation: idle period begins
23548+                    idle_start = time
23549+                elif idle_start and tstart:  # some unit started again: close the idle period
23550+                    ttotal['idle'] += time - idle_start
23551+                    idle_start = None
23552+
23553+            match = rqcycle.match(line)
23554+            if match:
23555+                unit = "qpu1." + str(qpu_op_no)
23556+                if not unit in qctotal:
23557+                    qctotal[unit] = 0
23558+                qctotal[unit] += int(match.group(2))
23559+
23560+            match = rqtscycle.match(line)
23561+            if match:
23562+                unit = "qpu1." + str(qpu_op_no)
23563+                if not unit in qtstotal:
23564+                    qtstotal[unit] = 0
23565+                qtstotal[unit] += int(match.group(2))
23566+
23567+            match = rl2hits.match(line)
23568+            if match:
23569+                unit = "qpu1." + str(qpu_op_no)
23570+                if not unit in l2total:
23571+                    l2total[unit] = 0
23572+                    l2hits[unit] = 0
23573+                l2total[unit] += int(match.group(3))
23574+                if match.group(2) == "hits":
23575+                    l2hits[unit] += int(match.group(3))
23576+
23577+    # Each print below takes one pre-formatted string, so it behaves the same as a Python 2 statement and a Python 3 call.
23578+    if not time0:
23579+        print("No v3d profile records found")
23580+    else:
23581+        tlogged = time - time0  # `time` still holds the timestamp of the last matched record
23582+
23583+        print("Logged time: %s   Op count: %s" % (tlogged, op_count))
23584+        for unit in sorted(ttotal):
23585+            print('%6s: %10.3f    %7.3f%%' % (unit, ttotal[unit], ttotal[unit] * 100.0 / tlogged))
23586+        print("")
23587+        for unit in sorted(qctotal):
23588+            if not unit in qtstotal:
23589+                qtstotal[unit] = 0
23590+            print('%6s: Qcycles: %10d, TMU stall: %10d (%7.3f%%)' % (unit, qctotal[unit], qtstotal[unit], (qtstotal[unit] * 100.0)/qctotal[unit]))
23591+            if unit in l2total:
23592+                print('        L2Total: %10d, hits:      %10d (%7.3f%%)' % (l2total[unit], l2hits[unit], (l2hits[unit] * 100.0)/l2total[unit]))
23593+
23594+
23595+
23596+if __name__ == '__main__':
23597+    argp = argparse.ArgumentParser(
23598+        formatter_class=argparse.RawDescriptionHelpFormatter,
23599+        description="QPU/VPU perf summary from VC logging",
23600+        epilog = """
23601+Will also summarise TMU stalls if logging requests set in qpu noflush param
23602+in the profiled code.
23603+
23604+Example use:
23605+  vcgencmd set_logging level=0xc0
23606+  <command to profile>
23607+  sudo vcdbg log msg >& t.log
23608+  v3dusage.py t.log
23609+""")
23610+    # Single positional argument: path of the captured log, handed to do_logparse().
23611+    argp.add_argument("logfile")
23612+    args = argp.parse_args()
23613+
23614+    do_logparse(args.logfile)
23615+
23616--- a/tests/checkasm/Makefile
23617+++ b/tests/checkasm/Makefile
23618@@ -38,6 +38,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC)
23619 # libavfilter tests
23620 AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
23621 AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
23622+AVFILTEROBJS-$(CONFIG_BWDIF_FILTER)      += vf_bwdif.o
23623 AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
23624 AVFILTEROBJS-$(CONFIG_EQ_FILTER)         += vf_eq.o
23625 AVFILTEROBJS-$(CONFIG_GBLUR_FILTER)      += vf_gblur.o
23626@@ -56,8 +57,9 @@ CHECKASMOBJS-$(CONFIG_SWSCALE)  += $(SWS
23627 AVUTILOBJS                              += av_tx.o
23628 AVUTILOBJS                              += fixed_dsp.o
23629 AVUTILOBJS                              += float_dsp.o
23630+AVUTILOBJS-$(CONFIG_SAND)               += rpi_sand.o
23631
23632-CHECKASMOBJS-$(CONFIG_AVUTIL)  += $(AVUTILOBJS)
23633+CHECKASMOBJS-$(CONFIG_AVUTIL)  += $(AVUTILOBJS) $(AVUTILOBJS-yes)
23634
23635 CHECKASMOBJS-$(ARCH_AARCH64)            += aarch64/checkasm.o
23636 CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL)   += arm/checkasm.o
23637--- a/tests/checkasm/checkasm.c
23638+++ b/tests/checkasm/checkasm.c
23639@@ -173,6 +173,9 @@ static const struct {
23640     #if CONFIG_BLEND_FILTER
23641         { "vf_blend", checkasm_check_blend },
23642     #endif
23643+    #if CONFIG_BWDIF_FILTER
23644+        { "vf_bwdif", checkasm_check_vf_bwdif },
23645+    #endif
23646     #if CONFIG_COLORSPACE_FILTER
23647         { "vf_colorspace", checkasm_check_colorspace },
23648     #endif
23649@@ -201,6 +204,9 @@ static const struct {
23650         { "fixed_dsp", checkasm_check_fixed_dsp },
23651         { "float_dsp", checkasm_check_float_dsp },
23652         { "av_tx",     checkasm_check_av_tx },
23653+    #if CONFIG_SAND
23654+        { "rpi_sand",  checkasm_check_rpi_sand },
23655+    #endif
23656 #endif
23657     { NULL }
23658 };
23659--- a/tests/checkasm/checkasm.h
23660+++ b/tests/checkasm/checkasm.h
23661@@ -72,6 +72,7 @@ void checkasm_check_motion(void);
23662 void checkasm_check_nlmeans(void);
23663 void checkasm_check_opusdsp(void);
23664 void checkasm_check_pixblockdsp(void);
23665+void checkasm_check_rpi_sand(void);
23666 void checkasm_check_sbrdsp(void);
23667 void checkasm_check_synth_filter(void);
23668 void checkasm_check_sw_gbrp(void);
23669@@ -81,6 +82,7 @@ void checkasm_check_utvideodsp(void);
23670 void checkasm_check_v210dec(void);
23671 void checkasm_check_v210enc(void);
23672 void checkasm_check_vc1dsp(void);
23673+void checkasm_check_vf_bwdif(void);
23674 void checkasm_check_vf_eq(void);
23675 void checkasm_check_vf_gblur(void);
23676 void checkasm_check_vf_hflip(void);
23677--- /dev/null
23678+++ b/tests/checkasm/rpi_sand.c
23679@@ -0,0 +1,118 @@
23680+/*
23681+ * Copyright (c) 2023 John Cox
23682+ *
23683+ * This file is part of FFmpeg.
23684+ *
23685+ * FFmpeg is free software; you can redistribute it and/or modify
23686+ * it under the terms of the GNU General Public License as published by
23687+ * the Free Software Foundation; either version 2 of the License, or
23688+ * (at your option) any later version.
23689+ *
23690+ * FFmpeg is distributed in the hope that it will be useful,
23691+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23692+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23693+ * GNU General Public License for more details.
23694+ *
23695+ * You should have received a copy of the GNU General Public License along
23696+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
23697+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23698+ */
23699+
23700+#include <string.h>
23701+#include "checkasm.h"
23702+#include "libavutil/common.h"
23703+#include "libavutil/rpi_sand_fns.h"
23704+
23705+#if ARCH_ARM
23706+#include "libavutil/arm/cpu.h"
23707+#include "libavutil/arm/rpi_sand_neon.h"
23708+#elif ARCH_AARCH64
23709+#include "libavutil/aarch64/cpu.h"
23710+#include "libavutil/aarch64/rpi_sand_neon.h"
23711+#endif
23712+
23713+static inline uint32_t pack30(unsigned int a, unsigned int b, unsigned int c)
23714+{
23715+    return (a & 0x3ff) | ((b & 0x3ff) << 10) | ((c & 0x3ff) << 20); /* three 10-bit fields: a in bits 0-9, b in 10-19, c in 20-29 */
23716+}
23717+
23718+void checkasm_check_rpi_sand(void)
23719+{
23720+    const unsigned int w = 1280;
23721+    const unsigned int h = 66;
23722+    const unsigned int stride1 = 128;
23723+    const unsigned int stride2 = h*3/2;
23724+    const unsigned int ssize = ((w+95)/96)*128*h*3/2;
23725+    const unsigned int ysize = ((w + 32) * (h + 32) * 2);
23726+    /* sbufN: source data; ybufN / vbufN: destination planes. Index 0 is used by call_ref, index 1 by call_new. */
23727+    uint8_t * sbuf0 = malloc(ssize);
23728+    uint8_t * sbuf1 = malloc(ssize);
23729+    uint8_t * ybuf0 = malloc(ysize);
23730+    uint8_t * ybuf1 = malloc(ysize);
23731+    uint8_t * vbuf0 = malloc(ysize);
23732+    uint8_t * vbuf1 = malloc(ysize);
23733+    uint8_t * yframe0 = (w + 32) * 16 + ybuf0; /* frames start inside the buffers; presumably so stray writes show up in the whole-buffer memcmp */
23734+    uint8_t * yframe1 = (w + 32) * 16 + ybuf1;
23735+    uint8_t * vframe0 = (w + 32) * 16 + vbuf0;
23736+    uint8_t * vframe1 = (w + 32) * 16 + vbuf1;
23737+    unsigned int i;
23738+    /* Identical random source data for both implementations */
23739+    for (i = 0; i != ssize; i += 4)
23740+        *(uint32_t*)(sbuf0 + i) = rnd();
23741+    memcpy(sbuf1, sbuf0, ssize);
23742+
23743+    if (check_func(have_neon(av_get_cpu_flags()) ? ff_rpi_sand30_lines_to_planar_y16 : av_rpi_sand30_to_planar_y16, "rpi_sand30_to_planar_y16")) {
23744+        declare_func(void, uint8_t * dst, const unsigned int dst_stride,
23745+                     const uint8_t * src,
23746+                     unsigned int stride1, unsigned int stride2,
23747+                     unsigned int _x, unsigned int y,
23748+                     unsigned int _w, unsigned int h);
23749+
23750+        memset(ybuf0, 0xbb, ysize);
23751+        memset(ybuf1, 0xbb, ysize);
23752+
23753+        call_ref(yframe0, (w + 32) * 2, sbuf0, stride1, stride2, 0, 0, w, h);
23754+        call_new(yframe1, (w + 32) * 2, sbuf1, stride1, stride2, 0, 0, w, h);
23755+        /* Source must be unchanged and the whole destination buffers must agree */
23756+        if (memcmp(sbuf0, sbuf1, ssize)
23757+            || memcmp(ybuf0, ybuf1, ysize))
23758+            fail();
23759+        /* Bench with the same destination pointer that call_new used */
23760+        bench_new(yframe1, (w + 32) * 2, sbuf1, stride1, stride2, 0, 0, w, h);
23761+    }
23762+
23763+    if (check_func(have_neon(av_get_cpu_flags()) ? ff_rpi_sand30_lines_to_planar_c16 : av_rpi_sand30_to_planar_c16, "rpi_sand30_to_planar_c16")) {
23764+        declare_func(void, uint8_t * u_dst, const unsigned int u_stride,
23765+                     uint8_t * v_dst, const unsigned int v_stride,
23766+                     const uint8_t * src,
23767+                     unsigned int stride1, unsigned int stride2,
23768+                     unsigned int _x, unsigned int y,
23769+                     unsigned int _w, unsigned int h);
23770+
23771+        memset(ybuf0, 0xbb, ysize);
23772+        memset(ybuf1, 0xbb, ysize);
23773+        memset(vbuf0, 0xbb, ysize);
23774+        memset(vbuf1, 0xbb, ysize);
23775+        /* The y buffers are reused here as the U destination */
23776+        call_ref(yframe0, (w + 32), vframe0, (w + 32), sbuf0, stride1, stride2, 0, 0, w/2, h/2);
23777+        call_new(yframe1, (w + 32), vframe1, (w + 32), sbuf1, stride1, stride2, 0, 0, w/2, h/2);
23778+
23779+        if (memcmp(sbuf0, sbuf1, ssize)
23780+            || memcmp(ybuf0, ybuf1, ysize)
23781+            || memcmp(vbuf0, vbuf1, ysize))
23782+            fail();
23783+
23784+        bench_new(yframe1, (w + 32), vframe1, (w + 32), sbuf1, stride1, stride2, 0, 0, w/2, h/2);
23785+    }
23786+
23787+
23788+    report("sand30");
23789+
23790+    free(sbuf0);
23791+    free(sbuf1);
23792+    free(ybuf0);
23793+    free(ybuf1);
23794+    free(vbuf0);
23795+    free(vbuf1);
23796+}
23797+
23798--- /dev/null
23799+++ b/tests/checkasm/vf_bwdif.c
23800@@ -0,0 +1,256 @@
23801+/*
23802+ * This file is part of FFmpeg.
23803+ *
23804+ * FFmpeg is free software; you can redistribute it and/or modify
23805+ * it under the terms of the GNU General Public License as published by
23806+ * the Free Software Foundation; either version 2 of the License, or
23807+ * (at your option) any later version.
23808+ *
23809+ * FFmpeg is distributed in the hope that it will be useful,
23810+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23811+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23812+ * GNU General Public License for more details.
23813+ *
23814+ * You should have received a copy of the GNU General Public License along
23815+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
23816+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23817+ */
23818+
23819+#include <string.h>
23820+#include "checkasm.h"
23821+#include "libavcodec/internal.h"
23822+#include "libavfilter/bwdif.h"
23823+#include "libavutil/mem_internal.h"
23824+
23825+#define WIDTH 256
23826+
23827+#define randomize_buffers(buf0, buf1, mask, count) \
23828+    for (size_t i = 0; i < count; i++) \
23829+        buf0[i] = buf1[i] = rnd() & mask
23830+
23831+#define randomize_overflow_check(buf0, buf1, mask, count) \
23832+    for (size_t i = 0; i < count; i++) \
23833+        buf0[i] = buf1[i] = (rnd() & 1) != 0 ? mask : 0 /* each element is 0 or mask; no trailing ';' - the call site supplies it */
23834+
23835+#define BODY(type, depth)                                                      \
23836+    do {                                                                       \
23837+        type prev0[9*WIDTH], prev1[9*WIDTH];                                   \
23838+        type next0[9*WIDTH], next1[9*WIDTH];                                   \
23839+        type cur0[9*WIDTH], cur1[9*WIDTH];                                     \
23840+        type dst0[WIDTH], dst1[WIDTH];                                         \
23841+        const int stride = WIDTH;                                              \
23842+        const int mask = (1<<depth)-1;                                         \
23843+                                                                               \
23844+        declare_func(void, void *dst, void *prev, void *cur, void *next,       \
23845+                        int w, int prefs, int mrefs, int prefs2, int mrefs2,   \
23846+                        int prefs3, int mrefs3, int prefs4, int mrefs4,        \
23847+                        int parity, int clip_max);                             \
23848+                                                                               \
23849+        randomize_buffers(prev0, prev1, mask, 9*WIDTH);                        \
23850+        randomize_buffers(next0, next1, mask, 9*WIDTH);                        \
23851+        randomize_buffers( cur0,  cur1, mask, 9*WIDTH);                        \
23852+                                                                               \
23853+        call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH,       \
23854+                WIDTH, stride, -stride, 2*stride, -2*stride,                   \
23855+                3*stride, -3*stride, 4*stride, -4*stride,                      \
23856+                0, mask);                                                      \
23857+        call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH,       \
23858+                WIDTH, stride, -stride, 2*stride, -2*stride,                   \
23859+                3*stride, -3*stride, 4*stride, -4*stride,                      \
23860+                0, mask);                                                      \
23861+                                                                               \
23862+        if (memcmp(dst0, dst1, sizeof dst0)                                    \
23863+                || memcmp(prev0, prev1, sizeof prev0)                          \
23864+                || memcmp(next0, next1, sizeof next0)                          \
23865+                || memcmp( cur0,  cur1, sizeof cur0))                          \
23866+            fail();                                                            \
23867+        bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH,      \
23868+                WIDTH, stride, -stride, 2*stride, -2*stride,                   \
23869+                3*stride, -3*stride, 4*stride, -4*stride,                      \
23870+                0, mask);                                                      \
23871+    } while (0)
23872+
23873+void checkasm_check_vf_bwdif(void)
23874+{
23875+    BWDIFContext ctx_8, ctx_10;
23876+    /* One context per bit depth; the function pointers under test are read from these */
23877+    ff_bwdif_init_filter_line(&ctx_8, 8);
23878+    ff_bwdif_init_filter_line(&ctx_10, 10);
23879+
23880+    if (check_func(ctx_8.filter_line, "bwdif8")) {
23881+        BODY(uint8_t, 8);
23882+        report("bwdif8");
23883+    }
23884+
23885+    if (check_func(ctx_10.filter_line, "bwdif10")) {
23886+        BODY(uint16_t, 10);
23887+        report("bwdif10");
23888+    }
23889+    /* If init left filter_line3 unset, test the C implementation */
23890+    if (!ctx_8.filter_line3)
23891+        ctx_8.filter_line3 = ff_bwdif_filter_line3_c;
23892+
23893+    {
23894+        LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]);
23895+        LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]);
23896+        LOCAL_ALIGNED_16(uint8_t, next0, [11*WIDTH]);
23897+        LOCAL_ALIGNED_16(uint8_t, next1, [11*WIDTH]);
23898+        LOCAL_ALIGNED_16(uint8_t, cur0,  [11*WIDTH]);
23899+        LOCAL_ALIGNED_16(uint8_t, cur1,  [11*WIDTH]);
23900+        LOCAL_ALIGNED_16(uint8_t, dst0,  [WIDTH*3]);
23901+        LOCAL_ALIGNED_16(uint8_t, dst1,  [WIDTH*3]);
23902+        const int stride = WIDTH;
23903+        const int mask = (1<<8)-1;
23904+        int parity;
23905+
23906+        for (parity = 0; parity != 2; ++parity) {
23907+            if (check_func(ctx_8.filter_line3, "bwdif8.line3.rnd.p%d", parity)) {
23908+
23909+                declare_func(void, void * dst1, int d_stride,
23910+                                          const void * prev1, const void * cur1, const void * next1, int prefs,
23911+                                          int w, int parity, int clip_max);
23912+
23913+                randomize_buffers(prev0, prev1, mask, 11*WIDTH);
23914+                randomize_buffers(next0, next1, mask, 11*WIDTH);
23915+                randomize_buffers( cur0,  cur1, mask, 11*WIDTH);
23916+
23917+                call_ref(dst0, stride,
23918+                         prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, stride,
23919+                         WIDTH, parity, mask);
23920+                call_new(dst1, stride,
23921+                         prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride,
23922+                         WIDTH, parity, mask);
23923+
23924+                if (memcmp(dst0, dst1, WIDTH*3)
23925+                        || memcmp(prev0, prev1, WIDTH*11)
23926+                        || memcmp(next0, next1, WIDTH*11)
23927+                        || memcmp( cur0,  cur1, WIDTH*11))
23928+                    fail();
23929+
23930+                bench_new(dst1, stride,
23931+                         prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride,
23932+                         WIDTH, parity, mask);
23933+            }
23934+        }
23935+
23936+        // Use just 0s and ~0s to try to provoke bad cropping or overflow
23937+        // Parity makes no difference to this test so just test 0
23938+        if (check_func(ctx_8.filter_line3, "bwdif8.line3.overflow")) {
23939+
23940+            declare_func(void, void * dst1, int d_stride,
23941+                                      const void * prev1, const void * cur1, const void * next1, int prefs,
23942+                                      int w, int parity, int clip_max);
23943+
23944+            randomize_overflow_check(prev0, prev1, mask, 11*WIDTH);
23945+            randomize_overflow_check(next0, next1, mask, 11*WIDTH);
23946+            randomize_overflow_check( cur0,  cur1, mask, 11*WIDTH);
23947+
23948+            call_ref(dst0, stride,
23949+                     prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, stride,
23950+                     WIDTH, 0, mask);
23951+            call_new(dst1, stride,
23952+                     prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride,
23953+                     WIDTH, 0, mask);
23954+
23955+            if (memcmp(dst0, dst1, WIDTH*3)
23956+                    || memcmp(prev0, prev1, WIDTH*11)
23957+                    || memcmp(next0, next1, WIDTH*11)
23958+                    || memcmp( cur0,  cur1, WIDTH*11))
23959+                fail();
23960+
23961+            // No point to benching
23962+        }
23963+
23964+        report("bwdif8.line3");
23965+    }
23966+
23967+    {
23968+        LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]);
23969+        LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]);
23970+        LOCAL_ALIGNED_16(uint8_t, next0, [11*WIDTH]);
23971+        LOCAL_ALIGNED_16(uint8_t, next1, [11*WIDTH]);
23972+        LOCAL_ALIGNED_16(uint8_t, cur0,  [11*WIDTH]);
23973+        LOCAL_ALIGNED_16(uint8_t, cur1,  [11*WIDTH]);
23974+        LOCAL_ALIGNED_16(uint8_t, dst0,  [WIDTH*3]);
23975+        LOCAL_ALIGNED_16(uint8_t, dst1,  [WIDTH*3]);
23976+        const int stride = WIDTH;
23977+        const int mask = (1<<8)-1;
23978+        int spat;
23979+        int parity;
23980+
23981+        for (spat = 0; spat != 2; ++spat) {
23982+            for (parity = 0; parity != 2; ++parity) {
23983+                if (check_func(ctx_8.filter_edge, "bwdif8.edge.s%d.p%d", spat, parity)) {
23984+
23985+                    declare_func(void, void *dst1, void *prev1, void *cur1, void *next1,
23986+                                            int w, int prefs, int mrefs, int prefs2, int mrefs2,
23987+                                            int parity, int clip_max, int spat);
23988+
23989+                    randomize_buffers(prev0, prev1, mask, 11*WIDTH);
23990+                    randomize_buffers(next0, next1, mask, 11*WIDTH);
23991+                    randomize_buffers( cur0,  cur1, mask, 11*WIDTH);
23992+                    memset(dst0, 0xba, WIDTH * 3);
23993+                    memset(dst1, 0xba, WIDTH * 3);
23994+                    /* Output goes to the middle of the three dst rows; all three rows are compared below */
23995+                    call_ref(dst0 + stride,
23996+                             prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, WIDTH,
23997+                             stride, -stride, stride * 2, -stride * 2,
23998+                             parity, mask, spat);
23999+                    call_new(dst1 + stride,
24000+                             prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, WIDTH,
24001+                             stride, -stride, stride * 2, -stride * 2,
24002+                             parity, mask, spat);
24003+
24004+                    if (memcmp(dst0, dst1, WIDTH*3)
24005+                            || memcmp(prev0, prev1, WIDTH*11)
24006+                            || memcmp(next0, next1, WIDTH*11)
24007+                            || memcmp( cur0,  cur1, WIDTH*11))
24008+                        fail();
24009+
24010+                    bench_new(dst1 + stride,
24011+                             prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, WIDTH,
24012+                             stride, -stride, stride * 2, -stride * 2,
24013+                             parity, mask, spat);
24014+                }
24015+            }
24016+        }
24017+
24018+        report("bwdif8.edge");
24019+    }
24020+
24021+    if (check_func(ctx_8.filter_intra, "bwdif8.intra")) {
24022+        LOCAL_ALIGNED_16(uint8_t, cur0,  [11*WIDTH]);
24023+        LOCAL_ALIGNED_16(uint8_t, cur1,  [11*WIDTH]);
24024+        LOCAL_ALIGNED_16(uint8_t, dst0,  [WIDTH*3]);
24025+        LOCAL_ALIGNED_16(uint8_t, dst1,  [WIDTH*3]);
24026+        const int stride = WIDTH;
24027+        const int mask = (1<<8)-1;
24028+
24029+        declare_func(void, void *dst1, void *cur1, int w, int prefs, int mrefs,
24030+                     int prefs3, int mrefs3, int parity, int clip_max);
24031+
24032+        randomize_buffers( cur0,  cur1, mask, 11*WIDTH);
24033+        memset(dst0, 0xba, WIDTH * 3);
24034+        memset(dst1, 0xba, WIDTH * 3);
24035+        /* As in the other sub-tests: reference reads the *0 buffers, new code reads the identical *1 copies */
24036+        call_ref(dst0 + stride,
24037+                 cur0 + stride * 4, WIDTH,
24038+                 stride, -stride, stride * 3, -stride * 3,
24039+                 0, mask);
24040+        call_new(dst1 + stride,
24041+                 cur1 + stride * 4, WIDTH,
24042+                 stride, -stride, stride * 3, -stride * 3,
24043+                 0, mask);
24044+
24045+        if (memcmp(dst0, dst1, WIDTH*3)
24046+                || memcmp( cur0,  cur1, WIDTH*11))
24047+            fail();
24048+
24049+        bench_new(dst1 + stride,
24050+                  cur1 + stride * 4, WIDTH,
24051+                  stride, -stride, stride * 3, -stride * 3,
24052+                  0, mask);
24053+
24054+        report("bwdif8.intra");
24055+    }
24056+}
24057--- a/tests/fate/checkasm.mak
24058+++ b/tests/fate/checkasm.mak
24059@@ -26,6 +26,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp
24060                 fate-checkasm-motion                                    \
24061                 fate-checkasm-opusdsp                                   \
24062                 fate-checkasm-pixblockdsp                               \
24063+                fate-checkasm-rpi_sand                                  \
24064                 fate-checkasm-sbrdsp                                    \
24065                 fate-checkasm-synth_filter                              \
24066                 fate-checkasm-sw_gbrp                                   \
24067@@ -36,6 +37,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp
24068                 fate-checkasm-v210enc                                   \
24069                 fate-checkasm-vc1dsp                                    \
24070                 fate-checkasm-vf_blend                                  \
24071+                fate-checkasm-vf_bwdif                                  \
24072                 fate-checkasm-vf_colorspace                             \
24073                 fate-checkasm-vf_eq                                     \
24074                 fate-checkasm-vf_gblur                                  \
24075