This is a combination of Kwiboo's v4l2request patchset and the stateful bits of JC's RPi patchset. Both patchsets provide a stateless HEVC decoder, but Kwiboo's is designed to work on all boards with mainline (and RPi) kernels. On the RPi, Kodi works fully with RPi kernel 6.12 or 6.18. Other applications using the stateless HEVC decoder require 6.18. The necessary patches have not been merged to mainline at the time of writing. Other boards should work with any recent mainline kernel. lrusak's DRM PRIME patchset and jernejsk's v4l2m2m deinterlace filter patch are often applied together with Kwiboo's patchset, but JC's patchset already includes these features. His implementations are seemingly based on these earlier ones. -- Chewi diff --git a/configure b/configure index 1759694274..f8a400e2a2 100755 --- a/configure +++ b/configure @@ -362,12 +362,14 @@ External library support: --enable-libvpl enable Intel oneVPL code via libvpl if libmfx is not used [no] --enable-libnpp enable Nvidia Performance Primitives-based code [no] --enable-mmal enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no] + --enable-sand enable sand video formats [rpi] --disable-nvdec disable Nvidia video decoding acceleration (via hwaccel) [autodetect] --disable-nvenc disable Nvidia video encoding code [autodetect] --enable-omx enable OpenMAX IL code [no] --enable-omx-rpi enable OpenMAX IL code for Raspberry Pi [no] --enable-rkmpp enable Rockchip Media Process Platform code [no] --disable-v4l2-m2m disable V4L2 mem2mem code [autodetect] + --disable-v4l2-request disable V4L2 Request API code [autodetect] --disable-vaapi disable Video Acceleration API (mainly Unix/Intel) code [autodetect] --disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect] --disable-videotoolbox disable VideoToolbox code [autodetect] @@ -2097,6 +2099,7 @@ EXTERNAL_LIBRARY_LIST=" libtorch libtwolame libuavs3d + libudev libv4l2 libvmaf libvorbis @@ -2141,6 +2144,7 @@ HWACCEL_AUTODETECT_LIBRARY_LIST=" videotoolbox vulkan v4l2_m2m + v4l2_request " # catchall list of things that require external libs to link @@ -2179,6 +2183,7 @@ FEATURE_LIST=" omx_rpi runtime_cpudetect safe_bitstream_reader + sand shared small static @@ -2642,6 +2647,7 @@ TYPES_LIST=" struct_sockaddr_sa_len struct_sockaddr_storage struct_stat_st_mtim_tv_nsec + struct_v4l2_ctrl_hevc_decode_params_num_delta_pocs_of_ref_rps_idx struct_v4l2_frmivalenum_discrete struct_mfxConfigInterface " @@ -2781,6 +2787,7 @@ CONFIG_EXTRA=" rtpdec rtpenc_chain rv34dsp + sand scene_sad sinewin smpte_436m @@ -3341,6 +3348,8 @@ dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32" ffnvcodec_deps_any="libdl LoadLibrary" mediacodec_deps="android mediandk pthreads" nvdec_deps="ffnvcodec" +v4l2_request_deps="linux_media_h v4l2_timeval_to_ns v4l2_m2m_hold_capture_buf libdrm libudev" +v4l2_request_suggest="libdrm libudev" vaapi_x11_deps="xlib_x11" videotoolbox_hwaccel_deps="videotoolbox pthreads" videotoolbox_hwaccel_extralibs="-framework QuartzCore" @@ -3357,6 +3366,8 @@ av1_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_AV1" av1_dxva2_hwaccel_select="av1_decoder" av1_nvdec_hwaccel_deps="nvdec CUVIDAV1PICPARAMS" av1_nvdec_hwaccel_select="av1_decoder" +av1_v4l2request_hwaccel_deps="v4l2_request av1_v4l2_request" +av1_v4l2request_hwaccel_select="av1_decoder" av1_vaapi_hwaccel_deps="vaapi VADecPictureParameterBufferAV1_bit_depth_idx" av1_vaapi_hwaccel_select="av1_decoder" av1_vdpau_hwaccel_deps="vdpau VdpPictureInfoAV1" @@ -3383,6 +3394,8 @@ 
h264_dxva2_hwaccel_deps="dxva2" h264_dxva2_hwaccel_select="h264_decoder" h264_nvdec_hwaccel_deps="nvdec" h264_nvdec_hwaccel_select="h264_decoder" +h264_v4l2request_hwaccel_deps="v4l2_request h264_v4l2_request" +h264_v4l2request_hwaccel_select="h264_decoder" h264_vaapi_hwaccel_deps="vaapi" h264_vaapi_hwaccel_select="h264_decoder" h264_vdpau_hwaccel_deps="vdpau" @@ -3401,6 +3414,8 @@ hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_HEVC" hevc_dxva2_hwaccel_select="hevc_decoder" hevc_nvdec_hwaccel_deps="nvdec" hevc_nvdec_hwaccel_select="hevc_decoder" +hevc_v4l2request_hwaccel_deps="v4l2_request hevc_v4l2_request" +hevc_v4l2request_hwaccel_select="hevc_decoder" hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC" hevc_vaapi_hwaccel_select="hevc_decoder" hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC" @@ -3429,6 +3444,8 @@ mpeg2_dxva2_hwaccel_deps="dxva2" mpeg2_dxva2_hwaccel_select="mpeg2video_decoder" mpeg2_nvdec_hwaccel_deps="nvdec" mpeg2_nvdec_hwaccel_select="mpeg2video_decoder" +mpeg2_v4l2request_hwaccel_deps="v4l2_request mpeg2_v4l2_request" +mpeg2_v4l2request_hwaccel_select="mpeg2video_decoder" mpeg2_vaapi_hwaccel_deps="vaapi" mpeg2_vaapi_hwaccel_select="mpeg2video_decoder" mpeg2_vdpau_hwaccel_deps="vdpau" @@ -3465,6 +3482,8 @@ vc1_vdpau_hwaccel_deps="vdpau" vc1_vdpau_hwaccel_select="vc1_decoder" vp8_nvdec_hwaccel_deps="nvdec" vp8_nvdec_hwaccel_select="vp8_decoder" +vp8_v4l2request_hwaccel_deps="v4l2_request vp8_v4l2_request" +vp8_v4l2request_hwaccel_select="vp8_decoder" vp8_vaapi_hwaccel_deps="vaapi" vp8_vaapi_hwaccel_select="vp8_decoder" vp9_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_VP9" @@ -3477,6 +3496,8 @@ vp9_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_VP9" vp9_dxva2_hwaccel_select="vp9_decoder" vp9_nvdec_hwaccel_deps="nvdec" vp9_nvdec_hwaccel_select="vp9_decoder" +vp9_v4l2request_hwaccel_deps="v4l2_request vp9_v4l2_request" +vp9_v4l2request_hwaccel_select="vp9_decoder" vp9_vaapi_hwaccel_deps="vaapi VADecPictureParameterBufferVP9_bit_depth" vp9_vaapi_hwaccel_select="vp9_decoder" vp9_vdpau_hwaccel_deps="vdpau VdpPictureInfoVP9" @@ -3563,6 +3584,7 @@ av1_nvenc_encoder_select="atsc_a53" av1_qsv_decoder_select="qsvdec" av1_qsv_encoder_deps="libvpl" av1_qsv_encoder_select="qsvenc" +av1_v4l2m2m_decoder_deps="v4l2_m2m av1_v4l2_m2m" av1_vaapi_encoder_deps="VAEncPictureParameterBufferAV1" av1_vaapi_encoder_select="cbs_av1 vaapi_encode" av1_vulkan_encoder_deps="vulkan_1_4" @@ -4112,6 +4134,7 @@ cropdetect_filter_deps="gpl" deinterlace_qsv_filter_deps="libmfx" deinterlace_qsv_filter_select="qsvvpp" deinterlace_vaapi_filter_deps="vaapi" +deinterlace_v4l2m2m_filter_deps="libdrm v4l2_m2m" delogo_filter_deps="gpl" denoise_vaapi_filter_deps="vaapi" derain_filter_select="dnn" @@ -4227,6 +4250,7 @@ transpose_opencl_filter_deps="opencl" transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags" transpose_vt_filter_deps="videotoolbox VTPixelRotationSessionCreate" transpose_vulkan_filter_deps="vulkan spirv_library" +unsand_filter_select="sand" unsharp_opencl_filter_deps="opencl" uspp_filter_deps="gpl avcodec" vaguedenoiser_filter_deps="gpl" @@ -4238,6 +4262,7 @@ libvmaf_cuda_filter_deps="libvmaf libvmaf_cuda ffnvcodec" zmq_filter_deps="libzmq" zoompan_filter_deps="swscale" zscale_filter_deps="libzimg const_nan" +scale_v4l2m2m_filter_deps="libdrm v4l2_m2m" scale_vaapi_filter_deps="vaapi" scale_vt_filter_deps="videotoolbox VTPixelTransferSessionCreate" scale_vulkan_filter_deps="vulkan spirv_compiler spirv_library" @@ -4309,7 +4334,7 @@ shader_compression_suggest="zlib" 
avcodec_extralibs="pthreads_extralibs iconv_extralibs dxva2_extralibs liblcevc_dec_extralibs lcms2_extralibs" avfilter_extralibs="pthreads_extralibs" -avutil_extralibs="d3d11va_extralibs d3d12va_extralibs mediacodec_extralibs nanosleep_extralibs pthreads_extralibs vaapi_drm_extralibs vaapi_x11_extralibs vaapi_win32_extralibs vdpau_x11_extralibs" +avutil_extralibs="d3d11va_extralibs d3d12va_extralibs mediacodec_extralibs nanosleep_extralibs pthreads_extralibs v4l2_request_extralibs vaapi_drm_extralibs vaapi_x11_extralibs vaapi_win32_extralibs vdpau_x11_extralibs" # programs ffmpeg_deps="avcodec avfilter avformat threads" @@ -7586,6 +7611,21 @@ if enabled v4l2_m2m; then check_cc h264_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_H264;" check_cc vp8_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP8;" check_cc vp9_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP9;" + check_cc av1_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_AV1;" +fi + +if enabled v4l2_request; then + check_cc av1_v4l2_request linux/videodev2.h "int i = V4L2_CID_STATELESS_AV1_SEQUENCE" + check_cc h264_v4l2_request linux/videodev2.h "int i = V4L2_CID_STATELESS_H264_DECODE_MODE" + check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_CID_STATELESS_HEVC_SPS" + check_cc mpeg2_v4l2_request linux/videodev2.h "int i = V4L2_CID_STATELESS_MPEG2_SEQUENCE" + check_cc v4l2_m2m_hold_capture_buf linux/videodev2.h "int i = V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF" + check_cc vp8_v4l2_request linux/videodev2.h "int i = V4L2_CID_STATELESS_VP8_FRAME" + check_cc vp9_v4l2_request linux/videodev2.h "int i = V4L2_CID_STATELESS_VP9_FRAME" + check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns + check_pkg_config libudev libudev libudev.h udev_new + check_struct linux/videodev2.h "struct v4l2_ctrl_hevc_decode_params" num_delta_pocs_of_ref_rps_idx + v4l2_request_extralibs="$libudev_extralibs" fi check_headers sys/videoio.h diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h index b4d3a2c2ac..41f5ad087f 100644 --- a/fftools/ffmpeg.h +++ b/fftools/ffmpeg.h @@ -765,6 +765,8 @@ extern enum VideoSyncMethod video_sync_method; extern float frame_drop_threshold; extern int do_benchmark; extern int do_benchmark_all; +extern int no_cvt_hw; +extern int do_deinterlace; extern int do_hex_dump; extern int do_pkt_dump; extern int copy_ts; diff --git a/fftools/ffmpeg_dec.c b/fftools/ffmpeg_dec.c index 5020684a28..03a5ce8164 100644 --- a/fftools/ffmpeg_dec.c +++ b/fftools/ffmpeg_dec.c @@ -392,7 +392,7 @@ static int video_frame_process(DecoderPriv *dp, AVFrame *frame, } #endif - if (frame->format == dp->hwaccel_pix_fmt) { + if (!no_cvt_hw && frame->format == dp->hwaccel_pix_fmt) { int err = hwaccel_retrieve_data(dp->dec_ctx, frame); if (err < 0) return err; @@ -1336,12 +1336,15 @@ static enum AVPixelFormat get_format(AVCodecContext *s, const enum AVPixelFormat break; if (dp->hwaccel_id == HWACCEL_GENERIC || - dp->hwaccel_id == HWACCEL_AUTO) { + dp->hwaccel_id == HWACCEL_AUTO || + no_cvt_hw) { for (int i = 0;; i++) { config = avcodec_get_hw_config(s->codec, i); if (!config) break; - if (!(config->methods & + if (no_cvt_hw && (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL)) + av_log(s, AV_LOG_DEBUG, "no_cvt_hw so trying pix_fmt %d with codec internal hwaccel\n", *p); + else if (!(config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) continue; if (config->pix_fmt == *p) diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c index 765b65d0ec..7ee9b0f6ea 100644 --- a/fftools/ffmpeg_filter.c +++ b/fftools/ffmpeg_filter.c @@ 
-3094,8 +3094,8 @@ static int send_frame(FilterGraph *fg, FilterGraphThread *fgt, break; case AVMEDIA_TYPE_VIDEO: if (ifp->format != frame->format || - ifp->width != frame->width || - ifp->height != frame->height || + ifp->width != av_frame_cropped_width(frame) || + ifp->height != av_frame_cropped_height(frame) || ifp->color_space != frame->colorspace || ifp->color_range != frame->color_range || ifp->alpha_mode != frame->alpha_mode) @@ -3131,6 +3131,9 @@ static int send_frame(FilterGraph *fg, FilterGraphThread *fgt, (ifp->hw_frames_ctx && ifp->hw_frames_ctx->data != frame->hw_frames_ctx->data)) need_reinit |= HWACCEL_CHANGED; + if (no_cvt_hw && fgt->graph) + need_reinit = 0; + if (need_reinit) { ret = ifilter_parameters_from_frame(ifilter, frame); if (ret < 0) diff --git a/fftools/ffmpeg_hw.c b/fftools/ffmpeg_hw.c index 5d4c06c28e..f39bd29b6e 100644 --- a/fftools/ffmpeg_hw.c +++ b/fftools/ffmpeg_hw.c @@ -73,6 +73,8 @@ static char *hw_device_default_name(enum AVHWDeviceType type) char *name; size_t index_pos; int index, index_limit = 1000; + if (!type_name) + return NULL; index_pos = strlen(type_name); name = av_malloc(index_pos + 4); if (!name) diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c index a0664e2964..32e70c7c43 100644 --- a/fftools/ffmpeg_opt.c +++ b/fftools/ffmpeg_opt.c @@ -62,6 +62,7 @@ enum VideoSyncMethod video_sync_method = VSYNC_AUTO; float frame_drop_threshold = 0; int do_benchmark = 0; int do_benchmark_all = 0; +int no_cvt_hw = 0; int do_hex_dump = 0; int do_pkt_dump = 0; int copy_ts = 0; @@ -1717,8 +1718,11 @@ const OptionDef options[] = { { "benchmark_all", OPT_TYPE_BOOL, OPT_EXPERT, { &do_benchmark_all }, "add timings for each task" }, - { "progress", OPT_TYPE_FUNC, OPT_FUNC_ARG | OPT_EXPERT, - { .func_arg = opt_progress }, + { "no_cvt_hw", OPT_TYPE_BOOL, OPT_EXPERT, + { &no_cvt_hw }, + "do not auto-convert hw frames to sw" }, + { "progress", OPT_TYPE_FUNC, OPT_FUNC_ARG | OPT_EXPERT, + { .func_arg = opt_progress }, "write program-readable progress information", "url" }, { "stdin", OPT_TYPE_BOOL, OPT_EXPERT, { &stdin_interaction }, diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 12a8265025..717941f1c4 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -184,7 +184,9 @@ OBJS-$(CONFIG_VC1DSP) += vc1dsp.o OBJS-$(CONFIG_VIDEODSP) += videodsp.o OBJS-$(CONFIG_VP3DSP) += vp3dsp.o OBJS-$(CONFIG_VP8DSP) += vp8dsp.o -OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o +OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o\ + weak_link.o v4l2_req_dmabufs.o +OBJS-$(CONFIG_V4L2_REQUEST) += v4l2_request.o OBJS-$(CONFIG_WMA_FREQS) += wma_freqs.o OBJS-$(CONFIG_WMV2DSP) += wmv2dsp.o @@ -1050,6 +1052,7 @@ OBJS-$(CONFIG_AV1_D3D11VA_HWACCEL) += dxva2_av1.o OBJS-$(CONFIG_AV1_DXVA2_HWACCEL) += dxva2_av1.o OBJS-$(CONFIG_AV1_D3D12VA_HWACCEL) += dxva2_av1.o d3d12va_av1.o OBJS-$(CONFIG_AV1_NVDEC_HWACCEL) += nvdec_av1.o +OBJS-$(CONFIG_AV1_V4L2REQUEST_HWACCEL) += v4l2_request_av1.o OBJS-$(CONFIG_AV1_VAAPI_HWACCEL) += vaapi_av1.o OBJS-$(CONFIG_AV1_VDPAU_HWACCEL) += vdpau_av1.o OBJS-$(CONFIG_AV1_VIDEOTOOLBOX_HWACCEL) += videotoolbox_av1.o @@ -1063,6 +1066,7 @@ OBJS-$(CONFIG_H264_DXVA2_HWACCEL) += dxva2_h264.o OBJS-$(CONFIG_H264_D3D12VA_HWACCEL) += dxva2_h264.o d3d12va_h264.o OBJS-$(CONFIG_H264_NVDEC_HWACCEL) += nvdec_h264.o OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec.o +OBJS-$(CONFIG_H264_V4L2REQUEST_HWACCEL) += v4l2_request_h264.o OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += 
vdpau_h264.o OBJS-$(CONFIG_H264_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o @@ -1072,6 +1076,7 @@ OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_D3D12VA_HWACCEL) += dxva2_hevc.o d3d12va_hevc.o OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o +OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o OBJS-$(CONFIG_HEVC_VULKAN_HWACCEL) += vulkan_decode.o vulkan_hevc.o @@ -1085,6 +1090,7 @@ OBJS-$(CONFIG_MPEG2_DXVA2_HWACCEL) += dxva2_mpeg2.o OBJS-$(CONFIG_MPEG2_D3D12VA_HWACCEL) += dxva2_mpeg2.o d3d12va_mpeg2.o OBJS-$(CONFIG_MPEG2_NVDEC_HWACCEL) += nvdec_mpeg12.o OBJS-$(CONFIG_MPEG2_QSV_HWACCEL) += qsvdec.o +OBJS-$(CONFIG_MPEG2_V4L2REQUEST_HWACCEL) += v4l2_request_mpeg2.o OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL) += vaapi_mpeg2.o OBJS-$(CONFIG_MPEG2_VDPAU_HWACCEL) += vdpau_mpeg12.o OBJS-$(CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o @@ -1100,11 +1106,13 @@ OBJS-$(CONFIG_VC1_QSV_HWACCEL) += qsvdec.o OBJS-$(CONFIG_VC1_VAAPI_HWACCEL) += vaapi_vc1.o OBJS-$(CONFIG_VC1_VDPAU_HWACCEL) += vdpau_vc1.o OBJS-$(CONFIG_VP8_NVDEC_HWACCEL) += nvdec_vp8.o +OBJS-$(CONFIG_VP8_V4L2REQUEST_HWACCEL) += v4l2_request_vp8.o OBJS-$(CONFIG_VP8_VAAPI_HWACCEL) += vaapi_vp8.o OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL) += dxva2_vp9.o OBJS-$(CONFIG_VP9_DXVA2_HWACCEL) += dxva2_vp9.o OBJS-$(CONFIG_VP9_D3D12VA_HWACCEL) += dxva2_vp9.o d3d12va_vp9.o OBJS-$(CONFIG_VP9_NVDEC_HWACCEL) += nvdec_vp9.o +OBJS-$(CONFIG_VP9_V4L2REQUEST_HWACCEL) += v4l2_request_vp9.o OBJS-$(CONFIG_VP9_VAAPI_HWACCEL) += vaapi_vp9.o OBJS-$(CONFIG_VP9_VDPAU_HWACCEL) += vdpau_vp9.o OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL) += videotoolbox_vp9.o @@ -1360,6 +1368,7 @@ SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h SKIPHEADERS-$(CONFIG_VULKAN) += ffv1_vulkan.h prores_vulkan.h vulkan_video.h \ vulkan_encode.h vulkan_decode.h SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h +SKIPHEADERS-$(CONFIG_V4L2_REQUEST) += v4l2_request.h SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h TESTPROGS = avcodec \ diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 695214f192..08cccc2e81 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -871,6 +871,7 @@ extern const FFCodec ff_av1_qsv_encoder; extern const FFCodec ff_av1_amf_encoder; extern const FFCodec ff_av1_amf_decoder; extern const FFCodec ff_av1_mf_encoder; +extern const FFCodec ff_av1_v4l2m2m_decoder; extern const FFCodec ff_av1_vaapi_encoder; extern const FFCodec ff_av1_vulkan_encoder; extern const FFCodec ff_libopenh264_encoder; diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c index ba8442077a..dd89bf857b 100644 --- a/libavcodec/av1dec.c +++ b/libavcodec/av1dec.c @@ -542,6 +542,7 @@ static int get_pixel_format(AVCodecContext *avctx) CONFIG_AV1_D3D11VA_HWACCEL * 2 + \ CONFIG_AV1_D3D12VA_HWACCEL + \ CONFIG_AV1_NVDEC_HWACCEL + \ + CONFIG_AV1_V4L2REQUEST_HWACCEL + \ CONFIG_AV1_VAAPI_HWACCEL + \ CONFIG_AV1_VDPAU_HWACCEL + \ CONFIG_AV1_VIDEOTOOLBOX_HWACCEL + \ @@ -577,6 +578,9 @@ static int get_pixel_format(AVCodecContext *avctx) #endif #if CONFIG_AV1_VULKAN_HWACCEL *fmtp++ = AV_PIX_FMT_VULKAN; +#endif +#if CONFIG_AV1_V4L2REQUEST_HWACCEL + *fmtp++ = AV_PIX_FMT_DRM_PRIME; #endif break; case AV_PIX_FMT_YUV420P10: @@ -604,6 +608,9 @@ static int get_pixel_format(AVCodecContext *avctx) #endif #if CONFIG_AV1_VULKAN_HWACCEL *fmtp++ = AV_PIX_FMT_VULKAN; 
+#endif +#if CONFIG_AV1_V4L2REQUEST_HWACCEL + *fmtp++ = AV_PIX_FMT_DRM_PRIME; #endif break; case AV_PIX_FMT_YUV420P12: @@ -1630,6 +1637,9 @@ const FFCodec ff_av1_decoder = { #if CONFIG_AV1_VULKAN_HWACCEL HWACCEL_VULKAN(av1), #endif +#if CONFIG_AV1_V4L2REQUEST_HWACCEL + HWACCEL_V4L2REQUEST(av1), +#endif NULL }, diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index 0ce8e46c72..97fab7085c 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -789,6 +789,7 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) (CONFIG_H264_D3D11VA_HWACCEL * 2) + \ CONFIG_H264_D3D12VA_HWACCEL + \ CONFIG_H264_NVDEC_HWACCEL + \ + CONFIG_H264_V4L2REQUEST_HWACCEL + \ CONFIG_H264_VAAPI_HWACCEL + \ CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \ CONFIG_H264_VDPAU_HWACCEL + \ @@ -817,6 +818,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) #endif #if CONFIG_H264_NVDEC_HWACCEL *fmt++ = AV_PIX_FMT_CUDA; +#endif +#if CONFIG_H264_V4L2REQUEST_HWACCEL + *fmt++ = AV_PIX_FMT_DRM_PRIME; #endif if (CHROMA444(h)) { if (h->avctx->colorspace == AVCOL_SPC_RGB) { @@ -873,6 +877,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) #if CONFIG_H264_VIDEOTOOLBOX_HWACCEL if (h->avctx->colorspace != AVCOL_SPC_RGB) *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_H264_V4L2REQUEST_HWACCEL + *fmt++ = AV_PIX_FMT_DRM_PRIME; #endif if (CHROMA444(h)) { if (h->avctx->colorspace == AVCOL_SPC_RGB) @@ -1701,7 +1708,7 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl, unsigned int slice_type, tmp, i; int field_pic_flag, bottom_field_flag; int first_slice = sl == h->slice_ctx && !h->current_slice; - int picture_structure; + int picture_structure, pos; if (first_slice) av_assert0(!h->setup_finished); @@ -1792,6 +1799,7 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl, sl->poc_lsb = 0; sl->delta_poc_bottom = 0; + pos = get_bits_left(&sl->gb); if (sps->poc_type == 0) { sl->poc_lsb = get_bits(&sl->gb, sps->log2_max_poc_lsb); @@ -1806,6 +1814,7 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl, if (pps->pic_order_present == 1 && picture_structure == PICT_FRAME) sl->delta_poc[1] = get_se_golomb(&sl->gb); } + sl->pic_order_cnt_bit_size = pos - get_bits_left(&sl->gb); sl->redundant_pic_count = 0; if (pps->redundant_pic_cnt_present) @@ -1845,9 +1854,11 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl, sl->explicit_ref_marking = 0; if (nal->ref_idc) { + pos = get_bits_left(&sl->gb); ret = ff_h264_decode_ref_pic_marking(sl, &sl->gb, nal, h->avctx); if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE)) return AVERROR_INVALIDDATA; + sl->ref_pic_marking_bit_size = pos - get_bits_left(&sl->gb); } if (sl->slice_type_nos != AV_PICTURE_TYPE_I && pps->cabac) { diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index 809a938386..6c4ce97f63 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -1144,6 +1144,9 @@ const FFCodec ff_h264_decoder = { #endif #if CONFIG_H264_VULKAN_HWACCEL HWACCEL_VULKAN(h264), +#endif +#if CONFIG_H264_V4L2REQUEST_HWACCEL + HWACCEL_V4L2REQUEST(h264), #endif NULL }, diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h index 74fd09dfaa..33d788627f 100644 --- a/libavcodec/h264dec.h +++ b/libavcodec/h264dec.h @@ -322,6 +322,7 @@ typedef struct H264SliceContext { MMCO mmco[H264_MAX_MMCO_COUNT]; int nb_mmco; int explicit_ref_marking; + int ref_pic_marking_bit_size; int frame_num; int 
idr_pic_id; @@ -330,6 +331,7 @@ typedef struct H264SliceContext { int delta_poc[2]; int curr_pic_num; int max_pic_num; + int pic_order_cnt_bit_size; } H264SliceContext; /** diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c index ae5f0fe69d..647251e6f0 100644 --- a/libavcodec/hevc/hevcdec.c +++ b/libavcodec/hevc/hevcdec.c @@ -580,6 +580,7 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \ CONFIG_HEVC_D3D12VA_HWACCEL + \ CONFIG_HEVC_NVDEC_HWACCEL + \ + CONFIG_HEVC_V4L2REQUEST_HWACCEL + \ CONFIG_HEVC_VAAPI_HWACCEL + \ CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \ CONFIG_HEVC_VDPAU_HWACCEL + \ @@ -618,6 +619,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif #if CONFIG_HEVC_VULKAN_HWACCEL *fmt++ = AV_PIX_FMT_VULKAN; +#endif +#if CONFIG_HEVC_V4L2REQUEST_HWACCEL + *fmt++ = AV_PIX_FMT_DRM_PRIME; #endif break; case AV_PIX_FMT_YUV420P10: @@ -645,6 +649,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif #if CONFIG_HEVC_NVDEC_HWACCEL *fmt++ = AV_PIX_FMT_CUDA; +#endif +#if CONFIG_HEVC_V4L2REQUEST_HWACCEL + *fmt++ = AV_PIX_FMT_DRM_PRIME; #endif break; case AV_PIX_FMT_YUV444P: @@ -4280,6 +4287,9 @@ const FFCodec ff_hevc_decoder = { #endif #if CONFIG_HEVC_VULKAN_HWACCEL HWACCEL_VULKAN(hevc), +#endif +#if CONFIG_HEVC_V4L2REQUEST_HWACCEL + HWACCEL_V4L2REQUEST(hevc), #endif NULL }, diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index 3de191288a..7b6f67330a 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -24,6 +24,7 @@ extern const struct FFHWAccel ff_av1_d3d11va2_hwaccel; extern const struct FFHWAccel ff_av1_d3d12va_hwaccel; extern const struct FFHWAccel ff_av1_dxva2_hwaccel; extern const struct FFHWAccel ff_av1_nvdec_hwaccel; +extern const struct FFHWAccel ff_av1_v4l2request_hwaccel; extern const struct FFHWAccel ff_av1_vaapi_hwaccel; extern const struct FFHWAccel ff_av1_vdpau_hwaccel; extern const struct FFHWAccel ff_av1_videotoolbox_hwaccel; @@ -37,6 +38,7 @@ extern const struct FFHWAccel ff_h264_d3d11va2_hwaccel; extern const struct FFHWAccel ff_h264_d3d12va_hwaccel; extern const struct FFHWAccel ff_h264_dxva2_hwaccel; extern const struct FFHWAccel ff_h264_nvdec_hwaccel; +extern const struct FFHWAccel ff_h264_v4l2request_hwaccel; extern const struct FFHWAccel ff_h264_vaapi_hwaccel; extern const struct FFHWAccel ff_h264_vdpau_hwaccel; extern const struct FFHWAccel ff_h264_videotoolbox_hwaccel; @@ -46,6 +48,7 @@ extern const struct FFHWAccel ff_hevc_d3d11va2_hwaccel; extern const struct FFHWAccel ff_hevc_d3d12va_hwaccel; extern const struct FFHWAccel ff_hevc_dxva2_hwaccel; extern const struct FFHWAccel ff_hevc_nvdec_hwaccel; +extern const struct FFHWAccel ff_hevc_v4l2request_hwaccel; extern const struct FFHWAccel ff_hevc_vaapi_hwaccel; extern const struct FFHWAccel ff_hevc_vdpau_hwaccel; extern const struct FFHWAccel ff_hevc_videotoolbox_hwaccel; @@ -60,6 +63,7 @@ extern const struct FFHWAccel ff_mpeg2_d3d11va2_hwaccel; extern const struct FFHWAccel ff_mpeg2_d3d12va_hwaccel; extern const struct FFHWAccel ff_mpeg2_dxva2_hwaccel; extern const struct FFHWAccel ff_mpeg2_nvdec_hwaccel; +extern const struct FFHWAccel ff_mpeg2_v4l2request_hwaccel; extern const struct FFHWAccel ff_mpeg2_vaapi_hwaccel; extern const struct FFHWAccel ff_mpeg2_vdpau_hwaccel; extern const struct FFHWAccel ff_mpeg2_videotoolbox_hwaccel; @@ -78,12 +82,14 @@ extern const struct FFHWAccel ff_vc1_nvdec_hwaccel; extern const struct FFHWAccel ff_vc1_vaapi_hwaccel; extern const 
struct FFHWAccel ff_vc1_vdpau_hwaccel; extern const struct FFHWAccel ff_vp8_nvdec_hwaccel; +extern const struct FFHWAccel ff_vp8_v4l2request_hwaccel; extern const struct FFHWAccel ff_vp8_vaapi_hwaccel; extern const struct FFHWAccel ff_vp9_d3d11va_hwaccel; extern const struct FFHWAccel ff_vp9_d3d11va2_hwaccel; extern const struct FFHWAccel ff_vp9_d3d12va_hwaccel; extern const struct FFHWAccel ff_vp9_dxva2_hwaccel; extern const struct FFHWAccel ff_vp9_nvdec_hwaccel; +extern const struct FFHWAccel ff_vp9_v4l2request_hwaccel; extern const struct FFHWAccel ff_vp9_vaapi_hwaccel; extern const struct FFHWAccel ff_vp9_vdpau_hwaccel; extern const struct FFHWAccel ff_vp9_videotoolbox_hwaccel; diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h index ee29ca631d..159064a1f1 100644 --- a/libavcodec/hwconfig.h +++ b/libavcodec/hwconfig.h @@ -79,6 +79,8 @@ void ff_hwaccel_uninit(AVCodecContext *avctx); HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD, NONE, ff_ ## codec ## _d3d11va_hwaccel) #define HWACCEL_D3D12VA(codec) \ HW_CONFIG_HWACCEL(1, 1, 0, D3D12, D3D12VA, ff_ ## codec ## _d3d12va_hwaccel) +#define HWACCEL_V4L2REQUEST(codec) \ + HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME, V4L2REQUEST, ff_ ## codec ## _v4l2request_hwaccel) #define HW_CONFIG_ENCODER(device, frames, ad_hoc, format, device_type_) \ &(const AVCodecHWConfigInternal) { \ diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c index e42591110f..70cc0ecabc 100644 --- a/libavcodec/mmaldec.c +++ b/libavcodec/mmaldec.c @@ -24,6 +24,9 @@ * MMAL Video Decoder */ +#pragma GCC diagnostic push +// Many many redundant decls in the header files +#pragma GCC diagnostic ignored "-Wredundant-decls" #include #include #include @@ -31,6 +34,7 @@ #include #include #include +#pragma GCC diagnostic pop #include #include "avcodec.h" diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c index 4c83bcfa90..c65e9e3b86 100644 --- a/libavcodec/mpeg12dec.c +++ b/libavcodec/mpeg12dec.c @@ -821,6 +821,9 @@ static const enum AVPixelFormat mpeg2_hwaccel_pixfmt_list_420[] = { #endif #if CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL AV_PIX_FMT_VIDEOTOOLBOX, +#endif +#if CONFIG_MPEG2_V4L2REQUEST_HWACCEL + AV_PIX_FMT_DRM_PRIME, #endif AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE @@ -2736,6 +2739,9 @@ const FFCodec ff_mpeg2video_decoder = { #endif #if CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL HWACCEL_VIDEOTOOLBOX(mpeg2), +#endif +#if CONFIG_MPEG2_V4L2REQUEST_HWACCEL + HWACCEL_V4L2REQUEST(mpeg2), #endif NULL }, diff --git a/libavcodec/raw_pix_fmt_tags.h b/libavcodec/raw_pix_fmt_tags.h index bdde060cf7..a1621b7319 100644 --- a/libavcodec/raw_pix_fmt_tags.h +++ b/libavcodec/raw_pix_fmt_tags.h @@ -310,6 +310,12 @@ static const PixelFormatTag raw_pix_fmt_tags[] = { { AV_PIX_FMT_RGB565LE, MKTAG( 3 , 0 , 0 , 0 ) }, /* flipped RGB565LE */ { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */ + /* RPI (Might as well define for everything) */ + { AV_PIX_FMT_SAND128, MKTAG('S', 'A', 'N', 'D') }, + { AV_PIX_FMT_RPI4_8, MKTAG('S', 'A', 'N', 'D') }, + { AV_PIX_FMT_SAND64_10, MKTAG('S', 'N', 'D', 'A') }, + { AV_PIX_FMT_RPI4_10, MKTAG('S', 'N', 'D', 'B') }, + { AV_PIX_FMT_NONE, 0 }, }; diff --git a/libavcodec/rawenc.c b/libavcodec/rawenc.c index 8c577006d9..b215577b75 100644 --- a/libavcodec/rawenc.c +++ b/libavcodec/rawenc.c @@ -24,6 +24,7 @@ * Raw Video Encoder */ +#include "config.h" #include "avcodec.h" #include "codec_internal.h" #include "encode.h" @@ -33,6 +34,10 @@ #include "libavutil/intreadwrite.h" #include "libavutil/imgutils.h" #include "libavutil/internal.h" +#include 
"libavutil/avassert.h" +#if CONFIG_SAND +#include "libavutil/rpi_sand_fns.h" +#endif static av_cold int raw_encode_init(AVCodecContext *avctx) { @@ -46,22 +51,114 @@ static av_cold int raw_encode_init(AVCodecContext *avctx) return 0; } +#if CONFIG_SAND +static int raw_sand8_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, + const AVFrame *frame) +{ + const int width = av_frame_cropped_width(frame); + const int height = av_frame_cropped_height(frame); + const int x0 = frame->crop_left; + const int y0 = frame->crop_top; + const int size = width * height * 3 / 2; + uint8_t * dst; + int ret; + + if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) + return ret; + + dst = pkt->data; + + av_rpi_sand_to_planar_y8(dst, width, frame->data[0], frame->linesize[0], av_rpi_sand_frame_stride2_y(frame), x0, y0, width, height); + dst += width * height; + av_rpi_sand_to_planar_c8(dst, width / 2, dst + width * height / 4, width / 2, + frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2_c(frame), x0 / 2, y0 / 2, width / 2, height / 2); + return 0; +} + +static int raw_sand16_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, + const AVFrame *frame) +{ + const int width = av_frame_cropped_width(frame); + const int height = av_frame_cropped_height(frame); + const int x0 = frame->crop_left; + const int y0 = frame->crop_top; + const int size = width * height * 3; + uint8_t * dst; + int ret; + + if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) + return ret; + + dst = pkt->data; + + av_rpi_sand_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], av_rpi_sand_frame_stride2_y(frame), x0 * 2, y0, width * 2, height); + dst += width * height * 2; + av_rpi_sand_to_planar_c16(dst, width, dst + width * height / 2, width, + frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2_c(frame), x0, y0 / 2, width, height / 2); + return 0; +} + +static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, + const AVFrame *frame) +{ + const int width = av_frame_cropped_width(frame); + const int height = av_frame_cropped_height(frame); + const int x0 = frame->crop_left; + const int y0 = frame->crop_top; + const int size = width * height * 3; + uint8_t * dst; + int ret; + + if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) + return ret; + + dst = pkt->data; + + av_rpi_sand30_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], av_rpi_sand_frame_stride2_y(frame), x0, y0, width, height); + dst += width * height * 2; + av_rpi_sand30_to_planar_c16(dst, width, dst + width * height / 2, width, + frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2_c(frame), x0/2, y0 / 2, width/2, height / 2); + return 0; +} +#endif + + static int raw_encode(AVCodecContext *avctx, AVPacket *pkt, - const AVFrame *frame, int *got_packet) + const AVFrame *src_frame, int *got_packet) { - int ret = av_image_get_buffer_size(frame->format, - frame->width, frame->height, 1); + int ret; + AVFrame * frame = NULL; - if (ret < 0) +#if CONFIG_SAND + if (av_rpi_is_sand_frame(src_frame)) { + ret = av_rpi_is_sand8_frame(src_frame) ? raw_sand8_as_yuv420(avctx, pkt, src_frame) : + av_rpi_is_sand16_frame(src_frame) ? raw_sand16_as_yuv420(avctx, pkt, src_frame) : + av_rpi_is_sand30_frame(src_frame) ? 
raw_sand30_as_yuv420(avctx, pkt, src_frame) : -1; + *got_packet = (ret == 0); return ret; + } +#endif + + if ((frame = av_frame_clone(src_frame)) == NULL) { + ret = AVERROR(ENOMEM); + goto fail; + } + + if ((ret = av_frame_apply_cropping(frame, AV_FRAME_CROP_UNALIGNED)) < 0) + goto fail; + + ret = av_image_get_buffer_size(frame->format, + frame->width, frame->height, 1); + if (ret < 0) + goto fail; if ((ret = ff_get_encode_buffer(avctx, pkt, ret, 0)) < 0) - return ret; + goto fail; if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size, (const uint8_t **)frame->data, frame->linesize, frame->format, frame->width, frame->height, 1)) < 0) - return ret; + goto fail; if(avctx->codec_tag == AV_RL32("yuv2") && ret > 0 && frame->format == AV_PIX_FMT_YUYV422) { @@ -77,8 +174,15 @@ static int raw_encode(AVCodecContext *avctx, AVPacket *pkt, AV_WB64(&pkt->data[8 * x], v << 48 | v >> 16); } } + pkt->flags |= AV_PKT_FLAG_KEY; + av_frame_free(&frame); *got_packet = 1; return 0; + +fail: + av_frame_free(&frame); + *got_packet = 0; + return ret; } const FFCodec ff_rawvideo_encoder = { diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c index 077c1ee944..81d7919f5b 100644 --- a/libavcodec/v4l2_buffers.c +++ b/libavcodec/v4l2_buffers.c @@ -21,6 +21,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "config.h" #include #include #include @@ -28,61 +29,98 @@ #include #include #include "libavcodec/avcodec.h" +#include "libavcodec/internal.h" +#include "libavutil/avassert.h" +#include "libavutil/mem.h" #include "libavutil/pixdesc.h" #include "libavutil/refstruct.h" +#include "libavutil/hwcontext.h" #include "v4l2_context.h" #include "v4l2_buffers.h" #include "v4l2_m2m.h" +#include "v4l2_req_dmabufs.h" +#include "weak_link.h" + +#if CONFIG_LIBDRM +#include +#endif #ifndef USEC_PER_SEC #define USEC_PER_SEC 1000000 #endif -static AVRational v4l2_timebase = { 1, USEC_PER_SEC }; +static const AVRational v4l2_timebase = { 1, USEC_PER_SEC }; -static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf) +static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) { - return V4L2_TYPE_IS_OUTPUT(buf->context->type) ? - container_of(buf->context, V4L2m2mContext, output) : - container_of(buf->context, V4L2m2mContext, capture); + return V4L2_TYPE_IS_OUTPUT(ctx->type) ? + container_of(ctx, V4L2m2mContext, output) : + container_of(ctx, V4L2m2mContext, capture); } -static inline AVCodecContext *logger(V4L2Buffer *buf) +static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf) { - return buf_to_m2mctx(buf)->avctx; + return ctx_to_m2mctx(buf->context); } -static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf) +static inline AVCodecContext *logger(const V4L2Buffer * const buf) { - V4L2m2mContext *s = buf_to_m2mctx(avbuf); + return buf_to_m2mctx(buf)->avctx; +} - if (s->avctx->pkt_timebase.num) - return s->avctx->pkt_timebase; - return s->avctx->time_base; +static inline AVRational v4l2_get_timebase(const V4L2Buffer * const avbuf) +{ + const V4L2m2mContext *s = buf_to_m2mctx(avbuf); + const AVRational tb = s->avctx->pkt_timebase.num ? + s->avctx->pkt_timebase : + s->avctx->time_base; + return tb.num && tb.den ? 
tb : v4l2_timebase; } -static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts) +static inline struct timeval tv_from_int(const int64_t t) { - int64_t v4l2_pts; + return (struct timeval){ + .tv_usec = t % USEC_PER_SEC, + .tv_sec = t / USEC_PER_SEC + }; +} - if (pts == AV_NOPTS_VALUE) - pts = 0; +static inline int64_t int_from_tv(const struct timeval t) +{ + return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec; +} +static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts) +{ /* convert pts to v4l2 timebase */ - v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); - out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; - out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; + const int64_t v4l2_pts = + pts == AV_NOPTS_VALUE ? 0 : + av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); + out->buf.timestamp = tv_from_int(v4l2_pts); } -static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf) +static inline int64_t v4l2_get_pts(const V4L2Buffer * const avbuf) { - int64_t v4l2_pts; - + const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp); + return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE; +#if 0 /* convert pts back to encoder timebase */ - v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + - avbuf->buf.timestamp.tv_usec; + return + avbuf->context->no_pts_rescale ? v4l2_pts : + v4l2_pts == 0 ? AV_NOPTS_VALUE : + av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); +#endif +} - return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); +static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length) +{ + if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { + out->planes[plane].bytesused = bytesused; + out->planes[plane].length = length; + } else { + out->buf.bytesused = bytesused; + out->buf.length = length; + } } static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) @@ -119,6 +157,105 @@ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) return AVCOL_PRI_UNSPECIFIED; } +static void v4l2_set_color(V4L2Buffer *buf, + const enum AVColorPrimaries avcp, + const enum AVColorSpace avcs, + const enum AVColorTransferCharacteristic avxc) +{ + enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; + enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; + enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; + + switch (avcp) { + case AVCOL_PRI_BT709: + cs = V4L2_COLORSPACE_REC709; + ycbcr = V4L2_YCBCR_ENC_709; + break; + case AVCOL_PRI_BT470M: + cs = V4L2_COLORSPACE_470_SYSTEM_M; + ycbcr = V4L2_YCBCR_ENC_601; + break; + case AVCOL_PRI_BT470BG: + cs = V4L2_COLORSPACE_470_SYSTEM_BG; + break; + case AVCOL_PRI_SMPTE170M: + cs = V4L2_COLORSPACE_SMPTE170M; + break; + case AVCOL_PRI_SMPTE240M: + cs = V4L2_COLORSPACE_SMPTE240M; + break; + case AVCOL_PRI_BT2020: + cs = V4L2_COLORSPACE_BT2020; + break; + case AVCOL_PRI_SMPTE428: + case AVCOL_PRI_SMPTE431: + case AVCOL_PRI_SMPTE432: + case AVCOL_PRI_EBU3213: + case AVCOL_PRI_RESERVED: + case AVCOL_PRI_FILM: + case AVCOL_PRI_UNSPECIFIED: + default: + break; + } + + switch (avcs) { + case AVCOL_SPC_RGB: + cs = V4L2_COLORSPACE_SRGB; + break; + case AVCOL_SPC_BT709: + cs = V4L2_COLORSPACE_REC709; + break; + case AVCOL_SPC_FCC: + cs = V4L2_COLORSPACE_470_SYSTEM_M; + break; + case AVCOL_SPC_BT470BG: + cs = V4L2_COLORSPACE_470_SYSTEM_BG; + break; + case AVCOL_SPC_SMPTE170M: + cs = V4L2_COLORSPACE_SMPTE170M; + break; + case AVCOL_SPC_SMPTE240M: + cs = V4L2_COLORSPACE_SMPTE240M; + break; + case AVCOL_SPC_BT2020_CL: + cs = 
V4L2_COLORSPACE_BT2020; + ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM; + break; + case AVCOL_SPC_BT2020_NCL: + cs = V4L2_COLORSPACE_BT2020; + break; + default: + break; + } + + switch (xfer) { + case AVCOL_TRC_BT709: + xfer = V4L2_XFER_FUNC_709; + break; + case AVCOL_TRC_IEC61966_2_1: + xfer = V4L2_XFER_FUNC_SRGB; + break; + case AVCOL_TRC_SMPTE240M: + xfer = V4L2_XFER_FUNC_SMPTE240M; + break; + case AVCOL_TRC_SMPTE2084: + xfer = V4L2_XFER_FUNC_SMPTE2084; + break; + default: + break; + } + + if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { + buf->context->format.fmt.pix_mp.colorspace = cs; + buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr; + buf->context->format.fmt.pix_mp.xfer_func = xfer; + } else { + buf->context->format.fmt.pix.colorspace = cs; + buf->context->format.fmt.pix.ycbcr_enc = ycbcr; + buf->context->format.fmt.pix.xfer_func = xfer; + } +} + static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) { enum v4l2_quantization qt; @@ -137,6 +274,20 @@ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) return AVCOL_RANGE_UNSPECIFIED; } +static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr) +{ + const enum v4l2_quantization q = + avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE : + avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE : + V4L2_QUANTIZATION_DEFAULT; + + if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { + buf->context->format.fmt.pix_mp.quantization = q; + } else { + buf->context->format.fmt.pix.quantization = q; + } +} + static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf) { enum v4l2_ycbcr_encoding ycbcr; @@ -216,88 +367,259 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf) return AVCOL_TRC_UNSPECIFIED; } -static void v4l2_get_interlacing(AVFrame *frame, V4L2Buffer *buf) +static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf) { - enum v4l2_field field = V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ? - buf->context->format.fmt.pix_mp.field : - buf->context->format.fmt.pix.field; + return V4L2_FIELD_IS_INTERLACED(buf->buf.field); +} - switch (field) { - case V4L2_FIELD_INTERLACED: - case V4L2_FIELD_INTERLACED_TB: - frame->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST; - /* fallthrough */ - case V4L2_FIELD_INTERLACED_BT: - frame->flags |= AV_FRAME_FLAG_INTERLACED; - break; +static int v4l2_buf_is_top_first(const V4L2Buffer * const buf) +{ + return buf->buf.field == V4L2_FIELD_INTERLACED_TB; +} + +static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff) +{ + buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE : + is_tff ? 
V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT; +} + +static inline void frame_set_interlace(AVFrame* frame, const int is_interlaced, const int is_tff) +{ + if (!is_interlaced) { + frame->flags &= ~(AV_FRAME_FLAG_TOP_FIELD_FIRST | AV_FRAME_FLAG_INTERLACED); + } + else { + if (is_tff) + frame->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST | AV_FRAME_FLAG_INTERLACED; + else + frame->flags = (frame->flags & ~AV_FRAME_FLAG_TOP_FIELD_FIRST) | AV_FRAME_FLAG_INTERLACED; } } -static void v4l2_free_buffer(void *opaque, uint8_t *unused) +static inline int frame_is_interlaced(const AVFrame* const frame) { - V4L2Buffer* avbuf = opaque; - V4L2m2mContext *s = buf_to_m2mctx(avbuf); + return (frame->flags & AV_FRAME_FLAG_INTERLACED) != 0; +} - if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) { - atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel); +static inline int frame_is_tff(const AVFrame* const frame) +{ + return (frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) != 0; +} - if (s->reinit) { - if (!atomic_load(&s->refcount)) - sem_post(&s->refsync); - } else { - if (s->draining && V4L2_TYPE_IS_OUTPUT(avbuf->context->type)) { - /* no need to queue more buffers to the driver */ - avbuf->status = V4L2BUF_AVAILABLE; - } - else if (avbuf->context->streamon) - ff_v4l2_buffer_enqueue(avbuf); - } +static inline int frame_is_key(const AVFrame* const frame) +{ + return (frame->flags & AV_FRAME_FLAG_KEY) != 0; +} - av_refstruct_unref(&avbuf->context_ref); - } +static inline void frame_set_key(AVFrame* const frame, const int is_key) +{ + frame->flags = is_key ? + frame->flags | AV_FRAME_FLAG_KEY : + frame->flags & ~AV_FRAME_FLAG_KEY; } -static int v4l2_buf_increase_ref(V4L2Buffer *in) +#if CONFIG_LIBDRM +static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) { - V4L2m2mContext *s = buf_to_m2mctx(in); + AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; + AVDRMLayerDescriptor *layer; - if (in->context_ref) - atomic_fetch_add(&in->context_refcount, 1); - else { - in->context_ref = av_refstruct_ref(s->self_ref); + /* fill the DRM frame descriptor */ + drm_desc->nb_objects = avbuf->num_planes; + drm_desc->nb_layers = 1; + + layer = &drm_desc->layers[0]; + layer->nb_planes = avbuf->num_planes; - in->context_refcount = 1; + for (int i = 0; i < avbuf->num_planes; i++) { + layer->planes[i].object_index = i; + layer->planes[i].offset = avbuf->plane_info[i].offset; + layer->planes[i].pitch = avbuf->plane_info[i].bytesperline; } - in->status = V4L2BUF_RET_USER; - atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed); + switch (avbuf->context->av_pix_fmt) { + case AV_PIX_FMT_0BGR: + layer->format = DRM_FORMAT_RGBX8888; + break; + case AV_PIX_FMT_RGB0: + layer->format = DRM_FORMAT_XBGR8888; + break; + case AV_PIX_FMT_0RGB: + layer->format = DRM_FORMAT_BGRX8888; + break; + case AV_PIX_FMT_BGR0: + layer->format = DRM_FORMAT_XRGB8888; + break; - return 0; + case AV_PIX_FMT_ABGR: + layer->format = DRM_FORMAT_RGBA8888; + break; + case AV_PIX_FMT_RGBA: + layer->format = DRM_FORMAT_ABGR8888; + break; + case AV_PIX_FMT_ARGB: + layer->format = DRM_FORMAT_BGRA8888; + break; + case AV_PIX_FMT_BGRA: + layer->format = DRM_FORMAT_ARGB8888; + break; + + case AV_PIX_FMT_BGR24: + layer->format = DRM_FORMAT_BGR888; + break; + case AV_PIX_FMT_RGB24: + layer->format = DRM_FORMAT_RGB888; + break; + + case AV_PIX_FMT_YUYV422: + + layer->format = DRM_FORMAT_YUYV; + layer->nb_planes = 1; + + break; + + case AV_PIX_FMT_NV12: + case AV_PIX_FMT_NV21: + + layer->format = avbuf->context->av_pix_fmt == AV_PIX_FMT_NV12 ? 
+ DRM_FORMAT_NV12 : DRM_FORMAT_NV21; + + if (avbuf->num_planes > 1) + break; + + layer->nb_planes = 2; + + layer->planes[1].object_index = 0; + layer->planes[1].offset = avbuf->plane_info[0].bytesperline * + avbuf->context->format.fmt.pix.height; + layer->planes[1].pitch = avbuf->plane_info[0].bytesperline; + break; + + case AV_PIX_FMT_YUV420P: + + layer->format = DRM_FORMAT_YUV420; + + if (avbuf->num_planes > 1) + break; + + layer->nb_planes = 3; + + layer->planes[1].object_index = 0; + layer->planes[1].offset = avbuf->plane_info[0].bytesperline * + avbuf->context->format.fmt.pix.height; + layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1; + + layer->planes[2].object_index = 0; + layer->planes[2].offset = layer->planes[1].offset + + ((avbuf->plane_info[0].bytesperline * + avbuf->context->format.fmt.pix.height) >> 2); + layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1; + break; + + default: + drm_desc->nb_layers = 0; + break; + } + + return (uint8_t *) drm_desc; } +#endif -static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf) +static void v4l2_free_bufref(void *opaque, uint8_t *data) { - int ret; + AVBufferRef * bufref = (AVBufferRef *)data; + V4L2Buffer *avbuf = (V4L2Buffer *)bufref->data; + struct V4L2Context *ctx = ff_weak_link_lock(&avbuf->context_wl); - if (plane >= in->num_planes) - return AVERROR(EINVAL); + if (ctx != NULL) { + // Buffer still attached to context + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); - /* even though most encoders return 0 in data_offset encoding vp8 does require this value */ - *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset, - in->plane_info[plane].length, v4l2_free_buffer, in, 0); - if (!*buf) - return AVERROR(ENOMEM); + if (!s->output_drm && avbuf->dmabuf[0] != NULL) { + for (unsigned int i = 0; i != avbuf->num_planes; ++i) + dmabuf_read_end(avbuf->dmabuf[i]); + } - ret = v4l2_buf_increase_ref(in); - if (ret) - av_buffer_unref(buf); + ff_mutex_lock(&ctx->lock); - return ret; + ff_v4l2_buffer_set_avail(avbuf); + avbuf->buf.timestamp.tv_sec = 0; + avbuf->buf.timestamp.tv_usec = 0; + + if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { + av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name); + } + else if (ctx->streamon) { + av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name); + ff_v4l2_buffer_enqueue(avbuf); // will set to IN_DRIVER + } + else { + av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer freed but streamoff\n", ctx->name); + } + + ff_mutex_unlock(&ctx->lock); + } + + ff_weak_link_unlock(avbuf->context_wl); + av_buffer_unref(&bufref); +} + +static inline uint32_t ff_v4l2_buf_len(const struct v4l2_buffer * b, unsigned int i) +{ + return V4L2_TYPE_IS_MULTIPLANAR(b->type) ? 
b->m.planes[i].length : b->length; +} + +static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) +{ + int i, ret; + const V4L2m2mContext * const s = buf_to_m2mctx(avbuf); + + for (i = 0; i < avbuf->num_planes; i++) { + int dma_fd = -1; + const uint32_t blen = ff_v4l2_buf_len(&avbuf->buf, i); + + if (s->db_ctl != NULL) { + if ((avbuf->dmabuf[i] = dmabuf_alloc(s->db_ctl, blen)) == NULL) + return AVERROR(ENOMEM); + dma_fd = dmabuf_fd(avbuf->dmabuf[i]); + if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type)) + avbuf->buf.m.planes[i].m.fd = dma_fd; + else + avbuf->buf.m.fd = dma_fd; + + if (!s->output_drm) + avbuf->plane_info[i].mm_addr = dmabuf_map(avbuf->dmabuf[i]); + } + else { + struct v4l2_exportbuffer expbuf; + memset(&expbuf, 0, sizeof(expbuf)); + + expbuf.index = avbuf->buf.index; + expbuf.type = avbuf->buf.type; + expbuf.plane = i; + + ret = ioctl(s->fd, VIDIOC_EXPBUF, &expbuf); + if (ret < 0) + return AVERROR(errno); + dma_fd = expbuf.fd; + } + + avbuf->drm_frame.objects[i].size = blen; + avbuf->drm_frame.objects[i].fd = dma_fd; +#if !CONFIG_LIBDRM + avbuf->drm_frame.objects[i].format_modifier = 0; +#else + avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR; +#endif + } + + return 0; } static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset) { unsigned int bytesused, length; + int rv = 0; if (plane >= out->num_planes) return AVERROR(EINVAL); @@ -305,32 +627,65 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i length = out->plane_info[plane].length; bytesused = FFMIN(size+offset, length); - memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset)); - - if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { - out->planes[plane].bytesused = bytesused; - out->planes[plane].length = length; - } else { - out->buf.bytesused = bytesused; - out->buf.length = length; + if (size > length - offset) { + size = length - offset; + rv = AVERROR(ENOMEM); } - return 0; + memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, size); + + set_buf_length(out, plane, bytesused, length); + + return rv; +} + +static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf) +{ + AVBufferRef * bufref = av_buffer_ref(avbuf->context->bufrefs[avbuf->buf.index]); + AVBufferRef * newbuf; + + if (!bufref) + return NULL; + + newbuf = av_buffer_create((uint8_t *)bufref, sizeof(*bufref), v4l2_free_bufref, NULL, 0); + if (newbuf == NULL) + av_buffer_unref(&bufref); + + avbuf->status = V4L2BUF_RET_USER; + return newbuf; } static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) { - int i, ret; + int i; frame->format = avbuf->context->av_pix_fmt; - for (i = 0; i < avbuf->num_planes; i++) { - ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]); - if (ret) - return ret; + frame->buf[0] = wrap_avbuf(avbuf); + if (frame->buf[0] == NULL) + return AVERROR(ENOMEM); + + if (buf_to_m2mctx(avbuf)->output_drm) { +#if !CONFIG_LIBDRM + return AVERROR_OPTION_NOT_FOUND; +#else + /* 1. get references to the actual data */ + const int rv = ff_v4l2_context_frames_set(avbuf->context); + if (rv != 0) + return rv; + + frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf); + frame->format = AV_PIX_FMT_DRM_PRIME; + frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref); + return 0; +#endif + } + + /* 1. 
get references to the actual data */ + for (i = 0; i < avbuf->num_planes; i++) { + frame->data[i] = (uint8_t *)avbuf->plane_info[i].mm_addr + avbuf->planes[i].data_offset; frame->linesize[i] = avbuf->plane_info[i].bytesperline; - frame->data[i] = frame->buf[i]->data; } /* fixup special cases */ @@ -339,88 +694,152 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) case AV_PIX_FMT_NV21: if (avbuf->num_planes > 1) break; - frame->linesize[1] = avbuf->plane_info[0].bytesperline; - frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; + frame->linesize[1] = frame->linesize[0]; + frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); break; case AV_PIX_FMT_YUV420P: if (avbuf->num_planes > 1) break; - frame->linesize[1] = avbuf->plane_info[0].bytesperline >> 1; - frame->linesize[2] = avbuf->plane_info[0].bytesperline >> 1; - frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; - frame->data[2] = frame->data[1] + ((avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height) >> 2); + frame->linesize[1] = frame->linesize[0] / 2; + frame->linesize[2] = frame->linesize[1]; + frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); + frame->data[2] = frame->data[1] + frame->linesize[1] * ff_v4l2_get_format_height(&avbuf->context->format) / 2; break; default: break; } + if (avbuf->dmabuf[0] != NULL) { + for (unsigned int i = 0; i != avbuf->num_planes; ++i) + dmabuf_read_start(avbuf->dmabuf[i]); + } + + return 0; +} + +static void cpy_2d(uint8_t * dst, int dst_stride, const uint8_t * src, int src_stride, int w, int h) +{ + if (dst_stride == src_stride && w + 32 >= dst_stride) { + memcpy(dst, src, dst_stride * h); + } + else { + while (--h >= 0) { + memcpy(dst, src, w); + dst += dst_stride; + src += src_stride; + } + } +} + +static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes) +{ + return i != 0 && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA)); +} + +static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out) +{ + const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; + + if (frame->format != AV_PIX_FMT_DRM_PRIME || !src) + return AVERROR(EINVAL); + + av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF); + + if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { + // Only currently cope with single buffer types + if (out->buf.length != 1) + return AVERROR_PATCHWELCOME; + if (src->nb_objects != 1) + return AVERROR(EINVAL); + + out->planes[0].m.fd = src->objects[0].fd; + } + else { + if (src->nb_objects != 1) + return AVERROR(EINVAL); + + out->buf.m.fd = src->objects[0].fd; + } + + // No need to copy src AVDescriptor and if we did then we may confuse + // fd close on free + out->ref_buf = av_buffer_ref(frame->buf[0]); + return 0; } static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) { - int i, ret; - struct v4l2_format fmt = out->context->format; - int pixel_format = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? - fmt.fmt.pix_mp.pixelformat : fmt.fmt.pix.pixelformat; - int height = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? 
- fmt.fmt.pix_mp.height : fmt.fmt.pix.height; - int is_planar_format = 0; - - switch (pixel_format) { - case V4L2_PIX_FMT_YUV420M: - case V4L2_PIX_FMT_YVU420M: -#ifdef V4L2_PIX_FMT_YUV422M - case V4L2_PIX_FMT_YUV422M: -#endif -#ifdef V4L2_PIX_FMT_YVU422M - case V4L2_PIX_FMT_YVU422M: -#endif -#ifdef V4L2_PIX_FMT_YUV444M - case V4L2_PIX_FMT_YUV444M: -#endif -#ifdef V4L2_PIX_FMT_YVU444M - case V4L2_PIX_FMT_YVU444M: -#endif - case V4L2_PIX_FMT_NV12M: - case V4L2_PIX_FMT_NV21M: - case V4L2_PIX_FMT_NV12MT_16X16: - case V4L2_PIX_FMT_NV12MT: - case V4L2_PIX_FMT_NV16M: - case V4L2_PIX_FMT_NV61M: - is_planar_format = 1; - } - - if (!is_planar_format) { - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); - int planes_nb = 0; - int offset = 0; - - for (i = 0; i < desc->nb_components; i++) - planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); - - for (i = 0; i < planes_nb; i++) { - int size, h = height; - if (i == 1 || i == 2) { + int i; + int num_planes = 0; + int pel_strides[4] = {0}; + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); + + if ((desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0) { + av_log(NULL, AV_LOG_ERROR, "%s: HWACCEL cannot be copied\n", __func__); + return -1; + } + + for (i = 0; i != desc->nb_components; ++i) { + if (desc->comp[i].plane >= num_planes) + num_planes = desc->comp[i].plane + 1; + pel_strides[desc->comp[i].plane] = desc->comp[i].step; + } + + if (out->num_planes > 1) { + if (num_planes != out->num_planes) { + av_log(NULL, AV_LOG_ERROR, "%s: Num planes mismatch: %d != %d\n", __func__, num_planes, out->num_planes); + return -1; + } + for (i = 0; i != num_planes; ++i) { + int w = frame->width; + int h = frame->height; + if (is_chroma(desc, i, num_planes)) { + w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); } - size = frame->linesize[i] * h; - ret = v4l2_bufref_to_buf(out, 0, frame->data[i], size, offset); - if (ret) - return ret; - offset += size; + + cpy_2d(out->plane_info[i].mm_addr, out->plane_info[i].bytesperline, + frame->data[i], frame->linesize[i], + w * pel_strides[i], h); + set_buf_length(out, i, out->plane_info[i].bytesperline * h, out->plane_info[i].length); } - return 0; } + else + { + unsigned int offset = 0; + + for (i = 0; i != num_planes; ++i) { + int w = frame->width; + int h = frame->height; + int dst_stride = out->plane_info[0].bytesperline; + uint8_t * const dst = (uint8_t *)out->plane_info[0].mm_addr + offset; + + if (is_chroma(desc, i, num_planes)) { + // Is chroma + dst_stride >>= desc->log2_chroma_w; + offset += dst_stride * (out->context->height >> desc->log2_chroma_h); + w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); + h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); + } + else { + // Is luma or alpha + offset += dst_stride * out->context->height; + } + if (offset > out->plane_info[0].length) { + av_log(NULL, AV_LOG_ERROR, "%s: Plane total %u > buffer size %zu\n", __func__, offset, out->plane_info[0].length); + return -1; + } - for (i = 0; i < out->num_planes; i++) { - ret = v4l2_bufref_to_buf(out, i, frame->buf[i]->data, frame->buf[i]->size, 0); - if (ret) - return ret; + cpy_2d(dst, dst_stride, + frame->data[i], frame->linesize[i], + w * pel_strides[i], h); + } + set_buf_length(out, 0, offset, out->plane_info[0].length); } - return 0; } @@ -430,16 +849,31 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) * ******************************************************************************/ -int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, 
V4L2Buffer *out) +int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts) { - v4l2_set_pts(out, frame->pts); - - return v4l2_buffer_swframe_to_buf(frame, out); + out->buf.flags = frame_is_key(frame) ? + (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : + (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); + // Beware that colour info is held in format rather than the actual + // v4l2 buffer struct so this may not be as useful as you might hope + v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc); + v4l2_set_color_range(out, frame->color_range); + // PTS & interlace are buffer vars + if (track_ts) + out->buf.timestamp = tv_from_int(track_ts); + else + v4l2_set_pts(out, frame->pts); + v4l2_set_interlace(out, frame_is_interlaced(frame), frame_is_tff(frame)); + + return frame->format == AV_PIX_FMT_DRM_PRIME ? + v4l2_buffer_primeframe_to_buf(frame, out) : + v4l2_buffer_swframe_to_buf(frame, out); } int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) { int ret; + V4L2Context * const ctx = avbuf->context; av_frame_unref(frame); @@ -449,20 +883,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) return ret; /* 2. get frame information */ - if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) - frame->flags |= AV_FRAME_FLAG_KEY; + frame_set_key(frame, avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME); + frame->pict_type = (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) != 0 ? AV_PICTURE_TYPE_I : + (avbuf->buf.flags & V4L2_BUF_FLAG_PFRAME) != 0 ? AV_PICTURE_TYPE_P : + (avbuf->buf.flags & V4L2_BUF_FLAG_BFRAME) != 0 ? AV_PICTURE_TYPE_B : + AV_PICTURE_TYPE_NONE; frame->color_primaries = v4l2_get_color_primaries(avbuf); frame->colorspace = v4l2_get_color_space(avbuf); frame->color_range = v4l2_get_color_range(avbuf); frame->color_trc = v4l2_get_color_trc(avbuf); frame->pts = v4l2_get_pts(avbuf); frame->pkt_dts = AV_NOPTS_VALUE; - v4l2_get_interlacing(frame, avbuf); + frame_set_interlace(frame, v4l2_buf_is_interlaced(avbuf), v4l2_buf_is_top_first(avbuf)); /* these values are updated also during re-init in v4l2_process_driver_event */ - frame->height = avbuf->context->height; - frame->width = avbuf->context->width; - frame->sample_aspect_ratio = avbuf->context->sample_aspect_ratio; + frame->height = ctx->height; + frame->width = ctx->width; + frame->sample_aspect_ratio = ctx->sample_aspect_ratio; + + if (ctx->selection.height && ctx->selection.width) { + frame->crop_left = ctx->selection.left < frame->width ? ctx->selection.left : 0; + frame->crop_top = ctx->selection.top < frame->height ? ctx->selection.top : 0; + frame->crop_right = ctx->selection.left + ctx->selection.width < frame->width ? + frame->width - (ctx->selection.left + ctx->selection.width) : 0; + frame->crop_bottom = ctx->selection.top + ctx->selection.height < frame->height ? + frame->height - (ctx->selection.top + ctx->selection.height) : 0; + } /* 3. report errors upstream */ if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) { @@ -475,15 +921,15 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) { - int ret; - av_packet_unref(pkt); - ret = v4l2_buf_to_bufref(avbuf, 0, &pkt->buf); - if (ret) - return ret; + + pkt->buf = wrap_avbuf(avbuf); + if (pkt->buf == NULL) + return AVERROR(ENOMEM); pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? 
avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused; - pkt->data = pkt->buf->data; + pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset; + pkt->flags = 0; if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) pkt->flags |= AV_PKT_FLAG_KEY; @@ -498,39 +944,108 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) return 0; } -int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) +int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, + const void *extdata, size_t extlen, + const int64_t timestamp) { int ret; - ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, 0); - if (ret) + if (extlen) { + ret = v4l2_bufref_to_buf(out, 0, extdata, extlen, 0); + if (ret) + return ret; + } + + ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen); + if (ret && ret != AVERROR(ENOMEM)) return ret; - v4l2_set_pts(out, pkt->pts); + if (timestamp) + out->buf.timestamp = tv_from_int(timestamp); + else + v4l2_set_pts(out, pkt->pts); - if (pkt->flags & AV_PKT_FLAG_KEY) - out->flags = V4L2_BUF_FLAG_KEYFRAME; + out->buf.flags = (pkt->flags & AV_PKT_FLAG_KEY) != 0 ? + (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : + (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); - return 0; + return ret; +} + +int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) +{ + return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0); +} + + +static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data) +{ + V4L2Buffer * const avbuf = (V4L2Buffer *)data; + int i; + + for (i = 0; i != FF_ARRAY_ELEMS(avbuf->plane_info); ++i) { + struct V4L2Plane_info *p = avbuf->plane_info + i; + if (p->mm_addr != NULL) + munmap(p->mm_addr, p->length); + } + + if (avbuf->dmabuf[0] == NULL) { + for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { + if (avbuf->drm_frame.objects[i].fd != -1) + close(avbuf->drm_frame.objects[i].fd); + } + } + else { + for (i = 0; i != FF_ARRAY_ELEMS(avbuf->dmabuf); ++i) { + dmabuf_free(avbuf->dmabuf[i]); + } + } + + av_buffer_unref(&avbuf->ref_buf); + + ff_weak_link_unref(&avbuf->context_wl); + + av_free(avbuf); } -int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) + +int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem) { - V4L2Context *ctx = avbuf->context; int ret, i; + V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf)); + AVBufferRef * bufref; + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); + int want_mmap; - avbuf->buf.memory = V4L2_MEMORY_MMAP; + *pbufref = NULL; + if (avbuf == NULL) + return AVERROR(ENOMEM); + + bufref = av_buffer_create((uint8_t*)avbuf, sizeof(*avbuf), v4l2_buffer_buffer_free, NULL, 0); + if (bufref == NULL) { + av_free(avbuf); + return AVERROR(ENOMEM); + } + + avbuf->context = ctx; + avbuf->buf.memory = mem; avbuf->buf.type = ctx->type; avbuf->buf.index = index; + for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { + avbuf->drm_frame.objects[i].fd = -1; + } + + avbuf->context_wl = ff_weak_link_ref(ctx->wl_master); + if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->buf.length = VIDEO_MAX_PLANES; avbuf->buf.m.planes = avbuf->planes; } - ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf); + ret = ioctl(s->fd, VIDIOC_QUERYBUF, &avbuf->buf); if (ret < 0) - return AVERROR(errno); + goto fail; if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->num_planes = 0; @@ -542,33 +1057,41 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) } else avbuf->num_planes = 1; - for (i = 0; i < 
avbuf->num_planes; i++) { + want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP && + (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm); + for (i = 0; i < avbuf->num_planes; i++) { avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline : ctx->format.fmt.pix.bytesperline; if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length; - avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, - PROT_READ | PROT_WRITE, MAP_SHARED, - buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); + avbuf->plane_info[i].offset = avbuf->buf.m.planes[i].data_offset; + + if (want_mmap) + avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, + PROT_READ | PROT_WRITE, MAP_SHARED, + buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); } else { avbuf->plane_info[i].length = avbuf->buf.length; - avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, - PROT_READ | PROT_WRITE, MAP_SHARED, - buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); + avbuf->plane_info[i].offset = 0; + + if (want_mmap) + avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, + PROT_READ | PROT_WRITE, MAP_SHARED, + buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); } - if (avbuf->plane_info[i].mm_addr == MAP_FAILED) - return AVERROR(ENOMEM); + if (avbuf->plane_info[i].mm_addr == MAP_FAILED) { + avbuf->plane_info[i].mm_addr = NULL; + ret = AVERROR(ENOMEM); + goto fail; + } } avbuf->status = V4L2BUF_AVAILABLE; - if (V4L2_TYPE_IS_OUTPUT(ctx->type)) - return 0; - if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->buf.m.planes = avbuf->planes; avbuf->buf.length = avbuf->num_planes; @@ -578,20 +1101,52 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) avbuf->buf.length = avbuf->planes[0].length; } - return ff_v4l2_buffer_enqueue(avbuf); + if (V4L2_TYPE_IS_CAPTURE(ctx->type) && !want_mmap) { + // export_drm does dmabuf alloc if we aren't using v4l2 alloc + ret = v4l2_buffer_export_drm(avbuf); + if (ret) { + av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n"); + goto fail; + } + } + + *pbufref = bufref; + return 0; + +fail: + av_buffer_unref(&bufref); + return ret; } int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) { int ret; + int qc; - avbuf->buf.flags = avbuf->flags; + if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) { + av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", + avbuf->context->name, avbuf->buf.index, + avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, + avbuf->context->q_count); + } ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QBUF, &avbuf->buf); - if (ret < 0) - return AVERROR(errno); + if (ret < 0) { + int err = errno; + av_log(logger(avbuf), AV_LOG_ERROR, "--- %s VIDIOC_QBUF: index %d FAIL err %d (%s)\n", + avbuf->context->name, avbuf->buf.index, + err, strerror(err)); + return AVERROR(err); + } + // Lock not wanted - if called from buffer free then lock already obtained + qc = atomic_fetch_add(&avbuf->context->q_count, 1) + 1; avbuf->status = V4L2BUF_IN_DRIVER; + pthread_cond_broadcast(&avbuf->context->cond); + + av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", + avbuf->context->name, avbuf->buf.index, + avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, qc); return 0; } diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h index e35b161309..444ad94b14 100644 --- 
a/libavcodec/v4l2_buffers.h +++ b/libavcodec/v4l2_buffers.h @@ -28,31 +28,47 @@ #include #include +#include "avcodec.h" +#include "libavutil/buffer.h" #include "libavutil/frame.h" +#include "libavutil/hwcontext_drm.h" #include "packet.h" enum V4L2Buffer_status { V4L2BUF_AVAILABLE, V4L2BUF_IN_DRIVER, + V4L2BUF_IN_USE, V4L2BUF_RET_USER, }; /** * V4L2Buffer (wrapper for v4l2_buffer management) */ +struct V4L2Context; +struct ff_weak_link_client; +struct dmabuf_h; + typedef struct V4L2Buffer { - /* each buffer needs to have a reference to its context */ + /* each buffer needs to have a reference to its context + * The pointer is good enough for most operation but once the buffer has + * been passed to the user the buffer may become orphaned so for free ops + * the weak link must be used to ensure that the context is actually + * there + */ struct V4L2Context *context; + struct ff_weak_link_client *context_wl; - /* This object is refcounted per-plane, so we need to keep track - * of how many context-refs we are holding. - * This pointer is a RefStruct reference. */ - const struct V4L2m2mContext *context_ref; - atomic_uint context_refcount; + /* DRM descriptor */ + AVDRMFrameDescriptor drm_frame; + /* For DRM_PRIME encode - need to keep a ref to the source buffer till we + * are done + */ + AVBufferRef * ref_buf; /* keep track of the mmap address and mmap length */ struct V4L2Plane_info { - int bytesperline; + size_t bytesperline; + size_t offset; void * mm_addr; size_t length; } plane_info[VIDEO_MAX_PLANES]; @@ -63,9 +79,9 @@ typedef struct V4L2Buffer { struct v4l2_buffer buf; struct v4l2_plane planes[VIDEO_MAX_PLANES]; - int flags; enum V4L2Buffer_status status; + struct dmabuf_h * dmabuf[VIDEO_MAX_PLANES]; // If externally alloced dmabufs - stash other info here } V4L2Buffer; /** @@ -101,6 +117,10 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf); */ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); +int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, + const void *extdata, size_t extlen, + const int64_t timestamp); + /** * Extracts the data from an AVFrame to a V4L2Buffer * @@ -109,7 +129,7 @@ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); * * @returns 0 in case of success, a negative AVERROR code otherwise */ -int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); +int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts); /** * Initializes a V4L2Buffer @@ -119,7 +139,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); * * @returns 0 in case of success, a negative AVERROR code otherwise */ -int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); +int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem); /** * Enqueues a V4L2Buffer @@ -130,5 +150,12 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); */ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf); +static inline void +ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf) +{ + avbuf->status = V4L2BUF_AVAILABLE; + av_buffer_unref(&avbuf->ref_buf); +} + #endif // AVCODEC_V4L2_BUFFERS_H diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c index be1df3785b..cf4b90155e 100644 --- a/libavcodec/v4l2_context.c +++ b/libavcodec/v4l2_context.c @@ -28,11 +28,14 @@ #include #include #include "libavutil/mem.h" +#include "libavutil/avassert.h" +#include "libavutil/pixdesc.h" #include 
"libavcodec/avcodec.h" #include "decode.h" #include "v4l2_buffers.h" #include "v4l2_fmt.h" #include "v4l2_m2m.h" +#include "weak_link.h" struct v4l2_format_update { uint32_t v4l2_fmt; @@ -42,26 +45,154 @@ struct v4l2_format_update { int update_avfmt; }; -static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx) + +static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) { - return V4L2_TYPE_IS_OUTPUT(ctx->type) ? - container_of(ctx, V4L2m2mContext, output) : - container_of(ctx, V4L2m2mContext, capture); + return (int64_t)n; } -static inline AVCodecContext *logger(V4L2Context *ctx) +static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) { - return ctx_to_m2mctx(ctx)->avctx; + return (unsigned int)pts; } -static inline unsigned int v4l2_get_width(struct v4l2_format *fmt) +// FFmpeg requires us to propagate a number of vars from the coded pkt into +// the decoded frame. The only thing that tracks like that in V4L2 stateful +// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no +// guarantees about PTS being unique or specified for every frame so replace +// the supplied PTS with a simple incrementing number and keep a circular +// buffer of all the things we want preserved (including the original PTS) +// indexed by the tracking no. +static int64_t +xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt) { - return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; + int64_t track_pts; + + // Avoid 0 + if (++x->track_no == 0) + x->track_no = 1; + + track_pts = track_to_pts(avctx, x->track_no); + + av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); + x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ + .discard = 0, + .pending = 1, + .pts = avpkt->pts, + .dts = avpkt->dts, + .duration = avpkt->duration, + .track_pts = track_pts + }; + return track_pts; } -static inline unsigned int v4l2_get_height(struct v4l2_format *fmt) +static int64_t +xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame) { - return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; + int64_t track_pts; + + // Avoid 0 + if (++x->track_no == 0) + x->track_no = 1; + + track_pts = track_to_pts(avctx, x->track_no); + + av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no); + x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ + .discard = 0, + .pending = 1, + .pts = frame->pts, + .dts = AV_NOPTS_VALUE, + .duration = frame->duration, + .track_pts = track_pts + }; + return track_pts; +} + + +// Returns -1 if we should discard the frame +static int +xlat_pts_frame_out(AVCodecContext *const avctx, + xlat_track_t * const x, + AVFrame *const frame) +{ + unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; + V4L2m2mTrackEl *const t = x->track_els + n; + if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) + { + av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, + "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); + frame->pts = AV_NOPTS_VALUE; + frame->pkt_dts = AV_NOPTS_VALUE; + frame->duration = 0; + } + else if (!t->discard) + { + frame->pts = t->pending ? 
t->pts : AV_NOPTS_VALUE; + frame->pkt_dts = t->dts; + frame->duration = t->duration; + + if (frame->pts != AV_NOPTS_VALUE) + x->last_pts = frame->pts; + t->pending = 0; + } + else + { + av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); + return -1; + } + + av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n", + frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n); + return 0; +} + +// Returns -1 if we should discard the frame +static int +xlat_pts_pkt_out(AVCodecContext *const avctx, + xlat_track_t * const x, + AVPacket *const pkt) +{ + unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE; + V4L2m2mTrackEl *const t = x->track_els + n; + if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts) + { + av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, + "Pkt tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); + pkt->pts = AV_NOPTS_VALUE; + } + else if (!t->discard) + { + pkt->pts = t->pending ? t->pts : AV_NOPTS_VALUE; + + if (pkt->pts != AV_NOPTS_VALUE) + x->last_pts = pkt->pts; + t->pending = 0; + } + else + { + av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); + return -1; + } + + // * Would like something much better than this...xlat(offset + out_count)? + pkt->dts = pkt->pts; + av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%d\n", + pkt->pts, t->track_pts, n); + return 0; +} + + +static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) +{ + return V4L2_TYPE_IS_OUTPUT(ctx->type) ? + container_of(ctx, V4L2m2mContext, output) : + container_of(ctx, V4L2m2mContext, capture); +} + +static inline AVCodecContext *logger(const V4L2Context *ctx) +{ + return ctx_to_m2mctx(ctx)->avctx; } static AVRational v4l2_get_sar(V4L2Context *ctx) @@ -82,21 +213,29 @@ static AVRational v4l2_get_sar(V4L2Context *ctx) return sar; } -static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2) +static inline int ctx_buffers_alloced(const V4L2Context * const ctx) +{ + return ctx->bufrefs != NULL; +} + +// Width/Height changed or we don't have an alloc in the first place? +static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2) { - struct v4l2_format *fmt1 = &ctx->format; - int ret = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? - fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || - fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height - : - fmt1->fmt.pix.width != fmt2->fmt.pix.width || - fmt1->fmt.pix.height != fmt2->fmt.pix.height; + const struct v4l2_format *fmt1 = &ctx->format; + int ret = !ctx_buffers_alloced(ctx) || + (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? 
+ fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || + fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height + : + fmt1->fmt.pix.width != fmt2->fmt.pix.width || + fmt1->fmt.pix.height != fmt2->fmt.pix.height); if (ret) - av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n", + av_log(logger(ctx), AV_LOG_DEBUG, "V4L2 %s changed: alloc=%d (%dx%d) -> (%dx%d)\n", ctx->name, - v4l2_get_width(fmt1), v4l2_get_height(fmt1), - v4l2_get_width(fmt2), v4l2_get_height(fmt2)); + ctx_buffers_alloced(ctx), + ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1), + ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2)); return ret; } @@ -154,76 +293,100 @@ static inline void v4l2_save_to_context(V4L2Context* ctx, struct v4l2_format_upd } } -static int v4l2_start_decode(V4L2Context *ctx) +static int get_default_selection(V4L2Context * const ctx, struct v4l2_rect *r) { - struct v4l2_decoder_cmd cmd = { - .cmd = V4L2_DEC_CMD_START, - .flags = 0, + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); + struct v4l2_selection selection = { + .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, + .target = V4L2_SEL_TGT_COMPOSE }; - int ret; - ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DECODER_CMD, &cmd); - if (ret) + memset(r, 0, sizeof(*r)); + if (ioctl(s->fd, VIDIOC_G_SELECTION, &selection)) return AVERROR(errno); + *r = selection.r; return 0; } -/** - * handle resolution change event and end of stream event - * returns 1 if reinit was successful, negative if it failed - * returns 0 if reinit was not executed - */ -static int v4l2_handle_event(V4L2Context *ctx) +static int do_source_change(V4L2m2mContext * const s) { - V4L2m2mContext *s = ctx_to_m2mctx(ctx); - struct v4l2_format cap_fmt = s->capture.format; - struct v4l2_event evt = { 0 }; + AVCodecContext *const avctx = s->avctx; + int ret; + int reinit; + struct v4l2_format cap_fmt = s->capture.format; - ret = ioctl(s->fd, VIDIOC_DQEVENT, &evt); - if (ret < 0) { - av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_DQEVENT\n", ctx->name); - return 0; - } + s->capture.done = 0; - if (evt.type == V4L2_EVENT_EOS) { - ctx->done = 1; + ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); + if (ret) { + av_log(avctx, AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", s->capture.name); return 0; } - if (evt.type != V4L2_EVENT_SOURCE_CHANGE) - return 0; + get_default_selection(&s->capture, &s->capture.selection); - ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); - if (ret) { - av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT\n", s->capture.name); - return 0; + reinit = ctx_resolution_changed(&s->capture, &cap_fmt); + if ((s->quirks & FF_V4L2_QUIRK_REINIT_ALWAYS) != 0) + reinit = 1; + + s->capture.format = cap_fmt; + if (reinit) { + s->capture.height = ff_v4l2_get_format_height(&cap_fmt); + s->capture.width = ff_v4l2_get_format_width(&cap_fmt); } - if (v4l2_resolution_changed(&s->capture, &cap_fmt)) { - s->capture.height = v4l2_get_height(&cap_fmt); - s->capture.width = v4l2_get_width(&cap_fmt); - s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); - } else { - v4l2_start_decode(ctx); - return 0; + // If we don't support selection (or it is bust) and we obviously have HD then kludge + if ((s->capture.selection.width == 0 || s->capture.selection.height == 0) && + (s->capture.height == 1088 && s->capture.width == 1920)) { + s->capture.selection = (struct v4l2_rect){.width = 1920, .height = 1080}; } - s->reinit = 1; + s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); - if (s->avctx) - ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height); - if (ret < 0) 
- av_log(logger(ctx), AV_LOG_WARNING, "update avcodec height and width\n"); + av_log(avctx, AV_LOG_DEBUG, "Source change: Fmt: %s, SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n", + av_fourcc2str(ff_v4l2_get_format_pixelformat(&cap_fmt)), + s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den, + s->capture.width, s->capture.height, + s->capture.selection.width, s->capture.selection.height, + s->capture.selection.left, s->capture.selection.top, reinit); - ret = ff_v4l2_m2m_codec_reinit(s); - if (ret) { - av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_reinit\n"); - return AVERROR(EINVAL); + ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); + if (ret) + av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n"); + s->draining = 0; + + if (!reinit) { + /* Buffers are OK so just stream off to ack */ + av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__); + } + else { + if (avctx) + ret = ff_set_dimensions(s->avctx, + s->capture.selection.width != 0 ? s->capture.selection.width : s->capture.width, + s->capture.selection.height != 0 ? s->capture.selection.height : s->capture.height); + if (ret < 0) + av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n"); + + ff_v4l2_context_release(&s->capture); + + if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) || + s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) { + av_log(avctx, AV_LOG_ERROR, "Format post reinit too small: wanted %dx%d > got %dx%d\n", + s->capture.width, s->capture.height, + ff_v4l2_get_format_width(&s->capture.format), ff_v4l2_get_format_height(&s->capture.format)); + return AVERROR(EINVAL); + } + + // Update pixel format - should only actually do something on initial change + s->capture.av_pix_fmt = + ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO); + avctx->pix_fmt = s->output_drm ? AV_PIX_FMT_DRM_PRIME : s->capture.av_pix_fmt; + avctx->sw_pix_fmt = s->capture.av_pix_fmt; } - /* reinit executed */ + ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON); return 1; } @@ -267,175 +430,300 @@ static int v4l2_stop_encode(V4L2Context *ctx) return 0; } -static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout) +// DQ a buffer +// Amalgamates all the various ways there are of signalling EOS/Event to +// generate a consistant EPIPE. +// +// Sets ctx->flag_last if next dq would produce EPIPE (i.e. stream has stopped) +// +// Returns: +// 0 Success +// AVERROR(EPIPE) Nothing more to read +// AVERROR(ENOSPC) No buffers in Q to put result in +// * AVERROR(..) 
+ + static int +dq_buf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf) { - struct v4l2_plane planes[VIDEO_MAX_PLANES]; - struct v4l2_buffer buf = { 0 }; - V4L2Buffer *avbuf; - struct pollfd pfd = { - .events = POLLIN | POLLRDNORM | POLLPRI | POLLOUT | POLLWRNORM, /* default blocking capture */ - .fd = ctx_to_m2mctx(ctx)->fd, + V4L2m2mContext * const m = ctx_to_m2mctx(ctx); + AVCodecContext * const avctx = m->avctx; + V4L2Buffer * avbuf; + const int is_mp = V4L2_TYPE_IS_MULTIPLANAR(ctx->type); + + struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; + + struct v4l2_buffer buf = { + .type = ctx->type, + .memory = V4L2_MEMORY_MMAP, }; - int i, ret; - if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx->buffers) { - for (i = 0; i < ctx->num_buffers; i++) { - if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) - break; - } - if (i == ctx->num_buffers) - av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers returned to " - "userspace. Increase num_capture_buffers " - "to prevent device deadlock or dropped " - "packets/frames.\n"); - } - - /* if we are draining and there are no more capture buffers queued in the driver we are done */ - if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx_to_m2mctx(ctx)->draining) { - for (i = 0; i < ctx->num_buffers; i++) { - /* capture buffer initialization happens during decode hence - * detection happens at runtime - */ - if (!ctx->buffers) - break; + *ppavbuf = NULL; + + if (ctx->flag_last) + return AVERROR(EPIPE); - if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) - goto start; + if (is_mp) { + buf.length = VIDEO_MAX_PLANES; + buf.m.planes = planes; + } + + while (ioctl(m->fd, VIDIOC_DQBUF, &buf) != 0) { + const int err = errno; + av_assert0(AVERROR(err) < 0); + if (err != EINTR) { + av_log(avctx, AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", + ctx->name, av_err2str(AVERROR(err))); + + if (err == EPIPE) + ctx->flag_last = 1; + + return AVERROR(err); } - ctx->done = 1; - return NULL; } + atomic_fetch_sub(&ctx->q_count, 1); + + avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data; + ff_v4l2_buffer_set_avail(avbuf); + avbuf->buf = buf; + if (is_mp) { + memcpy(avbuf->planes, planes, sizeof(planes)); + avbuf->buf.m.planes = avbuf->planes; + } + // Done with any attached buffer + av_buffer_unref(&avbuf->ref_buf); + + if (V4L2_TYPE_IS_CAPTURE(ctx->type)) { + // Zero length cap buffer return == EOS unless marked as error + if ((buf.flags & V4L2_BUF_FLAG_ERROR) == 0 && + (is_mp ? buf.m.planes[0].bytesused : buf.bytesused) == 0) { + av_log(avctx, AV_LOG_DEBUG, "Buffer empty - reQ\n"); + + // Must reQ so we don't leak + // May not matter if the next thing we do is release all the + // buffers but better to be tidy. 
+ ff_v4l2_buffer_enqueue(avbuf); + + ctx->flag_last = 1; + return AVERROR(EPIPE); + } -start: - if (V4L2_TYPE_IS_OUTPUT(ctx->type)) - pfd.events = POLLOUT | POLLWRNORM; - else { - /* no need to listen to requests for more input while draining */ - if (ctx_to_m2mctx(ctx)->draining) - pfd.events = POLLIN | POLLRDNORM | POLLPRI; +#ifdef V4L2_BUF_FLAG_LAST + // If flag_last set then this contains data but is the last frame + // so remember that but return OK + if ((buf.flags & V4L2_BUF_FLAG_LAST) != 0) + ctx->flag_last = 1; +#endif } - for (;;) { - ret = poll(&pfd, 1, timeout); - if (ret > 0) - break; - if (errno == EINTR) + *ppavbuf = avbuf; + return 0; +} + +/** + * handle resolution change event and end of stream event + * Expects to be called after the stream has stopped + * + * Returns: + * 1 if reinit was successful + * 0 if no reinit required + * AVERROR_EOF on end of stream + * other -ve value for error + */ +static int +get_event(V4L2m2mContext * const m) +{ + AVCodecContext * const avctx = m->avctx; + struct v4l2_event evt = { 0 }; + + while (ioctl(m->fd, VIDIOC_DQEVENT, &evt) != 0) { + const int rv = AVERROR(errno); + if (rv == AVERROR(EINTR)) continue; - return NULL; + // I'd expect trying to get a non-existant event would return EAGAIN + // but it actually returns ENOENT. Also take EAGAIN to mean the same + // in case this ever gets "fixed" + if (rv == AVERROR(EAGAIN) || rv == AVERROR(ENOENT)) { + av_log(avctx, AV_LOG_WARNING, "V4L2 failed to get expected event - assume EOS\n"); + return AVERROR_EOF; + } + av_log(avctx, AV_LOG_ERROR, "V4L2 VIDIOC_DQEVENT: %s\n", av_err2str(rv)); + return rv; } - /* 0. handle errors */ - if (pfd.revents & POLLERR) { - /* if we are trying to get free buffers but none have been queued yet, - * or if no buffers have been allocated yet, no need to raise a warning - */ - if (timeout == 0) { - if (!ctx->buffers) - return NULL; + av_log(avctx, AV_LOG_DEBUG, "Dq event %d\n", evt.type); - for (i = 0; i < ctx->num_buffers; i++) { - if (ctx->buffers[i].status != V4L2BUF_AVAILABLE) - av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); - } + if (evt.type == V4L2_EVENT_EOS) { + av_log(avctx, AV_LOG_TRACE, "V4L2 VIDIOC_EVENT_EOS\n"); + return AVERROR_EOF; + } + + if (evt.type == V4L2_EVENT_SOURCE_CHANGE) + return do_source_change(m); + + return 0; +} + +static inline int +dq_ok(const V4L2Context * const c) +{ + return c->streamon && atomic_load(&c->q_count) != 0; +} + +// Get a buffer +// If output then just gets the buffer in the expected way +// If capture then runs the capture state m/c to deal with res change etc. +// If return value == 0 then *ppavbuf != NULL + +static int +get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout) +{ + V4L2m2mContext * const m = ctx_to_m2mctx(ctx); + AVCodecContext * const avctx = m->avctx; + const int is_cap = V4L2_TYPE_IS_CAPTURE(ctx->type); + + const unsigned int poll_cap = (POLLIN | POLLRDNORM); + const unsigned int poll_out = (POLLOUT | POLLWRNORM); + const unsigned int poll_event = POLLPRI; + + *ppavbuf = NULL; + + for (;;) { + struct pollfd pfd = { + .fd = m->fd, + // If capture && stream not started then assume we are waiting for the initial event + .events = !is_cap ? poll_out : + !ff_v4l2_ctx_eos(ctx) && ctx->streamon ? 
poll_cap : + poll_event, + }; + int ret; + + if (ctx->done) { + av_log(avctx, AV_LOG_TRACE, "V4L2 %s already done\n", ctx->name); + return AVERROR_EOF; } - else - av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); - return NULL; - } + // If capture && timeout == -1 then also wait for rx buffer free + if (is_cap && timeout == -1 && dq_ok(&m->output) && !m->draining) + pfd.events |= poll_out; - /* 1. handle resolution changes */ - if (pfd.revents & POLLPRI) { - ret = v4l2_handle_event(ctx); - if (ret < 0) { - /* if re-init failed, abort */ - ctx->done = 1; - return NULL; + // If nothing Qed all we will get is POLLERR - avoid that + if ((pfd.events == poll_out && !dq_ok(&m->output)) || + (pfd.events == poll_cap && !dq_ok(&m->capture)) || + (pfd.events == (poll_cap | poll_out) && !dq_ok(&m->capture) && !dq_ok(&m->output))) { + av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name); + return AVERROR(ENOSPC); } - if (ret) { - /* if re-init was successful drop the buffer (if there was one) - * since we had to reconfigure capture (unmap all buffers) - */ - return NULL; + + // Timeout kludged s.t. "forever" eventually gives up & produces logging + // If waiting for an event when we have seen a last_frame then we expect + // it to be ready already so force a short timeout + ret = poll(&pfd, 1, + ff_v4l2_ctx_eos(ctx) ? 10 : + timeout == -1 ? 3000 : timeout); + if (ret < 0) { + ret = AVERROR(errno); // Remember errno before logging etc. + av_assert0(ret < 0); } - } - /* 2. dequeue the buffer */ - if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) { + av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s ret=%d, timeout=%d, events=%#x, revents=%#x\n", + ctx->name, ret, timeout, pfd.events, pfd.revents); - if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) { - /* there is a capture buffer ready */ - if (pfd.revents & (POLLIN | POLLRDNORM)) - goto dequeue; + if (ret < 0) { + if (ret == AVERROR(EINTR)) + continue; + av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll error %d (%s)\n", ctx->name, AVUNERROR(ret), av_err2str(ret)); + return ret; + } - /* the driver is ready to accept more input; instead of waiting for the capture - * buffer to complete we return NULL so input can proceed (we are single threaded) - */ - if (pfd.revents & (POLLOUT | POLLWRNORM)) - return NULL; + if (ret == 0) { + if (timeout == -1) + av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll unexpected timeout: events=%#x\n", ctx->name, pfd.events); + if (ff_v4l2_ctx_eos(ctx)) { + av_log(avctx, AV_LOG_WARNING, "V4L2 %s poll event timeout\n", ctx->name); + ret = get_event(m); + if (ret < 0) { + ctx->done = 1; + return ret; + } + } + return AVERROR(EAGAIN); } -dequeue: - memset(&buf, 0, sizeof(buf)); - buf.memory = V4L2_MEMORY_MMAP; - buf.type = ctx->type; - if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { - memset(planes, 0, sizeof(planes)); - buf.length = VIDEO_MAX_PLANES; - buf.m.planes = planes; + if ((pfd.revents & POLLERR) != 0) { + av_log(avctx, AV_LOG_WARNING, "V4L2 %s POLLERR\n", ctx->name); + return AVERROR_UNKNOWN; } - ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf); - if (ret) { - if (errno != EAGAIN) { + if ((pfd.revents & poll_event) != 0) { + ret = get_event(m); + if (ret < 0) { ctx->done = 1; - if (errno != EPIPE) - av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", - ctx->name, av_err2str(AVERROR(errno))); + return ret; } - return NULL; + continue; } - if (ctx_to_m2mctx(ctx)->draining && !V4L2_TYPE_IS_OUTPUT(ctx->type)) { - int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ? 
- buf.m.planes[0].bytesused : buf.bytesused; - if (bytesused == 0) { - ctx->done = 1; - return NULL; - } -#ifdef V4L2_BUF_FLAG_LAST - if (buf.flags & V4L2_BUF_FLAG_LAST) - ctx->done = 1; -#endif + if ((pfd.revents & poll_cap) != 0) { + ret = dq_buf(ctx, ppavbuf); + if (ret == AVERROR(EPIPE)) + continue; + return ret; } - avbuf = &ctx->buffers[buf.index]; - avbuf->status = V4L2BUF_AVAILABLE; - avbuf->buf = buf; - if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { - memcpy(avbuf->planes, planes, sizeof(planes)); - avbuf->buf.m.planes = avbuf->planes; + if ((pfd.revents & poll_out) != 0) { + if (is_cap) + return AVERROR(EAGAIN); + return dq_buf(ctx, ppavbuf); } - return avbuf; + + av_log(avctx, AV_LOG_ERROR, "V4L2 poll unexpected events=%#x, revents=%#x\n", pfd.events, pfd.revents); + return AVERROR_UNKNOWN; } +} - return NULL; +// Clear out flags and timestamps that should should be set by the user +// Returns the passed avbuf +static V4L2Buffer * +clean_v4l2_buffer(V4L2Buffer * const avbuf) +{ + struct v4l2_buffer *const buf = &avbuf->buf; + + buf->flags = 0; + buf->field = V4L2_FIELD_ANY; + buf->timestamp = (struct timeval){0}; + buf->timecode = (struct v4l2_timecode){0}; + buf->sequence = 0; + + return avbuf; +} + +int +ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1) +{ + V4L2Buffer * avbuf; + if (timeout1 != 0) { + int rv = get_qbuf(ctx, &avbuf, timeout1); + if (rv != 0) + return rv; + } + do { + get_qbuf(ctx, &avbuf, 0); + } while (avbuf); + return 0; } static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) { - int timeout = 0; /* return when no more buffers to dequeue */ int i; /* get back as many output buffers as possible */ - if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { - do { - } while (v4l2_dequeue_v4l2buf(ctx, timeout)); - } + if (V4L2_TYPE_IS_OUTPUT(ctx->type)) + ff_v4l2_dq_all(ctx, 0); for (i = 0; i < ctx->num_buffers; i++) { - if (ctx->buffers[i].status == V4L2BUF_AVAILABLE) - return &ctx->buffers[i]; + V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; + if (avbuf->status == V4L2BUF_AVAILABLE) + return clean_v4l2_buffer(avbuf); } return NULL; @@ -443,25 +731,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) static int v4l2_release_buffers(V4L2Context* ctx) { - struct v4l2_requestbuffers req = { - .memory = V4L2_MEMORY_MMAP, - .type = ctx->type, - .count = 0, /* 0 -> unmaps buffers from the driver */ - }; - int i, j; + int i; + int ret = 0; + const int fd = ctx_to_m2mctx(ctx)->fd; - for (i = 0; i < ctx->num_buffers; i++) { - V4L2Buffer *buffer = &ctx->buffers[i]; + // Orphan any buffers in the wild + ff_weak_link_break(&ctx->wl_master); + + if (ctx->bufrefs) { + for (i = 0; i < ctx->num_buffers; i++) + av_buffer_unref(ctx->bufrefs + i); + } + + if (fd != -1) { + struct v4l2_requestbuffers req = { + .memory = V4L2_MEMORY_MMAP, + .type = ctx->type, + .count = 0, /* 0 -> unmap all buffers from the driver */ + }; - for (j = 0; j < buffer->num_planes; j++) { - struct V4L2Plane_info *p = &buffer->plane_info[j]; - if (p->mm_addr && p->length) - if (munmap(p->mm_addr, p->length) < 0) - av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno))); + while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1) { + if (errno == EINTR) + continue; + + ret = AVERROR(errno); + + av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n", + ctx->name, av_err2str(AVERROR(errno))); + + if (ctx_to_m2mctx(ctx)->output_drm) + av_log(logger(ctx), AV_LOG_ERROR, + "Make sure the DRM client releases all FB/GEM objects before closing the codec 
(ie):\n" + "for all buffers: \n" + " 1. drmModeRmFB(..)\n" + " 2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... )\n"); } } + atomic_store(&ctx->q_count, 0); - return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req); + return ret; } static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt) @@ -490,6 +798,8 @@ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfm static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) { + V4L2m2mContext* s = ctx_to_m2mctx(ctx); + V4L2m2mPriv *priv = s->avctx->priv_data; enum AVPixelFormat pixfmt = ctx->av_pix_fmt; struct v4l2_fmtdesc fdesc; int ret; @@ -503,21 +813,22 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) return 0; } - for (;;) { + for (;; ++fdesc.index) { ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc); if (ret) return AVERROR(EINVAL); + if (priv->pix_fmt != AV_PIX_FMT_NONE) { + if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt)) + continue; + } + pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO); ret = v4l2_try_raw_format(ctx, pixfmt); - if (ret){ - fdesc.index++; - continue; + if (ret == 0) { + *p = pixfmt; + return 0; } - - *p = pixfmt; - - return 0; } return AVERROR(EINVAL); @@ -560,30 +871,131 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p) * *****************************************************************************/ -int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) + +static void flush_all_buffers_status(V4L2Context* const ctx) +{ + int i; + + if (!ctx->bufrefs) + return; + + for (i = 0; i < ctx->num_buffers; ++i) { + struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; + if (buf->status == V4L2BUF_IN_DRIVER) + ff_v4l2_buffer_set_avail(buf); + } + atomic_store(&ctx->q_count, 0); +} + +static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx) +{ + int i; + int rv; + + if (!ctx->bufrefs) { + rv = ff_v4l2_context_init(ctx); + if (rv) { + av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); + return rv; + } + } + + ff_mutex_lock(&ctx->lock); + for (i = 0; i < ctx->num_buffers; ++i) { + struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; + if (buf->status == V4L2BUF_AVAILABLE) { + rv = ff_v4l2_buffer_enqueue(buf); + if (rv < 0) + break; + } + } + ff_mutex_unlock(&ctx->lock); + return rv; +} + +static int set_streamon(AVCodecContext * const avctx, V4L2Context*const ctx) { int type = ctx->type; - int ret; + int ret = 0; - ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type); - if (ret < 0) - return AVERROR(errno); + if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) + stuff_all_buffers(avctx, ctx); - ctx->streamon = (cmd == VIDIOC_STREAMON); + if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMON, &type) < 0) { + ret = AVERROR(errno); + av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name, + av_err2str(ret)); + return ret; + } - return 0; + ctx->first_buf = 1; + ctx->streamon = 1; + ctx->flag_last = 0; + av_log(avctx, AV_LOG_DEBUG, "%s set status ON OK\n", ctx->name); + return ret; +} + +static int set_streamoff(AVCodecContext * const avctx, V4L2Context*const ctx) +{ + int type = ctx->type; + int ret = 0; + const int has_bufs = ctx_buffers_alloced(ctx); + + // Avoid doing anything if there is nothing we can do + if (!has_bufs && !ctx->streamon) + return 0; + + if (has_bufs) + ff_mutex_lock(&ctx->lock); + + if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMOFF, &type) < 0) { + ret = AVERROR(errno); + 
av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name, + av_err2str(ret)); + } + else { + flush_all_buffers_status(ctx); + + ctx->streamon = 0; + ctx->flag_last = 0; + + av_log(avctx, AV_LOG_DEBUG, "%s set status OFF OK\n", ctx->name); + } + + if (has_bufs) + ff_mutex_unlock(&ctx->lock); + return ret; +} + + +int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) +{ + AVCodecContext * const avctx = logger(ctx); + + switch (cmd) { + case VIDIOC_STREAMOFF: + return set_streamoff(avctx, ctx); + case VIDIOC_STREAMON: + return set_streamon(avctx, ctx); + default: + av_log(avctx, AV_LOG_ERROR, "%s: Unexpected cmd: %d\n", __func__, cmd); + break; + } + return AVERROR_BUG; } int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) { - V4L2m2mContext *s = ctx_to_m2mctx(ctx); + V4L2m2mContext *const s = ctx_to_m2mctx(ctx); + AVCodecContext *const avctx = s->avctx; + int64_t track_ts; V4L2Buffer* avbuf; int ret; if (!frame) { ret = v4l2_stop_encode(ctx); if (ret) - av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name); + av_log(avctx, AV_LOG_ERROR, "%s stop_encode\n", ctx->name); s->draining= 1; return 0; } @@ -592,23 +1004,29 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) if (!avbuf) return AVERROR(EAGAIN); - ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf); + track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame); + + ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts); if (ret) return ret; return ff_v4l2_buffer_enqueue(avbuf); } -int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) +int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, + const void * extdata, size_t extlen) { V4L2m2mContext *s = ctx_to_m2mctx(ctx); + AVCodecContext *const avctx = s->avctx; V4L2Buffer* avbuf; int ret; + int64_t track_ts; if (!pkt->size) { ret = v4l2_stop_decode(ctx); + // Log but otherwise ignore stop failure if (ret) - av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name); + av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret); s->draining = 1; return 0; } @@ -617,8 +1035,13 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) if (!avbuf) return AVERROR(EAGAIN); - ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf); - if (ret) + track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt); + + ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts); + if (ret == AVERROR(ENOMEM)) + av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n", + __func__, pkt->size, avbuf->planes[0].length); + else if (ret) return ret; return ff_v4l2_buffer_enqueue(avbuf); @@ -626,42 +1049,77 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) { + V4L2m2mContext *s = ctx_to_m2mctx(ctx); + AVCodecContext *const avctx = s->avctx; V4L2Buffer *avbuf; + int rv; - /* - * timeout=-1 blocks until: - * 1. decoded frame available - * 2. 
an input buffer is ready to be dequeued - */ - avbuf = v4l2_dequeue_v4l2buf(ctx, timeout); - if (!avbuf) { - if (ctx->done) - return AVERROR_EOF; - - return AVERROR(EAGAIN); - } + do { + if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) + return rv; + if ((rv = ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0) + return rv; + } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0); - return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); + return 0; } -int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) +int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout) { + V4L2m2mContext *s = ctx_to_m2mctx(ctx); + AVCodecContext *const avctx = s->avctx; V4L2Buffer *avbuf; + int rv; - /* - * blocks until: - * 1. encoded packet available - * 2. an input buffer ready to be dequeued - */ - avbuf = v4l2_dequeue_v4l2buf(ctx, -1); - if (!avbuf) { - if (ctx->done) - return AVERROR_EOF; + do { + if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) + return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC + if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0) + return rv; + } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0); - return AVERROR(EAGAIN); + return 0; +} + +// Return 0 terminated list of drm fourcc video formats for this context +// NULL if none found or error +// Returned list is malloced so must be freed +uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN) +{ + unsigned int i; + unsigned int n = 0; + unsigned int size = 0; + uint32_t * e = NULL; + *pN = 0; + + for (i = 0; i < 1024; ++i) { + struct v4l2_fmtdesc fdesc = { + .index = i, + .type = ctx->type + }; + + if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc)) + return e; + + if (n + 1 >= size) { + unsigned int newsize = (size == 0) ? 
16 : size * 2; + uint32_t * t = av_realloc(e, newsize * sizeof(*t)); + if (!t) + return e; + e = t; + size = newsize; + } + + e[n] = fdesc.pixelformat; + e[++n] = 0; + if (pN) + *pN = n; } - return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf); + // If we've looped 1024 times we are clearly confused + *pN = 0; + av_free(e); + return NULL; } int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) @@ -693,78 +1151,194 @@ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) int ff_v4l2_context_set_format(V4L2Context* ctx) { - return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); + int ret; + + ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); + if (ret != 0) + return ret; + + // Check returned size against min size and if smaller have another go + // Only worry about plane[0] as this is meant to enforce limits for + // encoded streams where we might know a bit more about the shape + // than the driver + if (V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type)) { + if (ctx->min_buf_size <= ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage) + return 0; + ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage = ctx->min_buf_size; + } + else { + if (ctx->min_buf_size <= ctx->format.fmt.pix.sizeimage) + return 0; + ctx->format.fmt.pix.sizeimage = ctx->min_buf_size; + } + + ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); + return ret; } void ff_v4l2_context_release(V4L2Context* ctx) { int ret; - if (!ctx->buffers) + if (!ctx->bufrefs) return; ret = v4l2_release_buffers(ctx); if (ret) av_log(logger(ctx), AV_LOG_WARNING, "V4L2 failed to unmap the %s buffers\n", ctx->name); - av_freep(&ctx->buffers); + av_freep(&ctx->bufrefs); + av_buffer_unref(&ctx->frames_ref); + + ff_mutex_destroy(&ctx->lock); + pthread_cond_destroy(&ctx->cond); } -int ff_v4l2_context_init(V4L2Context* ctx) + +static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem) { - V4L2m2mContext *s = ctx_to_m2mctx(ctx); + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); struct v4l2_requestbuffers req; - int ret, i; - - if (!v4l2_type_supported(ctx)) { - av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); - return AVERROR_PATCHWELCOME; - } + int ret; + int i; - ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); - if (ret) - av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", ctx->name); + av_assert0(ctx->bufrefs == NULL); memset(&req, 0, sizeof(req)); - req.count = ctx->num_buffers; - req.memory = V4L2_MEMORY_MMAP; + req.count = req_buffers; + req.memory = mem; req.type = ctx->type; - ret = ioctl(s->fd, VIDIOC_REQBUFS, &req); - if (ret < 0) { - av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, strerror(errno)); - return AVERROR(errno); + while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) { + if (errno != EINTR) { + ret = AVERROR(errno); + av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, av_err2str(ret)); + return ret; + } } ctx->num_buffers = req.count; - ctx->buffers = av_mallocz(ctx->num_buffers * sizeof(V4L2Buffer)); - if (!ctx->buffers) { + ctx->bufrefs = av_mallocz(ctx->num_buffers * sizeof(*ctx->bufrefs)); + if (!ctx->bufrefs) { av_log(logger(ctx), AV_LOG_ERROR, "%s malloc enomem\n", ctx->name); - return AVERROR(ENOMEM); + goto fail_release; } - for (i = 0; i < req.count; i++) { - ctx->buffers[i].context = ctx; - ret = ff_v4l2_buffer_initialize(&ctx->buffers[i], i); - if (ret < 0) { + ctx->wl_master = ff_weak_link_new(ctx); + if (!ctx->wl_master) { + ret = 
AVERROR(ENOMEM); + goto fail_release; + } + + for (i = 0; i < ctx->num_buffers; i++) { + ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem); + if (ret) { av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret)); - goto error; + goto fail_release; } } av_log(logger(ctx), AV_LOG_DEBUG, "%s: %s %02d buffers initialized: %04ux%04u, sizeimage %08u, bytesperline %08u\n", ctx->name, V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? av_fourcc2str(ctx->format.fmt.pix_mp.pixelformat) : av_fourcc2str(ctx->format.fmt.pix.pixelformat), req.count, - v4l2_get_width(&ctx->format), - v4l2_get_height(&ctx->format), + ff_v4l2_get_format_width(&ctx->format), + ff_v4l2_get_format_height(&ctx->format), V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage : ctx->format.fmt.pix.sizeimage, V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].bytesperline : ctx->format.fmt.pix.bytesperline); return 0; -error: +fail_release: v4l2_release_buffers(ctx); + av_freep(&ctx->bufrefs); + return ret; +} + +int ff_v4l2_context_frames_set(V4L2Context *const ctx) +{ + AVHWFramesContext *hwframes; + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); + const int w = ctx->width != 0 ? ctx->width : s->avctx->width; + const int h = ctx->height != 0 ? ctx->height : s->avctx->height; + int ret; + + if (ctx->frames_ref != NULL) { + const AVHWFramesContext * const hwf = (AVHWFramesContext*)ctx->frames_ref->data; + if (hwf->sw_format == ctx->av_pix_fmt && hwf->width == w && hwf->height == h) + return 0; + av_buffer_unref(&ctx->frames_ref); + } + + ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref); + if (!ctx->frames_ref) + return AVERROR(ENOMEM); + + hwframes = (AVHWFramesContext*)ctx->frames_ref->data; + hwframes->format = AV_PIX_FMT_DRM_PRIME; + hwframes->sw_format = ctx->av_pix_fmt; + hwframes->width = w; + hwframes->height = h; + ret = av_hwframe_ctx_init(ctx->frames_ref); + if (ret < 0) { + av_log(s->avctx, AV_LOG_ERROR, "Failed to create hwframes context: %s\n", av_err2str(ret)); + av_buffer_unref(&ctx->frames_ref); + return ret; + } + + av_log(s->avctx, AV_LOG_DEBUG, "%s: HWFramesContext set to %s, %dx%d\n", __func__, + av_get_pix_fmt_name(ctx->av_pix_fmt), w, h); + return 0; +} + +int ff_v4l2_context_init(V4L2Context* ctx) +{ + struct v4l2_queryctrl qctrl; + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); + int ret; + + // It is not valid to reinit a context without a previous release + av_assert0(ctx->bufrefs == NULL); + + if (!v4l2_type_supported(ctx)) { + av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); + return AVERROR_PATCHWELCOME; + } + + ff_mutex_init(&ctx->lock, NULL); + pthread_cond_init(&ctx->cond, NULL); + atomic_init(&ctx->q_count, 0); + + ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); + if (ret) { + ret = AVERROR(errno); + av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret)); + goto fail_unlock; + } + + memset(&qctrl, 0, sizeof(qctrl)); + qctrl.id = V4L2_CID_MIN_BUFFERS_FOR_OUTPUT; + if (ioctl(s->fd, VIDIOC_QUERYCTRL, &qctrl) != 0) { + ret = AVERROR(errno); + if (ret != AVERROR(EINVAL)) { + av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERYCTRL failed: %s\n", ctx->name, av_err2str(ret)); + goto fail_unlock; + } + // Control unsupported - set default if wanted + if (ctx->num_buffers < 2) + ctx->num_buffers = 4; + } + else { + if (ctx->num_buffers < 2) + ctx->num_buffers = qctrl.minimum + 2; + ctx->num_buffers = av_clip(ctx->num_buffers, qctrl.minimum,
qctrl.maximum); + } - av_freep(&ctx->buffers); + ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem); + if (ret < 0) + goto fail_unlock; + + return 0; +fail_unlock: + ff_mutex_destroy(&ctx->lock); return ret; } diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h index fdd5cf5284..99201ca254 100644 --- a/libavcodec/v4l2_context.h +++ b/libavcodec/v4l2_context.h @@ -32,6 +32,8 @@ #include "libavutil/rational.h" #include "codec_id.h" #include "packet.h" +#include "libavutil/buffer.h" +#include "libavutil/thread.h" #include "v4l2_buffers.h" typedef struct V4L2Context { @@ -71,28 +73,57 @@ typedef struct V4L2Context { */ int width, height; AVRational sample_aspect_ratio; + struct v4l2_rect selection; /** - * Indexed array of V4L2Buffers + * If the default size of buffer is less than this then try to + * set to this. */ - V4L2Buffer *buffers; + uint32_t min_buf_size; + + /** + * Indexed array of pointers to V4L2Buffers + */ + AVBufferRef **bufrefs; /** * Readonly after init. */ int num_buffers; + /** + * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF + */ + enum v4l2_memory buf_mem; + /** * Whether the stream has been started (VIDIOC_STREAMON has been sent). */ int streamon; + /* 1st buffer after stream on */ + int first_buf; + /** * Either no more buffers available or an unrecoverable error was notified * by the V4L2 kernel driver: once set the context has to be exited. */ int done; + int flag_last; + + /** + * If NZ then when Qing frame/pkt use this rather than the + * "real" PTS + */ + uint64_t track_ts; + + AVBufferRef *frames_ref; + atomic_int q_count; + struct ff_weak_link_master *wl_master; + + AVMutex lock; + pthread_cond_t cond; } V4L2Context; /** @@ -103,6 +134,14 @@ typedef struct V4L2Context { */ int ff_v4l2_context_init(V4L2Context* ctx); +/** + * (re)set the hwframecontext from the current v4l2 context + * + * @param[in] ctx A pointer to a V4L2Context. See V4L2Context description for required variables. + * @return 0 in case of success, a negative value representing the error otherwise. + */ +int ff_v4l2_context_frames_set(V4L2Context *const ctx); + /** * Sets the V4L2Context format in the v4l2 driver. * @@ -120,6 +159,19 @@ int ff_v4l2_context_set_format(V4L2Context* ctx); */ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe); +/** + * Get the list of drm fourcc pixel formats for this context + * + * @param[in] ctx A pointer to a V4L2Context. See V4L2Context + * description for required variables. + * @param[in] pN A pointer to receive the number of formats + * found. May be NULL if not wanted. + * @return Pointer to malloced list of zero terminated formats, + * NULL if none or error. As list is malloced it must be + * freed. + */ +uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN); + /** * Releases a V4L2Context. * @@ -148,7 +200,7 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd); * @param[inout] pkt The AVPacket to dequeue to. * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. */ -int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); +int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout); /** * Dequeues a buffer from a V4L2Context to an AVFrame. @@ -157,7 +209,10 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); * @param[in] ctx The V4L2Context to dequeue from. * @param[inout] f The AVFrame to dequeue to. 
* @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds) + * * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. + * AVERROR(ENOSPC) if no buffer available to put + * the frame in */ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); @@ -171,7 +226,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); * @param[in] pkt A pointer to an AVPacket. * @return 0 in case of success, a negative error otherwise. */ -int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt); +int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size); /** * Enqueues a buffer to a V4L2Context from an AVFrame * @@ -184,4 +239,28 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt); */ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f); +/** + * Dequeue all buffers on this queue + * + * Used to recycle output buffers + * + * @param[in] ctx The V4L2Context to dequeue from. + * @param[in] timeout1 A timeout on dequeuing the 1st buffer, + * all others have a timeout of zero + * @return AVERROR(EAGAIN) if timeout1 non-zero then the return + * of the first dequeue operation, 0 otherwise. + */ +int ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1); + +/** + * Returns the number of buffers currently queued + * + * @param[in] ctx The V4L2Context to evaluate + */ +static inline int +ff_v4l2_context_q_count(const V4L2Context* const ctx) +{ + return atomic_load(&ctx->q_count); +} + #endif // AVCODEC_V4L2_CONTEXT_H diff --git a/libavcodec/v4l2_fmt.c b/libavcodec/v4l2_fmt.c index 6df47e3f5a..100827b7af 100644 --- a/libavcodec/v4l2_fmt.c +++ b/libavcodec/v4l2_fmt.c @@ -42,6 +42,14 @@ static const struct fmt_conversion { { AV_FMT(RGB24), AV_CODEC(RAWVIDEO), V4L2_FMT(RGB24) }, { AV_FMT(BGR0), AV_CODEC(RAWVIDEO), V4L2_FMT(BGR32) }, { AV_FMT(0RGB), AV_CODEC(RAWVIDEO), V4L2_FMT(RGB32) }, + { AV_FMT(BGR0), AV_CODEC(RAWVIDEO), V4L2_FMT(BGRX32) }, + { AV_FMT(RGB0), AV_CODEC(RAWVIDEO), V4L2_FMT(RGBX32) }, + { AV_FMT(0BGR), AV_CODEC(RAWVIDEO), V4L2_FMT(XBGR32) }, + { AV_FMT(0RGB), AV_CODEC(RAWVIDEO), V4L2_FMT(XRGB32) }, + { AV_FMT(BGRA), AV_CODEC(RAWVIDEO), V4L2_FMT(BGRA32) }, + { AV_FMT(RGBA), AV_CODEC(RAWVIDEO), V4L2_FMT(RGBA32) }, + { AV_FMT(ABGR), AV_CODEC(RAWVIDEO), V4L2_FMT(ABGR32) }, + { AV_FMT(ARGB), AV_CODEC(RAWVIDEO), V4L2_FMT(ARGB32) }, { AV_FMT(GRAY8), AV_CODEC(RAWVIDEO), V4L2_FMT(GREY) }, { AV_FMT(YUV420P), AV_CODEC(RAWVIDEO), V4L2_FMT(YUV420) }, { AV_FMT(YUYV422), AV_CODEC(RAWVIDEO), V4L2_FMT(YUYV) }, @@ -104,6 +112,9 @@ static const struct fmt_conversion { #ifdef V4L2_PIX_FMT_HEVC { AV_FMT(NONE), AV_CODEC(HEVC), V4L2_FMT(HEVC) }, #endif +#ifdef V4L2_PIX_FMT_AV1 + { AV_FMT(NONE), AV_CODEC(AV1), V4L2_FMT(AV1) }, +#endif #ifdef V4L2_PIX_FMT_VC1_ANNEX_G { AV_FMT(NONE), AV_CODEC(VC1), V4L2_FMT(VC1_ANNEX_G) }, #endif diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c index 07c7500000..827f67efc3 100644 --- a/libavcodec/v4l2_m2m.c +++ b/libavcodec/v4l2_m2m.c @@ -36,6 +36,15 @@ #include "v4l2_context.h" #include "v4l2_fmt.h" #include "v4l2_m2m.h" +#include "v4l2_req_dmabufs.h" + +static void +xlat_init(xlat_track_t * const x) +{ + memset(x, 0, sizeof(*x)); + x->last_pts = AV_NOPTS_VALUE; +} + static inline int v4l2_splane_video(struct v4l2_capability *cap) { @@ -69,7 +78,9 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) s->capture.done =
s->output.done = 0; s->capture.name = "capture"; + s->capture.buf_mem = s->db_ctl != NULL ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; s->output.name = "output"; + s->output.buf_mem = s->input_drm ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; atomic_init(&s->refcount, 0); sem_init(&s->refsync, 0, 0); @@ -86,18 +97,58 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) if (v4l2_mplane_video(&cap)) { s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; + s->output.format.type = s->output.type; return 0; } if (v4l2_splane_video(&cap)) { s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + s->output.format.type = s->output.type; return 0; } return AVERROR(EINVAL); } +static int check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) +{ + struct v4l2_format fmt = {.type = s->output.type}; + int rv; + uint32_t pixfmt = ff_v4l2_format_avfmt_to_v4l2(avctx->pix_fmt); + unsigned int w; + unsigned int h; + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { + fmt.fmt.pix_mp.pixelformat = pixfmt; + fmt.fmt.pix_mp.width = avctx->width; + fmt.fmt.pix_mp.height = avctx->height; + } + else { + fmt.fmt.pix.pixelformat = pixfmt; + fmt.fmt.pix.width = avctx->width; + fmt.fmt.pix.height = avctx->height; + } + + rv = ioctl(s->fd, VIDIOC_TRY_FMT, &fmt); + + if (rv != 0) { + rv = AVERROR(errno); + av_log(avctx, AV_LOG_ERROR, "%s: Tryfmt failed: %s\n", __func__, av_err2str(rv)); + return rv; + } + + w = ff_v4l2_get_format_width(&fmt); + h = ff_v4l2_get_format_height(&fmt); + + if (w < avctx->width || h < avctx->height) { + av_log(avctx, AV_LOG_WARNING, "%s: Size check failed: asked for %dx%d, got: %dx%d\n", __func__, avctx->width, avctx->height, w, h); + return AVERROR(EINVAL); + } + + return 0; +} + static int v4l2_probe_driver(V4L2m2mContext *s) { void *log_ctx = s->avctx; @@ -117,6 +168,11 @@ static int v4l2_probe_driver(V4L2m2mContext *s) goto done; } + // If being given frames (encode) check that V4L2 can cope with the size + if (s->output.av_codec_id == AV_CODEC_ID_RAWVIDEO && + (ret = check_size(s->avctx, s)) != 0) + goto done; + ret = ff_v4l2_context_get_format(&s->capture, 1); if (ret) { av_log(log_ctx, AV_LOG_DEBUG, "v4l2 capture format not supported\n"); @@ -218,13 +274,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n"); /* 2. unmap the capture buffers (v4l2 and ffmpeg): - * we must wait for all references to be released before being allowed - * to queue new buffers. */ - av_log(log_ctx, AV_LOG_DEBUG, "waiting for user to release AVBufferRefs\n"); - if (atomic_load(&s->refcount)) - while(sem_wait(&s->refsync) == -1 && errno == EINTR); - ff_v4l2_context_release(&s->capture); /* 3. get the new capture format */ @@ -243,7 +293,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) /* 5. 
complete reinit */ s->draining = 0; - s->reinit = 0; return 0; } @@ -259,6 +308,9 @@ static void v4l2_m2m_destroy_context(AVRefStructOpaque unused, void *context) close(s->fd); av_frame_free(&s->frame); av_packet_unref(&s->buf_pkt); + av_freep(&s->extdata_data); + + av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n"); } av_cold int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) @@ -269,6 +321,11 @@ av_cold int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) if (!s) return 0; + av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n"); + + if (s->avctx && av_codec_is_decoder(s->avctx->codec)) + av_packet_unref(&s->buf_pkt); + if (s->fd >= 0) { ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF); if (ret) @@ -280,6 +337,14 @@ av_cold int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) } ff_v4l2_context_release(&s->output); + av_buffer_unref(&s->device_ref); + + dmabufs_ctl_unref(&s->db_ctl); + + if (s->fd != -1) { + close(s->fd); + s->fd = -1; + } s->self_ref = NULL; av_refstruct_unref(&priv->context); @@ -341,6 +406,7 @@ int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s) priv->context->output.num_buffers = priv->num_output_buffers; priv->context->self_ref = priv->context; priv->context->fd = -1; + xlat_init(&priv->context->xlat); priv->context->frame = av_frame_alloc(); if (!priv->context->frame) { diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h index 4ba33dc335..b958a0ac34 100644 --- a/libavcodec/v4l2_m2m.h +++ b/libavcodec/v4l2_m2m.h @@ -30,6 +30,7 @@ #include #include "libavcodec/avcodec.h" +#include "libavutil/pixfmt.h" #include "v4l2_context.h" #define container_of(ptr, type, member) ({ \ @@ -40,6 +41,34 @@ { "num_output_buffers", "Number of buffers in the output context",\ OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 2, INT_MAX, FLAGS } +#define FF_V4L2_M2M_TRACK_SIZE 128 +typedef struct V4L2m2mTrackEl { + int discard; // If we see this buffer its been flushed, so discard + int pending; + int64_t pts; + int64_t dts; + int64_t duration; + int64_t track_pts; +} V4L2m2mTrackEl; + +typedef struct pts_stats_s +{ + void * logctx; + const char * name; // For debug + unsigned int last_count; + unsigned int last_interval; + int64_t last_pts; + int64_t guess; +} pts_stats_t; + +typedef struct xlat_track_s { + unsigned int track_no; + int64_t last_pts; // Last valid PTS decoded + V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; +} xlat_track_t; + +struct dmabufs_ctl; + typedef struct V4L2m2mContext { char devname[PATH_MAX]; int fd; @@ -52,10 +81,10 @@ typedef struct V4L2m2mContext { AVCodecContext *avctx; sem_t refsync; atomic_uint refcount; - int reinit; /* null frame/packet received */ int draining; + int running; AVPacket buf_pkt; /* Reference to a frame. 
Only used during encoding */ @@ -66,6 +95,37 @@ typedef struct V4L2m2mContext { /* reference back to V4L2m2mPriv */ void *priv; + + AVBufferRef *device_ref; + + /* generate DRM frames */ + int output_drm; + + /* input frames are drmprime */ + int input_drm; + + /* Frame tracking */ + xlat_track_t xlat; + + pts_stats_t pts_stat; + + /* req pkt */ + int req_pkt; + int reorder_size; + + /* Ext data sent */ + int extdata_sent; + /* Ext data sent in packet - overrides ctx */ + void * extdata_data; + size_t extdata_size; + +#define FF_V4L2_QUIRK_REINIT_ALWAYS 1 +#define FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN 2 +#define FF_V4L2_QUIRK_8192_MACRO_MAX 4 // Size limited to 8192 macroblocks + /* Quirks */ + unsigned int quirks; + + struct dmabufs_ctl * db_ctl; } V4L2m2mContext; typedef struct V4L2m2mPriv { @@ -75,6 +135,8 @@ typedef struct V4L2m2mPriv { int num_output_buffers; int num_capture_buffers; + const char * dmabuf_alloc; + enum AVPixelFormat pix_fmt; } V4L2m2mPriv; /** @@ -128,4 +190,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx); */ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx); + +static inline unsigned int ff_v4l2_get_format_width(const struct v4l2_format * const fmt) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; +} + +static inline unsigned int ff_v4l2_get_format_height(const struct v4l2_format * const fmt) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; +} + +static inline uint32_t ff_v4l2_get_format_pixelformat(const struct v4l2_format * const fmt) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; +} + +static inline int ff_v4l2_ctx_eos(const V4L2Context * const ctx) +{ + return ctx->flag_last; +} + + #endif /* AVCODEC_V4L2_M2M_H */ diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c index ba1fe769e0..41db79ccd7 100644 --- a/libavcodec/v4l2_m2m_dec.c +++ b/libavcodec/v4l2_m2m_dec.c @@ -21,8 +21,15 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "config_components.h" + #include #include + +#include "libavutil/avassert.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_drm.h" +#include "libavutil/mem.h" #include "libavutil/pixfmt.h" #include "libavutil/pixdesc.h" #include "libavutil/opt.h" @@ -30,75 +37,279 @@ #include "codec_internal.h" #include "libavcodec/decode.h" +#include "libavcodec/hwaccels.h" +#include "libavcodec/internal.h" +#include "libavcodec/hwconfig.h" + #include "v4l2_context.h" #include "v4l2_m2m.h" #include "v4l2_fmt.h" +#include "v4l2_req_dmabufs.h" -static int v4l2_try_start(AVCodecContext *avctx) +#if CONFIG_H264_DECODER +#include "h264_parse.h" +#endif +#if CONFIG_HEVC_DECODER +#include "hevc/parse.h" +#endif + +// Pick 64 for max last count - that is >1sec at 60fps +#define STATS_LAST_COUNT_MAX 64 +#define STATS_INTERVAL_MAX (1 << 30) + +#ifndef FF_API_BUFFER_SIZE_T +#define FF_API_BUFFER_SIZE_T 1 +#endif + +#define DUMP_FAILED_EXTRADATA 0 + +#if DUMP_FAILED_EXTRADATA +static inline char hex1(unsigned int x) { - V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; - V4L2Context *const capture = &s->capture; - V4L2Context *const output = &s->output; - struct v4l2_selection selection = { 0 }; - int ret; + x &= 0xf; + return x <= 9 ? '0' + x : 'a' + x - 10; +} - /* 1. 
start the output process */ - if (!output->streamon) { - ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON); - if (ret < 0) { - av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n"); - return ret; - } +static inline char * hex2(char * s, unsigned int x) +{ + *s++ = hex1(x >> 4); + *s++ = hex1(x); + return s; +} + +static inline char * hex4(char * s, unsigned int x) +{ + s = hex2(s, x >> 8); + s = hex2(s, x); + return s; +} + +static inline char * dash2(char * s) +{ + *s++ = '-'; + *s++ = '-'; + return s; +} + +static void +data16(char * s, const unsigned int offset, const uint8_t * m, const size_t len) +{ + size_t i; + s = hex4(s, offset); + m += offset; + for (i = 0; i != 8; ++i) { + *s++ = ' '; + s = len > i + offset ? hex2(s, *m++) : dash2(s); } + *s++ = ' '; + *s++ = ':'; + for (; i != 16; ++i) { + *s++ = ' '; + s = len > i + offset ? hex2(s, *m++) : dash2(s); + } + *s++ = 0; +} - if (capture->streamon) - return 0; +static void +log_dump(void * logctx, int lvl, const void * const data, const size_t len) +{ + size_t i; + for (i = 0; i < len; i += 16) { + char buf[80]; + data16(buf, i, data, len); + av_log(logctx, lvl, "%s\n", buf); + } +} +#endif - /* 2. get the capture format */ - capture->format.type = capture->type; - ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format); - if (ret) { - av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n"); - return ret; +static unsigned int pts_stats_interval(const pts_stats_t * const stats) +{ + return stats->last_interval; +} + +static int64_t pts_stats_guess(const pts_stats_t * const stats, const int fail_bad_guess) +{ + if (stats->last_count <= 1) + return stats->last_pts; + if (stats->last_pts == AV_NOPTS_VALUE || + fail_bad_guess && (stats->last_interval == 0 || + stats->last_count >= STATS_LAST_COUNT_MAX)) + return AV_NOPTS_VALUE; + return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval; +} + +static void pts_stats_add(pts_stats_t * const stats, int64_t pts) +{ + if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { + if (stats->last_count < STATS_LAST_COUNT_MAX) + ++stats->last_count; + return; } - /* 2.1 update the AVCodecContext */ - avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO); - capture->av_pix_fmt = avctx->pix_fmt; + if (stats->last_pts != AV_NOPTS_VALUE) { + const int64_t interval = pts - stats->last_pts; - /* 3. 
set the crop parameters */ - selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - selection.r.height = avctx->coded_height; - selection.r.width = avctx->coded_width; - ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection); - if (!ret) { - ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection); - if (ret) { - av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n"); - } else { - av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height); - /* update the size of the resulting frame */ - capture->height = selection.r.height; - capture->width = selection.r.width; + if (interval < 0 || interval >= STATS_INTERVAL_MAX || + stats->last_count >= STATS_LAST_COUNT_MAX) { + if (stats->last_interval != 0) + av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", + __func__, stats->name, interval, stats->last_count); + stats->last_interval = 0; + } + else { + const int64_t frame_time = interval / (int64_t)stats->last_count; + + if (frame_time != stats->last_interval) + av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", + __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); + stats->last_interval = frame_time; } } - /* 4. init the capture context now that we have the capture format */ - if (!capture->buffers) { - ret = ff_v4l2_context_init(capture); - if (ret) { - av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); - return AVERROR(ENOMEM); + stats->last_pts = pts; + stats->last_count = 1; +} + +static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name) +{ + *stats = (pts_stats_t){ + .logctx = logctx, + .name = name, + .last_count = 1, + .last_interval = 0, + .last_pts = AV_NOPTS_VALUE + }; +} + +// If abdata == NULL then this just counts space required +// Unpacks avcC if detected +static int +h264_xd_copy(const uint8_t * const extradata, const int extrasize, uint8_t * abdata) +{ + const uint8_t * const xdend = extradata + extrasize; + const uint8_t * p = extradata; + uint8_t * d = abdata; + unsigned int n; + unsigned int len; + const unsigned int hdrlen = 4; + unsigned int need_pps = 1; + + if (extrasize < 8) + return AVERROR(EINVAL); + + if (p[0] == 0 && p[1] == 0) { + // Assume a couple of leading zeros are good enough to indicate NAL + if (abdata) + memcpy(d, p, extrasize); + return extrasize; + } + + // avcC starts with a 1 + if (p[0] != 1) + return AVERROR(EINVAL); + + p += 5; + n = *p++ & 0x1f; + +doxps: + while (n--) { + if (xdend - p < 2) + return AVERROR(EINVAL); + len = (p[0] << 8) | p[1]; + p += 2; + if (xdend - p < (ptrdiff_t)len) + return AVERROR(EINVAL); + if (abdata) { + d[0] = 0; + d[1] = 0; + d[2] = 0; + d[3] = 1; + memcpy(d + 4, p, len); } + d += len + hdrlen; + p += len; + } + if (need_pps) { + need_pps = 0; + if (p >= xdend) + return AVERROR(EINVAL); + n = *p++; + goto doxps; } - /* 5. start the capture process */ - ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); - if (ret) { - av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n"); + return d - abdata; +} + +static int +copy_extradata(AVCodecContext * const avctx, + const void * const src_data, const int src_len, + void ** const pdst_data, size_t * const pdst_len) +{ + int len; + + *pdst_len = 0; + av_freep(pdst_data); + + if (avctx->codec_id == AV_CODEC_ID_H264) + len = h264_xd_copy(src_data, src_len, NULL); + else + len = src_len < 0 ? 
AVERROR(EINVAL) : src_len; + + // Zero length is OK but we want to stop - -ve is error val + if (len <= 0) + return len; + + if ((*pdst_data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) + return AVERROR(ENOMEM); + + if (avctx->codec_id == AV_CODEC_ID_H264) + h264_xd_copy(src_data, src_len, *pdst_data); + else + memcpy(*pdst_data, src_data, len); + *pdst_len = len; + + return 0; +} + + + +static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s) +{ + int ret; + struct v4l2_decoder_cmd cmd = { + .cmd = V4L2_DEC_CMD_START, + .flags = 0, + }; + + if (s->output.streamon) + return 0; + + ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON); + if (ret != 0) { + av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context: %s\n", av_err2str(ret)); return ret; } + // STREAMON should do implicit START so this just for those that don't. + // It is optional so don't worry if it fails + if (ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd) < 0) { + ret = AVERROR(errno); + av_log(avctx, AV_LOG_WARNING, "VIDIOC_DECODER_CMD start error: %s\n", av_err2str(ret)); + } + else { + av_log(avctx, AV_LOG_TRACE, "VIDIOC_DECODER_CMD start OK\n"); + } + return 0; +} + +static int v4l2_try_start(AVCodecContext *avctx) +{ + V4L2m2mContext * const s = ((V4L2m2mPriv*)avctx->priv_data)->context; + int ret; + + /* 1. start the output process */ + if ((ret = check_output_streamon(avctx, s)) != 0) + return ret; return 0; } @@ -133,51 +344,887 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s) return 0; } -static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) +static void +set_best_effort_pts(AVCodecContext *const avctx, + pts_stats_t * const ps, + AVFrame *const frame) +{ + pts_stats_add(ps, frame->pts); + + frame->best_effort_timestamp = pts_stats_guess(ps, 1); + // If we can't guess from just PTS - try DTS + if (frame->best_effort_timestamp == AV_NOPTS_VALUE) + frame->best_effort_timestamp = frame->pkt_dts; + + // We can't emulate what s/w does in a useful manner and using the + // "correct" answer seems to just confuse things. + frame->pkt_dts = frame->pts; + av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", + frame->pts, frame->best_effort_timestamp, frame->pkt_dts); +} + +static void +xlat_flush(xlat_track_t * const x) +{ + unsigned int i; + // Do not reset track_no - this ensures that any frames left in the decoder + // that turn up later get discarded. 
+ + x->last_pts = AV_NOPTS_VALUE; + for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) { + x->track_els[i].pending = 0; + x->track_els[i].discard = 1; + } +} + +static void +xlat_init(xlat_track_t * const x) +{ + memset(x, 0, sizeof(*x)); + xlat_flush(x); +} + +static int +xlat_pending(const V4L2m2mContext * const s) +{ + const xlat_track_t *const x = &s->xlat; + unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE; + int i; + const int64_t now = pts_stats_guess(&s->pts_stat, 0); + int64_t first_dts = AV_NOPTS_VALUE; + int no_dts_count = 0; + unsigned int interval = pts_stats_interval(&s->pts_stat); + + for (i = 0; i < FF_V4L2_M2M_TRACK_SIZE; ++i, n = (n - 1) & (FF_V4L2_M2M_TRACK_SIZE - 1)) { + const V4L2m2mTrackEl * const t = x->track_els + n; + + if (first_dts == AV_NOPTS_VALUE) + if (t->dts == AV_NOPTS_VALUE) + ++no_dts_count; + else + first_dts = t->dts; + + // Discard only set on never-set or flushed entries + // So if we get here we've never successfully decoded a frame so allow + // more frames into the buffer before stalling + if (t->discard) + return i - 16; + + // If we've got this frame out then everything before this point + // must have entered the decoder + if (!t->pending) + break; + + // If we've never seen a pts all we can do is count frames + if (now == AV_NOPTS_VALUE) + continue; + + if (t->dts != AV_NOPTS_VALUE && now >= t->dts) + break; + } + + if (first_dts != AV_NOPTS_VALUE && now != AV_NOPTS_VALUE && interval != 0 && s->reorder_size != 0) { + const int iframes = (first_dts - now) / (int)interval; + const int t = iframes - s->reorder_size + no_dts_count; + +// av_log(s->avctx, AV_LOG_DEBUG, "Last:%"PRId64", Now:%"PRId64", First:%"PRId64", delta=%"PRId64", frames=%d, nodts=%d\n", +// x->last_dts, now, first_dts, first_dts - now, iframes, no_dts_count); + + if (iframes > 0 && iframes < 64 && t < i) { + return t; + } + } + + return i; +} + +static inline int stream_started(const V4L2m2mContext * const s) { + return s->output.streamon; +} + +#define NQ_OK 0 +#define NQ_Q_FULL 1 +#define NQ_SRC_EMPTY 2 +#define NQ_NONE 3 +#define NQ_DRAINING 4 +#define NQ_DEAD 5 + +#define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING) +#define RETRY_NQ(nq_status) ((nq_status) == NQ_Q_FULL || (nq_status) == NQ_NONE) + +// do_not_get If true then no new packet will be got but status will +// be set appropriately + +// AVERROR_EOF Flushing an already flushed stream +// -ve Error (all errors except EOF are unexpected) +// NQ_OK (0) OK +// NQ_Q_FULL Dst full (retry if we think V4L2 Q has space now) +// NQ_SRC_EMPTY Src empty (do not retry) +// NQ_NONE Enqueue not attempted +// NQ_DRAINING At EOS, dQ dest until EOS there too +// NQ_DEAD Not running (do not retry, do not attempt capture dQ) + +static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s, const int do_not_get) { - V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; - V4L2Context *const capture = &s->capture; - V4L2Context *const output = &s->output; int ret; - if (!s->buf_pkt.size) { - ret = ff_decode_get_packet(avctx, &s->buf_pkt); + // If we don't already have a coded packet - get a new one + // We will already have a coded pkt if the output Q was full last time we + // tried to Q it + if (!s->buf_pkt.size && !do_not_get) { + unsigned int i; + + for (i = 0; i < 256; ++i) { + uint8_t * side_data; + size_t side_size; + + ret = ff_decode_get_packet(avctx, &s->buf_pkt); + if (ret != 0) + break; + + // New extradata is the only side-data we undertand + side_data = 
av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); + if (side_data) { + av_log(avctx, AV_LOG_DEBUG, "New extradata\n"); + if ((ret = copy_extradata(avctx, side_data, (int)side_size, &s->extdata_data, &s->extdata_size)) < 0) + av_log(avctx, AV_LOG_WARNING, "Failed to copy new extra data: %s\n", av_err2str(ret)); + s->extdata_sent = 0; + } + + if (s->buf_pkt.size != 0) + break; + + if (s->buf_pkt.side_data_elems == 0) { + av_log(avctx, AV_LOG_WARNING, "Empty pkt from ff_decode_get_packet - treating as EOF\n"); + ret = AVERROR_EOF; + break; + } + + // Retry a side-data only pkt + } + // If i >= 256 something has gone wrong + if (i >= 256) { + av_log(avctx, AV_LOG_ERROR, "Too many side-data only packets\n"); + return AVERROR(EIO); + } + + if (ret == AVERROR(EAGAIN)) { + if (!stream_started(s)) { + av_log(avctx, AV_LOG_TRACE, "%s: receive_frame before 1st coded packet\n", __func__); + return NQ_DEAD; + } + return NQ_SRC_EMPTY; + } + + if (ret == AVERROR_EOF) { + // EOF - enter drain mode + av_log(avctx, AV_LOG_TRACE, "--- EOS req: ret=%d, size=%d, started=%d, drain=%d\n", + ret, s->buf_pkt.size, stream_started(s), s->draining); + if (!stream_started(s)) { + av_log(avctx, AV_LOG_DEBUG, "EOS on flushed stream\n"); + s->draining = 1; + s->capture.done = 1; + return AVERROR_EOF; + } + + if (!s->draining) { + // Calling enqueue with an empty pkt starts drain + av_assert0(s->buf_pkt.size == 0); + ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); + if (ret) { + av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret); + return ret; + } + } + return NQ_DRAINING; + } + if (ret < 0) { - if (ret == AVERROR(EAGAIN)) - return ff_v4l2_context_dequeue_frame(capture, frame, 0); - else if (ret != AVERROR_EOF) - return ret; + av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret); + return ret; } } - if (s->draining) - goto dequeue; + if (s->draining) { + if (s->buf_pkt.size) { + av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n"); + av_packet_unref(&s->buf_pkt); + } + return NQ_DRAINING; + } + + if (!s->buf_pkt.size) + return NQ_NONE; + + if ((ret = check_output_streamon(avctx, s)) != 0) + return ret; - ret = ff_v4l2_context_enqueue_packet(output, &s->buf_pkt); - if (ret < 0 && ret != AVERROR(EAGAIN)) - goto fail; + if (s->extdata_sent) + ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); + else + ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size); - /* if EAGAIN don't unref packet and try to enqueue in the next iteration */ - if (ret != AVERROR(EAGAIN)) + if (ret == AVERROR(EAGAIN)) { + // Out of input buffers - keep packet + ret = NQ_Q_FULL; + } + else { + // In all other cases we are done with this packet av_packet_unref(&s->buf_pkt); + s->extdata_sent = 1; - if (!s->draining) { - ret = v4l2_try_start(avctx); if (ret) { - /* can't recover */ - if (ret != AVERROR(ENOMEM)) - ret = 0; - goto fail; + av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret); + return ret; + } + } + + // Start if we haven't + { + const int ret2 = v4l2_try_start(avctx); + if (ret2) { + av_log(avctx, AV_LOG_DEBUG, "Start failure: err=%d\n", ret2); + ret = (ret2 == AVERROR(ENOMEM)) ? 
ret2 : NQ_DEAD; } } -dequeue: - return ff_v4l2_context_dequeue_frame(capture, frame, -1); -fail: - av_packet_unref(&s->buf_pkt); return ret; } +static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx) +{ + int rv = 0; + + ff_mutex_lock(&ctx->lock); + + while (atomic_load(&ctx->q_count) == 0 && ctx->streamon) { + if (pthread_cond_wait(&ctx->cond, &ctx->lock) != 0) { + rv = AVERROR(errno); + av_log(avctx, AV_LOG_ERROR, "Cond wait failure: %s\n", av_err2str(rv)); + break; + } + } + + ff_mutex_unlock(&ctx->lock); + return rv; +} + +static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) +{ + V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; + int src_rv = -1; + int dst_rv = 1; // Non-zero (done), non-negative (error) number + unsigned int i = 0; + + do { + const int pending = xlat_pending(s); + const int prefer_dq = (pending > 4); + const int last_src_rv = src_rv; + + av_log(avctx, AV_LOG_TRACE, "Pending=%d, src_rv=%d, req_pkt=%d\n", pending, src_rv, s->req_pkt); + + // Enqueue another pkt for decode if + // (a) We don't have a lot of stuff in the buffer already OR + // (b) ... we (think we) do but we've failed to get a frame already OR + // (c) We've dequeued a lot of frames without asking for input + src_rv = try_enqueue_src(avctx, s, !(!prefer_dq || i != 0 || s->req_pkt > 2)); + + // If we got a frame last time or we've already tried to get a frame and + // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN) + // indicating that we want more input. + // This should mean that once decode starts we enter a stable state where + // we alternately ask for input and produce output + if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY) + break; + + if (src_rv == NQ_Q_FULL && last_src_rv == NQ_Q_FULL) { + av_log(avctx, AV_LOG_WARNING, "Poll thinks src Q has space; none found\n"); + break; + } + + // Try to get a new frame if + // (a) we haven't already got one AND + // (b) enqueue returned a status indicating that decode should be attempted + if (dst_rv != 0 && TRY_DQ(src_rv)) { + // Pick a timeout depending on state + // The pending count isn't completely reliable so it is good enough + // hint that we want a frame but not good enough to require it in + // all cases; however if it has got > 31 that exceeds its margin of + // error so require a frame to prevent ridiculous levels of latency + const int t = + src_rv == NQ_Q_FULL ? -1 : + src_rv == NQ_DRAINING ? 300 : + prefer_dq ? (s->running && pending > 31 ? 100 : 5) : 0; + + // Dequeue frame will unref any previous contents of frame + // if it returns success so we don't need an explicit unref + // when discarding + // This returns AVERROR(EAGAIN) on timeout or if + // there is room in the input Q and timeout == -1 + dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); + + // Failure due to no buffer in Q? 
+ if (dst_rv == AVERROR(ENOSPC)) { + // Wait & retry + if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) { + dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); + } + } + + if (dst_rv == 0) { + set_best_effort_pts(avctx, &s->pts_stat, frame); + if (!s->running) { + s->running = 1; + av_log(avctx, AV_LOG_VERBOSE, "Decode running\n"); + } + } + + if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { + av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); + dst_rv = AVERROR_EOF; + s->capture.done = 1; + } + else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) + av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", + s->draining, s->capture.done); + else if (dst_rv && dst_rv != AVERROR(EAGAIN)) + av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", + s->draining, s->capture.done, dst_rv); + } + + ++i; + if (i >= 256) { + av_log(avctx, AV_LOG_ERROR, "Unexpectedly large retry count: %d\n", i); + src_rv = AVERROR(EIO); + } + + // Continue trying to enqueue packets if either + // (a) we succeeded last time OR + // (b) we didn't ret a frame and we can retry the input + } while (src_rv == NQ_OK || (dst_rv == AVERROR(EAGAIN) && RETRY_NQ(src_rv))); + + // Ensure that the frame contains nothing if we aren't returning a frame + // (might happen when discarding) + if (dst_rv) + av_frame_unref(frame); + + // If we got a frame this time ask for a pkt next time + s->req_pkt = (dst_rv == 0) ? s->req_pkt + 1 : 0; + +#if 0 + if (dst_rv == 0) + { + static int z = 0; + if (++z > 50) { + av_log(avctx, AV_LOG_ERROR, "Streamoff and die?\n"); + ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); + return -1; + } + } +#endif + + return dst_rv == 0 ? 0 : + src_rv < 0 ? src_rv : + dst_rv < 0 ? 
dst_rv : + AVERROR(EAGAIN); +} + +#if 0 +#include +static int64_t us_time(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; +} + +static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) +{ + int ret; + const int64_t now = us_time(); + int64_t done; + av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); + ret = v4l2_receive_frame2(avctx, frame); + done = us_time(); + av_log(avctx, AV_LOG_TRACE, ">>> %s: rx time=%" PRId64 ", rv=%d\n", __func__, done - now, ret); + return ret; +} +#endif + +static uint32_t +avprofile_to_v4l2(const enum AVCodecID codec_id, const int avprofile) +{ + switch (codec_id) { + case AV_CODEC_ID_H264: + switch (avprofile) { + case AV_PROFILE_H264_BASELINE: + return V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE; + case AV_PROFILE_H264_CONSTRAINED_BASELINE: + return V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE; + case AV_PROFILE_H264_MAIN: + return V4L2_MPEG_VIDEO_H264_PROFILE_MAIN; + case AV_PROFILE_H264_EXTENDED: + return V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED; + case AV_PROFILE_H264_HIGH: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH; + case AV_PROFILE_H264_HIGH_10: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10; + case AV_PROFILE_H264_HIGH_10_INTRA: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA; + case AV_PROFILE_H264_MULTIVIEW_HIGH: + case AV_PROFILE_H264_HIGH_422: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422; + case AV_PROFILE_H264_HIGH_422_INTRA: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA; + case AV_PROFILE_H264_STEREO_HIGH: + return V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH; + case AV_PROFILE_H264_HIGH_444_PREDICTIVE: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE; + case AV_PROFILE_H264_HIGH_444_INTRA: + return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA; + case AV_PROFILE_H264_CAVLC_444: + return V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA; + case AV_PROFILE_H264_HIGH_444: + default: + break; +// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE = 12, +// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH = 13, +// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA = 14, +// V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH = 16, +// V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH = 17, + } + break; + case AV_CODEC_ID_MPEG2VIDEO: + case AV_CODEC_ID_MPEG4: + case AV_CODEC_ID_VC1: + case AV_CODEC_ID_VP8: + case AV_CODEC_ID_VP9: + case AV_CODEC_ID_AV1: + // Most profiles are a simple number that matches the V4L2 enum + return avprofile; + default: + break; + } + return ~(uint32_t)0; +} + +// This check mirrors Chrome's profile check by testing to see if the profile +// exists as a possible value for the V4L2 profile control +static int +check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s) +{ + struct v4l2_queryctrl query_ctrl; + struct v4l2_querymenu query_menu; + uint32_t profile_id; + + // An unset profile is almost certainly zero or -99 - do not reject + if (avctx->profile <= 0) { + av_log(avctx, AV_LOG_VERBOSE, "Profile %d <= 0 - check skipped\n", avctx->profile); + return 0; + } + + memset(&query_ctrl, 0, sizeof(query_ctrl)); + switch (avctx->codec_id) { + case AV_CODEC_ID_MPEG2VIDEO: + profile_id = V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE; + break; + case AV_CODEC_ID_MPEG4: + profile_id = V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE; + break; + case AV_CODEC_ID_H264: + profile_id = V4L2_CID_MPEG_VIDEO_H264_PROFILE; + break; + case AV_CODEC_ID_VP8: + profile_id = V4L2_CID_MPEG_VIDEO_VP8_PROFILE; + break; + case AV_CODEC_ID_VP9: + profile_id = 
V4L2_CID_MPEG_VIDEO_VP9_PROFILE; + break; +#ifdef V4L2_CID_MPEG_VIDEO_AV1_PROFILE + case AV_CODEC_ID_AV1: + profile_id = V4L2_CID_MPEG_VIDEO_AV1_PROFILE; + break; +#endif + default: + av_log(avctx, AV_LOG_VERBOSE, "Can't map profile for codec id %d; profile check skipped\n", avctx->codec_id); + return 0; + } + + query_ctrl = (struct v4l2_queryctrl){.id = profile_id}; + if (ioctl(s->fd, VIDIOC_QUERYCTRL, &query_ctrl) != 0) { + av_log(avctx, AV_LOG_VERBOSE, "Query profile ctrl (%#x) not supported: assume OK\n", query_ctrl.id); + } + else { + av_log(avctx, AV_LOG_DEBUG, "%s: Control supported: %#x\n", __func__, query_ctrl.id); + + query_menu = (struct v4l2_querymenu){ + .id = query_ctrl.id, + .index = avprofile_to_v4l2(avctx->codec_id, avctx->profile), + }; + + if (query_menu.index > query_ctrl.maximum || + query_menu.index < query_ctrl.minimum || + ioctl(s->fd, VIDIOC_QUERYMENU, &query_menu) != 0) { + return AVERROR(ENOENT); + } + } + + return 0; +}; + +static int +check_size(AVCodecContext * const avctx, V4L2m2mContext * const s, const uint32_t fcc) +{ + unsigned int i; + const uint32_t w = avctx->coded_width; + const uint32_t h = avctx->coded_height; + + if (w == 0 || h == 0 || fcc == 0) { + av_log(avctx, AV_LOG_TRACE, "%s: Size %dx%d or fcc %s empty\n", __func__, w, h, av_fourcc2str(fcc)); + return 0; + } + + if ((s->quirks & FF_V4L2_QUIRK_8192_MACRO_MAX) != 0 && + ((w + 15) >> 4) * ((h + 15) >> 4) > 8192) + { + av_log(avctx, AV_LOG_DEBUG, "%s: %d x %d > 8192 macroblocks (quirk)\n", __func__, w, h); + return AVERROR(-EINVAL); + } + + // Test with TRY_FMT + { + struct v4l2_format fmt = { + .type = s->capture.format.type, + }; + int rv; + + if (V4L2_TYPE_IS_MULTIPLANAR(s->capture.format.type)) + { + fmt.fmt.pix.width = w; + fmt.fmt.pix.height = h; + fmt.fmt.pix.pixelformat = fcc; + } + else + { + fmt.fmt.pix_mp.width = w; + fmt.fmt.pix_mp.height = h; + fmt.fmt.pix_mp.pixelformat = fcc; + } + + while ((rv = ioctl(s->fd, VIDIOC_TRY_FMT, &fmt)) != 0 && errno == EINTR) + /* Loop */; + + if (rv != 0) { + rv = AVERROR(errno); + if (rv != AVERROR(ENOTTY)) { + av_log(avctx, AV_LOG_WARNING, "%s: Try FMT failed\n", __func__); + return rv; + } + av_log(avctx, AV_LOG_DEBUG, "%s: Try FMT not supported\n", __func__); + // Just continue hopefully + } + else if (V4L2_TYPE_IS_MULTIPLANAR(s->capture.format.type)) + { + av_log(avctx, AV_LOG_TRACE, "%s: requested %s %dx%d TRY_FMT %s %dx%d\n", __func__, + av_fourcc2str(fcc), w, h, + av_fourcc2str(fmt.fmt.pix.pixelformat), fmt.fmt.pix.width, fmt.fmt.pix.height); + if (fmt.fmt.pix.width < w || fmt.fmt.pix.height < h || fmt.fmt.pix.pixelformat != fcc) { + av_log(avctx, AV_LOG_DEBUG, "%s: TRY_FMT returned incompatible size\n", __func__); + return AVERROR(EINVAL); + } + } + else + { + av_log(avctx, AV_LOG_TRACE, "%s: requested %s %dx%d TRY_FMT %s %dx%d\n", __func__, + av_fourcc2str(fcc), w, h, + av_fourcc2str(fmt.fmt.pix_mp.pixelformat), fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height); + if (fmt.fmt.pix_mp.width < w || fmt.fmt.pix_mp.height < h || fmt.fmt.pix_mp.pixelformat != fcc) { + av_log(avctx, AV_LOG_DEBUG, "%s: TRY_FMT returned incompatible size\n", __func__); + return AVERROR(EINVAL); + } + } + } + + if ((s->quirks & FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN) != 0) { + av_log(avctx, AV_LOG_TRACE, "%s: Skipped (quirk): Size %dx%d, fcc %s\n", __func__, w, h, av_fourcc2str(fcc)); + return 0; + } + + for (i = 0;; ++i) { + struct v4l2_frmsizeenum fs = { + .index = i, + .pixel_format = fcc, + }; + + while (ioctl(s->fd, VIDIOC_ENUM_FRAMESIZES, &fs) != 0) { + const int err 
= AVERROR(errno); + if (err == AVERROR(EINTR)) + continue; + if (i == 0 && err == AVERROR(ENOTTY)) { + av_log(avctx, AV_LOG_DEBUG, "Framesize enum not supported\n"); + return 0; + } + if (err != AVERROR(EINVAL)) { + av_log(avctx, AV_LOG_ERROR, "Failed to enum framesizes: %s", av_err2str(err)); + return err; + } + av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in %u frame size enums\n", + w, h, av_fourcc2str(fcc), i); + return err; + } + + switch (fs.type) { + case V4L2_FRMSIZE_TYPE_DISCRETE: + av_log(avctx, AV_LOG_TRACE, "%s[%d]: Discrete: %dx%d\n", __func__, i, + fs.discrete.width,fs.discrete.height); + if (w == fs.discrete.width && h == fs.discrete.height) + return 0; + break; + case V4L2_FRMSIZE_TYPE_STEPWISE: + av_log(avctx, AV_LOG_TRACE, "%s[%d]: Stepwise: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i, + fs.stepwise.min_width, fs.stepwise.min_height, + fs.stepwise.max_width, fs.stepwise.max_height, + fs.stepwise.step_width,fs.stepwise.step_height); + if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && + h >= fs.stepwise.min_height && h <= fs.stepwise.max_height && + (w - fs.stepwise.min_width) % fs.stepwise.step_width == 0 && + (h - fs.stepwise.min_height) % fs.stepwise.step_height == 0) + return 0; + break; + case V4L2_FRMSIZE_TYPE_CONTINUOUS: + av_log(avctx, AV_LOG_TRACE, "%s[%d]: Continuous: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i, + fs.stepwise.min_width, fs.stepwise.min_height, + fs.stepwise.max_width, fs.stepwise.max_height, + fs.stepwise.step_width,fs.stepwise.step_height); + if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && + h >= fs.stepwise.min_height && h <= fs.stepwise.max_height) + return 0; + break; + default: + av_log(avctx, AV_LOG_ERROR, "Unexpected framesize enum: %d", fs.type); + return AVERROR(EINVAL); + } + } +} + +static int +get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s) +{ + struct v4l2_capability cap; + + memset(&cap, 0, sizeof(cap)); + while (ioctl(s->fd, VIDIOC_QUERYCAP, &cap) != 0) { + int err = errno; + if (err == EINTR) + continue; + av_log(avctx, AV_LOG_ERROR, "V4L2: Failed to get capabilities: %s\n", strerror(err)); + return AVERROR(err); + } + + // Could be made table driven if we have a few more but right now there + // seems no point + + // Meson (amlogic) always gives a resolution changed event after output + // streamon and userspace must (re)allocate capture buffers and streamon + // capture to clear the event even if the capture buffers were the right + // size in the first place. + if (strcmp(cap.driver, "meson-vdec") == 0) + s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS | FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN; + // RPI has a max 8192 macroblock limit but no way of signaling it + if (strcmp(cap.driver, "bcm2835-codec") == 0) + s->quirks |= FF_V4L2_QUIRK_8192_MACRO_MAX; + + av_log(avctx, AV_LOG_DEBUG, "Driver '%s': Quirks=%#x\n", cap.driver, s->quirks); + return 0; +} + +// This heuristic is for H264 but use for everything +static uint32_t max_coded_size(const AVCodecContext * const avctx) +{ + uint32_t wxh = avctx->coded_width * avctx->coded_height; + uint32_t size; + + size = wxh * 3 / 2; + // H.264 Annex A table A-1 gives minCR which is either 2 or 4 + // unfortunately that doesn't yield an actually useful limit + // and it should be noted that frame 0 is special cased to allow + // a bigger number which really isn't helpful for us. 
So just pick + // frame_size / 2 + size /= 2; + // Add 64k to allow for any overheads and/or encoder hopefulness + // with small WxH + return size + (1 << 16); +} + +static void +parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s) +{ + s->reorder_size = 0; + + if (!avctx->extradata || !avctx->extradata_size) + return; + + switch (avctx->codec_id) { +#if CONFIG_H264_DECODER + case AV_CODEC_ID_H264: + { + H264ParamSets ps; + int is_avc = 0; + int nal_length_size = 0; + int ret; + + memset(&ps, 0, sizeof(ps)); + + ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size, + &ps, &is_avc, &nal_length_size, + avctx->err_recognition, avctx); + if (ret > 0) { + const SPS * sps = NULL; + unsigned int i; + for (i = 0; i != MAX_SPS_COUNT; ++i) { + if (ps.sps_list[i]) { + sps = ps.sps_list[i]; + break; + } + } + if (sps) { + avctx->profile = ff_h264_get_profile(sps); + avctx->level = sps->level_idc; + s->reorder_size = sps->num_reorder_frames; + } + } + ff_h264_ps_uninit(&ps); + break; + } +#endif +#if CONFIG_HEVC_DECODER + case AV_CODEC_ID_HEVC: + { + HEVCParamSets ps; + HEVCSEI sei; + int is_nalff = 0; + int nal_length_size = 0; + int ret; + + memset(&ps, 0, sizeof(ps)); + memset(&sei, 0, sizeof(sei)); + + ret = ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size, + &ps, &sei, &is_nalff, &nal_length_size, + avctx->err_recognition, 0, avctx); + if (ret > 0) { + const HEVCSPS * sps = NULL; + unsigned int i; + for (i = 0; i != HEVC_MAX_SPS_COUNT; ++i) { + if (ps.sps_list[i]) { + sps = ps.sps_list[i]; + break; + } + } + if (sps) { + avctx->profile = sps->ptl.general_ptl.profile_idc; + avctx->level = sps->ptl.general_ptl.level_idc; + s->reorder_size = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering; + } + } + ff_hevc_ps_uninit(&ps); + ff_hevc_reset_sei(&sei); + break; + } +#endif + default: + break; + } +} + +static int +choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s) +{ + const V4L2m2mPriv * const priv = avctx->priv_data; + unsigned int fmts_n; + uint32_t *fmts = ff_v4l2_context_enum_drm_formats(&s->capture, &fmts_n); + enum AVPixelFormat *fmts2 = NULL; + enum AVPixelFormat gf_pix_fmt; + unsigned int i; + unsigned int n = 0; + unsigned int pref_n = 1; + int rv = AVERROR(ENOENT); + + if (!fmts) + return AVERROR(ENOENT); + + if ((fmts2 = av_malloc(sizeof(*fmts2) * (fmts_n + 3))) == NULL) { + rv = AVERROR(ENOMEM); + goto error; + } + + // Filter for formats that are supported by ffmpeg and + // can accomodate the stream size + fmts2[n++] = AV_PIX_FMT_DRM_PRIME; + for (i = 0; i != fmts_n; ++i) { + const enum AVPixelFormat f = ff_v4l2_format_v4l2_to_avfmt(fmts[i], AV_CODEC_ID_RAWVIDEO); + av_log(avctx, AV_LOG_TRACE, "VLC pix %s -> %s\n", av_fourcc2str(fmts[i]), av_get_pix_fmt_name(f)); + if (f == AV_PIX_FMT_NONE) + continue; + + if (check_size(avctx, s, fmts[i]) != 0) + continue; + + if (f == priv->pix_fmt) + pref_n = n; + fmts2[n++] = f; + } + + if (n < 2) { + av_log(avctx, AV_LOG_DEBUG, "%s: No usable formats found\n", __func__); + goto error; + } + + if (n != 2) { + // ffmpeg.c really only expects one s/w format. It thinks that the + // last format in the list is the s/w format of the h/w format but + // also chooses the first non-h/w format as the preferred s/w format. 
+ // The only way of reconciling this is to dup our preferred format into + // both last & first place :-( + const enum AVPixelFormat t = fmts2[pref_n]; + fmts2[pref_n] = fmts2[1]; + fmts2[1] = t; + fmts2[n++] = t; + } + + fmts2[n] = AV_PIX_FMT_NONE; + + gf_pix_fmt = ff_get_format(avctx, fmts2); + av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n", + avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), + avctx->coded_width, avctx->coded_height, + gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt)); + + if (gf_pix_fmt == AV_PIX_FMT_NONE) + goto error; + + if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { + avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; + s->capture.av_pix_fmt = avctx->sw_pix_fmt; + s->output_drm = 1; + } + else { + avctx->pix_fmt = gf_pix_fmt; + s->capture.av_pix_fmt = gf_pix_fmt; + s->output_drm = 0; + } + + // Get format converts capture.av_pix_fmt back into a V4L2 format in the context + if ((rv = ff_v4l2_context_get_format(&s->capture, 0)) != 0) + goto error; + rv = ff_v4l2_context_set_format(&s->capture); + +error: + av_free(fmts2); + av_free(fmts); + return rv; +} + static av_cold int v4l2_decode_init(AVCodecContext *avctx) { V4L2Context *capture, *output; @@ -185,10 +1232,17 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) V4L2m2mPriv *priv = avctx->priv_data; int ret; + av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); + ret = ff_v4l2_m2m_create_context(priv, &s); if (ret < 0) return ret; + parse_extradata(avctx, s); + + xlat_init(&s->xlat); + pts_stats_init(&s->pts_stat, avctx, "decoder"); + capture = &s->capture; output = &s->output; @@ -196,14 +1250,45 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) * by the v4l2 driver; this event will trigger a full pipeline reconfig and * the proper values will be retrieved from the kernel driver. 
*/ - output->height = capture->height = avctx->coded_height; - output->width = capture->width = avctx->coded_width; +// output->height = capture->height = avctx->coded_height; +// output->width = capture->width = avctx->coded_width; + output->height = capture->height = 0; + output->width = capture->width = 0; output->av_codec_id = avctx->codec_id; output->av_pix_fmt = AV_PIX_FMT_NONE; + output->min_buf_size = max_coded_size(avctx); capture->av_codec_id = AV_CODEC_ID_RAWVIDEO; capture->av_pix_fmt = avctx->pix_fmt; + capture->min_buf_size = 0; + + capture->av_pix_fmt = AV_PIX_FMT_NONE; + s->output_drm = 0; + + s->db_ctl = NULL; + if (priv->dmabuf_alloc != NULL && strcmp(priv->dmabuf_alloc, "v4l2") != 0) { + if (strcmp(priv->dmabuf_alloc, "cma") == 0) + s->db_ctl = dmabufs_ctl_new(); + else { + av_log(avctx, AV_LOG_ERROR, "Unknown dmabuf alloc method: '%s'\n", priv->dmabuf_alloc); + return AVERROR(EINVAL); + } + if (!s->db_ctl) { + av_log(avctx, AV_LOG_ERROR, "Can't open dmabuf provider '%s'\n", priv->dmabuf_alloc); + return AVERROR(ENOMEM); + } + } + + s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); + if (!s->device_ref) { + ret = AVERROR(ENOMEM); + return ret; + } + + ret = av_hwdevice_ctx_init(s->device_ref); + if (ret < 0) + return ret; s->avctx = avctx; ret = ff_v4l2_m2m_codec_init(priv); @@ -212,12 +1297,90 @@ return ret; } - return v4l2_prepare_decoder(s); + if (avctx->extradata && + (ret = copy_extradata(avctx, avctx->extradata, avctx->extradata_size, &s->extdata_data, &s->extdata_size)) != 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to copy extradata from context: %s\n", av_err2str(ret)); +#if DUMP_FAILED_EXTRADATA + log_dump(avctx, AV_LOG_INFO, avctx->extradata, avctx->extradata_size); +#endif + return ret; + } + + if ((ret = get_quirks(avctx, s)) != 0) + return ret; + + if ((ret = check_profile(avctx, s)) != 0) { + av_log(avctx, AV_LOG_WARNING, "Profile %d not supported by decode\n", avctx->profile); + return ret; + } + + // Size check done as part of format filtering + if ((ret = choose_capture_format(avctx, s)) != 0) + return ret; + + if ((ret = v4l2_prepare_decoder(s)) < 0) + return ret; + + return 0; } static av_cold int v4l2_decode_close(AVCodecContext *avctx) { - return ff_v4l2_m2m_codec_end(avctx->priv_data); + int rv; + av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); + rv = ff_v4l2_m2m_codec_end(avctx->priv_data); + av_log(avctx, AV_LOG_TRACE, ">>> %s: rv=%d\n", __func__, rv); + return rv; +} + +static void v4l2_decode_flush(AVCodecContext *avctx) +{ + // An alternative and more drastic form of flush is to simply do this: + // v4l2_decode_close(avctx); + // v4l2_decode_init(avctx); + // The downside is that this keeps a decoder open until all the frames + // associated with it have been returned. This is a bit wasteful on + // possibly limited h/w resources and fails on a Pi for this reason unless + // more GPU mem is allocated than is the default.
+ + V4L2m2mPriv * const priv = avctx->priv_data; + V4L2m2mContext * const s = priv->context; + V4L2Context * const output = &s->output; + V4L2Context * const capture = &s->capture; + + av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon); + + // Reflushing everything is benign, quick and avoids having to worry about + // states like EOS processing so don't try to optimize out (having got it + // wrong once) + + ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF); + + // Clear any buffered input packet + av_packet_unref(&s->buf_pkt); + + // Clear a pending EOS + if (ff_v4l2_ctx_eos(capture)) { + // Arguably we could delay this but this is easy and doesn't require + // thought or extra vars + ff_v4l2_context_set_status(capture, VIDIOC_STREAMOFF); + ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); + } + + // V4L2 makes no guarantees about whether decoded frames are flushed or not + // so mark all frames we are tracking to be discarded if they appear + xlat_flush(&s->xlat); + + // resend extradata + s->extdata_sent = 0; + // clear status vars + s->running = 0; + s->draining = 0; + output->done = 0; + capture->done = 0; + + // Stream on will occur when we actually submit a new frame + av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__); } #define OFFSET(x) offsetof(V4L2m2mPriv, x) @@ -227,9 +1390,16 @@ static const AVOption options[] = { V4L_M2M_DEFAULT_OPTS, { "num_capture_buffers", "Number of buffers in the capture context", OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS }, + { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS }, + { "dmabuf_alloc", "Dmabuf alloc method", OFFSET(dmabuf_alloc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS }, { NULL}, }; +static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = { + HW_CONFIG_INTERNAL(DRM_PRIME), + NULL +}; + #define M2MDEC_CLASS(NAME) \ static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \ .class_name = #NAME "_v4l2m2m_decoder", \ @@ -240,7 +1410,7 @@ static const AVOption options[] = { #define M2MDEC(NAME, LONGNAME, CODEC, bsf_name) \ M2MDEC_CLASS(NAME) \ - const FFCodec ff_ ## NAME ## _v4l2m2m_decoder = { \ + FFCodec ff_ ## NAME ## _v4l2m2m_decoder = { \ .p.name = #NAME "_v4l2m2m" , \ CODEC_LONG_NAME("V4L2 mem2mem " LONGNAME " decoder wrapper"), \ .p.type = AVMEDIA_TYPE_VIDEO, \ @@ -250,11 +1420,13 @@ static const AVOption options[] = { .init = v4l2_decode_init, \ FF_CODEC_RECEIVE_FRAME_CB(v4l2_receive_frame), \ .close = v4l2_decode_close, \ + .flush = v4l2_decode_flush, \ .bsfs = bsf_name, \ .p.capabilities = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \ .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE | \ FF_CODEC_CAP_INIT_CLEANUP, \ .p.wrapper_name = "v4l2m2m", \ + .hw_configs = v4l2_m2m_hw_configs, \ } M2MDEC(h264, "H.264", AV_CODEC_ID_H264, "h264_mp4toannexb"); @@ -266,3 +1438,4 @@ M2MDEC(h263, "H.263", AV_CODEC_ID_H263, NULL); M2MDEC(vc1 , "VC1", AV_CODEC_ID_VC1, NULL); M2MDEC(vp8, "VP8", AV_CODEC_ID_VP8, NULL); M2MDEC(vp9, "VP9", AV_CODEC_ID_VP9, NULL); +M2MDEC(av1, "AV1", AV_CODEC_ID_AV1, NULL); diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c index 93703ccc63..e902d37f2d 100644 --- a/libavcodec/v4l2_m2m_enc.c +++ b/libavcodec/v4l2_m2m_enc.c @@ -21,13 +21,17 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "config.h" + #include #include #include + #include 
"encode.h" #include "libavcodec/avcodec.h" #include "libavutil/pixdesc.h" #include "libavutil/pixfmt.h" +#include "libavutil/mem.h" #include "libavutil/opt.h" #include "codec_internal.h" #include "profiles.h" @@ -38,6 +42,39 @@ #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x +#if CONFIG_LIBDRM +#include + +// P030 should be defined in drm_fourcc.h and hopefully will be sometime +// in the future but until then... +#ifndef DRM_FORMAT_P030 +#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') +#endif + +#ifndef DRM_FORMAT_NV15 +#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') +#endif + +#ifndef DRM_FORMAT_NV20 +#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') +#endif + +#endif + +#ifndef V4L2_CID_CODEC_BASE +#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE +#endif + +// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined +// in videodev2.h hopefully will be sometime in the future but until then... +#ifndef V4L2_PIX_FMT_NV12_10_COL128 +#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') +#endif + +#ifndef V4L2_PIX_FMT_NV12_COL128 +#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ +#endif + static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den) { struct v4l2_streamparm parm = { 0 }; @@ -148,15 +185,14 @@ static inline int v4l2_mpeg4_profile_from_ff(int p) static int v4l2_check_b_frame_support(V4L2m2mContext *s) { if (s->avctx->max_b_frames) - av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n"); + av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames); - v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0); + v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1); v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0); if (s->avctx->max_b_frames == 0) return 0; avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding"); - return AVERROR_PATCHWELCOME; } @@ -271,17 +307,212 @@ static int v4l2_prepare_encoder(V4L2m2mContext *s) return 0; } +static int avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame) +{ +#if !CONFIG_LIBDRM + return AVERROR_OPTION_NOT_FOUND; +#else + const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; + + const uint32_t drm_fmt = src->layers[0].format; + // Treat INVALID as LINEAR + const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? 
+ DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; + uint32_t pix_fmt = 0; + uint32_t w = 0; + uint32_t h = 0; + uint32_t bpl = src->layers[0].planes[0].pitch; + + // We really don't expect multiple layers + // All formats that we currently cope with are single object + + if (src->nb_layers != 1 || src->nb_objects != 1) + return AVERROR(EINVAL); + + switch (drm_fmt) { + case DRM_FORMAT_YUV420: + if (mod == DRM_FORMAT_MOD_LINEAR) { + if (src->layers[0].nb_planes != 3) + break; + pix_fmt = V4L2_PIX_FMT_YUV420; + h = src->layers[0].planes[1].offset / bpl; + w = bpl; + } + break; + + case DRM_FORMAT_NV12: + if (mod == DRM_FORMAT_MOD_LINEAR) { + if (src->layers[0].nb_planes != 2) + break; + pix_fmt = V4L2_PIX_FMT_NV12; + h = src->layers[0].planes[1].offset / bpl; + w = bpl; + } + else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { + if (src->layers[0].nb_planes != 2) + break; + pix_fmt = V4L2_PIX_FMT_NV12_COL128; + w = bpl; + h = src->layers[0].planes[1].offset / 128; + bpl = fourcc_mod_broadcom_param(mod); + } + break; + + case DRM_FORMAT_P030: + if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { + if (src->layers[0].nb_planes != 2) + break; + pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; + w = bpl / 2; // Matching lie to how we construct this + h = src->layers[0].planes[1].offset / 128; + bpl = fourcc_mod_broadcom_param(mod); + } + break; + + default: + break; + } + + if (!pix_fmt) + return AVERROR(EINVAL); + + if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { + struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; + + pix->width = w; + pix->height = h; + pix->pixelformat = pix_fmt; + pix->plane_fmt[0].bytesperline = bpl; + pix->num_planes = 1; + } + else { + struct v4l2_pix_format *const pix = &format->fmt.pix; + + pix->width = w; + pix->height = h; + pix->pixelformat = pix_fmt; + pix->bytesperline = bpl; + } + + return 0; +#endif +} + +// Do we have similar enough formats to be usable? +static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b) +{ + if (a->type != b->type) + return 0; + + if (V4L2_TYPE_IS_MULTIPLANAR(a->type)) { + const struct v4l2_pix_format_mplane *const pa = &a->fmt.pix_mp; + const struct v4l2_pix_format_mplane *const pb = &b->fmt.pix_mp; + unsigned int i; + if (pa->pixelformat != pb->pixelformat || + pa->num_planes != pb->num_planes) + return 0; + for (i = 0; i != pa->num_planes; ++i) { + if (pa->plane_fmt[i].bytesperline != pb->plane_fmt[i].bytesperline) + return 0; + } + } + else { + const struct v4l2_pix_format *const pa = &a->fmt.pix; + const struct v4l2_pix_format *const pb = &b->fmt.pix; + if (pa->pixelformat != pb->pixelformat || + pa->bytesperline != pb->bytesperline) + return 0; + } + return 1; +} + +static inline int q_full(const V4L2Context *const output) +{ + return ff_v4l2_context_q_count(output) == output->num_buffers; +} + static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) { V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; V4L2Context *const output = &s->output; + int rv; + const int needs_slot = q_full(output); + + av_log(avctx, AV_LOG_TRACE, "<<< %s; needs_slot=%d\n", __func__, needs_slot); + + // Signal EOF if needed (doesn't need q slot) + if (!frame) { + av_log(avctx, AV_LOG_TRACE, "--- %s: EOS\n", __func__); + return ff_v4l2_context_enqueue_frame(output, frame); + } + + if ((rv = ff_v4l2_dq_all(output, needs_slot? 
500 : 0)) != 0) { + // We should be able to return AVERROR(EAGAIN) to indicate buffer + // exhaustion, but ffmpeg currently treats that as fatal. + av_log(avctx, AV_LOG_WARNING, "Failed to get buffer for src frame: %s\n", av_err2str(rv)); + return rv; + } + + if (s->input_drm && !output->streamon) { + struct v4l2_format req_format = {.type = output->format.type}; + + // Set format when we first get a buffer + if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n"); + return rv; + } + + ff_v4l2_context_release(output); + + output->format = req_format; + + if ((rv = ff_v4l2_context_set_format(output)) != 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n"); + return rv; + } + + if (!fmt_eq(&req_format, &output->format)) { + av_log(avctx, AV_LOG_ERROR, "Format mismatch after setup\n"); + return AVERROR(EINVAL); + } + + output->selection.top = frame->crop_top; + output->selection.left = frame->crop_left; + output->selection.width = av_frame_cropped_width(frame); + output->selection.height = av_frame_cropped_height(frame); + + if ((rv = ff_v4l2_context_init(output)) != 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n"); + return rv; + } + + { + struct v4l2_selection selection = { + .type = V4L2_BUF_TYPE_VIDEO_OUTPUT, + .target = V4L2_SEL_TGT_CROP, + .r = output->selection + }; + if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0) { + av_log(avctx, AV_LOG_WARNING, "S_SELECTION (CROP) %dx%d @ %d,%d failed: %s\n", + selection.r.width, selection.r.height, selection.r.left, selection.r.top, + av_err2str(AVERROR(errno))); + } + av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n", + selection.r.width, selection.r.height, selection.r.left, selection.r.top); + } + } #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME - if (frame && frame->pict_type == AV_PICTURE_TYPE_I) + if (frame->pict_type == AV_PICTURE_TYPE_I) v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1); #endif - return ff_v4l2_context_enqueue_frame(output, frame); + rv = ff_v4l2_context_enqueue_frame(output, frame); + if (rv) { + av_log(avctx, AV_LOG_ERROR, "Enqueue frame failed: %s\n", av_err2str(rv)); + } + + return rv; } static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) @@ -292,6 +523,11 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) AVFrame *frame = s->frame; int ret; + av_log(avctx, AV_LOG_TRACE, "<<< %s: qlen out %d cap %d\n", __func__, + ff_v4l2_context_q_count(output), ff_v4l2_context_q_count(capture)); + + ff_v4l2_dq_all(output, 0); + if (s->draining) goto dequeue; @@ -328,7 +564,115 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) } dequeue: - return ff_v4l2_context_dequeue_packet(capture, avpkt); + // Dequeue a frame + for (;;) { + int t = q_full(output) ? -1 : s->draining ? 300 : 0; + int rv2; + + // If output is full wait for either a packet or output to become not full + ret = ff_v4l2_context_dequeue_packet(capture, avpkt, t); + + // If output was full retry packet dequeue + t = (ret != AVERROR(EAGAIN) || t != -1) ? 0 : 300; + rv2 = ff_v4l2_dq_all(output, t); + if (t == 0 || rv2 != 0) + break; + } + if (ret) + return (s->draining && ret == AVERROR(EAGAIN)) ? 
AVERROR_EOF : ret; + + if (capture->first_buf == 1) { + uint8_t * data; + const int len = avpkt->size; + + // 1st buffer after streamon should be SPS/PPS + capture->first_buf = 2; + + // Clear both possible stores so there is no chance of confusion + av_freep(&s->extdata_data); + s->extdata_size = 0; + av_freep(&avctx->extradata); + avctx->extradata_size = 0; + + if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) + goto fail_no_mem; + + memcpy(data, avpkt->data, len); + av_packet_unref(avpkt); + + // We need to copy the header, but keep local if not global + if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) { + avctx->extradata = data; + avctx->extradata_size = len; + } + else { + s->extdata_data = data; + s->extdata_size = len; + } + + ret = ff_v4l2_context_dequeue_packet(capture, avpkt, 0); + ff_v4l2_dq_all(output, 0); + if (ret) + return ret; + } + + // First frame must be key so mark as such even if encoder forgot + if (capture->first_buf == 2) { + avpkt->flags |= AV_PKT_FLAG_KEY; + + // Add any extradata to the 1st packet we emit as we cannot create it at init + if (avctx->extradata_size > 0 && avctx->extradata) { + void * const side = av_packet_new_side_data(avpkt, + AV_PKT_DATA_NEW_EXTRADATA, + avctx->extradata_size); + if (!side) + goto fail_no_mem; + + memcpy(side, avctx->extradata, avctx->extradata_size); + } + } + + // Add SPS/PPS to the start of every key frame if non-global headers + if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) { + const size_t newlen = s->extdata_size + avpkt->size; + AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE); + + if (buf == NULL) + goto fail_no_mem; + + memcpy(buf->data, s->extdata_data, s->extdata_size); + memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size); + + av_buffer_unref(&avpkt->buf); + avpkt->buf = buf; + avpkt->data = buf->data; + avpkt->size = newlen; + } + else if (ff_v4l2_context_q_count(capture) < 2) { + // Avoid running out of capture buffers + // In most cases the buffers will be returned quickly in which case + // we don't copy and can use the v4l2 buffers directly but sometimes + // ffmpeg seems to hold onto all of them for a long time (.mkv + // creation?) so avoid deadlock in those cases. + AVBufferRef * const buf = av_buffer_alloc(avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE); + if (buf == NULL) + goto fail_no_mem; + + memcpy(buf->data, avpkt->data, avpkt->size); + av_buffer_unref(&avpkt->buf); // Will recycle the V4L2 buffer + + avpkt->buf = buf; + avpkt->data = buf->data; + } + + capture->first_buf = 0; + return 0; + +fail_no_mem: + av_log(avctx, AV_LOG_ERROR, "Rx pkt failed: No memory\n"); + ret = AVERROR(ENOMEM); + av_packet_unref(avpkt); + return ret; } static av_cold int v4l2_encode_init(AVCodecContext *avctx) @@ -340,6 +684,8 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) uint32_t v4l2_fmt_output; int ret; + av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt); + ret = ff_v4l2_m2m_create_context(priv, &s); if (ret < 0) return ret; @@ -347,13 +693,17 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) capture = &s->capture; output = &s->output; + s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME); + /* common settings output/capture */ output->height = capture->height = avctx->height; output->width = capture->width = avctx->width; /* output context */ output->av_codec_id = AV_CODEC_ID_RAWVIDEO; - output->av_pix_fmt = avctx->pix_fmt; + output->av_pix_fmt = !s->input_drm ? 
avctx->pix_fmt : + avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? avctx->sw_pix_fmt : + AV_PIX_FMT_YUV420P; /* capture context */ capture->av_codec_id = avctx->codec_id; @@ -372,7 +722,7 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) v4l2_fmt_output = output->format.fmt.pix.pixelformat; pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO); - if (pix_fmt_output != avctx->pix_fmt) { + if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output); av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name); return AVERROR(EINVAL); @@ -390,9 +740,10 @@ static av_cold int v4l2_encode_close(AVCodecContext *avctx) #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM #define V4L_M2M_CAPTURE_OPTS \ - V4L_M2M_DEFAULT_OPTS,\ + { "num_output_buffers", "Number of buffers in the output context",\ + OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },\ { "num_capture_buffers", "Number of buffers in the capture context", \ - OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS } + OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 8 }, 8, INT_MAX, FLAGS } static const AVOption mpeg4_options[] = { V4L_M2M_CAPTURE_OPTS, diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c new file mode 100644 index 0000000000..e157d4d557 --- /dev/null +++ b/libavcodec/v4l2_req_dmabufs.c @@ -0,0 +1,433 @@ +/* + Copyright (C) 2024 John Cox john.cox@raspberrypi.com + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "v4l2_req_dmabufs.h" +#include "v4l2_req_utils.h" + +#define TRACE_ALLOC 0 + +#ifndef __O_CLOEXEC +#define __O_CLOEXEC 0 +#endif + +struct dmabufs_ctl; +struct dmabuf_h; + +struct dmabuf_fns { + int (*buf_alloc)(struct dmabufs_ctl * dbsc, struct dmabuf_h * dh, size_t size); + void (*buf_free)(struct dmabuf_h * dh); + int (*ctl_new)(struct dmabufs_ctl * dbsc); + void (*ctl_free)(struct dmabufs_ctl * dbsc); +}; + +struct dmabufs_ctl { + atomic_int ref_count; + int fd; + size_t page_size; + void * v; + const struct dmabuf_fns * fns; +}; + +struct dmabuf_h { + int fd; + size_t size; + size_t len; + void * mapptr; + void * v; + const struct dmabuf_fns * fns; +}; + +#if TRACE_ALLOC +static unsigned int total_bufs = 0; +static size_t total_size = 0; +#endif + +struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size) +{ + struct dmabuf_h *dh; + + if (mapptr == MAP_FAILED) + return NULL; + + dh = malloc(sizeof(*dh)); + if (!dh) + return NULL; + + *dh = (struct dmabuf_h) { + .fd = -1, + .size = size, + .mapptr = mapptr + }; + + return dh; +} + +struct dmabuf_h * dmabuf_import(int fd, size_t size) +{ + struct dmabuf_h *dh; + + fd = dup(fd); + if (fd < 0 || size == 0) + return NULL; + + dh = malloc(sizeof(*dh)); + if (!dh) { + close(fd); + return NULL; + } + + *dh = (struct dmabuf_h) { + .fd = fd, + .size = size, + .mapptr = MAP_FAILED + }; + +#if TRACE_ALLOC + ++total_bufs; + total_size += dh->size; + request_log("%s: Import: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); +#endif + + return dh; +} + +struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size) +{ + struct dmabuf_h * dh; + if (old != NULL) { + if (old->size >= size) { + return old; + } + dmabuf_free(old); + } + + if (size == 0 || + (dh = malloc(sizeof(*dh))) == NULL) + return NULL; + + *dh = (struct dmabuf_h){ + .fd = -1, + .mapptr = MAP_FAILED, + .fns = dbsc->fns + }; + + if (dh->fns->buf_alloc(dbsc, dh, size) != 0) + goto fail; + + +#if TRACE_ALLOC + ++total_bufs; + total_size += dh->size; + request_log("%s: Alloc: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); +#endif + + return dh; + +fail: + free(dh); + return NULL; +} + +int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags) +{ + struct dma_buf_sync sync = { + .flags = flags + }; + if (dh->fd == -1) + return 0; + while (ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &sync) == -1) { + const int err = errno; + if (errno == EINTR) + continue; + request_log("%s: ioctl failed: flags=%#x\n", __func__, flags); + return -err; + } + return 0; +} + +int dmabuf_write_start(struct dmabuf_h * const dh) +{ + return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE); +} + +int dmabuf_write_end(struct dmabuf_h * const dh) +{ + return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE); +} + +int dmabuf_read_start(struct dmabuf_h * const dh) +{ + if (!dmabuf_map(dh)) + return -1; + return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ); +} + +int dmabuf_read_end(struct dmabuf_h * const dh) +{ + return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ); +} + + +void * dmabuf_map(struct dmabuf_h * const dh) +{ + if (!dh) + return NULL; + if (dh->mapptr != MAP_FAILED) + return dh->mapptr; + dh->mapptr = mmap(NULL, dh->size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + dh->fd, 0); + if (dh->mapptr == MAP_FAILED) { + request_log("%s: 
Map failed\n", __func__); + return NULL; + } + return dh->mapptr; +} + +int dmabuf_fd(const struct dmabuf_h * const dh) +{ + if (!dh) + return -1; + return dh->fd; +} + +size_t dmabuf_size(const struct dmabuf_h * const dh) +{ + if (!dh) + return 0; + return dh->size; +} + +size_t dmabuf_len(const struct dmabuf_h * const dh) +{ + if (!dh) + return 0; + return dh->len; +} + +void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len) +{ + dh->len = len; +} + +void dmabuf_free(struct dmabuf_h * dh) +{ + if (!dh) + return; + +#if TRACE_ALLOC + --total_bufs; + total_size -= dh->size; + request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); +#endif + + if (dh->fns != NULL && dh->fns->buf_free) + dh->fns->buf_free(dh); + + if (dh->mapptr != MAP_FAILED && dh->mapptr != NULL) + munmap(dh->mapptr, dh->size); + if (dh->fd != -1) + while (close(dh->fd) == -1 && errno == EINTR) + /* loop */; + free(dh); +} + +static struct dmabufs_ctl * dmabufs_ctl_new2(const struct dmabuf_fns * const fns) +{ + struct dmabufs_ctl * dbsc = calloc(1, sizeof(*dbsc)); + + if (!dbsc) + return NULL; + + dbsc->fd = -1; + dbsc->fns = fns; + dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE); + + if (fns->ctl_new(dbsc) != 0) + goto fail; + + return dbsc; + +fail: + free(dbsc); + return NULL; +} + +static void dmabufs_ctl_free(struct dmabufs_ctl * const dbsc) +{ + request_debug(NULL, "Free dmabuf ctl\n"); + + dbsc->fns->ctl_free(dbsc); + + free(dbsc); +} + +void dmabufs_ctl_unref(struct dmabufs_ctl ** const pDbsc) +{ + struct dmabufs_ctl * const dbsc = *pDbsc; + + if (!dbsc) + return; + *pDbsc = NULL; + + if (atomic_fetch_sub(&dbsc->ref_count, 1) != 0) + return; + + dmabufs_ctl_free(dbsc); +} + +struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc) +{ + atomic_fetch_add(&dbsc->ref_count, 1); + return dbsc; +} + +//----------------------------------------------------------------------------- +// +// Alloc dmabuf via CMA + +static int ctl_cma_new2(struct dmabufs_ctl * dbsc, const char * const * names) +{ + for (; *names != NULL; ++names) + { + while ((dbsc->fd = open(*names, O_RDWR | __O_CLOEXEC)) == -1 && + errno == EINTR) + /* Loop */; + if (dbsc->fd != -1) + { + request_debug(NULL, "%s: Using dma_heap device %s\n", __func__, *names); + return 0; + } + request_debug(NULL, "%s: Not using dma_heap device %s: %s\n", __func__, *names, strerror(errno)); + } + request_log("Unable to open any dma_heap device\n"); + return -1; +} + +static int ctl_cma_new(struct dmabufs_ctl * dbsc) +{ + static const char * const names[] = { + "/dev/dma_heap/linux,cma", + "/dev/dma_heap/reserved", + NULL + }; + + return ctl_cma_new2(dbsc, names); +} + +static void ctl_cma_free(struct dmabufs_ctl * dbsc) +{ + if (dbsc->fd != -1) + while (close(dbsc->fd) == -1 && errno == EINTR) + /* loop */; +} + +static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, size_t size) +{ + struct dma_heap_allocation_data data = { + .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1), + .fd = 0, + .fd_flags = O_RDWR, + .heap_flags = 0 + }; + + while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) { + int err = errno; + request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n", + (uint64_t)data.len, + dbsc->fd, + err, + strerror(err)); + if (err == EINTR) + continue; + return -err; + } + + dh->fd = data.fd; + dh->size = (size_t)data.len; + +// fprintf(stderr, "%s: size=%#zx, ftell=%#zx\n", __func__, +// dh->size, (size_t)lseek(dh->fd, 0, SEEK_END)); + + return 0; +} + 
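
For orientation, the dma-heap helpers above combine roughly as follows when a caller needs a CPU-writable dmabuf. This is an illustrative sketch only, not part of the diff: it uses just the functions added by v4l2_req_dmabufs.c/h, the wrapper function name is made up, and error handling is abbreviated.

#include <string.h>
#include "v4l2_req_dmabufs.h"

// Illustrative only: allocate a CMA dmabuf, fill it from the CPU and
// expose its fd. The helper names come from this patch; the wrapper
// function itself is hypothetical.
static int fill_dmabuf_example(const void *src, size_t len)
{
    struct dmabufs_ctl *ctl = dmabufs_ctl_new();  // opens a /dev/dma_heap device
    struct dmabuf_h *dh = NULL;
    void *map;

    if (!ctl)
        return -1;

    dh = dmabuf_alloc(ctl, len);        // DMA_HEAP_IOCTL_ALLOC via buf_cma_alloc()
    if (!dh)
        goto fail;

    map = dmabuf_map(dh);               // mmap() the buffer for CPU access
    if (!map)
        goto fail;

    dmabuf_write_start(dh);             // DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE
    memcpy(map, src, len);
    dmabuf_len_set(dh, len);            // record bytes in use
    dmabuf_write_end(dh);               // DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE

    // dmabuf_fd(dh) can now be handed to a consumer (e.g. a V4L2 DMABUF queue)

    dmabuf_free(dh);
    dmabufs_ctl_unref(&ctl);
    return 0;

fail:
    dmabuf_free(dh);                    // safe on NULL
    dmabufs_ctl_unref(&ctl);
    return -1;
}
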
+static void buf_cma_free(struct dmabuf_h * dh) +{ + // Nothing needed +} + +static const struct dmabuf_fns dmabuf_cma_fns = { + .buf_alloc = buf_cma_alloc, + .buf_free = buf_cma_free, + .ctl_new = ctl_cma_new, + .ctl_free = ctl_cma_free, +}; + +struct dmabufs_ctl * dmabufs_ctl_new(void) +{ + request_debug(NULL, "Dmabufs using CMA\n"); + return dmabufs_ctl_new2(&dmabuf_cma_fns); +} + +static int ctl_cma_new_vidbuf_cached(struct dmabufs_ctl * dbsc) +{ + static const char * const names[] = { + "/dev/dma_heap/vidbuf_cached", + "/dev/dma_heap/linux,cma", + "/dev/dma_heap/reserved", + NULL + }; + + return ctl_cma_new2(dbsc, names); +} + +static const struct dmabuf_fns dmabuf_vidbuf_cached_fns = { + .buf_alloc = buf_cma_alloc, + .buf_free = buf_cma_free, + .ctl_new = ctl_cma_new_vidbuf_cached, + .ctl_free = ctl_cma_free, +}; + +struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void) +{ + request_debug(NULL, "Dmabufs using Vidbuf\n"); + return dmabufs_ctl_new2(&dmabuf_vidbuf_cached_fns); +} + diff --git a/libavcodec/v4l2_req_dmabufs.h b/libavcodec/v4l2_req_dmabufs.h new file mode 100644 index 0000000000..9226ab2498 --- /dev/null +++ b/libavcodec/v4l2_req_dmabufs.h @@ -0,0 +1,69 @@ +/* + Copyright (C) 2024 John Cox john.cox@raspberrypi.com + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef AVCODEC_V4L2_REQ_DMABUFS_H +#define AVCODEC_V4L2_REQ_DMABUFS_H + +#include + +struct dmabufs_ctl; +struct dmabuf_h; + +struct dmabufs_ctl * dmabufs_ctl_new(void); +struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void); +void dmabufs_ctl_unref(struct dmabufs_ctl ** const pdbsc); +struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc); + +// Need not preserve old contents +// On NULL return old buffer is freed +struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h *, size_t size); + +static inline struct dmabuf_h * dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t size) { + return dmabuf_realloc(dbsc, NULL, size); +} +/* Create from existing fd - dups(fd) */ +struct dmabuf_h * dmabuf_import(int fd, size_t size); +/* Import an MMAP - return NULL if mapptr = MAP_FAIL */ +struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size); + +void * dmabuf_map(struct dmabuf_h * const dh); + +/* flags from linux/dmabuf.h DMA_BUF_SYNC_xxx */ +int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags); + +int dmabuf_write_start(struct dmabuf_h * const dh); +int dmabuf_write_end(struct dmabuf_h * const dh); +int dmabuf_read_start(struct dmabuf_h * const dh); +int dmabuf_read_end(struct dmabuf_h * const dh); + +int dmabuf_fd(const struct dmabuf_h * const dh); +/* Allocated size */ +size_t dmabuf_size(const struct dmabuf_h * const dh); +/* Bytes in use */ +size_t dmabuf_len(const struct dmabuf_h * const dh); +/* Set bytes in use */ +void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len); +void dmabuf_free(struct dmabuf_h * dh); + +#endif diff --git a/libavcodec/v4l2_req_utils.h b/libavcodec/v4l2_req_utils.h new file mode 100644 index 0000000000..a6160c5e1c --- /dev/null +++ b/libavcodec/v4l2_req_utils.h @@ -0,0 +1,51 @@ +/* + Copyright (C) 2024 John Cox john.cox@raspberrypi.com + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + */ + +#ifndef AVCODEC_V4L2_REQ_UTILS_H +#define AVCODEC_V4L2_REQ_UTILS_H + +#include +#include "libavutil/log.h" + +#define request_log(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__) + +#define request_err(_ctx, ...) av_log(_ctx, AV_LOG_ERROR, __VA_ARGS__) +#define request_warn(_ctx, ...) av_log(_ctx, AV_LOG_WARNING, __VA_ARGS__) +#define request_info(_ctx, ...) av_log(_ctx, AV_LOG_INFO, __VA_ARGS__) +#define request_debug(_ctx, ...) av_log(_ctx, AV_LOG_DEBUG, __VA_ARGS__) + +static inline char safechar(char c) { + return c > 0x20 && c < 0x7f ? 
c : '.'; +} + +static inline const char * strfourcc(char tbuf[5], uint32_t fcc) { + tbuf[0] = safechar((fcc >> 0) & 0xff); + tbuf[1] = safechar((fcc >> 8) & 0xff); + tbuf[2] = safechar((fcc >> 16) & 0xff); + tbuf[3] = safechar((fcc >> 24) & 0xff); + tbuf[4] = '\0'; + return tbuf; +} + +#endif diff --git a/libavcodec/v4l2_request.c b/libavcodec/v4l2_request.c new file mode 100644 index 0000000000..36eec85402 --- /dev/null +++ b/libavcodec/v4l2_request.c @@ -0,0 +1,784 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include +#include +#include +#include +#include + +#include "libavutil/hwcontext_v4l2request_internal.h" +#include "libavutil/mem.h" +#include "decode.h" +#include "internal.h" +#include "v4l2_request.h" + +#define V4L2_PLANES_MAX 2 + +static const AVClass v4l2_request_context_class = { + .class_name = "V4L2RequestContext", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, +}; + +static inline V4L2RequestContext *v4l2_request_context(AVCodecContext *avctx) +{ + return (V4L2RequestContext *)avctx->internal->hwaccel_priv_data; +} + +static inline uint32_t v4l2_request_frameindex(AVFrame *frame) +{ + return (uint32_t)(uintptr_t)frame->data[1]; +} + +uint64_t ff_v4l2_request_get_capture_timestamp(AVFrame *frame) +{ + /* + * The CAPTURE buffer index is used as a base for V4L2 frame reference. + * This works because frames are decoded into a CAPTURE buffer that is + * closely tied to an AVFrame. + */ + struct timeval timestamp = { + .tv_sec = 0, + .tv_usec = v4l2_request_frameindex(frame) + 1, + }; + return v4l2_timeval_to_ns(×tamp); +} + +int ff_v4l2_request_query_control(AVCodecContext *avctx, + struct v4l2_query_ext_ctrl *control) +{ + V4L2RequestContext *ctx = v4l2_request_context(avctx); + + if (ioctl(ctx->fctxi->video_fd, VIDIOC_QUERY_EXT_CTRL, control) < 0) { + int ret = AVERROR(errno); + // Skip error logging when driver does not support control id (EINVAL) + if (errno != EINVAL) + av_log(ctx, AV_LOG_ERROR, "Failed to query control %u: %s (%d)\n", + control->id, strerror(errno), errno); + return ret; + } + + return 0; +} + +int ff_v4l2_request_query_control_default_value(AVCodecContext *avctx, + uint32_t id) +{ + struct v4l2_query_ext_ctrl control = { + .id = id, + }; + int ret; + + ret = ff_v4l2_request_query_control(avctx, &control); + if (ret < 0) + return ret; + + return control.default_value; +} + +static int v4l2_request_set_controls(V4L2RequestContext *ctx, int request_fd, + struct v4l2_ext_control *control, int count) +{ + struct v4l2_ext_controls controls = { + .controls = control, + .count = count, + .request_fd = request_fd, + .which = (request_fd >= 0) ? 
V4L2_CTRL_WHICH_REQUEST_VAL : 0, + }; + + if (!control || !count) + return 0; + + if (ioctl(ctx->fctxi->video_fd, VIDIOC_S_EXT_CTRLS, &controls) < 0) + return AVERROR(errno); + + return 0; +} + +int ff_v4l2_request_set_controls(AVCodecContext *avctx, + struct v4l2_ext_control *control, int count) +{ + V4L2RequestContext *ctx = v4l2_request_context(avctx); + int ret; + + ret = v4l2_request_set_controls(ctx, -1, control, count); + if (ret < 0) + av_log(ctx, AV_LOG_ERROR, "Failed to set %d control(s): %s (%d)\n", + count, strerror(errno), errno); + + return ret; +} + +static int v4l2_request_queue_buffer(V4L2RequestContext *ctx, + struct v4l2_buffer *buffer) +{ + struct v4l2_plane planes[V4L2_PLANES_MAX] = {}; + + if (V4L2_TYPE_IS_MULTIPLANAR(buffer->type)) { + planes[0].bytesused = buffer->bytesused; + buffer->bytesused = 0; + buffer->length = FF_ARRAY_ELEMS(planes); + buffer->m.planes = planes; + } + + // Queue the buffer + if (ioctl(ctx->fctxi->video_fd, VIDIOC_QBUF, buffer) < 0) + return AVERROR(errno); + + // Mark the buffer as queued + if (V4L2_TYPE_IS_OUTPUT(buffer->type)) + ctx->queued_output |= 1 << buffer->index; + else + ctx->queued_capture |= 1 << buffer->index; + + return 0; +} + +static int v4l2_request_queue_capture_buffer(V4L2RequestContext *ctx, + uint32_t index) +{ + struct v4l2_buffer buffer = { + .index = index, + .type = ctx->fctxi->capture.format.type, + .memory = V4L2_MEMORY_MMAP, + }; + return v4l2_request_queue_buffer(ctx, &buffer); +} + +static int v4l2_request_queue_output_buffer(V4L2RequestContext *ctx, + V4L2RequestOutputBuffer *output, + uint32_t flags) +{ + struct v4l2_buffer buffer = { + .index = output->index, + .type = ctx->fctxi->output.format.type, + .memory = V4L2_MEMORY_MMAP, + .timestamp = output->timestamp, + .bytesused = output->bytesused, + .request_fd = output->request_fd, + .flags = V4L2_BUF_FLAG_REQUEST_FD | flags, + }; + return v4l2_request_queue_buffer(ctx, &buffer); +} + +static int v4l2_request_dequeue_buffer(V4L2RequestContext *ctx, + enum v4l2_buf_type type) +{ + struct v4l2_plane planes[V4L2_PLANES_MAX] = {}; + struct v4l2_buffer buffer = { + .type = type, + .memory = V4L2_MEMORY_MMAP, + }; + + if (V4L2_TYPE_IS_MULTIPLANAR(buffer.type)) { + buffer.length = FF_ARRAY_ELEMS(planes); + buffer.m.planes = planes; + } + + // Dequeue next completed buffer + if (ioctl(ctx->fctxi->video_fd, VIDIOC_DQBUF, &buffer) < 0) + return AVERROR(errno); + + // Mark the buffer as dequeued + if (V4L2_TYPE_IS_OUTPUT(buffer.type)) + ctx->queued_output &= ~(1 << buffer.index); + else + ctx->queued_capture &= ~(1 << buffer.index); + + return 0; +} + +static inline int v4l2_request_dequeue_completed_buffers(V4L2RequestContext *ctx, + enum v4l2_buf_type type) +{ + int ret; + + do { + ret = v4l2_request_dequeue_buffer(ctx, type); + } while (!ret); + + return ret; +} + +static int v4l2_request_wait_on_capture(V4L2RequestContext *ctx, uint32_t index) +{ + enum v4l2_buf_type type = ctx->fctxi->capture.format.type; + struct pollfd pollfd = { + .fd = ctx->fctxi->video_fd, + .events = POLLIN, + }; + + ff_mutex_lock(&ctx->mutex); + + // Dequeue all completed CAPTURE buffers + if (ctx->queued_capture) + v4l2_request_dequeue_completed_buffers(ctx, type); + + // Wait on the specific CAPTURE buffer + while (ctx->queued_capture & (1 << index)) { + int ret = poll(&pollfd, 1, 2000); + if (ret <= 0) + goto fail; + + ret = v4l2_request_dequeue_buffer(ctx, type); + if (ret < 0 && ret != AVERROR(EAGAIN)) + goto fail; + } + + ff_mutex_unlock(&ctx->mutex); + return 0; + +fail: + 
ff_mutex_unlock(&ctx->mutex); + av_log(ctx, AV_LOG_ERROR, "Failed waiting on CAPTURE buffer %d\n", index); + return AVERROR(EINVAL); +} + +static V4L2RequestOutputBuffer *v4l2_request_next_output(V4L2RequestContext *ctx) +{ + enum v4l2_buf_type type = ctx->fctxi->output.format.type; + V4L2RequestOutputBuffer *output; + struct pollfd pollfd = { + .fd = ctx->fctxi->video_fd, + .events = POLLOUT, + }; + uint8_t index; + + ff_mutex_lock(&ctx->mutex); + + // Use next OUTPUT buffer in the circular queue + index = ctx->next_output; + output = &ctx->output[index]; + ctx->next_output = (index + 1) % FF_ARRAY_ELEMS(ctx->output); + + // Dequeue all completed OUTPUT buffers + if (ctx->queued_output) + v4l2_request_dequeue_completed_buffers(ctx, type); + + // Wait on the specific OUTPUT buffer + while (ctx->queued_output & (1 << output->index)) { + int ret = poll(&pollfd, 1, 2000); + if (ret <= 0) + goto fail; + + ret = v4l2_request_dequeue_buffer(ctx, type); + if (ret < 0 && ret != AVERROR(EAGAIN)) + goto fail; + } + + ff_mutex_unlock(&ctx->mutex); + + // Reset bytesused state + output->bytesused = 0; + + return output; + +fail: + ff_mutex_unlock(&ctx->mutex); + av_log(ctx, AV_LOG_ERROR, "Failed waiting on OUTPUT buffer %d\n", + output->index); + return NULL; +} + +static int v4l2_request_wait_on_request(V4L2RequestContext *ctx, + V4L2RequestOutputBuffer *output) +{ + struct pollfd pollfd = { + .fd = output->request_fd, + .events = POLLPRI, + }; + + // Wait on the specific request to complete + while (ctx->queued_request & (1 << output->index)) { + int ret = poll(&pollfd, 1, 2000); + if (ret <= 0) + break; + + // Mark request as dequeued + if (pollfd.revents & (POLLPRI | POLLERR)) { + ctx->queued_request &= ~(1 << output->index); + break; + } + } + + // Reinit the request object + if (ioctl(output->request_fd, MEDIA_REQUEST_IOC_REINIT) < 0) { + int ret = AVERROR(errno); + av_log(ctx, AV_LOG_ERROR, "Failed to reinit request object %d: %s (%d)\n", + output->request_fd, strerror(errno), errno); + return ret; + } + + // Ensure request is marked as dequeued + ctx->queued_request &= ~(1 << output->index); + + return 0; +} + +int ff_v4l2_request_append_output(AVCodecContext *avctx, + V4L2RequestPictureContext *pic, + const uint8_t *data, uint32_t size) +{ + V4L2RequestContext *ctx = v4l2_request_context(avctx); + + // Append data to OUTPUT buffer and ensure there is enough space for padding + if (pic->output->bytesused + size + AV_INPUT_BUFFER_PADDING_SIZE <= pic->output->size) { + memcpy(pic->output->addr + pic->output->bytesused, data, size); + pic->output->bytesused += size; + return 0; + } else { + av_log(ctx, AV_LOG_ERROR, + "Failed to append %u bytes data to OUTPUT buffer %d (%u of %u used)\n", + size, pic->output->index, pic->output->bytesused, pic->output->size); + return AVERROR(ENOMEM); + } +} + +static int v4l2_request_queue_decode(AVCodecContext *avctx, + V4L2RequestPictureContext *pic, + struct v4l2_ext_control *control, int count, + bool first_slice, bool last_slice) +{ + V4L2RequestContext *ctx = v4l2_request_context(avctx); + uint32_t flags; + int ret; + + if (first_slice) { + /* + * Wait on dequeue of the target CAPTURE buffer. Otherwise V4L2 decoder + * may use a different CAPTURE buffer than hwaccel expects. + * + * Normally decoding has already completed when a CAPTURE buffer is + * reused so this is more or less a no-op, however in some situations + * FFmpeg may reuse an AVFrame early, i.e. when no output frame was + * produced prior time, and a synchronization is necessary. 
+ */ + ret = v4l2_request_wait_on_capture(ctx, pic->capture_index); + if (ret < 0) + return ret; + } + + ff_mutex_lock(&ctx->mutex); + + /* + * The OUTPUT buffer tied to prior use of current request object can + * independently be dequeued before the full decode request has been + * completed. This may happen when a decoder use multi stage decoding, + * e.g. rpi-hevc-dec. In such case we can start reusing the OUTPUT buffer, + * however we must wait on the prior request to fully complete before we + * can reuse the request object, and a synchronization is necessary. + */ + ret = v4l2_request_wait_on_request(ctx, pic->output); + if (ret < 0) + goto fail; + + /* + * Dequeue any completed OUTPUT buffers, this is strictly not necessary, + * however if a synchronization was necessary for the CAPTURE and/or request + * there is more than likely one or more OUTPUT buffers that can be dequeued. + */ + if (ctx->queued_output) + v4l2_request_dequeue_completed_buffers(ctx, ctx->fctxi->output.format.type); + + // Set codec controls for current request + ret = v4l2_request_set_controls(ctx, pic->output->request_fd, control, count); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to set %d control(s) for request %d: %s (%d)\n", + count, pic->output->request_fd, strerror(errno), errno); + goto fail; + } + + // Ensure there is zero padding at the end of bitstream data + memset(pic->output->addr + pic->output->bytesused, 0, AV_INPUT_BUFFER_PADDING_SIZE); + + /* + * Use CAPTURE buffer index as base for V4L2 frame reference. + * This works because a CAPTURE buffer is closely tied to a AVFrame + * and FFmpeg handle all frame reference tracking for us. + */ + pic->output->timestamp = (struct timeval) { + .tv_sec = 0, + .tv_usec = pic->capture_index + 1, + }; + + /* + * Queue the OUTPUT buffer of current request. The CAPTURE buffer may be + * hold by the V4L2 decoder unless this is the last slice of a frame. + */ + flags = last_slice ? 0 : V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF; + ret = v4l2_request_queue_output_buffer(ctx, pic->output, flags); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to queue OUTPUT buffer %d for request %d: %s (%d)\n", + pic->output->index, pic->output->request_fd, strerror(errno), errno); + goto fail; + } + + if (first_slice) { + /* + * Queue the target CAPTURE buffer, hwaccel expect and depend on that + * this specific CAPTURE buffer will be used as decode target for + * current request, otherwise frames may be output in wrong order or + * wrong CAPTURE buffer could get used as a reference frame. 
+ */ + ret = v4l2_request_queue_capture_buffer(ctx, pic->capture_index); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to queue CAPTURE buffer %d for request %d: %s (%d)\n", + pic->capture_index, pic->output->request_fd, strerror(errno), errno); + goto fail; + } + } + + // Queue current request + ret = ioctl(pic->output->request_fd, MEDIA_REQUEST_IOC_QUEUE); + if (ret < 0) { + ret = AVERROR(errno); + av_log(ctx, AV_LOG_ERROR, "Failed to queue request object %d: %s (%d)\n", + pic->output->request_fd, strerror(errno), errno); + goto fail; + } + + // Mark current request as queued + ctx->queued_request |= 1 << pic->output->index; + + ret = 0; +fail: + ff_mutex_unlock(&ctx->mutex); + return ret; +} + +int ff_v4l2_request_decode_slice(AVCodecContext *avctx, + V4L2RequestPictureContext *pic, + struct v4l2_ext_control *control, int count, + bool first_slice, bool last_slice) +{ + V4L2RequestContext *ctx = v4l2_request_context(avctx); + + /* + * Fallback to queue each slice as a full frame when holding CAPTURE + * buffers is not supported by the driver. + */ + if ((ctx->fctxi->output.capabilities & V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF) != + V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF) + return v4l2_request_queue_decode(avctx, pic, control, count, true, true); + + return v4l2_request_queue_decode(avctx, pic, control, count, + first_slice, last_slice); +} + +int ff_v4l2_request_decode_frame(AVCodecContext *avctx, + V4L2RequestPictureContext *pic, + struct v4l2_ext_control *control, int count) +{ + return v4l2_request_queue_decode(avctx, pic, control, count, true, true); +} + +static int v4l2_request_post_process(void *logctx, AVFrame *frame) +{ + uint32_t index = v4l2_request_frameindex(frame); + FrameDecodeData *fdd = frame->private_ref; + V4L2RequestContext *ctx = fdd->hwaccel_priv; + + // Wait on CAPTURE buffer before returning the frame to application + return v4l2_request_wait_on_capture(ctx, index); +} + +int ff_v4l2_request_reset_picture(AVCodecContext *avctx, V4L2RequestPictureContext *pic) +{ + V4L2RequestContext *ctx = v4l2_request_context(avctx); + + // Get and wait on next OUTPUT buffer from circular queue + pic->output = v4l2_request_next_output(ctx); + if (!pic->output) + return AVERROR(EINVAL); + + return 0; +} + +int ff_v4l2_request_start_frame(AVCodecContext *avctx, + V4L2RequestPictureContext *pic, + AVFrame *frame) +{ + V4L2RequestContext *ctx = v4l2_request_context(avctx); + uint32_t index = v4l2_request_frameindex(frame); + FrameDecodeData *fdd = frame->private_ref; + int ret; + + // Get next OUTPUT buffer from circular queue + ret = ff_v4l2_request_reset_picture(avctx, pic); + if (ret) + return ret; + + // Ensure CAPTURE buffer is dequeued before reuse + ret = v4l2_request_wait_on_capture(ctx, index); + if (ret) + return ret; + + // Wait on CAPTURE buffer in post_process() before returning to application + fdd->hwaccel_priv = ctx; + fdd->post_process = v4l2_request_post_process; + + // CAPTURE buffer used for current frame + pic->capture_index = index; + + return 0; +} + +void ff_v4l2_request_flush(AVCodecContext *avctx) +{ + V4L2RequestContext *ctx = v4l2_request_context(avctx); + enum v4l2_buf_type type = ctx->fctxi->output.format.type; + struct pollfd pollfd = { + .fd = ctx->fctxi->video_fd, + .events = POLLOUT, + }; + + ff_mutex_lock(&ctx->mutex); + + // Dequeue all completed OUTPUT buffers + if (ctx->queued_output) + v4l2_request_dequeue_completed_buffers(ctx, type); + + // Wait on any remaining OUTPUT buffer + while (ctx->queued_output) { + int ret = 
poll(&pollfd, 1, 2000); + if (ret <= 0) + break; + + ret = v4l2_request_dequeue_buffer(ctx, type); + if (ret < 0 && ret != AVERROR(EAGAIN)) + break; + } + + // Dequeue all completed CAPTURE buffers + if (ctx->queued_capture) + v4l2_request_dequeue_completed_buffers(ctx, ctx->fctxi->capture.format.type); + + ff_mutex_unlock(&ctx->mutex); +} + +static void v4l2_request_output_buffer_uninit(V4L2RequestOutputBuffer *output) +{ + // Close the request associated with the OUTPUT buffer + if (output->request_fd >= 0) { + close(output->request_fd); + output->request_fd = -1; + } + + // Umap the OUTPUT buffer memory + if (output->addr) { + munmap(output->addr, output->size); + output->addr = NULL; + } + + // Return the OUTPUT buffer to the frames context OUTPUT pool + av_buffer_unref(&output->ref); +} + +static int v4l2_request_output_buffer_init(V4L2RequestContext *ctx, + V4L2RequestOutputBuffer *output) +{ + struct v4l2_format *format = &ctx->fctxi->output.format; + struct v4l2_buffer *buffer; + off_t offset; + void *addr; + int ret; + + // Get an OUTPUT buffer from frames context OUTPUT pool + output->ref = av_buffer_pool_get(ctx->fctxi->output.pool); + if (!output->ref) + return AVERROR(ENOMEM); + + buffer = (struct v4l2_buffer *)output->ref->data; + output->index = buffer->index; + output->size = V4L2_TYPE_IS_MULTIPLANAR(format->type) ? + format->fmt.pix_mp.plane_fmt[0].sizeimage : + format->fmt.pix.sizeimage; + output->bytesused = 0; + + // Map the OUTPUT buffer memory, raw bitstream data is written into it + offset = V4L2_TYPE_IS_MULTIPLANAR(buffer->type) ? + buffer->m.planes[0].m.mem_offset : + buffer->m.offset; + addr = mmap(NULL, output->size, PROT_READ | PROT_WRITE, MAP_SHARED, + ctx->fctxi->video_fd, offset); + if (addr == MAP_FAILED) { + ret = AVERROR(errno); + av_log(ctx, AV_LOG_ERROR, "Failed to map OUTPUT buffer %d: %s (%d)\n", + output->index, strerror(errno), errno); + goto fail; + } + output->addr = addr; + + // Allocate and associated a request for the OUTPUT buffer + if (ioctl(ctx->fctxi->media_fd, MEDIA_IOC_REQUEST_ALLOC, &output->request_fd) < 0) { + ret = AVERROR(errno); + av_log(ctx, AV_LOG_ERROR, "Failed to allocate request for OUTPUT buffer %d: %s (%d)\n", + output->index, strerror(errno), errno); + goto fail; + } + + return 0; + +fail: + v4l2_request_output_buffer_uninit(output); + return ret; +} + +int ff_v4l2_request_frame_params(AVCodecContext *avctx, + AVBufferRef *hw_frames_ctx, + uint32_t pixelformat, + uint8_t bit_depth) +{ + V4L2RequestContext *ctx = v4l2_request_context(avctx); + AVHWFramesContext *hwfc = (AVHWFramesContext *)hw_frames_ctx->data; + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + + // Set parameters used during frames context initialization + fctx->pixelformat = pixelformat; + fctx->bit_depth = bit_depth; + if (ctx) { + fctx->init_controls = ctx->init_controls; + fctx->nb_init_controls = ctx->nb_init_controls; + } + + hwfc->format = AV_PIX_FMT_DRM_PRIME; + hwfc->sw_format = AV_PIX_FMT_NONE; + hwfc->width = avctx->coded_width; + hwfc->height = avctx->coded_height; + + // Pre-allocate CAPTURE buffers to ensure CAPTURE queue can be started + hwfc->initial_pool_size = 1; + + return 0; +} + +int ff_v4l2_request_uninit(AVCodecContext *avctx) +{ + V4L2RequestContext *ctx = v4l2_request_context(avctx); + enum v4l2_buf_type type; + + if (ctx->fctxi) { + // Flush and wait on all pending requests + ff_v4l2_request_flush(avctx); + + // Stop streaming on OUTPUT queue + type = ctx->fctxi->output.format.type; + if (ioctl(ctx->fctxi->video_fd, VIDIOC_STREAMOFF, 
&type) < 0) + av_log(ctx, AV_LOG_WARNING, "Failed to stop OUTPUT streaming: %s (%d)\n", + strerror(errno), errno); + + // Stop streaming on CAPTURE queue + type = ctx->fctxi->capture.format.type; + if (ioctl(ctx->fctxi->video_fd, VIDIOC_STREAMOFF, &type) < 0) + av_log(ctx, AV_LOG_WARNING, "Failed to stop CAPTURE streaming: %s (%d)\n", + strerror(errno), errno); + + // Release OUTPUT buffers and requests + for (int i = 0; i < FF_ARRAY_ELEMS(ctx->output); i++) + v4l2_request_output_buffer_uninit(&ctx->output[i]); + + ctx->fctxi = NULL; + } + + av_buffer_unref(&ctx->frames_ref); + ff_mutex_destroy(&ctx->mutex); + + return 0; +} + +int ff_v4l2_request_init(AVCodecContext *avctx, + struct v4l2_ext_control *control, int count, + int (*post_frames_ctx)(AVCodecContext *avctx)) +{ + V4L2RequestContext *ctx = v4l2_request_context(avctx); + AVHWFramesContext *hwfc; + AVV4L2RequestFramesContext *fctx; + enum v4l2_buf_type type; + int ret; + + // Set initial default values + ctx->av_class = &v4l2_request_context_class; + ctx->init_controls = control; + ctx->nb_init_controls = count; + ff_mutex_init(&ctx->mutex, NULL); + for (int i = 0; i < FF_ARRAY_ELEMS(ctx->output); i++) { + ctx->output[i].index = i; + ctx->output[i].request_fd = -1; + } + + // Create frames context and allocate initial CAPTURE buffers + ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_V4L2REQUEST); + if (ret < 0) + goto fail; + + ctx->frames_ref = av_buffer_ref(avctx->hw_frames_ctx); + if (!ctx->frames_ref) { + ret = AVERROR(ENOMEM); + goto fail; + } + + // Get internal hwctx from frames context + hwfc = (AVHWFramesContext *)ctx->frames_ref->data; + fctx = hwfc->hwctx; + ctx->fctxi = fctx->internal; + + // Reset init controls after successful frames context initialization + ctx->init_controls = NULL; + ctx->nb_init_controls = 0; + + // Check codec-specific controls, e.g. profile and level + if (post_frames_ctx) { + ret = post_frames_ctx(avctx); + if (ret < 0) + goto fail; + } + + // Allocate OUTPUT buffers and requests for circular queue + for (int i = 0; i < FF_ARRAY_ELEMS(ctx->output); i++) { + ret = v4l2_request_output_buffer_init(ctx, &ctx->output[i]); + if (ret < 0) + goto fail; + } + + // Start streaming on OUTPUT queue + type = ctx->fctxi->output.format.type; + if (ioctl(ctx->fctxi->video_fd, VIDIOC_STREAMON, &type) < 0) { + ret = AVERROR(errno); + av_log(ctx, AV_LOG_ERROR, "Failed to start OUTPUT streaming: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + // Start streaming on CAPTURE queue + type = ctx->fctxi->capture.format.type; + if (ioctl(ctx->fctxi->video_fd, VIDIOC_STREAMON, &type) < 0) { + ret = AVERROR(errno); + av_log(ctx, AV_LOG_ERROR, "Failed to start CAPTURE streaming: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + return 0; + +fail: + ff_v4l2_request_uninit(avctx); + return ret; +} diff --git a/libavcodec/v4l2_request.h b/libavcodec/v4l2_request.h new file mode 100644 index 0000000000..53759f20bb --- /dev/null +++ b/libavcodec/v4l2_request.h @@ -0,0 +1,105 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_V4L2_REQUEST_H +#define AVCODEC_V4L2_REQUEST_H + +#include +#include +#include + +#include "libavutil/buffer.h" +#include "libavutil/log.h" +#include "libavutil/thread.h" +#include "avcodec.h" + +typedef struct AVV4L2RequestFramesContextInternal AVV4L2RequestFramesContextInternal; + +typedef struct V4L2RequestOutputBuffer { + AVBufferRef *ref; + uint32_t index; + int request_fd; + uint8_t *addr; + uint32_t size; + uint32_t bytesused; + struct timeval timestamp; +} V4L2RequestOutputBuffer; + +typedef struct V4L2RequestContext { + const AVClass *av_class; + AVBufferRef *frames_ref; + AVV4L2RequestFramesContextInternal *fctxi; + AVMutex mutex; + V4L2RequestOutputBuffer output[4]; + uint8_t next_output; + uint32_t queued_output; + uint32_t queued_request; + uint64_t queued_capture; + struct v4l2_ext_control *init_controls; + int nb_init_controls; +} V4L2RequestContext; + +typedef struct V4L2RequestPictureContext { + V4L2RequestOutputBuffer *output; + uint32_t capture_index; +} V4L2RequestPictureContext; + +uint64_t ff_v4l2_request_get_capture_timestamp(AVFrame *frame); + +int ff_v4l2_request_query_control(AVCodecContext *avctx, + struct v4l2_query_ext_ctrl *control); + +int ff_v4l2_request_query_control_default_value(AVCodecContext *avctx, + uint32_t id); + +int ff_v4l2_request_set_controls(AVCodecContext *avctx, + struct v4l2_ext_control *control, int count); + +int ff_v4l2_request_append_output(AVCodecContext *avctx, + V4L2RequestPictureContext *pic, + const uint8_t *data, uint32_t size); + +int ff_v4l2_request_decode_slice(AVCodecContext *avctx, + V4L2RequestPictureContext *pic, + struct v4l2_ext_control *control, int count, + bool first_slice, bool last_slice); + +int ff_v4l2_request_decode_frame(AVCodecContext *avctx, + V4L2RequestPictureContext *pic, + struct v4l2_ext_control *control, int count); + +int ff_v4l2_request_reset_picture(AVCodecContext *avctx, + V4L2RequestPictureContext *pic); + +int ff_v4l2_request_start_frame(AVCodecContext *avctx, + V4L2RequestPictureContext *pic, AVFrame *frame); + +void ff_v4l2_request_flush(AVCodecContext *avctx); + +int ff_v4l2_request_frame_params(AVCodecContext *avctx, + AVBufferRef *hw_frames_ctx, + uint32_t pixelformat, + uint8_t bit_depth); + +int ff_v4l2_request_uninit(AVCodecContext *avctx); + +int ff_v4l2_request_init(AVCodecContext *avctx, + struct v4l2_ext_control *control, int count, + int (*post_frames_ctx)(AVCodecContext *avctx)); + +#endif /* AVCODEC_V4L2_REQUEST_H */ diff --git a/libavcodec/v4l2_request_av1.c b/libavcodec/v4l2_request_av1.c new file mode 100644 index 0000000000..4eabdbf31b --- /dev/null +++ b/libavcodec/v4l2_request_av1.c @@ -0,0 +1,636 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "libavutil/mem.h" +#include "hwaccel_internal.h" +#include "hwconfig.h" +#include "av1dec.h" +#include "internal.h" +#include "v4l2_request.h" + +#define V4L2_AV1_CONTROLS_MAX 4 + +typedef struct V4L2RequestContextAV1 { + V4L2RequestContext base; + bool has_film_grain; +} V4L2RequestContextAV1; + +typedef struct V4L2RequestControlsAV1 { + V4L2RequestPictureContext pic; + struct v4l2_ctrl_av1_sequence sequence; + struct v4l2_ctrl_av1_frame frame; + struct v4l2_ctrl_av1_film_grain film_grain; + struct v4l2_ctrl_av1_tile_group_entry tile_group_entry; + struct v4l2_ctrl_av1_tile_group_entry *tile_group_entries; + unsigned int allocated_tile_group_entries; + unsigned int num_tile_group_entries; +} V4L2RequestControlsAV1; + +static int get_bit_depth_from_seq(const AV1RawSequenceHeader *seq) +{ + if (seq->seq_profile == AV_PROFILE_AV1_PROFESSIONAL && + seq->color_config.high_bitdepth) + return seq->color_config.twelve_bit ? 12 : 10; + else + return seq->color_config.high_bitdepth ? 10 : 8; +} + +static void fill_sequence(struct v4l2_ctrl_av1_sequence *ctrl, + const AV1DecContext *s) +{ + const AV1RawSequenceHeader *seq = s->raw_seq; + + *ctrl = (struct v4l2_ctrl_av1_sequence) { + .seq_profile = seq->seq_profile, + .order_hint_bits = seq->enable_order_hint ? + seq->order_hint_bits_minus_1 + 1 : 0, + .bit_depth = get_bit_depth_from_seq(seq), + .max_frame_width_minus_1 = seq->max_frame_width_minus_1, + .max_frame_height_minus_1 = seq->max_frame_height_minus_1, + }; + + if (seq->still_picture) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_STILL_PICTURE; + + if (seq->use_128x128_superblock) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK; + + if (seq->enable_filter_intra) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA; + + if (seq->enable_intra_edge_filter) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER; + + if (seq->enable_interintra_compound) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND; + + if (seq->enable_masked_compound) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND; + + if (seq->enable_warped_motion) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_WARPED_MOTION; + + if (seq->enable_dual_filter) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER; + + if (seq->enable_order_hint) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_ORDER_HINT; + + if (seq->enable_jnt_comp) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP; + + if (seq->enable_ref_frame_mvs) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_REF_FRAME_MVS; + + if (seq->enable_superres) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_SUPERRES; + + if (seq->enable_cdef) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF; + + if (seq->enable_restoration) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_RESTORATION; + + if (seq->color_config.mono_chrome) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_MONO_CHROME; + + if (seq->color_config.color_range) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_COLOR_RANGE; + + if (seq->color_config.subsampling_x) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_SUBSAMPLING_X; + + if (seq->color_config.subsampling_y) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_SUBSAMPLING_Y; + + if (seq->film_grain_params_present) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_FILM_GRAIN_PARAMS_PRESENT; + + if 
(seq->color_config.separate_uv_delta_q) + ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_SEPARATE_UV_DELTA_Q; +} + +static void fill_frame(struct v4l2_ctrl_av1_frame *ctrl, const AV1DecContext *s) +{ + const AV1RawFrameHeader *frame_header = s->raw_frame_header; + const CodedBitstreamAV1Context *cbctx = s->cbc->priv_data; + uint8_t remap_lr_type[4] = { + V4L2_AV1_FRAME_RESTORE_NONE, + V4L2_AV1_FRAME_RESTORE_SWITCHABLE, + V4L2_AV1_FRAME_RESTORE_WIENER, + V4L2_AV1_FRAME_RESTORE_SGRPROJ, + }; + int i, j; + + *ctrl = (struct v4l2_ctrl_av1_frame) { + .tile_info = { + .context_update_tile_id = frame_header->context_update_tile_id, + .tile_cols = frame_header->tile_cols, + .tile_rows = frame_header->tile_rows, + .tile_size_bytes = frame_header->tile_cols_log2 > 0 || + frame_header->tile_rows_log2 > 0 ? + frame_header->tile_size_bytes_minus1 + 1 : 0, + }, + + .quantization = { + .base_q_idx = frame_header->base_q_idx, + .delta_q_y_dc = frame_header->delta_q_y_dc, + .delta_q_u_dc = frame_header->delta_q_u_dc, + .delta_q_u_ac = frame_header->delta_q_u_ac, + .delta_q_v_dc = frame_header->delta_q_v_dc, + .delta_q_v_ac = frame_header->delta_q_v_ac, + .qm_y = frame_header->qm_y, + .qm_u = frame_header->qm_u, + .qm_v = frame_header->qm_v, + .delta_q_res = frame_header->delta_q_res, + }, + + .loop_filter = { + .level[0] = frame_header->loop_filter_level[0], + .level[1] = frame_header->loop_filter_level[1], + .level[2] = frame_header->loop_filter_level[2], + .level[3] = frame_header->loop_filter_level[3], + .sharpness = frame_header->loop_filter_sharpness, + .mode_deltas[0] = frame_header->loop_filter_mode_deltas[0], + .mode_deltas[1] = frame_header->loop_filter_mode_deltas[1], + .delta_lf_res = frame_header->delta_lf_res, + }, + + .cdef = { + .damping_minus_3 = frame_header->cdef_damping_minus_3, + .bits = frame_header->cdef_bits, + }, + + .loop_restoration = { + .lr_unit_shift = frame_header->lr_unit_shift, + .lr_uv_shift = frame_header->lr_uv_shift, + }, + + .superres_denom = frame_header->use_superres ? + frame_header->coded_denom + AV1_SUPERRES_DENOM_MIN : + AV1_SUPERRES_NUM, + .skip_mode_frame[0] = frame_header->skip_mode_present ? + s->cur_frame.skip_mode_frame_idx[0] : 0, + .skip_mode_frame[1] = frame_header->skip_mode_present ? 
+ s->cur_frame.skip_mode_frame_idx[1] : 0, + .primary_ref_frame = frame_header->primary_ref_frame, + .frame_type = frame_header->frame_type, + .order_hint = frame_header->order_hint, + .upscaled_width = cbctx->upscaled_width, + .interpolation_filter = frame_header->interpolation_filter, + .tx_mode = frame_header->tx_mode, + .frame_width_minus_1 = cbctx->frame_width - 1, + .frame_height_minus_1 = cbctx->frame_height - 1, + .render_width_minus_1 = cbctx->render_width - 1, + .render_height_minus_1 = cbctx->render_height - 1, + .current_frame_id = frame_header->current_frame_id, + .refresh_frame_flags = frame_header->refresh_frame_flags, + }; + + if (frame_header->segmentation_enabled) + ctrl->segmentation.flags |= V4L2_AV1_SEGMENTATION_FLAG_ENABLED; + + if (frame_header->segmentation_update_map) + ctrl->segmentation.flags |= V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP; + + if (frame_header->segmentation_temporal_update) + ctrl->segmentation.flags |= V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE; + + if (frame_header->segmentation_update_data) + ctrl->segmentation.flags |= V4L2_AV1_SEGMENTATION_FLAG_UPDATE_DATA; + + for (i = 0; i < AV1_MAX_SEGMENTS; i++) { + for (j = 0; j < AV1_SEG_LVL_MAX; j++) { + if (frame_header->feature_enabled[i][j]) { + ctrl->segmentation.feature_enabled[i] |= V4L2_AV1_SEGMENT_FEATURE_ENABLED(j); + ctrl->segmentation.last_active_seg_id = i; + if (j >= AV1_SEG_LVL_REF_FRAME) + ctrl->segmentation.flags |= V4L2_AV1_SEGMENTATION_FLAG_SEG_ID_PRE_SKIP; + } + ctrl->segmentation.feature_data[i][j] = frame_header->feature_value[i][j]; + } + } + + if (frame_header->uniform_tile_spacing_flag) + ctrl->tile_info.flags |= V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING; + + for (i = 0; i < frame_header->tile_cols; i++) { + ctrl->tile_info.mi_col_starts[i] = frame_header->tile_start_col_sb[i]; + ctrl->tile_info.width_in_sbs_minus_1[i] = frame_header->width_in_sbs_minus_1[i]; + } + ctrl->tile_info.mi_col_starts[i] = 2 * ((cbctx->frame_width + 7) >> 3); + + for (i = 0; i < frame_header->tile_rows; i++) { + ctrl->tile_info.mi_row_starts[i] = frame_header->tile_start_row_sb[i]; + ctrl->tile_info.height_in_sbs_minus_1[i] = frame_header->height_in_sbs_minus_1[i]; + } + ctrl->tile_info.mi_row_starts[i] = 2 * ((cbctx->frame_height + 7) >> 3); + + if (frame_header->diff_uv_delta) + ctrl->quantization.flags |= V4L2_AV1_QUANTIZATION_FLAG_DIFF_UV_DELTA; + + if (frame_header->using_qmatrix) + ctrl->quantization.flags |= V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX; + + if (frame_header->delta_q_present) + ctrl->quantization.flags |= V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT; + + if (frame_header->loop_filter_delta_enabled) + ctrl->loop_filter.flags |= V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED; + + if (frame_header->loop_filter_delta_update) + ctrl->loop_filter.flags |= V4L2_AV1_LOOP_FILTER_FLAG_DELTA_UPDATE; + + if (frame_header->delta_lf_present) + ctrl->loop_filter.flags |= V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT; + + if (frame_header->delta_lf_multi) + ctrl->loop_filter.flags |= V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI; + + for (i = 0; i < AV1_NUM_REF_FRAMES; i++) { + ctrl->loop_filter.ref_deltas[i] = frame_header->loop_filter_ref_deltas[i]; + } + + for (i = 0; i < cbctx->num_planes; i++) { + ctrl->loop_restoration.frame_restoration_type[i] = + remap_lr_type[frame_header->lr_type[i]]; + if (frame_header->lr_type[i] != AV1_RESTORE_NONE) { + ctrl->loop_restoration.flags |= V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR; + if (i > 0) + ctrl->loop_restoration.flags |= V4L2_AV1_LOOP_RESTORATION_FLAG_USES_CHROMA_LR; + } + 
} + if (ctrl->loop_restoration.flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) { + ctrl->loop_restoration.loop_restoration_size[0] = + 1 << (6 + frame_header->lr_unit_shift); + ctrl->loop_restoration.loop_restoration_size[1] = + 1 << (6 + frame_header->lr_unit_shift - frame_header->lr_uv_shift); + ctrl->loop_restoration.loop_restoration_size[2] = + 1 << (6 + frame_header->lr_unit_shift - frame_header->lr_uv_shift); + } + + for (i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++) { + ctrl->global_motion.type[i] = s->cur_frame.gm_type[i]; + for (j = 0; j < 6; ++j) { + ctrl->global_motion.params[i][j] = s->cur_frame.gm_params[i][j]; + if (s->cur_frame.gm_invalid[i]) + ctrl->global_motion.invalid |= V4L2_AV1_GLOBAL_MOTION_IS_INVALID(i); + } + + if (frame_header->is_global[i]) + ctrl->global_motion.flags[i] |= V4L2_AV1_GLOBAL_MOTION_FLAG_IS_GLOBAL; + + if (frame_header->is_rot_zoom[i]) + ctrl->global_motion.flags[i] |= V4L2_AV1_GLOBAL_MOTION_FLAG_IS_ROT_ZOOM; + + if (frame_header->is_translation[i]) + ctrl->global_motion.flags[i] |= V4L2_AV1_GLOBAL_MOTION_FLAG_IS_TRANSLATION; + } + + for (i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++) { + AVFrame *ref = s->ref[i].f; + + ctrl->order_hints[i] = s->cur_frame.order_hints[i]; + if (ref) + ctrl->reference_frame_ts[i] = ff_v4l2_request_get_capture_timestamp(ref); + if (i < AV1_REFS_PER_FRAME) + ctrl->ref_frame_idx[i] = frame_header->ref_frame_idx[i]; + } + + for (i = 0; i < (1 << frame_header->cdef_bits); i++) { + ctrl->cdef.y_pri_strength[i] = frame_header->cdef_y_pri_strength[i]; + ctrl->cdef.y_sec_strength[i] = frame_header->cdef_y_sec_strength[i]; + ctrl->cdef.uv_pri_strength[i] = frame_header->cdef_uv_pri_strength[i]; + ctrl->cdef.uv_sec_strength[i] = frame_header->cdef_uv_sec_strength[i]; + } + + if (frame_header->show_frame) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_SHOW_FRAME; + + if (frame_header->showable_frame) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_SHOWABLE_FRAME; + + if (frame_header->error_resilient_mode) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE; + + if (frame_header->disable_cdf_update) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE; + + if (frame_header->allow_screen_content_tools) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS; + + if (s->cur_frame.force_integer_mv) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV; + + if (frame_header->allow_intrabc) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC; + + if (frame_header->use_superres) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_USE_SUPERRES; + + if (frame_header->allow_high_precision_mv) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV; + + if (frame_header->is_motion_mode_switchable) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE; + + if (frame_header->use_ref_frame_mvs) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS; + + if (frame_header->disable_frame_end_update_cdf) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF; + + if (frame_header->allow_warped_motion) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION; + + if (frame_header->reference_select) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT; + + if (frame_header->reduced_tx_set) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET; + + if (frame_header->skip_mode_present) { + ctrl->flags |= V4L2_AV1_FRAME_FLAG_SKIP_MODE_ALLOWED; // FIXME + ctrl->flags |= V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT; + } + + if (frame_header->frame_size_override_flag) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_FRAME_SIZE_OVERRIDE; + + if 
(frame_header->buffer_removal_time_present_flag) { + ctrl->flags |= V4L2_AV1_FRAME_FLAG_BUFFER_REMOVAL_TIME_PRESENT; + for (i = 0; i < AV1_MAX_OPERATING_POINTS; i++) + ctrl->buffer_removal_time[i] = frame_header->buffer_removal_time[i]; + } + + if (frame_header->frame_refs_short_signaling) + ctrl->flags |= V4L2_AV1_FRAME_FLAG_FRAME_REFS_SHORT_SIGNALING; +} + +static void fill_film_grain(struct v4l2_ctrl_av1_film_grain *ctrl, + const AV1DecContext *s) +{ + const AV1RawFilmGrainParams *film_grain = &s->cur_frame.film_grain; + int i; + + *ctrl = (struct v4l2_ctrl_av1_film_grain) { + .cr_mult = film_grain->cr_mult, + .grain_seed = film_grain->grain_seed, + .film_grain_params_ref_idx = film_grain->film_grain_params_ref_idx, + .num_y_points = film_grain->num_y_points, + .num_cb_points = film_grain->num_cb_points, + .num_cr_points = film_grain->num_cr_points, + .grain_scaling_minus_8 = film_grain->grain_scaling_minus_8, + .ar_coeff_lag = film_grain->ar_coeff_lag, + .ar_coeff_shift_minus_6 = film_grain->ar_coeff_shift_minus_6, + .grain_scale_shift = film_grain->grain_scale_shift, + .cb_mult = film_grain->cb_mult, + .cb_luma_mult = film_grain->cb_luma_mult, + .cr_luma_mult = film_grain->cr_luma_mult, + .cb_offset = film_grain->cb_offset, + .cr_offset = film_grain->cr_offset, + }; + + if (film_grain->apply_grain) + ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN; + + if (film_grain->update_grain) + ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_UPDATE_GRAIN; + + if (film_grain->chroma_scaling_from_luma) + ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA; + + if (film_grain->overlap_flag) + ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP; + + if (film_grain->clip_to_restricted_range) + ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE; + + if (!film_grain->apply_grain) + return; + + for (i = 0; i < film_grain->num_y_points; i++) { + ctrl->point_y_value[i] = film_grain->point_y_value[i]; + ctrl->point_y_scaling[i] = film_grain->point_y_scaling[i]; + } + + for (i = 0; i < film_grain->num_cb_points; i++) { + ctrl->point_cb_value[i] = film_grain->point_cb_value[i]; + ctrl->point_cb_scaling[i] = film_grain->point_cb_scaling[i]; + } + + for (i = 0; i < film_grain->num_cr_points; i++) { + ctrl->point_cr_value[i] = film_grain->point_cr_value[i]; + ctrl->point_cr_scaling[i] = film_grain->point_cr_scaling[i]; + } + + for (i = 0; i < 24; i++) { + ctrl->ar_coeffs_y_plus_128[i] = film_grain->ar_coeffs_y_plus_128[i]; + } + + for (i = 0; i < 25; i++) { + ctrl->ar_coeffs_cb_plus_128[i] = film_grain->ar_coeffs_cb_plus_128[i]; + ctrl->ar_coeffs_cr_plus_128[i] = film_grain->ar_coeffs_cr_plus_128[i]; + } +} + +static int v4l2_request_av1_start_frame(AVCodecContext *avctx, + av_unused const AVBufferRef *buf_ref, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + const AV1DecContext *s = avctx->priv_data; + V4L2RequestContextAV1 *ctx = avctx->internal->hwaccel_priv_data; + V4L2RequestControlsAV1 *controls = s->cur_frame.hwaccel_picture_private; + int ret; + + ret = ff_v4l2_request_start_frame(avctx, &controls->pic, s->cur_frame.f); + if (ret) + return ret; + + fill_sequence(&controls->sequence, s); + fill_frame(&controls->frame, s); + + if (ctx->has_film_grain) + fill_film_grain(&controls->film_grain, s); + + controls->tile_group_entries = &controls->tile_group_entry; + controls->allocated_tile_group_entries = 0; + controls->num_tile_group_entries = 0; + + return 0; +} + +static int v4l2_request_av1_decode_slice(AVCodecContext *avctx, + const uint8_t *buffer, uint32_t size) +{ + 
const AV1DecContext *s = avctx->priv_data; + const AV1RawFrameHeader *fh = s->raw_frame_header; + V4L2RequestControlsAV1 *controls = s->cur_frame.hwaccel_picture_private; + + controls->num_tile_group_entries = fh->tile_cols * fh->tile_rows; + if (controls->num_tile_group_entries > V4L2_AV1_MAX_TILE_COUNT) + return AVERROR(EINVAL); + + if (controls->num_tile_group_entries > 1 && + controls->num_tile_group_entries > controls->allocated_tile_group_entries) { + struct v4l2_ctrl_av1_tile_group_entry *tile_group_entries; + + tile_group_entries = av_realloc_array(controls->allocated_tile_group_entries ? + controls->tile_group_entries : NULL, + controls->num_tile_group_entries, + sizeof(*controls->tile_group_entries)); + if (!tile_group_entries) + return AVERROR(ENOMEM); + + if (!controls->allocated_tile_group_entries) + memcpy(tile_group_entries, controls->tile_group_entries, + sizeof(*controls->tile_group_entries)); + + controls->tile_group_entries = tile_group_entries; + controls->allocated_tile_group_entries = controls->num_tile_group_entries; + } + + for (int i = 0; i < controls->num_tile_group_entries; i++) { + controls->tile_group_entries[i] = (struct v4l2_ctrl_av1_tile_group_entry) { + .tile_offset = controls->pic.output->bytesused + + s->tile_group_info[i].tile_offset, + .tile_size = s->tile_group_info[i].tile_size, + .tile_row = s->tile_group_info[i].tile_row, + .tile_col = s->tile_group_info[i].tile_column, + }; + } + + return ff_v4l2_request_append_output(avctx, &controls->pic, buffer, size); +} + +static int v4l2_request_av1_end_frame(AVCodecContext *avctx) +{ + const AV1DecContext *s = avctx->priv_data; + V4L2RequestContextAV1 *ctx = avctx->internal->hwaccel_priv_data; + V4L2RequestControlsAV1 *controls = s->cur_frame.hwaccel_picture_private; + int count = 0; + + struct v4l2_ext_control control[V4L2_AV1_CONTROLS_MAX] = {}; + + control[count++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_AV1_SEQUENCE, + .ptr = &controls->sequence, + .size = sizeof(controls->sequence), + }; + + control[count++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_AV1_FRAME, + .ptr = &controls->frame, + .size = sizeof(controls->frame), + }; + + control[count++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY, + .ptr = controls->tile_group_entries, + .size = sizeof(*controls->tile_group_entries) * + FFMAX(controls->num_tile_group_entries, 1), + }; + + if (ctx->has_film_grain) { + control[count++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_AV1_FILM_GRAIN, + .ptr = &controls->film_grain, + .size = sizeof(controls->film_grain), + }; + } + + return ff_v4l2_request_decode_frame(avctx, &controls->pic, control, count); +} + +static void v4l2_request_av1_free_frame_priv(AVRefStructOpaque hwctx, void *data) +{ + V4L2RequestControlsAV1 *controls = data; + + if (controls->allocated_tile_group_entries) + av_freep(&controls->tile_group_entries); +} + +static int v4l2_request_av1_post_frames_ctx(AVCodecContext *avctx) +{ + V4L2RequestContextAV1 *ctx = avctx->internal->hwaccel_priv_data; + + struct v4l2_query_ext_ctrl film_grain = { + .id = V4L2_CID_STATELESS_AV1_FILM_GRAIN, + }; + + // TODO: check V4L2_CID_MPEG_VIDEO_AV1_PROFILE + // TODO: check V4L2_CID_MPEG_VIDEO_AV1_LEVEL + + if (!ff_v4l2_request_query_control(avctx, &film_grain)) + ctx->has_film_grain = true; + else + ctx->has_film_grain = false; + + return 0; +} + +static int v4l2_request_av1_init(AVCodecContext *avctx) +{ + const AV1DecContext *s = avctx->priv_data; + struct v4l2_ctrl_av1_sequence 
sequence; + + struct v4l2_ext_control control[] = { + { + .id = V4L2_CID_STATELESS_AV1_SEQUENCE, + .ptr = &sequence, + .size = sizeof(sequence), + }, + }; + + fill_sequence(&sequence, s); + + return ff_v4l2_request_init(avctx, control, FF_ARRAY_ELEMS(control), + v4l2_request_av1_post_frames_ctx); +} + +static int v4l2_request_av1_frame_params(AVCodecContext *avctx, + AVBufferRef *hw_frames_ctx) +{ + const AV1DecContext *s = avctx->priv_data; + const AV1RawSequenceHeader *seq = s ? s->raw_seq : NULL; + uint8_t bit_depth = seq ? get_bit_depth_from_seq(seq) : 0; + + return ff_v4l2_request_frame_params(avctx, hw_frames_ctx, + V4L2_PIX_FMT_AV1_FRAME, bit_depth); +} + +const FFHWAccel ff_av1_v4l2request_hwaccel = { + .p.name = "av1_v4l2request", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_AV1, + .p.pix_fmt = AV_PIX_FMT_DRM_PRIME, + .start_frame = v4l2_request_av1_start_frame, + .decode_slice = v4l2_request_av1_decode_slice, + .end_frame = v4l2_request_av1_end_frame, + .flush = ff_v4l2_request_flush, + .free_frame_priv = v4l2_request_av1_free_frame_priv, + .frame_priv_data_size = sizeof(V4L2RequestControlsAV1), + .init = v4l2_request_av1_init, + .uninit = ff_v4l2_request_uninit, + .priv_data_size = sizeof(V4L2RequestContextAV1), + .frame_params = v4l2_request_av1_frame_params, + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +}; diff --git a/libavcodec/v4l2_request_h264.c b/libavcodec/v4l2_request_h264.c new file mode 100644 index 0000000000..5abf631090 --- /dev/null +++ b/libavcodec/v4l2_request_h264.c @@ -0,0 +1,532 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "h264dec.h" +#include "hwaccel_internal.h" +#include "hwconfig.h" +#include "internal.h" +#include "v4l2_request.h" + +typedef struct V4L2RequestContextH264 { + V4L2RequestContext base; + enum v4l2_stateless_h264_decode_mode decode_mode; + enum v4l2_stateless_h264_start_code start_code; +} V4L2RequestContextH264; + +typedef struct V4L2RequestControlsH264 { + V4L2RequestPictureContext pic; + struct v4l2_ctrl_h264_sps sps; + struct v4l2_ctrl_h264_pps pps; + struct v4l2_ctrl_h264_scaling_matrix scaling_matrix; + struct v4l2_ctrl_h264_decode_params decode_params; + struct v4l2_ctrl_h264_slice_params slice_params; + struct v4l2_ctrl_h264_pred_weights pred_weights; + bool pred_weights_required; + bool first_slice; + int num_slices; +} V4L2RequestControlsH264; + +static uint8_t nalu_slice_start_code[] = { 0x00, 0x00, 0x01 }; + +static void fill_weight_factors(struct v4l2_h264_weight_factors *weight_factors, + int list, const H264SliceContext *sl) +{ + for (int i = 0; i < sl->ref_count[list]; i++) { + if (sl->pwt.luma_weight_flag[list]) { + weight_factors->luma_weight[i] = sl->pwt.luma_weight[i][list][0]; + weight_factors->luma_offset[i] = sl->pwt.luma_weight[i][list][1]; + } else { + weight_factors->luma_weight[i] = 1 << sl->pwt.luma_log2_weight_denom; + weight_factors->luma_offset[i] = 0; + } + for (int j = 0; j < 2; j++) { + if (sl->pwt.chroma_weight_flag[list]) { + weight_factors->chroma_weight[i][j] = sl->pwt.chroma_weight[i][list][j][0]; + weight_factors->chroma_offset[i][j] = sl->pwt.chroma_weight[i][list][j][1]; + } else { + weight_factors->chroma_weight[i][j] = 1 << sl->pwt.chroma_log2_weight_denom; + weight_factors->chroma_offset[i][j] = 0; + } + } + } +} + +static void fill_dpb_entry(struct v4l2_h264_dpb_entry *entry, + const H264Picture *pic, int long_idx) +{ + entry->reference_ts = ff_v4l2_request_get_capture_timestamp(pic->f); + entry->pic_num = pic->pic_id; + entry->frame_num = pic->long_ref ? 
long_idx : pic->frame_num; + entry->fields = pic->reference & V4L2_H264_FRAME_REF; + entry->flags = V4L2_H264_DPB_ENTRY_FLAG_VALID; + if (entry->fields) + entry->flags |= V4L2_H264_DPB_ENTRY_FLAG_ACTIVE; + if (pic->long_ref) + entry->flags |= V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM; + if (pic->field_picture) + entry->flags |= V4L2_H264_DPB_ENTRY_FLAG_FIELD; + if (pic->field_poc[0] != INT_MAX) + entry->top_field_order_cnt = pic->field_poc[0]; + if (pic->field_poc[1] != INT_MAX) + entry->bottom_field_order_cnt = pic->field_poc[1]; +} + +static void fill_dpb(struct v4l2_ctrl_h264_decode_params *decode_params, + const H264Context *h) +{ + int entries = 0; + + for (int i = 0; i < h->short_ref_count; i++) { + const H264Picture *pic = h->short_ref[i]; + if (pic && (pic->field_poc[0] != INT_MAX || pic->field_poc[1] != INT_MAX)) + fill_dpb_entry(&decode_params->dpb[entries++], pic, pic->pic_id); + } + + if (!h->long_ref_count) + return; + + for (int i = 0; i < FF_ARRAY_ELEMS(h->long_ref); i++) { + const H264Picture *pic = h->long_ref[i]; + if (pic && (pic->field_poc[0] != INT_MAX || pic->field_poc[1] != INT_MAX)) + fill_dpb_entry(&decode_params->dpb[entries++], pic, i); + } +} + +static void fill_ref_list(struct v4l2_h264_reference *reference, + struct v4l2_ctrl_h264_decode_params *decode_params, + const H264Ref *ref) +{ + uint64_t timestamp; + + if (!ref->parent) + return; + + timestamp = ff_v4l2_request_get_capture_timestamp(ref->parent->f); + + for (uint8_t i = 0; i < FF_ARRAY_ELEMS(decode_params->dpb); i++) { + struct v4l2_h264_dpb_entry *entry = &decode_params->dpb[i]; + if ((entry->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID) && + entry->reference_ts == timestamp) { + reference->fields = ref->reference & V4L2_H264_FRAME_REF; + reference->index = i; + return; + } + } +} + +static void fill_sps(struct v4l2_ctrl_h264_sps *ctrl, const H264Context *h) +{ + const SPS *sps = h->ps.sps; + + *ctrl = (struct v4l2_ctrl_h264_sps) { + .profile_idc = sps->profile_idc, + .constraint_set_flags = sps->constraint_set_flags, + .level_idc = sps->level_idc, + .seq_parameter_set_id = sps->sps_id, + .chroma_format_idc = sps->chroma_format_idc, + .bit_depth_luma_minus8 = sps->bit_depth_luma - 8, + .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8, + .log2_max_frame_num_minus4 = sps->log2_max_frame_num - 4, + .pic_order_cnt_type = sps->poc_type, + .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, + .max_num_ref_frames = sps->ref_frame_count, + .num_ref_frames_in_pic_order_cnt_cycle = sps->poc_cycle_length, + .offset_for_non_ref_pic = sps->offset_for_non_ref_pic, + .offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field, + .pic_width_in_mbs_minus1 = h->mb_width - 1, + .pic_height_in_map_units_minus1 = sps->frame_mbs_only_flag ? 
+ h->mb_height - 1 : h->mb_height / 2 - 1, + }; + + if (sps->poc_cycle_length > 0 && sps->poc_cycle_length <= 255) + memcpy(ctrl->offset_for_ref_frame, sps->offset_for_ref_frame, + sps->poc_cycle_length * sizeof(ctrl->offset_for_ref_frame[0])); + + if (sps->residual_color_transform_flag) + ctrl->flags |= V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE; + + if (sps->transform_bypass) + ctrl->flags |= V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS; + + if (sps->delta_pic_order_always_zero_flag) + ctrl->flags |= V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO; + + if (sps->gaps_in_frame_num_allowed_flag) + ctrl->flags |= V4L2_H264_SPS_FLAG_GAPS_IN_FRAME_NUM_VALUE_ALLOWED; + + if (sps->frame_mbs_only_flag) + ctrl->flags |= V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY; + + if (sps->mb_aff) + ctrl->flags |= V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD; + + if (sps->direct_8x8_inference_flag) + ctrl->flags |= V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE; +} + +static void fill_pps(struct v4l2_ctrl_h264_pps *ctrl, const H264Context *h) +{ + const SPS *sps = h->ps.sps; + const PPS *pps = h->ps.pps; + const H264SliceContext *sl = &h->slice_ctx[0]; + int qp_bd_offset = 6 * (sps->bit_depth_luma - 8); + + *ctrl = (struct v4l2_ctrl_h264_pps) { + .pic_parameter_set_id = sl->pps_id, + .seq_parameter_set_id = pps->sps_id, + .num_slice_groups_minus1 = pps->slice_group_count - 1, + .num_ref_idx_l0_default_active_minus1 = pps->ref_count[0] - 1, + .num_ref_idx_l1_default_active_minus1 = pps->ref_count[1] - 1, + .weighted_bipred_idc = pps->weighted_bipred_idc, + .pic_init_qp_minus26 = pps->init_qp - 26 - qp_bd_offset, + .pic_init_qs_minus26 = pps->init_qs - 26 - qp_bd_offset, + .chroma_qp_index_offset = pps->chroma_qp_index_offset[0], + .second_chroma_qp_index_offset = pps->chroma_qp_index_offset[1], + }; + + if (pps->cabac) + ctrl->flags |= V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE; + + if (pps->pic_order_present) + ctrl->flags |= V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT; + + if (pps->weighted_pred) + ctrl->flags |= V4L2_H264_PPS_FLAG_WEIGHTED_PRED; + + if (pps->deblocking_filter_parameters_present) + ctrl->flags |= V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT; + + if (pps->constrained_intra_pred) + ctrl->flags |= V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED; + + if (pps->redundant_pic_cnt_present) + ctrl->flags |= V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT; + + if (pps->transform_8x8_mode) + ctrl->flags |= V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE; + + /* FFmpeg always provide a scaling matrix */ + ctrl->flags |= V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT; +} + +static int v4l2_request_h264_start_frame(AVCodecContext *avctx, + av_unused const AVBufferRef *buf_ref, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + const H264Context *h = avctx->priv_data; + const PPS *pps = h->ps.pps; + const SPS *sps = h->ps.sps; + const H264SliceContext *sl = &h->slice_ctx[0]; + V4L2RequestControlsH264 *controls = h->cur_pic_ptr->hwaccel_picture_private; + int ret; + + ret = ff_v4l2_request_start_frame(avctx, &controls->pic, h->cur_pic_ptr->f); + if (ret) + return ret; + + fill_sps(&controls->sps, h); + fill_pps(&controls->pps, h); + + memcpy(controls->scaling_matrix.scaling_list_4x4, pps->scaling_matrix4, + sizeof(controls->scaling_matrix.scaling_list_4x4)); + memcpy(controls->scaling_matrix.scaling_list_8x8[0], pps->scaling_matrix8[0], + sizeof(controls->scaling_matrix.scaling_list_8x8[0])); + memcpy(controls->scaling_matrix.scaling_list_8x8[1], pps->scaling_matrix8[3], + 
sizeof(controls->scaling_matrix.scaling_list_8x8[1])); + + if (sps->chroma_format_idc == 3) { + memcpy(controls->scaling_matrix.scaling_list_8x8[2], pps->scaling_matrix8[1], + sizeof(controls->scaling_matrix.scaling_list_8x8[2])); + memcpy(controls->scaling_matrix.scaling_list_8x8[3], pps->scaling_matrix8[4], + sizeof(controls->scaling_matrix.scaling_list_8x8[3])); + memcpy(controls->scaling_matrix.scaling_list_8x8[4], pps->scaling_matrix8[2], + sizeof(controls->scaling_matrix.scaling_list_8x8[4])); + memcpy(controls->scaling_matrix.scaling_list_8x8[5], pps->scaling_matrix8[5], + sizeof(controls->scaling_matrix.scaling_list_8x8[5])); + } + + controls->decode_params = (struct v4l2_ctrl_h264_decode_params) { + .nal_ref_idc = h->nal_ref_idc, + .frame_num = h->poc.frame_num, + .top_field_order_cnt = h->cur_pic_ptr->field_poc[0] != INT_MAX ? + h->cur_pic_ptr->field_poc[0] : 0, + .bottom_field_order_cnt = h->cur_pic_ptr->field_poc[1] != INT_MAX ? + h->cur_pic_ptr->field_poc[1] : 0, + .idr_pic_id = sl->idr_pic_id, + .pic_order_cnt_lsb = sl->poc_lsb, + .delta_pic_order_cnt_bottom = sl->delta_poc_bottom, + .delta_pic_order_cnt0 = sl->delta_poc[0], + .delta_pic_order_cnt1 = sl->delta_poc[1], + /* Size in bits of dec_ref_pic_marking() syntax element. */ + .dec_ref_pic_marking_bit_size = sl->ref_pic_marking_bit_size, + /* Size in bits of pic order count syntax. */ + .pic_order_cnt_bit_size = sl->pic_order_cnt_bit_size, + .slice_group_change_cycle = 0, /* slice group not supported by FFmpeg */ + }; + + if (h->picture_idr) + controls->decode_params.flags |= V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC; + + if (FIELD_PICTURE(h)) + controls->decode_params.flags |= V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC; + + if (h->picture_structure == PICT_BOTTOM_FIELD) + controls->decode_params.flags |= V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD; + +#if defined(V4L2_H264_DECODE_PARAM_FLAG_PFRAME) + if (sl->slice_type_nos == AV_PICTURE_TYPE_P) + controls->decode_params.flags |= V4L2_H264_DECODE_PARAM_FLAG_PFRAME; +#endif + +#if defined(V4L2_H264_DECODE_PARAM_FLAG_BFRAME) + if (sl->slice_type_nos == AV_PICTURE_TYPE_B) + controls->decode_params.flags |= V4L2_H264_DECODE_PARAM_FLAG_BFRAME; +#endif + + fill_dpb(&controls->decode_params, h); + + controls->first_slice = true; + controls->num_slices = 0; + + return 0; +} + +static int v4l2_request_h264_queue_decode(AVCodecContext *avctx, bool last_slice) +{ + const H264Context *h = avctx->priv_data; + V4L2RequestContextH264 *ctx = avctx->internal->hwaccel_priv_data; + V4L2RequestControlsH264 *controls = h->cur_pic_ptr->hwaccel_picture_private; + + struct v4l2_ext_control control[] = { + { + .id = V4L2_CID_STATELESS_H264_SPS, + .ptr = &controls->sps, + .size = sizeof(controls->sps), + }, + { + .id = V4L2_CID_STATELESS_H264_PPS, + .ptr = &controls->pps, + .size = sizeof(controls->pps), + }, + { + .id = V4L2_CID_STATELESS_H264_SCALING_MATRIX, + .ptr = &controls->scaling_matrix, + .size = sizeof(controls->scaling_matrix), + }, + { + .id = V4L2_CID_STATELESS_H264_DECODE_PARAMS, + .ptr = &controls->decode_params, + .size = sizeof(controls->decode_params), + }, + { + .id = V4L2_CID_STATELESS_H264_SLICE_PARAMS, + .ptr = &controls->slice_params, + .size = sizeof(controls->slice_params), + }, + { + .id = V4L2_CID_STATELESS_H264_PRED_WEIGHTS, + .ptr = &controls->pred_weights, + .size = sizeof(controls->pred_weights), + }, + }; + + if (ctx->decode_mode == V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED) { + int count = FF_ARRAY_ELEMS(control) - (controls->pred_weights_required ? 
0 : 1); + return ff_v4l2_request_decode_slice(avctx, &controls->pic, control, count, + controls->first_slice, last_slice); + } + + return ff_v4l2_request_decode_frame(avctx, &controls->pic, + control, FF_ARRAY_ELEMS(control) - 2); +} + +static int v4l2_request_h264_decode_slice(AVCodecContext *avctx, + const uint8_t *buffer, uint32_t size) +{ + const H264Context *h = avctx->priv_data; + const PPS *pps = h->ps.pps; + const H264SliceContext *sl = &h->slice_ctx[0]; + V4L2RequestContextH264 *ctx = avctx->internal->hwaccel_priv_data; + V4L2RequestControlsH264 *controls = h->cur_pic_ptr->hwaccel_picture_private; + int i, ret, count; + + if (ctx->decode_mode == V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED && + controls->num_slices) { + ret = v4l2_request_h264_queue_decode(avctx, false); + if (ret) + return ret; + + ff_v4l2_request_reset_picture(avctx, &controls->pic); + controls->first_slice = 0; + } + + if (ctx->start_code == V4L2_STATELESS_H264_START_CODE_ANNEX_B) { + ret = ff_v4l2_request_append_output(avctx, &controls->pic, + nalu_slice_start_code, 3); + if (ret) + return ret; + } + + ret = ff_v4l2_request_append_output(avctx, &controls->pic, buffer, size); + if (ret) + return ret; + + if (ctx->decode_mode != V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED) + return 0; + + controls->slice_params = (struct v4l2_ctrl_h264_slice_params) { + /* Offset in bits to slice_data() from the beginning of this slice. */ + .header_bit_size = get_bits_count(&sl->gb), + + .first_mb_in_slice = sl->first_mb_addr, + + .slice_type = ff_h264_get_slice_type(sl), + .colour_plane_id = 0, /* separate colour plane not supported by FFmpeg */ + .redundant_pic_cnt = sl->redundant_pic_count, + .cabac_init_idc = sl->cabac_init_idc, + .slice_qp_delta = sl->qscale - pps->init_qp, + .slice_qs_delta = 0, /* not implemented by FFmpeg */ + .disable_deblocking_filter_idc = sl->deblocking_filter < 2 ? + !sl->deblocking_filter : + sl->deblocking_filter, + .slice_alpha_c0_offset_div2 = sl->slice_alpha_c0_offset / 2, + .slice_beta_offset_div2 = sl->slice_beta_offset / 2, + .num_ref_idx_l0_active_minus1 = sl->list_count > 0 ? sl->ref_count[0] - 1 : 0, + .num_ref_idx_l1_active_minus1 = sl->list_count > 1 ? sl->ref_count[1] - 1 : 0, + }; + + if (sl->slice_type == AV_PICTURE_TYPE_B && sl->direct_spatial_mv_pred) + controls->slice_params.flags |= V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED; + + /* V4L2_H264_SLICE_FLAG_SP_FOR_SWITCH: not implemented by FFmpeg */ + + controls->pred_weights_required = + V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(&controls->pps, &controls->slice_params); + if (controls->pred_weights_required) { + controls->pred_weights.chroma_log2_weight_denom = sl->pwt.chroma_log2_weight_denom; + controls->pred_weights.luma_log2_weight_denom = sl->pwt.luma_log2_weight_denom; + } + + count = sl->list_count > 0 ? sl->ref_count[0] : 0; + for (i = 0; i < count; i++) + fill_ref_list(&controls->slice_params.ref_pic_list0[i], + &controls->decode_params, &sl->ref_list[0][i]); + if (count && controls->pred_weights_required) + fill_weight_factors(&controls->pred_weights.weight_factors[0], 0, sl); + + count = sl->list_count > 1 ? 
sl->ref_count[1] : 0; + for (i = 0; i < count; i++) + fill_ref_list(&controls->slice_params.ref_pic_list1[i], + &controls->decode_params, &sl->ref_list[1][i]); + if (count && controls->pred_weights_required) + fill_weight_factors(&controls->pred_weights.weight_factors[1], 1, sl); + + controls->num_slices++; + return 0; +} + +static int v4l2_request_h264_end_frame(AVCodecContext *avctx) +{ + return v4l2_request_h264_queue_decode(avctx, true); +} + +static int v4l2_request_h264_post_frames_ctx(AVCodecContext *avctx) +{ + V4L2RequestContextH264 *ctx = avctx->internal->hwaccel_priv_data; + + struct v4l2_ext_control control[] = { + { .id = V4L2_CID_STATELESS_H264_DECODE_MODE, }, + { .id = V4L2_CID_STATELESS_H264_START_CODE, }, + }; + + ctx->decode_mode = ff_v4l2_request_query_control_default_value(avctx, + V4L2_CID_STATELESS_H264_DECODE_MODE); + if (ctx->decode_mode != V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED && + ctx->decode_mode != V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED) { + av_log(ctx, AV_LOG_VERBOSE, "Unsupported decode mode: %d\n", + ctx->decode_mode); + return AVERROR(EINVAL); + } + + ctx->start_code = ff_v4l2_request_query_control_default_value(avctx, + V4L2_CID_STATELESS_H264_START_CODE); + if (ctx->start_code != V4L2_STATELESS_H264_START_CODE_NONE && + ctx->start_code != V4L2_STATELESS_H264_START_CODE_ANNEX_B) { + av_log(ctx, AV_LOG_VERBOSE, "Unsupported start code: %d\n", + ctx->start_code); + return AVERROR(EINVAL); + } + + // TODO: check V4L2_CID_MPEG_VIDEO_H264_PROFILE control + // TODO: check V4L2_CID_MPEG_VIDEO_H264_LEVEL control + + control[0].value = ctx->decode_mode; + control[1].value = ctx->start_code; + + return ff_v4l2_request_set_controls(avctx, control, FF_ARRAY_ELEMS(control)); +} + +static int v4l2_request_h264_init(AVCodecContext *avctx) +{ + const H264Context *h = avctx->priv_data; + struct v4l2_ctrl_h264_sps sps; + + struct v4l2_ext_control control[] = { + { + .id = V4L2_CID_STATELESS_H264_SPS, + .ptr = &sps, + .size = sizeof(sps), + }, + }; + + fill_sps(&sps, h); + + return ff_v4l2_request_init(avctx, control, FF_ARRAY_ELEMS(control), + v4l2_request_h264_post_frames_ctx); +} + +static int v4l2_request_h264_frame_params(AVCodecContext *avctx, + AVBufferRef *hw_frames_ctx) +{ + const H264Context *h = avctx->priv_data; + const SPS *sps = h ? h->ps.sps : NULL; + uint8_t bit_depth = sps ? sps->bit_depth_luma : 0; + + return ff_v4l2_request_frame_params(avctx, hw_frames_ctx, + V4L2_PIX_FMT_H264_SLICE, bit_depth); +} + +const FFHWAccel ff_h264_v4l2request_hwaccel = { + .p.name = "h264_v4l2request", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_H264, + .p.pix_fmt = AV_PIX_FMT_DRM_PRIME, + .start_frame = v4l2_request_h264_start_frame, + .decode_slice = v4l2_request_h264_decode_slice, + .end_frame = v4l2_request_h264_end_frame, + .flush = ff_v4l2_request_flush, + .frame_priv_data_size = sizeof(V4L2RequestControlsH264), + .init = v4l2_request_h264_init, + .uninit = ff_v4l2_request_uninit, + .priv_data_size = sizeof(V4L2RequestContextH264), + .frame_params = v4l2_request_h264_frame_params, + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +}; diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c new file mode 100644 index 0000000000..26cba2c75f --- /dev/null +++ b/libavcodec/v4l2_request_hevc.c @@ -0,0 +1,752 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "libavutil/mem.h" +#include "hevc/hevcdec.h" +#include "hwaccel_internal.h" +#include "hwconfig.h" +#include "internal.h" +#include "v4l2_request.h" + +#define V4L2_HEVC_CONTROLS_MAX 6 + +typedef struct V4L2RequestContextHEVC { + V4L2RequestContext base; + enum v4l2_stateless_hevc_decode_mode decode_mode; + enum v4l2_stateless_hevc_start_code start_code; + unsigned int max_slice_params; + unsigned int max_entry_point_offsets; + bool has_scaling_matrix; +} V4L2RequestContextHEVC; + +typedef struct V4L2RequestControlsHEVC { + V4L2RequestPictureContext pic; + struct v4l2_ctrl_hevc_sps sps; + struct v4l2_ctrl_hevc_pps pps; + struct v4l2_ctrl_hevc_decode_params decode_params; + struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix; + struct v4l2_ctrl_hevc_slice_params slice_params; + struct v4l2_ctrl_hevc_slice_params *frame_slice_params; + unsigned int allocated_slice_params; + unsigned int num_slice_params; + uint32_t *entry_point_offsets; + unsigned int allocated_entry_point_offsets; + unsigned int num_entry_point_offsets; + bool first_slice; +} V4L2RequestControlsHEVC; + +static uint8_t nalu_slice_start_code[] = { 0x00, 0x00, 0x01 }; + +static void fill_pred_weight_table(struct v4l2_hevc_pred_weight_table *table, + const HEVCContext *h) +{ + int32_t luma_weight_denom, chroma_weight_denom; + const SliceHeader *sh = &h->sh; + + if (sh->slice_type == HEVC_SLICE_I || + (sh->slice_type == HEVC_SLICE_P && !h->pps->weighted_pred_flag) || + (sh->slice_type == HEVC_SLICE_B && !h->pps->weighted_bipred_flag)) + return; + + table->luma_log2_weight_denom = sh->luma_log2_weight_denom; + + if (h->pps->sps->chroma_format_idc) + table->delta_chroma_log2_weight_denom = sh->chroma_log2_weight_denom - + sh->luma_log2_weight_denom; + + luma_weight_denom = (1 << sh->luma_log2_weight_denom); + chroma_weight_denom = (1 << sh->chroma_log2_weight_denom); + + for (int i = 0; i < 15 && i < sh->nb_refs[L0]; i++) { + table->delta_luma_weight_l0[i] = sh->luma_weight_l0[i] - luma_weight_denom; + table->luma_offset_l0[i] = sh->luma_offset_l0[i]; + table->delta_chroma_weight_l0[i][0] = sh->chroma_weight_l0[i][0] - chroma_weight_denom; + table->delta_chroma_weight_l0[i][1] = sh->chroma_weight_l0[i][1] - chroma_weight_denom; + table->chroma_offset_l0[i][0] = sh->chroma_offset_l0[i][0]; + table->chroma_offset_l0[i][1] = sh->chroma_offset_l0[i][1]; + } + + if (sh->slice_type != HEVC_SLICE_B) + return; + + for (int i = 0; i < 15 && i < sh->nb_refs[L1]; i++) { + table->delta_luma_weight_l1[i] = sh->luma_weight_l1[i] - luma_weight_denom; + table->luma_offset_l1[i] = sh->luma_offset_l1[i]; + table->delta_chroma_weight_l1[i][0] = sh->chroma_weight_l1[i][0] - chroma_weight_denom; + table->delta_chroma_weight_l1[i][1] = sh->chroma_weight_l1[i][1] - chroma_weight_denom; + 
table->chroma_offset_l1[i][0] = sh->chroma_offset_l1[i][0]; + table->chroma_offset_l1[i][1] = sh->chroma_offset_l1[i][1]; + } +} + +static uint8_t get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame, + struct v4l2_ctrl_hevc_decode_params *decode_params) +{ + uint64_t timestamp; + + if (!frame || !frame->f) + return 0; + + timestamp = ff_v4l2_request_get_capture_timestamp(frame->f); + + for (uint8_t i = 0; i < decode_params->num_active_dpb_entries; i++) { + struct v4l2_hevc_dpb_entry *entry = &decode_params->dpb[i]; + if (entry->timestamp == timestamp) + return i; + } + + return 0; +} + +static void fill_decode_params(struct v4l2_ctrl_hevc_decode_params *decode_params, + const HEVCContext *h) +{ + const HEVCFrame *pic = h->cur_frame; + const HEVCLayerContext *l = &h->layers[h->cur_layer]; + const SliceHeader *sh = &h->sh; + int i, entries = 0; + + *decode_params = (struct v4l2_ctrl_hevc_decode_params) { + .pic_order_cnt_val = h->poc, + .short_term_ref_pic_set_size = sh->short_term_ref_pic_set_size, + .long_term_ref_pic_set_size = sh->long_term_ref_pic_set_size, + .num_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs, + .num_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs, + .num_poc_lt_curr = h->rps[LT_CURR].nb_refs, + }; + +#if HAVE_STRUCT_V4L2_CTRL_HEVC_DECODE_PARAMS_NUM_DELTA_POCS_OF_REF_RPS_IDX + if (h->sh.short_term_ref_pic_set_sps_flag == 0 && h->sh.short_term_rps) + decode_params->num_delta_pocs_of_ref_rps_idx = + h->sh.short_term_rps->rps_idx_num_delta_pocs; +#endif + + for (i = 0; i < FF_ARRAY_ELEMS(l->DPB); i++) { + const HEVCFrame *frame = &l->DPB[i]; + if (frame != pic && + (frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF))) { + struct v4l2_hevc_dpb_entry *entry = &decode_params->dpb[entries++]; + + entry->timestamp = ff_v4l2_request_get_capture_timestamp(frame->f); + entry->field_pic = !!(frame->f->flags & AV_FRAME_FLAG_INTERLACED); + entry->flags = 0; + if (frame->flags & HEVC_FRAME_FLAG_LONG_REF) + entry->flags |= V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE; + + entry->pic_order_cnt_val = frame->poc; + } + } + + decode_params->num_active_dpb_entries = entries; + + if (IS_IRAP(h)) + decode_params->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC; + + if (IS_IDR(h)) + decode_params->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC; + + if (sh->no_output_of_prior_pics_flag) + decode_params->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR; + + for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX; i++) { + decode_params->poc_st_curr_before[i] = + get_ref_pic_index(h, h->rps[ST_CURR_BEF].ref[i], decode_params); + decode_params->poc_st_curr_after[i] = + get_ref_pic_index(h, h->rps[ST_CURR_AFT].ref[i], decode_params); + decode_params->poc_lt_curr[i] = + get_ref_pic_index(h, h->rps[LT_CURR].ref[i], decode_params); + } +} + +static int fill_slice_params(V4L2RequestControlsHEVC *controls, int slice, + uint32_t size, bool max_entry_point_offsets, + const HEVCContext *h) +{ + struct v4l2_ctrl_hevc_slice_params *slice_params = &controls->frame_slice_params[slice]; + struct v4l2_ctrl_hevc_decode_params *decode_params = &controls->decode_params; + const SliceHeader *sh = &h->sh; + RefPicList *rpl; + int i, offsets; + + *slice_params = (struct v4l2_ctrl_hevc_slice_params) { + .bit_size = (size - sh->data_offset) * 8, + .data_byte_offset = controls->pic.output->bytesused + sh->data_offset, + .num_entry_point_offsets = sh->num_entry_point_offsets, + + /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: NAL unit header */ + .nal_unit_type = h->nal_unit_type, + .nuh_temporal_id_plus1 = h->temporal_id + 1, + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + .slice_type = sh->slice_type, + .colour_plane_id = sh->colour_plane_id, + .slice_pic_order_cnt = sh->poc, + .num_ref_idx_l0_active_minus1 = sh->nb_refs[L0] ? sh->nb_refs[L0] - 1 : 0, + .num_ref_idx_l1_active_minus1 = sh->nb_refs[L1] ? sh->nb_refs[L1] - 1 : 0, + .collocated_ref_idx = sh->slice_temporal_mvp_enabled_flag ? + sh->collocated_ref_idx : 0, + .five_minus_max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? + 0 : 5 - sh->max_num_merge_cand, + .slice_qp_delta = sh->slice_qp_delta, + .slice_cb_qp_offset = sh->slice_cb_qp_offset, + .slice_cr_qp_offset = sh->slice_cr_qp_offset, + .slice_act_y_qp_offset = 0, + .slice_act_cb_qp_offset = 0, + .slice_act_cr_qp_offset = 0, + .slice_beta_offset_div2 = sh->beta_offset / 2, + .slice_tc_offset_div2 = sh->tc_offset / 2, + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ + .pic_struct = h->sei.picture_timing.picture_struct, + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + .slice_segment_addr = sh->slice_segment_addr, + .short_term_ref_pic_set_size = sh->short_term_ref_pic_set_size, + .long_term_ref_pic_set_size = sh->long_term_ref_pic_set_size, + }; + + if (h->pps->pps_slice_act_qp_offsets_present_flag) { + slice_params->slice_act_y_qp_offset = sh->slice_act_y_qp_offset; + slice_params->slice_act_cb_qp_offset = sh->slice_act_cb_qp_offset; + slice_params->slice_act_cr_qp_offset = sh->slice_act_cr_qp_offset; + } + + if (sh->slice_sample_adaptive_offset_flag[0]) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA; + + if (sh->slice_sample_adaptive_offset_flag[1]) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA; + + if (sh->slice_temporal_mvp_enabled_flag) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED; + + if (sh->mvd_l1_zero_flag) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO; + + if (sh->cabac_init_flag) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT; + + if (sh->collocated_list == L0) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0; + + if (sh->use_integer_mv_flag) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV; + + if (sh->disable_deblocking_filter_flag) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED; + + if (sh->slice_loop_filter_across_slices_enabled_flag) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED; + + if (sh->dependent_slice_segment_flag) + slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT; + + if (sh->slice_type != HEVC_SLICE_I) { + rpl = &h->cur_frame->refPicList[0]; + for (i = 0; i < rpl->nb_refs; i++) + slice_params->ref_idx_l0[i] = get_ref_pic_index(h, rpl->ref[i], decode_params); + } + + if (sh->slice_type == HEVC_SLICE_B) { + rpl = &h->cur_frame->refPicList[1]; + for (i = 0; i < rpl->nb_refs; i++) + slice_params->ref_idx_l1[i] = get_ref_pic_index(h, rpl->ref[i], decode_params); + } + + fill_pred_weight_table(&slice_params->pred_weight_table, h); + + if (!max_entry_point_offsets) + return 0; + + if (controls->allocated_entry_point_offsets < controls->num_entry_point_offsets + sh->num_entry_point_offsets) { + void *entry_point_offsets = controls->entry_point_offsets; + offsets = controls->allocated_entry_point_offsets == 0 ? 
128 : controls->allocated_entry_point_offsets * 2; + while (controls->num_entry_point_offsets + sh->num_entry_point_offsets > offsets) + offsets *= 2; + entry_point_offsets = av_realloc_array(entry_point_offsets, offsets, sizeof(*controls->entry_point_offsets)); + if (!entry_point_offsets) + return AVERROR(ENOMEM); + controls->entry_point_offsets = entry_point_offsets; + controls->allocated_entry_point_offsets = offsets; + } + + for (i = 0, offsets = controls->num_entry_point_offsets; i < sh->num_entry_point_offsets; i++) + controls->entry_point_offsets[offsets + i] = sh->entry_point_offset[i]; + controls->num_entry_point_offsets += sh->num_entry_point_offsets; + + return 0; +} + +static void fill_sps(struct v4l2_ctrl_hevc_sps *ctrl, const HEVCContext *h) +{ + const HEVCPPS *pps = h->pps; + const HEVCSPS *sps = pps->sps; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ + *ctrl = (struct v4l2_ctrl_hevc_sps) { + .video_parameter_set_id = sps->vps_id, + .seq_parameter_set_id = pps->sps_id, + .pic_width_in_luma_samples = sps->width, + .pic_height_in_luma_samples = sps->height, + .bit_depth_luma_minus8 = sps->bit_depth - 8, + .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8, + .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, + .sps_max_dec_pic_buffering_minus1 = + sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1, + .sps_max_num_reorder_pics = + sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics, + .sps_max_latency_increase_plus1 = + sps->temporal_layer[sps->max_sub_layers - 1].max_latency_increase + 1, + .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3, + .log2_diff_max_min_luma_coding_block_size = + sps->log2_diff_max_min_coding_block_size, + .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2, + .log2_diff_max_min_luma_transform_block_size = + sps->log2_max_trafo_size - sps->log2_min_tb_size, + .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, + .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra, + .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1, + .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1, + .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3, + .log2_diff_max_min_pcm_luma_coding_block_size = + sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size, + .num_short_term_ref_pic_sets = sps->nb_st_rps, + .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, + .chroma_format_idc = sps->chroma_format_idc, + .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1, + }; + + if (sps->separate_colour_plane) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE; + + if (sps->scaling_list_enabled) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED; + + if (sps->amp_enabled) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_AMP_ENABLED; + + if (sps->sao_enabled) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET; + + if (sps->pcm_enabled) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_ENABLED; + + if (sps->pcm_loop_filter_disabled) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED; + + if (sps->long_term_ref_pics_present) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT; + + if (sps->temporal_mvp_enabled) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED; + + if (sps->strong_intra_smoothing_enabled) + ctrl->flags |= V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED; +} + +static int v4l2_request_hevc_start_frame(AVCodecContext 
*avctx, + av_unused const AVBufferRef *buf_ref, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + const HEVCContext *h = avctx->priv_data; + const HEVCPPS *pps = h->pps; + const HEVCSPS *sps = pps->sps; + V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; + V4L2RequestControlsHEVC *controls = h->cur_frame->hwaccel_picture_private; + const SliceHeader *sh = &h->sh; + int ret; + + ret = ff_v4l2_request_start_frame(avctx, &controls->pic, h->cur_frame->f); + if (ret) + return ret; + + fill_sps(&controls->sps, h); + fill_decode_params(&controls->decode_params, h); + + if (ctx->has_scaling_matrix) { + const ScalingList *sl = pps->scaling_list_data_present_flag ? + &pps->scaling_list : + sps->scaling_list_enabled ? + &sps->scaling_list : NULL; + if (sl) { + for (int i = 0; i < 6; i++) { + for (int j = 0; j < 16; j++) + controls->scaling_matrix.scaling_list_4x4[i][j] = sl->sl[0][i][j]; + for (int j = 0; j < 64; j++) { + controls->scaling_matrix.scaling_list_8x8[i][j] = sl->sl[1][i][j]; + controls->scaling_matrix.scaling_list_16x16[i][j] = sl->sl[2][i][j]; + if (i < 2) + controls->scaling_matrix.scaling_list_32x32[i][j] = sl->sl[3][i * 3][j]; + } + controls->scaling_matrix.scaling_list_dc_coef_16x16[i] = sl->sl_dc[0][i]; + if (i < 2) + controls->scaling_matrix.scaling_list_dc_coef_32x32[i] = sl->sl_dc[1][i * 3]; + } + } + } + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ + controls->pps = (struct v4l2_ctrl_hevc_pps) { + .pic_parameter_set_id = sh->pps_id, + .num_extra_slice_header_bits = pps->num_extra_slice_header_bits, + .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active - 1, + .num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active - 1, + .init_qp_minus26 = pps->pic_init_qp_minus26, + .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, + .pps_cb_qp_offset = pps->cb_qp_offset, + .pps_cr_qp_offset = pps->cr_qp_offset, + .pps_beta_offset_div2 = pps->beta_offset / 2, + .pps_tc_offset_div2 = pps->tc_offset / 2, + .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2, + }; + + if (pps->dependent_slice_segments_enabled_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED; + + if (pps->output_flag_present_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT; + + if (pps->sign_data_hiding_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED; + + if (pps->cabac_init_present_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT; + + if (pps->constrained_intra_pred_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED; + + if (pps->transform_skip_enabled_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED; + + if (pps->cu_qp_delta_enabled_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED; + + if (pps->pic_slice_level_chroma_qp_offsets_present_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT; + + if (pps->weighted_pred_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED; + + if (pps->weighted_bipred_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED; + + if (pps->transquant_bypass_enable_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED; + + if (pps->tiles_enabled_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED; + + if (pps->entropy_coding_sync_enabled_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED; + + if 
(pps->loop_filter_across_tiles_enabled_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED; + + if (pps->seq_loop_filter_across_slices_enabled_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED; + + if (pps->deblocking_filter_override_enabled_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED; + + if (pps->disable_dbf) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER; + + if (pps->lists_modification_present_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT; + + if (pps->slice_header_extension_present_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT; + + if (pps->deblocking_filter_control_present_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT; + + if (pps->uniform_spacing_flag) + controls->pps.flags |= V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING; + + if (pps->tiles_enabled_flag) { + controls->pps.num_tile_columns_minus1 = pps->num_tile_columns - 1; + controls->pps.num_tile_rows_minus1 = pps->num_tile_rows - 1; + + for (int i = 0; i < pps->num_tile_columns; i++) + controls->pps.column_width_minus1[i] = pps->column_width[i] - 1; + + for (int i = 0; i < pps->num_tile_rows; i++) + controls->pps.row_height_minus1[i] = pps->row_height[i] - 1; + } + + controls->first_slice = true; + controls->frame_slice_params = &controls->slice_params; + controls->allocated_slice_params = 0; + controls->num_slice_params = 0; + controls->allocated_entry_point_offsets = 0; + controls->num_entry_point_offsets = 0; + + return 0; +} + +static int v4l2_request_hevc_queue_decode(AVCodecContext *avctx, bool last_slice) +{ + const HEVCContext *h = avctx->priv_data; + V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; + V4L2RequestControlsHEVC *controls = h->cur_frame->hwaccel_picture_private; + int count = 0; + + struct v4l2_ext_control control[V4L2_HEVC_CONTROLS_MAX] = {}; + + control[count++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_HEVC_SPS, + .ptr = &controls->sps, + .size = sizeof(controls->sps), + }; + + control[count++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_HEVC_PPS, + .ptr = &controls->pps, + .size = sizeof(controls->pps), + }; + + control[count++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, + .ptr = &controls->decode_params, + .size = sizeof(controls->decode_params), + }; + + if (ctx->has_scaling_matrix) { + control[count++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, + .ptr = &controls->scaling_matrix, + .size = sizeof(controls->scaling_matrix), + }; + } + + if (ctx->max_slice_params && controls->num_slice_params) { + control[count++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, + .ptr = controls->frame_slice_params, + .size = sizeof(*controls->frame_slice_params) * + FFMIN(controls->num_slice_params, ctx->max_slice_params), + }; + } + + if (ctx->max_entry_point_offsets && controls->num_entry_point_offsets) { + control[count++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, + .ptr = controls->entry_point_offsets, + .size = sizeof(*controls->entry_point_offsets) * + FFMIN(controls->num_entry_point_offsets, + ctx->max_entry_point_offsets), + }; + } + + if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED) + return ff_v4l2_request_decode_slice(avctx, &controls->pic, control, count, + 
controls->first_slice, last_slice); + + return ff_v4l2_request_decode_frame(avctx, &controls->pic, control, count); +} + +static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, + const uint8_t *buffer, uint32_t size) +{ + const HEVCContext *h = avctx->priv_data; + V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; + V4L2RequestControlsHEVC *controls = h->cur_frame->hwaccel_picture_private; + const SliceHeader *sh = &h->sh; + int ret, slice = controls->num_slice_params; + + if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED && + (slice >= ctx->max_slice_params || (ctx->max_entry_point_offsets && + (controls->num_entry_point_offsets + sh->num_entry_point_offsets > ctx->max_entry_point_offsets)))) { + ret = v4l2_request_hevc_queue_decode(avctx, false); + if (ret) + return ret; + + ff_v4l2_request_reset_picture(avctx, &controls->pic); + slice = controls->num_slice_params = 0; + controls->num_entry_point_offsets = 0; + controls->first_slice = false; + } + + if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) { + ret = ff_v4l2_request_append_output(avctx, &controls->pic, + nalu_slice_start_code, 3); + if (ret) + return ret; + } + + if (ctx->max_slice_params) { + if (slice && controls->allocated_slice_params < slice + 1) { + void *slice_params = controls->allocated_slice_params == 0 ? NULL : controls->frame_slice_params; + int slices = controls->allocated_slice_params == 0 ? 8 : controls->allocated_slice_params * 2; + slice_params = av_realloc_array(slice_params, slices, sizeof(*controls->frame_slice_params)); + if (!slice_params) + return AVERROR(ENOMEM); + if (controls->allocated_slice_params == 0) + memcpy(slice_params, controls->frame_slice_params, sizeof(*controls->frame_slice_params)); + controls->frame_slice_params = slice_params; + controls->allocated_slice_params = slices; + } + + ret = fill_slice_params(controls, slice, size, + !!ctx->max_entry_point_offsets, h); + if (ret) + return ret; + } + + ret = ff_v4l2_request_append_output(avctx, &controls->pic, buffer, size); + if (ret) + return ret; + + controls->num_slice_params++; + return 0; +} + +static int v4l2_request_hevc_end_frame(AVCodecContext *avctx) +{ + return v4l2_request_hevc_queue_decode(avctx, true); +} + +static void v4l2_request_hevc_free_frame_priv(AVRefStructOpaque hwctx, void *data) +{ + V4L2RequestControlsHEVC *controls = data; + + if (controls->allocated_slice_params) + av_freep(&controls->frame_slice_params); + + av_freep(&controls->entry_point_offsets); +} + +static int v4l2_request_hevc_post_frames_ctx(AVCodecContext *avctx) +{ + V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; + int ret; + + struct v4l2_ext_control control[] = { + { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, + { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, + }; + struct v4l2_query_ext_ctrl scaling_matrix = { + .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, + }; + struct v4l2_query_ext_ctrl entry_point_offsets = { + .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, + }; + struct v4l2_query_ext_ctrl slice_params = { + .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, + }; + + ctx->decode_mode = ff_v4l2_request_query_control_default_value(avctx, + V4L2_CID_STATELESS_HEVC_DECODE_MODE); + if (ctx->decode_mode != V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED && + ctx->decode_mode != V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) { + av_log(ctx, AV_LOG_VERBOSE, "Unsupported decode mode: %d\n", + ctx->decode_mode); + return AVERROR(EINVAL); + } + + ctx->start_code = 
ff_v4l2_request_query_control_default_value(avctx, + V4L2_CID_STATELESS_HEVC_START_CODE); + if (ctx->start_code != V4L2_STATELESS_HEVC_START_CODE_NONE && + ctx->start_code != V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) { + av_log(ctx, AV_LOG_VERBOSE, "Unsupported start code: %d\n", + ctx->start_code); + return AVERROR(EINVAL); + } + + // TODO: check V4L2_CID_MPEG_VIDEO_HEVC_PROFILE control + // TODO: check V4L2_CID_MPEG_VIDEO_HEVC_LEVEL control + + ret = ff_v4l2_request_query_control(avctx, &scaling_matrix); + if (!ret) + ctx->has_scaling_matrix = true; + else + ctx->has_scaling_matrix = false; + + ret = ff_v4l2_request_query_control(avctx, &entry_point_offsets); + if (!ret) + ctx->max_entry_point_offsets = FFMAX(entry_point_offsets.dims[0], 1); + else + ctx->max_entry_point_offsets = 0; + + ret = ff_v4l2_request_query_control(avctx, &slice_params); + if (!ret) + ctx->max_slice_params = FFMAX(slice_params.dims[0], 1); + else + ctx->max_slice_params = 0; + + av_log(ctx, AV_LOG_VERBOSE, "%s-based decoder with SLICE_PARAMS=%u, " + "ENTRY_POINT_OFFSETS=%u and SCALING_MATRIX=%d controls\n", + ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED ? "slice" : "frame", + ctx->max_slice_params, ctx->max_entry_point_offsets, ctx->has_scaling_matrix); + + control[0].value = ctx->decode_mode; + control[1].value = ctx->start_code; + + return ff_v4l2_request_set_controls(avctx, control, FF_ARRAY_ELEMS(control)); +} + +static int v4l2_request_hevc_init(AVCodecContext *avctx) +{ + const HEVCContext *h = avctx->priv_data; + struct v4l2_ctrl_hevc_sps sps; + + struct v4l2_ext_control control[] = { + { + .id = V4L2_CID_STATELESS_HEVC_SPS, + .ptr = &sps, + .size = sizeof(sps), + }, + }; + + fill_sps(&sps, h); + + return ff_v4l2_request_init(avctx, control, FF_ARRAY_ELEMS(control), + v4l2_request_hevc_post_frames_ctx); +} + +static int v4l2_request_hevc_frame_params(AVCodecContext *avctx, + AVBufferRef *hw_frames_ctx) +{ + const HEVCContext *h = avctx->priv_data; + const HEVCSPS *sps = h && h->pps ? h->pps->sps : NULL; + uint8_t bit_depth = sps ? sps->bit_depth : 0; + + return ff_v4l2_request_frame_params(avctx, hw_frames_ctx, + V4L2_PIX_FMT_HEVC_SLICE, bit_depth); +} + +const FFHWAccel ff_hevc_v4l2request_hwaccel = { + .p.name = "hevc_v4l2request", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_HEVC, + .p.pix_fmt = AV_PIX_FMT_DRM_PRIME, + .start_frame = v4l2_request_hevc_start_frame, + .decode_slice = v4l2_request_hevc_decode_slice, + .end_frame = v4l2_request_hevc_end_frame, + .flush = ff_v4l2_request_flush, + .free_frame_priv = v4l2_request_hevc_free_frame_priv, + .frame_priv_data_size = sizeof(V4L2RequestControlsHEVC), + .init = v4l2_request_hevc_init, + .uninit = ff_v4l2_request_uninit, + .priv_data_size = sizeof(V4L2RequestContextHEVC), + .frame_params = v4l2_request_hevc_frame_params, + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +}; diff --git a/libavcodec/v4l2_request_mpeg2.c b/libavcodec/v4l2_request_mpeg2.c new file mode 100644 index 0000000000..5d89b7168e --- /dev/null +++ b/libavcodec/v4l2_request_mpeg2.c @@ -0,0 +1,184 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "hwaccel_internal.h" +#include "hwconfig.h" +#include "mathops.h" +#include "mpegvideo.h" +#include "v4l2_request.h" + +typedef struct V4L2RequestControlsMPEG2 { + V4L2RequestPictureContext pic; + struct v4l2_ctrl_mpeg2_sequence sequence; + struct v4l2_ctrl_mpeg2_picture picture; + struct v4l2_ctrl_mpeg2_quantisation quantisation; +} V4L2RequestControlsMPEG2; + +static int v4l2_request_mpeg2_start_frame(AVCodecContext *avctx, + av_unused const AVBufferRef *buf_ref, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + const MpegEncContext *s = avctx->priv_data; + V4L2RequestControlsMPEG2 *controls = s->cur_pic.ptr->hwaccel_picture_private; + int ret; + + ret = ff_v4l2_request_start_frame(avctx, &controls->pic, s->cur_pic.ptr->f); + if (ret) + return ret; + + controls->sequence = (struct v4l2_ctrl_mpeg2_sequence) { + /* ISO/IEC 13818-2, ITU-T Rec. H.262: Sequence header */ + .horizontal_size = s->width, + .vertical_size = s->height, + .vbv_buffer_size = controls->pic.output->size, + + /* ISO/IEC 13818-2, ITU-T Rec. H.262: Sequence extension */ + .profile_and_level_indication = 0, + .chroma_format = s->chroma_format, + }; + + if (s->progressive_sequence) + controls->sequence.flags |= V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE; + + controls->picture = (struct v4l2_ctrl_mpeg2_picture) { + /* ISO/IEC 13818-2, ITU-T Rec. H.262: Picture header */ + .picture_coding_type = s->pict_type, + + /* ISO/IEC 13818-2, ITU-T Rec. 
H.262: Picture coding extension */ + .f_code[0][0] = s->mpeg_f_code[0][0], + .f_code[0][1] = s->mpeg_f_code[0][1], + .f_code[1][0] = s->mpeg_f_code[1][0], + .f_code[1][1] = s->mpeg_f_code[1][1], + .picture_structure = s->picture_structure, + .intra_dc_precision = s->intra_dc_precision, + }; + + if (s->top_field_first) + controls->picture.flags |= V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST; + + if (s->frame_pred_frame_dct) + controls->picture.flags |= V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT; + + if (s->concealment_motion_vectors) + controls->picture.flags |= V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV; + + if (s->intra_vlc_format) + controls->picture.flags |= V4L2_MPEG2_PIC_FLAG_INTRA_VLC; + + if (s->q_scale_type) + controls->picture.flags |= V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE; + + if (s->alternate_scan) + controls->picture.flags |= V4L2_MPEG2_PIC_FLAG_ALT_SCAN; + + if (s->repeat_first_field) + controls->picture.flags |= V4L2_MPEG2_PIC_FLAG_REPEAT_FIRST; + + if (s->progressive_frame) + controls->picture.flags |= V4L2_MPEG2_PIC_FLAG_PROGRESSIVE; + + switch (s->pict_type) { + case AV_PICTURE_TYPE_B: + if (s->next_pic.ptr) + controls->picture.backward_ref_ts = + ff_v4l2_request_get_capture_timestamp(s->next_pic.ptr->f); + // fall-through + case AV_PICTURE_TYPE_P: + if (s->last_pic.ptr) + controls->picture.forward_ref_ts = + ff_v4l2_request_get_capture_timestamp(s->last_pic.ptr->f); + } + + for (int i = 0; i < 64; i++) { + int n = s->idsp.idct_permutation[ff_zigzag_direct[i]]; + controls->quantisation.intra_quantiser_matrix[i] = s->intra_matrix[n]; + controls->quantisation.non_intra_quantiser_matrix[i] = s->inter_matrix[n]; + controls->quantisation.chroma_intra_quantiser_matrix[i] = s->chroma_intra_matrix[n]; + controls->quantisation.chroma_non_intra_quantiser_matrix[i] = s->chroma_inter_matrix[n]; + } + + return 0; +} + +static int v4l2_request_mpeg2_decode_slice(AVCodecContext *avctx, + const uint8_t *buffer, uint32_t size) +{ + const MpegEncContext *s = avctx->priv_data; + V4L2RequestControlsMPEG2 *controls = s->cur_pic.ptr->hwaccel_picture_private; + + return ff_v4l2_request_append_output(avctx, &controls->pic, buffer, size); +} + +static int v4l2_request_mpeg2_end_frame(AVCodecContext *avctx) +{ + const MpegEncContext *s = avctx->priv_data; + V4L2RequestControlsMPEG2 *controls = s->cur_pic.ptr->hwaccel_picture_private; + + struct v4l2_ext_control control[] = { + { + .id = V4L2_CID_STATELESS_MPEG2_SEQUENCE, + .ptr = &controls->sequence, + .size = sizeof(controls->sequence), + }, + { + .id = V4L2_CID_STATELESS_MPEG2_PICTURE, + .ptr = &controls->picture, + .size = sizeof(controls->picture), + }, + { + .id = V4L2_CID_STATELESS_MPEG2_QUANTISATION, + .ptr = &controls->quantisation, + .size = sizeof(controls->quantisation), + }, + }; + + return ff_v4l2_request_decode_frame(avctx, &controls->pic, + control, FF_ARRAY_ELEMS(control)); +} + +static int v4l2_request_mpeg2_init(AVCodecContext *avctx) +{ + return ff_v4l2_request_init(avctx, NULL, 0, NULL); +} + +static int v4l2_request_mpeg2_frame_params(AVCodecContext *avctx, + AVBufferRef *hw_frames_ctx) +{ + return ff_v4l2_request_frame_params(avctx, hw_frames_ctx, + V4L2_PIX_FMT_MPEG2_SLICE, 8); +} + +const FFHWAccel ff_mpeg2_v4l2request_hwaccel = { + .p.name = "mpeg2_v4l2request", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_MPEG2VIDEO, + .p.pix_fmt = AV_PIX_FMT_DRM_PRIME, + .start_frame = v4l2_request_mpeg2_start_frame, + .decode_slice = v4l2_request_mpeg2_decode_slice, + .end_frame = v4l2_request_mpeg2_end_frame, + .flush = ff_v4l2_request_flush, + 
.frame_priv_data_size = sizeof(V4L2RequestControlsMPEG2), + .init = v4l2_request_mpeg2_init, + .uninit = ff_v4l2_request_uninit, + .priv_data_size = sizeof(V4L2RequestContext), + .frame_params = v4l2_request_mpeg2_frame_params, + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +}; diff --git a/libavcodec/v4l2_request_vp8.c b/libavcodec/v4l2_request_vp8.c new file mode 100644 index 0000000000..3fb23951df --- /dev/null +++ b/libavcodec/v4l2_request_vp8.c @@ -0,0 +1,242 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "hwaccel_internal.h" +#include "hwconfig.h" +#include "v4l2_request.h" +#include "vp8.h" + +typedef struct V4L2RequestControlsVP8 { + V4L2RequestPictureContext pic; + struct v4l2_ctrl_vp8_frame frame; +} V4L2RequestControlsVP8; + +static int v4l2_request_vp8_start_frame(AVCodecContext *avctx, + av_unused const AVBufferRef *buf_ref, + const uint8_t *buffer, + av_unused uint32_t size) +{ + const VP8Context *s = avctx->priv_data; + V4L2RequestControlsVP8 *controls = s->framep[VP8_FRAME_CURRENT]->hwaccel_picture_private; + struct v4l2_ctrl_vp8_frame *ctrl = &controls->frame; + unsigned int header_size = 3 + 7 * s->keyframe; + const uint8_t *data = buffer + header_size; + int ret, i, j, k; + + ret = ff_v4l2_request_start_frame(avctx, &controls->pic, + s->framep[VP8_FRAME_CURRENT]->tf.f); + if (ret) + return ret; + + *ctrl = (struct v4l2_ctrl_vp8_frame) { + .lf = { + .sharpness_level = s->filter.sharpness, + .level = s->filter.level, + }, + + .quant = { + .y_ac_qi = s->quant.yac_qi, + .y_dc_delta = s->quant.ydc_delta, + .y2_dc_delta = s->quant.y2dc_delta, + .y2_ac_delta = s->quant.y2ac_delta, + .uv_dc_delta = s->quant.uvdc_delta, + .uv_ac_delta = s->quant.uvac_delta, + }, + + .coder_state = { + .range = s->coder_state_at_header_end.range, + .value = s->coder_state_at_header_end.value, + .bit_count = s->coder_state_at_header_end.bit_count, + }, + + .width = avctx->width, + .height = avctx->height, + + .horizontal_scale = 0, /* scale not supported by FFmpeg */ + .vertical_scale = 0, /* scale not supported by FFmpeg */ + + .version = s->profile & 0x3, + .prob_skip_false = s->prob->mbskip, + .prob_intra = s->prob->intra, + .prob_last = s->prob->last, + .prob_gf = s->prob->golden, + .num_dct_parts = s->num_coeff_partitions, + + .first_part_size = s->header_partition_size, + .first_part_header_bits = (8 * (s->coder_state_at_header_end.input - data) - + s->coder_state_at_header_end.bit_count - 8), + }; + + for (i = 0; i < 4; i++) { + ctrl->segment.quant_update[i] = s->segmentation.base_quant[i]; + ctrl->segment.lf_update[i] = s->segmentation.filter_level[i]; + } + + for (i = 0; i < 3; i++) + ctrl->segment.segment_probs[i] = s->prob->segmentid[i]; + + if (s->segmentation.enabled) + ctrl->segment.flags |= V4L2_VP8_SEGMENT_FLAG_ENABLED; + + if 
(s->segmentation.update_map) + ctrl->segment.flags |= V4L2_VP8_SEGMENT_FLAG_UPDATE_MAP; + + if (s->segmentation.update_feature_data) + ctrl->segment.flags |= V4L2_VP8_SEGMENT_FLAG_UPDATE_FEATURE_DATA; + + if (!s->segmentation.absolute_vals) + ctrl->segment.flags |= V4L2_VP8_SEGMENT_FLAG_DELTA_VALUE_MODE; + + for (i = 0; i < 4; i++) { + ctrl->lf.ref_frm_delta[i] = s->lf_delta.ref[i]; + ctrl->lf.mb_mode_delta[i] = s->lf_delta.mode[i + MODE_I4x4]; + } + + if (s->lf_delta.enabled) + ctrl->lf.flags |= V4L2_VP8_LF_ADJ_ENABLE; + + if (s->lf_delta.update) + ctrl->lf.flags |= V4L2_VP8_LF_DELTA_UPDATE; + + if (s->filter.simple) + ctrl->lf.flags |= V4L2_VP8_LF_FILTER_TYPE_SIMPLE; + + if (s->keyframe) { + static const uint8_t keyframe_y_mode_probs[4] = { + 145, 156, 163, 128 + }; + static const uint8_t keyframe_uv_mode_probs[3] = { + 142, 114, 183 + }; + + memcpy(ctrl->entropy.y_mode_probs, keyframe_y_mode_probs, 4); + memcpy(ctrl->entropy.uv_mode_probs, keyframe_uv_mode_probs, 3); + } else { + for (i = 0; i < 4; i++) + ctrl->entropy.y_mode_probs[i] = s->prob->pred16x16[i]; + for (i = 0; i < 3; i++) + ctrl->entropy.uv_mode_probs[i] = s->prob->pred8x8c[i]; + } + for (i = 0; i < 2; i++) + for (j = 0; j < 19; j++) + ctrl->entropy.mv_probs[i][j] = s->prob->mvc[i][j]; + + for (i = 0; i < 4; i++) { + for (j = 0; j < 8; j++) { + static const int coeff_bands_inverse[8] = { + 0, 1, 2, 3, 5, 6, 4, 15 + }; + int coeff_pos = coeff_bands_inverse[j]; + + for (k = 0; k < 3; k++) { + memcpy(ctrl->entropy.coeff_probs[i][j][k], + s->prob->token[i][coeff_pos][k], 11); + } + } + } + + for (i = 0; i < 8; i++) + ctrl->dct_part_sizes[i] = s->coeff_partition_size[i]; + + if (s->framep[VP8_FRAME_PREVIOUS]) + ctrl->last_frame_ts = + ff_v4l2_request_get_capture_timestamp(s->framep[VP8_FRAME_PREVIOUS]->tf.f); + if (s->framep[VP8_FRAME_GOLDEN]) + ctrl->golden_frame_ts = + ff_v4l2_request_get_capture_timestamp(s->framep[VP8_FRAME_GOLDEN]->tf.f); + if (s->framep[VP8_FRAME_ALTREF]) + ctrl->alt_frame_ts = + ff_v4l2_request_get_capture_timestamp(s->framep[VP8_FRAME_ALTREF]->tf.f); + + if (s->keyframe) + ctrl->flags |= V4L2_VP8_FRAME_FLAG_KEY_FRAME; + + if (s->profile & 0x4) + ctrl->flags |= V4L2_VP8_FRAME_FLAG_EXPERIMENTAL; + + if (!s->invisible) + ctrl->flags |= V4L2_VP8_FRAME_FLAG_SHOW_FRAME; + + if (s->mbskip_enabled) + ctrl->flags |= V4L2_VP8_FRAME_FLAG_MB_NO_SKIP_COEFF; + + if (s->sign_bias[VP8_FRAME_GOLDEN]) + ctrl->flags |= V4L2_VP8_FRAME_FLAG_SIGN_BIAS_GOLDEN; + + if (s->sign_bias[VP8_FRAME_ALTREF]) + ctrl->flags |= V4L2_VP8_FRAME_FLAG_SIGN_BIAS_ALT; + + return 0; +} + +static int v4l2_request_vp8_decode_slice(AVCodecContext *avctx, + const uint8_t *buffer, uint32_t size) +{ + const VP8Context *s = avctx->priv_data; + V4L2RequestControlsVP8 *controls = s->framep[VP8_FRAME_CURRENT]->hwaccel_picture_private; + + return ff_v4l2_request_append_output(avctx, &controls->pic, buffer, size); +} + +static int v4l2_request_vp8_end_frame(AVCodecContext *avctx) +{ + const VP8Context *s = avctx->priv_data; + V4L2RequestControlsVP8 *controls = s->framep[VP8_FRAME_CURRENT]->hwaccel_picture_private; + + struct v4l2_ext_control control[] = { + { + .id = V4L2_CID_STATELESS_VP8_FRAME, + .ptr = &controls->frame, + .size = sizeof(controls->frame), + }, + }; + + return ff_v4l2_request_decode_frame(avctx, &controls->pic, + control, FF_ARRAY_ELEMS(control)); +} + +static int v4l2_request_vp8_init(AVCodecContext *avctx) +{ + return ff_v4l2_request_init(avctx, NULL, 0, NULL); +} + +static int v4l2_request_vp8_frame_params(AVCodecContext *avctx, + 
AVBufferRef *hw_frames_ctx) +{ + return ff_v4l2_request_frame_params(avctx, hw_frames_ctx, + V4L2_PIX_FMT_VP8_FRAME, 8); +} + +const FFHWAccel ff_vp8_v4l2request_hwaccel = { + .p.name = "vp8_v4l2request", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_VP8, + .p.pix_fmt = AV_PIX_FMT_DRM_PRIME, + .start_frame = v4l2_request_vp8_start_frame, + .decode_slice = v4l2_request_vp8_decode_slice, + .end_frame = v4l2_request_vp8_end_frame, + .flush = ff_v4l2_request_flush, + .frame_priv_data_size = sizeof(V4L2RequestControlsVP8), + .init = v4l2_request_vp8_init, + .uninit = ff_v4l2_request_uninit, + .priv_data_size = sizeof(V4L2RequestContext), + .frame_params = v4l2_request_vp8_frame_params, + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +}; diff --git a/libavcodec/v4l2_request_vp9.c b/libavcodec/v4l2_request_vp9.c new file mode 100644 index 0000000000..e93937daf2 --- /dev/null +++ b/libavcodec/v4l2_request_vp9.c @@ -0,0 +1,530 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "hwaccel_internal.h" +#include "hwconfig.h" +#include "internal.h" +#include "v4l2_request.h" +#include "vp89_rac.h" +#include "vp9dec.h" + +#define V4L2_VP9_CONTROLS_MAX 2 + +typedef struct V4L2RequestContextVP9 { + V4L2RequestContext base; + bool has_compressed_hdr; +} V4L2RequestContextVP9; + +typedef struct V4L2RequestControlsVP9 { + V4L2RequestPictureContext pic; + struct v4l2_ctrl_vp9_frame frame; + struct v4l2_ctrl_vp9_compressed_hdr compressed_hdr; +} V4L2RequestControlsVP9; + +// differential forward probability updates +static int read_prob_delta(VPXRangeCoder *c) +{ + static const uint8_t inv_map_table[255] = { + 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, + 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, + 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115, + 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130, + 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145, + 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, + 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, + 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, + 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206, + 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, + 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, + 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 
249, 250, 251, + 252, 253, 253, + }; + int d; + + /* This code is trying to do a differential probability update. For a + * current probability A in the range [1, 255], the difference to a new + * probability of any value can be expressed differentially as 1-A, 255-A + * where some part of this (absolute range) exists both in positive as + * well as the negative part, whereas another part only exists in one + * half. We're trying to code this shared part differentially, i.e. + * times two where the value of the lowest bit specifies the sign, and + * the single part is then coded on top of this. This absolute difference + * then again has a value of [0, 254], but a bigger value in this range + * indicates that we're further away from the original value A, so we + * can code this as a VLC code, since higher values are increasingly + * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough' + * updates vs. the 'fine, exact' updates further down the range, which + * adds one extra dimension to this differential update model. */ + + if (!vp89_rac_get(c)) { + d = vp89_rac_get_uint(c, 4) + 0; + } else if (!vp89_rac_get(c)) { + d = vp89_rac_get_uint(c, 4) + 16; + } else if (!vp89_rac_get(c)) { + d = vp89_rac_get_uint(c, 5) + 32; + } else { + d = vp89_rac_get_uint(c, 7); + if (d >= 65) + d = (d << 1) - 65 + vp89_rac_get(c); + d += 64; + av_assert2(d < FF_ARRAY_ELEMS(inv_map_table)); + } + + return inv_map_table[d]; +} + +static void fill_compressed_hdr(struct v4l2_ctrl_vp9_compressed_hdr *ctrl, + const uint8_t *buffer, uint32_t size, + AVCodecContext *avctx) +{ + const VP9Context *s = avctx->priv_data; + enum CompPredMode comppredmode; + int ret, i, j, k, l, m, n; + VPXRangeCoder c; + + ret = ff_vpx_init_range_decoder(&c, buffer + s->s.h.uncompressed_header_size, + s->s.h.compressed_header_size); + if (ret < 0) + return; + + if (vpx_rac_get_prob_branchy(&c, 128)) // marker bit + return; + + // txfm updates + if (s->s.h.lossless) { + ctrl->tx_mode = V4L2_VP9_TX_MODE_ONLY_4X4; + } else { + ctrl->tx_mode = vp89_rac_get_uint(&c, 2); + if (ctrl->tx_mode == V4L2_VP9_TX_MODE_ALLOW_32X32) + ctrl->tx_mode += vp89_rac_get(&c); + + if (ctrl->tx_mode == V4L2_VP9_TX_MODE_SELECT) { + for (i = 0; i < 2; i++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->tx8[i][0] = read_prob_delta(&c); + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->tx16[i][j] = read_prob_delta(&c); + for (i = 0; i < 2; i++) + for (j = 0; j < 3; j++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->tx32[i][j] = read_prob_delta(&c); + } + } + + // coef updates + for (i = 0; i < 4; i++) { + if (vp89_rac_get(&c)) { + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + for (l = 0; l < 6; l++) + for (m = 0; m < 6; m++) { + if (m >= 3 && l == 0) // dc only has 3 pt + break; + for (n = 0; n < 3; n++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->coef[i][j][k][l][m][n] = + read_prob_delta(&c); + } + } + if (ctrl->tx_mode == i) + break; + } + + // mode updates + for (i = 0; i < 3; i++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->skip[i] = read_prob_delta(&c); + if (!s->s.h.keyframe && !s->s.h.intraonly) { + for (i = 0; i < 7; i++) + for (j = 0; j < 3; j++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->inter_mode[i][j] = read_prob_delta(&c); + + if (s->s.h.filtermode == FILTER_SWITCHABLE) + for (i = 0; i < 4; i++) + for (j = 0; j < 2; j++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->interp_filter[i][j] = read_prob_delta(&c); + + for (i = 0; i < 4; i++) + if 
(vpx_rac_get_prob_branchy(&c, 252)) + ctrl->is_inter[i] = read_prob_delta(&c); + + if (s->s.h.allowcompinter) { + comppredmode = vp89_rac_get(&c); + if (comppredmode) + comppredmode += vp89_rac_get(&c); + if (comppredmode == PRED_SWITCHABLE) + for (i = 0; i < 5; i++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->comp_mode[i] = read_prob_delta(&c); + } else { + comppredmode = PRED_SINGLEREF; + } + + if (comppredmode != PRED_COMPREF) { + for (i = 0; i < 5; i++) { + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->single_ref[i][0] = read_prob_delta(&c); + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->single_ref[i][1] = read_prob_delta(&c); + } + } + + if (comppredmode != PRED_SINGLEREF) { + for (i = 0; i < 5; i++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->comp_ref[i] = read_prob_delta(&c); + } + + for (i = 0; i < 4; i++) + for (j = 0; j < 9; j++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->y_mode[i][j] = read_prob_delta(&c); + + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + for (k = 0; k < 3; k++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->partition[(i * 4) + j][k] = read_prob_delta(&c); + + // mv fields + for (i = 0; i < 3; i++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->mv.joint[i] = (vp89_rac_get_uint(&c, 7) << 1) | 1; + + for (i = 0; i < 2; i++) { + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->mv.sign[i] = (vp89_rac_get_uint(&c, 7) << 1) | 1; + + for (j = 0; j < 10; j++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->mv.classes[i][j] = (vp89_rac_get_uint(&c, 7) << 1) | 1; + + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->mv.class0_bit[i] = (vp89_rac_get_uint(&c, 7) << 1) | 1; + + for (j = 0; j < 10; j++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->mv.bits[i][j] = (vp89_rac_get_uint(&c, 7) << 1) | 1; + } + + for (i = 0; i < 2; i++) { + for (j = 0; j < 2; j++) + for (k = 0; k < 3; k++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->mv.class0_fr[i][j][k] = + (vp89_rac_get_uint(&c, 7) << 1) | 1; + + for (j = 0; j < 3; j++) + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->mv.fr[i][j] = (vp89_rac_get_uint(&c, 7) << 1) | 1; + } + + if (s->s.h.highprecisionmvs) { + for (i = 0; i < 2; i++) { + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->mv.class0_hp[i] = (vp89_rac_get_uint(&c, 7) << 1) | 1; + + if (vpx_rac_get_prob_branchy(&c, 252)) + ctrl->mv.hp[i] = (vp89_rac_get_uint(&c, 7) << 1) | 1; + } + } + } + + memcpy(ctrl->uv_mode, s->prob.p.uv_mode, sizeof(ctrl->uv_mode)); +} + +static void fill_frame(struct v4l2_ctrl_vp9_frame *ctrl, AVCodecContext *avctx) +{ + const VP9Context *s = avctx->priv_data; + AVFrame *ref; + int i; + + *ctrl = (struct v4l2_ctrl_vp9_frame) { + .lf = { + .level = s->s.h.filter.level, + .sharpness = s->s.h.filter.sharpness, + }, + + .quant = { + .base_q_idx = s->s.h.yac_qi, + .delta_q_y_dc = s->s.h.ydc_qdelta, + .delta_q_uv_dc = s->s.h.uvdc_qdelta, + .delta_q_uv_ac = s->s.h.uvac_qdelta, + }, + + .compressed_header_size = s->s.h.compressed_header_size, + .uncompressed_header_size = s->s.h.uncompressed_header_size, + .frame_width_minus_1 = avctx->width - 1, + .frame_height_minus_1 = avctx->height - 1, + .render_width_minus_1 = s->w - 1, + .render_height_minus_1 = s->h - 1, + .reset_frame_context = s->s.h.resetctx > 0 ? 
s->s.h.resetctx - 1 : 0, + .frame_context_idx = s->s.h.framectxid, + .profile = s->s.h.profile, + .bit_depth = s->s.h.bpp, + .interpolation_filter = s->s.h.filtermode ^ (s->s.h.filtermode <= 1), + .tile_cols_log2 = s->s.h.tiling.log2_tile_cols, + .tile_rows_log2 = s->s.h.tiling.log2_tile_rows, + .reference_mode = s->s.h.comppredmode, + }; + + for (i = 0; i < 4; i++) + ctrl->lf.ref_deltas[i] = s->s.h.lf_delta.ref[i]; + + for (i = 0; i < 2; i++) + ctrl->lf.mode_deltas[i] = s->s.h.lf_delta.mode[i]; + + if (s->s.h.lf_delta.enabled) + ctrl->lf.flags |= V4L2_VP9_LOOP_FILTER_FLAG_DELTA_ENABLED; + + if (s->s.h.lf_delta.updated) + ctrl->lf.flags |= V4L2_VP9_LOOP_FILTER_FLAG_DELTA_UPDATE; + + for (i = 0; i < 8; i++) { + if (s->s.h.segmentation.feat[i].q_enabled) { + ctrl->seg.feature_data[i][V4L2_VP9_SEG_LVL_ALT_Q] = + s->s.h.segmentation.feat[i].q_val; + ctrl->seg.feature_enabled[i] |= 1 << V4L2_VP9_SEG_LVL_ALT_Q; + } + + if (s->s.h.segmentation.feat[i].lf_enabled) { + ctrl->seg.feature_data[i][V4L2_VP9_SEG_LVL_ALT_L] = + s->s.h.segmentation.feat[i].lf_val; + ctrl->seg.feature_enabled[i] |= 1 << V4L2_VP9_SEG_LVL_ALT_L; + } + + if (s->s.h.segmentation.feat[i].ref_enabled) { + ctrl->seg.feature_data[i][V4L2_VP9_SEG_LVL_REF_FRAME] = + s->s.h.segmentation.feat[i].ref_val; + ctrl->seg.feature_enabled[i] |= 1 << V4L2_VP9_SEG_LVL_REF_FRAME; + } + + if (s->s.h.segmentation.feat[i].skip_enabled) + ctrl->seg.feature_enabled[i] |= 1 << V4L2_VP9_SEG_LVL_SKIP; + } + + for (i = 0; i < 7; i++) + ctrl->seg.tree_probs[i] = s->s.h.segmentation.prob[i]; + + if (s->s.h.segmentation.temporal) { + for (i = 0; i < 3; i++) + ctrl->seg.pred_probs[i] = s->s.h.segmentation.pred_prob[i]; + } else { + memset(ctrl->seg.pred_probs, 255, sizeof(ctrl->seg.pred_probs)); + } + + if (s->s.h.segmentation.enabled) + ctrl->seg.flags |= V4L2_VP9_SEGMENTATION_FLAG_ENABLED; + + if (s->s.h.segmentation.update_map) + ctrl->seg.flags |= V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP; + + if (s->s.h.segmentation.temporal) + ctrl->seg.flags |= V4L2_VP9_SEGMENTATION_FLAG_TEMPORAL_UPDATE; + + if (s->frame_header->segmentation_update_data) + ctrl->seg.flags |= V4L2_VP9_SEGMENTATION_FLAG_UPDATE_DATA; + + if (s->s.h.segmentation.absolute_vals) + ctrl->seg.flags |= V4L2_VP9_SEGMENTATION_FLAG_ABS_OR_DELTA_UPDATE; + + if (s->s.h.keyframe) + ctrl->flags |= V4L2_VP9_FRAME_FLAG_KEY_FRAME; + + if (!s->s.h.invisible) + ctrl->flags |= V4L2_VP9_FRAME_FLAG_SHOW_FRAME; + + if (s->s.h.errorres) + ctrl->flags |= V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT; + + if (s->s.h.intraonly) + ctrl->flags |= V4L2_VP9_FRAME_FLAG_INTRA_ONLY; + + if (!s->s.h.keyframe && s->s.h.highprecisionmvs) + ctrl->flags |= V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV; + + if (s->s.h.refreshctx) + ctrl->flags |= V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX; + + if (s->s.h.parallelmode) + ctrl->flags |= V4L2_VP9_FRAME_FLAG_PARALLEL_DEC_MODE; + + if (s->ss_h) + ctrl->flags |= V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING; + + if (s->ss_v) + ctrl->flags |= V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING; + + if (avctx->color_range == AVCOL_RANGE_JPEG) + ctrl->flags |= V4L2_VP9_FRAME_FLAG_COLOR_RANGE_FULL_SWING; + + ref = s->s.refs[s->s.h.refidx[0]].f; + if (ref && ref->private_ref) + ctrl->last_frame_ts = ff_v4l2_request_get_capture_timestamp(ref); + + ref = s->s.refs[s->s.h.refidx[1]].f; + if (ref && ref->private_ref) + ctrl->golden_frame_ts = ff_v4l2_request_get_capture_timestamp(ref); + + ref = s->s.refs[s->s.h.refidx[2]].f; + if (ref && ref->private_ref) + ctrl->alt_frame_ts = ff_v4l2_request_get_capture_timestamp(ref); + + if 
(s->s.h.signbias[0]) + ctrl->ref_frame_sign_bias |= V4L2_VP9_SIGN_BIAS_LAST; + + if (s->s.h.signbias[1]) + ctrl->ref_frame_sign_bias |= V4L2_VP9_SIGN_BIAS_GOLDEN; + + if (s->s.h.signbias[2]) + ctrl->ref_frame_sign_bias |= V4L2_VP9_SIGN_BIAS_ALT; +} + +static int v4l2_request_vp9_start_frame(AVCodecContext *avctx, + av_unused const AVBufferRef *buf_ref, + const uint8_t *buffer, + uint32_t size) +{ + const VP9SharedContext *h = avctx->priv_data; + const VP9Frame *f = &h->frames[CUR_FRAME]; + V4L2RequestContextVP9 *ctx = avctx->internal->hwaccel_priv_data; + V4L2RequestControlsVP9 *controls = f->hwaccel_picture_private; + int ret; + + ret = ff_v4l2_request_start_frame(avctx, &controls->pic, f->tf.f); + if (ret) + return ret; + + fill_frame(&controls->frame, avctx); + + if (ctx->has_compressed_hdr) + fill_compressed_hdr(&controls->compressed_hdr, buffer, size, avctx); + + return 0; +} + +static int v4l2_request_vp9_decode_slice(AVCodecContext *avctx, + const uint8_t *buffer, uint32_t size) +{ + const VP9SharedContext *h = avctx->priv_data; + V4L2RequestControlsVP9 *controls = h->frames[CUR_FRAME].hwaccel_picture_private; + + return ff_v4l2_request_append_output(avctx, &controls->pic, buffer, size); +} + +static int v4l2_request_vp9_end_frame(AVCodecContext *avctx) +{ + const VP9SharedContext *h = avctx->priv_data; + V4L2RequestContextVP9 *ctx = avctx->internal->hwaccel_priv_data; + V4L2RequestControlsVP9 *controls = h->frames[CUR_FRAME].hwaccel_picture_private; + int count = 0; + + struct v4l2_ext_control control[V4L2_VP9_CONTROLS_MAX] = {}; + + control[count++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_VP9_FRAME, + .ptr = &controls->frame, + .size = sizeof(controls->frame), + }; + + if (ctx->has_compressed_hdr) { + control[count++] = (struct v4l2_ext_control) { + .id = V4L2_CID_STATELESS_VP9_COMPRESSED_HDR, + .ptr = &controls->compressed_hdr, + .size = sizeof(controls->compressed_hdr), + }; + } + + return ff_v4l2_request_decode_frame(avctx, &controls->pic, control, count); +} + +static int v4l2_request_vp9_post_frames_ctx(AVCodecContext *avctx) +{ + V4L2RequestContextVP9 *ctx = avctx->internal->hwaccel_priv_data; + + struct v4l2_query_ext_ctrl compressed_hdr = { + .id = V4L2_CID_STATELESS_VP9_COMPRESSED_HDR, + }; + + // TODO: check V4L2_CID_MPEG_VIDEO_VP9_PROFILE + // TODO: check V4L2_CID_MPEG_VIDEO_VP9_LEVEL + + if (!ff_v4l2_request_query_control(avctx, &compressed_hdr)) + ctx->has_compressed_hdr = true; + else + ctx->has_compressed_hdr = false; + + return 0; +} + +static int v4l2_request_vp9_init(AVCodecContext *avctx) +{ + struct v4l2_ctrl_vp9_frame frame; + + struct v4l2_ext_control control[] = { + { + .id = V4L2_CID_STATELESS_VP9_FRAME, + .ptr = &frame, + .size = sizeof(frame), + }, + }; + + fill_frame(&frame, avctx); + + return ff_v4l2_request_init(avctx, control, FF_ARRAY_ELEMS(control), + v4l2_request_vp9_post_frames_ctx); +} + +static int v4l2_request_vp9_frame_params(AVCodecContext *avctx, + AVBufferRef *hw_frames_ctx) +{ + const VP9Context *s = avctx->priv_data; + uint8_t bit_depth = s ? 
s->s.h.bpp : 0; + + return ff_v4l2_request_frame_params(avctx, hw_frames_ctx, + V4L2_PIX_FMT_VP9_FRAME, bit_depth); +} + +const FFHWAccel ff_vp9_v4l2request_hwaccel = { + .p.name = "vp9_v4l2request", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_VP9, + .p.pix_fmt = AV_PIX_FMT_DRM_PRIME, + .start_frame = v4l2_request_vp9_start_frame, + .decode_slice = v4l2_request_vp9_decode_slice, + .end_frame = v4l2_request_vp9_end_frame, + .flush = ff_v4l2_request_flush, + .frame_priv_data_size = sizeof(V4L2RequestControlsVP9), + .init = v4l2_request_vp9_init, + .uninit = ff_v4l2_request_uninit, + .priv_data_size = sizeof(V4L2RequestContextVP9), + .frame_params = v4l2_request_vp9_frame_params, + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +}; diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index 9010e19e6b..8a1e3425ca 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -184,6 +184,9 @@ static enum AVPixelFormat get_pixel_format(VP8Context *s) #endif #if CONFIG_VP8_NVDEC_HWACCEL AV_PIX_FMT_CUDA, +#endif +#if CONFIG_VP8_V4L2REQUEST_HWACCEL + AV_PIX_FMT_DRM_PRIME, #endif AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE, @@ -2975,6 +2978,9 @@ const FFCodec ff_vp8_decoder = { #endif #if CONFIG_VP8_NVDEC_HWACCEL HWACCEL_NVDEC(vp8), +#endif +#if CONFIG_VP8_V4L2REQUEST_HWACCEL + HWACCEL_V4L2REQUEST(vp8), #endif NULL }, diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c index 1f34197a28..fd2c1c43a0 100644 --- a/libavcodec/vp9.c +++ b/libavcodec/vp9.c @@ -168,6 +168,7 @@ static int update_size(AVCodecContext *avctx, int w, int h) CONFIG_VP9_D3D11VA_HWACCEL * 2 + \ CONFIG_VP9_D3D12VA_HWACCEL + \ CONFIG_VP9_NVDEC_HWACCEL + \ + CONFIG_VP9_V4L2REQUEST_HWACCEL + \ CONFIG_VP9_VAAPI_HWACCEL + \ CONFIG_VP9_VDPAU_HWACCEL + \ CONFIG_VP9_VIDEOTOOLBOX_HWACCEL + \ @@ -213,6 +214,9 @@ static int update_size(AVCodecContext *avctx, int w, int h) #endif #if CONFIG_VP9_VULKAN_HWACCEL *fmtp++ = AV_PIX_FMT_VULKAN; +#endif +#if CONFIG_VP9_V4L2REQUEST_HWACCEL + *fmtp++ = AV_PIX_FMT_DRM_PRIME; #endif break; case AV_PIX_FMT_YUV420P12: @@ -1959,6 +1963,9 @@ const FFCodec ff_vp9_decoder = { #endif #if CONFIG_VP9_VULKAN_HWACCEL HWACCEL_VULKAN(vp9), +#endif +#if CONFIG_VP9_V4L2REQUEST_HWACCEL + HWACCEL_V4L2REQUEST(vp9), #endif NULL }, diff --git a/libavcodec/weak_link.c b/libavcodec/weak_link.c new file mode 100644 index 0000000000..71f6cc3567 --- /dev/null +++ b/libavcodec/weak_link.c @@ -0,0 +1,127 @@ +/* + Copyright (C) 2024 John Cox john.cox@raspberrypi.com + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ */
+
+#include <stdatomic.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include "weak_link.h"
+
+struct ff_weak_link_master {
+    atomic_int ref_count;    /* 0 is single ref for easier atomics */
+    pthread_rwlock_t lock;
+    void * ptr;
+};
+
+static inline struct ff_weak_link_master * weak_link_x(struct ff_weak_link_client * c)
+{
+    return (struct ff_weak_link_master *)c;
+}
+
+struct ff_weak_link_master * ff_weak_link_new(void * p)
+{
+    struct ff_weak_link_master * w = malloc(sizeof(*w));
+    if (!w)
+        return NULL;
+    atomic_init(&w->ref_count, 0);
+    w->ptr = p;
+    if (pthread_rwlock_init(&w->lock, NULL)) {
+        free(w);
+        return NULL;
+    }
+    return w;
+}
+
+static void weak_link_do_unref(struct ff_weak_link_master * const w)
+{
+    int n = atomic_fetch_sub(&w->ref_count, 1);
+    if (n)
+        return;
+
+    pthread_rwlock_destroy(&w->lock);
+    free(w);
+}
+
+// Unref & break link
+void ff_weak_link_break(struct ff_weak_link_master ** ppLink)
+{
+    struct ff_weak_link_master * const w = *ppLink;
+    if (!w)
+        return;
+
+    *ppLink = NULL;
+    pthread_rwlock_wrlock(&w->lock);
+    w->ptr = NULL;
+    pthread_rwlock_unlock(&w->lock);
+
+    weak_link_do_unref(w);
+}
+
+struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w)
+{
+    if (!w)
+        return NULL;
+    atomic_fetch_add(&w->ref_count, 1);
+    return (struct ff_weak_link_client*)w;
+}
+
+void ff_weak_link_unref(struct ff_weak_link_client ** ppLink)
+{
+    struct ff_weak_link_master * const w = weak_link_x(*ppLink);
+    if (!w)
+        return;
+
+    *ppLink = NULL;
+    weak_link_do_unref(w);
+}
+
+void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink)
+{
+    struct ff_weak_link_master * const w = weak_link_x(*ppLink);
+
+    if (!w)
+        return NULL;
+
+    if (pthread_rwlock_rdlock(&w->lock))
+        goto broken;
+
+    if (w->ptr)
+        return w->ptr;
+
+    pthread_rwlock_unlock(&w->lock);
+
+broken:
+    *ppLink = NULL;
+    weak_link_do_unref(w);
+    return NULL;
+}
+
+// Ignores a NULL c (so can be on the return path of both broken & live links)
+void ff_weak_link_unlock(struct ff_weak_link_client * c)
+{
+    struct ff_weak_link_master * const w = weak_link_x(c);
+    if (w)
+        pthread_rwlock_unlock(&w->lock);
+}
+
+
diff --git a/libavcodec/weak_link.h b/libavcodec/weak_link.h
new file mode 100644
index 0000000000..5c66b29f9b
--- /dev/null
+++ b/libavcodec/weak_link.h
@@ -0,0 +1,46 @@
+/*
+ Copyright (C) 2024 John Cox john.cox@raspberrypi.com
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ */ + +#ifndef AVCODEC_WEAK_LINK_H +#define AVCODEC_WEAK_LINK_H + +struct ff_weak_link_master; +struct ff_weak_link_client; + +struct ff_weak_link_master * ff_weak_link_new(void * p); +void ff_weak_link_break(struct ff_weak_link_master ** ppLink); + +struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w); +void ff_weak_link_unref(struct ff_weak_link_client ** ppLink); + +// Returns NULL if link broken - in this case it will also zap +// *ppLink and unref the weak_link. +// Returns NULL if *ppLink is NULL (so a link once broken stays broken) +// +// The above does mean that there is a race if this is called simultainiously +// by two threads using the same weak_link_client (so don't do that) +void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink); +void ff_weak_link_unlock(struct ff_weak_link_client * c); + +#endif diff --git a/libavfilter/Makefile b/libavfilter/Makefile index a530cfae29..830becf8d4 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -279,6 +279,7 @@ OBJS-$(CONFIG_DEFLICKER_FILTER) += vf_deflicker.o OBJS-$(CONFIG_DEINTERLACE_D3D12_FILTER) += vf_deinterlace_d3d12.o OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_vpp_qsv.o OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER) += vf_deinterlace_vaapi.o vaapi_vpp.o +OBJS-$(CONFIG_DEINTERLACE_V4L2M2M_FILTER) += vf_deinterlace_v4l2m2m.o OBJS-$(CONFIG_DEJUDDER_FILTER) += vf_dejudder.o OBJS-$(CONFIG_DELOGO_FILTER) += vf_delogo.o OBJS-$(CONFIG_DENOISE_VAAPI_FILTER) += vf_misc_vaapi.o vaapi_vpp.o @@ -553,6 +554,7 @@ OBJS-$(CONFIG_TRANSPOSE_VT_FILTER) += vf_transpose_vt.o OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER) += vf_transpose_vulkan.o vulkan.o vulkan_filter.o OBJS-$(CONFIG_TRIM_FILTER) += trim.o OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o +OBJS-$(CONFIG_UNSAND_FILTER) += vf_unsand.o OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER) += vf_unsharp_opencl.o opencl.o \ opencl/unsharp.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index e26859e159..53f8979db0 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -253,6 +253,7 @@ extern const FFFilter ff_vf_deflate; extern const FFFilter ff_vf_deflicker; extern const FFFilter ff_vf_deinterlace_qsv; extern const FFFilter ff_vf_deinterlace_d3d12; +extern const FFFilter ff_vf_deinterlace_v4l2m2m; extern const FFFilter ff_vf_deinterlace_vaapi; extern const FFFilter ff_vf_dejudder; extern const FFFilter ff_vf_delogo; @@ -448,6 +449,7 @@ extern const FFFilter ff_vf_scale_d3d11; extern const FFFilter ff_vf_scale_d3d12; extern const FFFilter ff_vf_scale_npp; extern const FFFilter ff_vf_scale_qsv; +extern const FFFilter ff_vf_scale_v4l2m2m; extern const FFFilter ff_vf_scale_vaapi; extern const FFFilter ff_vf_scale_vt; extern const FFFilter ff_vf_scale_vulkan; @@ -520,6 +522,7 @@ extern const FFFilter ff_vf_transpose_vt; extern const FFFilter ff_vf_transpose_vulkan; extern const FFFilter ff_vf_trim; extern const FFFilter ff_vf_unpremultiply; +extern const FFFilter ff_vf_unsand; extern const FFFilter ff_vf_unsharp; extern const FFFilter ff_vf_unsharp_opencl; extern const FFFilter ff_vf_untile; diff --git a/libavfilter/buffersink.c b/libavfilter/buffersink.c index 75b93cee30..72ca351774 100644 --- a/libavfilter/buffersink.c +++ b/libavfilter/buffersink.c @@ -89,6 +89,11 @@ typedef struct BufferSinkContext { unsigned nb_channel_layouts; AVFrame *peeked_frame; + + union { + av_buffersink_alloc_video_frame * video; + } alloc_cb; + void * alloc_v; } BufferSinkContext; int 
attribute_align_arg av_buffersink_get_frame(AVFilterContext *ctx, AVFrame *frame) @@ -164,6 +169,22 @@ int attribute_align_arg av_buffersink_get_samples(AVFilterContext *ctx, return get_frame_internal(ctx, frame, 0, nb_samples); } +static AVFrame * alloc_video_buffer(AVFilterLink *link, int w, int h) +{ + AVFilterContext * const ctx = link->dst; + BufferSinkContext * const bs = ctx->priv; + return bs->alloc_cb.video ? bs->alloc_cb.video(ctx, bs->alloc_v, w, h) : + ff_default_get_video_buffer(link, w, h); +} + +int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v) +{ + BufferSinkContext * const bs = ctx->priv; + bs->alloc_cb.video = cb; + bs->alloc_v = v; + return 0; +} + static av_cold int common_init(AVFilterContext *ctx) { BufferSinkContext *buf = ctx->priv; @@ -565,6 +586,14 @@ static const AVOption abuffersink_options[] = { AVFILTER_DEFINE_CLASS(buffersink); AVFILTER_DEFINE_CLASS(abuffersink); +static const AVFilterPad avfilter_vsink_buffer_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .get_buffer = {.video = alloc_video_buffer}, + }, +}; + const FFFilter ff_vsink_buffer = { .p.name = "buffersink", .p.description = NULL_IF_CONFIG_SMALL("Buffer video frames, and make them available to the end of the filter graph."), @@ -574,7 +603,7 @@ const FFFilter ff_vsink_buffer = { .init = init_video, .uninit = uninit, .activate = activate, - FILTER_INPUTS(ff_video_default_filterpad), + FILTER_INPUTS(avfilter_vsink_buffer_inputs), FILTER_QUERY_FUNC2(vsink_query_formats), }; diff --git a/libavfilter/buffersink.h b/libavfilter/buffersink.h index 4e658a9795..027abc7483 100644 --- a/libavfilter/buffersink.h +++ b/libavfilter/buffersink.h @@ -165,6 +165,9 @@ int av_buffersink_get_frame(AVFilterContext *ctx, AVFrame *frame); */ int av_buffersink_get_samples(AVFilterContext *ctx, AVFrame *frame, int nb_samples); +typedef AVFrame * av_buffersink_alloc_video_frame(AVFilterContext * ctx, void * v, int w, int h); +int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v); + /** * @} */ diff --git a/libavfilter/buffersrc.c b/libavfilter/buffersrc.c index b18d3f24dd..a13279cc7d 100644 --- a/libavfilter/buffersrc.c +++ b/libavfilter/buffersrc.c @@ -227,7 +227,7 @@ int attribute_align_arg av_buffersrc_add_frame_flags(AVFilterContext *ctx, AVFra switch (ctx->outputs[0]->type) { case AVMEDIA_TYPE_VIDEO: - CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height, + CHECK_VIDEO_PARAM_CHANGE(ctx, s, av_frame_cropped_width(frame), av_frame_cropped_height(frame), frame->format, frame->colorspace, frame->color_range, frame->alpha_mode, frame->pts); break; diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c index 67efc3a8c3..a482e4cfc1 100644 --- a/libavfilter/vf_bwdif.c +++ b/libavfilter/vf_bwdif.c @@ -124,19 +124,28 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic, YADIFContext *yadif = &bwdif->yadif; ThreadData td = { .frame = dstpic, .parity = parity, .tff = tff }; int i; + int last_plane = -1; for (i = 0; i < yadif->csp->nb_components; i++) { int w = dstpic->width; int h = dstpic->height; + const AVComponentDescriptor * const comp = yadif->csp->comp + i; + + // If the last plane was the same as this plane assume we've dealt + // with all the pels already + if (last_plane == comp->plane) + continue; + last_plane = comp->plane; if (i == 1 || i == 2) { w = AV_CEIL_RSHIFT(w, yadif->csp->log2_chroma_w); h = AV_CEIL_RSHIFT(h, yadif->csp->log2_chroma_h); } - td.w = w; - 
td.h = h; - td.plane = i; + // comp step is in bytes but td.w is in pels + td.w = w * comp->step / ((comp->depth + 7) / 8); + td.h = h; + td.plane = comp->plane; ff_filter_execute(ctx, filter_slice, &td, NULL, FFMIN((h+3)/4, ff_filter_get_nb_threads(ctx))); @@ -160,6 +169,7 @@ static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16, + AV_PIX_FMT_NV12, AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16, diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c new file mode 100644 index 0000000000..38ead43f49 --- /dev/null +++ b/libavfilter/vf_deinterlace_v4l2m2m.c @@ -0,0 +1,2102 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * deinterlace video filter - V4L2 M2M + */ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" + +#include "libavutil/avassert.h" +#include "libavutil/avstring.h" +#include "libavutil/common.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_drm.h" +#include "libavutil/internal.h" +#include "libavutil/mathematics.h" +#include "libavutil/mem.h" +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" +#include "libavutil/time.h" + +#define FF_INTERNAL_FIELDS 1 +#include "framequeue.h" +#include "filters.h" +#include "avfilter.h" +#include "formats.h" +#include "scale_eval.h" +#include "video.h" + +#ifndef DRM_FORMAT_P030 +#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ +#endif + +// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined +// in drm_fourcc.h hopefully will be sometime in the future but until then... 
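// Illustrative aside (editorial, not part of the original patch): v4l2_fourcc()
// packs its four characters little-endian, so for example
//   v4l2_fourcc('N', 'C', '3', '0') == 'N' | 'C' << 8 | '3' << 16 | '0' << 24 == 0x3033434e
// which is why the local fallback #defines below are enough to keep building
// against kernel headers that do not yet provide these pixel formats.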
+#ifndef V4L2_PIX_FMT_NV12_10_COL128 +#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') +#endif + +#ifndef V4L2_PIX_FMT_NV12_COL128 +#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ +#endif + +typedef struct V4L2Queue V4L2Queue; +typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared; + +typedef enum filter_type_v4l2_e +{ + FILTER_V4L2_DEINTERLACE = 1, + FILTER_V4L2_SCALE, +} filter_type_v4l2_t; + +typedef struct V4L2Buffer { + int enqueued; + int reenqueue; + struct v4l2_buffer buffer; + AVFrame frame; + struct v4l2_plane planes[VIDEO_MAX_PLANES]; + int num_planes; + AVDRMFrameDescriptor drm_frame; + V4L2Queue *q; +} V4L2Buffer; + +typedef struct V4L2Queue { + struct v4l2_format format; + struct v4l2_selection sel; + int eos; + int num_buffers; + V4L2Buffer *buffers; + const char * name; + DeintV4L2M2MContextShared *ctx; +} V4L2Queue; + +typedef struct pts_stats_s +{ + void * logctx; + const char * name; // For debug + unsigned int last_count; + unsigned int last_interval; + int64_t last_pts; +} pts_stats_t; + +#define PTS_TRACK_SIZE 32 +typedef struct pts_track_el_s +{ + uint32_t n; + unsigned int interval; + AVFrame * props; +} pts_track_el_t; + +typedef struct pts_track_s +{ + uint32_t n; + uint32_t last_n; + int got_2; + void * logctx; + pts_stats_t stats; + pts_track_el_t a[PTS_TRACK_SIZE]; +} pts_track_t; + +typedef enum drain_state_e +{ + DRAIN_NONE = 0, // Not draining + DRAIN_TIMEOUT, // Drain until normal timeout setup yields no frame + DRAIN_LAST, // Drain with long timeout last_frame in received on output expected + DRAIN_EOS, // Drain with long timeout EOS expected + DRAIN_DONE // Drained +} drain_state_t; + +typedef struct DeintV4L2M2MContextShared { + void * logctx; // For logging - will be NULL when done + filter_type_v4l2_t filter_type; + + int fd; + int done; // fd closed - awating all refs dropped + int width; + int height; + + int drain; // EOS received (inlink status) + drain_state_t drain_state; + int64_t drain_pts; // PTS associated with inline status + + unsigned int frames_rx; + unsigned int frames_tx; + + // from options + int output_width; + int output_height; + enum AVPixelFormat output_format; + + int has_enc_stop; + // We expect to get exactly the same number of frames out as we put in + // We can drain by matching input to output + int one_to_one; + + int orig_width; + int orig_height; + atomic_uint refcount; + + AVBufferRef *hw_frames_ctx; + + unsigned int field_order; + + pts_track_t track; + + V4L2Queue output; + V4L2Queue capture; +} DeintV4L2M2MContextShared; + +typedef struct DeintV4L2M2MContext { + const AVClass *class; + + DeintV4L2M2MContextShared *shared; + + char * w_expr; + char * h_expr; + char * output_format_string;; + + int force_original_aspect_ratio; + int force_divisible_by; + + char *colour_primaries_string; + char *colour_transfer_string; + char *colour_matrix_string; + int colour_range; + char *chroma_location_string; + + enum AVColorPrimaries colour_primaries; + enum AVColorTransferCharacteristic colour_transfer; + enum AVColorSpace colour_matrix; + enum AVChromaLocation chroma_location; +} DeintV4L2M2MContext; + + +static inline void frame_set_progressive(AVFrame* frame) +{ + frame->flags &= ~(AV_FRAME_FLAG_TOP_FIELD_FIRST | AV_FRAME_FLAG_INTERLACED); +} + +static inline int frame_is_interlaced(const AVFrame* const frame) +{ + return (frame->flags & AV_FRAME_FLAG_INTERLACED) != 0; +} + +static inline int frame_is_tff(const AVFrame* const frame) 
+{ + return (frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) != 0; +} + +static inline int drain_frame_expected(const drain_state_t d) +{ + return d == DRAIN_EOS || d == DRAIN_LAST; +} + +// These just list the ones we know we can cope with +static uint32_t +fmt_av_to_v4l2(const enum AVPixelFormat avfmt) +{ + switch (avfmt) { + case AV_PIX_FMT_YUV420P: + return V4L2_PIX_FMT_YUV420; + case AV_PIX_FMT_NV12: + return V4L2_PIX_FMT_NV12; +#if CONFIG_SAND + case AV_PIX_FMT_RPI4_8: + case AV_PIX_FMT_SAND128: + return V4L2_PIX_FMT_NV12_COL128; +#endif + default: + break; + } + return 0; +} + +static enum AVPixelFormat +fmt_v4l2_to_av(const uint32_t pixfmt) +{ + switch (pixfmt) { + case V4L2_PIX_FMT_YUV420: + return AV_PIX_FMT_YUV420P; + case V4L2_PIX_FMT_NV12: + return AV_PIX_FMT_NV12; +#if CONFIG_SAND + case V4L2_PIX_FMT_NV12_COL128: + return AV_PIX_FMT_RPI4_8; +#endif + default: + break; + } + return AV_PIX_FMT_NONE; +} + +static unsigned int pts_stats_interval(const pts_stats_t * const stats) +{ + return stats->last_interval; +} + +// Pick 64 for max last count - that is >1sec at 60fps +#define STATS_LAST_COUNT_MAX 64 +#define STATS_INTERVAL_MAX (1 << 30) +static void pts_stats_add(pts_stats_t * const stats, int64_t pts) +{ + if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { + if (stats->last_count < STATS_LAST_COUNT_MAX) + ++stats->last_count; + return; + } + + if (stats->last_pts != AV_NOPTS_VALUE) { + const int64_t interval = pts - stats->last_pts; + + if (interval < 0 || interval >= STATS_INTERVAL_MAX || + stats->last_count >= STATS_LAST_COUNT_MAX) { + if (stats->last_interval != 0) + av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", + __func__, stats->name, interval, stats->last_count); + stats->last_interval = 0; + } + else { + const int64_t frame_time = interval / (int64_t)stats->last_count; + + if (frame_time != stats->last_interval) + av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", + __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); + stats->last_interval = frame_time; + } + } + + stats->last_pts = pts; + stats->last_count = 1; +} + +static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name) +{ + *stats = (pts_stats_t){ + .logctx = logctx, + .name = name, + .last_count = 1, + .last_interval = 0, + .last_pts = AV_NOPTS_VALUE + }; +} + +static inline uint32_t pts_track_next_n(pts_track_t * const trk) +{ + if (++trk->n == 0) + trk->n = 1; + return trk->n; +} + +static int pts_track_get_frame(pts_track_t * const trk, const struct timeval tv, AVFrame * const dst) +{ + uint32_t n = (uint32_t)(tv.tv_usec / 2 + tv.tv_sec * 500000); + pts_track_el_t * t; + + // As a first guess assume that n==0 means last frame + if (n == 0) { + n = trk->last_n; + if (n == 0) + goto fail; + } + + t = trk->a + (n & (PTS_TRACK_SIZE - 1)); + + if (t->n != n) { + av_log(trk->logctx, AV_LOG_ERROR, "%s: track failure: got %u, expected %u\n", __func__, n, trk->n); + goto fail; + } + + // 1st frame is simple - just believe it + if (n != trk->last_n) { + trk->last_n = n; + trk->got_2 = 0; + return av_frame_copy_props(dst, t->props); + } + + // Only believe in a single interpolated frame + if (trk->got_2) + goto fail; + trk->got_2 = 1; + + av_frame_copy_props(dst, t->props); + + + // If we can't guess - don't + if (t->interval == 0) { + dst->best_effort_timestamp = AV_NOPTS_VALUE; + dst->pts = AV_NOPTS_VALUE; + dst->pkt_dts = AV_NOPTS_VALUE; + } + else { + if 
(dst->best_effort_timestamp != AV_NOPTS_VALUE) + dst->best_effort_timestamp += t->interval / 2; + if (dst->pts != AV_NOPTS_VALUE) + dst->pts += t->interval / 2; + if (dst->pkt_dts != AV_NOPTS_VALUE) + dst->pkt_dts += t->interval / 2; + } + + return 0; + +fail: + trk->last_n = 0; + trk->got_2 = 0; + dst->pts = AV_NOPTS_VALUE; + dst->pkt_dts = AV_NOPTS_VALUE; + return 0; +} + +// We are only ever expecting in-order frames so nothing more clever is required +static unsigned int +pts_track_count(const pts_track_t * const trk) +{ + return (trk->n - trk->last_n) & (PTS_TRACK_SIZE - 1); +} + +static struct timeval pts_track_add_frame(pts_track_t * const trk, const AVFrame * const src) +{ + const uint32_t n = pts_track_next_n(trk); + pts_track_el_t * const t = trk->a + (n & (PTS_TRACK_SIZE - 1)); + + pts_stats_add(&trk->stats, src->pts); + + t->n = n; + t->interval = pts_stats_interval(&trk->stats); // guess that next interval is the same as the last + av_frame_unref(t->props); + av_frame_copy_props(t->props, src); + + // We now know what the previous interval was, rather than having to guess, + // so set it. There is a better than decent chance that this is before + // we use it. + if (t->interval != 0) { + pts_track_el_t * const prev_t = trk->a + ((n - 1) & (PTS_TRACK_SIZE - 1)); + prev_t->interval = t->interval; + } + + // In case deinterlace interpolates frames use every other usec + return (struct timeval){.tv_sec = n / 500000, .tv_usec = (n % 500000) * 2}; +} + +static void pts_track_uninit(pts_track_t * const trk) +{ + unsigned int i; + for (i = 0; i != PTS_TRACK_SIZE; ++i) { + trk->a[i].n = 0; + av_frame_free(&trk->a[i].props); + } +} + +static int pts_track_init(pts_track_t * const trk, void *logctx) +{ + unsigned int i; + trk->n = 1; + pts_stats_init(&trk->stats, logctx, "track"); + for (i = 0; i != PTS_TRACK_SIZE; ++i) { + trk->a[i].n = 0; + if ((trk->a[i].props = av_frame_alloc()) == NULL) { + pts_track_uninit(trk); + return AVERROR(ENOMEM); + } + } + return 0; +} + +static inline uint32_t +fmt_bpl(const struct v4l2_format * const fmt, const unsigned int plane_n) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.plane_fmt[plane_n].bytesperline : fmt->fmt.pix.bytesperline; +} + +static inline uint32_t +fmt_height(const struct v4l2_format * const fmt) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; +} + +static inline uint32_t +fmt_width(const struct v4l2_format * const fmt) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; +} + +static inline uint32_t +fmt_pixelformat(const struct v4l2_format * const fmt) +{ + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; +} + +static inline uint32_t +buf_bytesused0(const struct v4l2_buffer * const buf) +{ + return V4L2_TYPE_IS_MULTIPLANAR(buf->type) ? 
buf->m.planes[0].bytesused : buf->bytesused; +} + +static void +init_format(V4L2Queue * const q, const uint32_t format_type) +{ + memset(&q->format, 0, sizeof(q->format)); + memset(&q->sel, 0, sizeof(q->sel)); + q->format.type = format_type; + q->sel.type = format_type; +} + +static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx) +{ + struct v4l2_capability cap; + int ret; + + memset(&cap, 0, sizeof(cap)); + ret = ioctl(ctx->fd, VIDIOC_QUERYCAP, &cap); + if (ret < 0) + return ret; + + if (ctx->filter_type == FILTER_V4L2_SCALE && + strcmp("bcm2835-codec-isp", cap.card) != 0) + { + av_log(ctx->logctx, AV_LOG_DEBUG, "Not ISP\n"); + return AVERROR(EINVAL); + } + + if (!(cap.capabilities & V4L2_CAP_STREAMING)) { + av_log(ctx->logctx, AV_LOG_DEBUG, "No streaming\n"); + return AVERROR(EINVAL); + } + + if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) { + init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + } + else if (cap.capabilities & V4L2_CAP_VIDEO_M2M) { + init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE); + init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT); + } + else { + av_log(ctx->logctx, AV_LOG_DEBUG, "Not M2M\n"); + return AVERROR(EINVAL); + } + + return 0; +} + +// Just use for probe - doesn't modify q format +static int deint_v4l2m2m_try_format(V4L2Queue *queue, const uint32_t width, const uint32_t height, const enum AVPixelFormat avfmt) +{ + struct v4l2_format fmt = {.type = queue->format.type}; + DeintV4L2M2MContextShared *ctx = queue->ctx; + int ret, field; + // Pick YUV to test with if not otherwise specified + uint32_t pixelformat = avfmt == AV_PIX_FMT_NONE ? V4L2_PIX_FMT_YUV420 : fmt_av_to_v4l2(avfmt); + enum AVPixelFormat r_avfmt; + + + ret = ioctl(ctx->fd, VIDIOC_G_FMT, &fmt); + if (ret) + av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret); + + if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && V4L2_TYPE_IS_OUTPUT(fmt.type)) + field = V4L2_FIELD_INTERLACED_TB; + else + field = V4L2_FIELD_NONE; + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { + fmt.fmt.pix_mp.pixelformat = pixelformat; + fmt.fmt.pix_mp.field = field; + fmt.fmt.pix_mp.width = width; + fmt.fmt.pix_mp.height = height; + } else { + fmt.fmt.pix.pixelformat = pixelformat; + fmt.fmt.pix.field = field; + fmt.fmt.pix.width = width; + fmt.fmt.pix.height = height; + } + + av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__, + fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, + fmt.fmt.pix_mp.pixelformat, + fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); + + ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, &fmt); + if (ret) + return AVERROR(EINVAL); + + av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__, + fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, + fmt.fmt.pix_mp.pixelformat, + fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); + + r_avfmt = fmt_v4l2_to_av(fmt_pixelformat(&fmt)); + if (r_avfmt != avfmt && avfmt != AV_PIX_FMT_NONE) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Unable to set format %s on %s port\n", av_get_pix_fmt_name(avfmt), V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src"); + return AVERROR(EINVAL); + } + if (r_avfmt == AV_PIX_FMT_NONE) { + av_log(ctx->logctx, AV_LOG_DEBUG, "No supported format on %s port\n", V4L2_TYPE_IS_CAPTURE(fmt.type) ? 
"dest" : "src"); + return AVERROR(EINVAL); + } + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { + if (fmt.fmt.pix_mp.field != field) { + av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); + + return AVERROR(EINVAL); + } + } else { + if (fmt.fmt.pix.field != field) { + av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); + + return AVERROR(EINVAL); + } + } + + return 0; +} + +static int +do_s_fmt(V4L2Queue * const q) +{ + DeintV4L2M2MContextShared * const ctx = q->ctx; + const uint32_t pixelformat = fmt_pixelformat(&q->format); + int ret; + + ret = ioctl(ctx->fd, VIDIOC_S_FMT, &q->format); + if (ret) { + ret = AVERROR(errno); + av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %s\n", av_err2str(ret)); + return ret; + } + + if (pixelformat != fmt_pixelformat(&q->format)) { + av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt_pixelformat(&q->format))); + return AVERROR(EINVAL); + } + + q->sel.target = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE, + q->sel.flags = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_FLAG_LE : V4L2_SEL_FLAG_GE; + + ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &q->sel); + if (ret) { + ret = AVERROR(errno); + av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %s\n", av_err2str(ret)); + } + + return 0; +} + +static void +set_fmt_color(struct v4l2_format *const fmt, + const enum AVColorPrimaries avcp, + const enum AVColorSpace avcs, + const enum AVColorTransferCharacteristic avxc) +{ + enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; + enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; + enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; + + switch (avcp) { + case AVCOL_PRI_BT709: + cs = V4L2_COLORSPACE_REC709; + ycbcr = V4L2_YCBCR_ENC_709; + break; + case AVCOL_PRI_BT470M: + cs = V4L2_COLORSPACE_470_SYSTEM_M; + ycbcr = V4L2_YCBCR_ENC_601; + break; + case AVCOL_PRI_BT470BG: + cs = V4L2_COLORSPACE_470_SYSTEM_BG; + break; + case AVCOL_PRI_SMPTE170M: + cs = V4L2_COLORSPACE_SMPTE170M; + break; + case AVCOL_PRI_SMPTE240M: + cs = V4L2_COLORSPACE_SMPTE240M; + break; + case AVCOL_PRI_BT2020: + cs = V4L2_COLORSPACE_BT2020; + break; + case AVCOL_PRI_SMPTE428: + case AVCOL_PRI_SMPTE431: + case AVCOL_PRI_SMPTE432: + case AVCOL_PRI_EBU3213: + case AVCOL_PRI_RESERVED: + case AVCOL_PRI_FILM: + case AVCOL_PRI_UNSPECIFIED: + default: + break; + } + + switch (avcs) { + case AVCOL_SPC_RGB: + cs = V4L2_COLORSPACE_SRGB; + break; + case AVCOL_SPC_BT709: + cs = V4L2_COLORSPACE_REC709; + break; + case AVCOL_SPC_FCC: + cs = V4L2_COLORSPACE_470_SYSTEM_M; + break; + case AVCOL_SPC_BT470BG: + cs = V4L2_COLORSPACE_470_SYSTEM_BG; + break; + case AVCOL_SPC_SMPTE170M: + cs = V4L2_COLORSPACE_SMPTE170M; + break; + case AVCOL_SPC_SMPTE240M: + cs = V4L2_COLORSPACE_SMPTE240M; + break; + case AVCOL_SPC_BT2020_CL: + cs = V4L2_COLORSPACE_BT2020; + ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM; + break; + case AVCOL_SPC_BT2020_NCL: + cs = V4L2_COLORSPACE_BT2020; + break; + default: + break; + } + + switch (xfer) { + case AVCOL_TRC_BT709: + xfer = V4L2_XFER_FUNC_709; + break; + case AVCOL_TRC_IEC61966_2_1: + xfer = V4L2_XFER_FUNC_SRGB; + break; + case AVCOL_TRC_SMPTE240M: + xfer = V4L2_XFER_FUNC_SMPTE240M; + break; + case AVCOL_TRC_SMPTE2084: + xfer = V4L2_XFER_FUNC_SMPTE2084; + break; + default: + break; + } + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { + fmt->fmt.pix_mp.colorspace = cs; + fmt->fmt.pix_mp.ycbcr_enc = ycbcr; + 
fmt->fmt.pix_mp.xfer_func = xfer; + } else { + fmt->fmt.pix.colorspace = cs; + fmt->fmt.pix.ycbcr_enc = ycbcr; + fmt->fmt.pix.xfer_func = xfer; + } +} + +static void +set_fmt_color_range(struct v4l2_format *const fmt, const enum AVColorRange avcr) +{ + const enum v4l2_quantization q = + avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE : + avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE : + V4L2_QUANTIZATION_DEFAULT; + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { + fmt->fmt.pix_mp.quantization = q; + } else { + fmt->fmt.pix.quantization = q; + } +} + +static enum AVColorPrimaries get_color_primaries(const struct v4l2_format *const fmt) +{ + enum v4l2_ycbcr_encoding ycbcr; + enum v4l2_colorspace cs; + + cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.colorspace : + fmt->fmt.pix.colorspace; + + ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.ycbcr_enc: + fmt->fmt.pix.ycbcr_enc; + + switch(ycbcr) { + case V4L2_YCBCR_ENC_XV709: + case V4L2_YCBCR_ENC_709: return AVCOL_PRI_BT709; + case V4L2_YCBCR_ENC_XV601: + case V4L2_YCBCR_ENC_601:return AVCOL_PRI_BT470M; + default: + break; + } + + switch(cs) { + case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_PRI_BT470BG; + case V4L2_COLORSPACE_SMPTE170M: return AVCOL_PRI_SMPTE170M; + case V4L2_COLORSPACE_SMPTE240M: return AVCOL_PRI_SMPTE240M; + case V4L2_COLORSPACE_BT2020: return AVCOL_PRI_BT2020; + default: + break; + } + + return AVCOL_PRI_UNSPECIFIED; +} + +static enum AVColorSpace get_color_space(const struct v4l2_format *const fmt) +{ + enum v4l2_ycbcr_encoding ycbcr; + enum v4l2_colorspace cs; + + cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.colorspace : + fmt->fmt.pix.colorspace; + + ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.ycbcr_enc: + fmt->fmt.pix.ycbcr_enc; + + switch(cs) { + case V4L2_COLORSPACE_SRGB: return AVCOL_SPC_RGB; + case V4L2_COLORSPACE_REC709: return AVCOL_SPC_BT709; + case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_SPC_FCC; + case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_SPC_BT470BG; + case V4L2_COLORSPACE_SMPTE170M: return AVCOL_SPC_SMPTE170M; + case V4L2_COLORSPACE_SMPTE240M: return AVCOL_SPC_SMPTE240M; + case V4L2_COLORSPACE_BT2020: + if (ycbcr == V4L2_YCBCR_ENC_BT2020_CONST_LUM) + return AVCOL_SPC_BT2020_CL; + else + return AVCOL_SPC_BT2020_NCL; + default: + break; + } + + return AVCOL_SPC_UNSPECIFIED; +} + +static enum AVColorTransferCharacteristic get_color_trc(const struct v4l2_format *const fmt) +{ + enum v4l2_ycbcr_encoding ycbcr; + enum v4l2_xfer_func xfer; + enum v4l2_colorspace cs; + + cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.colorspace : + fmt->fmt.pix.colorspace; + + ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.ycbcr_enc: + fmt->fmt.pix.ycbcr_enc; + + xfer = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? 
+ fmt->fmt.pix_mp.xfer_func: + fmt->fmt.pix.xfer_func; + + switch (xfer) { + case V4L2_XFER_FUNC_709: return AVCOL_TRC_BT709; + case V4L2_XFER_FUNC_SRGB: return AVCOL_TRC_IEC61966_2_1; + default: + break; + } + + switch (cs) { + case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_TRC_GAMMA22; + case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_TRC_GAMMA28; + case V4L2_COLORSPACE_SMPTE170M: return AVCOL_TRC_SMPTE170M; + case V4L2_COLORSPACE_SMPTE240M: return AVCOL_TRC_SMPTE240M; + default: + break; + } + + switch (ycbcr) { + case V4L2_YCBCR_ENC_XV709: + case V4L2_YCBCR_ENC_XV601: return AVCOL_TRC_BT1361_ECG; + default: + break; + } + + return AVCOL_TRC_UNSPECIFIED; +} + +static enum AVColorRange get_color_range(const struct v4l2_format *const fmt) +{ + enum v4l2_quantization qt; + + qt = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? + fmt->fmt.pix_mp.quantization : + fmt->fmt.pix.quantization; + + switch (qt) { + case V4L2_QUANTIZATION_LIM_RANGE: return AVCOL_RANGE_MPEG; + case V4L2_QUANTIZATION_FULL_RANGE: return AVCOL_RANGE_JPEG; + default: + break; + } + + return AVCOL_RANGE_UNSPECIFIED; +} + +static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame) +{ + struct v4l2_format *const format = &q->format; + const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; + + const uint32_t drm_fmt = src->layers[0].format; + // Treat INVALID as LINEAR + const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? + DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; + uint32_t pix_fmt = 0; + uint32_t w = 0; + uint32_t h = 0; + uint32_t bpl = src->layers[0].planes[0].pitch; + + // We really don't expect multiple layers + // All formats that we currently cope with are single object + + if (src->nb_layers != 1 || src->nb_objects != 1) + return AVERROR(EINVAL); + + switch (drm_fmt) { + case DRM_FORMAT_YUV420: + if (mod == DRM_FORMAT_MOD_LINEAR) { + if (src->layers[0].nb_planes != 3) + break; + pix_fmt = V4L2_PIX_FMT_YUV420; + h = src->layers[0].planes[1].offset / bpl; + w = bpl; + } + break; + + case DRM_FORMAT_NV12: + if (mod == DRM_FORMAT_MOD_LINEAR) { + if (src->layers[0].nb_planes != 2) + break; + pix_fmt = V4L2_PIX_FMT_NV12; + h = src->layers[0].planes[1].offset / bpl; + w = bpl; + } +#if CONFIG_SAND + else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { + if (src->layers[0].nb_planes != 2) + break; + pix_fmt = V4L2_PIX_FMT_NV12_COL128; + w = bpl; + h = src->layers[0].planes[1].offset / 128; + bpl = fourcc_mod_broadcom_param(mod); + } +#endif + break; + + case DRM_FORMAT_P030: +#if CONFIG_SAND + if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { + if (src->layers[0].nb_planes != 2) + break; + pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; + w = bpl / 2; // Matching lie to how we construct this + h = src->layers[0].planes[1].offset / 128; + bpl = fourcc_mod_broadcom_param(mod); + } +#endif + break; + + default: + break; + } + + if (!pix_fmt) + return AVERROR(EINVAL); + + if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { + struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; + + pix->width = w; + pix->height = h; + pix->pixelformat = pix_fmt; + pix->plane_fmt[0].bytesperline = bpl; + pix->num_planes = 1; + } + else { + struct v4l2_pix_format *const pix = &format->fmt.pix; + + pix->width = w; + pix->height = h; + pix->pixelformat = pix_fmt; + pix->bytesperline = bpl; + } + + set_fmt_color(format, frame->color_primaries, frame->colorspace, frame->color_trc); + set_fmt_color_range(format, 
frame->color_range); + + q->sel.r.width = frame->width - (frame->crop_left + frame->crop_right); + q->sel.r.height = frame->height - (frame->crop_top + frame->crop_bottom); + q->sel.r.left = frame->crop_left; + q->sel.r.top = frame->crop_top; + + return 0; +} + + +static int set_dst_format(DeintV4L2M2MContext * const priv, V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height) +{ + struct v4l2_format * const fmt = &queue->format; + struct v4l2_selection *const sel = &queue->sel; + + memset(&fmt->fmt, 0, sizeof(fmt->fmt)); + + // Align w/h to 16 here in case there are alignment requirements at the next + // stage of the filter chain (also RPi deinterlace setup is bust and this + // fixes it) + if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { + fmt->fmt.pix_mp.pixelformat = pixelformat; + fmt->fmt.pix_mp.field = field; + fmt->fmt.pix_mp.width = FFALIGN(width, 16); + fmt->fmt.pix_mp.height = FFALIGN(height, 16); + } else { + fmt->fmt.pix.pixelformat = pixelformat; + fmt->fmt.pix.field = field; + fmt->fmt.pix.width = FFALIGN(width, 16); + fmt->fmt.pix.height = FFALIGN(height, 16); + } + + set_fmt_color(fmt, priv->colour_primaries, priv->colour_matrix, priv->colour_transfer); + set_fmt_color_range(fmt, priv->colour_range); + + sel->r.width = width; + sel->r.height = height; + sel->r.left = 0; + sel->r.top = 0; + + return do_s_fmt(queue); +} + +static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node) +{ + int ret; + + ctx->fd = open(node, O_RDWR | O_NONBLOCK, 0); + if (ctx->fd < 0) + return AVERROR(errno); + + ret = deint_v4l2m2m_prepare_context(ctx); + if (ret) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to prepare context\n"); + goto fail; + } + + ret = deint_v4l2m2m_try_format(&ctx->capture, ctx->output_width, ctx->output_height, ctx->output_format); + if (ret) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try dst format\n"); + goto fail; + } + + ret = deint_v4l2m2m_try_format(&ctx->output, ctx->width, ctx->height, AV_PIX_FMT_NONE); + if (ret) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try src format\n"); + goto fail; + } + + return 0; + +fail: + close(ctx->fd); + ctx->fd = -1; + + return ret; +} + +static int deint_v4l2m2m_find_device(DeintV4L2M2MContextShared *ctx) +{ + int ret = AVERROR(EINVAL); + struct dirent *entry; + char node[PATH_MAX]; + DIR *dirp; + + dirp = opendir("/dev"); + if (!dirp) + return AVERROR(errno); + + for (entry = readdir(dirp); entry; entry = readdir(dirp)) { + + if (strncmp(entry->d_name, "video", 5)) + continue; + + snprintf(node, sizeof(node), "/dev/%s", entry->d_name); + av_log(ctx->logctx, AV_LOG_DEBUG, "probing device %s\n", node); + ret = deint_v4l2m2m_probe_device(ctx, node); + if (!ret) + break; + } + + closedir(dirp); + + if (ret) { + av_log(ctx->logctx, AV_LOG_ERROR, "Could not find a valid device\n"); + ctx->fd = -1; + + return ret; + } + + av_log(ctx->logctx, AV_LOG_INFO, "Using device %s\n", node); + + return 0; +} + +static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf) +{ + int ret; + + ret = ioctl(buf->q->ctx->fd, VIDIOC_QBUF, &buf->buffer); + if (ret < 0) + return AVERROR(errno); + + buf->enqueued = 1; + + return 0; +} + +static void +drm_frame_init(AVDRMFrameDescriptor * const d) +{ + unsigned int i; + for (i = 0; i != AV_DRM_MAX_PLANES; ++i) { + d->objects[i].fd = -1; + } +} + +static void +drm_frame_uninit(AVDRMFrameDescriptor * const d) +{ + unsigned int i; + for (i = 0; i != d->nb_objects; ++i) { + if (d->objects[i].fd != -1) { + close(d->objects[i].fd); + d->objects[i].fd = -1; + } + } 
+} + +static void +avbufs_delete(V4L2Buffer** ppavbufs, const unsigned int n) +{ + unsigned int i; + V4L2Buffer* const avbufs = *ppavbufs; + + if (avbufs == NULL) + return; + *ppavbufs = NULL; + + for (i = 0; i != n; ++i) { + V4L2Buffer* const avbuf = avbufs + i; + drm_frame_uninit(&avbuf->drm_frame); + } + + av_free(avbufs); +} + +static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf) +{ + struct v4l2_exportbuffer expbuf; + int i, ret; + uint64_t mod = DRM_FORMAT_MOD_LINEAR; + + AVDRMFrameDescriptor * const drm_desc = &avbuf->drm_frame; + AVDRMLayerDescriptor * const layer = &drm_desc->layers[0]; + const struct v4l2_format *const fmt = &q->format; + const uint32_t height = fmt_height(fmt); + ptrdiff_t bpl0; + + /* fill the DRM frame descriptor */ + drm_desc->nb_layers = 1; + layer->nb_planes = avbuf->num_planes; + + for (int i = 0; i < avbuf->num_planes; i++) { + layer->planes[i].object_index = i; + layer->planes[i].offset = 0; + layer->planes[i].pitch = fmt_bpl(fmt, i); + } + bpl0 = layer->planes[0].pitch; + + switch (fmt_pixelformat(fmt)) { +#if CONFIG_SAND + case V4L2_PIX_FMT_NV12_COL128: + mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl0); + layer->format = V4L2_PIX_FMT_NV12; + + if (avbuf->num_planes > 1) + break; + + layer->nb_planes = 2; + layer->planes[1].object_index = 0; + layer->planes[1].offset = height * 128; + layer->planes[0].pitch = fmt_width(fmt); + layer->planes[1].pitch = layer->planes[0].pitch; + break; +#endif + + case DRM_FORMAT_NV12: + layer->format = V4L2_PIX_FMT_NV12; + + if (avbuf->num_planes > 1) + break; + + layer->nb_planes = 2; + layer->planes[1].object_index = 0; + layer->planes[1].offset = bpl0 * height; + layer->planes[1].pitch = bpl0; + break; + + case V4L2_PIX_FMT_YUV420: + layer->format = DRM_FORMAT_YUV420; + + if (avbuf->num_planes > 1) + break; + + layer->nb_planes = 3; + layer->planes[1].object_index = 0; + layer->planes[1].offset = bpl0 * height; + layer->planes[1].pitch = bpl0 / 2; + layer->planes[2].object_index = 0; + layer->planes[2].offset = layer->planes[1].offset + ((bpl0 * height) / 4); + layer->planes[2].pitch = bpl0 / 2; + break; + + default: + drm_desc->nb_layers = 0; + return AVERROR(EINVAL); + } + + drm_desc->nb_objects = 0; + for (i = 0; i < avbuf->num_planes; i++) { + memset(&expbuf, 0, sizeof(expbuf)); + + expbuf.index = avbuf->buffer.index; + expbuf.type = avbuf->buffer.type; + expbuf.plane = i; + + ret = ioctl(avbuf->q->ctx->fd, VIDIOC_EXPBUF, &expbuf); + if (ret < 0) + return AVERROR(errno); + + drm_desc->objects[i].size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type) ? + avbuf->buffer.m.planes[i].length : avbuf->buffer.length; + drm_desc->objects[i].fd = expbuf.fd; + drm_desc->objects[i].format_modifier = mod; + drm_desc->nb_objects = i + 1; + } + + return 0; +} + +static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue) +{ + struct v4l2_format *fmt = &queue->format; + DeintV4L2M2MContextShared *ctx = queue->ctx; + struct v4l2_requestbuffers req; + int ret, i, multiplanar; + uint32_t memory; + + memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ? 
+ V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; + + multiplanar = V4L2_TYPE_IS_MULTIPLANAR(fmt->type); + + memset(&req, 0, sizeof(req)); + req.count = queue->num_buffers; + req.memory = memory; + req.type = fmt->type; + + ret = ioctl(ctx->fd, VIDIOC_REQBUFS, &req); + if (ret < 0) { + av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_REQBUFS failed: %s\n", strerror(errno)); + + return AVERROR(errno); + } + + queue->num_buffers = req.count; + queue->buffers = av_mallocz(queue->num_buffers * sizeof(V4L2Buffer)); + if (!queue->buffers) { + av_log(ctx->logctx, AV_LOG_ERROR, "malloc enomem\n"); + + return AVERROR(ENOMEM); + } + + for (i = 0; i < queue->num_buffers; i++) { + V4L2Buffer * const buf = &queue->buffers[i]; + + buf->enqueued = 0; + buf->q = queue; + + buf->buffer.type = fmt->type; + buf->buffer.memory = memory; + buf->buffer.index = i; + + if (multiplanar) { + buf->buffer.length = VIDEO_MAX_PLANES; + buf->buffer.m.planes = buf->planes; + } + + drm_frame_init(&buf->drm_frame); + } + + for (i = 0; i < queue->num_buffers; i++) { + V4L2Buffer * const buf = &queue->buffers[i]; + + ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer); + if (ret < 0) { + ret = AVERROR(errno); + + goto fail; + } + + buf->num_planes = multiplanar ? buf->buffer.length : 1; + + if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) { + ret = deint_v4l2m2m_enqueue_buffer(buf); + if (ret) + goto fail; + + ret = v4l2_buffer_export_drm(queue, buf); + if (ret) + goto fail; + } + } + + return 0; + +fail: + avbufs_delete(&queue->buffers, queue->num_buffers); + queue->num_buffers = 0; + return ret; +} + +static int deint_v4l2m2m_streamon(V4L2Queue *queue) +{ + DeintV4L2M2MContextShared * const ctx = queue->ctx; + int type = queue->format.type; + int ret; + + ret = ioctl(ctx->fd, VIDIOC_STREAMON, &type); + av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno)); + if (ret < 0) + return AVERROR(errno); + + return 0; +} + +static int deint_v4l2m2m_streamoff(V4L2Queue *queue) +{ + DeintV4L2M2MContextShared * const ctx = queue->ctx; + int type = queue->format.type; + int ret; + + ret = ioctl(ctx->fd, VIDIOC_STREAMOFF, &type); + av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno)); + if (ret < 0) + return AVERROR(errno); + + return 0; +} + +// timeout in ms +static V4L2Buffer* deint_v4l2m2m_dequeue_buffer(V4L2Queue *queue, int timeout) +{ + struct v4l2_plane planes[VIDEO_MAX_PLANES]; + DeintV4L2M2MContextShared *ctx = queue->ctx; + struct v4l2_buffer buf = { 0 }; + V4L2Buffer* avbuf = NULL; + struct pollfd pfd; + short events; + int ret; + + if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) + events = POLLOUT | POLLWRNORM; + else + events = POLLIN | POLLRDNORM; + + pfd.events = events; + pfd.fd = ctx->fd; + + for (;;) { + ret = poll(&pfd, 1, timeout); + if (ret > 0) + break; + if (errno == EINTR) + continue; + return NULL; + } + + if (pfd.revents & POLLERR) + return NULL; + + if (pfd.revents & events) { + memset(&buf, 0, sizeof(buf)); + buf.memory = V4L2_MEMORY_MMAP; + buf.type = queue->format.type; + if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { + memset(planes, 0, sizeof(planes)); + buf.length = VIDEO_MAX_PLANES; + buf.m.planes = planes; + } + + ret = ioctl(ctx->fd, VIDIOC_DQBUF, &buf); + if (ret) { + if (errno != EAGAIN) + av_log(ctx->logctx, AV_LOG_DEBUG, "VIDIOC_DQBUF, errno (%s)\n", + av_err2str(AVERROR(errno))); + return NULL; + } + + avbuf = &queue->buffers[buf.index]; + avbuf->enqueued = 0; + avbuf->buffer = buf; + if 
(V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { + memcpy(avbuf->planes, planes, sizeof(planes)); + avbuf->buffer.m.planes = avbuf->planes; + } + return avbuf; + } + + return NULL; +} + +static V4L2Buffer *deint_v4l2m2m_find_free_buf(V4L2Queue *queue) +{ + int i; + V4L2Buffer *buf = NULL; + + for (i = 0; i < queue->num_buffers; i++) + if (!queue->buffers[i].enqueued) { + buf = &queue->buffers[i]; + break; + } + return buf; +} + +static void deint_v4l2m2m_unref_queued(V4L2Queue *queue) +{ + int i; + V4L2Buffer *buf = NULL; + + if (!queue || !queue->buffers) + return; + for (i = 0; i < queue->num_buffers; i++) { + buf = &queue->buffers[i]; + if (queue->buffers[i].enqueued) + av_frame_unref(&buf->frame); + } +} + +static void recycle_q(V4L2Queue * const queue) +{ + V4L2Buffer* avbuf; + while (avbuf = deint_v4l2m2m_dequeue_buffer(queue, 0), avbuf) { + av_frame_unref(&avbuf->frame); + } +} + +static int count_enqueued(V4L2Queue *queue) +{ + int i; + int n = 0; + + if (queue->buffers == NULL) + return 0; + + for (i = 0; i < queue->num_buffers; i++) + if (queue->buffers[i].enqueued) + ++n; + return n; +} + +static int deint_v4l2m2m_enqueue_frame(V4L2Queue * const queue, AVFrame * const frame) +{ + DeintV4L2M2MContextShared *const ctx = queue->ctx; + AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)frame->data[0]; + V4L2Buffer *buf; + int i; + + if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) + recycle_q(queue); + + buf = deint_v4l2m2m_find_free_buf(queue); + if (!buf) { + av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d finding free buf\n", __func__, 0); + return AVERROR(EAGAIN); + } + if (V4L2_TYPE_IS_MULTIPLANAR(buf->buffer.type)) + for (i = 0; i < drm_desc->nb_objects; i++) + buf->buffer.m.planes[i].m.fd = drm_desc->objects[i].fd; + else + buf->buffer.m.fd = drm_desc->objects[0].fd; + + buf->buffer.field = !frame_is_interlaced(frame) ? V4L2_FIELD_NONE : + frame_is_tff(frame) ? 
V4L2_FIELD_INTERLACED_TB : + V4L2_FIELD_INTERLACED_BT; + + if (ctx->field_order != buf->buffer.field) { + av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Field changed: %d->%d\n", __func__, ctx->field_order, buf->buffer.field); + ctx->field_order = buf->buffer.field; + } + + buf->buffer.timestamp = pts_track_add_frame(&ctx->track, frame); + + buf->drm_frame.objects[0].fd = drm_desc->objects[0].fd; + + av_frame_move_ref(&buf->frame, frame); + + return deint_v4l2m2m_enqueue_buffer(buf); +} + +static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx) +{ + if (atomic_fetch_sub(&ctx->refcount, 1) == 1) { + V4L2Queue *capture = &ctx->capture; + V4L2Queue *output = &ctx->output; + + av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__); + + if (ctx->fd >= 0) { + deint_v4l2m2m_streamoff(capture); + deint_v4l2m2m_streamoff(output); + } + + avbufs_delete(&capture->buffers, capture->num_buffers); + + deint_v4l2m2m_unref_queued(output); + + av_buffer_unref(&ctx->hw_frames_ctx); + + if (capture->buffers) + av_free(capture->buffers); + + if (output->buffers) + av_free(output->buffers); + + if (ctx->fd >= 0) { + close(ctx->fd); + ctx->fd = -1; + } + + av_free(ctx); + } +} + +static void v4l2_free_buffer(void *opaque, uint8_t *unused) +{ + V4L2Buffer *buf = opaque; + DeintV4L2M2MContextShared *ctx = buf->q->ctx; + + if (!ctx->done) + deint_v4l2m2m_enqueue_buffer(buf); + + deint_v4l2m2m_destroy_context(ctx); +} + +// timeout in ms +static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout) +{ + DeintV4L2M2MContextShared *ctx = queue->ctx; + V4L2Buffer* avbuf; + enum AVColorPrimaries color_primaries; + enum AVColorSpace colorspace; + enum AVColorTransferCharacteristic color_trc; + enum AVColorRange color_range; + + av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__); + + if (queue->eos) { + av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: EOS\n", __func__); + return AVERROR_EOF; + } + + avbuf = deint_v4l2m2m_dequeue_buffer(queue, timeout); + if (!avbuf) { + av_log(ctx->logctx, AV_LOG_DEBUG, "%s: No buffer to dequeue (timeout=%d)\n", __func__, timeout); + return AVERROR(EAGAIN); + } + + if (V4L2_TYPE_IS_CAPTURE(avbuf->buffer.type)) { + if ((avbuf->buffer.flags & V4L2_BUF_FLAG_LAST) != 0) + queue->eos = 1; + if (buf_bytesused0(&avbuf->buffer) == 0) + return queue->eos ? 
AVERROR_EOF : AVERROR(EINVAL); + } + + // Fill in PTS and anciliary info from src frame + pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame); + + frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame, + sizeof(avbuf->drm_frame), v4l2_free_buffer, + avbuf, AV_BUFFER_FLAG_READONLY); + if (!frame->buf[0]) { + av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d creating buffer\n", __func__, 0); + return AVERROR(ENOMEM); + } + + atomic_fetch_add(&ctx->refcount, 1); + + frame->data[0] = (uint8_t *)&avbuf->drm_frame; + frame->format = AV_PIX_FMT_DRM_PRIME; + if (ctx->hw_frames_ctx) + frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx); + frame->height = ctx->output_height; + frame->width = ctx->output_width; + + color_primaries = get_color_primaries(&ctx->capture.format); + colorspace = get_color_space(&ctx->capture.format); + color_trc = get_color_trc(&ctx->capture.format); + color_range = get_color_range(&ctx->capture.format); + + // If the color parameters are unspecified by V4L2 then leave alone as they + // will have been copied from src + if (color_primaries != AVCOL_PRI_UNSPECIFIED) + frame->color_primaries = color_primaries; + if (colorspace != AVCOL_SPC_UNSPECIFIED) + frame->colorspace = colorspace; + if (color_trc != AVCOL_TRC_UNSPECIFIED) + frame->color_trc = color_trc; + if (color_range != AVCOL_RANGE_UNSPECIFIED) + frame->color_range = color_range; + + if (ctx->filter_type == FILTER_V4L2_DEINTERLACE) { + // Not interlaced now + frame_set_progressive(frame); + // Duration halved + frame->duration /= 2; + } + + if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) { + av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n"); + frame->decode_error_flags |= FF_DECODE_ERROR_INVALID_BITSTREAM; + } + + av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: PTS=%"PRId64"\n", __func__, frame->pts); + return 0; +} + +static int deint_v4l2m2m_config_props(AVFilterLink *outlink) +{ + AVFilterLink *inlink = outlink->src->inputs[0]; + AVFilterContext *avctx = outlink->src; + DeintV4L2M2MContext *priv = avctx->priv; + DeintV4L2M2MContextShared *ctx = priv->shared; + int ret; + + ctx->height = avctx->inputs[0]->h; + ctx->width = avctx->inputs[0]->w; + + if (ctx->filter_type == FILTER_V4L2_SCALE) { + if ((ret = ff_scale_eval_dimensions(priv, + priv->w_expr, priv->h_expr, + inlink, outlink, + &ctx->output_width, &ctx->output_height)) < 0) + return ret; + + ff_scale_adjust_dimensions(inlink, &ctx->output_width, &ctx->output_height, + priv->force_original_aspect_ratio, priv->force_divisible_by, 1.0); + } + else { + ctx->output_width = ctx->width; + ctx->output_height = ctx->height; + } + + av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d\n", __func__, + ctx->width, ctx->height, ctx->output_width, ctx->output_height); + + outlink->time_base = inlink->time_base; + outlink->w = ctx->output_width; + outlink->h = ctx->output_height; + outlink->format = inlink->format; + + if (inlink->sample_aspect_ratio.num) + outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio); + else + outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; + + return deint_v4l2m2m_find_device(ctx); +} + +static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc) +{ + const uint64_t mod = drm_desc->objects[0].format_modifier; + const int is_linear = (mod == DRM_FORMAT_MOD_LINEAR || mod == DRM_FORMAT_MOD_INVALID); + + // Only currently support single object things + if (drm_desc->nb_objects != 1) + return 0; + + switch 
(drm_desc->layers[0].format) { + case DRM_FORMAT_YUV420: + return is_linear ? V4L2_PIX_FMT_YUV420 : 0; + case DRM_FORMAT_NV12: + return is_linear ? V4L2_PIX_FMT_NV12 : +#if CONFIG_SAND + fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? V4L2_PIX_FMT_NV12_COL128 : +#endif + 0; + default: + break; + } + return 0; +} + +static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) +{ + AVFilterContext *avctx = link->dst; + DeintV4L2M2MContext *priv = avctx->priv; + DeintV4L2M2MContextShared *ctx = priv->shared; + V4L2Queue *capture = &ctx->capture; + V4L2Queue *output = &ctx->output; + int ret; + + av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" dts: %"PRId64" field :%d interlaced: %d aspect:%d/%d\n", + __func__, in->pts, in->pkt_dts, frame_is_tff(in), frame_is_interlaced(in), in->sample_aspect_ratio.num, in->sample_aspect_ratio.den); + + if (ctx->field_order == V4L2_FIELD_ANY) { + const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0]; + uint32_t pixelformat = desc_pixelformat(drm_desc); + + if (pixelformat == 0) { + av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n", + av_fourcc2str(drm_desc->layers[0].format), + drm_desc->nb_objects, drm_desc->objects[0].format_modifier); + return AVERROR(EINVAL); + } + + ctx->orig_width = drm_desc->layers[0].planes[0].pitch; + ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width; + + av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height, + drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset); + + if ((ret = set_src_fmt(output, in)) != 0) { + av_log(avctx, AV_LOG_WARNING, "Unknown input DRM format: %s mod: %#" PRIx64 "\n", + av_fourcc2str(drm_desc->layers[0].format), drm_desc->objects[0].format_modifier); + return ret; + } + + ret = do_s_fmt(output); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed to set source format\n"); + return ret; + } + + if (ctx->output_format != AV_PIX_FMT_NONE) + pixelformat = fmt_av_to_v4l2(ctx->output_format); + ret = set_dst_format(priv, capture, pixelformat, V4L2_FIELD_NONE, ctx->output_width, ctx->output_height); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed to set destination format\n"); + return ret; + } + + ret = deint_v4l2m2m_allocate_buffers(capture); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed to allocate destination buffers\n"); + return ret; + } + + ret = deint_v4l2m2m_streamon(capture); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed set destination streamon: %s\n", av_err2str(ret)); + return ret; + } + + ret = deint_v4l2m2m_allocate_buffers(output); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed to allocate src buffers\n"); + return ret; + } + + ret = deint_v4l2m2m_streamon(output); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed set src streamon: %s\n", av_err2str(ret)); + return ret; + } + + if (frame_is_tff(in)) + ctx->field_order = V4L2_FIELD_INTERLACED_TB; + else + ctx->field_order = V4L2_FIELD_INTERLACED_BT; + + { + struct v4l2_encoder_cmd ecmd = { + .cmd = V4L2_ENC_CMD_STOP + }; + ctx->has_enc_stop = 0; + if (ioctl(ctx->fd, VIDIOC_TRY_ENCODER_CMD, &ecmd) == 0) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop succeeded\n"); + ctx->has_enc_stop = 1; + } + else { + av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop fail: %s\n", av_err2str(AVERROR(errno))); + } + + } + } + + ret = deint_v4l2m2m_enqueue_frame(output, in); + + av_log(priv, AV_LOG_TRACE, ">>> %s: %s\n", __func__, 
av_err2str(ret)); + return ret; +} + +static int +ack_inlink(AVFilterContext * const avctx, DeintV4L2M2MContextShared *const s, + AVFilterLink * const inlink) +{ + int instatus; + int64_t inpts; + + if (ff_inlink_acknowledge_status(inlink, &instatus, &inpts) <= 0) + return 0; + + s->drain = instatus; + s->drain_pts = inpts; + s->drain_state = DRAIN_TIMEOUT; + + if (s->field_order == V4L2_FIELD_ANY) { // Not yet started + s->drain_state = DRAIN_DONE; + } + else if (s->one_to_one) { + s->drain_state = DRAIN_LAST; + } + else if (s->has_enc_stop) { + struct v4l2_encoder_cmd ecmd = { + .cmd = V4L2_ENC_CMD_STOP + }; + if (ioctl(s->fd, VIDIOC_ENCODER_CMD, &ecmd) == 0) { + av_log(avctx->priv, AV_LOG_DEBUG, "Do Encode stop\n"); + s->drain_state = DRAIN_EOS; + } + else { + av_log(avctx->priv, AV_LOG_WARNING, "Encode stop fail: %s\n", av_err2str(AVERROR(errno))); + } + } + return 1; +} + +static int deint_v4l2m2m_activate(AVFilterContext *avctx) +{ + DeintV4L2M2MContext * const priv = avctx->priv; + DeintV4L2M2MContextShared *const s = priv->shared; + AVFilterLink * const outlink = avctx->outputs[0]; + AVFilterLink * const inlink = avctx->inputs[0]; + int n = 0; + int cn = 99; + int did_something = 0; + + av_log(priv, AV_LOG_TRACE, "<<< %s\n", __func__); + + FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx); + + ack_inlink(avctx, s, inlink); + + if (s->field_order != V4L2_FIELD_ANY) // Can't DQ if no setup! + { + AVFrame * frame = av_frame_alloc(); + int rv; + + recycle_q(&s->output); + n = count_enqueued(&s->output); + + if (frame == NULL) { + av_log(priv, AV_LOG_ERROR, "%s: error allocating frame\n", __func__); + return AVERROR(ENOMEM); + } + + rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame, + drain_frame_expected(s->drain_state) || n > 4 ? 300 : 0); + if (rv != 0) { + av_frame_free(&frame); + if (rv == AVERROR_EOF) { + av_log(priv, AV_LOG_DEBUG, "%s: --- DQ EOF\n", __func__); + s->drain_state = DRAIN_DONE; + } + else if (rv == AVERROR(EAGAIN)) { + if (s->drain_state != DRAIN_NONE) { + av_log(priv, AV_LOG_DEBUG, "%s: --- DQ empty - drain done\n", __func__); + s->drain_state = DRAIN_DONE; + } + } + else { + av_log(priv, AV_LOG_ERROR, ">>> %s: DQ fail: %s\n", __func__, av_err2str(rv)); + return rv; + } + } + else { + frame_set_progressive(frame); + // frame is always consumed by filter_frame - even on error despite + // a somewhat confusing comment in the header + rv = ff_filter_frame(outlink, frame); + ++s->frames_tx; + + av_log(priv, AV_LOG_TRACE, "%s: Filtered: %s\n", __func__, av_err2str(rv)); + did_something = 1; + + if (s->drain_state != DRAIN_NONE && pts_track_count(&s->track) == 0) { + av_log(priv, AV_LOG_DEBUG, "%s: --- DQ last - drain done\n", __func__); + s->drain_state = DRAIN_DONE; + } + } + + cn = count_enqueued(&s->capture); + } + + if (s->drain_state == DRAIN_DONE) { + ff_outlink_set_status(outlink, s->drain, s->drain_pts); + av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(s->drain)); + return 0; + } + + recycle_q(&s->output); + n = count_enqueued(&s->output); + + while (n < 6 && !s->drain) { + AVFrame * frame; + int rv; + + if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) { + av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv)); + return rv; + } + + if (frame == NULL) { + av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__); + if (!ack_inlink(avctx, s, inlink)) { + ff_inlink_request_frame(inlink); + av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__); + } + break; + } + ++s->frames_rx; + + rv = 
deint_v4l2m2m_filter_frame(inlink, frame); + av_frame_free(&frame); + + if (rv != 0) + return rv; + + av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__); + did_something = 1; + ++n; + } + + if ((n > 4 || s->drain) && ff_outlink_frame_wanted(outlink)) { + ff_filter_set_ready(avctx, 1); + did_something = 1; + av_log(priv, AV_LOG_TRACE, "%s: ready\n", __func__); + } + + av_log(priv, AV_LOG_TRACE, ">>> %s: OK (n=%d, cn=%d)\n", __func__, n, cn); + return did_something ? 0 : FFERROR_NOT_READY; +} + +static av_cold int common_v4l2m2m_init(AVFilterContext * const avctx, const filter_type_v4l2_t filter_type) +{ + DeintV4L2M2MContext * const priv = avctx->priv; + DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared)); + + if (!ctx) { + av_log(priv, AV_LOG_ERROR, "%s: error %d allocating context\n", __func__, 0); + return AVERROR(ENOMEM); + } + priv->shared = ctx; + ctx->logctx = priv; + ctx->filter_type = filter_type; + ctx->fd = -1; + ctx->output.ctx = ctx; + ctx->output.num_buffers = 8; + ctx->output.name = "OUTPUT"; + ctx->capture.ctx = ctx; + ctx->capture.num_buffers = 12; + ctx->capture.name = "CAPTURE"; + ctx->done = 0; + ctx->field_order = V4L2_FIELD_ANY; + + pts_track_init(&ctx->track, priv); + + atomic_init(&ctx->refcount, 1); + + if (priv->output_format_string) { + ctx->output_format = av_get_pix_fmt(priv->output_format_string); + if (ctx->output_format == AV_PIX_FMT_NONE) { + av_log(avctx, AV_LOG_ERROR, "Invalid ffmpeg output format '%s'.\n", priv->output_format_string); + return AVERROR(EINVAL); + } + if (fmt_av_to_v4l2(ctx->output_format) == 0) { + av_log(avctx, AV_LOG_ERROR, "Unsupported output format for V4L2: %s.\n", av_get_pix_fmt_name(ctx->output_format)); + return AVERROR(EINVAL); + } + } else { + // Use the input format once that is configured. 
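+ // (AV_PIX_FMT_NONE acts as "not forced" here: deint_v4l2m2m_filter_frame only overrides the capture pixelformat derived from the incoming DRM frame when an output format has been requested.)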
+ ctx->output_format = AV_PIX_FMT_NONE; + } + +#define STRING_OPTION(var_name, func_name, default_value) do { \ + if (priv->var_name ## _string) { \ + int var = av_ ## func_name ## _from_name(priv->var_name ## _string); \ + if (var < 0) { \ + av_log(avctx, AV_LOG_ERROR, "Invalid %s.\n", #var_name); \ + return AVERROR(EINVAL); \ + } \ + priv->var_name = var; \ + } else { \ + priv->var_name = default_value; \ + } \ + } while (0) + + STRING_OPTION(colour_primaries, color_primaries, AVCOL_PRI_UNSPECIFIED); + STRING_OPTION(colour_transfer, color_transfer, AVCOL_TRC_UNSPECIFIED); + STRING_OPTION(colour_matrix, color_space, AVCOL_SPC_UNSPECIFIED); + STRING_OPTION(chroma_location, chroma_location, AVCHROMA_LOC_UNSPECIFIED); + + return 0; +} + +static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx) +{ + return common_v4l2m2m_init(avctx, FILTER_V4L2_DEINTERLACE); +} + +static av_cold int scale_v4l2m2m_init(AVFilterContext *avctx) +{ + int rv; + DeintV4L2M2MContext * priv; + DeintV4L2M2MContextShared * ctx; + + if ((rv = common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE)) != 0) + return rv; + + priv = avctx->priv; + ctx = priv->shared; + + ctx->one_to_one = 1; + return 0; +} + +static void deint_v4l2m2m_uninit(AVFilterContext *avctx) +{ + DeintV4L2M2MContext *priv = avctx->priv; + DeintV4L2M2MContextShared *ctx = priv->shared; + + av_log(priv, AV_LOG_VERBOSE, "Frames Rx: %u, Frames Tx: %u\n", + ctx->frames_rx, ctx->frames_tx); + ctx->done = 1; + ctx->logctx = NULL; // Log to NULL works, log to missing crashes + pts_track_uninit(&ctx->track); + deint_v4l2m2m_destroy_context(ctx); +} + +static const AVOption deinterlace_v4l2m2m_options[] = { + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m); + +#define OFFSET(x) offsetof(DeintV4L2M2MContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) + +static const AVOption scale_v4l2m2m_options[] = { + { "w", "Output video width", + OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS }, + { "h", "Output video height", + OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS }, + { "format", "Output video format (software format of hardware frames)", + OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS }, + // These colour properties match the ones of the same name in vf_scale. 
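+ // The string-valued options below (out_color_matrix, out_color_primaries, out_color_transfer, out_chroma_location) are resolved through the STRING_OPTION()/av_*_from_name() helpers above, so the usual FFmpeg names apply; an illustrative (untested) invocation would be scale_v4l2m2m=w=1280:h=720:out_color_matrix=bt709.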
+ { "out_color_matrix", "Output colour matrix coefficient set", + OFFSET(colour_matrix_string), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS }, + { "out_range", "Output colour range", + OFFSET(colour_range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_UNSPECIFIED }, + AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, FLAGS, "range" }, + { "full", "Full range", + 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, + { "limited", "Limited range", + 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, + { "jpeg", "Full range", + 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, + { "mpeg", "Limited range", + 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, + { "tv", "Limited range", + 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, + { "pc", "Full range", + 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, + // These colour properties match the ones in the VAAPI scaler + { "out_color_primaries", "Output colour primaries", + OFFSET(colour_primaries_string), AV_OPT_TYPE_STRING, + { .str = NULL }, .flags = FLAGS }, + { "out_color_transfer", "Output colour transfer characteristics", + OFFSET(colour_transfer_string), AV_OPT_TYPE_STRING, + { .str = NULL }, .flags = FLAGS }, + { "out_chroma_location", "Output chroma sample location", + OFFSET(chroma_location_string), AV_OPT_TYPE_STRING, + { .str = NULL }, .flags = FLAGS }, + { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, "force_oar" }, + { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(scale_v4l2m2m); + +static const AVFilterPad deint_v4l2m2m_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + }, +}; + +static const AVFilterPad deint_v4l2m2m_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = deint_v4l2m2m_config_props, + }, +}; + +FFFilter ff_vf_deinterlace_v4l2m2m = { + .p.name = "deinterlace_v4l2m2m", + .p.description = NULL_IF_CONFIG_SMALL("V4L2 M2M deinterlacer"), + .p.priv_class = &deinterlace_v4l2m2m_class, + .priv_size = sizeof(DeintV4L2M2MContext), + .init = &deint_v4l2m2m_init, + .uninit = &deint_v4l2m2m_uninit, + FILTER_INPUTS(deint_v4l2m2m_inputs), + FILTER_OUTPUTS(deint_v4l2m2m_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME), + .activate = deint_v4l2m2m_activate, +}; + +FFFilter ff_vf_scale_v4l2m2m = { + .p.name = "scale_v4l2m2m", + .p.description = NULL_IF_CONFIG_SMALL("V4L2 M2M scaler"), + .p.priv_class = &scale_v4l2m2m_class, + .priv_size = sizeof(DeintV4L2M2MContext), + .init = &scale_v4l2m2m_init, + .uninit = &deint_v4l2m2m_uninit, + FILTER_INPUTS(deint_v4l2m2m_inputs), + FILTER_OUTPUTS(deint_v4l2m2m_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME), + .activate = deint_v4l2m2m_activate, +}; + diff --git a/libavfilter/vf_unsand.c b/libavfilter/vf_unsand.c new file mode 100644 index 0000000000..9f0aca5358 --- /dev/null +++ b/libavfilter/vf_unsand.c @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2007 Bobby Bingham + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * format and noformat video filters + */ + +#include + +#include "libavutil/internal.h" +#include "libavutil/mem.h" +#include "libavutil/pixdesc.h" +#include "libavutil/opt.h" +#include "libavutil/rpi_sand_fns.h" + +#include "avfilter.h" +#include "formats.h" +#include "video.h" + +typedef struct UnsandContext { + const AVClass *class; +} UnsandContext; + +static av_cold void uninit(AVFilterContext *ctx) +{ +// UnsandContext *s = ctx->priv; +} + +static av_cold int init(AVFilterContext *ctx) +{ +// UnsandContext *s = ctx->priv; + + return 0; +} + + +static int filter_frame(AVFilterLink *link, AVFrame *in) +{ + AVFilterLink * const outlink = link->dst->outputs[0]; + AVFrame *out = NULL; + int rv = 0; + + if (outlink->format == in->format) { + // If nothing to do then do nothing + out = in; + } + else + { + if ((out = ff_get_video_buffer(outlink, av_frame_cropped_width(in), av_frame_cropped_height(in))) == NULL) + { + rv = AVERROR(ENOMEM); + goto fail; + } + if (av_rpi_sand_to_planar_frame(out, in) != 0) + { + rv = -1; + goto fail; + } + + av_frame_free(&in); + } + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&out); + av_frame_free(&in); + return rv; +} + +#if 0 +static void dump_fmts(const AVFilterFormats * fmts) +{ + int i; + if (fmts== NULL) { + printf("NULL\n"); + return; + } + for (i = 0; i < fmts->nb_formats; ++i) { + printf(" %d", fmts->formats[i]); + } + printf("\n"); +} +#endif + +static int query_formats(AVFilterContext *ctx) +{ +// UnsandContext *s = ctx->priv; + int ret; + + // If we aren't connected at both ends then just do nothing + if (ctx->inputs[0] == NULL || ctx->outputs[0] == NULL) + return 0; + + // Our output formats depend on our input formats and we can't/don't + // want to convert between bit depths so we need to wait for the source + // to have an opinion before we do + if (ctx->inputs[0]->incfg.formats == NULL) + return AVERROR(EAGAIN); + + // Accept anything + if (ctx->inputs[0]->outcfg.formats == NULL && + (ret = ff_formats_ref(ctx->inputs[0]->incfg.formats, &ctx->inputs[0]->outcfg.formats)) < 0) + return ret; + + // Filter out sand formats + + // Generate a container if we don't already have one + if (ctx->outputs[0]->incfg.formats == NULL) + { + // Somewhat rubbish way of ensuring we have a good structure + const static enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE}; + AVFilterFormats *formats = ff_make_format_list(out_fmts); + + if (formats == NULL) + return AVERROR(ENOMEM); + if ((ret = ff_formats_ref(formats, &ctx->outputs[0]->incfg.formats)) < 0) + return ret; + } + + // Replace old format list with new filtered list derived from what our + // input says it can do + { + const AVFilterFormats * const src_ff = ctx->inputs[0]->outcfg.formats; + 
AVFilterFormats * const dst_ff = ctx->outputs[0]->incfg.formats; + enum AVPixelFormat *dst_fmts = av_malloc(sizeof(enum AVPixelFormat) * src_ff->nb_formats); + int i; + int n = 0; + int seen_420p = 0; + int seen_420p10 = 0; + + for (i = 0; i < src_ff->nb_formats; ++i) { + const enum AVPixelFormat f = src_ff->formats[i]; + + switch (f){ + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_SAND128: + case AV_PIX_FMT_RPI4_8: + if (!seen_420p) { + seen_420p = 1; + dst_fmts[n++] = AV_PIX_FMT_YUV420P; + } + break; + case AV_PIX_FMT_SAND64_10: + case AV_PIX_FMT_YUV420P10: + case AV_PIX_FMT_RPI4_10: + if (!seen_420p10) { + seen_420p10 = 1; + dst_fmts[n++] = AV_PIX_FMT_YUV420P10; + } + break; + default: + dst_fmts[n++] = f; + break; + } + } + + av_freep(&dst_ff->formats); + dst_ff->formats = dst_fmts; + dst_ff->nb_formats = n; + } + +// printf("Unsand: %s calc: ", __func__); +// dump_fmts(ctx->outputs[0]->incfg.formats); + + return 0; +} + + +#define OFFSET(x) offsetof(UnsandContext, x) +static const AVOption unsand_options[] = { + { NULL } +}; + + +AVFILTER_DEFINE_CLASS(unsand); + +static const AVFilterPad avfilter_vf_unsand_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = filter_frame, + }, + { NULL } +}; + +static const AVFilterPad avfilter_vf_unsand_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO + }, +}; + +FFFilter ff_vf_unsand = { + .p.name = "unsand", + .p.description = NULL_IF_CONFIG_SMALL("Convert sand pix fmt to yuv"), + .p.priv_class = &unsand_class, + + .init = init, + .uninit = uninit, + + FILTER_QUERY_FUNC(query_formats), + + .priv_size = sizeof(UnsandContext), + + FILTER_INPUTS(avfilter_vf_unsand_inputs), + FILTER_OUTPUTS(avfilter_vf_unsand_outputs), +}; + diff --git a/libavformat/Makefile b/libavformat/Makefile index 4786a9345a..04e8736dfc 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -180,6 +180,7 @@ OBJS-$(CONFIG_CODEC2_MUXER) += codec2.o rawenc.o OBJS-$(CONFIG_CODEC2RAW_DEMUXER) += codec2.o pcm.o OBJS-$(CONFIG_CODEC2RAW_MUXER) += rawenc.o OBJS-$(CONFIG_CONCAT_DEMUXER) += concatdec.o +OBJS-$(CONFIG_CONFORM_MUXER) += conformenc.o OBJS-$(CONFIG_CRC_MUXER) += crcenc.o OBJS-$(CONFIG_DATA_DEMUXER) += rawdec.o OBJS-$(CONFIG_DATA_MUXER) += rawenc.o diff --git a/libavformat/allformats.c b/libavformat/allformats.c index 6ec361fb7b..0b30685fe9 100644 --- a/libavformat/allformats.c +++ b/libavformat/allformats.c @@ -127,6 +127,7 @@ extern const FFOutputFormat ff_codec2_muxer; extern const FFInputFormat ff_codec2raw_demuxer; extern const FFOutputFormat ff_codec2raw_muxer; extern const FFInputFormat ff_concat_demuxer; +extern const FFOutputFormat ff_conform_muxer; extern const FFOutputFormat ff_crc_muxer; extern const FFInputFormat ff_dash_demuxer; extern const FFOutputFormat ff_dash_muxer; diff --git a/libavformat/conformenc.c b/libavformat/conformenc.c new file mode 100644 index 0000000000..99243f4622 --- /dev/null +++ b/libavformat/conformenc.c @@ -0,0 +1,491 @@ +/* + * Copyright (c) 2020 John Cox for Raspberry Pi Trading + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +// *** This module is a work in progress and its utility is strictly +// limited to testing. + +#include "config.h" +#include "libavutil/opt.h" +#include "libavutil/frame.h" +#include "libavutil/md5.h" +#include "libavutil/mem.h" +#include "libavutil/pixdesc.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_drm.h" +#if CONFIG_SAND +#include "libavutil/rpi_sand_fns.h" +#endif +#include "mux.h" + +#include "pthread.h" +#include +#include + +#define TRACE_ALL 0 + +#define DRM_MODULE "vc4" + +enum conform_optype_e { + CONFORM_OPTYPE_NONE, + CONFORM_OPTYPE_PLANAR, + CONFORM_OPTYPE_RAW_CROP, + CONFORM_OPTYPE_RAW_FULL, +}; + +static const struct { + const char * name; + enum conform_optype_e op; +} optable[] = { + {"planar", CONFORM_OPTYPE_PLANAR}, + {"raw_crop", CONFORM_OPTYPE_RAW_CROP}, + {"raw_full", CONFORM_OPTYPE_RAW_FULL}, + {NULL, CONFORM_OPTYPE_NONE} +}; + +static enum conform_optype_e +op_str_to_enum(const char * const str) +{ + unsigned int i; + for (i = 0; optable[i].name != NULL; ++i) { + if (strcmp(optable[i].name, str) == 0) + break; + } + return optable[i].op; +} + +enum conform_outtype_e { + CONFORM_OUTTYPE_NONE, + CONFORM_OUTTYPE_MD5, + CONFORM_OUTTYPE_FILE, +}; + +static const struct { + const char * name; + enum conform_optype_e op; +} outtable[] = { + {"md5", CONFORM_OUTTYPE_MD5}, + {"file", CONFORM_OUTTYPE_FILE}, + {NULL, CONFORM_OPTYPE_NONE} +}; + +static enum conform_outtype_e +out_str_to_enum(const char * const str) +{ + unsigned int i; + for (i = 0; outtable[i].name != NULL; ++i) { + if (strcmp(outtable[i].name, str) == 0) + break; + } + return outtable[i].op; +} + + +// Aux size should only need to be 2, but on a few streams (Hobbit) under FKMS +// we get initial flicker probably due to dodgy drm timing +#define AUX_SIZE 3 +typedef struct conform_display_env_s +{ + AVClass *class; + + void * line_buf; + size_t line_size; + + struct AVMD5 * frame_md5; + struct AVMD5 * md5; + + int frame_md5_flag; + int use_corrupt; + char * optype_str; + enum conform_optype_e optype; + char * outtype_str; + enum conform_outtype_e outtype; + + unsigned long long foffset; + unsigned int fno; +} conform_display_env_t; + + +static int conform_vout_write_trailer(AVFormatContext *s) +{ + conform_display_env_t * const de = s->priv_data; + +#if TRACE_ALL + av_log(s, AV_LOG_DEBUG, "%s\n", __func__); +#endif + + if (de->md5) { + uint8_t m[16]; + av_md5_final(de->md5, m); + avio_printf(s->pb, "MD5=%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", + m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8], m[9], m[10], m[11], m[12], m[13], m[14], m[15]); + } + + return 0; +} + +static int conform_vout_write_header(AVFormatContext *s) +{ + conform_display_env_t * const de = s->priv_data; + +#if TRACE_ALL + av_log(s, AV_LOG_DEBUG, "%s\n", __func__); +#endif + + if (de->md5) + av_md5_init(de->md5); + de->fno = 1; + de->foffset = 0; + + return 0; +} + +static int start_frame(AVFormatContext * const s, conform_display_env_t * const de, const AVFrame * const sf) +{ + if (!de->use_corrupt && (sf->flags & AV_FRAME_FLAG_CORRUPT) != 0) { + av_log(s, AV_LOG_WARNING, "Discard corrupt frame: fmt=%d, ts=%" PRId64 "\n", sf->format, sf->pts); + if (de->frame_md5) + avio_printf(s->pb, 
"MD5-Frame-%d=*BAD*\n", de->fno); + ++de->fno; + return -1; + } + + if (de->frame_md5) + av_md5_init(de->frame_md5); + return 0; +} + +static void add_block(AVFormatContext * const s, conform_display_env_t * const de, + const void * const line, const size_t size) +{ + if (de->frame_md5) + av_md5_update(de->frame_md5, line, size); + if (de->md5) + av_md5_update(de->md5, line, size); + else + avio_write(s->pb, line, size); + de->foffset += size; +} + +static void end_frame(AVFormatContext * const s, conform_display_env_t * const de) +{ + if (de->frame_md5) { + uint8_t m[16]; + av_md5_final(de->frame_md5, m); + avio_printf(s->pb, "MD5-Frame-%d=%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", + de->fno, + m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8], m[9], m[10], m[11], m[12], m[13], m[14], m[15]); + } + ++de->fno; +} + +static int conform_planar(AVFormatContext * const s, conform_display_env_t * const de, const AVFrame * const sf) +{ + AVFrame * cf = NULL; + const AVFrame * f = sf; + + const AVPixFmtDescriptor * pix_desc = av_pix_fmt_desc_get(sf->format); + int is_hw = (pix_desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0; + enum AVPixelFormat fmt = is_hw ? AV_PIX_FMT_NONE : sf->format; + unsigned int i; + char * meta = NULL; + + if (is_hw) { + enum AVPixelFormat *xfmts = NULL; + av_hwframe_transfer_get_formats(sf->hw_frames_ctx, AV_HWFRAME_TRANSFER_DIRECTION_FROM, &xfmts, 0); + fmt = *xfmts; + av_free(xfmts); + } + + av_dict_get_string(sf->metadata, &meta, '=', ';'); + av_log(s, AV_LOG_DEBUG, "%s: Frame %3d: %#08llx %dx%d crop(ltrb) %zd,%zd,%zd,%zd fmt %s -> %s PTS %"PRId64" [%s]%s\n", __func__, + de->fno, de->foffset, + sf->width, sf->height, sf->crop_left, sf->crop_top, sf->crop_right, sf->crop_bottom, + av_get_pix_fmt_name(sf->format), av_get_pix_fmt_name(fmt), sf->pts, meta, + (sf->flags & AV_FRAME_FLAG_CORRUPT) != 0 ? "BAD" : ""); + free(meta); + + if (start_frame(s, de, sf)) + return 0; + + if (is_hw) { + cf = av_frame_alloc(); + cf->format = fmt; + av_hwframe_transfer_data(cf, sf, AV_HWFRAME_TRANSFER_DIRECTION_FROM); + pix_desc = av_pix_fmt_desc_get(cf->format); + f = cf; + } + + // This is fully generic - much optimisation possible + for (i = 0; i != pix_desc->nb_components; ++i) { + const AVComponentDescriptor * const cd = pix_desc->comp + i; + const unsigned int srw = ((i == 1 || i == 2) ? pix_desc->log2_chroma_w : 0); + const unsigned int rndw = (1 << srw) - 1; + const unsigned int srh = ((i == 1 || i == 2) ? pix_desc->log2_chroma_h : 0); + const unsigned int rndh = (1 << srh) - 1; + const unsigned int srp = cd->shift; + const unsigned int bpp = cd->depth > 8 ? 
2 : 1; + const unsigned int h = (f->height - (f->crop_top + f->crop_bottom) + rndh) >> srh; + const unsigned int w = (f->width - (f->crop_left + f->crop_right) + rndw) >> srw; + unsigned int y; + for (y = 0; y < h; ++y) { + const void *const lstart = f->data[cd->plane] + (y + (f->crop_top >> srh)) * f->linesize[cd->plane] + cd->offset + (f->crop_left >> srw) * cd->step; + unsigned int x; + + // If line_buf construction would be a simple copy then bypass + if (srp == 0 && cd->step == bpp) { + add_block(s, de, lstart, w * bpp); + continue; + } + + if (bpp == 1) { + uint8_t *d = de->line_buf; + const uint8_t *s = lstart; + for (x = 0; x != w; ++x) { + *d++ = *s >> srp; + s += cd->step; + } + } + else { + uint16_t *d = de->line_buf; + const uint8_t *s = lstart; + for (x = 0; x != w; ++x) { + *d++ = *(uint16_t*)s >> srp; + s += cd->step; + } + } + + // We have one line + + add_block(s, de, de->line_buf, w * bpp); + } + } + + end_frame(s, de); + + av_frame_free(&cf); + + return 0; +} + +static int conform_raw(AVFormatContext * const s, conform_display_env_t * const de, const AVFrame * const sf, const int full) +{ + AVFrame * cf = NULL; + const AVFrame * f = sf; + + const AVPixFmtDescriptor * pix_desc = av_pix_fmt_desc_get(sf->format); + int is_hw = (pix_desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0; + unsigned int i; + unsigned int planes_done = 0; + + av_log(s, AV_LOG_DEBUG, "%s: Frame %3d: %#08llx %dx%d crop(ltrb) %zd,%zd,%zd,%zd fmt %s\n", __func__, + de->fno, de->foffset, + sf->width, sf->height, sf->crop_left, sf->crop_top, sf->crop_right, sf->crop_bottom, + av_get_pix_fmt_name(sf->format)); + + if (start_frame(s, de, sf)) + return 0; + + if (is_hw) { + int rv; + cf = av_frame_alloc(); + if ((rv = av_hwframe_map(cf, sf, AV_HWFRAME_MAP_READ)) != 0) { + av_log(s, AV_LOG_ERROR, "Failed to map input frame\n"); + return rv; + } + pix_desc = av_pix_fmt_desc_get(cf->format); + f = cf; + } + +#if CONFIG_SAND + if (av_rpi_is_sand_frame(f)) { + // Raw sand doesn't make sense cropped so treat as full + // If Single buffer SAND (i.e. a single stripe has Y & C) then dump as + // one buffer - otherwise y then C + const unsigned int stride1 = av_rpi_sand_frame_stride1(f); + const unsigned int w = ((f->width + stride1 - 1) / stride1) * stride1; + const unsigned int stride2_y = av_rpi_sand_frame_stride2_y(f); + const unsigned int stride2_c = av_rpi_sand_frame_stride2_c(f); + if (stride2_c == stride2_y) { + // Single buffer + av_log(s, AV_LOG_TRACE, "%s: %s single %d x %d\n", __func__, av_get_pix_fmt_name(f->format), w, stride2_y); + add_block(s, de, f->data[0], w * stride2_y); + } + else { + // Two buffers + av_log(s, AV_LOG_TRACE, "%s: %s double %d x %d,%d\n", __func__, av_get_pix_fmt_name(f->format), w, stride2_y, stride2_c); + add_block(s, de, f->data[0], w * stride2_y); + add_block(s, de, f->data[1], w * stride2_c); + } + } + else +#endif + if (!full) { + for (i = 0; i != pix_desc->nb_components; ++i) { + const AVComponentDescriptor * const cd = pix_desc->comp + i; + const unsigned int srw = ((i == 1 || i == 2) ? pix_desc->log2_chroma_w : 0); + const unsigned int rndw = (1 << srw) - 1; + const unsigned int srh = ((i == 1 || i == 2) ? 
pix_desc->log2_chroma_h : 0); + const unsigned int rndh = (1 << srh) - 1; + const unsigned int h = (f->height - (f->crop_top + f->crop_bottom) + rndh) >> srh; + const unsigned int w = (f->width - (f->crop_left + f->crop_right) + rndw) >> srw; + const unsigned int plane_bit = (1U << cd->plane); + unsigned int y; + + if ((planes_done & plane_bit) != 0) + continue; + planes_done |= plane_bit; + + for (y = 0; y < h; ++y) { + const void *const lstart = f->data[cd->plane] + (y + (f->crop_top >> srh)) * f->linesize[cd->plane] + (f->crop_left >> srw) * cd->step; + + // We have one line + + add_block(s, de, lstart, w * cd->step); + } + } + } + else { + for (i = 0; i != pix_desc->nb_components; ++i) { + const AVComponentDescriptor * const cd = pix_desc->comp + i; + const unsigned int srh = ((i == 1 || i == 2) ? pix_desc->log2_chroma_h : 0); + const unsigned int rndh = (1 << srh) - 1; + const unsigned int h = (f->height + rndh) >> srh; + const unsigned int plane_bit = (1U << cd->plane); + + if ((planes_done & plane_bit) != 0) + continue; + planes_done |= plane_bit; + + add_block(s, de, f->data[cd->plane], h * f->linesize[cd->plane]); + } + } + + end_frame(s, de); + + av_frame_free(&cf); + return 0; +} + +static int conform_vout_write_packet(AVFormatContext *s, AVPacket *pkt) +{ + conform_display_env_t * const de = s->priv_data; + const AVFrame * const sf = (AVFrame *)pkt->data; + + switch (de->optype) { + case CONFORM_OPTYPE_PLANAR: + return conform_planar(s, de, sf); + case CONFORM_OPTYPE_RAW_CROP: + return conform_raw(s, de, sf, 0); + case CONFORM_OPTYPE_RAW_FULL: + return conform_raw(s, de, sf, 1); + default: + break; + } + return 0; +} + +static int conform_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, + unsigned flags) +{ + av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); + return AVERROR_PATCHWELCOME; +} + +// deinit is called if init fails so no need to clean up explicity here +static int conform_vout_init(struct AVFormatContext * s) +{ + conform_display_env_t * const de = s->priv_data; + + av_log(s, AV_LOG_DEBUG, "<<< %s (%s -> %s)\n", __func__, de->optype_str, de->outtype_str); + + de->optype = op_str_to_enum(de->optype_str); + if (de->optype == CONFORM_OPTYPE_NONE) { + av_log(s, AV_LOG_ERROR, "Unknown optype '%s'\n", de->optype_str); + return AVERROR_OPTION_NOT_FOUND; + } + de->outtype = out_str_to_enum(de->outtype_str); + if (de->outtype == CONFORM_OUTTYPE_NONE) { + av_log(s, AV_LOG_ERROR, "Unknown output '%s'\n", de->optype_str); + return AVERROR_OPTION_NOT_FOUND; + } + + de->line_size = (8192 * 4); // 4bpp * 8k seems plenty + de->line_buf = av_malloc(de->line_size); + if (de->outtype == CONFORM_OUTTYPE_MD5) { + de->md5 = av_md5_alloc(); + if (de->frame_md5_flag) + de->frame_md5 = av_md5_alloc(); + } + + av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); + + return 0; +} + +static void conform_vout_deinit(struct AVFormatContext * s) +{ + conform_display_env_t * const de = s->priv_data; + + av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); + + av_freep(&de->line_buf); + av_freep(&de->md5); + av_freep(&de->frame_md5); + + av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); +} + + +#define OFFSET(x) offsetof(conform_display_env_t, x) +static const AVOption options[] = { + { "conform_frame_md5", "Produce per-frame MD5s as well as final", OFFSET(frame_md5_flag), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, + { "conform_out", "Output type ('md5', 'file') [default: md5]", OFFSET(outtype_str), AV_OPT_TYPE_STRING, { .str 
= "md5" }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, + { "conform_type", "Type of buffer to work on ('planar', 'raw_crop', 'raw_full') [default: planar]", OFFSET(optype_str), AV_OPT_TYPE_STRING, {.str = "planar"}, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, + { "conform_corrupt", "Use frames marked corrupt", OFFSET(use_corrupt), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, + { NULL } +}; + +static const AVClass conform_vid_class = { + .class_name = "conform vid muxer", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, + .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT, +}; + +const FFOutputFormat ff_conform_muxer = { + .p = { + .name = "conform", + .long_name = NULL_IF_CONFIG_SMALL("Video out conformance test helper"), + .audio_codec = AV_CODEC_ID_NONE, + .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, + .flags = AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, + .priv_class = &conform_vid_class, + }, + .priv_data_size = sizeof(conform_display_env_t), + .write_header = conform_vout_write_header, + .write_packet = conform_vout_write_packet, + .write_uncoded_frame = conform_vout_write_frame, + .write_trailer = conform_vout_write_trailer, + .init = conform_vout_init, + .deinit = conform_vout_deinit, +}; + diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index 2c85672389..cb78ee92fe 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -87,6 +87,10 @@ #define IS_WEBM(mkv) (CONFIG_WEBM_MUXER && CONFIG_MATROSKA_MUXER ? \ ((mkv)->mode == MODE_WEBM) : CONFIG_WEBM_MUXER) + +/* Reserved size for H264 headers if not extant at init time */ +#define MAX_H264_HEADER_SIZE 1024 + #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \ !(mkv)->is_live) @@ -1170,8 +1174,12 @@ static int mkv_assemble_native_codecprivate(AVFormatContext *s, AVIOContext *dyn case AV_CODEC_ID_WAVPACK: return put_wv_codecpriv(dyn_cp, extradata, extradata_size); case AV_CODEC_ID_H264: - return ff_isom_write_avcc(dyn_cp, extradata, - extradata_size); + if (extradata_size) + return ff_isom_write_avcc(dyn_cp, extradata, + extradata_size); + else + *size_to_reserve = MAX_H264_HEADER_SIZE; + break; case AV_CODEC_ID_HEVC: return ff_isom_write_hvcc(dyn_cp, extradata, extradata_size, 0, s); @@ -3027,8 +3035,8 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt) } break; #endif - // FIXME: Remove the following once libaom starts propagating proper extradata during init() - // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2208 + // FIXME: Remove the following once libaom starts propagating extradata during init() + // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012 case AV_CODEC_ID_AV1: if (side_data_size && mkv->track.bc && !par->extradata_size) { // If the reserved space doesn't suffice, only write @@ -3040,6 +3048,16 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt) } else if (!par->extradata_size) return AVERROR_INVALIDDATA; break; + // H264 V4L2 has a similar issue + case AV_CODEC_ID_H264: + if (side_data_size && mkv->track.bc && !par->extradata_size) { + ret = mkv_update_codecprivate(s, mkv, side_data, side_data_size, + par, mkv->track.bc, track, 0); + if (ret < 0) + return ret; + } else if (!par->extradata_size) + return AVERROR_INVALIDDATA; + break; default: if (side_data_size) av_log(s, AV_LOG_DEBUG, "Ignoring new extradata in a packet for stream %d.\n", pkt->stream_index); @@ -3502,6 +3520,13 @@ static int mkv_init(struct AVFormatContext *s) 
track->reformat = mkv_reformat_wavpack; break; case AV_CODEC_ID_H264: + // Default to reformat if no extradata as the only current + // encoder which does this is v4l2m2m which needs reformat + if (par->extradata_size == 0 || + (par->extradata_size > 3 && + (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1))) + track->reformat = mkv_reformat_h2645; + break; case AV_CODEC_ID_HEVC: case AV_CODEC_ID_VVC: if (((par->codec_id == AV_CODEC_ID_H264 && par->extradata_size > 0) || diff --git a/libavformat/movenc.c b/libavformat/movenc.c index fe6b259561..4a04318cca 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ -7199,6 +7199,7 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt) if (trk->par->codec_id == AV_CODEC_ID_MP4ALS || trk->par->codec_id == AV_CODEC_ID_AAC || trk->par->codec_id == AV_CODEC_ID_AV1 || + trk->par->codec_id == AV_CODEC_ID_H264 || trk->par->codec_id == AV_CODEC_ID_FLAC) { size_t side_size; uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c index 1d4b00a0f4..0dda6ab0e7 100644 --- a/libavformat/rtpenc.c +++ b/libavformat/rtpenc.c @@ -19,6 +19,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "avc.h" #include "avformat.h" #include "mpegts.h" #include "internal.h" @@ -616,8 +617,25 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt) ff_rtp_send_vc2hq(s1, pkt->data, size, st->codecpar->field_order != AV_FIELD_PROGRESSIVE ? 1 : 0); break; case AV_CODEC_ID_H264: + { + uint8_t *side_data; + size_t side_data_size = 0; + + side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, + &side_data_size); + + if (side_data_size != 0) { + int ps_size = side_data_size; + uint8_t * ps_buf = NULL; + + ff_avc_write_annexb_extradata(side_data, &ps_buf, &ps_size); + av_log(s1, AV_LOG_TRACE, "H264: write side data=%d\n", ps_size); + ff_rtp_send_h264_hevc(s1, ps_buf ? 
ps_buf : side_data, ps_size); + av_free(ps_buf); + } ff_rtp_send_h264_hevc(s1, pkt->data, size); break; + } case AV_CODEC_ID_H261: ff_rtp_send_h261(s1, pkt->data, size); break; diff --git a/libavutil/Makefile b/libavutil/Makefile index c5241895ff..792ee70d73 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -52,6 +52,7 @@ HEADERS = adler32.h \ hwcontext_mediacodec.h \ hwcontext_opencl.h \ hwcontext_oh.h \ + hwcontext_v4l2request.h \ hwcontext_vaapi.h \ hwcontext_videotoolbox.h \ hwcontext_vdpau.h \ @@ -81,6 +82,7 @@ HEADERS = adler32.h \ refstruct.h \ replaygain.h \ ripemd.h \ + rpi_sand_fns.h \ samplefmt.h \ sha.h \ sha512.h \ @@ -214,6 +216,8 @@ OBJS-$(CONFIG_MEDIACODEC) += hwcontext_mediacodec.o OBJS-$(CONFIG_OHCODEC) += hwcontext_oh.o OBJS-$(CONFIG_OPENCL) += hwcontext_opencl.o OBJS-$(CONFIG_QSV) += hwcontext_qsv.o +OBJS-$(CONFIG_V4L2_REQUEST) += hwcontext_v4l2request.o +OBJS-$(CONFIG_SAND) += rpi_sand_fns.o OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o @@ -245,6 +249,9 @@ SKIPHEADERS-$(CONFIG_AMF) += hwcontext_amf.h \ hwcontext_amf_internal.h SKIPHEADERS-$(CONFIG_QSV) += hwcontext_qsv.h SKIPHEADERS-$(CONFIG_OPENCL) += hwcontext_opencl.h +SKIPHEADERS-$(CONFIG_V4L2_REQUEST) += hwcontext_v4l2request.h \ + hwcontext_v4l2request_internal.h +SKIPHEADERS-$(CONFIG-RPI) += rpi_sand_fn_pw.h SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile index b70702902f..5396abcd9b 100644 --- a/libavutil/aarch64/Makefile +++ b/libavutil/aarch64/Makefile @@ -6,7 +6,9 @@ ARMV8-OBJS += aarch64/crc.o NEON-OBJS += aarch64/float_dsp_neon.o \ aarch64/tx_float_neon.o \ + aarch64/rpi_sand_neon.o \ SVE-OBJS += aarch64/cpu_sve.o \ SME-OBJS += aarch64/cpu_sme.o \ + diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S new file mode 100644 index 0000000000..3a6bc3de74 --- /dev/null +++ b/libavutil/aarch64/rpi_sand_neon.S @@ -0,0 +1,672 @@ +/* +Copyright (c) 2021 Michael Eiler + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Authors: Michael Eiler +*/ + +#include "asm.S" + +// void ff_rpi_sand8_lines_to_planar_y8( +// uint8_t * dest, : x0 +// unsigned int dst_stride, : w1 +// const uint8_t * src, : x2 +// unsigned int src_stride1, : w3, always 128 +// unsigned int src_stride2, : w4 +// unsigned int _x, : w5 +// unsigned int y, : w6 +// unsigned int _w, : w7 +// unsigned int h); : [sp, #0] + +function ff_rpi_sand8_lines_to_planar_y8, export=1 + // w15 contains the number of rows we need to process + ldr w15, [sp, #0] + + // w8 will contain the number of blocks per row + // w8 = floor(_w/stride1) + // stride1 is assumed to always be 128 + mov w8, w1 + lsr w8, w8, #7 + + // in case the width of the image is not a multiple of 128, there will + // be an incomplete block at the end of every row + // w9 contains the number of pixels stored within this block + // w9 = _w - w8 * 128 + lsl w9, w8, #7 + sub w9, w7, w9 + + // this is the value we have to add to the src pointer after reading a complete block + // it will move the address to the start of the next block + // w10 = stride2 * stride1 - stride1 + mov w10, w4 + lsl w10, w10, #7 + sub w10, w10, #128 + + // w11 is the row offset, meaning the start offset of the first block of every collumn + // this will be increased with stride1 within every iteration of the row_loop + eor w11, w11, w11 + + // w12 = 0, processed row count + eor w12, w12, w12 +row_loop: + // start of the first block within the current row + // x13 = row offset + src + mov x13, x2 + add x13, x13, x11 + + // w14 = 0, processed block count + eor w14, w14, w14 + + cmp w8, #0 + beq no_main_y8 + +block_loop: + // copy 128 bytes (a full block) into the vector registers v0-v7 and increase the src address by 128 + // fortunately these aren't callee saved ones, meaning we don't need to backup them + ld1 { v0.16b, v1.16b, v2.16b, v3.16b}, [x13], #64 + ld1 { v4.16b, v5.16b, v6.16b, v7.16b}, [x13], #64 + + // write these registers back to the destination vector and increase the dst address by 128 + st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 + st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x0], #64 + + // move the source register to the beginning of the next block (x13 = src + block offset) + add x13, x13, x10 + // increase the block counter + add w14, w14, #1 + + // continue with the block_loop if we haven't copied all full blocks yet + cmp w8, w14 + bgt block_loop + + // handle the last block at the end of each row + // at most 127 byte values copied from src to dst +no_main_y8: + eor w5, w5, w5 // i = 0 +incomplete_block_loop_y8: + cmp w5, w9 + bge incomplete_block_loop_end_y8 + + ldrb w6, [x13] + strb w6, [x0] + add x13, x13, #1 + add x0, x0, #1 + + add w5, w5, #1 + b incomplete_block_loop_y8 +incomplete_block_loop_end_y8: + + + // increase the row offset by 128 (stride1) + add w11, w11, #128 + // increment the row counter + add w12, w12, #1 + + // process the next row if we haven't finished yet + cmp w15, w12 + bgt row_loop + + ret +endfunc + + + +// void ff_rpi_sand8_lines_to_planar_c8( +// 
uint8_t * dst_u, : x0 +// unsigned int dst_stride_u, : w1 == width +// uint8_t * dst_v, : x2 +// unsigned int dst_stride_v, : w3 == width +// const uint8_t * src, : x4 +// unsigned int stride1, : w5 == 128 +// unsigned int stride2, : w6 +// unsigned int _x, : w7 +// unsigned int y, : [sp, #0] +// unsigned int _w, : [sp, #8] +// unsigned int h); : [sp, #16] + +function ff_rpi_sand8_lines_to_planar_c8, export=1 + // w7 = width + ldr w7, [sp, #8] + + // w15 contains the number of rows we need to process + // counts down + ldr w15, [sp, #16] + + // number of full blocks, w8 = _w / (stride1 >> 1) == _w / 64 == _w >> 6 + mov w8, w7 + lsr w8, w8, #6 + + // number of pixels in block at the end of every row + // w9 = _w - (w8 * 64) + lsl w9, w8, #6 + sub w9, w7, w9 + + // Skip at the end of the line to account for stride + sub w12, w1, w7 + + // address delta to the beginning of the next block + // w10 = (stride2 * stride1 - stride1) = stride2 * 128 - 128 + lsl w10, w6, #7 + sub w10, w10, #128 + + // w11 = row address start offset = 0 + eor w11, w11, w11 + +row_loop_c8: + // start of the first block within the current row + // x13 = row offset + src + mov x13, x4 + add x13, x13, x11 + + // w14 = 0, processed block count + eor w14, w14, w14 + + cmp w8, #0 + beq no_main_c8 + +block_loop_c8: + // load the full block -> 128 bytes, the block contains 64 interleaved U and V values + ld2 { v0.16b, v1.16b }, [x13], #32 + ld2 { v2.16b, v3.16b }, [x13], #32 + ld2 { v4.16b, v5.16b }, [x13], #32 + ld2 { v6.16b, v7.16b }, [x13], #32 + + // swap register so that we can write them out with a single instruction + mov v16.16b, v1.16b + mov v17.16b, v3.16b + mov v18.16b, v5.16b + mov v1.16b, v2.16b + mov v2.16b, v4.16b + mov v3.16b, v6.16b + mov v4.16b, v16.16b + mov v5.16b, v17.16b + mov v6.16b, v18.16b + + st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 + st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x2], #64 + + // increment row counter and move src to the beginning of the next block + add w14, w14, #1 + add x13, x13, x10 + + // jump to block_loop_c8 iff the block count is smaller than the number of full blocks + cmp w8, w14 + bgt block_loop_c8 + +no_main_c8: + // handle incomplete block at the end of every row + eor w5, w5, w5 // point counter, this might be +incomplete_block_loop_c8: + cmp w5, w9 + bge incomplete_block_loop_end_c8 + + ldrb w1, [x13] + strb w1, [x0] + add x13, x13, #1 + + ldrb w1, [x13] + strb w1, [x2] + add x13, x13, #1 + + add x0, x0, #1 + add x2, x2, #1 + + add w5, w5, #1 + b incomplete_block_loop_c8 +incomplete_block_loop_end_c8: + + // increase row_offset by stride1 + add w11, w11, #128 + add x0, x0, w12, sxtw + add x2, x2, w12, sxtw + + // jump to row_Loop_c8 iff the row count is small than the height + subs w15, w15, #1 + bgt row_loop_c8 + + ret +endfunc + +// Unzip chroma +// +// On entry: +// a0 = V0, U2, ... +// a1 = U0, V1, ... +// a2 = U1, V2, ... +// b0 = V8, U10, ... +// b1 = U8, V9, ... +// b2 = U9, V10, ... +// +// On exit: +// d0 = U0, U3, ... +// ... +// a0 = V0, V3, .. +// ... +// +// Reg order for USAND is a1, a0, a2 (i.e. 
swap natural order of 1st 2 dest regs) + +.macro UZPH_C d0, d1, d2, a0, a1, a2, b0, b1, b2 + uzp1 \d0\().8h, \a1\().8h, \b1\().8h + uzp1 \d1\().8h, \a2\().8h, \b2\().8h + uzp2 \d2\().8h, \a0\().8h, \b0\().8h + + uzp1 \a0\().8h, \a0\().8h, \b0\().8h + uzp2 \a1\().8h, \a1\().8h, \b1\().8h + uzp2 \a2\().8h, \a2\().8h, \b2\().8h +.endm + +// SAND30 -> 10bit +.macro USAND10 d0, d1, d2, a0, a1 + shrn \d2\().4h, \a0\().4s, #14 + shrn \d1\().4h, \a0\().4s, #10 + + shrn2 \d2\().8h, \a1\().4s, #14 + shrn2 \d1\().8h, \a1\().4s, #10 + uzp1 \d0\().8h, \a0\().8h, \a1\().8h + + ushr \d2\().8h, \d2\().8h, #6 + bic \d0\().8h, #0xfc, lsl #8 + bic \d1\().8h, #0xfc, lsl #8 +.endm + +// SAND30 -> 8bit +.macro USAND8 d0, d1, d2, a0, a1, a2, a3, t0, t1, t2 + shrn \d1\().4h, \a0\().4s, #12 + shrn2 \d1\().8h, \a1\().4s, #12 + uzp1 \d0\().8h, \a0\().8h, \a1\().8h + uzp2 \d2\().8h, \a0\().8h, \a1\().8h + + shrn \t1\().4h, \a2\().4s, #12 + shrn2 \t1\().8h, \a3\().4s, #12 + uzp1 \t0\().8h, \a2\().8h, \a3\().8h + uzp2 \t2\().8h, \a2\().8h, \a3\().8h + + shrn \d0\().8b, \d0\().8h, #2 + shrn2 \d0\().16b, \t0\().8h, #2 + shrn \d2\().8b, \d2\().8h, #6 + shrn2 \d2\().16b, \t2\().8h, #6 + uzp1 \d1\().16b, \d1\().16b, \t1\().16b +.endm + + +// void ff_rpi_sand30_lines_to_planar_c16( +// uint8_t * dst_u, // [x0] +// unsigned int dst_stride_u, // [w1] +// uint8_t * dst_v, // [x2] +// unsigned int dst_stride_v, // [w3] +// const uint8_t * src, // [x4] +// unsigned int stride1, // [w5] 128 +// unsigned int stride2, // [w6] +// unsigned int _x, // [w7] 0 +// unsigned int y, // [sp, #0] +// unsigned int _w, // [sp, #8] w9 +// unsigned int h); // [sp, #16] w10 + +function ff_rpi_sand30_lines_to_planar_c16, export=1 + ldr w7, [sp, #0] // y + ldr w8, [sp, #8] // _w + ldr w10, [sp, #16] // h + lsl w6, w6, #7 // Fixup stride2 + sub w6, w6, #64 + uxtw x6, w6 + sub w1, w1, w8, LSL #1 // Fixup chroma strides + sub w3, w3, w8, LSL #1 + lsl w7, w7, #7 // Add y to src + add x4, x4, w7, UXTW +10: + mov w13, #0 + mov x5, x4 + mov w9, w8 +1: + ld1 {v0.4s-v3.4s}, [x5], #64 + ld1 {v4.4s-v7.4s}, [x5], x6 + subs w9, w9, #48 + + USAND10 v17, v16, v18, v0, v1 + USAND10 v20, v19, v21, v2, v3 + UZPH_C v0, v1, v2, v16, v17, v18, v19, v20, v21 + USAND10 v23, v22, v24, v4, v5 + USAND10 v26, v25, v27, v6, v7 + UZPH_C v4, v5, v6, v22, v23, v24, v25, v26, v27 + + blt 2f + + st3 {v0.8h-v2.8h}, [x0], #48 + st3 {v4.8h-v6.8h}, [x0], #48 + st3 {v16.8h-v18.8h}, [x2], #48 + st3 {v22.8h-v24.8h}, [x2], #48 + + bne 1b +11: + subs w10, w10, #1 + add x4, x4, #128 + add x0, x0, w1, UXTW + add x2, x2, w3, UXTW + bne 10b +99: + ret + +// Partial final write +2: + cmp w9, #24-48 + blt 1f + st3 {v0.8h - v2.8h}, [x0], #48 + st3 {v16.8h - v18.8h}, [x2], #48 + beq 11b + mov v0.16b, v4.16b + mov v1.16b, v5.16b + sub w9, w9, #24 + mov v2.16b, v6.16b + mov v16.16b, v22.16b + mov v17.16b, v23.16b + mov v18.16b, v24.16b +1: + cmp w9, #12-48 + blt 1f + st3 {v0.4h - v2.4h}, [x0], #24 + st3 {v16.4h - v18.4h}, [x2], #24 + beq 11b + mov v0.d[0], v0.d[1] + sub w9, w9, #12 + mov v1.d[0], v1.d[1] + mov v2.d[0], v2.d[1] + mov v16.d[0], v16.d[1] + mov v17.d[0], v17.d[1] + mov v18.d[0], v18.d[1] +1: + cmp w9, #6-48 + blt 1f + st3 {v0.h - v2.h}[0], [x0], #6 + st3 {v0.h - v2.h}[1], [x0], #6 + st3 {v16.h - v18.h}[0], [x2], #6 + st3 {v16.h - v18.h}[1], [x2], #6 + beq 11b + mov v0.s[0], v0.s[1] + sub w9, w9, #6 + mov v1.s[0], v1.s[1] + mov v2.s[0], v2.s[1] + mov v16.s[0], v16.s[1] + mov v17.s[0], v17.s[1] + mov v18.s[0], v18.s[1] +1: + cmp w9, #3-48 + blt 1f + st3 {v0.h - v2.h}[0], [x0], #6 + st3 
{v16.h - v18.h}[0], [x2], #6 + beq 11b + mov v0.h[0], v0.h[1] + sub w9, w9, #3 + mov v1.h[0], v1.h[1] + mov v16.h[0], v16.h[1] + mov v17.h[0], v17.h[1] +1: + cmp w9, #2-48 + blt 1f + st2 {v0.h - v1.h}[0], [x0], #4 + st2 {v16.h - v17.h}[0], [x2], #4 + b 11b +1: + st1 {v0.h}[0], [x0], #2 + st1 {v16.h}[0], [x2], #2 + b 11b +endfunc + + +//void ff_rpi_sand30_lines_to_planar_p010( +// uint8_t * dest, +// unsigned int dst_stride, +// const uint8_t * src, +// unsigned int src_stride1, +// unsigned int src_stride2, +// unsigned int _x, +// unsigned int y, +// unsigned int _w, +// unsigned int h); + +// void ff_rpi_sand30_lines_to_planar_y8( +// uint8_t * dest, : x0 +// unsigned int dst_stride, : w1 +// const uint8_t * src, : x2 +// unsigned int src_stride1, : w3, always 128 +// unsigned int src_stride2, : w4 +// unsigned int _x, : w5 +// unsigned int y, : w6 +// unsigned int _w, : w7 +// unsigned int h); : [sp, #0] +// +// Assumes that we are starting on a stripe boundary and that overreading +// within the stripe is OK. However it does respect the dest size for wri + +function ff_rpi_sand30_lines_to_planar_y16, export=1 + lsl w4, w4, #7 + sub w4, w4, #64 + uxtw x4, w4 + sub w1, w1, w7, lsl #1 + uxtw x6, w6 + add x8, x2, x6, lsl #7 + ldr w6, [sp, #0] + +10: + mov x2, x8 + mov w5, w7 +1: + ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 + + subs w5, w5, #96 + + USAND10 v16, v17, v18, v0, v1 + USAND10 v19, v20, v21, v2, v3 + USAND10 v22, v23, v24, v4, v5 + USAND10 v25, v26, v27, v6, v7 + + blt 2f + + st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 + st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 + st3 {v22.8h, v23.8h, v24.8h}, [x0], #48 + st3 {v25.8h, v26.8h, v27.8h}, [x0], #48 + + bne 1b + +11: + subs w6, w6, #1 + add x0, x0, w1, uxtw + add x8, x8, #128 + bne 10b + + ret + +// Partial final write +2: + cmp w5, #48-96 + blt 1f + st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 + st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 + beq 11b + mov v16.16b, v22.16b + mov v17.16b, v23.16b + sub w5, w5, #48 + mov v18.16b, v24.16b + mov v19.16b, v25.16b + mov v20.16b, v26.16b + mov v21.16b, v27.16b +1: + cmp w5, #24-96 + blt 1f + st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 + beq 11b + mov v16.16b, v19.16b + mov v17.16b, v20.16b + sub w5, w5, #24 + mov v18.16b, v21.16b +1: + cmp w5, #12-96 + blt 1f + st3 {v16.4h, v17.4h, v18.4h}, [x0], #24 + beq 11b + mov v16.d[0], v16.d[1] + sub w5, w5, #12 + mov v17.d[0], v17.d[1] + mov v18.d[0], v18.d[1] +1: + cmp w5, #6-96 + blt 1f + st3 {v16.h, v17.h, v18.h}[0], [x0], #6 + st3 {v16.h, v17.h, v18.h}[1], [x0], #6 + beq 11b + mov v16.s[0], v16.s[1] + sub w5, w5, #6 + mov v17.s[0], v17.s[1] + mov v18.s[0], v18.s[1] +1: + cmp w5, #3-96 + blt 1f + st3 {v16.h, v17.h, v18.h}[0], [x0], #6 + beq 11b + mov v16.h[0], v16.h[1] + sub w5, w5, #3 + mov v17.h[0], v17.h[1] +1: + cmp w5, #2-96 + blt 1f + st2 {v16.h, v17.h}[0], [x0], #4 + b 11b +1: + st1 {v16.h}[0], [x0], #2 + b 11b + +endfunc + +// void ff_rpi_sand30_lines_to_planar_y8( +// uint8_t * dest, : x0 +// unsigned int dst_stride, : w1 +// const uint8_t * src, : x2 +// unsigned int src_stride1, : w3, always 128 +// unsigned int src_stride2, : w4 +// unsigned int _x, : w5 +// unsigned int y, : w6 +// unsigned int _w, : w7 +// unsigned int h); : [sp, #0] +// +// Assumes that we are starting on a stripe boundary and that overreading +// within the stripe is OK. 
However it does respect the dest size for wri + +function ff_rpi_sand30_lines_to_planar_y8, export=1 + lsl w4, w4, #7 + sub w4, w4, #64 + uxtw x4, w4 + sub w1, w1, w7 + uxtw x6, w6 + add x8, x2, x6, lsl #7 + ldr w6, [sp, #0] + +10: + mov x2, x8 + mov w5, w7 +1: + ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 + ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 + + subs w5, w5, #96 + + // v0, v1 + USAND8 v16, v17, v18, v0, v1, v2, v3, v22, v23, v24 + USAND8 v19, v20, v21, v4, v5, v6, v7, v22, v23, v24 + + blt 2f + + st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 + st3 {v19.16b, v20.16b, v21.16b}, [x0], #48 + + bne 1b + +11: + subs w6, w6, #1 + add x0, x0, w1, uxtw + add x8, x8, #128 + bne 10b + + ret + +// Partial final write +2: + cmp w5, #48-96 + blt 1f + st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 + beq 11b + mov v16.16b, v22.16b + mov v17.16b, v23.16b + sub w5, w5, #48 + mov v18.16b, v24.16b +1: + cmp w5, #24-96 + blt 1f + st3 {v16.8b, v17.8b, v18.8b}, [x0], #24 + beq 11b + mov v16.d[0], v16.d[1] + sub w5, w5, #24 + mov v17.d[0], v17.d[1] + mov v18.d[0], v18.d[1] +1: + cmp w5, #12-96 + blt 1f + st3 {v16.b, v17.b, v18.b}[0], [x0], #3 + st3 {v16.b, v17.b, v18.b}[1], [x0], #3 + st3 {v16.b, v17.b, v18.b}[2], [x0], #3 + st3 {v16.b, v17.b, v18.b}[3], [x0], #3 + beq 11b + mov v16.s[0], v16.s[1] + sub w5, w5, #12 + mov v17.s[0], v17.s[1] + mov v18.s[0], v18.s[1] +1: + cmp w5, #6-96 + blt 1f + st3 {v16.b, v17.b, v18.b}[0], [x0], #3 + st3 {v16.b, v17.b, v18.b}[1], [x0], #3 + beq 11b + mov v16.h[0], v16.h[1] + sub w5, w5, #6 + mov v17.h[0], v17.h[1] + mov v18.h[0], v18.h[1] +1: + cmp w5, #3-96 + blt 1f + st3 {v16.b, v17.b, v18.b}[0], [x0], #3 + beq 11b + mov v16.b[0], v16.b[1] + sub w5, w5, #3 + mov v17.b[0], v17.b[1] +1: + cmp w5, #2-96 + blt 1f + st2 {v16.b, v17.b}[0], [x0], #2 + b 11b +1: + st1 {v16.b}[0], [x0], #1 + b 11b + +endfunc + diff --git a/libavutil/aarch64/rpi_sand_neon.h b/libavutil/aarch64/rpi_sand_neon.h new file mode 100644 index 0000000000..e0e94f6aa1 --- /dev/null +++ b/libavutil/aarch64/rpi_sand_neon.h @@ -0,0 +1,61 @@ +/* +Copyright (c) 2021 Michael Eiler + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +Authors: Michael Eiler +*/ + +#ifndef AVUTIL_AARCH64_RPI_SAND_NEON_H +#define AVUTIL_AARCH64_RPI_SAND_NEON_H + +#ifdef __cplusplus +extern "C" { +#endif + +void ff_rpi_sand8_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, + const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, + unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); + +void ff_rpi_sand8_lines_to_planar_c8(uint8_t * dst_u, unsigned int dst_stride_u, + uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, + unsigned int stride1, unsigned int stride2, unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + +void ff_rpi_sand30_lines_to_planar_y16(uint8_t * dest, unsigned int dst_stride, + const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, + unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); + +void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_u, + uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1, + unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); + +void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, + const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, + unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); + +#ifdef __cplusplus +} +#endif + +#endif /* AVCODEC_SAND_NEON_H */ diff --git a/libavutil/arm/Makefile b/libavutil/arm/Makefile index 5da44b0542..b74b7c4e2f 100644 --- a/libavutil/arm/Makefile +++ b/libavutil/arm/Makefile @@ -6,3 +6,4 @@ VFP-OBJS += arm/float_dsp_init_vfp.o \ NEON-OBJS += arm/float_dsp_init_neon.o \ arm/float_dsp_neon.o \ + arm/rpi_sand_neon.o \ diff --git a/libavutil/arm/rpi_sand_neon.S b/libavutil/arm/rpi_sand_neon.S new file mode 100644 index 0000000000..60e697f681 --- /dev/null +++ b/libavutil/arm/rpi_sand_neon.S @@ -0,0 +1,925 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +Authors: John Cox +*/ + +#include "libavutil/arm/asm.S" + + +@ General notes: +@ Having done some timing on this in sand8->y8 (Pi4) +@ vst1 (680fps) is a bit faster than vstm (660fps) +@ vldm (680fps) is noticably faster than vld1 (480fps) +@ (or it might be that a mix is what is required) +@ +@ At least on a Pi4 it is no more expensive to have a single auto-inc register +@ for dest address than it is to have 2 used alternately (On Pi3 Ben asserted +@ the latter was better) +@ +@ vstm will bus error on unaligned access (so will vldm), vst1 is safe unless +@ the memory is uncached. +@ As these are Sand -> planar we can assume that src is going to be aligned but +@ it is possible that dest isn't (converting to .yuv or other packed format). +@ Luckily vst1 is faster than vstm :-) so all is well +@ vst1 has alignment requirements of el size so maybe splitting vst1.32 into 4 +@ .8 stores would let us do non-word aligned stores into uncached but it +@ probably isn't worth it. + + + + +@ void ff_rpi_sand128b_stripe_to_8_10( +@ uint8_t * dest, // [r0] +@ const uint8_t * src1, // [r1] +@ const uint8_t * src2, // [r2] +@ unsigned int lines); // [r3] + +.macro stripe2_to_8, bit_depth + vpush {q4-q7} +1: + vldm r1!, {q0-q7} + subs r3, #1 + vldm r2!, {q8-q15} + vqrshrn.u16 d0, q0, #\bit_depth - 8 + vqrshrn.u16 d1, q1, #\bit_depth - 8 + vqrshrn.u16 d2, q2, #\bit_depth - 8 + vqrshrn.u16 d3, q3, #\bit_depth - 8 + vqrshrn.u16 d4, q4, #\bit_depth - 8 + vqrshrn.u16 d5, q5, #\bit_depth - 8 + vqrshrn.u16 d6, q6, #\bit_depth - 8 + vqrshrn.u16 d7, q7, #\bit_depth - 8 + vqrshrn.u16 d8, q8, #\bit_depth - 8 + vqrshrn.u16 d9, q9, #\bit_depth - 8 + vqrshrn.u16 d10, q10, #\bit_depth - 8 + vqrshrn.u16 d11, q11, #\bit_depth - 8 + vqrshrn.u16 d12, q12, #\bit_depth - 8 + vqrshrn.u16 d13, q13, #\bit_depth - 8 + vqrshrn.u16 d14, q14, #\bit_depth - 8 + vqrshrn.u16 d15, q15, #\bit_depth - 8 + vstm r0!, {q0-q7} + bne 1b + vpop {q4-q7} + bx lr +.endm + +function ff_rpi_sand128b_stripe_to_8_10, export=1 + stripe2_to_8 10 +endfunc + +@ void ff_rpi_sand8_lines_to_planar_y8( +@ uint8_t * dest, // [r0] +@ unsigned int dst_stride, // [r1] +@ const uint8_t * src, // [r2] +@ unsigned int src_stride1, // [r3] Ignored - assumed 128 +@ unsigned int src_stride2, // [sp, #0] -> r3 +@ unsigned int _x, // [sp, #4] Ignored - 0 +@ unsigned int y, // [sp, #8] (r7 in prefix) +@ unsigned int _w, // [sp, #12] -> r6 (cur r5) +@ unsigned int h); // [sp, #16] -> r7 +@ +@ Assumes that we are starting on a stripe boundary and that overreading +@ within the stripe is OK. However it does respect the dest size for writing + +function ff_rpi_sand8_lines_to_planar_y8, export=1 + push {r4-r8, lr} @ +24 L + ldr r3, [sp, #24] + ldr r6, [sp, #36] + ldr r7, [sp, #32] @ y + lsl r3, #7 + sub r1, r6 + add r8, r2, r7, lsl #7 + ldr r7, [sp, #40] + +10: + mov r2, r8 + add r4, r0, #24 + mov r5, r6 + mov lr, #0 +1: + vldm r2, {q8-q15} + add r2, r3 + subs r5, #128 + blt 2f + vst1.8 {d16, d17, d18, d19}, [r0]! + vst1.8 {d20, d21, d22, d23}, [r0]! + vst1.8 {d24, d25, d26, d27}, [r0]! + vst1.8 {d28, d29, d30, d31}, [r0]! + bne 1b +11: + subs r7, #1 + add r0, r1 + add r8, #128 + bne 10b + + pop {r4-r8, pc} + +@ Partial final write +2: + cmp r5, #64-128 + blt 1f + vst1.8 {d16, d17, d18, d19}, [r0]! + vst1.8 {d20, d21, d22, d23}, [r0]! + beq 11b + vmov q8, q12 + vmov q9, q13 + sub r5, #64 + vmov q10, q14 + vmov q11, q15 +1: + cmp r5, #32-128 + blt 1f + vst1.8 {d16, d17, d18, d19}, [r0]! 
+ beq 11b + vmov q8, q10 + sub r5, #32 + vmov q9, q11 +1: + cmp r5, #16-128 + blt 1f + vst1.8 {d16, d17}, [r0]! + beq 11b + sub r5, #16 + vmov q8, q9 +1: + cmp r5, #8-128 + blt 1f + vst1.8 {d16}, [r0]! + beq 11b + sub r5, #8 + vmov d16, d17 +1: + cmp r5, #4-128 + blt 1f + vst1.32 {d16[0]}, [r0]! + beq 11b + sub r5, #4 + vshr.u64 d16, #32 +1: + cmp r5, #2-128 + blt 1f + vst1.16 {d16[0]}, [r0]! + beq 11b + vst1.8 {d16[2]}, [r0]! + b 11b +1: + vst1.8 {d16[0]}, [r0]! + b 11b +endfunc + +@ void ff_rpi_sand8_lines_to_planar_c8( +@ uint8_t * dst_u, // [r0] +@ unsigned int dst_stride_u, // [r1] +@ uint8_t * dst_v, // [r2] +@ unsigned int dst_stride_v, // [r3] +@ const uint8_t * src, // [sp, #0] -> r4, r5 +@ unsigned int stride1, // [sp, #4] 128 +@ unsigned int stride2, // [sp, #8] -> r8 +@ unsigned int _x, // [sp, #12] 0 +@ unsigned int y, // [sp, #16] (r7 in prefix) +@ unsigned int _w, // [sp, #20] -> r12, r6 +@ unsigned int h); // [sp, #24] -> r7 +@ +@ Assumes that we are starting on a stripe boundary and that overreading +@ within the stripe is OK. However it does respect the dest size for writing + +function ff_rpi_sand8_lines_to_planar_c8, export=1 + push {r4-r8, lr} @ +24 + + ldr r5, [sp, #24] + ldr r8, [sp, #32] + ldr r7, [sp, #40] + ldr r6, [sp, #44] + lsl r8, #7 + add r5, r5, r7, lsl #7 + sub r1, r1, r6 + sub r3, r3, r6 + ldr r7, [sp, #48] + vpush {q4-q7} + +10: + mov r4, r5 + mov r12, r6 +1: + subs r12, #64 + vldm r4, {q0-q7} + add r4, r8 + it gt + vldmgt r4, {q8-q15} + add r4, r8 + + vuzp.8 q0, q1 + vuzp.8 q2, q3 + vuzp.8 q4, q5 + vuzp.8 q6, q7 + + vuzp.8 q8, q9 + vuzp.8 q10, q11 + vuzp.8 q12, q13 + vuzp.8 q14, q15 + subs r12, #64 + + @ Rearrange regs so we can use vst1 with 4 regs + vswp q1, q2 + vswp q5, q6 + vswp q9, q10 + vswp q13, q14 + blt 2f + + vst1.8 {d0, d1, d2, d3 }, [r0]! + vst1.8 {d8, d9, d10, d11}, [r0]! + vst1.8 {d16, d17, d18, d19}, [r0]! + vst1.8 {d24, d25, d26, d27}, [r0]! + + vst1.8 {d4, d5, d6, d7 }, [r2]! + vst1.8 {d12, d13, d14, d15}, [r2]! + vst1.8 {d20, d21, d22, d23}, [r2]! + vst1.8 {d28, d29, d30, d31}, [r2]! + bne 1b +11: + subs r7, #1 + add r5, #128 + add r0, r1 + add r2, r3 + bne 10b + vpop {q4-q7} + pop {r4-r8,pc} + +2: + cmp r12, #64-128 + blt 1f + vst1.8 {d0, d1, d2, d3 }, [r0]! + vst1.8 {d8, d9, d10, d11}, [r0]! + vst1.8 {d4, d5, d6, d7 }, [r2]! + vst1.8 {d12, d13, d14, d15}, [r2]! + beq 11b + sub r12, #64 + vmov q0, q8 + vmov q1, q9 + vmov q2, q10 + vmov q3, q11 + vmov q4, q12 + vmov q5, q13 + vmov q6, q14 + vmov q7, q15 +1: + cmp r12, #32-128 + blt 1f + vst1.8 {d0, d1, d2, d3 }, [r0]! + vst1.8 {d4, d5, d6, d7 }, [r2]! + beq 11b + sub r12, #32 + vmov q0, q4 + vmov q1, q5 + vmov q2, q6 + vmov q3, q7 +1: + cmp r12, #16-128 + blt 1f + vst1.8 {d0, d1 }, [r0]! + vst1.8 {d4, d5 }, [r2]! + beq 11b + sub r12, #16 + vmov q0, q1 + vmov q2, q3 +1: + cmp r12, #8-128 + blt 1f + vst1.8 {d0}, [r0]! + vst1.8 {d4}, [r2]! + beq 11b + sub r12, #8 + vmov d0, d1 + vmov d4, d5 +1: + cmp r12, #4-128 + blt 1f + vst1.32 {d0[0]}, [r0]! + vst1.32 {d4[0]}, [r2]! + beq 11b + sub r12, #4 + vmov s0, s1 + vmov s8, s9 +1: + cmp r12, #2-128 + blt 1f + vst1.16 {d0[0]}, [r0]! + vst1.16 {d4[0]}, [r2]! + beq 11b + vst1.8 {d0[2]}, [r0]! + vst1.8 {d4[2]}, [r2]! + b 11b +1: + vst1.8 {d0[0]}, [r0]! + vst1.8 {d4[0]}, [r2]! 
+ b 11b +endfunc + + + +@ void ff_rpi_sand30_lines_to_planar_y16( +@ uint8_t * dest, // [r0] +@ unsigned int dst_stride, // [r1] +@ const uint8_t * src, // [r2] +@ unsigned int src_stride1, // [r3] Ignored - assumed 128 +@ unsigned int src_stride2, // [sp, #0] -> r3 +@ unsigned int _x, // [sp, #4] Ignored - 0 +@ unsigned int y, // [sp, #8] (r7 in prefix) +@ unsigned int _w, // [sp, #12] -> r6 (cur r5) +@ unsigned int h); // [sp, #16] -> r7 +@ +@ Assumes that we are starting on a stripe boundary and that overreading +@ within the stripe is OK. However it does respect the dest size for writing + +function ff_rpi_sand30_lines_to_planar_y16, export=1 + push {r4-r8, lr} @ +24 + ldr r3, [sp, #24] + ldr r6, [sp, #36] + ldr r7, [sp, #32] @ y + mov r12, #48 + sub r3, #1 + lsl r3, #7 + sub r1, r1, r6, lsl #1 + add r8, r2, r7, lsl #7 + ldr r7, [sp, #40] + +10: + mov r2, r8 + add r4, r0, #24 + mov r5, r6 + mov lr, #0 +1: + vldm r2!, {q10-q13} + add lr, #64 + + vshrn.u32 d4 , q10, #14 @ Cannot vshrn.u32 #20! + ands lr, #127 + vshrn.u32 d2, q10, #10 + vmovn.u32 d0, q10 + + vshrn.u32 d5, q11, #14 + it eq + addeq r2, r3 + vshrn.u32 d3, q11, #10 + vmovn.u32 d1, q11 + + subs r5, #48 + vshr.u16 q2, #6 + vbic.u16 q0, #0xfc00 + vbic.u16 q1, #0xfc00 + + vshrn.u32 d20, q12, #14 + vshrn.u32 d18, q12, #10 + vmovn.u32 d16, q12 + + vshrn.u32 d21, q13, #14 + vshrn.u32 d19, q13, #10 + vmovn.u32 d17, q13 + + vshr.u16 q10, #6 + vbic.u16 q8, #0xfc00 + vbic.u16 q9 , #0xfc00 + blt 2f + + vst3.16 {d0, d2, d4}, [r0], r12 + vst3.16 {d1, d3, d5}, [r4], r12 + vst3.16 {d16, d18, d20}, [r0], r12 + vst3.16 {d17, d19, d21}, [r4], r12 + + bne 1b + +11: + subs r7, #1 + add r0, r1 + add r8, #128 + bne 10b + + pop {r4-r8, pc} + +@ Partial final write +2: + cmp r5, #24-48 + blt 1f + vst3.16 {d0, d2, d4}, [r0], r12 + vst3.16 {d1, d3, d5}, [r4] + beq 11b + vmov q0, q8 + sub r5, #24 + vmov q1, q9 + vmov q2, q10 +1: + cmp r5, #12-48 + blt 1f + vst3.16 {d0, d2, d4}, [r0]! + beq 11b + vmov d0, d1 + sub r5, #12 + vmov d2, d3 + vmov d4, d5 +1: + cmp r5, #6-48 + add r4, r0, #6 @ avoid [r0]! on sequential instructions + blt 1f + vst3.16 {d0[0], d2[0], d4[0]}, [r0] + vst3.16 {d0[1], d2[1], d4[1]}, [r4] + add r0, #12 + beq 11b + vmov s0, s1 + sub r5, #6 + vmov s4, s5 + vmov s8, s9 +1: + cmp r5, #3-48 + blt 1f + vst3.16 {d0[0], d2[0], d4[0]}, [r0]! + beq 11b + sub r5, #3 + vshr.u32 d0, #16 + vshr.u32 d2, #16 +1: + cmp r5, #2-48 + blt 1f + vst2.16 {d0[0], d2[0]}, [r0]! + b 11b +1: + vst1.16 {d0[0]}, [r0]! + b 11b + +endfunc + + +@ void ff_rpi_sand30_lines_to_planar_c16( +@ uint8_t * dst_u, // [r0] +@ unsigned int dst_stride_u, // [r1] +@ uint8_t * dst_v, // [r2] +@ unsigned int dst_stride_v, // [r3] +@ const uint8_t * src, // [sp, #0] -> r4, r5 +@ unsigned int stride1, // [sp, #4] 128 +@ unsigned int stride2, // [sp, #8] -> r8 +@ unsigned int _x, // [sp, #12] 0 +@ unsigned int y, // [sp, #16] (r7 in prefix) +@ unsigned int _w, // [sp, #20] -> r6, r9 +@ unsigned int h); // [sp, #24] -> r7 +@ +@ Assumes that we are starting on a stripe boundary and that overreading +@ within the stripe is OK. However it does respect the dest size for writing + +function ff_rpi_sand30_lines_to_planar_c16, export=1 + push {r4-r10, lr} @ +32 + ldr r5, [sp, #32] + ldr r8, [sp, #40] + ldr r7, [sp, #48] + ldr r9, [sp, #52] + mov r12, #48 + sub r8, #1 + lsl r8, #7 + add r5, r5, r7, lsl #7 + sub r1, r1, r9, lsl #1 + sub r3, r3, r9, lsl #1 + ldr r7, [sp, #56] +10: + mov lr, #0 + mov r4, r5 + mov r6, r9 +1: + vldm r4!, {q0-q3} + add lr, #64 + + @ N.B. 
unpack [0,1,2] -> (reg order) 1, 0, 2 + vshrn.u32 d20, q0, #14 + vmovn.u32 d18, q0 + vshrn.u32 d0, q0, #10 + ands lr, #127 + + vshrn.u32 d21, q1, #14 + vmovn.u32 d19, q1 + vshrn.u32 d1, q1, #10 + + vshrn.u32 d22, q2, #10 + vmovn.u32 d2, q2 + vshrn.u32 d4, q2, #14 + + add r10, r0, #24 + vshrn.u32 d23, q3, #10 + vmovn.u32 d3, q3 + vshrn.u32 d5, q3, #14 + + it eq + addeq r4, r8 + vuzp.16 q0, q11 + vuzp.16 q9, q1 + vuzp.16 q10, q2 + + @ q0 V0, V3,.. + @ q9 U0, U3... + @ q10 U1, U4... + @ q11 U2, U5,.. + @ q1 V1, V4, + @ q2 V2, V5,.. + + subs r6, #24 + vbic.u16 q11, #0xfc00 + vbic.u16 q9, #0xfc00 + vshr.u16 q10, #6 + vshr.u16 q2, #6 + vbic.u16 q0, #0xfc00 + vbic.u16 q1, #0xfc00 + + blt 2f + + vst3.16 {d18, d20, d22}, [r0], r12 + vst3.16 {d19, d21, d23}, [r10] + add r10, r2, #24 + vst3.16 {d0, d2, d4}, [r2], r12 + vst3.16 {d1, d3, d5}, [r10] + + bne 1b + +11: + subs r7, #1 + add r5, #128 + add r0, r1 + add r2, r3 + bne 10b + + pop {r4-r10, pc} + +@ Partial final write +2: + cmp r6, #-12 + blt 1f + vst3.16 {d18, d20, d22}, [r0]! + vst3.16 {d0, d2, d4}, [r2]! + beq 11b + vmov d18, d19 + vmov d20, d21 + vmov d22, d23 + sub r6, #12 + vmov d0, d1 + vmov d2, d3 + vmov d4, d5 +1: + cmp r6, #-18 + @ Rezip here as it makes the remaining tail handling easier + vzip.16 d0, d18 + vzip.16 d2, d20 + vzip.16 d4, d22 + blt 1f + vst3.16 {d0[1], d2[1], d4[1]}, [r0]! + vst3.16 {d0[0], d2[0], d4[0]}, [r2]! + vst3.16 {d0[3], d2[3], d4[3]}, [r0]! + vst3.16 {d0[2], d2[2], d4[2]}, [r2]! + beq 11b + vmov d0, d18 + vmov d2, d20 + sub r6, #6 + vmov d4, d22 +1: + cmp r6, #-21 + blt 1f + vst3.16 {d0[1], d2[1], d4[1]}, [r0]! + vst3.16 {d0[0], d2[0], d4[0]}, [r2]! + beq 11b + vmov s4, s5 + sub r6, #3 + vmov s0, s1 +1: + cmp r6, #-22 + blt 1f + vst2.16 {d0[1], d2[1]}, [r0]! + vst2.16 {d0[0], d2[0]}, [r2]! + b 11b +1: + vst1.16 {d0[1]}, [r0]! + vst1.16 {d0[0]}, [r2]! + b 11b + +endfunc + +@ void ff_rpi_sand30_lines_to_planar_p010( +@ uint8_t * dest, // [r0] +@ unsigned int dst_stride, // [r1] +@ const uint8_t * src, // [r2] +@ unsigned int src_stride1, // [r3] Ignored - assumed 128 +@ unsigned int src_stride2, // [sp, #0] -> r3 +@ unsigned int _x, // [sp, #4] Ignored - 0 +@ unsigned int y, // [sp, #8] (r7 in prefix) +@ unsigned int _w, // [sp, #12] -> r6 (cur r5) +@ unsigned int h); // [sp, #16] -> r7 +@ +@ Assumes that we are starting on a stripe boundary and that overreading +@ within the stripe is OK. 
However it does respect the dest size for writing + +function ff_rpi_sand30_lines_to_planar_p010, export=1 + push {r4-r8, lr} @ +24 + ldr r3, [sp, #24] + ldr r6, [sp, #36] + ldr r7, [sp, #32] @ y + mov r12, #48 + vmov.u16 q15, #0xffc0 + sub r3, #1 + lsl r3, #7 + sub r1, r1, r6, lsl #1 + add r8, r2, r7, lsl #7 + ldr r7, [sp, #40] + +10: + mov r2, r8 + add r4, r0, #24 + mov r5, r6 + mov lr, #0 +1: + vldm r2!, {q10-q13} + add lr, #64 + + vshl.u32 q14, q10, #6 + ands lr, #127 + vshrn.u32 d4, q10, #14 + vshrn.u32 d2, q10, #4 + vmovn.u32 d0, q14 + + vshl.u32 q14, q11, #6 + it eq + addeq r2, r3 + vshrn.u32 d5, q11, #14 + vshrn.u32 d3, q11, #4 + vmovn.u32 d1, q14 + + subs r5, #48 + vand q2, q15 + vand q1, q15 + vand q0, q15 + + vshl.u32 q14, q12, #6 + vshrn.u32 d20, q12, #14 + vshrn.u32 d18, q12, #4 + vmovn.u32 d16, q14 + + vshl.u32 q14, q13, #6 + vshrn.u32 d21, q13, #14 + vshrn.u32 d19, q13, #4 + vmovn.u32 d17, q14 + + vand q10, q15 + vand q9, q15 + vand q8, q15 + blt 2f + + vst3.16 {d0, d2, d4}, [r0], r12 + vst3.16 {d1, d3, d5}, [r4], r12 + vst3.16 {d16, d18, d20}, [r0], r12 + vst3.16 {d17, d19, d21}, [r4], r12 + + bne 1b + +11: + subs r7, #1 + add r0, r1 + add r8, #128 + bne 10b + + pop {r4-r8, pc} + +@ Partial final write +2: + cmp r5, #24-48 + blt 1f + vst3.16 {d0, d2, d4}, [r0], r12 + vst3.16 {d1, d3, d5}, [r4] + beq 11b + vmov q0, q8 + sub r5, #24 + vmov q1, q9 + vmov q2, q10 +1: + cmp r5, #12-48 + blt 1f + vst3.16 {d0, d2, d4}, [r0]! + beq 11b + vmov d0, d1 + sub r5, #12 + vmov d2, d3 + vmov d4, d5 +1: + cmp r5, #6-48 + add r4, r0, #6 @ avoid [r0]! on sequential instructions + blt 1f + vst3.16 {d0[0], d2[0], d4[0]}, [r0] + vst3.16 {d0[1], d2[1], d4[1]}, [r4] + add r0, #12 + beq 11b + vmov s0, s1 + sub r5, #6 + vmov s4, s5 + vmov s8, s9 +1: + cmp r5, #3-48 + blt 1f + vst3.16 {d0[0], d2[0], d4[0]}, [r0]! + beq 11b + sub r5, #3 + vshr.u32 d0, #16 + vshr.u32 d2, #16 +1: + cmp r5, #2-48 + blt 1f + vst2.16 {d0[0], d2[0]}, [r0]! + b 11b +1: + vst1.16 {d0[0]}, [r0]! + b 11b + +endfunc + + +@ void ff_rpi_sand30_lines_to_planar_y8( +@ uint8_t * dest, // [r0] +@ unsigned int dst_stride, // [r1] +@ const uint8_t * src, // [r2] +@ unsigned int src_stride1, // [r3] Ignored - assumed 128 +@ unsigned int src_stride2, // [sp, #0] -> r3 +@ unsigned int _x, // [sp, #4] Ignored - 0 +@ unsigned int y, // [sp, #8] (r7 in prefix) +@ unsigned int _w, // [sp, #12] -> r6 (cur r5) +@ unsigned int h); // [sp, #16] -> r7 +@ +@ Assumes that we are starting on a stripe boundary and that overreading +@ within the stripe is OK. However it does respect the dest size for wri + +function ff_rpi_sand30_lines_to_planar_y8, export=1 + push {r4-r8, lr} @ +24 + ldr r3, [sp, #24] + ldr r6, [sp, #36] + ldr r7, [sp, #32] @ y + mov r12, #48 + lsl r3, #7 + sub r1, r1, r6 + add r8, r2, r7, lsl #7 + ldr r7, [sp, #40] + +10: + mov r2, r8 + add r4, r0, #24 + mov r5, r6 +1: + vldm r2, {q8-q15} + + subs r5, #96 + + vmovn.u32 d0, q8 + vshrn.u32 d2, q8, #12 + vshrn.u32 d4, q8, #16 @ Cannot vshrn.u32 #20! 
+ + add r2, r3 + + vmovn.u32 d1, q9 + vshrn.u32 d3, q9, #12 + vshrn.u32 d5, q9, #16 + + pld [r2, #0] + + vshrn.u16 d0, q0, #2 + vmovn.u16 d1, q1 + vshrn.u16 d2, q2, #6 + + vmovn.u32 d16, q10 + vshrn.u32 d18, q10, #12 + vshrn.u32 d20, q10, #16 + + vmovn.u32 d17, q11 + vshrn.u32 d19, q11, #12 + vshrn.u32 d21, q11, #16 + + pld [r2, #64] + + vshrn.u16 d4, q8, #2 + vmovn.u16 d5, q9 + vshrn.u16 d6, q10, #6 + + vmovn.u32 d16, q12 + vshrn.u32 d18, q12, #12 + vshrn.u32 d20, q12, #16 + + vmovn.u32 d17, q13 + vshrn.u32 d19, q13, #12 + vshrn.u32 d21, q13, #16 + + vshrn.u16 d16, q8, #2 + vmovn.u16 d17, q9 + vshrn.u16 d18, q10, #6 + + vmovn.u32 d20, q14 + vshrn.u32 d22, q14, #12 + vshrn.u32 d24, q14, #16 + + vmovn.u32 d21, q15 + vshrn.u32 d23, q15, #12 + vshrn.u32 d25, q15, #16 + + vshrn.u16 d20, q10, #2 + vmovn.u16 d21, q11 + vshrn.u16 d22, q12, #6 + + blt 2f + + vst3.8 {d0, d1, d2}, [r0], r12 + vst3.8 {d4, d5, d6}, [r4], r12 + vst3.8 {d16, d17, d18}, [r0], r12 + vst3.8 {d20, d21, d22}, [r4], r12 + + bne 1b + +11: + subs r7, #1 + add r0, r1 + add r8, #128 + bne 10b + + pop {r4-r8, pc} + +@ Partial final write +2: + cmp r5, #48-96 + blt 1f + vst3.8 {d0, d1, d2}, [r0], r12 + vst3.8 {d4, d5, d6}, [r4], r12 + beq 11b + vmov q0, q8 + vmov q2, q10 + sub r5, #48 + vmov d2, d18 + vmov d6, d22 +1: + cmp r5, #24-96 + blt 1f + vst3.8 {d0, d1, d2}, [r0]! + beq 11b + vmov q0, q2 + sub r5, #24 + vmov d2, d6 +1: + cmp r5, #12-96 + blt 1f + vst3.8 {d0[0], d1[0], d2[0]}, [r0]! + vst3.8 {d0[1], d1[1], d2[1]}, [r0]! + vst3.8 {d0[2], d1[2], d2[2]}, [r0]! + vst3.8 {d0[3], d1[3], d2[3]}, [r0]! + beq 11b + vmov s0, s1 + sub r5, #12 + vmov s2, s3 + vmov s4, s5 +1: + cmp r5, #6-96 + blt 1f + vst3.8 {d0[0], d1[0], d2[0]}, [r0]! + vst3.8 {d0[1], d1[1], d2[1]}, [r0]! + add r0, #12 + beq 11b + vshr.u32 d0, #16 + sub r5, #6 + vshr.u32 d1, #16 + vshr.u32 d2, #16 +1: + cmp r5, #3-96 + blt 1f + vst3.8 {d0[0], d1[0], d2[0]}, [r0]! + beq 11b + sub r5, #3 + vshr.u32 d0, #8 + vshr.u32 d1, #8 +1: + cmp r5, #2-96 + blt 1f + vst2.8 {d0[0], d1[0]}, [r0]! + b 11b +1: + vst1.8 {d0[0]}, [r0]! + b 11b + +endfunc + + diff --git a/libavutil/arm/rpi_sand_neon.h b/libavutil/arm/rpi_sand_neon.h new file mode 100644 index 0000000000..d8126676ee --- /dev/null +++ b/libavutil/arm/rpi_sand_neon.h @@ -0,0 +1,110 @@ +/* +Copyright (c) 2020 Raspberry Pi (Trading) Ltd. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Authors: John Cox +*/ + +#ifndef AVUTIL_ARM_RPI_SAND_NEON_H +#define AVUTIL_ARM_RPI_SAND_NEON_H + +void ff_rpi_sand128b_stripe_to_8_10( + uint8_t * dest, // [r0] + const uint8_t * src1, // [r1] + const uint8_t * src2, // [r2] + unsigned int lines); // [r3] + +void ff_rpi_sand8_lines_to_planar_y8( + uint8_t * dest, // [r0] + unsigned int dst_stride, // [r1] + const uint8_t * src, // [r2] + unsigned int src_stride1, // [r3] Ignored - assumed 128 + unsigned int src_stride2, // [sp, #0] -> r3 + unsigned int _x, // [sp, #4] Ignored - 0 + unsigned int y, // [sp, #8] (r7 in prefix) + unsigned int _w, // [sp, #12] -> r6 (cur r5) + unsigned int h); // [sp, #16] -> r7 + +void ff_rpi_sand8_lines_to_planar_c8( + uint8_t * dst_u, // [r0] + unsigned int dst_stride_u, // [r1] + uint8_t * dst_v, // [r2] + unsigned int dst_stride_v, // [r3] + const uint8_t * src, // [sp, #0] -> r4, r5 + unsigned int stride1, // [sp, #4] 128 + unsigned int stride2, // [sp, #8] -> r8 + unsigned int _x, // [sp, #12] 0 + unsigned int y, // [sp, #16] (r7 in prefix) + unsigned int _w, // [sp, #20] -> r12, r6 + unsigned int h); // [sp, #24] -> r7 + +void ff_rpi_sand30_lines_to_planar_y16( + uint8_t * dest, // [r0] + unsigned int dst_stride, // [r1] + const uint8_t * src, // [r2] + unsigned int src_stride1, // [r3] Ignored - assumed 128 + unsigned int src_stride2, // [sp, #0] -> r3 + unsigned int _x, // [sp, #4] Ignored - 0 + unsigned int y, // [sp, #8] (r7 in prefix) + unsigned int _w, // [sp, #12] -> r6 (cur r5) + unsigned int h); // [sp, #16] -> r7 + +void ff_rpi_sand30_lines_to_planar_c16( + uint8_t * dst_u, // [r0] + unsigned int dst_stride_u, // [r1] + uint8_t * dst_v, // [r2] + unsigned int dst_stride_v, // [r3] + const uint8_t * src, // [sp, #0] -> r4, r5 + unsigned int stride1, // [sp, #4] 128 + unsigned int stride2, // [sp, #8] -> r8 + unsigned int _x, // [sp, #12] 0 + unsigned int y, // [sp, #16] (r7 in prefix) + unsigned int _w, // [sp, #20] -> r6, r9 + unsigned int h); // [sp, #24] -> r7 + +void ff_rpi_sand30_lines_to_planar_p010( + uint8_t * dest, // [r0] + unsigned int dst_stride, // [r1] + const uint8_t * src, // [r2] + unsigned int src_stride1, // [r3] Ignored - assumed 128 + unsigned int src_stride2, // [sp, #0] -> r3 + unsigned int _x, // [sp, #4] Ignored - 0 + unsigned int y, // [sp, #8] (r7 in prefix) + unsigned int _w, // [sp, #12] -> r6 (cur r5) + unsigned int h); // [sp, #16] -> r7 + +void ff_rpi_sand30_lines_to_planar_y8( + uint8_t * dest, // [r0] + unsigned int dst_stride, // [r1] + const uint8_t * src, // [r2] + unsigned int src_stride1, // [r3] Ignored - assumed 128 + unsigned int src_stride2, // [sp, #0] -> r3 + unsigned int _x, // [sp, #4] Ignored - 0 + unsigned int y, // [sp, #8] (r7 in prefix) + unsigned int _w, // [sp, #12] -> r6 (cur r5) + unsigned int h); // [sp, #16] -> r7 + +#endif // AVUTIL_ARM_SAND_NEON_H + diff --git a/libavutil/frame.c b/libavutil/frame.c index be30eb09d2..1756d67918 100644 --- a/libavutil/frame.c +++ b/libavutil/frame.c @@ -16,6 +16,8 @@ * Foundation, Inc., 
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "config.h" + #include "channel_layout.h" #include "avassert.h" #include "buffer.h" @@ -27,6 +29,9 @@ #include "samplefmt.h" #include "side_data.h" #include "hwcontext.h" +#if CONFIG_SAND +#include "rpi_sand_fns.h" +#endif static void get_frame_defaults(AVFrame *frame) { @@ -772,6 +777,12 @@ int av_frame_apply_cropping(AVFrame *frame, int flags) (frame->crop_top + frame->crop_bottom) >= frame->height) return AVERROR(ERANGE); +#if CONFIG_SAND + // Sand cannot be cropped - do not try + if (av_rpi_is_sand_format(frame->format)) + return 0; +#endif + desc = av_pix_fmt_desc_get(frame->format); if (!desc) return AVERROR_BUG; diff --git a/libavutil/frame.h b/libavutil/frame.h index 771c9ce453..7965015b8f 100644 --- a/libavutil/frame.h +++ b/libavutil/frame.h @@ -1025,6 +1025,16 @@ int av_frame_apply_cropping(AVFrame *frame, int flags); */ const char *av_frame_side_data_name(enum AVFrameSideDataType type); + +static inline int av_frame_cropped_width(const AVFrame * const frame) +{ + return frame->width - (frame->crop_left + frame->crop_right); +} +static inline int av_frame_cropped_height(const AVFrame * const frame) +{ + return frame->height - (frame->crop_top + frame->crop_bottom); +} + /** * @return side data descriptor corresponding to a given side data type, NULL * when not available. diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c index 83bd7457e8..7f93d90c8e 100644 --- a/libavutil/hwcontext.c +++ b/libavutil/hwcontext.c @@ -71,6 +71,9 @@ static const HWContextType * const hw_table[] = { #endif #if CONFIG_OHCODEC &ff_hwcontext_type_oh, +#endif +#if CONFIG_V4L2_REQUEST + &ff_hwcontext_type_v4l2request, #endif NULL, }; @@ -83,6 +86,7 @@ static const char *const hw_type_names[] = { [AV_HWDEVICE_TYPE_D3D12VA] = "d3d12va", [AV_HWDEVICE_TYPE_OPENCL] = "opencl", [AV_HWDEVICE_TYPE_QSV] = "qsv", + [AV_HWDEVICE_TYPE_V4L2REQUEST] = "v4l2request", [AV_HWDEVICE_TYPE_VAAPI] = "vaapi", [AV_HWDEVICE_TYPE_VDPAU] = "vdpau", [AV_HWDEVICE_TYPE_VIDEOTOOLBOX] = "videotoolbox", diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h index 29374cf0a7..88e47cc7f5 100644 --- a/libavutil/hwcontext.h +++ b/libavutil/hwcontext.h @@ -41,6 +41,7 @@ enum AVHWDeviceType { AV_HWDEVICE_TYPE_AMF, /* OpenHarmony Codec device */ AV_HWDEVICE_TYPE_OHCODEC, + AV_HWDEVICE_TYPE_V4L2REQUEST, }; /** diff --git a/libavutil/hwcontext_drm.c b/libavutil/hwcontext_drm.c index 565c02dead..dac9befae9 100644 --- a/libavutil/hwcontext_drm.c +++ b/libavutil/hwcontext_drm.c @@ -21,6 +21,7 @@ #include #include #include +#include /* This was introduced in version 4.6. And may not exist all without an * optional package. 
So to prevent a hard dependency on needing the Linux @@ -31,6 +32,7 @@ #endif #include +#include #include #include "avassert.h" @@ -40,6 +42,9 @@ #include "imgutils.h" #include "mem.h" +#if CONFIG_SAND +#include "libavutil/rpi_sand_fns.h" +#endif static void drm_device_free(AVHWDeviceContext *hwdev) { @@ -54,6 +59,11 @@ static int drm_device_create(AVHWDeviceContext *hwdev, const char *device, AVDRMDeviceContext *hwctx = hwdev->hwctx; drmVersionPtr version; + if (device == NULL) { + hwctx->fd = -1; + return 0; + } + hwctx->fd = open(device, O_RDWR); if (hwctx->fd < 0) return AVERROR(errno); @@ -140,6 +150,8 @@ static int drm_map_frame(AVHWFramesContext *hwfc, if (flags & AV_HWFRAME_MAP_WRITE) mmap_prot |= PROT_WRITE; + if (dst->format == AV_PIX_FMT_NONE) + dst->format = hwfc->sw_format; #if HAVE_LINUX_DMA_BUF_H if (flags & AV_HWFRAME_MAP_READ) map->sync_flags |= DMA_BUF_SYNC_READ; @@ -186,12 +198,34 @@ static int drm_map_frame(AVHWFramesContext *hwfc, dst->width = src->width; dst->height = src->height; + // Crop copied with props + +#if CONFIG_SAND + // Rework for sand frames + if (av_rpi_is_sand_frame(dst)) { + // As it stands the sand formats hold stride2 in linesize[3] + // linesize[0] & [1] contain stride1 which is always 128 for everything we do + // * Arguably this should be reworked s.t. stride2 is in linesize[0] & [1] + int mod_stride = fourcc_mod_broadcom_param(desc->objects[0].format_modifier); + if (mod_stride == 0) { + dst->linesize[3] = dst->linesize[0]; + dst->linesize[4] = dst->linesize[1]; + } + else { + dst->linesize[3] = mod_stride; + dst->linesize[4] = mod_stride; + } + dst->linesize[0] = 128; + dst->linesize[1] = 128; + } +#endif err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, - &drm_unmap_frame, map); + drm_unmap_frame, map); if (err < 0) goto fail; + av_frame_copy_props(dst, src); return 0; fail: @@ -207,16 +241,29 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx, enum AVHWFrameTransferDirection dir, enum AVPixelFormat **formats) { - enum AVPixelFormat *pix_fmts; + enum AVPixelFormat *p; - pix_fmts = av_malloc_array(2, sizeof(*pix_fmts)); - if (!pix_fmts) + p = *formats = av_malloc_array(3, sizeof(*p)); + if (!p) return AVERROR(ENOMEM); - pix_fmts[0] = ctx->sw_format; - pix_fmts[1] = AV_PIX_FMT_NONE; + // **** Offer native sand too ???? + *p++ = +#if CONFIG_SAND + ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ? + AV_PIX_FMT_YUV420P : + ctx->sw_format == AV_PIX_FMT_RPI4_10 ? 
+ AV_PIX_FMT_YUV420P10LE : +#endif + ctx->sw_format; + +#if CONFIG_SAND + if (ctx->sw_format == AV_PIX_FMT_RPI4_10 || + ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128) + *p++ = AV_PIX_FMT_NV12; +#endif - *formats = pix_fmts; + *p = AV_PIX_FMT_NONE; return 0; } @@ -232,18 +279,62 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc, map = av_frame_alloc(); if (!map) return AVERROR(ENOMEM); - map->format = dst->format; + // Map to default + map->format = AV_PIX_FMT_NONE; err = drm_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ); if (err) goto fail; - map->width = dst->width; - map->height = dst->height; +#if 0 + av_log(hwfc, AV_LOG_INFO, "%s: src fmt=%d (%d), dst fmt=%d (%d) s=%dx%d l=%d/%d/%d/%d, d=%dx%d l=%d/%d/%d\n", __func__, + hwfc->sw_format, AV_PIX_FMT_RPI4_8, dst->format, AV_PIX_FMT_YUV420P10LE, + map->width, map->height, + map->linesize[0], + map->linesize[1], + map->linesize[2], + map->linesize[3], + dst->width, dst->height, + dst->linesize[0], + dst->linesize[1], + dst->linesize[2]); +#endif +#if CONFIG_SAND + if (av_rpi_is_sand_frame(map)) { + const unsigned int w = FFMIN(dst->width, map->width); + const unsigned int h = FFMIN(dst->height, map->height); + + map->crop_top = 0; + map->crop_bottom = 0; + map->crop_left = 0; + map->crop_right = 0; + + if (av_rpi_sand_to_planar_frame(dst, map) != 0) + { + av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__); + err = AVERROR(EINVAL); + goto fail; + } + + dst->width = w; + dst->height = h; + // Cropping restored as part of props + } + else +#endif + { + dst->width = map->width; + dst->height = map->height; + err = av_frame_copy(dst, map); + } + + av_frame_copy_props(dst, src); - err = av_frame_copy(dst, map); if (err) + { + av_log(hwfc, AV_LOG_ERROR, "%s: Copy fail\n", __func__); goto fail; + } err = 0; fail: @@ -258,7 +349,10 @@ static int drm_transfer_data_to(AVHWFramesContext *hwfc, int err; if (src->width > hwfc->width || src->height > hwfc->height) + { + av_log(hwfc, AV_LOG_ERROR, "%s: H/w mismatch: %d/%d, %d/%d\n", __func__, dst->width, hwfc->width, dst->height, hwfc->height); return AVERROR(EINVAL); + } map = av_frame_alloc(); if (!map) @@ -288,9 +382,7 @@ static int drm_map_from(AVHWFramesContext *hwfc, AVFrame *dst, { int err; - if (dst->format == AV_PIX_FMT_NONE) - dst->format = hwfc->sw_format; - else if (hwfc->sw_format != dst->format) + if (hwfc->sw_format != dst->format && dst->format != AV_PIX_FMT_NONE) return AVERROR(ENOSYS); err = drm_map_frame(hwfc, dst, src, flags); diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h index dcfdc2016a..94e4da16c0 100644 --- a/libavutil/hwcontext_internal.h +++ b/libavutil/hwcontext_internal.h @@ -158,6 +158,7 @@ extern const HWContextType ff_hwcontext_type_drm; extern const HWContextType ff_hwcontext_type_dxva2; extern const HWContextType ff_hwcontext_type_opencl; extern const HWContextType ff_hwcontext_type_qsv; +extern const HWContextType ff_hwcontext_type_v4l2request; extern const HWContextType ff_hwcontext_type_vaapi; extern const HWContextType ff_hwcontext_type_vdpau; extern const HWContextType ff_hwcontext_type_videotoolbox; diff --git a/libavutil/hwcontext_v4l2request.c b/libavutil/hwcontext_v4l2request.c new file mode 100644 index 0000000000..3c762866da --- /dev/null +++ b/libavutil/hwcontext_v4l2request.c @@ -0,0 +1,1185 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "avassert.h" +#include "hwcontext_drm.h" +#include "hwcontext_internal.h" +#include "hwcontext_v4l2request_internal.h" +#include "mem.h" + +/* These are not in mainline kernel headers yet */ +#ifndef V4L2_PIX_FMT_NV12_COL128 +#define V4L2_PIX_FMT_NV12MT_COL128 v4l2_fourcc('N', 'c', '1', '2') +#endif +#ifndef V4L2_PIX_FMT_NV12MT_10_COL128 +#define V4L2_PIX_FMT_NV12MT_10_COL128 v4l2_fourcc('N', 'c', '3', '0') +#endif + +typedef struct V4L2RequestVideoDecoder { + dev_t media_dev; + dev_t video_dev; + uint32_t *pixelformats; + int nb_pixelformats; +} V4L2RequestVideoDecoder; + +typedef struct V4L2RequestDeviceContext { + V4L2RequestVideoDecoder *decoders; + int nb_decoders; +} V4L2RequestDeviceContext; + +typedef struct V4L2RequestFramesContext { + AVV4L2RequestFramesContext p; + AVV4L2RequestFramesContextInternal internal; +} V4L2RequestFramesContext; + +typedef struct V4L2RequestFrameDescriptor { + AVDRMFrameDescriptor base; + AVBufferRef *ref; + uint32_t index; + int fd[AV_DRM_MAX_PLANES]; +} V4L2RequestFrameDescriptor; + +static const struct { + uint32_t pixelformat; + enum AVPixelFormat sw_format; + uint32_t drm_format; + uint64_t format_modifier; + uint32_t bit_depth; +} v4l2request_capture_pixelformats[] = { + { V4L2_PIX_FMT_NV12, AV_PIX_FMT_NV12, DRM_FORMAT_NV12, DRM_FORMAT_MOD_LINEAR, 8 }, +#if defined(V4L2_PIX_FMT_NV12_32L32) + { V4L2_PIX_FMT_NV12_32L32, AV_PIX_FMT_YUV420P, DRM_FORMAT_NV12, DRM_FORMAT_MOD_ALLWINNER_TILED, 8 }, +#endif +#if defined(V4L2_PIX_FMT_NV15) && defined(DRM_FORMAT_NV15) + { V4L2_PIX_FMT_NV15, AV_PIX_FMT_YUV420P10, DRM_FORMAT_NV15, DRM_FORMAT_MOD_LINEAR, 10 }, +#endif + { V4L2_PIX_FMT_NV16, AV_PIX_FMT_NV16, DRM_FORMAT_NV16, DRM_FORMAT_MOD_LINEAR, 8 }, +#if defined(V4L2_PIX_FMT_NV20) && defined(DRM_FORMAT_NV20) + { V4L2_PIX_FMT_NV20, AV_PIX_FMT_YUV422P10, DRM_FORMAT_NV20, DRM_FORMAT_MOD_LINEAR, 10 }, +#endif +#if defined(V4L2_PIX_FMT_P010) && defined(DRM_FORMAT_P010) + { V4L2_PIX_FMT_P010, AV_PIX_FMT_P010, DRM_FORMAT_P010, DRM_FORMAT_MOD_LINEAR, 10 }, +#endif +#if defined(V4L2_PIX_FMT_NV12MT_COL128) && defined(V4L2_PIX_FMT_NV12MT_10_COL128) + { V4L2_PIX_FMT_NV12MT_COL128, AV_PIX_FMT_YUV420P, DRM_FORMAT_NV12, DRM_FORMAT_MOD_BROADCOM_SAND128, 8 }, +#if defined(DRM_FORMAT_P030) + { V4L2_PIX_FMT_NV12MT_10_COL128, AV_PIX_FMT_YUV420P10, DRM_FORMAT_P030, DRM_FORMAT_MOD_BROADCOM_SAND128, 10 }, +#endif +#endif +#if defined(V4L2_PIX_FMT_YUV420_8_AFBC_16X16_SPLIT) + { + .pixelformat = V4L2_PIX_FMT_YUV420_8_AFBC_16X16_SPLIT, + .sw_format = AV_PIX_FMT_YUV420P, + .drm_format = DRM_FORMAT_YUV420_8BIT, + .format_modifier = DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | + AFBC_FORMAT_MOD_SPARSE | + AFBC_FORMAT_MOD_SPLIT), 
+ .bit_depth = 8, + }, +#endif +#if defined(V4L2_PIX_FMT_YUV420_10_AFBC_16X16_SPLIT) + { + .pixelformat = V4L2_PIX_FMT_YUV420_10_AFBC_16X16_SPLIT, + .sw_format = AV_PIX_FMT_YUV420P10, + .drm_format = DRM_FORMAT_YUV420_10BIT, + .format_modifier = DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | + AFBC_FORMAT_MOD_SPARSE | + AFBC_FORMAT_MOD_SPLIT), + .bit_depth = 10, + }, +#endif +#if defined(V4L2_PIX_FMT_NV12_COL128) && defined(V4L2_PIX_FMT_NV12_10_COL128) + { V4L2_PIX_FMT_NV12_COL128, AV_PIX_FMT_YUV420P, DRM_FORMAT_NV12, DRM_FORMAT_MOD_BROADCOM_SAND128, 8 }, +#if defined(DRM_FORMAT_P030) + { V4L2_PIX_FMT_NV12_10_COL128, AV_PIX_FMT_YUV420P10, DRM_FORMAT_P030, DRM_FORMAT_MOD_BROADCOM_SAND128, 10 }, +#endif +#endif +}; + +static int v4l2request_set_drm_descriptor(AVDRMFrameDescriptor *desc, + struct v4l2_format *format) +{ + AVDRMLayerDescriptor *layer = &desc->layers[0]; + uint32_t pixelformat = V4L2_TYPE_IS_MULTIPLANAR(format->type) ? + format->fmt.pix_mp.pixelformat : + format->fmt.pix.pixelformat; + uint64_t format_modifier; + + layer->format = 0; + for (int i = 0; i < FF_ARRAY_ELEMS(v4l2request_capture_pixelformats); i++) { + if (pixelformat == v4l2request_capture_pixelformats[i].pixelformat) { + layer->format = v4l2request_capture_pixelformats[i].drm_format; + format_modifier = v4l2request_capture_pixelformats[i].format_modifier; + break; + } + } + if (!layer->format) + return AVERROR(ENOENT); + + for (int i = 0; i < desc->nb_objects; i++) { + desc->objects[i].format_modifier = format_modifier; + desc->objects[i].size = V4L2_TYPE_IS_MULTIPLANAR(format->type) ? + format->fmt.pix_mp.plane_fmt[i].sizeimage : + format->fmt.pix.sizeimage; + } + + desc->nb_layers = 1; + layer->nb_planes = 1; + + layer->planes[0].object_index = 0; + layer->planes[0].offset = 0; + layer->planes[0].pitch = V4L2_TYPE_IS_MULTIPLANAR(format->type) ? + format->fmt.pix_mp.plane_fmt[0].bytesperline : + format->fmt.pix.bytesperline; + + // AFBC formats only use 1 plane, remaining use 2 planes + if ((desc->objects[0].format_modifier >> 56) != DRM_FORMAT_MOD_VENDOR_ARM) { + layer->nb_planes = 2; + layer->planes[1].object_index = 0; + layer->planes[1].offset = layer->planes[0].pitch * + (V4L2_TYPE_IS_MULTIPLANAR(format->type) ? + format->fmt.pix_mp.height : + format->fmt.pix.height); + layer->planes[1].pitch = layer->planes[0].pitch; + } + +#if defined(V4L2_PIX_FMT_NV12MT_COL128) && defined(V4L2_PIX_FMT_NV12MT_10_COL128) + // Raspberry Pi formats need special handling + if (pixelformat == V4L2_PIX_FMT_NV12MT_COL128 || + pixelformat == V4L2_PIX_FMT_NV12MT_10_COL128) { + layer->planes[1].object_index = 1; + layer->planes[1].offset = 0; + layer->planes[0].pitch = (V4L2_TYPE_IS_MULTIPLANAR(format->type) ? + format->fmt.pix_mp.height : + format->fmt.pix.height); + layer->planes[1].pitch = layer->planes[0].pitch / 2; + } +#endif + +#if defined(V4L2_PIX_FMT_NV12_COL128) && defined(V4L2_PIX_FMT_NV12_10_COL128) + // Raspberry Pi formats need special handling + if (pixelformat == V4L2_PIX_FMT_NV12_COL128 || + pixelformat == V4L2_PIX_FMT_NV12_10_COL128) { + desc->objects[0].format_modifier = + DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(layer->planes[0].pitch); + layer->planes[1].offset = 128 * + (V4L2_TYPE_IS_MULTIPLANAR(format->type) ? + format->fmt.pix_mp.height : + format->fmt.pix.height); + layer->planes[0].pitch = (V4L2_TYPE_IS_MULTIPLANAR(format->type) ? 
+ format->fmt.pix_mp.width : + format->fmt.pix.width); + if (pixelformat == V4L2_PIX_FMT_NV12_10_COL128) + layer->planes[0].pitch *= 2; + layer->planes[1].pitch = layer->planes[0].pitch; + } +#endif + + return 0; +} + +static void v4l2request_device_uninit(AVHWDeviceContext *hwdev) +{ + V4L2RequestDeviceContext *hwctx = hwdev->hwctx; + + av_freep(&hwctx->decoders); + hwctx->nb_decoders = 0; +} + +static int v4l2request_device_create(AVHWDeviceContext *hwdev, const char *device, + AVDictionary *opts, int flags) +{ + V4L2RequestDeviceContext *hwctx = hwdev->hwctx; + + hwctx->decoders = NULL; + hwctx->nb_decoders = 0; + + // TODO: enumerate V4L2 Request API capable video decoders + // and fill hwctx->decoders and hwctx->nb_decoders, + // limit to decoders for the media 'device' when specified + + return 0; +} + +static int v4l2request_set_format(AVHWFramesContext *hwfc, + enum v4l2_buf_type type, + uint32_t pixelformat, + uint32_t buffersize) +{ + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi = fctx->internal; + struct v4l2_format format = { + .type = type, + }; + + if (V4L2_TYPE_IS_MULTIPLANAR(type)) { + format.fmt.pix_mp.width = hwfc->width; + format.fmt.pix_mp.height = hwfc->height; + format.fmt.pix_mp.pixelformat = pixelformat; + format.fmt.pix_mp.plane_fmt[0].sizeimage = buffersize; + format.fmt.pix_mp.num_planes = 1; + } else { + format.fmt.pix.width = hwfc->width; + format.fmt.pix.height = hwfc->height; + format.fmt.pix.pixelformat = pixelformat; + format.fmt.pix.sizeimage = buffersize; + } + + if (ioctl(fctxi->video_fd, VIDIOC_S_FMT, &format) < 0) + return AVERROR(errno); + + return 0; +} + +static int v4l2request_select_capture_format(AVHWFramesContext *hwfc) +{ + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi = fctx->internal; + enum v4l2_buf_type type = fctxi->capture.format.type; + uint32_t pixelformat, fallback = 0; + struct v4l2_format format = { + .type = type, + }; + struct v4l2_fmtdesc fmtdesc = { + .index = 0, + .type = type, + }; + + // Get the driver preferred (or default) format + if (ioctl(fctxi->video_fd, VIDIOC_G_FMT, &format) < 0) + return AVERROR(errno); + + pixelformat = V4L2_TYPE_IS_MULTIPLANAR(type) ? 
+ format.fmt.pix_mp.pixelformat : + format.fmt.pix.pixelformat; + + // Try to use the driver preferred format when it is a known format + for (int i = 0; i < FF_ARRAY_ELEMS(v4l2request_capture_pixelformats); i++) { + if (pixelformat == v4l2request_capture_pixelformats[i].pixelformat && + (fctx->bit_depth == v4l2request_capture_pixelformats[i].bit_depth || + !fctx->bit_depth)) + return v4l2request_set_format(hwfc, type, pixelformat, 0); + } + + // Next try to use the first known format with matching bit depth + while (ioctl(fctxi->video_fd, VIDIOC_ENUM_FMT, &fmtdesc) >= 0) { + for (int i = 0; i < FF_ARRAY_ELEMS(v4l2request_capture_pixelformats); i++) { + if (fmtdesc.pixelformat == v4l2request_capture_pixelformats[i].pixelformat) { + if (fctx->bit_depth == v4l2request_capture_pixelformats[i].bit_depth || + !fctx->bit_depth) + return v4l2request_set_format(hwfc, type, fmtdesc.pixelformat, 0); + else if (!fallback) + fallback = fmtdesc.pixelformat; + } + } + + fmtdesc.index++; + } + + // Fallback to use the first known format + if (fallback) + return v4l2request_set_format(hwfc, type, fallback, 0); + + return AVERROR(errno); +} + +static int v4l2request_try_framesize(AVHWFramesContext *hwfc, + uint32_t pixelformat) +{ + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi = fctx->internal; + struct v4l2_frmsizeenum frmsize = { + .index = 0, + .pixel_format = pixelformat, + }; + + // Enumerate and check if frame size is supported + while (ioctl(fctxi->video_fd, VIDIOC_ENUM_FRAMESIZES, &frmsize) >= 0) { + if (frmsize.type == V4L2_FRMSIZE_TYPE_DISCRETE && + hwfc->width == frmsize.discrete.width && + hwfc->height == frmsize.discrete.height) { + return 0; + } else if ((frmsize.type == V4L2_FRMSIZE_TYPE_STEPWISE || + frmsize.type == V4L2_FRMSIZE_TYPE_CONTINUOUS) && + hwfc->width >= frmsize.stepwise.min_width && + hwfc->height >= frmsize.stepwise.min_height && + hwfc->width <= frmsize.stepwise.max_width && + hwfc->height <= frmsize.stepwise.max_height) { + return 0; + } + + frmsize.index++; + } + + return AVERROR(errno); +} + +static int v4l2request_try_format(AVHWFramesContext *hwfc, + enum v4l2_buf_type type, + uint32_t pixelformat) +{ + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi = fctx->internal; + struct v4l2_fmtdesc fmtdesc = { + .index = 0, + .type = type, + }; + + // Enumerate and check if format is supported + while (ioctl(fctxi->video_fd, VIDIOC_ENUM_FMT, &fmtdesc) >= 0) { + if (fmtdesc.pixelformat == pixelformat) + return 0; + + fmtdesc.index++; + } + + return AVERROR(errno); +} + +static int v4l2request_set_controls(AVHWFramesContext *hwfc, + struct v4l2_ext_control *control, int count) +{ + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi = fctx->internal; + struct v4l2_ext_controls controls = { + .controls = control, + .count = count, + }; + + if (!control || !count) + return 0; + + if (ioctl(fctxi->video_fd, VIDIOC_S_EXT_CTRLS, &controls) < 0) + return AVERROR(errno); + + return 0; +} + +static int v4l2request_probe_video_device(AVHWFramesContext *hwfc, + const char *path, + uint32_t pixelformat, + uint32_t buffersize) +{ + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi = fctx->internal; + struct v4l2_capability capability; + struct v4l2_create_buffers buffers; + unsigned int capabilities; + int ret; + + /* + * Open video device in non-blocking mode to support decoding using + * multiple queued requests, required 
for e.g. multi stage decoding. + */ + fctxi->video_fd = open(path, O_RDWR | O_NONBLOCK); + if (fctxi->video_fd < 0) { + ret = AVERROR(errno); + av_log(hwfc, AV_LOG_ERROR, "Failed to open video device %s: %s (%d)\n", + path, strerror(errno), errno); + return ret; + } + + // Query capabilities of the video device + if (ioctl(fctxi->video_fd, VIDIOC_QUERYCAP, &capability) < 0) { + ret = AVERROR(errno); + av_log(hwfc, AV_LOG_ERROR, "Failed to query capabilities of %s: %s (%d)\n", + path, strerror(errno), errno); + goto fail; + } + + // Use device capabilities of the opened device when supported + capabilities = (capability.capabilities & V4L2_CAP_DEVICE_CAPS) ? + capability.device_caps : capability.capabilities; + + // Ensure streaming is supported on the video device + if ((capabilities & V4L2_CAP_STREAMING) != V4L2_CAP_STREAMING) { + ret = AVERROR(EINVAL); + av_log(hwfc, AV_LOG_VERBOSE, "Device %s is missing streaming capability\n", path); + goto fail; + } + + // Ensure multi- or single-planar API can be used + if ((capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) == V4L2_CAP_VIDEO_M2M_MPLANE) { + fctxi->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; + fctxi->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; + } else if ((capabilities & V4L2_CAP_VIDEO_M2M) == V4L2_CAP_VIDEO_M2M) { + fctxi->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + fctxi->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + } else { + ret = AVERROR(EINVAL); + av_log(hwfc, AV_LOG_VERBOSE, "Device %s is missing mem2mem capability\n", path); + goto fail; + } + + // Query OUTPUT buffer capabilities + buffers = (struct v4l2_create_buffers) { + .count = 0, + .memory = V4L2_MEMORY_MMAP, + .format.type = fctxi->output.format.type, + }; + if (ioctl(fctxi->video_fd, VIDIOC_CREATE_BUFS, &buffers) < 0) { + ret = AVERROR(errno); + av_log(hwfc, AV_LOG_ERROR, + "Failed to query OUTPUT buffer capabilities of %s: %s (%d)\n", + path, strerror(errno), errno); + goto fail; + } + fctxi->output.capabilities = buffers.capabilities; + + // Ensure requests can be used + if ((buffers.capabilities & V4L2_BUF_CAP_SUPPORTS_REQUESTS) != + V4L2_BUF_CAP_SUPPORTS_REQUESTS) { + ret = AVERROR(EINVAL); + av_log(hwfc, AV_LOG_VERBOSE, "Device %s is missing support for requests\n", path); + goto fail; + } + + // Ensure the codec pixelformat can be used + ret = v4l2request_try_format(hwfc, fctxi->output.format.type, pixelformat); + if (ret < 0) { + av_log(hwfc, AV_LOG_VERBOSE, "Device %s is missing support for pixelformat %s\n", + path, av_fourcc2str(pixelformat)); + goto fail; + } + + // Ensure frame size is supported, when driver support ENUM_FRAMESIZES + ret = v4l2request_try_framesize(hwfc, pixelformat); + if (ret < 0 && ret != AVERROR(ENOTTY)) { + av_log(hwfc, AV_LOG_VERBOSE, + "Device %s is missing support for frame size %dx%d of pixelformat %s\n", + path, hwfc->width, hwfc->height, av_fourcc2str(pixelformat)); + goto fail; + } + + // Set the codec pixelformat and OUTPUT buffersize to be used + ret = v4l2request_set_format(hwfc, fctxi->output.format.type, pixelformat, buffersize); + if (ret < 0) { + av_log(hwfc, AV_LOG_ERROR, + "Failed to set OUTPUT pixelformat %s of %s: %s (%d)\n", + av_fourcc2str(pixelformat), path, strerror(errno), errno); + goto fail; + } + + // Get format details for OUTPUT buffers + if (ioctl(fctxi->video_fd, VIDIOC_G_FMT, &fctxi->output.format) < 0) { + ret = AVERROR(errno); + av_log(hwfc, AV_LOG_ERROR, "Failed to get OUTPUT format: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + /* + * Set any codec 
specific controls that can help assist the driver + * make a decision on what CAPTURE buffer format can be used. + */ + ret = v4l2request_set_controls(hwfc, fctx->init_controls, fctx->nb_init_controls); + if (ret < 0) { + av_log(hwfc, AV_LOG_VERBOSE, + "Failed to set %d control(s): %s (%d)\n", + fctx->nb_init_controls, strerror(errno), errno); + goto fail; + } + + // Select a supported CAPTURE buffer format + ret = v4l2request_select_capture_format(hwfc); + if (ret < 0) { + av_log(hwfc, AV_LOG_VERBOSE, + "Failed to select a CAPTURE format %s of %s: %s (%d)\n", + av_fourcc2str(pixelformat), path, strerror(errno), errno); + goto fail; + } + + // Query CAPTURE buffer capabilities + buffers = (struct v4l2_create_buffers) { + .count = 0, + .memory = V4L2_MEMORY_MMAP, + .format.type = fctxi->capture.format.type, + }; + if (ioctl(fctxi->video_fd, VIDIOC_CREATE_BUFS, &buffers) < 0) { + ret = AVERROR(errno); + av_log(hwfc, AV_LOG_ERROR, + "Failed to query CAPTURE buffer capabilities of %s: %s (%d)\n", + path, strerror(errno), errno); + goto fail; + } + fctxi->capture.capabilities = buffers.capabilities; + + // Get format details for CAPTURE buffers + if (ioctl(fctxi->video_fd, VIDIOC_G_FMT, &fctxi->capture.format) < 0) { + ret = AVERROR(errno); + av_log(hwfc, AV_LOG_ERROR, "Failed to get CAPTURE format: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + // All tests passed, video device should be capable + return 0; + +fail: + if (fctxi->video_fd >= 0) { + close(fctxi->video_fd); + fctxi->video_fd = -1; + } + return ret; +} + +static int v4l2request_probe_video_devices(AVHWFramesContext *hwfc, + struct udev *udev, + uint32_t pixelformat, + uint32_t buffersize) +{ + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi = fctx->internal; + struct media_device_info device_info; + struct media_v2_topology topology = {0}; + struct media_v2_interface *interfaces; + struct udev_device *device; + const char *path; + dev_t devnum; + int ret; + + if (ioctl(fctxi->media_fd, MEDIA_IOC_DEVICE_INFO, &device_info) < 0) + return AVERROR(errno); + + if (ioctl(fctxi->media_fd, MEDIA_IOC_G_TOPOLOGY, &topology) < 0) { + ret = AVERROR(errno); + av_log(hwfc, AV_LOG_ERROR, "Failed to get media topology: %s (%d)\n", + strerror(errno), errno); + return ret; + } + + if (!topology.num_interfaces) + return AVERROR(ENOENT); + + interfaces = av_calloc(topology.num_interfaces, sizeof(struct media_v2_interface)); + if (!interfaces) + return AVERROR(ENOMEM); + + topology.ptr_interfaces = (__u64)(uintptr_t)interfaces; + if (ioctl(fctxi->media_fd, MEDIA_IOC_G_TOPOLOGY, &topology) < 0) { + ret = AVERROR(errno); + av_log(hwfc, AV_LOG_ERROR, "Failed to get media topology: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + ret = AVERROR(ENOENT); + for (int i = 0; i < topology.num_interfaces; i++) { + if (interfaces[i].intf_type != MEDIA_INTF_T_V4L_VIDEO) + continue; + + devnum = makedev(interfaces[i].devnode.major, interfaces[i].devnode.minor); + device = udev_device_new_from_devnum(udev, 'c', devnum); + if (!device) + continue; + + path = udev_device_get_devnode(device); + if (path) + ret = v4l2request_probe_video_device(hwfc, path, pixelformat, buffersize); + udev_device_unref(device); + + // Stop when we have found a capable video device + if (!ret) { + av_log(hwfc, AV_LOG_INFO, + "Using V4L2 media driver %s (%u.%u.%u) for %s\n", + device_info.driver, + device_info.driver_version >> 16, + (device_info.driver_version >> 8) & 0xff, + device_info.driver_version & 0xff, + 
av_fourcc2str(pixelformat)); + break; + } + } + +fail: + av_free(interfaces); + return ret; +} + +static int v4l2request_probe_media_device(AVHWFramesContext *hwfc, + struct udev_device *device, + uint32_t pixelformat, + uint32_t buffersize) +{ + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi = fctx->internal; + const char *path; + int ret; + + path = udev_device_get_devnode(device); + if (!path) + return AVERROR(ENODEV); + + // Open enumerated media device + fctxi->media_fd = open(path, O_RDWR); + if (fctxi->media_fd < 0) { + ret = AVERROR(errno); + av_log(hwfc, AV_LOG_ERROR, "Failed to open media device %s: %s (%d)\n", + path, strerror(errno), errno); + return ret; + } + + // Probe video devices of current media device + ret = v4l2request_probe_video_devices(hwfc, udev_device_get_udev(device), + pixelformat, buffersize); + + // Cleanup when no capable video device was found + if (ret < 0) { + close(fctxi->media_fd); + fctxi->media_fd = -1; + } + + return ret; +} + +static int v4l2request_probe_media_devices(AVHWFramesContext *hwfc, + struct udev *udev, + uint32_t pixelformat, + uint32_t buffersize) +{ + struct udev_enumerate *enumerate; + struct udev_list_entry *devices; + struct udev_list_entry *entry; + struct udev_device *device; + int ret; + + enumerate = udev_enumerate_new(udev); + if (!enumerate) + return AVERROR(ENOMEM); + + udev_enumerate_add_match_subsystem(enumerate, "media"); + udev_enumerate_scan_devices(enumerate); + devices = udev_enumerate_get_list_entry(enumerate); + + ret = AVERROR(ENOENT); + udev_list_entry_foreach(entry, devices) { + const char *path = udev_list_entry_get_name(entry); + if (!path) + continue; + + device = udev_device_new_from_syspath(udev, path); + if (!device) + continue; + + // Probe media device for a capable video device + ret = v4l2request_probe_media_device(hwfc, device, pixelformat, buffersize); + udev_device_unref(device); + + // Stop when we have found a capable media and video device + if (!ret) + break; + } + + udev_enumerate_unref(enumerate); + return ret; +} + +static int v4l2request_open_decoder(AVHWFramesContext *hwfc) +{ + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + uint32_t buffersize; + struct udev *udev; + int ret; + + // Ensure codec pixelformat is set + if (!fctx->pixelformat) + return AVERROR(EINVAL); + + // FIXME: locate a decoder using hwdevice context decoders + + udev = udev_new(); + if (!udev) + return AVERROR(ENOMEM); + + buffersize = FFMAX(hwfc->width * hwfc->height * 3 / 2, 256 * 1024); + + // Probe all media devices (auto-detection) + ret = v4l2request_probe_media_devices(hwfc, udev, fctx->pixelformat, buffersize); + + udev_unref(udev); + return ret; +} + +static AVBufferRef *v4l2request_v4l2_buffer_alloc(AVHWFramesContext *hwfc, + struct v4l2_format *format) +{ + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi = fctx->internal; + struct v4l2_create_buffers buffers = { + .count = 1, + .memory = V4L2_MEMORY_MMAP, + .format = *format, + }; + struct v4l2_buffer *buffer; + uint8_t num_planes; + AVBufferRef *ref; + + num_planes = V4L2_TYPE_IS_MULTIPLANAR(format->type) ? 
+ format->fmt.pix_mp.num_planes : 0; + + ref = av_buffer_allocz(sizeof(struct v4l2_buffer) + + (sizeof(struct v4l2_plane) * num_planes)); + if (!ref) + return NULL; + + buffer = (struct v4l2_buffer *)ref->data; + buffer->type = format->type; + + if (num_planes) { + buffer->length = num_planes; + buffer->m.planes = (struct v4l2_plane *)(buffer + 1); + } + + // Create the buffer + if (ioctl(fctxi->video_fd, VIDIOC_CREATE_BUFS, &buffers) < 0) { + av_log(hwfc, AV_LOG_ERROR, "Failed to create buffer of type %d: %s (%d)\n", + buffer->type, strerror(errno), errno); + goto fail; + } + + buffer->memory = buffers.memory; + buffer->index = buffers.index; + + // Query more details of the created buffer + if (ioctl(fctxi->video_fd, VIDIOC_QUERYBUF, buffer) < 0) { + av_log(hwfc, AV_LOG_ERROR, "Failed to query buffer %d of type %d: %s (%d)\n", + buffer->index, buffer->type, strerror(errno), errno); + goto fail; + } + + return ref; + +fail: + av_buffer_unref(&ref); + return NULL; +} + +static AVBufferRef *v4l2request_capture_buffer_alloc(void *opaque, size_t size) +{ + AVHWFramesContext *hwfc = opaque; + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi = fctx->internal; + + return v4l2request_v4l2_buffer_alloc(hwfc, &fctxi->capture.format); +} + +static AVBufferRef *v4l2request_output_buffer_alloc(void *opaque, size_t size) +{ + AVHWFramesContext *hwfc = opaque; + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi = fctx->internal; + + return v4l2request_v4l2_buffer_alloc(hwfc, &fctxi->output.format); +} + +static void v4l2request_frame_free(void *opaque, uint8_t *data) +{ + V4L2RequestFrameDescriptor *desc = (V4L2RequestFrameDescriptor *)data; + + // Close the exported CAPTURE buffer memory planes + for (int i = 0; i < FF_ARRAY_ELEMS(desc->fd); i++) { + if (desc->fd[i] >= 0) { + close(desc->fd[i]); + desc->fd[i] = -1; + } + } + + // Return the CAPTURE buffer to the frames context CAPTURE pool + av_buffer_unref(&desc->ref); + + av_free(data); +} + +static AVBufferRef *v4l2request_frame_alloc(void *opaque, size_t size) +{ + AVHWFramesContext *hwfc = opaque; + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi = fctx->internal; + struct v4l2_format *format = &fctxi->capture.format; + V4L2RequestFrameDescriptor *desc; + struct v4l2_buffer *buffer; + AVBufferRef *ref; + uint8_t *data; + + data = av_mallocz(size); + if (!data) + return NULL; + + ref = av_buffer_create(data, size, v4l2request_frame_free, + hwfc, AV_BUFFER_FLAG_READONLY); + if (!ref) { + av_free(data); + return NULL; + } + + // Set initial default values + desc = (V4L2RequestFrameDescriptor *)data; + for (int i = 0; i < FF_ARRAY_ELEMS(desc->fd); i++) + desc->fd[i] = -1; + + // Get a CAPTURE buffer from frames context CAPTURE pool + desc->ref = av_buffer_pool_get(fctxi->capture.pool); + if (!desc->ref) + goto fail; + + buffer = (struct v4l2_buffer *)desc->ref->data; + desc->index = buffer->index; + + // Export CAPTURE buffer memory planes + desc->base.nb_objects = V4L2_TYPE_IS_MULTIPLANAR(format->type) ? 
+ format->fmt.pix_mp.num_planes : 1; + av_assert0(desc->base.nb_objects <= AV_DRM_MAX_PLANES); + for (int i = 0; i < desc->base.nb_objects; i++) { + struct v4l2_exportbuffer exportbuffer = { + .type = buffer->type, + .index = buffer->index, + .plane = i, + .flags = O_RDONLY, + }; + if (ioctl(fctxi->video_fd, VIDIOC_EXPBUF, &exportbuffer) < 0) { + av_log(hwfc, AV_LOG_ERROR, "Failed to export memory plane %d (%d): %s (%d)\n", + i, buffer->index, strerror(errno), errno); + goto fail; + } + desc->base.objects[i].fd = desc->fd[i] = exportbuffer.fd; + } + + // Set AVDRMFrameDescriptor based on CAPTURE buffer format + if (v4l2request_set_drm_descriptor(&desc->base, format) < 0) + goto fail; + + return ref; + +fail: + av_buffer_unref(&ref); + return NULL; +} + +static int v4l2request_frames_init(AVHWFramesContext *hwfc) +{ + V4L2RequestFramesContext *hwctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi; + uint32_t pixelformat; + int ret; + + // Set initial default values + fctxi = &hwctx->internal; + hwctx->p.internal = fctxi; + fctxi->media_fd = -1; + fctxi->video_fd = -1; + + // Locate and open a capable video decoder device + ret = v4l2request_open_decoder(hwfc); + if (ret < 0) + return ret; + + // Reset init controls after video device is opened + hwctx->p.init_controls = NULL; + hwctx->p.nb_init_controls = 0; + + // Update frames context with CAPTURE format details + if (V4L2_TYPE_IS_MULTIPLANAR(fctxi->capture.format.type)) { + hwfc->width = fctxi->capture.format.fmt.pix_mp.width; + hwfc->height = fctxi->capture.format.fmt.pix_mp.height; + pixelformat = fctxi->capture.format.fmt.pix_mp.pixelformat; + } else { + hwfc->width = fctxi->capture.format.fmt.pix.width; + hwfc->height = fctxi->capture.format.fmt.pix.height; + pixelformat = fctxi->capture.format.fmt.pix.pixelformat; + } + + hwfc->sw_format = AV_PIX_FMT_NONE; + for (int i = 0; i < FF_ARRAY_ELEMS(v4l2request_capture_pixelformats); i++) { + if (pixelformat == v4l2request_capture_pixelformats[i].pixelformat) { + hwctx->p.bit_depth = v4l2request_capture_pixelformats[i].bit_depth; + hwfc->sw_format = v4l2request_capture_pixelformats[i].sw_format; + break; + } + } + + // Initialize buffer pool for CAPTURE buffers + fctxi->capture.pool = av_buffer_pool_init2(sizeof(struct v4l2_buffer), hwfc, + v4l2request_capture_buffer_alloc, NULL); + if (!fctxi->capture.pool) + return AVERROR(ENOMEM); + + // Initialize buffer pool for OUTPUT buffers + fctxi->output.pool = av_buffer_pool_init2(sizeof(struct v4l2_buffer), hwfc, + v4l2request_output_buffer_alloc, NULL); + if (!fctxi->output.pool) + return AVERROR(ENOMEM); + + // Initialize buffer pool for frame descriptors + ffhwframesctx(hwfc)->pool_internal = + av_buffer_pool_init2(sizeof(V4L2RequestFrameDescriptor), hwfc, + v4l2request_frame_alloc, NULL); + if (!ffhwframesctx(hwfc)->pool_internal) + return AVERROR(ENOMEM); + + av_log(hwfc, AV_LOG_VERBOSE, "Using CAPTURE buffer format %s (%dx%d)\n", + av_fourcc2str(pixelformat), hwfc->width, hwfc->height); + + return 0; +} + +static void v4l2request_frames_uninit(AVHWFramesContext *hwfc) +{ + AVV4L2RequestFramesContext *fctx = hwfc->hwctx; + AVV4L2RequestFramesContextInternal *fctxi = fctx->internal; + + av_buffer_pool_uninit(&fctxi->capture.pool); + av_buffer_pool_uninit(&fctxi->output.pool); + + if (fctxi->video_fd >= 0) { + close(fctxi->video_fd); + fctxi->video_fd = -1; + } + + if (fctxi->media_fd) { + close(fctxi->media_fd); + fctxi->media_fd = -1; + } +} + +static int v4l2request_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame) +{ + 
V4L2RequestFrameDescriptor *desc; + + frame->buf[0] = av_buffer_pool_get(hwfc->pool); + if (!frame->buf[0]) + return AVERROR(ENOMEM); + + desc = (V4L2RequestFrameDescriptor *)frame->buf[0]->data; + frame->data[0] = (uint8_t *)&desc->base; + frame->data[1] = (uint8_t *)(uintptr_t)desc->index; + + frame->format = AV_PIX_FMT_DRM_PRIME; + frame->width = hwfc->width; + frame->height = hwfc->height; + + return 0; +} + +typedef struct V4L2RequestMapping { + // Address and length of each mmap()ed region. + int nb_regions; + int object[AV_DRM_MAX_PLANES]; + void *address[AV_DRM_MAX_PLANES]; + size_t length[AV_DRM_MAX_PLANES]; +} V4L2RequestMapping; + +static void v4l2request_unmap_frame(AVHWFramesContext *hwfc, + HWMapDescriptor *hwmap) +{ + V4L2RequestMapping *map = hwmap->priv; + + for (int i = 0; i < map->nb_regions; i++) { + struct dma_buf_sync sync = { + .flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ, + }; + ioctl(map->object[i], DMA_BUF_IOCTL_SYNC, &sync); + munmap(map->address[i], map->length[i]); + } + + av_free(map); +} + +static int v4l2request_map_frame(AVHWFramesContext *hwfc, + AVFrame *dst, const AVFrame *src) +{ + const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0]; + struct dma_buf_sync sync = { + .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ, + }; + V4L2RequestMapping *map; + int ret, i, p, plane; + void *addr; + + map = av_mallocz(sizeof(*map)); + if (!map) + return AVERROR(ENOMEM); + + av_assert0(desc->nb_objects <= AV_DRM_MAX_PLANES); + for (i = 0; i < desc->nb_objects; i++) { + addr = mmap(NULL, desc->objects[i].size, PROT_READ, MAP_SHARED, + desc->objects[i].fd, 0); + if (addr == MAP_FAILED) { + ret = AVERROR(errno); + av_log(hwfc, AV_LOG_ERROR, "Failed to map DRM object %d to memory: %s (%d)\n", + desc->objects[i].fd, strerror(errno), errno); + goto fail; + } + + map->address[i] = addr; + map->length[i] = desc->objects[i].size; + map->object[i] = desc->objects[i].fd; + + /* + * We're not checking for errors here because the kernel may not + * support the ioctl, in which case its okay to carry on + */ + ioctl(desc->objects[i].fd, DMA_BUF_IOCTL_SYNC, &sync); + } + map->nb_regions = i; + + plane = 0; + for (i = 0; i < desc->nb_layers; i++) { + const AVDRMLayerDescriptor *layer = &desc->layers[i]; + for (p = 0; p < layer->nb_planes; p++) { + dst->data[plane] = + (uint8_t *)map->address[layer->planes[p].object_index] + + layer->planes[p].offset; + dst->linesize[plane] = layer->planes[p].pitch; + ++plane; + } + } + av_assert0(plane <= AV_DRM_MAX_PLANES); + + dst->width = src->width; + dst->height = src->height; + + ret = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, + v4l2request_unmap_frame, map); + if (ret < 0) + goto fail; + + return 0; + +fail: + for (i = 0; i < desc->nb_objects; i++) { + if (map->address[i]) + munmap(map->address[i], map->length[i]); + } + av_free(map); + return ret; +} + +static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc, + enum AVHWFrameTransferDirection dir, + enum AVPixelFormat **formats) +{ + enum AVPixelFormat *fmts; + + if (dir == AV_HWFRAME_TRANSFER_DIRECTION_TO) + return AVERROR(ENOSYS); + + fmts = av_malloc_array(2, sizeof(*fmts)); + if (!fmts) + return AVERROR(ENOMEM); + + fmts[0] = hwfc->sw_format; + fmts[1] = AV_PIX_FMT_NONE; + + if (hwfc->sw_format == AV_PIX_FMT_YUV420P || + hwfc->sw_format == AV_PIX_FMT_YUV420P10 || + hwfc->sw_format == AV_PIX_FMT_YUV422P10) + fmts[0] = AV_PIX_FMT_NONE; + + *formats = fmts; + return 0; +} + +static int v4l2request_transfer_data_from(AVHWFramesContext *hwfc, + AVFrame 
*dst, const AVFrame *src) +{ + AVFrame *map; + int ret; + + if (dst->width > hwfc->width || dst->height > hwfc->height) + return AVERROR(EINVAL); + + map = av_frame_alloc(); + if (!map) + return AVERROR(ENOMEM); + map->format = dst->format; + + ret = v4l2request_map_frame(hwfc, map, src); + if (ret) + goto fail; + + map->width = dst->width; + map->height = dst->height; + + ret = av_frame_copy(dst, map); + if (ret) + goto fail; + + ret = 0; +fail: + av_frame_free(&map); + return ret; +} + +static int v4l2request_map_from(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + int ret; + + if (!(flags & AV_HWFRAME_MAP_READ)) + return AVERROR(ENOSYS); + + if (hwfc->sw_format == AV_PIX_FMT_NONE || + hwfc->sw_format == AV_PIX_FMT_YUV420P || + hwfc->sw_format == AV_PIX_FMT_YUV420P10 || + hwfc->sw_format == AV_PIX_FMT_YUV422P10) + return AVERROR(ENOSYS); + else if (dst->format == AV_PIX_FMT_NONE) + dst->format = hwfc->sw_format; + else if (hwfc->sw_format != dst->format) + return AVERROR(ENOSYS); + + ret = v4l2request_map_frame(hwfc, dst, src); + if (ret) + return ret; + + return av_frame_copy_props(dst, src); +} + +const HWContextType ff_hwcontext_type_v4l2request = { + .type = AV_HWDEVICE_TYPE_V4L2REQUEST, + .name = "V4L2 Request API", + + .device_hwctx_size = sizeof(V4L2RequestDeviceContext), + .device_create = v4l2request_device_create, + .device_uninit = v4l2request_device_uninit, + + .frames_hwctx_size = sizeof(V4L2RequestFramesContext), + .frames_init = v4l2request_frames_init, + .frames_uninit = v4l2request_frames_uninit, + .frames_get_buffer = v4l2request_get_buffer, + .transfer_get_formats = v4l2request_transfer_get_formats, + .transfer_data_from = v4l2request_transfer_data_from, + .map_from = v4l2request_map_from, + + .pix_fmts = (const enum AVPixelFormat[]) { + AV_PIX_FMT_DRM_PRIME, + AV_PIX_FMT_NONE + }, +}; diff --git a/libavutil/hwcontext_v4l2request.h b/libavutil/hwcontext_v4l2request.h new file mode 100644 index 0000000000..ec6beadd64 --- /dev/null +++ b/libavutil/hwcontext_v4l2request.h @@ -0,0 +1,70 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_HWCONTEXT_V4L2REQUEST_H +#define AVUTIL_HWCONTEXT_V4L2REQUEST_H + +#include +#include + +/** + * @file + * An API-specific header for AV_HWDEVICE_TYPE_V4L2REQUEST. + */ + +typedef struct AVV4L2RequestFramesContextInternal AVV4L2RequestFramesContextInternal; + +/** + * V4L2 Request API frames context. + * + * This struct is allocated as AVHWFramesContext.hwctx + */ +typedef struct AVV4L2RequestFramesContext { + /** + * Internal context for the initialized V4L2 stateless decoder/encoder session. 
+ */ + AVV4L2RequestFramesContextInternal *internal; + + /** + * V4L2_PIX_FMT_* coded pixel format to set on the OUTPUT queue (decoders) + * or the CAPTURE queue (encoders) during initialization. + * + * This field must be set by caller before av_hwframe_ctx_init() is called. + */ + uint32_t pixelformat; + + /** + * Optional bit depth of the frame pixel format, e.g. 8 or 10. + * + * This field should be set by caller before av_hwframe_ctx_init() is called, + * the field will be updated to match the selected frame pixel format after + * successful initialization. + */ + uint32_t bit_depth; + + /** + * Optional codec-specific extended controls to be set during initialization. + * + * These fields should be set by caller before av_hwframe_ctx_init() is called, + * fields are reset to NULL and 0 after successful initialization. + */ + struct v4l2_ext_control *init_controls; + int nb_init_controls; +} AVV4L2RequestFramesContext; + +#endif /* AVUTIL_HWCONTEXT_V4L2REQUEST_H */ diff --git a/libavutil/hwcontext_v4l2request_internal.h b/libavutil/hwcontext_v4l2request_internal.h new file mode 100644 index 0000000000..20436b2b2d --- /dev/null +++ b/libavutil/hwcontext_v4l2request_internal.h @@ -0,0 +1,67 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_HWCONTEXT_V4L2REQUEST_INTERNAL_H +#define AVUTIL_HWCONTEXT_V4L2REQUEST_INTERNAL_H + +#include "buffer.h" +#include "hwcontext_v4l2request.h" + +/** + * @file + * FFmpeg internal API-specific header for AV_HWDEVICE_TYPE_V4L2REQUEST. + */ + +/** + * Internal context for the initialized V4L2 stateless decoder/encoder session. + */ +struct AVV4L2RequestFramesContextInternal { + /** + * Media device file descriptor of the initialized session. + */ + int media_fd; + + /** + * Video device file descriptor of the initialized session. + */ + int video_fd; + + /** + * Details of the initialized CAPTURE and OUTPUT queues. + */ + struct { + /** + * V4L2 buffer format. + */ + struct v4l2_format format; + + /** + * V4L2 buffer capabilities flags. + */ + uint32_t capabilities; + + /** + * Buffer pool of allocated V4L2 buffers. + * + * AVBufferRef.data points to a struct v4l2_buffer for the created buffer. 
+ */ + AVBufferPool *pool; + } capture, output; +}; + +#endif /* AVUTIL_HWCONTEXT_V4L2REQUEST_INTERNAL_H */ diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index 90f9596def..3b4256c885 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -3267,6 +3267,50 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { .name = "ohcodec", .flags = AV_PIX_FMT_FLAG_HWACCEL, }, + [AV_PIX_FMT_SAND128] = { + .name = "sand128", + .nb_components = 3, + .log2_chroma_w = 1, + .log2_chroma_h = 1, + .comp = { + { 0, 1, 0, 0, 8 }, /* Y */ + { 1, 2, 0, 0, 8 }, /* U */ + { 1, 2, 1, 0, 8 }, /* V */ + }, + .flags = 0, + }, + [AV_PIX_FMT_SAND64_10] = { + .name = "sand64_10", + .nb_components = 3, + .log2_chroma_w = 1, + .log2_chroma_h = 1, + .comp = { + { 0, 2, 0, 0, 10 }, /* Y */ + { 1, 4, 0, 0, 10 }, /* U */ + { 1, 4, 2, 0, 10 }, /* V */ + }, + .flags = 0, + }, + [AV_PIX_FMT_SAND64_16] = { + .name = "sand64_16", + .nb_components = 3, + .log2_chroma_w = 1, + .log2_chroma_h = 1, + .comp = { + { 0, 2, 0, 0, 16 }, /* Y */ + { 1, 4, 0, 0, 16 }, /* U */ + { 1, 4, 2, 0, 16 }, /* V */ + }, + .flags = 0, + }, + [AV_PIX_FMT_RPI4_8] = { + .name = "rpi4_8", + .flags = AV_PIX_FMT_FLAG_HWACCEL, + }, + [AV_PIX_FMT_RPI4_10] = { + .name = "rpi4_10", + .flags = AV_PIX_FMT_FLAG_HWACCEL, + }, }; static const char * const color_range_names[] = { diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 2e7b6457e0..eaa2b1dcb1 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -498,6 +498,14 @@ enum AVPixelFormat { AV_PIX_FMT_GBRP12MSBLE, ///< planar GBR 4:4:4 36bpp, lowest bits zero, little-endian AV_PIX_FMT_OHCODEC, /// hardware decoding through openharmony +// RPI - not on ifdef so can be got at by calling progs +// #define so code that uses this can know it is there +#define AVUTIL_HAVE_PIX_FMT_SAND 1 + AV_PIX_FMT_SAND128, ///< 4:2:0 8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding + AV_PIX_FMT_SAND64_10, ///< 4:2:0 10-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding + AV_PIX_FMT_SAND64_16, ///< 4:2:0 16-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding + AV_PIX_FMT_RPI4_8, + AV_PIX_FMT_RPI4_10, AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions }; diff --git a/libavutil/rpi_sand_fn_pw.c b/libavutil/rpi_sand_fn_pw.c new file mode 100644 index 0000000000..0d5d203dc3 --- /dev/null +++ b/libavutil/rpi_sand_fn_pw.c @@ -0,0 +1,227 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Authors: John Cox +*/ + +// * Included twice from rpi_sand_fn with different PW + +#define STRCAT(x,y) x##y + +#if PW == 1 +#define pixel uint8_t +#define FUNC(f) STRCAT(f, 8) +#elif PW == 2 +#define pixel uint16_t +#define FUNC(f) STRCAT(f, 16) +#else +#error Unexpected PW +#endif + +// Fetches a single patch - offscreen fixup not done here +// w <= stride1 +// unclipped +void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h) +{ + const unsigned int x = _x; + const unsigned int w = _w; + const unsigned int mask = stride1 - 1; + +#if PW == 1 && HAVE_SAND_ASM + if (_x == 0) { + ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride, + src, stride1, stride2, _x, y, _w, h); + return; + } +#endif + + if ((x & ~mask) == ((x + w) & ~mask)) { + // All in one sand stripe + const uint8_t * p = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; + for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p += stride1) { + memcpy(dst, p, w); + } + } + else + { + // Two+ stripe + const unsigned int sstride = stride1 * stride2; + const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; + const uint8_t * p2 = p1 + sstride - (x & mask); + const unsigned int w1 = stride1 - (x & mask); + const unsigned int w3 = (x + w) & mask; + const unsigned int w2 = w - (w1 + w3); + + for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p1 += stride1, p2 += stride1) { + unsigned int j; + const uint8_t * p = p2; + uint8_t * d = dst; + memcpy(d, p1, w1); + d += w1; + for (j = 0; j < w2; j += stride1, d += stride1, p += sstride) { + memcpy(d, p, stride1); + } + memcpy(d, p, w3); + } + } +} + +// x & w in bytes but not of interleave (i.e. 
offset = x*2 for U&V) + +void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_stride_u, + uint8_t * dst_v, const unsigned int dst_stride_v, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h) +{ + const unsigned int x = _x * 2; + const unsigned int w = _w * 2; + const unsigned int mask = stride1 - 1; + +#if PW == 1 && HAVE_SAND_ASM + if (_x == 0) { + ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v, + src, stride1, stride2, _x, y, _w, h); + return; + } +#endif + + if ((x & ~mask) == ((x + w) & ~mask)) { + // All in one sand stripe + const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; + for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1) { + pixel * du = (pixel *)dst_u; + pixel * dv = (pixel *)dst_v; + const pixel * p = (const pixel *)p1; + for (unsigned int k = 0; k < w; k += 2 * PW) { + *du++ = *p++; + *dv++ = *p++; + } + } + } + else + { + // Two+ stripe + const unsigned int sstride = stride1 * stride2; + const unsigned int sstride_p = (sstride - stride1) / PW; + + const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; + const uint8_t * p2 = p1 + sstride - (x & mask); + const unsigned int w1 = stride1 - (x & mask); + const unsigned int w3 = (x + w) & mask; + const unsigned int w2 = w - (w1 + w3); + + for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1, p2 += stride1) { + unsigned int j; + const pixel * p = (const pixel *)p1; + pixel * du = (pixel *)dst_u; + pixel * dv = (pixel *)dst_v; + for (unsigned int k = 0; k < w1; k += 2 * PW) { + *du++ = *p++; + *dv++ = *p++; + } + for (j = 0, p = (const pixel *)p2; j < w2; j += stride1, p += sstride_p) { + for (unsigned int k = 0; k < stride1; k += 2 * PW) { + *du++ = *p++; + *dv++ = *p++; + } + } + for (unsigned int k = 0; k < w3; k += 2 * PW) { + *du++ = *p++; + *dv++ = *p++; + } + } + } +} + +void FUNC(av_rpi_planar_to_sand_c)(uint8_t * dst_c, + unsigned int stride1, unsigned int stride2, + const uint8_t * src_u, const unsigned int src_stride_u, + const uint8_t * src_v, const unsigned int src_stride_v, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h) +{ + const unsigned int x = _x * 2; + const unsigned int w = _w * 2; + const unsigned int mask = stride1 - 1; + if ((x & ~mask) == ((x + w) & ~mask)) { + // All in one sand stripe + uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; + for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1) { + const pixel * su = (const pixel *)src_u; + const pixel * sv = (const pixel *)src_v; + pixel * p = (pixel *)p1; + for (unsigned int k = 0; k < w; k += 2 * PW) { + *p++ = *su++; + *p++ = *sv++; + } + } + } + else + { + // Two+ stripe + const unsigned int sstride = stride1 * stride2; + const unsigned int sstride_p = (sstride - stride1) / PW; + + const uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; + const uint8_t * p2 = p1 + sstride - (x & mask); + const unsigned int w1 = stride1 - (x & mask); + const unsigned int w3 = (x + w) & mask; + const unsigned int w2 = w - (w1 + w3); + + for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1, p2 += stride1) { + unsigned int j; + const pixel * su = (const pixel *)src_u; + const pixel * sv = (const pixel *)src_v; + pixel * p = (pixel *)p1; + for 
(unsigned int k = 0; k < w1; k += 2 * PW) { + *p++ = *su++; + *p++ = *sv++; + } + for (j = 0, p = (pixel *)p2; j < w2; j += stride1, p += sstride_p) { + for (unsigned int k = 0; k < stride1; k += 2 * PW) { + *p++ = *su++; + *p++ = *sv++; + } + } + for (unsigned int k = 0; k < w3; k += 2 * PW) { + *p++ = *su++; + *p++ = *sv++; + } + } + } +} + + +#undef pixel +#undef STRCAT +#undef FUNC + diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c new file mode 100644 index 0000000000..5d3ea4db1e --- /dev/null +++ b/libavutil/rpi_sand_fns.c @@ -0,0 +1,449 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +Authors: John Cox +*/ + +#include "config.h" +#include +#include +#include "rpi_sand_fns.h" +#include "avassert.h" +#include "frame.h" + +#if ARCH_ARM && HAVE_NEON +#include "libavutil/arm/cpu.h" +#include "libavutil/arm/rpi_sand_neon.h" +#define HAVE_SAND_ASM 1 +#elif ARCH_AARCH64 && HAVE_NEON +#include "libavutil/aarch64/cpu.h" +#include "libavutil/aarch64/rpi_sand_neon.h" +#define HAVE_SAND_ASM 1 +#else +#define HAVE_SAND_ASM 0 +#endif + +#define PW 1 +#include "rpi_sand_fn_pw.c" +#undef PW + +#define PW 2 +#include "rpi_sand_fn_pw.c" +#undef PW + +#if 1 +// Simple round +static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) +{ + const unsigned int rnd = (1 << shr) >> 1; + const uint16_t * src = (const uint16_t *)_src; + + for (; n != 0; --n) { + *dst++ = (*src++ + rnd) >> shr; + } +} +#else +// Dithered variation +static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) +{ + unsigned int rnd = (1 << shr) >> 1; + const unsigned int mask = ((1 << shr) - 1); + const uint16_t * src = (const uint16_t *)_src; + + for (; n != 0; --n) { + rnd = *src++ + (rnd & mask); + *dst++ = rnd >> shr; + } +} +#endif + +// Fetches a single patch - offscreen fixup not done here +// w <= stride1 +// unclipped +// _x & _w in pixels, strides in bytes +void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h) +{ + const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word + const unsigned int xskip0 = _x - (x0 >> 2) * 3; + const unsigned int x1 = ((_x + _w) / 3) * 4; + const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; + const unsigned int mask = stride1 - 1; + const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; + const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words + +#if HAVE_SAND_ASM + if (_x == 0 && have_neon(av_get_cpu_flags())) { + ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); + return; + } +#endif + + if (x0 == x1) { + // ******************* + // Partial single word xfer + return; + } + + for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) + { + unsigned int x = x0; + const uint32_t * p = (const uint32_t *)p0; + uint16_t * d = (uint16_t *)dst; + + if (xskip0 != 0) { + const uint32_t p3 = *p++; + + if (xskip0 == 1) + *d++ = (p3 >> 10) & 0x3ff; + *d++ = (p3 >> 20) & 0x3ff; + + if (((x += 4) & mask) == 0) + p += slice_inc; + } + + while (x != x1) { + const uint32_t p3 = *p++; + *d++ = p3 & 0x3ff; + *d++ = (p3 >> 10) & 0x3ff; + *d++ = (p3 >> 20) & 0x3ff; + + if (((x += 4) & mask) == 0) + p += slice_inc; + } + + if (xrem1 != 0) { + const uint32_t p3 = *p; + + *d++ = p3 & 0x3ff; + if (xrem1 == 2) + *d++ = (p3 >> 10) & 0x3ff; + } + } +} + + +void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, + uint8_t * dst_v, const unsigned int dst_stride_v, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h) +{ + const unsigned int x0 = (_x / 3) * 8; // Byte offset of the word + const unsigned int xskip0 = _x - (x0 >> 3) * 3; + const unsigned int x1 = ((_x + _w) / 3) * 8; + const unsigned int xrem1 = _x + _w - (x1 >> 3) * 3; + const unsigned int mask = stride1 - 1; + const uint8_t * p0 = src + (x0 & mask) + y * 
stride1 + (x0 & ~mask) * stride2; + const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words + +#if HAVE_SAND_ASM + if (_x == 0 && have_neon(av_get_cpu_flags())) { + ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v, + src, stride1, stride2, _x, y, _w, h); + return; + } +#endif + + if (x0 == x1) { + // ******************* + // Partial single word xfer + return; + } + + for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p0 += stride1) + { + unsigned int x = x0; + const uint32_t * p = (const uint32_t *)p0; + uint16_t * du = (uint16_t *)dst_u; + uint16_t * dv = (uint16_t *)dst_v; + + if (xskip0 != 0) { + const uint32_t p3a = *p++; + const uint32_t p3b = *p++; + + if (xskip0 == 1) + { + *du++ = (p3a >> 20) & 0x3ff; + *dv++ = (p3b >> 0) & 0x3ff; + } + *du++ = (p3b >> 10) & 0x3ff; + *dv++ = (p3b >> 20) & 0x3ff; + + if (((x += 8) & mask) == 0) + p += slice_inc; + } + + while (x != x1) { + const uint32_t p3a = *p++; + const uint32_t p3b = *p++; + + *du++ = p3a & 0x3ff; + *dv++ = (p3a >> 10) & 0x3ff; + *du++ = (p3a >> 20) & 0x3ff; + *dv++ = p3b & 0x3ff; + *du++ = (p3b >> 10) & 0x3ff; + *dv++ = (p3b >> 20) & 0x3ff; + + if (((x += 8) & mask) == 0) + p += slice_inc; + } + + if (xrem1 != 0) { + const uint32_t p3a = *p++; + const uint32_t p3b = *p++; + + *du++ = p3a & 0x3ff; + *dv++ = (p3a >> 10) & 0x3ff; + if (xrem1 == 2) + { + *du++ = (p3a >> 20) & 0x3ff; + *dv++ = p3b & 0x3ff; + } + } + } +} + +// Fetches a single patch - offscreen fixup not done here +// w <= stride1 +// single lose bottom 2 bits truncation +// _x & _w in pixels, strides in bytes +void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h) +{ + const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word + const unsigned int xskip0 = _x - (x0 >> 2) * 3; + const unsigned int x1 = ((_x + _w) / 3) * 4; + const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; + const unsigned int mask = stride1 - 1; + const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; + const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words + +#if HAVE_SAND_ASM + if (_x == 0) { + ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); + return; + } +#endif + + if (x0 == x1) { + // ******************* + // Partial single word xfer + return; + } + + for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) + { + unsigned int x = x0; + const uint32_t * p = (const uint32_t *)p0; + uint8_t * d = dst; + + if (xskip0 != 0) { + const uint32_t p3 = *p++; + + if (xskip0 == 1) + *d++ = (p3 >> 12) & 0xff; + *d++ = (p3 >> 22) & 0xff; + + if (((x += 4) & mask) == 0) + p += slice_inc; + } + + while (x != x1) { + const uint32_t p3 = *p++; + *d++ = (p3 >> 2) & 0xff; + *d++ = (p3 >> 12) & 0xff; + *d++ = (p3 >> 22) & 0xff; + + if (((x += 4) & mask) == 0) + p += slice_inc; + } + + if (xrem1 != 0) { + const uint32_t p3 = *p; + + *d++ = (p3 >> 2) & 0xff; + if (xrem1 == 2) + *d++ = (p3 >> 12) & 0xff; + } + } +} + + + +// w/h in pixels +void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, + const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, + unsigned int w, unsigned int h, const unsigned int shr) +{ + const unsigned int n = 
dst_stride1 / 2; + unsigned int j; + + // This is true for our current layouts + av_assert0(dst_stride1 == src_stride1); + + // As we have the same stride1 for src & dest and src is wider than dest + // then if we loop on src we can always write contiguously to dest + // We make no effort to copy an exact width - round up to nearest src stripe + // as we will always have storage in dest for that + +#if ARCH_ARM && HAVE_NEON + if (shr == 3 && src_stride1 == 128) { + for (j = 0; j + n < w; j += dst_stride1) { + uint8_t * d = dst + j * dst_stride2; + const uint8_t * s1 = src + j * 2 * src_stride2; + const uint8_t * s2 = s1 + src_stride1 * src_stride2; + + ff_rpi_sand128b_stripe_to_8_10(d, s1, s2, h); + } + } + else +#endif + { + for (j = 0; j + n < w; j += dst_stride1) { + uint8_t * d = dst + j * dst_stride2; + const uint8_t * s1 = src + j * 2 * src_stride2; + const uint8_t * s2 = s1 + src_stride1 * src_stride2; + + for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, s2 += src_stride1, d += dst_stride1) { + cpy16_to_8(d, s1, n, shr); + cpy16_to_8(d + n, s2, n, shr); + } + } + } + + // Fix up a trailing dest half stripe + if (j < w) { + uint8_t * d = dst + j * dst_stride2; + const uint8_t * s1 = src + j * 2 * src_stride2; + + for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, d += dst_stride1) { + cpy16_to_8(d, s1, n, shr); + } + } +} + +int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src) +{ + const int w = av_frame_cropped_width(src); + const int h = av_frame_cropped_height(src); + const int x = src->crop_left; + const int y = src->crop_top; + const unsigned int stride2_y = av_rpi_sand_frame_stride2_y(src); + const unsigned int stride2_c = av_rpi_sand_frame_stride2_c(src); + + // We will crop as part of the conversion + dst->crop_top = 0; + dst->crop_left = 0; + dst->crop_bottom = 0; + dst->crop_right = 0; + + switch (src->format){ + case AV_PIX_FMT_SAND128: + case AV_PIX_FMT_RPI4_8: + switch (dst->format){ + case AV_PIX_FMT_YUV420P: + av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], + src->data[0], + av_rpi_sand_frame_stride1(src), stride2_y, + x, y, w, h); + av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1], + dst->data[2], dst->linesize[2], + src->data[1], + av_rpi_sand_frame_stride1(src), stride2_c, + x/2, y/2, w/2, h/2); + break; + case AV_PIX_FMT_NV12: + av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], + src->data[0], + av_rpi_sand_frame_stride1(src), stride2_y, + x, y, w, h); + av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1], + src->data[1], + av_rpi_sand_frame_stride1(src), stride2_c, + x/2, y/2, w, h/2); + break; + default: + return -1; + } + break; + case AV_PIX_FMT_SAND64_10: + switch (dst->format){ + case AV_PIX_FMT_YUV420P10: + av_rpi_sand_to_planar_y16(dst->data[0], dst->linesize[0], + src->data[0], + av_rpi_sand_frame_stride1(src), stride2_y, + x*2, y, w*2, h); + av_rpi_sand_to_planar_c16(dst->data[1], dst->linesize[1], + dst->data[2], dst->linesize[2], + src->data[1], + av_rpi_sand_frame_stride1(src), stride2_c, + x, y/2, w, h/2); + break; + default: + return -1; + } + break; + case AV_PIX_FMT_RPI4_10: + switch (dst->format){ + case AV_PIX_FMT_YUV420P10: + av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0], + src->data[0], + av_rpi_sand_frame_stride1(src), stride2_y, + x, y, w, h); + av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1], + dst->data[2], dst->linesize[2], + src->data[1], + av_rpi_sand_frame_stride1(src), stride2_c, + x/2, y/2, w/2, h/2); + break; + case AV_PIX_FMT_NV12: + 
av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0], + src->data[0], + av_rpi_sand_frame_stride1(src), stride2_y, + x, y, w, h); + av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1], + src->data[1], + av_rpi_sand_frame_stride1(src), stride2_c, + x/2, y/2, w, h/2); + break; + default: + return -1; + } + break; + default: + return -1; + } + + return av_frame_copy_props(dst, src); +} diff --git a/libavutil/rpi_sand_fns.h b/libavutil/rpi_sand_fns.h new file mode 100644 index 0000000000..c4031fcb69 --- /dev/null +++ b/libavutil/rpi_sand_fns.h @@ -0,0 +1,157 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +Authors: John Cox +*/ + +#ifndef AVUTIL_RPI_SAND_FNS_H +#define AVUTIL_RPI_SAND_FNS_H + +#include "libavutil/frame.h" + +// For all these fns _x & _w are measured as coord * PW +// For the C fns coords are in chroma pels (so luma / 2) +// Strides are in bytes + +void av_rpi_sand_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); +void av_rpi_sand_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + +void av_rpi_sand_to_planar_c8(uint8_t * dst_u, const unsigned int dst_stride_u, + uint8_t * dst_v, const unsigned int dst_stride_v, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); +void av_rpi_sand_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, + uint8_t * dst_v, const unsigned int dst_stride_v, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + +void av_rpi_planar_to_sand_c8(uint8_t * dst_c, + unsigned int stride1, unsigned int stride2, + const uint8_t * src_u, const unsigned int src_stride_u, + const uint8_t * src_v, const unsigned int src_stride_v, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); +void av_rpi_planar_to_sand_c16(uint8_t * dst_c, + unsigned int stride1, unsigned int stride2, + const uint8_t * src_u, const unsigned int src_stride_u, + const uint8_t * src_v, const unsigned int src_stride_v, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + +void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); +void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, + uint8_t * dst_v, const unsigned int dst_stride_v, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + +void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + +// w/h in pixels +void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, + const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, + unsigned int w, unsigned int h, const unsigned int shr); + + +// dst must contain required pixel format & allocated data buffers +// Cropping on the src buffer will be honoured and dst crop will be set to zero +int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src); + + +static inline unsigned int av_rpi_sand_frame_stride1(const AVFrame * const frame) +{ +#ifdef RPI_ZC_SAND128_ONLY + // If we are sure we only only support 128 byte sand formats replace the + // var with a constant which should allow for better optimisation + return 128; +#else + return frame->linesize[0]; +#endif +} + +static inline unsigned int av_rpi_sand_frame_stride2_y(const AVFrame * const frame) +{ + return frame->linesize[3]; +} + +static inline unsigned int av_rpi_sand_frame_stride2_c(const 
AVFrame * const frame) +{ + return frame->linesize[4]; +} + +static inline int av_rpi_is_sand_format(const int format) +{ + return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_RPI4_10); +} + +static inline int av_rpi_is_sand_frame(const AVFrame * const frame) +{ + return av_rpi_is_sand_format(frame->format); +} + +static inline int av_rpi_is_sand8_frame(const AVFrame * const frame) +{ + return (frame->format == AV_PIX_FMT_SAND128 || frame->format == AV_PIX_FMT_RPI4_8); +} + +static inline int av_rpi_is_sand16_frame(const AVFrame * const frame) +{ + return (frame->format >= AV_PIX_FMT_SAND64_10 && frame->format <= AV_PIX_FMT_SAND64_16); +} + +static inline int av_rpi_is_sand30_frame(const AVFrame * const frame) +{ + return (frame->format == AV_PIX_FMT_RPI4_10); +} + +static inline int av_rpi_sand_frame_xshl(const AVFrame * const frame) +{ + return av_rpi_is_sand8_frame(frame) ? 0 : 1; +} + +#endif + diff --git a/libswscale/aarch64/rgb2rgb.c b/libswscale/aarch64/rgb2rgb.c index b9d8aa4dc2..9c48c22e39 100644 --- a/libswscale/aarch64/rgb2rgb.c +++ b/libswscale/aarch64/rgb2rgb.c @@ -70,6 +70,12 @@ void ff_shuffle_bytes_3102_neon(const uint8_t *src, uint8_t *dst, int src_size); void ff_shuffle_bytes_2013_neon(const uint8_t *src, uint8_t *dst, int src_size); void ff_shuffle_bytes_2130_neon(const uint8_t *src, uint8_t *dst, int src_size); void ff_shuffle_bytes_1203_neon(const uint8_t *src, uint8_t *dst, int src_size); +void ff_bgr24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, const int32_t *rgb2yuv); +void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, const int32_t *rgb2yuv); void ff_uyvytoyuv422_neon(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, @@ -107,5 +113,7 @@ av_cold void rgb2rgb_init_aarch64(void) uyvytoyuv420 = ff_uyvytoyuv420_neon; yuyvtoyuv422 = ff_yuyvtoyuv422_neon; yuyvtoyuv420 = ff_yuyvtoyuv420_neon; + ff_rgb24toyv12 = ff_rgb24toyv12_aarch64; + ff_bgr24toyv12 = ff_bgr24toyv12_aarch64; } } diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S index 665aa4496b..2de0f806be 100644 --- a/libswscale/aarch64/rgb2rgb_neon.S +++ b/libswscale/aarch64/rgb2rgb_neon.S @@ -871,3 +871,359 @@ interleaved_yuv_to_planar uyvy, yuv422 interleaved_yuv_to_planar uyvy, yuv420 interleaved_yuv_to_planar yuyv, yuv422 interleaved_yuv_to_planar yuyv, yuv420 + +// Expand rgb2 into r0+r1/g0+g1/b0+b1 +.macro XRGB3Y r0, g0, b0, r1, g1, b1, r2, g2, b2 + uxtl \r0\().8h, \r2\().8b + uxtl \g0\().8h, \g2\().8b + uxtl \b0\().8h, \b2\().8b + + uxtl2 \r1\().8h, \r2\().16b + uxtl2 \g1\().8h, \g2\().16b + uxtl2 \b1\().8h, \b2\().16b +.endm + +// Expand rgb2 into r0+r1/g0+g1/b0+b1 +// and pick every other el to put back into rgb2 for chroma +.macro XRGB3YC r0, g0, b0, r1, g1, b1, r2, g2, b2 + XRGB3Y \r0, \g0, \b0, \r1, \g1, \b1, \r2, \g2, \b2 + + bic \r2\().8h, #0xff, LSL #8 + bic \g2\().8h, #0xff, LSL #8 + bic \b2\().8h, #0xff, LSL #8 +.endm + +.macro SMLAL3 d0, d1, s0, s1, s2, c0, c1, c2 + smull \d0\().4s, \s0\().4h, \c0 + smlal \d0\().4s, \s1\().4h, \c1 + smlal \d0\().4s, \s2\().4h, \c2 + smull2 \d1\().4s, \s0\().8h, \c0 + smlal2 \d1\().4s, \s1\().8h, \c1 + smlal2 \d1\().4s, \s2\().8h, \c2 +.endm + +// d0 may be s0 +// s0, s2 corrupted +.macro SHRN_Y d0, s0, s1, s2, s3, k128h + shrn \s0\().4h, \s0\().4s, #12 + shrn2 
\s0\().8h, \s1\().4s, #12 + add \s0\().8h, \s0\().8h, \k128h\().8h // +128 (>> 3 = 16) + sqrshrun \d0\().8b, \s0\().8h, #3 + shrn \s2\().4h, \s2\().4s, #12 + shrn2 \s2\().8h, \s3\().4s, #12 + add \s2\().8h, \s2\().8h, \k128h\().8h + sqrshrun2 \d0\().16b, v28.8h, #3 +.endm + +.macro SHRN_C d0, s0, s1, k128b + shrn \s0\().4h, \s0\().4s, #14 + shrn2 \s0\().8h, \s1\().4s, #14 + sqrshrn \s0\().8b, \s0\().8h, #1 + add \d0\().8b, \s0\().8b, \k128b\().8b // +128 +.endm + +.macro STB2V s0, n, a + st1 {\s0\().b}[(\n+0)], [\a], #1 + st1 {\s0\().b}[(\n+1)], [\a], #1 +.endm + +.macro STB4V s0, n, a + STB2V \s0, (\n+0), \a + STB2V \s0, (\n+2), \a +.endm + + +// void ff_rgb24toyv12_aarch64( +// const uint8_t *src, // x0 +// uint8_t *ydst, // x1 +// uint8_t *udst, // x2 +// uint8_t *vdst, // x3 +// int width, // w4 +// int height, // w5 +// int lumStride, // w6 +// int chromStride, // w7 +// int srcStr, // [sp, #0] +// int32_t *rgb2yuv); // [sp, #8] + +function ff_rgb24toyv12_aarch64, export=1 + ldr x15, [sp, #8] + ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 + ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 + ld3 {v3.s, v4.s, v5.s}[2], [x15] + mov v6.16b, v3.16b + mov v3.16b, v5.16b + mov v5.16b, v6.16b + b 99f +endfunc + +// void ff_bgr24toyv12_aarch64( +// const uint8_t *src, // x0 +// uint8_t *ydst, // x1 +// uint8_t *udst, // x2 +// uint8_t *vdst, // x3 +// int width, // w4 +// int height, // w5 +// int lumStride, // w6 +// int chromStride, // w7 +// int srcStr, // [sp, #0] +// int32_t *rgb2yuv); // [sp, #8] (including Mac) + +// regs +// v0-2 Src bytes - reused as chroma src +// v3-5 Coeffs (packed very inefficiently - could be squashed) +// v6 128b +// v7 128h +// v8-15 Reserved +// v16-18 Lo Src expanded as H +// v19 - +// v20-22 Hi Src expanded as H +// v23 - +// v24 U out +// v25 U tmp +// v26 Y out +// v27-29 Y tmp +// v30 V out +// v31 V tmp + +function ff_bgr24toyv12_aarch64, export=1 + ldr x15, [sp, #8] + ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 + ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 + ld3 {v3.s, v4.s, v5.s}[2], [x15] + +99: + ldr w14, [sp, #0] + movi v7.8b, #128 + uxtl v6.8h, v7.8b + // Ensure if nothing to do then we do nothing + cmp w4, #0 + b.le 90f + cmp w5, #0 + b.le 90f + // If w % 16 != 0 then -16 so we do main loop 1 fewer times with + // the remainder done in the tail + tst w4, #15 + b.eq 1f + sub w4, w4, #16 +1: + +// -------------------- Even line body - YUV +11: + subs w9, w4, #0 + mov x10, x0 + mov x11, x1 + mov x12, x2 + mov x13, x3 + b.lt 12f + + ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 + subs w9, w9, #16 + b.le 13f + +10: + XRGB3YC v16, v17, v18, v20, v21, v22, v0, v1, v2 + + // Testing shows it is faster to stack the smull/smlal ops together + // rather than interleave them between channels and indeed even the + // shift/add sections seem happier not interleaved + + // Y0 + SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] + // Y1 + SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] + SHRN_Y v26, v26, v27, v28, v29, v6 + + // U + // Vector subscript *2 as we loaded into S but are only using H + SMLAL3 v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2] + + // V + SMLAL3 v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4] + + ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 + + SHRN_C v24, v24, v25, v7 + SHRN_C v30, v30, v31, v7 + + subs w9, w9, #16 + + st1 {v26.16b}, [x11], #16 + st1 {v24.8b}, [x12], #8 + st1 {v30.8b}, [x13], #8 + + b.gt 10b + +// -------------------- Even line tail - YUV +// If width % 16 == 0 then simply runs once with preloaded RGB +// If other then deals with preload & 
then does remaining tail + +13: + // Body is simple copy of main loop body minus preload + + XRGB3YC v16, v17, v18, v20, v21, v22, v0, v1, v2 + // Y0 + SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] + // Y1 + SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] + SHRN_Y v26, v26, v27, v28, v29, v6 + // U + SMLAL3 v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2] + // V + SMLAL3 v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4] + + cmp w9, #-16 + + SHRN_C v24, v24, v25, v7 + SHRN_C v30, v30, v31, v7 + + // Here: + // w9 == 0 width % 16 == 0, tail done + // w9 > -16 1st tail done (16 pels), remainder still to go + // w9 == -16 shouldn't happen + // w9 > -32 2nd tail done + // w9 <= -32 shouldn't happen + + b.lt 2f + st1 {v26.16b}, [x11], #16 + st1 {v24.8b}, [x12], #8 + st1 {v30.8b}, [x13], #8 + cbz w9, 3f + +12: + sub w9, w9, #16 + + tbz w9, #3, 1f + ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 +1: tbz w9, #2, 1f + ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 +1: tbz w9, #1, 1f + ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 +1: tbz w9, #0, 13b + ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 + b 13b + +2: + tbz w9, #3, 1f + st1 {v26.8b}, [x11], #8 + STB4V v24, 0, x12 + STB4V v30, 0, x13 +1: tbz w9, #2, 1f + STB4V v26 8, x11 + STB2V v24, 4, x12 + STB2V v30, 4, x13 +1: tbz w9, #1, 1f + STB2V v26, 12, x11 + st1 {v24.b}[6], [x12], #1 + st1 {v30.b}[6], [x13], #1 +1: tbz w9, #0, 1f + st1 {v26.b}[14], [x11] + st1 {v24.b}[7], [x12] + st1 {v30.b}[7], [x13] +1: +3: + +// -------------------- Odd line body - Y only + + subs w5, w5, #1 + b.eq 90f + + subs w9, w4, #0 + add x0, x0, w14, sxtw + add x1, x1, w6, sxtw + mov x10, x0 + mov x11, x1 + b.lt 12f + + ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 + subs w9, w9, #16 + b.le 13f + +10: + XRGB3Y v16, v17, v18, v20, v21, v22, v0, v1, v2 + // Y0 + SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] + // Y1 + SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] + + ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 + + SHRN_Y v26, v26, v27, v28, v29, v6 + + subs w9, w9, #16 + + st1 {v26.16b}, [x11], #16 + + b.gt 10b + +// -------------------- Odd line tail - Y +// If width % 16 == 0 then simply runs once with preloaded RGB +// If other then deals with preload & then does remaining tail + +13: + // Body is simple copy of main loop body minus preload + + XRGB3Y v16, v17, v18, v20, v21, v22, v0, v1, v2 + // Y0 + SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] + // Y1 + SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] + + cmp w9, #-16 + + SHRN_Y v26, v26, v27, v28, v29, v6 + + // Here: + // w9 == 0 width % 16 == 0, tail done + // w9 > -16 1st tail done (16 pels), remainder still to go + // w9 == -16 shouldn't happen + // w9 > -32 2nd tail done + // w9 <= -32 shouldn't happen + + b.lt 2f + st1 {v26.16b}, [x11], #16 + cbz w9, 3f + +12: + sub w9, w9, #16 + + tbz w9, #3, 1f + ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 +1: tbz w9, #2, 1f + ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 +1: tbz w9, #1, 1f + ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 + ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 +1: tbz w9, #0, 13b + ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 + b 13b + +2: + tbz w9, #3, 1f + st1 {v26.8b}, [x11], #8 +1: tbz w9, #2, 1f + STB4V v26, 8, x11 +1: tbz w9, #1, 1f + STB2V v26, 12, x11 +1: tbz w9, #0, 1f + st1 {v26.b}[14], 
[x11] +1: +3: + +// ------------------- Loop to start + + add x0, x0, w14, sxtw + add x1, x1, w6, sxtw + add x2, x2, w7, sxtw + add x3, x3, w7, sxtw + subs w5, w5, #1 + b.gt 11b +90: + ret +endfunc diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c index 4bba657bd1..b7edcb9ae0 100644 --- a/libswscale/rgb2rgb.c +++ b/libswscale/rgb2rgb.c @@ -83,6 +83,31 @@ void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, int width, int height, int lumStride, int chromStride, int srcStride, const int32_t *rgb2yuv); +void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, + uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + const int32_t *rgb2yuv); +void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, + uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + const int32_t *rgb2yuv); +void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, + uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + const int32_t *rgb2yuv); +void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, + uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + const int32_t *rgb2yuv); +void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, + uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + const int32_t *rgb2yuv); void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride); void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h index ed6e08a565..27442db484 100644 --- a/libswscale/rgb2rgb.h +++ b/libswscale/rgb2rgb.h @@ -92,6 +92,9 @@ void x2rgb10tobgr64_bswap(const uint8_t *src, uint8_t *dst, int src_size); void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride, const int32_t *rgb2yuv); +void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, const int32_t *rgb2yuv); /** * Height should be a multiple of 2 and width should be a multiple of 16. 
@@ -131,6 +134,26 @@ extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, int width, int height, int lumStride, int chromStride, int srcStride, const int32_t *rgb2yuv); +extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + const int32_t *rgb2yuv); +extern void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + const int32_t *rgb2yuv); +extern void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + const int32_t *rgb2yuv); +extern void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + const int32_t *rgb2yuv); +extern void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride, + const int32_t *rgb2yuv); extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride); diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c index 48959b6b5f..195c11f3ed 100644 --- a/libswscale/rgb2rgb_template.c +++ b/libswscale/rgb2rgb_template.c @@ -573,13 +573,20 @@ static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth, dst[2 * srcWidth - 1] = src[srcWidth - 1]; } +#if 0 +// This is the upstream version of this function. It has centre positioned +// chroma rather than our simpler top-left and only covers one arrangement +// of RGB. Left here for reference and to fix some ugly diffing by git. +// If this version is wanted - lose the entire patchset + /** * width should be a multiple of 2. * (If this is a problem for anyone then tell me, and I will fix it.) */ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, - int chromStride, int srcStride, const int32_t *rgb2yuv) + int chromStride, int srcStride, const int32_t *rgb2yuv, + const uint8_t x[9]) { int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; @@ -639,6 +646,237 @@ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, vdst += chromStride; } } +#endif + +/** + * Chrominance data is only taken from every second line, + * others are ignored. This matches the ARM64 asm. 
+ */ +static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, const int32_t *rgb2yuv, + const uint8_t x[9]) +{ + int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; + int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; + int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; + int y; + const int chromWidth = width >> 1; + + for (y = 0; y < height; y += 2) { + int i; + + for (i = 0; i < chromWidth; i++) { + unsigned int b = src[6 * i + 0]; + unsigned int g = src[6 * i + 1]; + unsigned int r = src[6 * i + 2]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; + unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; + + udst[i] = U; + vdst[i] = V; + ydst[2 * i] = Y; + + b = src[6 * i + 3]; + g = src[6 * i + 4]; + r = src[6 * i + 5]; + + Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + ydst[2 * i + 1] = Y; + } + if ((width & 1) != 0) { + unsigned int b = src[6 * i + 0]; + unsigned int g = src[6 * i + 1]; + unsigned int r = src[6 * i + 2]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; + unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; + + udst[i] = U; + vdst[i] = V; + ydst[2 * i] = Y; + } + ydst += lumStride; + src += srcStride; + + if (y+1 == height) + break; + + for (i = 0; i < chromWidth; i++) { + unsigned int b = src[6 * i + 0]; + unsigned int g = src[6 * i + 1]; + unsigned int r = src[6 * i + 2]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + + ydst[2 * i] = Y; + + b = src[6 * i + 3]; + g = src[6 * i + 4]; + r = src[6 * i + 5]; + + Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + ydst[2 * i + 1] = Y; + } + if ((width & 1) != 0) { + unsigned int b = src[6 * i + 0]; + unsigned int g = src[6 * i + 1]; + unsigned int r = src[6 * i + 2]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + + ydst[2 * i] = Y; + } + udst += chromStride; + vdst += chromStride; + ydst += lumStride; + src += srcStride; + } +} + +static const uint8_t x_rgb[9] = { + RY_IDX, GY_IDX, BY_IDX, + RU_IDX, GU_IDX, BU_IDX, + RV_IDX, GV_IDX, BV_IDX, +}; + +static const uint8_t x_bgr[9] = { + BY_IDX, GY_IDX, RY_IDX, + BU_IDX, GU_IDX, RU_IDX, + BV_IDX, GV_IDX, RV_IDX, +}; + +void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, const int32_t *rgb2yuv) +{ + rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); +} + +void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, const int32_t *rgb2yuv) +{ + rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); +} + +static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, const int32_t *rgb2yuv, + const uint8_t x[9]) +{ + int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; + int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; + int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; + int y; 
+ const int chromWidth = width >> 1; + + for (y = 0; y < height; y += 2) { + int i; + for (i = 0; i < chromWidth; i++) { + unsigned int b = src[8 * i + 2]; + unsigned int g = src[8 * i + 1]; + unsigned int r = src[8 * i + 0]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; + unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; + + udst[i] = U; + vdst[i] = V; + ydst[2 * i] = Y; + + b = src[8 * i + 6]; + g = src[8 * i + 5]; + r = src[8 * i + 4]; + + Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + ydst[2 * i + 1] = Y; + } + if ((width & 1) != 0) { + unsigned int b = src[8 * i + 2]; + unsigned int g = src[8 * i + 1]; + unsigned int r = src[8 * i + 0]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; + unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; + + udst[i] = U; + vdst[i] = V; + ydst[2 * i] = Y; + } + ydst += lumStride; + src += srcStride; + + if (y+1 == height) + break; + + for (i = 0; i < chromWidth; i++) { + unsigned int b = src[8 * i + 2]; + unsigned int g = src[8 * i + 1]; + unsigned int r = src[8 * i + 0]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + + ydst[2 * i] = Y; + + b = src[8 * i + 6]; + g = src[8 * i + 5]; + r = src[8 * i + 4]; + + Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + ydst[2 * i + 1] = Y; + } + if ((width & 1) != 0) { + unsigned int b = src[8 * i + 2]; + unsigned int g = src[8 * i + 1]; + unsigned int r = src[8 * i + 0]; + + unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; + + ydst[2 * i] = Y; + } + udst += chromStride; + vdst += chromStride; + ydst += lumStride; + src += srcStride; + } +} + +static void ff_rgbxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, const int32_t *rgb2yuv) +{ + rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); +} + +static void ff_bgrxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, const int32_t *rgb2yuv) +{ + rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); +} + +// As the general code does no SIMD-like ops simply adding 1 to the src address +// will fix the ignored alpha position +static void ff_xrgbtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, const int32_t *rgb2yuv) +{ + rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); +} + +static void ff_xbgrtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, const int32_t *rgb2yuv) +{ + rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); +} + static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, int width, int height, @@ -863,6 +1101,11 @@ static av_cold void rgb2rgb_init_c(void) yuv422ptouyvy = yuv422ptouyvy_c; planar2x = planar2x_c; ff_rgb24toyv12 = ff_rgb24toyv12_c; + ff_bgr24toyv12 = ff_bgr24toyv12_c; + ff_rgbxtoyv12 = ff_rgbxtoyv12_c; + ff_bgrxtoyv12 = 
ff_bgrxtoyv12_c; + ff_xrgbtoyv12 = ff_xrgbtoyv12_c; + ff_xbgrtoyv12 = ff_xbgrtoyv12_c; interleaveBytes = interleaveBytes_c; deinterleaveBytes = deinterleaveBytes_c; diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index f612f88c4d..d71e1776e7 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -2063,9 +2063,94 @@ static int bgr24ToYv12Wrapper(SwsInternal *c, const uint8_t *const src[], return srcSliceH; } -static int yvu9ToYv12Wrapper(SwsInternal *c, const uint8_t *const src[], +static int rgb24ToYv12Wrapper(SwsInternal *c, const uint8_t *const src[], + const int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *const dst[], const int dstStride[]) +{ + ff_bgr24toyv12( + src[0], + dst[0] + srcSliceY * dstStride[0], + dst[1] + (srcSliceY >> 1) * dstStride[1], + dst[2] + (srcSliceY >> 1) * dstStride[2], + c->opts.src_w, srcSliceH, + dstStride[0], dstStride[1], srcStride[0], + c->input_rgb2yuv_table); + if (dst[3]) + fillPlane(dst[3], dstStride[3], c->opts.src_w, srcSliceH, srcSliceY, 255); + return srcSliceH; +} + +static int bgrxToYv12Wrapper(SwsInternal *c, const uint8_t *const src[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[]) +{ + ff_bgrxtoyv12( + src[0], + dst[0] + srcSliceY * dstStride[0], + dst[1] + (srcSliceY >> 1) * dstStride[1], + dst[2] + (srcSliceY >> 1) * dstStride[2], + c->opts.src_w, srcSliceH, + dstStride[0], dstStride[1], srcStride[0], + c->input_rgb2yuv_table); + if (dst[3]) + fillPlane(dst[3], dstStride[3], c->opts.src_w, srcSliceH, srcSliceY, 255); + return srcSliceH; +} + +static int rgbxToYv12Wrapper(SwsInternal *c, const uint8_t *const src[], + const int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *const dst[], const int dstStride[]) +{ + ff_rgbxtoyv12( + src[0], + dst[0] + srcSliceY * dstStride[0], + dst[1] + (srcSliceY >> 1) * dstStride[1], + dst[2] + (srcSliceY >> 1) * dstStride[2], + c->opts.src_w, srcSliceH, + dstStride[0], dstStride[1], srcStride[0], + c->input_rgb2yuv_table); + if (dst[3]) + fillPlane(dst[3], dstStride[3], c->opts.src_w, srcSliceH, srcSliceY, 255); + return srcSliceH; +} + +static int xbgrToYv12Wrapper(SwsInternal *c, const uint8_t *const src[], + const int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *const dst[], const int dstStride[]) +{ + ff_xbgrtoyv12( + src[0], + dst[0] + srcSliceY * dstStride[0], + dst[1] + (srcSliceY >> 1) * dstStride[1], + dst[2] + (srcSliceY >> 1) * dstStride[2], + c->opts.src_w, srcSliceH, + dstStride[0], dstStride[1], srcStride[0], + c->input_rgb2yuv_table); + if (dst[3]) + fillPlane(dst[3], dstStride[3], c->opts.src_w, srcSliceH, srcSliceY, 255); + return srcSliceH; +} + +static int xrgbToYv12Wrapper(SwsInternal *c, const uint8_t *const src[], + const int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *const dst[], const int dstStride[]) +{ + ff_xrgbtoyv12( + src[0], + dst[0] + srcSliceY * dstStride[0], + dst[1] + (srcSliceY >> 1) * dstStride[1], + dst[2] + (srcSliceY >> 1) * dstStride[2], + c->opts.src_w, srcSliceH, + dstStride[0], dstStride[1], srcStride[0], + c->input_rgb2yuv_table); + if (dst[3]) + fillPlane(dst[3], dstStride[3], c->opts.src_w, srcSliceH, srcSliceY, 255); + return srcSliceH; +} + +static int yvu9ToYv12Wrapper(SwsInternal *c, const uint8_t *const src[], + const int srcStride[], int srcSliceY, int srcSliceH, + uint8_t * const dst[], const int dstStride[]) { ff_copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->opts.src_w, dst[0], dstStride[0]); @@ -2380,7 
+2465,6 @@ void ff_get_unscaled_swscale(SwsInternal *c) const enum AVPixelFormat dstFormat = c->opts.dst_format; const int flags = c->opts.flags; const int dstH = c->opts.dst_h; - const int dstW = c->opts.dst_w; int needsDither; needsDither = isAnyRGB(dstFormat) && @@ -2438,8 +2522,34 @@ void ff_get_unscaled_swscale(SwsInternal *c) /* bgr24toYV12 */ if (srcFormat == AV_PIX_FMT_BGR24 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && - !(flags & SWS_ACCURATE_RND) && !(dstW&1)) + !(flags & SWS_ACCURATE_RND)) c->convert_unscaled = bgr24ToYv12Wrapper; + /* rgb24toYV12 */ + if (srcFormat == AV_PIX_FMT_RGB24 && + (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && + !(flags & SWS_ACCURATE_RND)) + c->convert_unscaled = rgb24ToYv12Wrapper; + + /* bgrxtoYV12 */ + if (((srcFormat == AV_PIX_FMT_BGRA && dstFormat == AV_PIX_FMT_YUV420P) || + (srcFormat == AV_PIX_FMT_BGR0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && + !(flags & SWS_ACCURATE_RND)) + c->convert_unscaled = bgrxToYv12Wrapper; + /* rgbx24toYV12 */ + if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) || + (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && + !(flags & SWS_ACCURATE_RND)) + c->convert_unscaled = rgbxToYv12Wrapper; + /* xbgrtoYV12 */ + if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == AV_PIX_FMT_YUV420P) || + (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && + !(flags & SWS_ACCURATE_RND)) + c->convert_unscaled = xbgrToYv12Wrapper; + /* xrgb24toYV12 */ + if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) || + (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && + !(flags & SWS_ACCURATE_RND)) + c->convert_unscaled = xrgbToYv12Wrapper; /* AYUV/VUYA/UYVA -> AYUV/VUYA/UYVA */ if (isAYUV(srcFormat) && isAYUV(dstFormat) && findRgbConvFn(c)) diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index a9b58f5d1d..d10cef13af 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -97,8 +97,9 @@ AVUTILOBJS += crc.o AVUTILOBJS += fixed_dsp.o AVUTILOBJS += float_dsp.o AVUTILOBJS += lls.o +AVUTILOBJS-$(CONFIG_SAND) += rpi_sand.o -CHECKASMOBJS-$(CONFIG_AVUTIL) += $(AVUTILOBJS) +CHECKASMOBJS-$(CONFIG_AVUTIL) += $(AVUTILOBJS) $(AVUTILOBJS-yes) CHECKASMOBJS-$(ARCH_AARCH64) += aarch64/checkasm.o CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index a4ac8f1483..8477614c26 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -356,6 +356,9 @@ static const struct { { "float_dsp", checkasm_check_float_dsp }, { "lls", checkasm_check_lls }, { "av_tx", checkasm_check_av_tx }, + #if CONFIG_SAND + { "rpi_sand", checkasm_check_rpi_sand }, + #endif #endif { NULL } /* NOTE: When adding a new test to this list here, it also needs to be diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 568b40530c..225bf4d3b3 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -133,6 +133,7 @@ void checkasm_check_opusdsp(void); void checkasm_check_pixblockdsp(void); void checkasm_check_png(void); void checkasm_check_qpeldsp(void); +void checkasm_check_rpi_sand(void); void checkasm_check_sbrdsp(void); void checkasm_check_rv34dsp(void); void checkasm_check_rv40dsp(void); diff --git a/tests/checkasm/rpi_sand.c b/tests/checkasm/rpi_sand.c 
new file mode 100644 index 0000000000..dd9c1350bc --- /dev/null +++ b/tests/checkasm/rpi_sand.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2023 John Cox + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include "checkasm.h" +#include "libavutil/common.h" +#include "libavutil/rpi_sand_fns.h" + +#if ARCH_ARM +#include "libavutil/arm/cpu.h" +#include "libavutil/arm/rpi_sand_neon.h" +#elif ARCH_AARCH64 +#include "libavutil/aarch64/cpu.h" +#include "libavutil/aarch64/rpi_sand_neon.h" +#else +#define have_neon(flags) 0 +#define ff_rpi_sand30_lines_to_planar_y16 NULL +#define ff_rpi_sand30_lines_to_planar_c16 NULL +#endif + +static inline uint32_t pack30(unsigned int a, unsigned int b, unsigned int c) +{ + return (a & 0x3ff) | ((b & 0x3ff) << 10) | ((c & 0x3ff) << 20); +} + +void checkasm_check_rpi_sand(void) +{ + const unsigned int w = 1280; + const unsigned int h = 66; + const unsigned int stride1 = 128; + const unsigned int stride2 = h*3/2; + const unsigned int ssize = ((w+95)/96)*128*h*3/2; + const unsigned int ysize = ((w + 32) * (h + 32) * 2); + + uint8_t * sbuf0 = malloc(ssize); + uint8_t * sbuf1 = malloc(ssize); + uint8_t * ybuf0 = malloc(ysize); + uint8_t * ybuf1 = malloc(ysize); + uint8_t * vbuf0 = malloc(ysize); + uint8_t * vbuf1 = malloc(ysize); + uint8_t * yframe0 = (w + 32) * 16 + ybuf0; + uint8_t * yframe1 = (w + 32) * 16 + ybuf1; + uint8_t * vframe0 = (w + 32) * 16 + vbuf0; + uint8_t * vframe1 = (w + 32) * 16 + vbuf1; + unsigned int i; + + for (i = 0; i != ssize; i += 4) + *(uint32_t*)(sbuf0 + i) = rnd(); + memcpy(sbuf1, sbuf0, ssize); + + if (check_func(have_neon(av_get_cpu_flags()) ? ff_rpi_sand30_lines_to_planar_y16 : av_rpi_sand30_to_planar_y16, "rpi_sand30_to_planar_y16")) { + declare_func(void, uint8_t * dst, const unsigned int dst_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + + memset(ybuf0, 0xbb, ysize); + memset(ybuf1, 0xbb, ysize); + + call_ref(yframe0, (w + 32) * 2, sbuf0, stride1, stride2, 0, 0, w, h); + call_new(yframe1, (w + 32) * 2, sbuf1, stride1, stride2, 0, 0, w, h); + + if (memcmp(sbuf0, sbuf1, ssize) + || memcmp(ybuf0, ybuf1, ysize)) + fail(); + + bench_new(ybuf1, (w + 32) * 2, sbuf1, stride1, stride2, 0, 0, w, h); + } + + if (check_func(have_neon(av_get_cpu_flags()) ? 
ff_rpi_sand30_lines_to_planar_c16 : av_rpi_sand30_to_planar_c16, "rpi_sand30_to_planar_c16")) { + declare_func(void, uint8_t * u_dst, const unsigned int u_stride, + uint8_t * v_dst, const unsigned int v_stride, + const uint8_t * src, + unsigned int stride1, unsigned int stride2, + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + + memset(ybuf0, 0xbb, ysize); + memset(ybuf1, 0xbb, ysize); + memset(vbuf0, 0xbb, ysize); + memset(vbuf1, 0xbb, ysize); + + call_ref(yframe0, (w + 32), vframe0, (w + 32), sbuf0, stride1, stride2, 0, 0, w/2, h/2); + call_new(yframe1, (w + 32), vframe1, (w + 32), sbuf1, stride1, stride2, 0, 0, w/2, h/2); + + if (memcmp(sbuf0, sbuf1, ssize) + || memcmp(ybuf0, ybuf1, ysize) + || memcmp(vbuf0, vbuf1, ysize)) + fail(); + + bench_new(yframe1, (w + 32), vframe1, (w + 32), sbuf1, stride1, stride2, 0, 0, w/2, h/2); + } + + + report("sand30"); + + free(sbuf0); + free(sbuf1); + free(ybuf0); + free(ybuf1); + free(vbuf0); + free(vbuf1); +} + diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak index b05dc61f67..203b10413e 100644 --- a/tests/fate/checkasm.mak +++ b/tests/fate/checkasm.mak @@ -48,6 +48,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp \ fate-checkasm-pixblockdsp \ fate-checkasm-png \ fate-checkasm-qpeldsp \ + fate-checkasm-rpi_sand \ fate-checkasm-sbrdsp \ fate-checkasm-rv34dsp \ fate-checkasm-rv40dsp \ diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak index 07b8632c6f..3e94396d32 100644 --- a/tests/fate/filter-video.mak +++ b/tests/fate/filter-video.mak @@ -467,9 +467,9 @@ fate-filter-drawvg-interpreter: $(DRAWVG_SCRIPT_ALL) fate-filter-drawvg-interpreter: libavfilter/tests/drawvg$(EXESUF) fate-filter-drawvg-interpreter: CMD = run libavfilter/tests/drawvg$(EXESUF) $(DRAWVG_SCRIPT_ALL) -FATE_FILTER_SAMPLES-$(call FILTERDEMDEC, FPS SCALE, MOV, QTRLE) += fate-filter-fps-cfr fate-filter-fps -fate-filter-fps-cfr: CMD = framecrc -auto_conversion_filters -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -r 30 -fps_mode cfr -pix_fmt yuv420p -fate-filter-fps: CMD = framecrc -auto_conversion_filters -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -vf fps=30 -pix_fmt yuv420p +#FATE_FILTER_SAMPLES-$(call FILTERDEMDEC, FPS SCALE, MOV, QTRLE) += fate-filter-fps-cfr fate-filter-fps +#fate-filter-fps-cfr: CMD = framecrc -auto_conversion_filters -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -r 30 -fps_mode cfr -pix_fmt yuv420p +#fate-filter-fps: CMD = framecrc -auto_conversion_filters -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -vf fps=30 -pix_fmt yuv420p FATE_FILTER_SAMPLES-$(call FILTERFRAMECRC, TESTSRC2 FSYNC) += fate-filter-fsync-up fate-filter-fsync-down fate-filter-fsync-up: tests/data/maps/fsync-up diff --git a/tests/ref/fate/imgutils b/tests/ref/fate/imgutils index ccd1522ef2..024d90b7d1 100644 --- a/tests/ref/fate/imgutils +++ b/tests/ref/fate/imgutils @@ -306,6 +306,9 @@ gbrp10msbbe planes: 3, linesizes: 128 128 128 0, plane_sizes: 6144 6144 gbrp10msble planes: 3, linesizes: 128 128 128 0, plane_sizes: 6144 6144 6144 0, plane_offsets: 6144 6144 0, total_size: 18432 gbrp12msbbe planes: 3, linesizes: 128 128 128 0, plane_sizes: 6144 6144 6144 0, plane_offsets: 6144 6144 0, total_size: 18432 gbrp12msble planes: 3, linesizes: 128 128 128 0, plane_sizes: 6144 6144 6144 0, plane_offsets: 6144 6144 0, total_size: 18432 +sand128 planes: 2, linesizes: 64 64 0 0, plane_sizes: 3072 1536 0 0, plane_offsets: 3072 0 0, total_size: 4608 +sand64_10 planes: 2, linesizes: 
128 128 0 0, plane_sizes: 6144 3072 0 0, plane_offsets: 6144 0 0, total_size: 9216 +sand64_16 planes: 2, linesizes: 128 128 0 0, plane_sizes: 6144 3072 0 0, plane_offsets: 6144 0 0, total_size: 9216 image_fill_black tests yuv420p total_size: 4608, black_unknown_crc: 0xd00f6cc6, black_tv_crc: 0xd00f6cc6, black_pc_crc: 0x234969af @@ -559,3 +562,6 @@ gbrp10msbbe total_size: 18432, black_unknown_crc: 0x00000000, black_tv_cr gbrp10msble total_size: 18432, black_unknown_crc: 0x00000000, black_tv_crc: 0x00000000, black_pc_crc: 0x00000000 gbrp12msbbe total_size: 18432, black_unknown_crc: 0x00000000, black_tv_crc: 0x00000000, black_pc_crc: 0x00000000 gbrp12msble total_size: 18432, black_unknown_crc: 0x00000000, black_tv_crc: 0x00000000, black_pc_crc: 0x00000000 +sand128 total_size: 4608, black_unknown_crc: 0xd00f6cc6, black_tv_crc: 0xd00f6cc6, black_pc_crc: 0x234969af +sand64_10 total_size: 9216, black_unknown_crc: 0xee47624d, black_tv_crc: 0xee47624d, black_pc_crc: 0x7c6afe45 +sand64_16 total_size: 9216, black_unknown_crc: 0xfff85b60, black_tv_crc: 0xfff85b60, black_pc_crc: 0xc03cff93 diff --git a/tests/ref/fate/sws-pixdesc-query b/tests/ref/fate/sws-pixdesc-query index a062088d54..cc86427bbb 100644 --- a/tests/ref/fate/sws-pixdesc-query +++ b/tests/ref/fate/sws-pixdesc-query @@ -31,6 +31,7 @@ is16BPS: rgbaf16le rgbf16be rgbf16le + sand64_16 xv48be xv48le y216be @@ -93,6 +94,7 @@ isNBPS: p410le p412be p412le + sand64_10 v30xbe v30xle x2bgr10be @@ -287,6 +289,9 @@ isYUV: p412le p416be p416le + sand128 + sand64_10 + sand64_16 uyva uyvy422 uyyvyy411 @@ -939,6 +944,9 @@ Packed: rgbf16le rgbf32be rgbf32le + sand128 + sand64_10 + sand64_16 uyva uyvy422 uyyvyy411
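
Note on the sand layout (not part of the patch): the copy routines added in rpi_sand_fn_pw.c and rpi_sand_fns.c all locate a pixel with the same addressing pattern, src + (x & mask) + y * stride1 + (x & ~mask) * stride2, with mask = stride1 - 1. The sketch below shows that calculation on its own, assuming stride1 is the stripe width in bytes and a power of two (128 for SAND128, per av_rpi_sand_frame_stride1) and stride2 is the stripe height in lines, so stride1 * stride2 is the byte distance from one vertical stripe to the next. sand_offset is an illustrative name only, not an FFmpeg API.

/* Illustrative sketch only -- not part of the patch.
 * Byte offset of column x (in bytes) in row y of a sand-layout buffer,
 * mirroring the expression used throughout the new sand copy functions:
 *     src + (x & mask) + y * stride1 + (x & ~mask) * stride2
 * Assumptions: stride1 = stripe width in bytes (power of two, e.g. 128),
 * stride2 = stripe height in rows. */
#include <stddef.h>

static inline size_t sand_offset(unsigned int x, unsigned int y,
                                 unsigned int stride1, unsigned int stride2)
{
    const unsigned int mask = stride1 - 1;
    return (size_t)(x & mask)              /* column within the current stripe      */
         + (size_t)y * stride1             /* row within that stripe                */
         + (size_t)(x & ~mask) * stride2;  /* skip the whole stripes to the left    */
}

For x inside the first stripe this reduces to x + y * stride1; crossing into the next stripe adds stride1 * stride2, which matches the sstride step used in the two-plus-stripe paths above.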