// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2018 Maxime Jourdan * Copyright (C) 2015 Amlogic, Inc. All rights reserved. */ #include #include #include "dos_regs.h" #include "hevc_regs.h" #include "codec_vp9.h" #include "vdec_helpers.h" #include "codec_hevc_common.h" /* HEVC reg mapping */ #define VP9_DEC_STATUS_REG HEVC_ASSIST_SCRATCH_0 #define VP9_10B_DECODE_SLICE 5 #define VP9_HEAD_PARSER_DONE 0xf0 #define VP9_RPM_BUFFER HEVC_ASSIST_SCRATCH_1 #define VP9_SHORT_TERM_RPS HEVC_ASSIST_SCRATCH_2 #define VP9_ADAPT_PROB_REG HEVC_ASSIST_SCRATCH_3 #define VP9_MMU_MAP_BUFFER HEVC_ASSIST_SCRATCH_4 #define VP9_PPS_BUFFER HEVC_ASSIST_SCRATCH_5 #define VP9_SAO_UP HEVC_ASSIST_SCRATCH_6 #define VP9_STREAM_SWAP_BUFFER HEVC_ASSIST_SCRATCH_7 #define VP9_STREAM_SWAP_BUFFER2 HEVC_ASSIST_SCRATCH_8 #define VP9_PROB_SWAP_BUFFER HEVC_ASSIST_SCRATCH_9 #define VP9_COUNT_SWAP_BUFFER HEVC_ASSIST_SCRATCH_A #define VP9_SEG_MAP_BUFFER HEVC_ASSIST_SCRATCH_B #define VP9_SCALELUT HEVC_ASSIST_SCRATCH_D #define VP9_WAIT_FLAG HEVC_ASSIST_SCRATCH_E #define LMEM_DUMP_ADR HEVC_ASSIST_SCRATCH_F #define NAL_SEARCH_CTL HEVC_ASSIST_SCRATCH_I #define VP9_DECODE_MODE HEVC_ASSIST_SCRATCH_J #define DECODE_MODE_SINGLE 0 #define DECODE_STOP_POS HEVC_ASSIST_SCRATCH_K #define HEVC_DECODE_COUNT HEVC_ASSIST_SCRATCH_M #define HEVC_DECODE_SIZE HEVC_ASSIST_SCRATCH_N /* VP9 Constants */ #define LCU_SIZE 64 #define MAX_REF_PIC_NUM 24 #define REFS_PER_FRAME 3 #define REF_FRAMES 8 #define MV_MEM_UNIT 0x240 #define ADAPT_PROB_SIZE 0xf80 enum FRAME_TYPE { KEY_FRAME = 0, INTER_FRAME = 1, FRAME_TYPES, }; /* VP9 Workspace layout */ #define MPRED_MV_BUF_SIZE 0x120000 #define IPP_SIZE 0x4000 #define SAO_ABV_SIZE 0x30000 #define SAO_VB_SIZE 0x30000 #define SH_TM_RPS_SIZE 0x800 #define VPS_SIZE 0x800 #define SPS_SIZE 0x800 #define PPS_SIZE 0x2000 #define SAO_UP_SIZE 0x2800 #define SWAP_BUF_SIZE 0x800 #define SWAP_BUF2_SIZE 0x800 #define SCALELUT_SIZE 0x8000 #define DBLK_PARA_SIZE 0x80000 #define DBLK_DATA_SIZE 0x80000 
#define SEG_MAP_SIZE	0xd800
#define PROB_SIZE	0x5000
#define COUNT_SIZE	0x3000
#define MMU_VBH_SIZE	0x5000
#define MPRED_ABV_SIZE	0x10000
/* One MPRED_MV_BUF_SIZE slot for each of the MAX_REF_PIC_NUM frames */
#define MPRED_MV_SIZE	(MPRED_MV_BUF_SIZE * MAX_REF_PIC_NUM)
#define RPM_BUF_SIZE	0x100
#define LMEM_SIZE	0x800

/*
 * Offsets of each region inside the workspace buffer. Every offset is the
 * previous region's offset plus the previous region's size, so the regions
 * are laid out contiguously in the order listed here.
 */
#define IPP_OFFSET	0x00
#define SAO_ABV_OFFSET	(IPP_OFFSET + IPP_SIZE)
#define SAO_VB_OFFSET	(SAO_ABV_OFFSET + SAO_ABV_SIZE)
#define SH_TM_RPS_OFFSET	(SAO_VB_OFFSET + SAO_VB_SIZE)
#define VPS_OFFSET	(SH_TM_RPS_OFFSET + SH_TM_RPS_SIZE)
#define SPS_OFFSET	(VPS_OFFSET + VPS_SIZE)
#define PPS_OFFSET	(SPS_OFFSET + SPS_SIZE)
#define SAO_UP_OFFSET	(PPS_OFFSET + PPS_SIZE)
#define SWAP_BUF_OFFSET	(SAO_UP_OFFSET + SAO_UP_SIZE)
#define SWAP_BUF2_OFFSET	(SWAP_BUF_OFFSET + SWAP_BUF_SIZE)
#define SCALELUT_OFFSET	(SWAP_BUF2_OFFSET + SWAP_BUF2_SIZE)
#define DBLK_PARA_OFFSET	(SCALELUT_OFFSET + SCALELUT_SIZE)
#define DBLK_DATA_OFFSET	(DBLK_PARA_OFFSET + DBLK_PARA_SIZE)
#define SEG_MAP_OFFSET	(DBLK_DATA_OFFSET + DBLK_DATA_SIZE)
#define PROB_OFFSET	(SEG_MAP_OFFSET + SEG_MAP_SIZE)
#define COUNT_OFFSET	(PROB_OFFSET + PROB_SIZE)
#define MMU_VBH_OFFSET	(COUNT_OFFSET + COUNT_SIZE)
#define MPRED_ABV_OFFSET	(MMU_VBH_OFFSET + MMU_VBH_SIZE)
#define MPRED_MV_OFFSET	(MPRED_ABV_OFFSET + MPRED_ABV_SIZE)
#define RPM_OFFSET	(MPRED_MV_OFFSET + MPRED_MV_SIZE)
#define LMEM_OFFSET	(RPM_OFFSET + RPM_BUF_SIZE)

/* Total workspace allocation: end of the last region, rounded up to 64 KiB */
#define SIZE_WORKSPACE	ALIGN(LMEM_OFFSET + LMEM_SIZE, 64 * SZ_1K)

/* Reference frame identifiers; also the first index of lfi->lvl[seg][ref][mode] */
#define NONE	-1
#define INTRA_FRAME	0
#define LAST_FRAME	1
#define GOLDEN_FRAME	2
#define ALTREF_FRAME	3
#define MAX_REF_FRAMES	4

/*
 * Defines, declarations and sub-functions for the VP9 de-block loop filter
 * threshold/level table update:
 * - struct segmentation is for the loop filter only (other members were
 *   removed)
 * - "vp9_loop_filter_init" and "vp9_loop_filter_frame_init" will be
 *   instantiated in C_Entry
 * - vp9_loop_filter_init runs once before decoding starts
 * - vp9_loop_filter_frame_init runs before every frame decoding start
 * - setting the video format to VP9 is done in vp9_loop_filter_init
 */
#define MAX_LOOP_FILTER		63
#define MAX_REF_LF_DELTAS	4
#define MAX_MODE_LF_DELTAS	2

#define SEGMENT_DELTADATA	0
#define SEGMENT_ABSDATA		1
#define MAX_SEGMENTS		8

/*
 * VP9 PROB processing defines
 *
 * Each *_START below is the previous section's start plus its size, giving
 * the position of every probability section inside the probability buffer.
 */
#define VP9_PARTITION_START      0
#define VP9_PARTITION_SIZE_STEP  (3 * 4)
#define VP9_PARTITION_ONE_SIZE   (4 * VP9_PARTITION_SIZE_STEP)
#define VP9_PARTITION_KEY_START  0
#define VP9_PARTITION_P_START    VP9_PARTITION_ONE_SIZE
#define VP9_PARTITION_SIZE       (2 * VP9_PARTITION_ONE_SIZE)
#define VP9_SKIP_START           (VP9_PARTITION_START + VP9_PARTITION_SIZE)
#define VP9_SKIP_SIZE            4 /* only 3 are used */
#define VP9_TX_MODE_START        (VP9_SKIP_START + VP9_SKIP_SIZE)
#define VP9_TX_MODE_8_0_OFFSET   0
#define VP9_TX_MODE_8_1_OFFSET   1
#define VP9_TX_MODE_16_0_OFFSET  2
#define VP9_TX_MODE_16_1_OFFSET  4
#define VP9_TX_MODE_32_0_OFFSET  6
#define VP9_TX_MODE_32_1_OFFSET  9
#define VP9_TX_MODE_SIZE         12
#define VP9_COEF_START           (VP9_TX_MODE_START + VP9_TX_MODE_SIZE)
#define VP9_COEF_BAND_0_OFFSET   0
#define VP9_COEF_BAND_1_OFFSET   (VP9_COEF_BAND_0_OFFSET + 3 * 3 + 1)
#define VP9_COEF_BAND_2_OFFSET   (VP9_COEF_BAND_1_OFFSET + 6 * 3)
#define VP9_COEF_BAND_3_OFFSET   (VP9_COEF_BAND_2_OFFSET + 6 * 3)
#define VP9_COEF_BAND_4_OFFSET   (VP9_COEF_BAND_3_OFFSET + 6 * 3)
#define VP9_COEF_BAND_5_OFFSET   (VP9_COEF_BAND_4_OFFSET + 6 * 3)
#define VP9_COEF_SIZE_ONE_SET    100 /* (3 + 5 * 6) * 3 + 1 padding */
#define VP9_COEF_4X4_START       (VP9_COEF_START + 0 * VP9_COEF_SIZE_ONE_SET)
#define VP9_COEF_8X8_START       (VP9_COEF_START + 4 * VP9_COEF_SIZE_ONE_SET)
#define VP9_COEF_16X16_START     (VP9_COEF_START + 8 * VP9_COEF_SIZE_ONE_SET)
#define VP9_COEF_32X32_START     (VP9_COEF_START + 12 * VP9_COEF_SIZE_ONE_SET)
#define VP9_COEF_SIZE_PLANE      (2 * VP9_COEF_SIZE_ONE_SET)
#define VP9_COEF_SIZE            (4 * 2 * 2 * VP9_COEF_SIZE_ONE_SET)
#define VP9_INTER_MODE_START     (VP9_COEF_START + VP9_COEF_SIZE)
#define VP9_INTER_MODE_SIZE      24 /* only 21 are used (3 * 7) */
#define VP9_INTERP_START         (VP9_INTER_MODE_START + VP9_INTER_MODE_SIZE)
#define VP9_INTERP_SIZE          8
#define VP9_INTRA_INTER_START    (VP9_INTERP_START + VP9_INTERP_SIZE)
#define VP9_INTRA_INTER_SIZE     4
/* INTERP and INTRA_INTER are adjacent and can be addressed as one section */
#define VP9_INTERP_INTRA_INTER_START  VP9_INTERP_START
#define VP9_INTERP_INTRA_INTER_SIZE   (VP9_INTERP_SIZE + VP9_INTRA_INTER_SIZE)
#define VP9_COMP_INTER_START     \
		(VP9_INTERP_INTRA_INTER_START + VP9_INTERP_INTRA_INTER_SIZE)
#define VP9_COMP_INTER_SIZE      5
#define VP9_COMP_REF_START       (VP9_COMP_INTER_START + VP9_COMP_INTER_SIZE)
#define VP9_COMP_REF_SIZE        5
#define VP9_SINGLE_REF_START     (VP9_COMP_REF_START + VP9_COMP_REF_SIZE)
#define VP9_SINGLE_REF_SIZE      10
/* COMP_INTER, COMP_REF and SINGLE_REF together form the REF_MODE section */
#define VP9_REF_MODE_START       VP9_COMP_INTER_START
#define VP9_REF_MODE_SIZE        \
		(VP9_COMP_INTER_SIZE + VP9_COMP_REF_SIZE + VP9_SINGLE_REF_SIZE)
#define VP9_IF_Y_MODE_START      (VP9_REF_MODE_START + VP9_REF_MODE_SIZE)
#define VP9_IF_Y_MODE_SIZE       36
#define VP9_IF_UV_MODE_START     (VP9_IF_Y_MODE_START + VP9_IF_Y_MODE_SIZE)
#define VP9_IF_UV_MODE_SIZE      92 /* only 90 are used */
#define VP9_MV_JOINTS_START      (VP9_IF_UV_MODE_START + VP9_IF_UV_MODE_SIZE)
#define VP9_MV_JOINTS_SIZE       3
#define VP9_MV_SIGN_0_START      (VP9_MV_JOINTS_START + VP9_MV_JOINTS_SIZE)
#define VP9_MV_SIGN_0_SIZE       1
#define VP9_MV_CLASSES_0_START   (VP9_MV_SIGN_0_START + VP9_MV_SIGN_0_SIZE)
#define VP9_MV_CLASSES_0_SIZE    10
#define VP9_MV_CLASS0_0_START    \
		(VP9_MV_CLASSES_0_START + VP9_MV_CLASSES_0_SIZE)
#define VP9_MV_CLASS0_0_SIZE     1
#define VP9_MV_BITS_0_START      (VP9_MV_CLASS0_0_START + VP9_MV_CLASS0_0_SIZE)
#define VP9_MV_BITS_0_SIZE       10
#define VP9_MV_SIGN_1_START      (VP9_MV_BITS_0_START + VP9_MV_BITS_0_SIZE)
#define VP9_MV_SIGN_1_SIZE       1
#define VP9_MV_CLASSES_1_START   \
			(VP9_MV_SIGN_1_START + VP9_MV_SIGN_1_SIZE)
#define VP9_MV_CLASSES_1_SIZE    10
#define VP9_MV_CLASS0_1_START    \
			(VP9_MV_CLASSES_1_START + VP9_MV_CLASSES_1_SIZE)
#define VP9_MV_CLASS0_1_SIZE     1
#define VP9_MV_BITS_1_START      \
			(VP9_MV_CLASS0_1_START + VP9_MV_CLASS0_1_SIZE)
#define VP9_MV_BITS_1_SIZE       10
#define VP9_MV_CLASS0_FP_0_START \
			(VP9_MV_BITS_1_START + VP9_MV_BITS_1_SIZE)
#define VP9_MV_CLASS0_FP_0_SIZE  9
#define VP9_MV_CLASS0_FP_1_START \
			(VP9_MV_CLASS0_FP_0_START + VP9_MV_CLASS0_FP_0_SIZE)
#define VP9_MV_CLASS0_FP_1_SIZE  9
#define VP9_MV_CLASS0_HP_0_START \
			(VP9_MV_CLASS0_FP_1_START + VP9_MV_CLASS0_FP_1_SIZE)
#define VP9_MV_CLASS0_HP_0_SIZE  2
#define VP9_MV_CLASS0_HP_1_START \
			(VP9_MV_CLASS0_HP_0_START + VP9_MV_CLASS0_HP_0_SIZE)
#define VP9_MV_CLASS0_HP_1_SIZE  2
/* The whole motion vector section starts at the MV joints */
#define VP9_MV_START             VP9_MV_JOINTS_START
#define VP9_MV_SIZE              72 /* only 69 are used */
#define VP9_TOTAL_SIZE           (VP9_MV_START + VP9_MV_SIZE)

/*
 * VP9 COUNT mem processing defines
 *
 * Same chaining scheme as the PROB defines above, for the count buffer.
 */
#define VP9_COEF_COUNT_START           0
#define VP9_COEF_COUNT_BAND_0_OFFSET   0
#define VP9_COEF_COUNT_BAND_1_OFFSET   \
			(VP9_COEF_COUNT_BAND_0_OFFSET + 3 * 5)
#define VP9_COEF_COUNT_BAND_2_OFFSET   \
			(VP9_COEF_COUNT_BAND_1_OFFSET + 6 * 5)
#define VP9_COEF_COUNT_BAND_3_OFFSET   \
			(VP9_COEF_COUNT_BAND_2_OFFSET + 6 * 5)
#define VP9_COEF_COUNT_BAND_4_OFFSET   \
			(VP9_COEF_COUNT_BAND_3_OFFSET + 6 * 5)
#define VP9_COEF_COUNT_BAND_5_OFFSET   \
			(VP9_COEF_COUNT_BAND_4_OFFSET + 6 * 5)
#define VP9_COEF_COUNT_SIZE_ONE_SET    165 /* (3 + 5 * 6) * 5 */
#define VP9_COEF_COUNT_4X4_START       \
		(VP9_COEF_COUNT_START + 0 * VP9_COEF_COUNT_SIZE_ONE_SET)
#define VP9_COEF_COUNT_8X8_START       \
		(VP9_COEF_COUNT_START + 4 * VP9_COEF_COUNT_SIZE_ONE_SET)
#define VP9_COEF_COUNT_16X16_START     \
		(VP9_COEF_COUNT_START + 8 * VP9_COEF_COUNT_SIZE_ONE_SET)
#define VP9_COEF_COUNT_32X32_START     \
		(VP9_COEF_COUNT_START + 12 * VP9_COEF_COUNT_SIZE_ONE_SET)
#define VP9_COEF_COUNT_SIZE_PLANE      (2 * VP9_COEF_COUNT_SIZE_ONE_SET)
#define VP9_COEF_COUNT_SIZE            (4 * 2 * 2 * VP9_COEF_COUNT_SIZE_ONE_SET)

#define VP9_INTRA_INTER_COUNT_START    \
		(VP9_COEF_COUNT_START + VP9_COEF_COUNT_SIZE)
#define VP9_INTRA_INTER_COUNT_SIZE     (4 * 2)
#define VP9_COMP_INTER_COUNT_START     \
		(VP9_INTRA_INTER_COUNT_START + VP9_INTRA_INTER_COUNT_SIZE)
#define VP9_COMP_INTER_COUNT_SIZE      (5 * 2)
#define VP9_COMP_REF_COUNT_START       \
		(VP9_COMP_INTER_COUNT_START + VP9_COMP_INTER_COUNT_SIZE)
#define VP9_COMP_REF_COUNT_SIZE        (5 * 2)
#define VP9_SINGLE_REF_COUNT_START     \
		(VP9_COMP_REF_COUNT_START + VP9_COMP_REF_COUNT_SIZE)
#define VP9_SINGLE_REF_COUNT_SIZE (10 * 2) #define VP9_TX_MODE_COUNT_START \ (VP9_SINGLE_REF_COUNT_START + VP9_SINGLE_REF_COUNT_SIZE) #define VP9_TX_MODE_COUNT_SIZE (12 * 2) #define VP9_SKIP_COUNT_START \ (VP9_TX_MODE_COUNT_START + VP9_TX_MODE_COUNT_SIZE) #define VP9_SKIP_COUNT_SIZE (3 * 2) #define VP9_MV_SIGN_0_COUNT_START \ (VP9_SKIP_COUNT_START + VP9_SKIP_COUNT_SIZE) #define VP9_MV_SIGN_0_COUNT_SIZE (1 * 2) #define VP9_MV_SIGN_1_COUNT_START \ (VP9_MV_SIGN_0_COUNT_START + VP9_MV_SIGN_0_COUNT_SIZE) #define VP9_MV_SIGN_1_COUNT_SIZE (1 * 2) #define VP9_MV_BITS_0_COUNT_START \ (VP9_MV_SIGN_1_COUNT_START + VP9_MV_SIGN_1_COUNT_SIZE) #define VP9_MV_BITS_0_COUNT_SIZE (10 * 2) #define VP9_MV_BITS_1_COUNT_START \ (VP9_MV_BITS_0_COUNT_START + VP9_MV_BITS_0_COUNT_SIZE) #define VP9_MV_BITS_1_COUNT_SIZE (10 * 2) #define VP9_MV_CLASS0_HP_0_COUNT_START \ (VP9_MV_BITS_1_COUNT_START + VP9_MV_BITS_1_COUNT_SIZE) #define VP9_MV_CLASS0_HP_0_COUNT_SIZE (2 * 2) #define VP9_MV_CLASS0_HP_1_COUNT_START \ (VP9_MV_CLASS0_HP_0_COUNT_START + VP9_MV_CLASS0_HP_0_COUNT_SIZE) #define VP9_MV_CLASS0_HP_1_COUNT_SIZE (2 * 2) /* Start merge_tree */ #define VP9_INTER_MODE_COUNT_START \ (VP9_MV_CLASS0_HP_1_COUNT_START + VP9_MV_CLASS0_HP_1_COUNT_SIZE) #define VP9_INTER_MODE_COUNT_SIZE (7 * 4) #define VP9_IF_Y_MODE_COUNT_START \ (VP9_INTER_MODE_COUNT_START + VP9_INTER_MODE_COUNT_SIZE) #define VP9_IF_Y_MODE_COUNT_SIZE (10 * 4) #define VP9_IF_UV_MODE_COUNT_START \ (VP9_IF_Y_MODE_COUNT_START + VP9_IF_Y_MODE_COUNT_SIZE) #define VP9_IF_UV_MODE_COUNT_SIZE (10 * 10) #define VP9_PARTITION_P_COUNT_START \ (VP9_IF_UV_MODE_COUNT_START + VP9_IF_UV_MODE_COUNT_SIZE) #define VP9_PARTITION_P_COUNT_SIZE (4 * 4 * 4) #define VP9_INTERP_COUNT_START \ (VP9_PARTITION_P_COUNT_START + VP9_PARTITION_P_COUNT_SIZE) #define VP9_INTERP_COUNT_SIZE (4 * 3) #define VP9_MV_JOINTS_COUNT_START \ (VP9_INTERP_COUNT_START + VP9_INTERP_COUNT_SIZE) #define VP9_MV_JOINTS_COUNT_SIZE (1 * 4) #define VP9_MV_CLASSES_0_COUNT_START \ 
(VP9_MV_JOINTS_COUNT_START + VP9_MV_JOINTS_COUNT_SIZE) #define VP9_MV_CLASSES_0_COUNT_SIZE (1 * 11) #define VP9_MV_CLASS0_0_COUNT_START \ (VP9_MV_CLASSES_0_COUNT_START + VP9_MV_CLASSES_0_COUNT_SIZE) #define VP9_MV_CLASS0_0_COUNT_SIZE (1 * 2) #define VP9_MV_CLASSES_1_COUNT_START \ (VP9_MV_CLASS0_0_COUNT_START + VP9_MV_CLASS0_0_COUNT_SIZE) #define VP9_MV_CLASSES_1_COUNT_SIZE (1 * 11) #define VP9_MV_CLASS0_1_COUNT_START \ (VP9_MV_CLASSES_1_COUNT_START + VP9_MV_CLASSES_1_COUNT_SIZE) #define VP9_MV_CLASS0_1_COUNT_SIZE (1 * 2) #define VP9_MV_CLASS0_FP_0_COUNT_START \ (VP9_MV_CLASS0_1_COUNT_START + VP9_MV_CLASS0_1_COUNT_SIZE) #define VP9_MV_CLASS0_FP_0_COUNT_SIZE (3 * 4) #define VP9_MV_CLASS0_FP_1_COUNT_START \ (VP9_MV_CLASS0_FP_0_COUNT_START + VP9_MV_CLASS0_FP_0_COUNT_SIZE) #define VP9_MV_CLASS0_FP_1_COUNT_SIZE (3 * 4) #define DC_PRED 0 /* Average of above and left pixels */ #define V_PRED 1 /* Vertical */ #define H_PRED 2 /* Horizontal */ #define D45_PRED 3 /* Directional 45 deg = round(arctan(1/1) * 180/pi) */ #define D135_PRED 4 /* Directional 135 deg = 180 - 45 */ #define D117_PRED 5 /* Directional 117 deg = 180 - 63 */ #define D153_PRED 6 /* Directional 153 deg = 180 - 27 */ #define D207_PRED 7 /* Directional 207 deg = 180 + 27 */ #define D63_PRED 8 /* Directional 63 deg = round(arctan(2/1) * 180/pi) */ #define TM_PRED 9 /* True-motion */ /* Use a static inline to avoid possible side effect from num being reused */ static inline int round_power_of_two(int value, int num) { return (value + (1 << (num - 1))) >> num; } #define MODE_MV_COUNT_SAT 20 static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = { 0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64, 70, 76, 83, 89, 96, 102, 108, 115, 121, 128 }; union rpm_param { struct { u16 data[RPM_BUF_SIZE]; } l; struct { u16 profile; u16 show_existing_frame; u16 frame_to_show_idx; u16 frame_type; /*1 bit*/ u16 show_frame; /*1 bit*/ u16 error_resilient_mode; /*1 bit*/ u16 intra_only; /*1 bit*/ u16 display_size_present; /*1 
bit*/ u16 reset_frame_context; u16 refresh_frame_flags; u16 width; u16 height; u16 display_width; u16 display_height; u16 ref_info; u16 same_frame_size; u16 mode_ref_delta_enabled; u16 ref_deltas[4]; u16 mode_deltas[2]; u16 filter_level; u16 sharpness_level; u16 bit_depth; u16 seg_quant_info[8]; u16 seg_enabled; u16 seg_abs_delta; /* bit 15: feature enabled; bit 8, sign; bit[5:0], data */ u16 seg_lf_info[8]; } p; }; enum SEG_LVL_FEATURES { SEG_LVL_ALT_Q = 0, /* Use alternate Quantizer */ SEG_LVL_ALT_LF = 1, /* Use alternate loop filter value */ SEG_LVL_REF_FRAME = 2, /* Optional Segment reference frame */ SEG_LVL_SKIP = 3, /* Optional Segment (0,0) + skip mode */ SEG_LVL_MAX = 4 /* Number of features supported */ }; struct segmentation { u8 enabled; u8 update_map; u8 update_data; u8 abs_delta; u8 temporal_update; s16 feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; unsigned int feature_mask[MAX_SEGMENTS]; }; struct loop_filter_thresh { u8 mblim; u8 lim; u8 hev_thr; }; struct loop_filter_info_n { struct loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1]; u8 lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS]; }; struct loopfilter { int filter_level; int sharpness_level; int last_sharpness_level; u8 mode_ref_delta_enabled; u8 mode_ref_delta_update; /*0 = Intra, Last, GF, ARF*/ signed char ref_deltas[MAX_REF_LF_DELTAS]; signed char last_ref_deltas[MAX_REF_LF_DELTAS]; /*0 = ZERO_MV, MV*/ signed char mode_deltas[MAX_MODE_LF_DELTAS]; signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; }; struct vp9_frame { struct list_head list; struct vb2_v4l2_buffer *vbuf; int index; int intra_only; int show; int type; int done; unsigned int width; unsigned int height; }; struct codec_vp9 { /* VP9 context lock */ struct mutex lock; /* Common part with the HEVC decoder */ struct codec_hevc_common common; /* Buffer for the VP9 Workspace */ void *workspace_vaddr; dma_addr_t workspace_paddr; /* Contains many information parsed from the bitstream */ union rpm_param rpm_param; /* Whether we detected the 
bitstream as 10-bit */ int is_10bit; /* Coded resolution reported by the hardware */ u32 width, height; /* All ref frames used by the HW at a given time */ struct list_head ref_frames_list; u32 frames_num; /* In case of downsampling (decoding with FBC but outputting in NV12M), * we need to allocate additional buffers for FBC. */ void *fbc_buffer_vaddr[MAX_REF_PIC_NUM]; dma_addr_t fbc_buffer_paddr[MAX_REF_PIC_NUM]; int ref_frame_map[REF_FRAMES]; int next_ref_frame_map[REF_FRAMES]; struct vp9_frame *frame_refs[REFS_PER_FRAME]; u32 lcu_total; /* loop filter */ int default_filt_lvl; struct loop_filter_info_n lfi; struct loopfilter lf; struct segmentation seg_4lf; struct vp9_frame *cur_frame; struct vp9_frame *prev_frame; }; static int div_r32(s64 m, int n) { s64 qu = div_s64(m, n); return (int)qu; } static int clip_prob(int p) { return clamp_val(p, 1, 255); } static int segfeature_active(struct segmentation *seg, int segment_id, enum SEG_LVL_FEATURES feature_id) { return seg->enabled && (seg->feature_mask[segment_id] & (1 << feature_id)); } static int get_segdata(struct segmentation *seg, int segment_id, enum SEG_LVL_FEATURES feature_id) { return seg->feature_data[segment_id][feature_id]; } static void vp9_update_sharpness(struct loop_filter_info_n *lfi, int sharpness_lvl) { int lvl; /* For each possible value for the loop filter fill out limits*/ for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) { /* Set loop filter parameters that control sharpness.*/ int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4)); if (sharpness_lvl > 0) { if (block_inside_limit > (9 - sharpness_lvl)) block_inside_limit = (9 - sharpness_lvl); } if (block_inside_limit < 1) block_inside_limit = 1; lfi->lfthr[lvl].lim = (u8)block_inside_limit; lfi->lfthr[lvl].mblim = (u8)(2 * (lvl + 2) + block_inside_limit); } } /* Instantiate this function once when decode is started */ static void vp9_loop_filter_init(struct amvdec_core *core, struct codec_vp9 *vp9) { struct loop_filter_info_n 
*lfi = &vp9->lfi; struct loopfilter *lf = &vp9->lf; struct segmentation *seg_4lf = &vp9->seg_4lf; int i; memset(lfi, 0, sizeof(struct loop_filter_info_n)); memset(lf, 0, sizeof(struct loopfilter)); memset(seg_4lf, 0, sizeof(struct segmentation)); lf->sharpness_level = 0; vp9_update_sharpness(lfi, lf->sharpness_level); lf->last_sharpness_level = lf->sharpness_level; for (i = 0; i < 32; i++) { unsigned int thr; thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) | (lfi->lfthr[i * 2 + 1].mblim & 0xff); thr = (thr << 16) | ((lfi->lfthr[i * 2].lim & 0x3f) << 8) | (lfi->lfthr[i * 2].mblim & 0xff); amvdec_write_dos(core, HEVC_DBLK_CFG9, thr); } if (core->platform->revision >= VDEC_REVISION_SM1) amvdec_write_dos(core, HEVC_DBLK_CFGB, (0x3 << 14) | /* dw fifo thres r and b */ (0x3 << 12) | /* dw fifo thres r or b */ (0x3 << 10) | /* dw fifo thres not r/b */ BIT(0)); /* VP9 video format */ else if (core->platform->revision >= VDEC_REVISION_G12A) /* VP9 video format */ amvdec_write_dos(core, HEVC_DBLK_CFGB, (0x54 << 8) | BIT(0)); else amvdec_write_dos(core, HEVC_DBLK_CFGB, 0x40400001); } static void vp9_loop_filter_frame_init(struct amvdec_core *core, struct segmentation *seg, struct loop_filter_info_n *lfi, struct loopfilter *lf, int default_filt_lvl) { int i; int seg_id; /* * n_shift is the multiplier for lf_deltas * the multiplier is: * - 1 for when filter_lvl is between 0 and 31 * - 2 when filter_lvl is between 32 and 63 */ const int scale = 1 << (default_filt_lvl >> 5); /* update limits if sharpness has changed */ if (lf->last_sharpness_level != lf->sharpness_level) { vp9_update_sharpness(lfi, lf->sharpness_level); lf->last_sharpness_level = lf->sharpness_level; /* Write to register */ for (i = 0; i < 32; i++) { unsigned int thr; thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) | (lfi->lfthr[i * 2 + 1].mblim & 0xff); thr = (thr << 16) | ((lfi->lfthr[i * 2].lim & 0x3f) << 8) | (lfi->lfthr[i * 2].mblim & 0xff); amvdec_write_dos(core, HEVC_DBLK_CFG9, thr); } } for (seg_id = 0; 
seg_id < MAX_SEGMENTS; seg_id++) { int lvl_seg = default_filt_lvl; if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF); lvl_seg = clamp_t(int, seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data, 0, MAX_LOOP_FILTER); } if (!lf->mode_ref_delta_enabled) { /* * We could get rid of this if we assume that deltas * are set to zero when not in use. * encoder always uses deltas */ memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); } else { int ref, mode; const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; lfi->lvl[seg_id][INTRA_FRAME][0] = clamp_val(intra_lvl, 0, MAX_LOOP_FILTER); for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale + lf->mode_deltas[mode] * scale; lfi->lvl[seg_id][ref][mode] = clamp_val(inter_lvl, 0, MAX_LOOP_FILTER); } } } } for (i = 0; i < 16; i++) { unsigned int level; level = ((lfi->lvl[i >> 1][3][i & 1] & 0x3f) << 24) | ((lfi->lvl[i >> 1][2][i & 1] & 0x3f) << 16) | ((lfi->lvl[i >> 1][1][i & 1] & 0x3f) << 8) | (lfi->lvl[i >> 1][0][i & 1] & 0x3f); if (!default_filt_lvl) level = 0; amvdec_write_dos(core, HEVC_DBLK_CFGA, level); } } static void codec_vp9_flush_output(struct amvdec_session *sess) { struct codec_vp9 *vp9 = sess->priv; struct vp9_frame *tmp, *n; mutex_lock(&vp9->lock); list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) { if (!tmp->done) { if (tmp->show) amvdec_dst_buf_done(sess, tmp->vbuf, V4L2_FIELD_NONE); else v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf); vp9->frames_num--; } list_del(&tmp->list); kfree(tmp); } mutex_unlock(&vp9->lock); } static u32 codec_vp9_num_pending_bufs(struct amvdec_session *sess) { struct codec_vp9 *vp9 = sess->priv; if (!vp9) return 0; return vp9->frames_num; } static int codec_vp9_alloc_workspace(struct amvdec_core *core, struct codec_vp9 *vp9) { /* Allocate some memory for the VP9 
decoder's state */ vp9->workspace_vaddr = dma_alloc_coherent(core->dev, SIZE_WORKSPACE, &vp9->workspace_paddr, GFP_KERNEL); if (!vp9->workspace_vaddr) { dev_err(core->dev, "Failed to allocate VP9 Workspace\n"); return -ENOMEM; } return 0; } static void codec_vp9_setup_workspace(struct amvdec_session *sess, struct codec_vp9 *vp9) { struct amvdec_core *core = sess->core; u32 revision = core->platform->revision; dma_addr_t wkaddr = vp9->workspace_paddr; amvdec_write_dos(core, HEVCD_IPP_LINEBUFF_BASE, wkaddr + IPP_OFFSET); amvdec_write_dos(core, VP9_RPM_BUFFER, wkaddr + RPM_OFFSET); amvdec_write_dos(core, VP9_SHORT_TERM_RPS, wkaddr + SH_TM_RPS_OFFSET); amvdec_write_dos(core, VP9_PPS_BUFFER, wkaddr + PPS_OFFSET); amvdec_write_dos(core, VP9_SAO_UP, wkaddr + SAO_UP_OFFSET); amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER, wkaddr + SWAP_BUF_OFFSET); amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER2, wkaddr + SWAP_BUF2_OFFSET); amvdec_write_dos(core, VP9_SCALELUT, wkaddr + SCALELUT_OFFSET); if (core->platform->revision >= VDEC_REVISION_G12A) amvdec_write_dos(core, HEVC_DBLK_CFGE, wkaddr + DBLK_PARA_OFFSET); amvdec_write_dos(core, HEVC_DBLK_CFG4, wkaddr + DBLK_PARA_OFFSET); amvdec_write_dos(core, HEVC_DBLK_CFG5, wkaddr + DBLK_DATA_OFFSET); amvdec_write_dos(core, VP9_SEG_MAP_BUFFER, wkaddr + SEG_MAP_OFFSET); amvdec_write_dos(core, VP9_PROB_SWAP_BUFFER, wkaddr + PROB_OFFSET); amvdec_write_dos(core, VP9_COUNT_SWAP_BUFFER, wkaddr + COUNT_OFFSET); amvdec_write_dos(core, LMEM_DUMP_ADR, wkaddr + LMEM_OFFSET); if (codec_hevc_use_mmu(revision, sess->pixfmt_cap, vp9->is_10bit)) { amvdec_write_dos(core, HEVC_SAO_MMU_VH0_ADDR, wkaddr + MMU_VBH_OFFSET); amvdec_write_dos(core, HEVC_SAO_MMU_VH1_ADDR, wkaddr + MMU_VBH_OFFSET + (MMU_VBH_SIZE / 2)); if (revision >= VDEC_REVISION_G12A) amvdec_write_dos(core, HEVC_ASSIST_MMU_MAP_ADDR, vp9->common.mmu_map_paddr); else amvdec_write_dos(core, VP9_MMU_MAP_BUFFER, vp9->common.mmu_map_paddr); } } static int codec_vp9_start(struct amvdec_session *sess) { 
struct amvdec_core *core = sess->core; struct codec_vp9 *vp9; u32 val; int i; int ret; vp9 = kzalloc(sizeof(*vp9), GFP_KERNEL); if (!vp9) return -ENOMEM; ret = codec_vp9_alloc_workspace(core, vp9); if (ret) goto free_vp9; codec_vp9_setup_workspace(sess, vp9); amvdec_write_dos_bits(core, HEVC_STREAM_CONTROL, BIT(0)); /* stream_fifo_hole */ if (core->platform->revision >= VDEC_REVISION_G12A) amvdec_write_dos_bits(core, HEVC_STREAM_FIFO_CTL, BIT(29)); val = amvdec_read_dos(core, HEVC_PARSER_INT_CONTROL) & 0x7fffffff; val |= (3 << 29) | BIT(24) | BIT(22) | BIT(7) | BIT(4) | BIT(0); amvdec_write_dos(core, HEVC_PARSER_INT_CONTROL, val); amvdec_write_dos_bits(core, HEVC_SHIFT_STATUS, BIT(0)); amvdec_write_dos(core, HEVC_SHIFT_CONTROL, BIT(10) | BIT(9) | (3 << 6) | BIT(5) | BIT(2) | BIT(1) | BIT(0)); amvdec_write_dos(core, HEVC_CABAC_CONTROL, BIT(0)); amvdec_write_dos(core, HEVC_PARSER_CORE_CONTROL, BIT(0)); amvdec_write_dos(core, HEVC_SHIFT_STARTCODE, 0x00000001); amvdec_write_dos(core, VP9_DEC_STATUS_REG, 0); amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE, BIT(16)); for (i = 0; i < ARRAY_SIZE(vdec_hevc_parser_cmd); ++i) amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE, vdec_hevc_parser_cmd[i]); amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_0, PARSER_CMD_SKIP_CFG_0); amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_1, PARSER_CMD_SKIP_CFG_1); amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_2, PARSER_CMD_SKIP_CFG_2); amvdec_write_dos(core, HEVC_PARSER_IF_CONTROL, BIT(5) | BIT(2) | BIT(0)); amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(0)); amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(1)); amvdec_write_dos(core, VP9_WAIT_FLAG, 1); /* clear mailbox interrupt */ amvdec_write_dos(core, HEVC_ASSIST_MBOX1_CLR_REG, 1); /* enable mailbox interrupt */ amvdec_write_dos(core, HEVC_ASSIST_MBOX1_MASK, 1); /* disable PSCALE for hardware sharing */ amvdec_write_dos(core, HEVC_PSCALE_CTRL, 0); /* Let the uCode do all the parsing */ amvdec_write_dos(core, NAL_SEARCH_CTL, 0x8); amvdec_write_dos(core, 
DECODE_STOP_POS, 0); amvdec_write_dos(core, VP9_DECODE_MODE, DECODE_MODE_SINGLE); pr_debug("decode_count: %u; decode_size: %u\n", amvdec_read_dos(core, HEVC_DECODE_COUNT), amvdec_read_dos(core, HEVC_DECODE_SIZE)); vp9_loop_filter_init(core, vp9); INIT_LIST_HEAD(&vp9->ref_frames_list); mutex_init(&vp9->lock); memset(&vp9->ref_frame_map, -1, sizeof(vp9->ref_frame_map)); memset(&vp9->next_ref_frame_map, -1, sizeof(vp9->next_ref_frame_map)); for (i = 0; i < REFS_PER_FRAME; ++i) vp9->frame_refs[i] = NULL; sess->priv = vp9; return 0; free_vp9: kfree(vp9); return ret; } static int codec_vp9_stop(struct amvdec_session *sess) { struct amvdec_core *core = sess->core; struct codec_vp9 *vp9 = sess->priv; mutex_lock(&vp9->lock); if (vp9->workspace_vaddr) dma_free_coherent(core->dev, SIZE_WORKSPACE, vp9->workspace_vaddr, vp9->workspace_paddr); codec_hevc_free_fbc_buffers(sess, &vp9->common); mutex_unlock(&vp9->lock); return 0; } /* * Program LAST & GOLDEN frames into the motion compensation reference cache * controller */ static void codec_vp9_set_mcrcc(struct amvdec_session *sess) { struct amvdec_core *core = sess->core; struct codec_vp9 *vp9 = sess->priv; u32 val; /* Reset mcrcc */ amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0x2); /* Disable on I-frame */ if (vp9->cur_frame->type == KEY_FRAME || vp9->cur_frame->intra_only) { amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0x0); return; } amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, BIT(1)); val = amvdec_read_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR) & 0xffff; val |= (val << 16); amvdec_write_dos(core, HEVCD_MCRCC_CTL2, val); val = amvdec_read_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR) & 0xffff; val |= (val << 16); amvdec_write_dos(core, HEVCD_MCRCC_CTL3, val); /* Enable mcrcc progressive-mode */ amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0xff0); } static void codec_vp9_set_sao(struct amvdec_session *sess, struct vb2_buffer *vb) { struct amvdec_core *core = sess->core; struct codec_vp9 *vp9 = sess->priv; dma_addr_t 
buf_y_paddr; dma_addr_t buf_u_v_paddr; u32 val; if (codec_hevc_use_downsample(sess->pixfmt_cap, vp9->is_10bit)) buf_y_paddr = vp9->common.fbc_buffer_paddr[vb->index]; else buf_y_paddr = vb2_dma_contig_plane_dma_addr(vb, 0); if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) { val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0200; amvdec_write_dos(core, HEVC_SAO_CTRL5, val); amvdec_write_dos(core, HEVC_CM_BODY_START_ADDR, buf_y_paddr); } if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M) { buf_y_paddr = vb2_dma_contig_plane_dma_addr(vb, 0); buf_u_v_paddr = vb2_dma_contig_plane_dma_addr(vb, 1); amvdec_write_dos(core, HEVC_SAO_Y_START_ADDR, buf_y_paddr); amvdec_write_dos(core, HEVC_SAO_C_START_ADDR, buf_u_v_paddr); amvdec_write_dos(core, HEVC_SAO_Y_WPTR, buf_y_paddr); amvdec_write_dos(core, HEVC_SAO_C_WPTR, buf_u_v_paddr); } if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap, vp9->is_10bit)) { amvdec_write_dos(core, HEVC_CM_HEADER_START_ADDR, vp9->common.mmu_header_paddr[vb->index]); /* use HEVC_CM_HEADER_START_ADDR */ amvdec_write_dos_bits(core, HEVC_SAO_CTRL5, BIT(10)); } amvdec_write_dos(core, HEVC_SAO_Y_LENGTH, amvdec_get_output_size(sess)); amvdec_write_dos(core, HEVC_SAO_C_LENGTH, (amvdec_get_output_size(sess) / 2)); if (core->platform->revision >= VDEC_REVISION_G12A) { amvdec_clear_dos_bits(core, HEVC_DBLK_CFGB, BIT(4) | BIT(5) | BIT(8) | BIT(9)); /* enable first, compressed write */ if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(8)); /* enable second, uncompressed write */ if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M) amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(9)); /* dblk pipeline mode=1 for performance */ if (sess->width >= 1280) amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(4)); pr_debug("HEVC_DBLK_CFGB: %08X\n", amvdec_read_dos(core, HEVC_DBLK_CFGB)); } val = amvdec_read_dos(core, HEVC_SAO_CTRL1) & ~0x3ff0; val |= 0xff0; /* Set endianness for 2-bytes swaps (nv12) */ if 
(core->platform->revision < VDEC_REVISION_G12A) { val &= ~0x3; if (!codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) val |= BIT(0); /* disable cm compression */ /* TOFIX: Handle Amlogic Framebuffer compression */ } amvdec_write_dos(core, HEVC_SAO_CTRL1, val); pr_debug("HEVC_SAO_CTRL1: %08X\n", val); /* no downscale for NV12 */ val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0000; amvdec_write_dos(core, HEVC_SAO_CTRL5, val); val = amvdec_read_dos(core, HEVCD_IPP_AXIIF_CONFIG) & ~0x30; val |= 0xf; val &= ~BIT(12); /* NV12 */ amvdec_write_dos(core, HEVCD_IPP_AXIIF_CONFIG, val); } static dma_addr_t codec_vp9_get_frame_mv_paddr(struct codec_vp9 *vp9, struct vp9_frame *frame) { return vp9->workspace_paddr + MPRED_MV_OFFSET + (frame->index * MPRED_MV_BUF_SIZE); } static void codec_vp9_set_mpred_mv(struct amvdec_core *core, struct codec_vp9 *vp9) { int mpred_mv_rd_end_addr; int use_prev_frame_mvs = vp9->prev_frame->width == vp9->cur_frame->width && vp9->prev_frame->height == vp9->cur_frame->height && !vp9->prev_frame->intra_only && vp9->prev_frame->show && vp9->prev_frame->type != KEY_FRAME; amvdec_write_dos(core, HEVC_MPRED_CTRL3, 0x24122412); amvdec_write_dos(core, HEVC_MPRED_ABV_START_ADDR, vp9->workspace_paddr + MPRED_ABV_OFFSET); amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); if (use_prev_frame_mvs) amvdec_write_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); amvdec_write_dos(core, HEVC_MPRED_MV_WR_START_ADDR, codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame)); amvdec_write_dos(core, HEVC_MPRED_MV_WPTR, codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame)); amvdec_write_dos(core, HEVC_MPRED_MV_RD_START_ADDR, codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame)); amvdec_write_dos(core, HEVC_MPRED_MV_RPTR, codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame)); mpred_mv_rd_end_addr = codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame) + (vp9->lcu_total * MV_MEM_UNIT); amvdec_write_dos(core, HEVC_MPRED_MV_RD_END_ADDR, mpred_mv_rd_end_addr); } static void 
codec_vp9_update_next_ref(struct codec_vp9 *vp9) { union rpm_param *param = &vp9->rpm_param; u32 buf_idx = vp9->cur_frame->index; int ref_index = 0; int refresh_frame_flags; int mask; refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ? 0xff : param->p.refresh_frame_flags; for (mask = refresh_frame_flags; mask; mask >>= 1) { pr_debug("mask=%08X; ref_index=%d\n", mask, ref_index); if (mask & 1) vp9->next_ref_frame_map[ref_index] = buf_idx; else vp9->next_ref_frame_map[ref_index] = vp9->ref_frame_map[ref_index]; ++ref_index; } for (; ref_index < REF_FRAMES; ++ref_index) vp9->next_ref_frame_map[ref_index] = vp9->ref_frame_map[ref_index]; } static void codec_vp9_save_refs(struct codec_vp9 *vp9) { union rpm_param *param = &vp9->rpm_param; int i; for (i = 0; i < REFS_PER_FRAME; ++i) { const int ref = (param->p.ref_info >> (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7; if (vp9->ref_frame_map[ref] < 0) continue; pr_warn("%s: FIXME, would need to save ref %d\n", __func__, vp9->ref_frame_map[ref]); } } static void codec_vp9_update_ref(struct codec_vp9 *vp9) { union rpm_param *param = &vp9->rpm_param; int ref_index = 0; int mask; int refresh_frame_flags; if (!vp9->cur_frame) return; refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ? 
0xff : param->p.refresh_frame_flags; for (mask = refresh_frame_flags; mask; mask >>= 1) { vp9->ref_frame_map[ref_index] = vp9->next_ref_frame_map[ref_index]; ++ref_index; } if (param->p.show_existing_frame) return; for (; ref_index < REF_FRAMES; ++ref_index) vp9->ref_frame_map[ref_index] = vp9->next_ref_frame_map[ref_index]; } static struct vp9_frame *codec_vp9_get_frame_by_idx(struct codec_vp9 *vp9, int idx) { struct vp9_frame *frame; list_for_each_entry(frame, &vp9->ref_frames_list, list) { if (frame->index == idx) return frame; } return NULL; } static void codec_vp9_sync_ref(struct codec_vp9 *vp9) { union rpm_param *param = &vp9->rpm_param; int i; for (i = 0; i < REFS_PER_FRAME; ++i) { const int ref = (param->p.ref_info >> (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7; const int idx = vp9->ref_frame_map[ref]; vp9->frame_refs[i] = codec_vp9_get_frame_by_idx(vp9, idx); if (!vp9->frame_refs[i]) pr_warn("%s: couldn't find VP9 ref %d\n", __func__, idx); } } static void codec_vp9_set_refs(struct amvdec_session *sess, struct codec_vp9 *vp9) { struct amvdec_core *core = sess->core; int i; for (i = 0; i < REFS_PER_FRAME; ++i) { struct vp9_frame *frame = vp9->frame_refs[i]; int id_y; int id_u_v; if (!frame) continue; if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) { id_y = frame->index; id_u_v = id_y; } else { id_y = frame->index * 2; id_u_v = id_y + 1; } amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR, (id_u_v << 16) | (id_u_v << 8) | id_y); } } static void codec_vp9_set_mc(struct amvdec_session *sess, struct codec_vp9 *vp9) { struct amvdec_core *core = sess->core; u32 scale = 0; u32 sz; int i; amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, 1); codec_vp9_set_refs(sess, vp9); amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, (16 << 8) | 1); codec_vp9_set_refs(sess, vp9); amvdec_write_dos(core, VP9D_MPP_REFINFO_TBL_ACCCONFIG, BIT(2)); for (i = 0; i < REFS_PER_FRAME; ++i) { if (!vp9->frame_refs[i]) continue; if (vp9->frame_refs[i]->width 
!= vp9->width || vp9->frame_refs[i]->height != vp9->height) scale = 1; sz = amvdec_am21c_body_size(vp9->frame_refs[i]->width, vp9->frame_refs[i]->height); amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, vp9->frame_refs[i]->width); amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, vp9->frame_refs[i]->height); amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, (vp9->frame_refs[i]->width << 14) / vp9->width); amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, (vp9->frame_refs[i]->height << 14) / vp9->height); amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, sz >> 5); } amvdec_write_dos(core, VP9D_MPP_REF_SCALE_ENBL, scale); } static struct vp9_frame *codec_vp9_get_new_frame(struct amvdec_session *sess) { struct codec_vp9 *vp9 = sess->priv; union rpm_param *param = &vp9->rpm_param; struct vb2_v4l2_buffer *vbuf; struct vp9_frame *new_frame; new_frame = kzalloc(sizeof(*new_frame), GFP_KERNEL); if (!new_frame) return NULL; vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx); if (!vbuf) { dev_err(sess->core->dev, "No dst buffer available\n"); kfree(new_frame); return NULL; } while (codec_vp9_get_frame_by_idx(vp9, vbuf->vb2_buf.index)) { struct vb2_v4l2_buffer *old_vbuf = vbuf; vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx); v4l2_m2m_buf_queue(sess->m2m_ctx, old_vbuf); if (!vbuf) { dev_err(sess->core->dev, "No dst buffer available\n"); kfree(new_frame); return NULL; } } new_frame->vbuf = vbuf; new_frame->index = vbuf->vb2_buf.index; new_frame->intra_only = param->p.intra_only; new_frame->show = param->p.show_frame; new_frame->type = param->p.frame_type; new_frame->width = vp9->width; new_frame->height = vp9->height; list_add_tail(&new_frame->list, &vp9->ref_frames_list); vp9->frames_num++; return new_frame; } static void codec_vp9_show_existing_frame(struct codec_vp9 *vp9) { union rpm_param *param = &vp9->rpm_param; if (!param->p.show_existing_frame) return; pr_debug("showing frame %u\n", param->p.frame_to_show_idx); } static void codec_vp9_rm_noshow_frame(struct amvdec_session *sess) { struct 
codec_vp9 *vp9 = sess->priv; struct vp9_frame *tmp; list_for_each_entry(tmp, &vp9->ref_frames_list, list) { if (tmp->show) continue; pr_debug("rm noshow: %u\n", tmp->index); v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf); list_del(&tmp->list); kfree(tmp); vp9->frames_num--; return; } } static void codec_vp9_process_frame(struct amvdec_session *sess) { struct amvdec_core *core = sess->core; struct codec_vp9 *vp9 = sess->priv; union rpm_param *param = &vp9->rpm_param; int intra_only; if (!param->p.show_frame) codec_vp9_rm_noshow_frame(sess); vp9->cur_frame = codec_vp9_get_new_frame(sess); if (!vp9->cur_frame) return; pr_debug("frame %d: type: %08X; show_exist: %u; show: %u, intra_only: %u\n", vp9->cur_frame->index, param->p.frame_type, param->p.show_existing_frame, param->p.show_frame, param->p.intra_only); if (param->p.frame_type != KEY_FRAME) codec_vp9_sync_ref(vp9); codec_vp9_update_next_ref(vp9); codec_vp9_show_existing_frame(vp9); if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap, vp9->is_10bit)) codec_hevc_fill_mmu_map(sess, &vp9->common, &vp9->cur_frame->vbuf->vb2_buf); intra_only = param->p.show_frame ? 
0 : param->p.intra_only; /* clear mpred (for keyframe only) */ if (param->p.frame_type != KEY_FRAME && !intra_only) { codec_vp9_set_mc(sess, vp9); codec_vp9_set_mpred_mv(core, vp9); } else { amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); } amvdec_write_dos(core, HEVC_PARSER_PICTURE_SIZE, (vp9->height << 16) | vp9->width); codec_vp9_set_mcrcc(sess); codec_vp9_set_sao(sess, &vp9->cur_frame->vbuf->vb2_buf); vp9_loop_filter_frame_init(core, &vp9->seg_4lf, &vp9->lfi, &vp9->lf, vp9->default_filt_lvl); /* ask uCode to start decoding */ amvdec_write_dos(core, VP9_DEC_STATUS_REG, VP9_10B_DECODE_SLICE); } static void codec_vp9_process_lf(struct codec_vp9 *vp9) { union rpm_param *param = &vp9->rpm_param; int i; vp9->lf.mode_ref_delta_enabled = param->p.mode_ref_delta_enabled; vp9->lf.sharpness_level = param->p.sharpness_level; vp9->default_filt_lvl = param->p.filter_level; vp9->seg_4lf.enabled = param->p.seg_enabled; vp9->seg_4lf.abs_delta = param->p.seg_abs_delta; for (i = 0; i < 4; i++) vp9->lf.ref_deltas[i] = param->p.ref_deltas[i]; for (i = 0; i < 2; i++) vp9->lf.mode_deltas[i] = param->p.mode_deltas[i]; for (i = 0; i < MAX_SEGMENTS; i++) vp9->seg_4lf.feature_mask[i] = (param->p.seg_lf_info[i] & 0x8000) ? (1 << SEG_LVL_ALT_LF) : 0; for (i = 0; i < MAX_SEGMENTS; i++) vp9->seg_4lf.feature_data[i][SEG_LVL_ALT_LF] = (param->p.seg_lf_info[i] & 0x100) ? 
-(param->p.seg_lf_info[i] & 0x3f) : (param->p.seg_lf_info[i] & 0x3f); } static void codec_vp9_resume(struct amvdec_session *sess) { struct codec_vp9 *vp9 = sess->priv; mutex_lock(&vp9->lock); if (codec_hevc_setup_buffers(sess, &vp9->common, vp9->is_10bit)) { mutex_unlock(&vp9->lock); amvdec_abort(sess); return; } codec_vp9_setup_workspace(sess, vp9); codec_hevc_setup_decode_head(sess, vp9->is_10bit); codec_vp9_process_lf(vp9); codec_vp9_process_frame(sess); mutex_unlock(&vp9->lock); } /* * The RPM section within the workspace contains * many information regarding the parsed bitstream */ static void codec_vp9_fetch_rpm(struct amvdec_session *sess) { struct codec_vp9 *vp9 = sess->priv; u16 *rpm_vaddr = vp9->workspace_vaddr + RPM_OFFSET; int i, j; for (i = 0; i < RPM_BUF_SIZE; i += 4) for (j = 0; j < 4; j++) vp9->rpm_param.l.data[i + j] = rpm_vaddr[i + 3 - j]; } static int codec_vp9_process_rpm(struct codec_vp9 *vp9) { union rpm_param *param = &vp9->rpm_param; int src_changed = 0; int is_10bit = 0; int pic_width_64 = ALIGN(param->p.width, 64); int pic_height_32 = ALIGN(param->p.height, 32); int pic_width_lcu = (pic_width_64 % LCU_SIZE) ? pic_width_64 / LCU_SIZE + 1 : pic_width_64 / LCU_SIZE; int pic_height_lcu = (pic_height_32 % LCU_SIZE) ? 
pic_height_32 / LCU_SIZE + 1 : pic_height_32 / LCU_SIZE; vp9->lcu_total = pic_width_lcu * pic_height_lcu; if (param->p.bit_depth == 10) is_10bit = 1; if (vp9->width != param->p.width || vp9->height != param->p.height || vp9->is_10bit != is_10bit) src_changed = 1; vp9->width = param->p.width; vp9->height = param->p.height; vp9->is_10bit = is_10bit; pr_debug("width: %u; height: %u; is_10bit: %d; src_changed: %d\n", vp9->width, vp9->height, is_10bit, src_changed); return src_changed; } static bool codec_vp9_is_ref(struct codec_vp9 *vp9, struct vp9_frame *frame) { int i; for (i = 0; i < REF_FRAMES; ++i) if (vp9->ref_frame_map[i] == frame->index) return true; return false; } static void codec_vp9_show_frame(struct amvdec_session *sess) { struct codec_vp9 *vp9 = sess->priv; struct vp9_frame *tmp, *n; list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) { if (!tmp->show || tmp == vp9->cur_frame) continue; if (!tmp->done) { pr_debug("Doning %u\n", tmp->index); amvdec_dst_buf_done(sess, tmp->vbuf, V4L2_FIELD_NONE); tmp->done = 1; vp9->frames_num--; } if (codec_vp9_is_ref(vp9, tmp) || tmp == vp9->prev_frame) continue; pr_debug("deleting %d\n", tmp->index); list_del(&tmp->list); kfree(tmp); } } static void vp9_tree_merge_probs(unsigned int *prev_prob, unsigned int *cur_prob, int coef_node_start, int tree_left, int tree_right, int tree_i, int node) { int prob_32, prob_res, prob_shift; int pre_prob, new_prob; int den, m_count, get_prob, factor; prob_32 = prev_prob[coef_node_start / 4 * 2]; prob_res = coef_node_start & 3; prob_shift = prob_res * 8; pre_prob = (prob_32 >> prob_shift) & 0xff; den = tree_left + tree_right; if (den == 0) { new_prob = pre_prob; } else { m_count = min(den, MODE_MV_COUNT_SAT); get_prob = clip_prob(div_r32(((int64_t)tree_left * 256 + (den >> 1)), den)); /* weighted_prob */ factor = count_to_update_factor[m_count]; new_prob = round_power_of_two(pre_prob * (256 - factor) + get_prob * factor, 8); } cur_prob[coef_node_start / 4 * 2] = 
(cur_prob[coef_node_start / 4 * 2] & (~(0xff << prob_shift))) | (new_prob << prob_shift); } static void adapt_coef_probs_cxt(unsigned int *prev_prob, unsigned int *cur_prob, unsigned int *count, int update_factor, int cxt_num, int coef_cxt_start, int coef_count_cxt_start) { int prob_32, prob_res, prob_shift; int pre_prob, new_prob; int num, den, m_count, get_prob, factor; int node, coef_node_start; int count_sat = 24; int cxt; for (cxt = 0; cxt < cxt_num; cxt++) { const int n0 = count[coef_count_cxt_start]; const int n1 = count[coef_count_cxt_start + 1]; const int n2 = count[coef_count_cxt_start + 2]; const int neob = count[coef_count_cxt_start + 3]; const int nneob = count[coef_count_cxt_start + 4]; const unsigned int branch_ct[3][2] = { { neob, nneob }, { n0, n1 + n2 }, { n1, n2 } }; coef_node_start = coef_cxt_start; for (node = 0 ; node < 3 ; node++) { prob_32 = prev_prob[coef_node_start / 4 * 2]; prob_res = coef_node_start & 3; prob_shift = prob_res * 8; pre_prob = (prob_32 >> prob_shift) & 0xff; /* get binary prob */ num = branch_ct[node][0]; den = branch_ct[node][0] + branch_ct[node][1]; m_count = min(den, count_sat); get_prob = (den == 0) ? 
128u : clip_prob(div_r32(((int64_t)num * 256 + (den >> 1)), den)); factor = update_factor * m_count / count_sat; new_prob = round_power_of_two(pre_prob * (256 - factor) + get_prob * factor, 8); cur_prob[coef_node_start / 4 * 2] = (cur_prob[coef_node_start / 4 * 2] & (~(0xff << prob_shift))) | (new_prob << prob_shift); coef_node_start += 1; } coef_cxt_start = coef_cxt_start + 3; coef_count_cxt_start = coef_count_cxt_start + 5; } } static void adapt_coef_probs(int prev_kf, int cur_kf, int pre_fc, unsigned int *prev_prob, unsigned int *cur_prob, unsigned int *count) { int tx_size, coef_tx_size_start, coef_count_tx_size_start; int plane, coef_plane_start, coef_count_plane_start; int type, coef_type_start, coef_count_type_start; int band, coef_band_start, coef_count_band_start; int cxt_num; int coef_cxt_start, coef_count_cxt_start; int node, coef_node_start, coef_count_node_start; int tree_i, tree_left, tree_right; int mvd_i; int update_factor = cur_kf ? 112 : (prev_kf ? 128 : 112); int prob_32; int prob_res; int prob_shift; int pre_prob; int den; int get_prob; int m_count; int factor; int new_prob; for (tx_size = 0 ; tx_size < 4 ; tx_size++) { coef_tx_size_start = VP9_COEF_START + tx_size * 4 * VP9_COEF_SIZE_ONE_SET; coef_count_tx_size_start = VP9_COEF_COUNT_START + tx_size * 4 * VP9_COEF_COUNT_SIZE_ONE_SET; coef_plane_start = coef_tx_size_start; coef_count_plane_start = coef_count_tx_size_start; for (plane = 0 ; plane < 2 ; plane++) { coef_type_start = coef_plane_start; coef_count_type_start = coef_count_plane_start; for (type = 0 ; type < 2 ; type++) { coef_band_start = coef_type_start; coef_count_band_start = coef_count_type_start; for (band = 0 ; band < 6 ; band++) { if (band == 0) cxt_num = 3; else cxt_num = 6; coef_cxt_start = coef_band_start; coef_count_cxt_start = coef_count_band_start; adapt_coef_probs_cxt(prev_prob, cur_prob, count, update_factor, cxt_num, coef_cxt_start, coef_count_cxt_start); if (band == 0) { coef_band_start += 10; coef_count_band_start += 
15; } else { coef_band_start += 18; coef_count_band_start += 30; } } coef_type_start += VP9_COEF_SIZE_ONE_SET; coef_count_type_start += VP9_COEF_COUNT_SIZE_ONE_SET; } coef_plane_start += 2 * VP9_COEF_SIZE_ONE_SET; coef_count_plane_start += 2 * VP9_COEF_COUNT_SIZE_ONE_SET; } } if (cur_kf == 0) { /* mode_mv_merge_probs - merge_intra_inter_prob */ for (coef_count_node_start = VP9_INTRA_INTER_COUNT_START; coef_count_node_start < (VP9_MV_CLASS0_HP_1_COUNT_START + VP9_MV_CLASS0_HP_1_COUNT_SIZE); coef_count_node_start += 2) { if (coef_count_node_start == VP9_INTRA_INTER_COUNT_START) coef_node_start = VP9_INTRA_INTER_START; else if (coef_count_node_start == VP9_COMP_INTER_COUNT_START) coef_node_start = VP9_COMP_INTER_START; else if (coef_count_node_start == VP9_TX_MODE_COUNT_START) coef_node_start = VP9_TX_MODE_START; else if (coef_count_node_start == VP9_SKIP_COUNT_START) coef_node_start = VP9_SKIP_START; else if (coef_count_node_start == VP9_MV_SIGN_0_COUNT_START) coef_node_start = VP9_MV_SIGN_0_START; else if (coef_count_node_start == VP9_MV_SIGN_1_COUNT_START) coef_node_start = VP9_MV_SIGN_1_START; else if (coef_count_node_start == VP9_MV_BITS_0_COUNT_START) coef_node_start = VP9_MV_BITS_0_START; else if (coef_count_node_start == VP9_MV_BITS_1_COUNT_START) coef_node_start = VP9_MV_BITS_1_START; else /* node_start == VP9_MV_CLASS0_HP_0_COUNT_START */ coef_node_start = VP9_MV_CLASS0_HP_0_START; den = count[coef_count_node_start] + count[coef_count_node_start + 1]; prob_32 = prev_prob[coef_node_start / 4 * 2]; prob_res = coef_node_start & 3; prob_shift = prob_res * 8; pre_prob = (prob_32 >> prob_shift) & 0xff; if (den == 0) { new_prob = pre_prob; } else { m_count = min(den, MODE_MV_COUNT_SAT); get_prob = clip_prob(div_r32(((int64_t) count[coef_count_node_start] * 256 + (den >> 1)), den)); /* weighted prob */ factor = count_to_update_factor[m_count]; new_prob = round_power_of_two(pre_prob * (256 - factor) + get_prob * factor, 8); } cur_prob[coef_node_start / 4 * 2] = 
(cur_prob[coef_node_start / 4 * 2] & (~(0xff << prob_shift))) | (new_prob << prob_shift); coef_node_start = coef_node_start + 1; } coef_node_start = VP9_INTER_MODE_START; coef_count_node_start = VP9_INTER_MODE_COUNT_START; for (tree_i = 0 ; tree_i < 7 ; tree_i++) { for (node = 0 ; node < 3 ; node++) { unsigned int start = coef_count_node_start; switch (node) { case 2: tree_left = count[start + 1]; tree_right = count[start + 3]; break; case 1: tree_left = count[start + 0]; tree_right = count[start + 1] + count[start + 3]; break; default: tree_left = count[start + 2]; tree_right = count[start + 0] + count[start + 1] + count[start + 3]; break; } vp9_tree_merge_probs(prev_prob, cur_prob, coef_node_start, tree_left, tree_right, tree_i, node); coef_node_start = coef_node_start + 1; } coef_count_node_start = coef_count_node_start + 4; } coef_node_start = VP9_IF_Y_MODE_START; coef_count_node_start = VP9_IF_Y_MODE_COUNT_START; for (tree_i = 0 ; tree_i < 14 ; tree_i++) { for (node = 0 ; node < 9 ; node++) { unsigned int start = coef_count_node_start; switch (node) { case 8: tree_left = count[start + D153_PRED]; tree_right = count[start + D207_PRED]; break; case 7: tree_left = count[start + D63_PRED]; tree_right = count[start + D207_PRED] + count[start + D153_PRED]; break; case 6: tree_left = count[start + D45_PRED]; tree_right = count[start + D207_PRED] + count[start + D153_PRED] + count[start + D63_PRED]; break; case 5: tree_left = count[start + D135_PRED]; tree_right = count[start + D117_PRED]; break; case 4: tree_left = count[start + H_PRED]; tree_right = count[start + D117_PRED] + count[start + D135_PRED]; break; case 3: tree_left = count[start + H_PRED] + count[start + D117_PRED] + count[start + D135_PRED]; tree_right = count[start + D45_PRED] + count[start + D207_PRED] + count[start + D153_PRED] + count[start + D63_PRED]; break; case 2: tree_left = count[start + V_PRED]; tree_right = count[start + H_PRED] + count[start + D117_PRED] + count[start + D135_PRED] + 
count[start + D45_PRED] + count[start + D207_PRED] + count[start + D153_PRED] + count[start + D63_PRED]; break; case 1: tree_left = count[start + TM_PRED]; tree_right = count[start + V_PRED] + count[start + H_PRED] + count[start + D117_PRED] + count[start + D135_PRED] + count[start + D45_PRED] + count[start + D207_PRED] + count[start + D153_PRED] + count[start + D63_PRED]; break; default: tree_left = count[start + DC_PRED]; tree_right = count[start + TM_PRED] + count[start + V_PRED] + count[start + H_PRED] + count[start + D117_PRED] + count[start + D135_PRED] + count[start + D45_PRED] + count[start + D207_PRED] + count[start + D153_PRED] + count[start + D63_PRED]; break; } vp9_tree_merge_probs(prev_prob, cur_prob, coef_node_start, tree_left, tree_right, tree_i, node); coef_node_start = coef_node_start + 1; } coef_count_node_start = coef_count_node_start + 10; } coef_node_start = VP9_PARTITION_P_START; coef_count_node_start = VP9_PARTITION_P_COUNT_START; for (tree_i = 0 ; tree_i < 16 ; tree_i++) { for (node = 0 ; node < 3 ; node++) { unsigned int start = coef_count_node_start; switch (node) { case 2: tree_left = count[start + 2]; tree_right = count[start + 3]; break; case 1: tree_left = count[start + 1]; tree_right = count[start + 2] + count[start + 3]; break; default: tree_left = count[start + 0]; tree_right = count[start + 1] + count[start + 2] + count[start + 3]; break; } vp9_tree_merge_probs(prev_prob, cur_prob, coef_node_start, tree_left, tree_right, tree_i, node); coef_node_start = coef_node_start + 1; } coef_count_node_start = coef_count_node_start + 4; } coef_node_start = VP9_INTERP_START; coef_count_node_start = VP9_INTERP_COUNT_START; for (tree_i = 0 ; tree_i < 4 ; tree_i++) { for (node = 0 ; node < 2 ; node++) { unsigned int start = coef_count_node_start; switch (node) { case 1: tree_left = count[start + 1]; tree_right = count[start + 2]; break; default: tree_left = count[start + 0]; tree_right = count[start + 1] + count[start + 2]; break; } 
vp9_tree_merge_probs(prev_prob, cur_prob, coef_node_start, tree_left, tree_right, tree_i, node); coef_node_start = coef_node_start + 1; } coef_count_node_start = coef_count_node_start + 3; } coef_node_start = VP9_MV_JOINTS_START; coef_count_node_start = VP9_MV_JOINTS_COUNT_START; for (tree_i = 0 ; tree_i < 1 ; tree_i++) { for (node = 0 ; node < 3 ; node++) { unsigned int start = coef_count_node_start; switch (node) { case 2: tree_left = count[start + 2]; tree_right = count[start + 3]; break; case 1: tree_left = count[start + 1]; tree_right = count[start + 2] + count[start + 3]; break; default: tree_left = count[start + 0]; tree_right = count[start + 1] + count[start + 2] + count[start + 3]; break; } vp9_tree_merge_probs(prev_prob, cur_prob, coef_node_start, tree_left, tree_right, tree_i, node); coef_node_start = coef_node_start + 1; } coef_count_node_start = coef_count_node_start + 4; } for (mvd_i = 0 ; mvd_i < 2 ; mvd_i++) { coef_node_start = mvd_i ? VP9_MV_CLASSES_1_START : VP9_MV_CLASSES_0_START; coef_count_node_start = mvd_i ? 
VP9_MV_CLASSES_1_COUNT_START : VP9_MV_CLASSES_0_COUNT_START; tree_i = 0; for (node = 0; node < 10; node++) { unsigned int start = coef_count_node_start; switch (node) { case 9: tree_left = count[start + 9]; tree_right = count[start + 10]; break; case 8: tree_left = count[start + 7]; tree_right = count[start + 8]; break; case 7: tree_left = count[start + 7] + count[start + 8]; tree_right = count[start + 9] + count[start + 10]; break; case 6: tree_left = count[start + 6]; tree_right = count[start + 7] + count[start + 8] + count[start + 9] + count[start + 10]; break; case 5: tree_left = count[start + 4]; tree_right = count[start + 5]; break; case 4: tree_left = count[start + 4] + count[start + 5]; tree_right = count[start + 6] + count[start + 7] + count[start + 8] + count[start + 9] + count[start + 10]; break; case 3: tree_left = count[start + 2]; tree_right = count[start + 3]; break; case 2: tree_left = count[start + 2] + count[start + 3]; tree_right = count[start + 4] + count[start + 5] + count[start + 6] + count[start + 7] + count[start + 8] + count[start + 9] + count[start + 10]; break; case 1: tree_left = count[start + 1]; tree_right = count[start + 2] + count[start + 3] + count[start + 4] + count[start + 5] + count[start + 6] + count[start + 7] + count[start + 8] + count[start + 9] + count[start + 10]; break; default: tree_left = count[start + 0]; tree_right = count[start + 1] + count[start + 2] + count[start + 3] + count[start + 4] + count[start + 5] + count[start + 6] + count[start + 7] + count[start + 8] + count[start + 9] + count[start + 10]; break; } vp9_tree_merge_probs(prev_prob, cur_prob, coef_node_start, tree_left, tree_right, tree_i, node); coef_node_start = coef_node_start + 1; } coef_node_start = mvd_i ? VP9_MV_CLASS0_1_START : VP9_MV_CLASS0_0_START; coef_count_node_start = mvd_i ? 
VP9_MV_CLASS0_1_COUNT_START : VP9_MV_CLASS0_0_COUNT_START; tree_i = 0; node = 0; tree_left = count[coef_count_node_start + 0]; tree_right = count[coef_count_node_start + 1]; vp9_tree_merge_probs(prev_prob, cur_prob, coef_node_start, tree_left, tree_right, tree_i, node); coef_node_start = mvd_i ? VP9_MV_CLASS0_FP_1_START : VP9_MV_CLASS0_FP_0_START; coef_count_node_start = mvd_i ? VP9_MV_CLASS0_FP_1_COUNT_START : VP9_MV_CLASS0_FP_0_COUNT_START; for (tree_i = 0; tree_i < 3; tree_i++) { for (node = 0; node < 3; node++) { unsigned int start = coef_count_node_start; switch (node) { case 2: tree_left = count[start + 2]; tree_right = count[start + 3]; break; case 1: tree_left = count[start + 1]; tree_right = count[start + 2] + count[start + 3]; break; default: tree_left = count[start + 0]; tree_right = count[start + 1] + count[start + 2] + count[start + 3]; break; } vp9_tree_merge_probs(prev_prob, cur_prob, coef_node_start, tree_left, tree_right, tree_i, node); coef_node_start = coef_node_start + 1; } coef_count_node_start = coef_count_node_start + 4; } } } } static irqreturn_t codec_vp9_threaded_isr(struct amvdec_session *sess) { struct amvdec_core *core = sess->core; struct codec_vp9 *vp9 = sess->priv; u32 dec_status = amvdec_read_dos(core, VP9_DEC_STATUS_REG); u32 prob_status = amvdec_read_dos(core, VP9_ADAPT_PROB_REG); int i; if (!vp9) return IRQ_HANDLED; mutex_lock(&vp9->lock); if (dec_status != VP9_HEAD_PARSER_DONE) { dev_err(core->dev_dec, "Unrecognized dec_status: %08X\n", dec_status); amvdec_abort(sess); goto unlock; } pr_debug("ISR: %08X;%08X\n", dec_status, prob_status); sess->keyframe_found = 1; if ((prob_status & 0xff) == 0xfd && vp9->cur_frame) { /* VP9_REQ_ADAPT_PROB */ u8 *prev_prob_b = ((u8 *)vp9->workspace_vaddr + PROB_OFFSET) + ((prob_status >> 8) * 0x1000); u8 *cur_prob_b = ((u8 *)vp9->workspace_vaddr + PROB_OFFSET) + 0x4000; u8 *count_b = (u8 *)vp9->workspace_vaddr + COUNT_OFFSET; int last_frame_type = vp9->prev_frame ? 
vp9->prev_frame->type : KEY_FRAME;

		adapt_coef_probs(last_frame_type == KEY_FRAME,
				 vp9->cur_frame->type == KEY_FRAME ? 1 : 0,
				 prob_status >> 8,
				 (unsigned int *)prev_prob_b,
				 (unsigned int *)cur_prob_b,
				 (unsigned int *)count_b);

		/* Store the adapted table over the selected one, then ack */
		memcpy(prev_prob_b, cur_prob_b, ADAPT_PROB_SIZE);
		amvdec_write_dos(core, VP9_ADAPT_PROB_REG, 0);
	}

	/* Invalidate first 3 refs */
	for (i = 0; i < REFS_PER_FRAME ; ++i)
		vp9->frame_refs[i] = NULL;

	/* cur_frame may be NULL here, in which case prev_frame becomes NULL */
	vp9->prev_frame = vp9->cur_frame;
	codec_vp9_update_ref(vp9);

	codec_vp9_fetch_rpm(sess);
	if (codec_vp9_process_rpm(vp9)) {
		/* Width, height or bit depth changed: notify, decode nothing */
		amvdec_src_change(sess, vp9->width, vp9->height, 16);

		/* No frame is actually processed */
		vp9->cur_frame = NULL;

		/* Show the remaining frame */
		codec_vp9_show_frame(sess);

		/* FIXME: Save refs for resized frame */
		if (vp9->frames_num)
			codec_vp9_save_refs(vp9);

		goto unlock;
	}

	codec_vp9_process_lf(vp9);
	codec_vp9_process_frame(sess);
	codec_vp9_show_frame(sess);

unlock:
	mutex_unlock(&vp9->lock);
	return IRQ_HANDLED;
}

/* Hard IRQ handler: does no work itself, only wakes the threaded handler */
static irqreturn_t codec_vp9_isr(struct amvdec_session *sess)
{
	return IRQ_WAKE_THREAD;
}

/* VP9 codec callbacks exported to the amvdec core */
struct amvdec_codec_ops codec_vp9_ops = {
	.start = codec_vp9_start,
	.stop = codec_vp9_stop,
	.isr = codec_vp9_isr,
	.threaded_isr = codec_vp9_threaded_isr,
	.num_pending_bufs = codec_vp9_num_pending_bufs,
	.drain = codec_vp9_flush_output,
	.resume = codec_vp9_resume,
};