// SPDX-License-Identifier: GPL-2.0
/*
 * Hantro VP9 codec driver
 *
 * Copyright (C) 2021 Collabora Ltd.
 */

#include <linux/types.h>
#include <media/v4l2-mem2mem.h>

#include "hantro.h"
#include "hantro_hw.h"
#include "hantro_vp9.h"

#define POW2(x) (1 << (x))

#define MAX_LOG2_TILE_COLUMNS 6
#define MAX_NUM_TILE_COLS POW2(MAX_LOG2_TILE_COLUMNS)
#define MAX_TILE_COLS 20
#define MAX_TILE_ROWS 22

static size_t hantro_vp9_tile_filter_size(unsigned int height)
{
        u32 h, height32, size;

        h = roundup(height, 8);

        height32 = roundup(h, 64);
        size = 24 * height32 * (MAX_NUM_TILE_COLS - 1); /* luma: 8, chroma: 8 + 8 */

        return size;
}

static size_t hantro_vp9_bsd_control_size(unsigned int height)
{
        u32 h, height32;

        h = roundup(height, 8);
        height32 = roundup(h, 64);

        return 16 * (height32 / 4) * (MAX_NUM_TILE_COLS - 1);
}

static size_t hantro_vp9_segment_map_size(unsigned int width, unsigned int height)
{
        u32 w, h;
        int num_ctbs;

        w = roundup(width, 8);
        h = roundup(height, 8);
        num_ctbs = ((w + 63) / 64) * ((h + 63) / 64);

        return num_ctbs * 32;
}

static inline size_t hantro_vp9_prob_tab_size(void)
{
        return roundup(sizeof(struct hantro_g2_all_probs), 16);
}

static inline size_t hantro_vp9_count_tab_size(void)
{
        return roundup(sizeof(struct symbol_counts), 16);
}

static inline size_t hantro_vp9_tile_info_size(void)
{
        return roundup((MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 15 + 16) & ~0xf, 16);
}
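/*
 * Worked example of the sizing math above (illustrative numbers only; the
 * real maximums come from the variant's decoded-format list, see
 * hantro_vp9_dec_init()). For a hypothetical 4096x2304 maximum frame:
 *
 *   height32 = 2304 (already a multiple of 64)
 *   tile filter:  24 * 2304 * 63             = 3483648 bytes
 *   BSD control:  16 * (2304 / 4) * 63       =  580608 bytes
 *   segment map:  (4096/64) * (2304/64) CTBs = 2304 CTBs * 32 bytes
 *                                            =   73728 bytes per map
 *                 (two maps are allocated, used alternately)
 */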
/*
 * The G2 hardware keeps separate coefficient counter arrays per transform
 * size. Index i selects the array: 0 is 4x4, 1 is 8x8, 2 is 16x16 and
 * 3 is 32x32.
 */
static void *get_coeffs_arr(struct symbol_counts *cnts, int i, int j, int k, int l, int m)
{
        if (i == 0)
                return &cnts->count_coeffs[j][k][l][m];

        if (i == 1)
                return &cnts->count_coeffs8x8[j][k][l][m];

        if (i == 2)
                return &cnts->count_coeffs16x16[j][k][l][m];

        if (i == 3)
                return &cnts->count_coeffs32x32[j][k][l][m];

        return NULL;
}

/* The second EOB count is stored as element [3] of the coefficient counters. */
static void *get_eobs1(struct symbol_counts *cnts, int i, int j, int k, int l, int m)
{
        if (i == 0)
                return &cnts->count_coeffs[j][k][l][m][3];

        if (i == 1)
                return &cnts->count_coeffs8x8[j][k][l][m][3];

        if (i == 2)
                return &cnts->count_coeffs16x16[j][k][l][m][3];

        if (i == 3)
                return &cnts->count_coeffs32x32[j][k][l][m][3];

        return NULL;
}

#define INNER_LOOP \
        do { \
                for (m = 0; m < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0][0]); ++m) { \
                        vp9_ctx->cnts.coeff[i][j][k][l][m] = \
                                get_coeffs_arr(cnts, i, j, k, l, m); \
                        vp9_ctx->cnts.eob[i][j][k][l][m][0] = \
                                &cnts->count_eobs[i][j][k][l][m]; \
                        vp9_ctx->cnts.eob[i][j][k][l][m][1] = \
                                get_eobs1(cnts, i, j, k, l, m); \
                } \
        } while (0)

/*
 * Point the v4l2-vp9 helper library's count tables at the symbol counters
 * the G2 hardware writes into the misc buffer, so the library can adapt
 * probabilities directly from the hardware output.
 */
static void init_v4l2_vp9_count_tbl(struct hantro_ctx *ctx)
{
        struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
        struct symbol_counts *cnts = vp9_ctx->misc.cpu + vp9_ctx->ctx_counters_offset;
        int i, j, k, l, m;

        vp9_ctx->cnts.partition = &cnts->partition_counts;
        vp9_ctx->cnts.skip = &cnts->mbskip_count;
        vp9_ctx->cnts.intra_inter = &cnts->intra_inter_count;
        vp9_ctx->cnts.tx32p = &cnts->tx32x32_count;
        /*
         * g2 hardware uses tx16x16_count[2][3], while the api
         * expects tx16p[2][4], so this must be explicitly copied
         * into vp9_ctx->cnts.tx16p when passing the data to the
         * vp9 library function
         */
        vp9_ctx->cnts.tx8p = &cnts->tx8x8_count;

        vp9_ctx->cnts.y_mode = &cnts->sb_ymode_counts;
        vp9_ctx->cnts.uv_mode = &cnts->uv_mode_counts;
        vp9_ctx->cnts.comp = &cnts->comp_inter_count;
        vp9_ctx->cnts.comp_ref = &cnts->comp_ref_count;
        vp9_ctx->cnts.single_ref = &cnts->single_ref_count;
        vp9_ctx->cnts.filter = &cnts->switchable_interp_counts;
        vp9_ctx->cnts.mv_joint = &cnts->mv_counts.joints;
        vp9_ctx->cnts.sign = &cnts->mv_counts.sign;
        vp9_ctx->cnts.classes = &cnts->mv_counts.classes;
        vp9_ctx->cnts.class0 = &cnts->mv_counts.class0;
        vp9_ctx->cnts.bits = &cnts->mv_counts.bits;
        vp9_ctx->cnts.class0_fp = &cnts->mv_counts.class0_fp;
        vp9_ctx->cnts.fp = &cnts->mv_counts.fp;
        vp9_ctx->cnts.class0_hp = &cnts->mv_counts.class0_hp;
        vp9_ctx->cnts.hp = &cnts->mv_counts.hp;

        for (i = 0; i < ARRAY_SIZE(vp9_ctx->cnts.coeff); ++i)
                for (j = 0; j < ARRAY_SIZE(vp9_ctx->cnts.coeff[i]); ++j)
                        for (k = 0; k < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0]); ++k)
                                for (l = 0; l < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0]); ++l)
                                        INNER_LOOP;
}

int hantro_vp9_dec_init(struct hantro_ctx *ctx)
{
        struct hantro_dev *vpu = ctx->dev;
        const struct hantro_variant *variant = vpu->variant;
        struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec;
        struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge;
        struct hantro_aux_buf *segment_map = &vp9_dec->segment_map;
        struct hantro_aux_buf *misc = &vp9_dec->misc;
        u32 i, max_width, max_height, size;

        if (variant->num_dec_fmts < 1)
                return -EINVAL;

        /* Find the VP9 decoded format to learn the maximum supported resolution. */
        for (i = 0; i < variant->num_dec_fmts; ++i)
                if (variant->dec_fmts[i].fourcc == V4L2_PIX_FMT_VP9_FRAME)
                        break;

        if (i == variant->num_dec_fmts)
                return -EINVAL;

        max_width = variant->dec_fmts[i].frmsize.max_width;
        max_height = variant->dec_fmts[i].frmsize.max_height;

        size = hantro_vp9_tile_filter_size(max_height);
        vp9_dec->bsd_ctrl_offset = size;
        size += hantro_vp9_bsd_control_size(max_height);

        tile_edge->cpu = dma_alloc_coherent(vpu->dev, size, &tile_edge->dma, GFP_KERNEL);
        if (!tile_edge->cpu)
                return -ENOMEM;

        tile_edge->size = size;
        memset(tile_edge->cpu, 0, size);

        size = hantro_vp9_segment_map_size(max_width, max_height);
        vp9_dec->segment_map_size = size;
        size *= 2; /* we need two areas of this size, used alternately */

        segment_map->cpu = dma_alloc_coherent(vpu->dev, size, &segment_map->dma, GFP_KERNEL);
        if (!segment_map->cpu)
                goto err_segment_map;

        segment_map->size = size;
        memset(segment_map->cpu, 0, size);

        size = hantro_vp9_prob_tab_size();
        vp9_dec->ctx_counters_offset = size;
        size += hantro_vp9_count_tab_size();
        vp9_dec->tile_info_offset = size;
        size += hantro_vp9_tile_info_size();

        misc->cpu = dma_alloc_coherent(vpu->dev, size, &misc->dma, GFP_KERNEL);
        if (!misc->cpu)
                goto err_misc;

        misc->size = size;
        memset(misc->cpu, 0, size);

        init_v4l2_vp9_count_tbl(ctx);

        return 0;

err_misc:
        dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma);

err_segment_map:
        dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma);

        return -ENOMEM;
}
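/*
 * Layout of the auxiliary buffers set up above, with the offsets computed
 * in hantro_vp9_dec_init():
 *
 *   tile_edge:   [0, bsd_ctrl_offset)                    tile filter data
 *                [bsd_ctrl_offset, size)                 BSD control data
 *
 *   segment_map: two maps of segment_map_size bytes each, used
 *                alternately from frame to frame
 *
 *   misc:        [0, ctx_counters_offset)                probability tables
 *                [ctx_counters_offset, tile_info_offset) symbol counters
 *                [tile_info_offset, size)                tile info
 *                                      (4 u16 entries per tile, up to
 *                                       MAX_TILE_COLS * MAX_TILE_ROWS tiles)
 */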
void hantro_vp9_dec_exit(struct hantro_ctx *ctx)
{
        struct hantro_dev *vpu = ctx->dev;
        struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec;
        struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge;
        struct hantro_aux_buf *segment_map = &vp9_dec->segment_map;
        struct hantro_aux_buf *misc = &vp9_dec->misc;

        dma_free_coherent(vpu->dev, misc->size, misc->cpu, misc->dma);
        dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma);
        dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma);
}
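#if 0
/*
 * Illustrative wiring only, not part of this file: a G2-based variant is
 * expected to hook the init/exit helpers above into its codec ops table
 * alongside the G2 VP9 backend's run/done callbacks, roughly along these
 * lines. The array name below is made up for the sketch; the real ops
 * tables live in the per-variant files (e.g. the i.MX8M G2 variant).
 */
static const struct hantro_codec_ops example_g2_codec_ops[] = {
        [HANTRO_MODE_VP9_DEC] = {
                .run = hantro_g2_vp9_dec_run,
                .init = hantro_vp9_dec_init,
                .exit = hantro_vp9_dec_exit,
                .done = hantro_g2_vp9_dec_done,
        },
};
#endif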