1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 
26 #include "rc_calc_fpu.h"
27 
28 #include "qp_tables.h"
29 #include "amdgpu_dm/dc_fpu.h"
30 
/*
 * Pack a (mode, bpc, max_min) triple into one integer key:
 * mode in bits 16 and up, bpc in bits 8..15, max_min in the low bits.
 * Every argument is parenthesized so that arguments containing operators
 * of lower precedence than << or | are still combined as intended.
 */
#define table_hash(mode, bpc, max_min) (((mode) << 16) | ((bpc) << 8) | (max_min))
32 
/*
 * Select one of three values according to the colour mode held in a variable
 * named `cm`, which must be in scope at the expansion site:
 * val444 for CM_444 or CM_RGB, val422 for CM_422, val420 for anything else.
 * The whole expansion is parenthesized so the macro can be embedded safely
 * inside a larger expression.
 */
#define MODE_SELECT(val444, val422, val420) \
	((cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420)))
35 
36 
/*
 * Expands to one complete switch case: a label, two assignments and a break
 * (the trailing ';' is supplied at the use site).
 *
 * The label is table_hash(mode, BPC_<bpc>, max). The body points the local
 * variable `table` at the array qp_table_<mode>_<bpc>bpc_<max> and stores
 * that array's element count in the local variable `table_size`, so both
 * locals must be in scope where the macro is used.
 *
 * The third argument is used twice: token-pasted into the table name and as
 * a plain identifier inside the case label. The plain use is why the caller
 * needs identifiers named `min` / `max` that evaluate to constants.
 */
#define TABLE_CASE(mode, bpc, max)   case (table_hash(mode, BPC_##bpc, max)): \
	table = qp_table_##mode##_##bpc##bpc_##max; \
	table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max)/sizeof(*qp_table_##mode##_##bpc##bpc_##max); \
	break
41 
/*
 * Return the median (middle value) of three integers.
 *
 * Computed as max(min(a, b), min(max(a, b), c)). The previous version
 * finished with swap(b, c) where swap(a, b) was needed, so it returned the
 * largest value instead of the median whenever c was the smallest input
 * (for example median3(2, 3, 1) gave 3).
 */
static int median3(int a, int b, int c)
{
	int tmp;

	/* Order the first pair so that a <= b. */
	if (a > b) {
		tmp = a;
		a = b;
		b = tmp;
	}

	/* b = min(max(a, b), c): cap the larger of the pair by c. */
	if (b > c)
		b = c;

	/* The cap may have dropped b below a; the median is then a. */
	if (a > b)
		b = a;

	return b;
}
53 
/*
 * Round @num to the nearest integer, halfway cases away from zero.
 *
 * Half is added to non-negative inputs and subtracted from negative ones;
 * the (int) cast then truncates toward zero. The result is returned as a
 * double holding that integer value.
 */
static double dsc_roundf(double num)
{
	const double biased = (num < 0.0) ? (num - 0.5) : (num + 0.5);

	return (int)biased;
}
63 
/*
 * Ceiling of @num, computed with int casts.
 *
 * Positive values with a fractional part are bumped by one before being
 * truncated; everything else (integers, zero, negatives) is simply
 * truncated toward zero by the (int) cast. The result is returned as a
 * double holding that integer value.
 */
static double dsc_ceil(double num)
{
	const int truncated = (int)num;

	if (num > 0 && (double)truncated != num)
		return (int)(num + 1);

	return truncated;
}
73 
74 static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
75 		       enum max_min max_min, float bpp)
76 {
77 	int mode = MODE_SELECT(444, 422, 420);
78 	int sel = table_hash(mode, bpc, max_min);
79 	int table_size = 0;
80 	int index;
81 	const struct qp_entry *table = 0L;
82 
83 	// alias enum
84 	enum { min = DAL_MM_MIN, max = DAL_MM_MAX };
85 	switch (sel) {
86 		TABLE_CASE(444,  8, max);
87 		TABLE_CASE(444,  8, min);
88 		TABLE_CASE(444, 10, max);
89 		TABLE_CASE(444, 10, min);
90 		TABLE_CASE(444, 12, max);
91 		TABLE_CASE(444, 12, min);
92 		TABLE_CASE(422,  8, max);
93 		TABLE_CASE(422,  8, min);
94 		TABLE_CASE(422, 10, max);
95 		TABLE_CASE(422, 10, min);
96 		TABLE_CASE(422, 12, max);
97 		TABLE_CASE(422, 12, min);
98 		TABLE_CASE(420,  8, max);
99 		TABLE_CASE(420,  8, min);
100 		TABLE_CASE(420, 10, max);
101 		TABLE_CASE(420, 10, min);
102 		TABLE_CASE(420, 12, max);
103 		TABLE_CASE(420, 12, min);
104 	}
105 
106 	if (table == 0)
107 		return;
108 
109 	index = (bpp - table[0].bpp) * 2;
110 
111 	/* requested size is bigger than the table */
112 	if (index >= table_size) {
113 		dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n");
114 		return;
115 	}
116 
117 	memcpy(qps, table[index].qps, sizeof(qp_set));
118 }
119 
/*
 * Fill @ofs with 15 offset values chosen by colour mode and bits per pixel.
 *
 * @ofs:  destination, written as 15 consecutive ints through an int pointer
 * @mode: colour mode; CM_444 and CM_RGB share one table, CM_422 has its own,
 *        and every other value uses the final (else) table
 * @bpp:  bits per pixel
 *
 * Most entries are piecewise functions of @bpp: a constant at or below a low
 * breakpoint, another constant at or above a high breakpoint, and in between
 * a linear ramp whose increment is rounded with dsc_roundf(). Entries 10, 12,
 * 13 and 14 are the same constants (-10, -12, -12, -12) in every mode.
 */
static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
{
	int   *p = ofs;

	if (mode == CM_444 || mode == CM_RGB) {
		/*
		 * Entries 0-4: constant for bpp <= 6, for 8 <= bpp <= 12 and for
		 * bpp >= 15, with ramps over (6, 8) and (12, 15).
		 */
		*p++ = (bpp <=  6) ? (0) : ((((bpp >=  8) && (bpp <= 12))) ? (2) : ((bpp >= 15) ? (10) : ((((bpp > 6) && (bpp < 8))) ? (0 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (2 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
		*p++ = (bpp <=  6) ? (-2) : ((((bpp >=  8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (8) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (8 / 3.0))))));
		*p++ = (bpp <=  6) ? (-2) : ((((bpp >=  8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (6) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
		*p++ = (bpp <=  6) ? (-4) : ((((bpp >=  8) && (bpp <= 12))) ? (-2) : ((bpp >= 15) ? (4) : ((((bpp > 6) && (bpp < 8))) ? (-4 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (-2 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
		*p++ = (bpp <=  6) ? (-6) : ((((bpp >=  8) && (bpp <= 12))) ? (-4) : ((bpp >= 15) ? (2) : ((((bpp > 6) && (bpp < 8))) ? (-6 + dsc_roundf((bpp -  6) * (2 / 2.0))) : (-4 + dsc_roundf((bpp - 12) * (6 / 3.0))))));
		/* Entries 5-9: constant for bpp <= 12 and bpp >= 15, ramp between. */
		*p++ = (bpp <= 12) ? (-6) : ((bpp >= 15) ? (0) : (-6 + dsc_roundf((bpp - 12) * (6 / 3.0))));
		*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-2) : (-8 + dsc_roundf((bpp - 12) * (6 / 3.0))));
		*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-4) : (-8 + dsc_roundf((bpp - 12) * (4 / 3.0))));
		*p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-6) : (-8 + dsc_roundf((bpp - 12) * (2 / 3.0))));
		*p++ = (bpp <= 12) ? (-10) : ((bpp >= 15) ? (-8) : (-10 + dsc_roundf((bpp - 12) * (2 / 3.0))));
		*p++ = -10;
		/* Entry 11: ramps from -12 to -10 as bpp goes from 6 to 8. */
		*p++ = (bpp <=  6) ? (-12) : ((bpp >=  8) ? (-10) : (-12 + dsc_roundf((bpp -  6) * (2 / 2.0))));
		*p++ = -12;
		*p++ = -12;
		*p++ = -12;
	} else if (mode == CM_422) {
		/* Entries 0-9: constant for bpp <= 8 and bpp >= 10, ramp between. */
		*p++ = (bpp <=  8) ? (2) : ((bpp >= 10) ? (10) : (2 + dsc_roundf((bpp -  8) * (8 / 2.0))));
		*p++ = (bpp <=  8) ? (0) : ((bpp >= 10) ? (8) : (0 + dsc_roundf((bpp -  8) * (8 / 2.0))));
		*p++ = (bpp <=  8) ? (0) : ((bpp >= 10) ? (6) : (0 + dsc_roundf((bpp -  8) * (6 / 2.0))));
		*p++ = (bpp <=  8) ? (-2) : ((bpp >= 10) ? (4) : (-2 + dsc_roundf((bpp -  8) * (6 / 2.0))));
		*p++ = (bpp <=  8) ? (-4) : ((bpp >= 10) ? (2) : (-4 + dsc_roundf((bpp -  8) * (6 / 2.0))));
		*p++ = (bpp <=  8) ? (-6) : ((bpp >= 10) ? (0) : (-6 + dsc_roundf((bpp -  8) * (6 / 2.0))));
		*p++ = (bpp <=  8) ? (-8) : ((bpp >= 10) ? (-2) : (-8 + dsc_roundf((bpp -  8) * (6 / 2.0))));
		*p++ = (bpp <=  8) ? (-8) : ((bpp >= 10) ? (-4) : (-8 + dsc_roundf((bpp -  8) * (4 / 2.0))));
		*p++ = (bpp <=  8) ? (-8) : ((bpp >= 10) ? (-6) : (-8 + dsc_roundf((bpp -  8) * (2 / 2.0))));
		*p++ = (bpp <=  8) ? (-10) : ((bpp >= 10) ? (-8) : (-10 + dsc_roundf((bpp -  8) * (2 / 2.0))));
		*p++ = -10;
		/* Entry 11: ramps from -12 to -10 as bpp goes from 6 to 7. */
		*p++ = (bpp <=  6) ? (-12) : ((bpp >= 7) ? (-10) : (-12 + dsc_roundf((bpp -  6) * (2.0 / 1))));
		*p++ = -12;
		*p++ = -12;
		*p++ = -12;
	} else {
		/* Entries 0-9: constant for bpp <= 6 and bpp >= 8, ramp between. */
		*p++ = (bpp <=  6) ? (2) : ((bpp >=  8) ? (10) : (2 + dsc_roundf((bpp -  6) * (8 / 2.0))));
		*p++ = (bpp <=  6) ? (0) : ((bpp >=  8) ? (8) : (0 + dsc_roundf((bpp -  6) * (8 / 2.0))));
		*p++ = (bpp <=  6) ? (0) : ((bpp >=  8) ? (6) : (0 + dsc_roundf((bpp -  6) * (6 / 2.0))));
		*p++ = (bpp <=  6) ? (-2) : ((bpp >=  8) ? (4) : (-2 + dsc_roundf((bpp -  6) * (6 / 2.0))));
		*p++ = (bpp <=  6) ? (-4) : ((bpp >=  8) ? (2) : (-4 + dsc_roundf((bpp -  6) * (6 / 2.0))));
		*p++ = (bpp <=  6) ? (-6) : ((bpp >=  8) ? (0) : (-6 + dsc_roundf((bpp -  6) * (6 / 2.0))));
		*p++ = (bpp <=  6) ? (-8) : ((bpp >=  8) ? (-2) : (-8 + dsc_roundf((bpp -  6) * (6 / 2.0))));
		*p++ = (bpp <=  6) ? (-8) : ((bpp >=  8) ? (-4) : (-8 + dsc_roundf((bpp -  6) * (4 / 2.0))));
		*p++ = (bpp <=  6) ? (-8) : ((bpp >=  8) ? (-6) : (-8 + dsc_roundf((bpp -  6) * (2 / 2.0))));
		*p++ = (bpp <=  6) ? (-10) : ((bpp >=  8) ? (-8) : (-10 + dsc_roundf((bpp -  6) * (2 / 2.0))));
		*p++ = -10;
		/* Entry 11: ramps from -12 to -10 as bpp goes from 4 to 5. */
		*p++ = (bpp <=  4) ? (-12) : ((bpp >=  5) ? (-10) : (-12 + dsc_roundf((bpp -  4) * (2 / 1.0))));
		*p++ = -12;
		*p++ = -12;
		*p++ = -12;
	}
}
174 
/*
 * Populate @rc with rate-control parameters derived from the inputs.
 *
 * @rc:                   output structure; its fields are written here
 * @cm:                   colour mode (CM_RGB, CM_444, CM_422 or CM_420)
 * @bpc:                  bits per component (compared against BPC_8 / BPC_10,
 *                        and also used arithmetically)
 * @drm_bpp:              bits per pixel in units of 1/16 (divided by 16.0
 *                        below)
 * @is_navite_422_or_420: when true the computed bpp is halved
 * @slice_width:          slice width; halved locally for CM_422 / CM_420
 * @slice_height:         slice height, used for first_line_bpg_offset
 * @minor_version:        when 1 and @cm is CM_444, several QP-related values
 *                        are reduced by one
 *
 * Uses floating point; dc_assert_fp_enabled() is called first (presumably it
 * checks that FPU use has been enabled by the caller - confirm in dc_fpu.h).
 */
void _do_calc_rc_params(struct rc_params *rc,
		enum colour_mode cm,
		enum bits_per_comp bpc,
		u16 drm_bpp,
		bool is_navite_422_or_420,
		int slice_width,
		int slice_height,
		int minor_version)
{
	float bpp;
	float bpp_group;
	float initial_xmit_delay_factor;
	int padding_pixels;
	int i;

	dc_assert_fp_enabled();

	bpp = ((float)drm_bpp / 16.0);
	/* in native_422 or native_420 modes, the bits_per_pixel is double the
	 * target bpp (the latter is what calc_rc_params expects)
	 */
	if (is_navite_422_or_420)
		bpp /= 2.0;

	/* Both limits use the same expression: 11/15/19 by bpc, minus one for
	 * CM_444 with minor_version 1.
	 */
	rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
	rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);

	/* bpp for 444/RGB, twice bpp for 422 and 420 */
	bpp_group = MODE_SELECT(bpp, bpp * 2.0, bpp * 2.0);

	/* Per-mode offsets. There is no default case, so a cm value outside the
	 * four listed leaves these three fields unwritten.
	 */
	switch (cm) {
	case CM_420:
		rc->initial_fullness_offset = (bpp >=  6) ? (2048) : ((bpp <=  4) ? (6144) : ((((bpp >  4) && (bpp <=  5))) ? (6144 - dsc_roundf((bpp - 4) * (512))) : (5632 - dsc_roundf((bpp -  5) * (3584)))));
		rc->first_line_bpg_offset   = median3(0, (12 + (int) (0.09 *  min(34, slice_height - 8))), (int)((3 * bpc * 3) - (3 * bpp_group)));
		rc->second_line_bpg_offset  = median3(0, 12, (int)((3 * bpc * 3) - (3 * bpp_group)));
		break;
	case CM_422:
		rc->initial_fullness_offset = (bpp >=  8) ? (2048) : ((bpp <=  7) ? (5632) : (5632 - dsc_roundf((bpp - 7) * (3584))));
		rc->first_line_bpg_offset   = median3(0, (12 + (int) (0.09 *  min(34, slice_height - 8))), (int)((3 * bpc * 4) - (3 * bpp_group)));
		rc->second_line_bpg_offset  = 0;
		break;
	case CM_444:
	case CM_RGB:
		rc->initial_fullness_offset = (bpp >= 12) ? (2048) : ((bpp <=  8) ? (6144) : ((((bpp >  8) && (bpp <= 10))) ? (6144 - dsc_roundf((bpp - 8) * (512 / 2))) : (5632 - dsc_roundf((bpp - 10) * (3584 / 2)))));
		rc->first_line_bpg_offset   = median3(0, (12 + (int) (0.09 *  min(34, slice_height - 8))), (int)(((3 * bpc + (cm == CM_444 ? 0 : 2)) * 3) - (3 * bpp_group)));
		rc->second_line_bpg_offset  = 0;
		break;
	}

	/* 4096 / bpp for 444/RGB, half of that for 422/420, rounded */
	initial_xmit_delay_factor = (cm == CM_444 || cm == CM_RGB) ? 1.0 : 2.0;
	rc->initial_xmit_delay = dsc_roundf(8192.0/2.0/bpp/initial_xmit_delay_factor);

	/* From here on slice_width is the halved value for 422/420. */
	if (cm == CM_422 || cm == CM_420)
		slice_width /= 2;

	/* When slice_width is not a multiple of 3: the shortfall to the next
	 * multiple of 3, times the integer quotient initial_xmit_delay /
	 * slice_width.
	 */
	padding_pixels = ((slice_width % 3) != 0) ? (3 - (slice_width % 3)) * (rc->initial_xmit_delay / slice_width) : 0;
	/* Bump the delay by one when the threshold test holds and
	 * (delay + padding) leaves remainder 1 modulo 3.
	 */
	if (3 * bpp_group >= (((rc->initial_xmit_delay + 2) / 3) * (3 + (cm == CM_422)))) {
		if ((rc->initial_xmit_delay + padding_pixels) % 3 == 1)
			rc->initial_xmit_delay++;
	}

	/* Flatness QP bounds step by 4 per bpc level; minus one for CM_444 with
	 * minor_version 1.
	 */
	rc->flatness_min_qp     = ((bpc == BPC_8) ?  (3) : ((bpc == BPC_10) ? (7)  : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
	rc->flatness_max_qp     = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
	/* NOTE(review): the shift count assumes bpc's numeric value is >= 8 */
	rc->flatness_det_thresh = 2 << (bpc - 8);

	get_qp_set(rc->qp_min, cm, bpc, DAL_MM_MIN, bpp);
	get_qp_set(rc->qp_max, cm, bpc, DAL_MM_MAX, bpp);
	/* CM_444 with minor_version 1: lower every QP by one, clamped at 0 */
	if (cm == CM_444 && minor_version == 1) {
		for (i = 0; i < QP_SET_SIZE; ++i) {
			rc->qp_min[i] = rc->qp_min[i] > 0 ? rc->qp_min[i] - 1 : 0;
			rc->qp_max[i] = rc->qp_max[i] > 0 ? rc->qp_max[i] - 1 : 0;
		}
	}
	get_ofs_set(rc->ofs, cm, bpp);

	/* fixed parameters */
	rc->rc_model_size    = 8192;
	rc->rc_edge_factor   = 6;
	rc->rc_tgt_offset_hi = 3;
	rc->rc_tgt_offset_lo = 3;

	/* 14 fixed buffer thresholds, strictly increasing up to 8064 */
	rc->rc_buf_thresh[0] = 896;
	rc->rc_buf_thresh[1] = 1792;
	rc->rc_buf_thresh[2] = 2688;
	rc->rc_buf_thresh[3] = 3584;
	rc->rc_buf_thresh[4] = 4480;
	rc->rc_buf_thresh[5] = 5376;
	rc->rc_buf_thresh[6] = 6272;
	rc->rc_buf_thresh[7] = 6720;
	rc->rc_buf_thresh[8] = 7168;
	rc->rc_buf_thresh[9] = 7616;
	rc->rc_buf_thresh[10] = 7744;
	rc->rc_buf_thresh[11] = 7872;
	rc->rc_buf_thresh[12] = 8000;
	rc->rc_buf_thresh[13] = 8064;
}
270 
271 u32 _do_bytes_per_pixel_calc(int slice_width,
272 		u16 drm_bpp,
273 		bool is_navite_422_or_420)
274 {
275 	float bpp;
276 	u32 bytes_per_pixel;
277 	double d_bytes_per_pixel;
278 
279 	dc_assert_fp_enabled();
280 
281 	bpp = ((float)drm_bpp / 16.0);
282 	d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
283 	// TODO: Make sure the formula for calculating this is precise (ceiling
284 	// vs. floor, and at what point they should be applied)
285 	if (is_navite_422_or_420)
286 		d_bytes_per_pixel /= 2;
287 
288 	bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000);
289 
290 	return bytes_per_pixel;
291 }
292