1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 
26 #include "dc.h"
27 #include "opp.h"
28 #include "color_gamma.h"
29 
30 /* When calculating LUT values the first region and at least one subsequent
31  * region are calculated with full precision. These defines are a demarcation
32  * of where the second region starts and ends.
33  * These are hardcoded values to avoid recalculating them in loops.
34  */
/* First buffer_index (inclusive) of the second full-precision region. */
#define PRECISE_LUT_REGION_START 224
/* Last buffer_index (inclusive) of the second full-precision region. */
#define PRECISE_LUT_REGION_END 239

/*
 * X coordinates shared by the LUT builders in this file. The table is filled
 * by setup_x_points_distribution(), which also writes the two extra entries
 * at MAX_HW_POINTS and MAX_HW_POINTS + 1.
 */
static struct hw_x_point coordinates_x[MAX_HW_POINTS + 2];
39 
40 // these are helpers for calculations to reduce stack usage
41 // do not depend on these being preserved across calls
42 
43 /* Helper to optimize gamma calculation, only use in translate_from_linear, in
44  * particular the dc_fixpt_pow function which is very expensive
45  * The idea is that our regions for X points are exponential and currently they all use
46  * the same number of points (NUM_PTS_IN_REGION) and in each region every point
47  * is exactly 2x the one at the same index in the previous region. In other words
48  * X[i] = 2 * X[i-NUM_PTS_IN_REGION] for i>=16
49  * The other fact is that (2x)^gamma = 2^gamma * x^gamma
 * So we compute and save x^gamma for the 16 points of the first region, and for every next region
51  * just multiply with 2^gamma which can be computed once, and save the result so we
52  * recursively compute all the values.
53  */
54 
55 /*
56  * Regamma coefficients are used for both regamma and degamma. Degamma
57  * coefficients are calculated in our formula using the regamma coefficients.
58  */
59 									 /*sRGB     709     2.2 2.4 P3*/
static const int32_t numerator01[] = { 31308,   180000, 0,  0,  0}; /* a0, divided by 10000000 in build_coefficients() */
static const int32_t numerator02[] = { 12920,   4500,   0,  0,  0}; /* a1, divided by 1000 in build_coefficients() */
static const int32_t numerator03[] = { 55,      99,     0,  0,  0}; /* a2, divided by 1000 in build_coefficients() */
static const int32_t numerator04[] = { 55,      99,     0,  0,  0}; /* a3, divided by 1000 in build_coefficients() */
static const int32_t numerator05[] = { 2400,    2222,   2200, 2400, 2600}; /* user_gamma, divided by 1000; last column is selected for TRANSFER_FUNCTION_GAMMA26 */
65 
66 /* one-time setup of X points */
setup_x_points_distribution(void)67 void setup_x_points_distribution(void)
68 {
69 	struct fixed31_32 region_size = dc_fixpt_from_int(128);
70 	int32_t segment;
71 	uint32_t seg_offset;
72 	uint32_t index;
73 	struct fixed31_32 increment;
74 
75 	coordinates_x[MAX_HW_POINTS].x = region_size;
76 	coordinates_x[MAX_HW_POINTS + 1].x = region_size;
77 
78 	for (segment = 6; segment > (6 - NUM_REGIONS); segment--) {
79 		region_size = dc_fixpt_div_int(region_size, 2);
80 		increment = dc_fixpt_div_int(region_size,
81 						NUM_PTS_IN_REGION);
82 		seg_offset = (segment + (NUM_REGIONS - 7)) * NUM_PTS_IN_REGION;
83 		coordinates_x[seg_offset].x = region_size;
84 
85 		for (index = seg_offset + 1;
86 				index < seg_offset + NUM_PTS_IN_REGION;
87 				index++) {
88 			coordinates_x[index].x = dc_fixpt_add
89 					(coordinates_x[index-1].x, increment);
90 		}
91 	}
92 }
93 
log_x_points_distribution(struct dal_logger * logger)94 void log_x_points_distribution(struct dal_logger *logger)
95 {
96 	int i = 0;
97 
98 	if (logger != NULL) {
99 		LOG_GAMMA_WRITE("Log X Distribution\n");
100 
101 		for (i = 0; i < MAX_HW_POINTS; i++)
102 			LOG_GAMMA_WRITE("%llu\n", coordinates_x[i].x.value);
103 	}
104 }
105 
compute_pq(struct fixed31_32 in_x,struct fixed31_32 * out_y)106 static void compute_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
107 {
108 	/* consts for PQ gamma formula. */
109 	const struct fixed31_32 m1 =
110 		dc_fixpt_from_fraction(159301758, 1000000000);
111 	const struct fixed31_32 m2 =
112 		dc_fixpt_from_fraction(7884375, 100000);
113 	const struct fixed31_32 c1 =
114 		dc_fixpt_from_fraction(8359375, 10000000);
115 	const struct fixed31_32 c2 =
116 		dc_fixpt_from_fraction(188515625, 10000000);
117 	const struct fixed31_32 c3 =
118 		dc_fixpt_from_fraction(186875, 10000);
119 
120 	struct fixed31_32 l_pow_m1;
121 	struct fixed31_32 base;
122 
123 	if (dc_fixpt_lt(in_x, dc_fixpt_zero))
124 		in_x = dc_fixpt_zero;
125 
126 	l_pow_m1 = dc_fixpt_pow(in_x, m1);
127 	base = dc_fixpt_div(
128 			dc_fixpt_add(c1,
129 					(dc_fixpt_mul(c2, l_pow_m1))),
130 			dc_fixpt_add(dc_fixpt_one,
131 					(dc_fixpt_mul(c3, l_pow_m1))));
132 	*out_y = dc_fixpt_pow(base, m2);
133 }
134 
compute_de_pq(struct fixed31_32 in_x,struct fixed31_32 * out_y)135 static void compute_de_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
136 {
137 	/* consts for dePQ gamma formula. */
138 	const struct fixed31_32 m1 =
139 		dc_fixpt_from_fraction(159301758, 1000000000);
140 	const struct fixed31_32 m2 =
141 		dc_fixpt_from_fraction(7884375, 100000);
142 	const struct fixed31_32 c1 =
143 		dc_fixpt_from_fraction(8359375, 10000000);
144 	const struct fixed31_32 c2 =
145 		dc_fixpt_from_fraction(188515625, 10000000);
146 	const struct fixed31_32 c3 =
147 		dc_fixpt_from_fraction(186875, 10000);
148 
149 	struct fixed31_32 l_pow_m1;
150 	struct fixed31_32 base, div;
151 	struct fixed31_32 base2;
152 
153 
154 	if (dc_fixpt_lt(in_x, dc_fixpt_zero))
155 		in_x = dc_fixpt_zero;
156 
157 	l_pow_m1 = dc_fixpt_pow(in_x,
158 			dc_fixpt_div(dc_fixpt_one, m2));
159 	base = dc_fixpt_sub(l_pow_m1, c1);
160 
161 	div = dc_fixpt_sub(c2, dc_fixpt_mul(c3, l_pow_m1));
162 
163 	base2 = dc_fixpt_div(base, div);
164 	// avoid complex numbers
165 	if (dc_fixpt_lt(base2, dc_fixpt_zero))
166 		base2 = dc_fixpt_sub(dc_fixpt_zero, base2);
167 
168 
169 	*out_y = dc_fixpt_pow(base2, dc_fixpt_div(dc_fixpt_one, m1));
170 
171 }
172 
173 
174 /* de gamma, non-linear to linear */
compute_hlg_eotf(struct fixed31_32 in_x,struct fixed31_32 * out_y,uint32_t sdr_white_level,uint32_t max_luminance_nits)175 static void compute_hlg_eotf(struct fixed31_32 in_x,
176 		struct fixed31_32 *out_y,
177 		uint32_t sdr_white_level, uint32_t max_luminance_nits)
178 {
179 	struct fixed31_32 a;
180 	struct fixed31_32 b;
181 	struct fixed31_32 c;
182 	struct fixed31_32 threshold;
183 	struct fixed31_32 x;
184 
185 	struct fixed31_32 scaling_factor =
186 			dc_fixpt_from_fraction(max_luminance_nits, sdr_white_level);
187 	a = dc_fixpt_from_fraction(17883277, 100000000);
188 	b = dc_fixpt_from_fraction(28466892, 100000000);
189 	c = dc_fixpt_from_fraction(55991073, 100000000);
190 	threshold = dc_fixpt_from_fraction(1, 2);
191 
192 	if (dc_fixpt_lt(in_x, threshold)) {
193 		x = dc_fixpt_mul(in_x, in_x);
194 		x = dc_fixpt_div_int(x, 3);
195 	} else {
196 		x = dc_fixpt_sub(in_x, c);
197 		x = dc_fixpt_div(x, a);
198 		x = dc_fixpt_exp(x);
199 		x = dc_fixpt_add(x, b);
200 		x = dc_fixpt_div_int(x, 12);
201 	}
202 	*out_y = dc_fixpt_mul(x, scaling_factor);
203 
204 }
205 
206 /* re gamma, linear to non-linear */
compute_hlg_oetf(struct fixed31_32 in_x,struct fixed31_32 * out_y,uint32_t sdr_white_level,uint32_t max_luminance_nits)207 static void compute_hlg_oetf(struct fixed31_32 in_x, struct fixed31_32 *out_y,
208 		uint32_t sdr_white_level, uint32_t max_luminance_nits)
209 {
210 	struct fixed31_32 a;
211 	struct fixed31_32 b;
212 	struct fixed31_32 c;
213 	struct fixed31_32 threshold;
214 	struct fixed31_32 x;
215 
216 	struct fixed31_32 scaling_factor =
217 			dc_fixpt_from_fraction(sdr_white_level, max_luminance_nits);
218 	a = dc_fixpt_from_fraction(17883277, 100000000);
219 	b = dc_fixpt_from_fraction(28466892, 100000000);
220 	c = dc_fixpt_from_fraction(55991073, 100000000);
221 	threshold = dc_fixpt_from_fraction(1, 12);
222 	x = dc_fixpt_mul(in_x, scaling_factor);
223 
224 
225 	if (dc_fixpt_lt(x, threshold)) {
226 		x = dc_fixpt_mul(x, dc_fixpt_from_fraction(3, 1));
227 		*out_y = dc_fixpt_pow(x, dc_fixpt_half);
228 	} else {
229 		x = dc_fixpt_mul(x, dc_fixpt_from_fraction(12, 1));
230 		x = dc_fixpt_sub(x, b);
231 		x = dc_fixpt_log(x);
232 		x = dc_fixpt_mul(a, x);
233 		*out_y = dc_fixpt_add(x, c);
234 	}
235 }
236 
237 
238 /* one-time pre-compute PQ values - only for sdr_white_level 80 */
precompute_pq(void)239 void precompute_pq(void)
240 {
241 	int i;
242 	struct fixed31_32 x;
243 	const struct hw_x_point *coord_x = coordinates_x + 32;
244 	struct fixed31_32 scaling_factor =
245 			dc_fixpt_from_fraction(80, 10000);
246 
247 	struct fixed31_32 *pq_table = mod_color_get_table(type_pq_table);
248 
249 	/* pow function has problems with arguments too small */
250 	for (i = 0; i < 32; i++)
251 		pq_table[i] = dc_fixpt_zero;
252 
253 	for (i = 32; i <= MAX_HW_POINTS; i++) {
254 		x = dc_fixpt_mul(coord_x->x, scaling_factor);
255 		compute_pq(x, &pq_table[i]);
256 		++coord_x;
257 	}
258 }
259 
260 /* one-time pre-compute dePQ values - only for max pixel value 125 FP16 */
precompute_de_pq(void)261 void precompute_de_pq(void)
262 {
263 	int i;
264 	struct fixed31_32  y;
265 	uint32_t begin_index, end_index;
266 
267 	struct fixed31_32 scaling_factor = dc_fixpt_from_int(125);
268 	struct fixed31_32 *de_pq_table = mod_color_get_table(type_de_pq_table);
269 	/* X points is 2^-25 to 2^7
270 	 * De-gamma X is 2^-12 to 2^0 – we are skipping first -12-(-25) = 13 regions
271 	 */
272 	begin_index = 13 * NUM_PTS_IN_REGION;
273 	end_index = begin_index + 12 * NUM_PTS_IN_REGION;
274 
275 	for (i = 0; i <= begin_index; i++)
276 		de_pq_table[i] = dc_fixpt_zero;
277 
278 	for (; i <= end_index; i++) {
279 		compute_de_pq(coordinates_x[i].x, &y);
280 		de_pq_table[i] = dc_fixpt_mul(y, scaling_factor);
281 	}
282 
283 	for (; i <= MAX_HW_POINTS; i++)
284 		de_pq_table[i] = de_pq_table[i-1];
285 }
/*
 * Group of three fixed-point divider values.
 * NOTE(review): no user of this struct is visible in the surrounding code -
 * confirm where it is consumed before changing it.
 */
struct dividers {
	struct fixed31_32 divider1;
	struct fixed31_32 divider2;
	struct fixed31_32 divider3;
};
291 
292 
build_coefficients(struct gamma_coefficients * coefficients,enum dc_transfer_func_predefined type)293 static bool build_coefficients(struct gamma_coefficients *coefficients,
294 		enum dc_transfer_func_predefined type)
295 {
296 
297 	uint32_t i = 0;
298 	uint32_t index = 0;
299 	bool ret = true;
300 
301 	if (type == TRANSFER_FUNCTION_SRGB)
302 		index = 0;
303 	else if (type == TRANSFER_FUNCTION_BT709)
304 		index = 1;
305 	else if (type == TRANSFER_FUNCTION_GAMMA22)
306 		index = 2;
307 	else if (type == TRANSFER_FUNCTION_GAMMA24)
308 		index = 3;
309 	else if (type == TRANSFER_FUNCTION_GAMMA26)
310 		index = 4;
311 	else {
312 		ret = false;
313 		goto release;
314 	}
315 
316 	do {
317 		coefficients->a0[i] = dc_fixpt_from_fraction(
318 			numerator01[index], 10000000);
319 		coefficients->a1[i] = dc_fixpt_from_fraction(
320 			numerator02[index], 1000);
321 		coefficients->a2[i] = dc_fixpt_from_fraction(
322 			numerator03[index], 1000);
323 		coefficients->a3[i] = dc_fixpt_from_fraction(
324 			numerator04[index], 1000);
325 		coefficients->user_gamma[i] = dc_fixpt_from_fraction(
326 			numerator05[index], 1000);
327 
328 		++i;
329 	} while (i != ARRAY_SIZE(coefficients->a0));
330 release:
331 	return ret;
332 }
333 
translate_from_linear_space(struct translate_from_linear_space_args * args)334 static struct fixed31_32 translate_from_linear_space(
335 		struct translate_from_linear_space_args *args)
336 {
337 	const struct fixed31_32 one = dc_fixpt_from_int(1);
338 
339 	struct fixed31_32 scratch_1, scratch_2;
340 	struct calculate_buffer *cal_buffer = args->cal_buffer;
341 
342 	if (dc_fixpt_le(one, args->arg))
343 		return one;
344 
345 	if (dc_fixpt_le(args->arg, dc_fixpt_neg(args->a0))) {
346 		scratch_1 = dc_fixpt_add(one, args->a3);
347 		scratch_2 = dc_fixpt_pow(
348 				dc_fixpt_neg(args->arg),
349 				dc_fixpt_recip(args->gamma));
350 		scratch_1 = dc_fixpt_mul(scratch_1, scratch_2);
351 		scratch_1 = dc_fixpt_sub(args->a2, scratch_1);
352 
353 		return scratch_1;
354 	} else if (dc_fixpt_le(args->a0, args->arg)) {
355 		if (cal_buffer->buffer_index == 0) {
356 			cal_buffer->gamma_of_2 = dc_fixpt_pow(dc_fixpt_from_int(2),
357 					dc_fixpt_recip(args->gamma));
358 		}
359 		scratch_1 = dc_fixpt_add(one, args->a3);
360 		/* In the first region (first 16 points) and in the
361 		 * region delimited by START/END we calculate with
362 		 * full precision to avoid error accumulation.
363 		 */
364 		if ((cal_buffer->buffer_index >= PRECISE_LUT_REGION_START &&
365 			cal_buffer->buffer_index <= PRECISE_LUT_REGION_END) ||
366 			(cal_buffer->buffer_index < 16))
367 			scratch_2 = dc_fixpt_pow(args->arg,
368 					dc_fixpt_recip(args->gamma));
369 		else
370 			scratch_2 = dc_fixpt_mul(cal_buffer->gamma_of_2,
371 					cal_buffer->buffer[cal_buffer->buffer_index%16]);
372 
373 		if (cal_buffer->buffer_index != -1) {
374 			cal_buffer->buffer[cal_buffer->buffer_index%16] = scratch_2;
375 			cal_buffer->buffer_index++;
376 		}
377 
378 		scratch_1 = dc_fixpt_mul(scratch_1, scratch_2);
379 		scratch_1 = dc_fixpt_sub(scratch_1, args->a2);
380 
381 		return scratch_1;
382 	} else
383 		return dc_fixpt_mul(args->arg, args->a1);
384 }
385 
386 
translate_from_linear_space_long(struct translate_from_linear_space_args * args)387 static struct fixed31_32 translate_from_linear_space_long(
388 		struct translate_from_linear_space_args *args)
389 {
390 	const struct fixed31_32 one = dc_fixpt_from_int(1);
391 
392 	if (dc_fixpt_lt(one, args->arg))
393 		return one;
394 
395 	if (dc_fixpt_le(args->arg, dc_fixpt_neg(args->a0)))
396 		return dc_fixpt_sub(
397 			args->a2,
398 			dc_fixpt_mul(
399 				dc_fixpt_add(
400 					one,
401 					args->a3),
402 				dc_fixpt_pow(
403 					dc_fixpt_neg(args->arg),
404 					dc_fixpt_recip(args->gamma))));
405 	else if (dc_fixpt_le(args->a0, args->arg))
406 		return dc_fixpt_sub(
407 			dc_fixpt_mul(
408 				dc_fixpt_add(
409 					one,
410 					args->a3),
411 				dc_fixpt_pow(
412 						args->arg,
413 					dc_fixpt_recip(args->gamma))),
414 					args->a2);
415 	else
416 		return dc_fixpt_mul(args->arg, args->a1);
417 }
418 
/*
 * Evaluate a pure power curve with gamma 22/10 for @arg: all of a0..a3 are
 * zero. With @use_eetf set the buffer-free "long" translation is used,
 * otherwise the buffered translate_from_linear_space().
 */
static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg, bool use_eetf, struct calculate_buffer *cal_buffer)
{
	struct translate_from_linear_space_args gamma_args;

	gamma_args.gamma = dc_fixpt_from_fraction(22, 10);
	gamma_args.cal_buffer = cal_buffer;
	gamma_args.arg = arg;
	gamma_args.a0 = dc_fixpt_zero;
	gamma_args.a1 = dc_fixpt_zero;
	gamma_args.a2 = dc_fixpt_zero;
	gamma_args.a3 = dc_fixpt_zero;

	return use_eetf ?
		translate_from_linear_space_long(&gamma_args) :
		translate_from_linear_space(&gamma_args);
}
437 
438 
translate_to_linear_space(struct fixed31_32 arg,struct fixed31_32 a0,struct fixed31_32 a1,struct fixed31_32 a2,struct fixed31_32 a3,struct fixed31_32 gamma)439 static struct fixed31_32 translate_to_linear_space(
440 	struct fixed31_32 arg,
441 	struct fixed31_32 a0,
442 	struct fixed31_32 a1,
443 	struct fixed31_32 a2,
444 	struct fixed31_32 a3,
445 	struct fixed31_32 gamma)
446 {
447 	struct fixed31_32 linear;
448 
449 	a0 = dc_fixpt_mul(a0, a1);
450 	if (dc_fixpt_le(arg, dc_fixpt_neg(a0)))
451 
452 		linear = dc_fixpt_neg(
453 				 dc_fixpt_pow(
454 				 dc_fixpt_div(
455 				 dc_fixpt_sub(a2, arg),
456 				 dc_fixpt_add(
457 				 dc_fixpt_one, a3)), gamma));
458 
459 	else if (dc_fixpt_le(dc_fixpt_neg(a0), arg) &&
460 			 dc_fixpt_le(arg, a0))
461 		linear = dc_fixpt_div(arg, a1);
462 	else
463 		linear =  dc_fixpt_pow(
464 					dc_fixpt_div(
465 					dc_fixpt_add(a2, arg),
466 					dc_fixpt_add(
467 					dc_fixpt_one, a3)), gamma);
468 
469 	return linear;
470 }
471 
/*
 * Convenience wrapper: run translate_from_linear_space() on @arg using the
 * coefficients stored for channel @color_index in @coeff and the caller's
 * calculation buffer.
 */
static struct fixed31_32 translate_from_linear_space_ex(
	struct fixed31_32 arg,
	struct gamma_coefficients *coeff,
	uint32_t color_index,
	struct calculate_buffer *cal_buffer)
{
	struct translate_from_linear_space_args gamma_args;

	gamma_args.cal_buffer = cal_buffer;
	gamma_args.arg = arg;
	gamma_args.gamma = coeff->user_gamma[color_index];
	gamma_args.a0 = coeff->a0[color_index];
	gamma_args.a1 = coeff->a1[color_index];
	gamma_args.a2 = coeff->a2[color_index];
	gamma_args.a3 = coeff->a3[color_index];

	return translate_from_linear_space(&gamma_args);
}
490 
491 
/*
 * Convenience wrapper: run translate_to_linear_space() on @arg using the
 * coefficients stored for channel @color_index in @coeff.
 */
static inline struct fixed31_32 translate_to_linear_space_ex(
	struct fixed31_32 arg,
	struct gamma_coefficients *coeff,
	uint32_t color_index)
{
	const struct fixed31_32 a0 = coeff->a0[color_index];
	const struct fixed31_32 a1 = coeff->a1[color_index];
	const struct fixed31_32 a2 = coeff->a2[color_index];
	const struct fixed31_32 a3 = coeff->a3[color_index];
	const struct fixed31_32 gamma = coeff->user_gamma[color_index];

	return translate_to_linear_space(arg, a0, a1, a2, a3, gamma);
}
505 
506 
find_software_points(const struct dc_gamma * ramp,const struct gamma_pixel * axis_x,struct fixed31_32 hw_point,enum channel_name channel,uint32_t * index_to_start,uint32_t * index_left,uint32_t * index_right,enum hw_point_position * pos)507 static bool find_software_points(
508 	const struct dc_gamma *ramp,
509 	const struct gamma_pixel *axis_x,
510 	struct fixed31_32 hw_point,
511 	enum channel_name channel,
512 	uint32_t *index_to_start,
513 	uint32_t *index_left,
514 	uint32_t *index_right,
515 	enum hw_point_position *pos)
516 {
517 	const uint32_t max_number = ramp->num_entries + 3;
518 
519 	struct fixed31_32 left, right;
520 
521 	uint32_t i = *index_to_start;
522 
523 	while (i < max_number) {
524 		if (channel == CHANNEL_NAME_RED) {
525 			left = axis_x[i].r;
526 
527 			if (i < max_number - 1)
528 				right = axis_x[i + 1].r;
529 			else
530 				right = axis_x[max_number - 1].r;
531 		} else if (channel == CHANNEL_NAME_GREEN) {
532 			left = axis_x[i].g;
533 
534 			if (i < max_number - 1)
535 				right = axis_x[i + 1].g;
536 			else
537 				right = axis_x[max_number - 1].g;
538 		} else {
539 			left = axis_x[i].b;
540 
541 			if (i < max_number - 1)
542 				right = axis_x[i + 1].b;
543 			else
544 				right = axis_x[max_number - 1].b;
545 		}
546 
547 		if (dc_fixpt_le(left, hw_point) &&
548 			dc_fixpt_le(hw_point, right)) {
549 			*index_to_start = i;
550 			*index_left = i;
551 
552 			if (i < max_number - 1)
553 				*index_right = i + 1;
554 			else
555 				*index_right = max_number - 1;
556 
557 			*pos = HW_POINT_POSITION_MIDDLE;
558 
559 			return true;
560 		} else if ((i == *index_to_start) &&
561 			dc_fixpt_le(hw_point, left)) {
562 			*index_to_start = i;
563 			*index_left = i;
564 			*index_right = i;
565 
566 			*pos = HW_POINT_POSITION_LEFT;
567 
568 			return true;
569 		} else if ((i == max_number - 1) &&
570 			dc_fixpt_le(right, hw_point)) {
571 			*index_to_start = i;
572 			*index_left = i;
573 			*index_right = i;
574 
575 			*pos = HW_POINT_POSITION_RIGHT;
576 
577 			return true;
578 		}
579 
580 		++i;
581 	}
582 
583 	return false;
584 }
585 
build_custom_gamma_mapping_coefficients_worker(const struct dc_gamma * ramp,struct pixel_gamma_point * coeff,const struct hw_x_point * coordinates_x,const struct gamma_pixel * axis_x,enum channel_name channel,uint32_t number_of_points)586 static bool build_custom_gamma_mapping_coefficients_worker(
587 	const struct dc_gamma *ramp,
588 	struct pixel_gamma_point *coeff,
589 	const struct hw_x_point *coordinates_x,
590 	const struct gamma_pixel *axis_x,
591 	enum channel_name channel,
592 	uint32_t number_of_points)
593 {
594 	uint32_t i = 0;
595 
596 	while (i <= number_of_points) {
597 		struct fixed31_32 coord_x;
598 
599 		uint32_t index_to_start = 0;
600 		uint32_t index_left = 0;
601 		uint32_t index_right = 0;
602 
603 		enum hw_point_position hw_pos;
604 
605 		struct gamma_point *point;
606 
607 		struct fixed31_32 left_pos;
608 		struct fixed31_32 right_pos;
609 
610 		if (channel == CHANNEL_NAME_RED)
611 			coord_x = coordinates_x[i].regamma_y_red;
612 		else if (channel == CHANNEL_NAME_GREEN)
613 			coord_x = coordinates_x[i].regamma_y_green;
614 		else
615 			coord_x = coordinates_x[i].regamma_y_blue;
616 
617 		if (!find_software_points(
618 			ramp, axis_x, coord_x, channel,
619 			&index_to_start, &index_left, &index_right, &hw_pos)) {
620 			BREAK_TO_DEBUGGER();
621 			return false;
622 		}
623 
624 		if (index_left >= ramp->num_entries + 3) {
625 			BREAK_TO_DEBUGGER();
626 			return false;
627 		}
628 
629 		if (index_right >= ramp->num_entries + 3) {
630 			BREAK_TO_DEBUGGER();
631 			return false;
632 		}
633 
634 		if (channel == CHANNEL_NAME_RED) {
635 			point = &coeff[i].r;
636 
637 			left_pos = axis_x[index_left].r;
638 			right_pos = axis_x[index_right].r;
639 		} else if (channel == CHANNEL_NAME_GREEN) {
640 			point = &coeff[i].g;
641 
642 			left_pos = axis_x[index_left].g;
643 			right_pos = axis_x[index_right].g;
644 		} else {
645 			point = &coeff[i].b;
646 
647 			left_pos = axis_x[index_left].b;
648 			right_pos = axis_x[index_right].b;
649 		}
650 
651 		if (hw_pos == HW_POINT_POSITION_MIDDLE)
652 			point->coeff = dc_fixpt_div(
653 				dc_fixpt_sub(
654 					coord_x,
655 					left_pos),
656 				dc_fixpt_sub(
657 					right_pos,
658 					left_pos));
659 		else if (hw_pos == HW_POINT_POSITION_LEFT)
660 			point->coeff = dc_fixpt_zero;
661 		else if (hw_pos == HW_POINT_POSITION_RIGHT)
662 			point->coeff = dc_fixpt_from_int(2);
663 		else {
664 			BREAK_TO_DEBUGGER();
665 			return false;
666 		}
667 
668 		point->left_index = index_left;
669 		point->right_index = index_right;
670 		point->pos = hw_pos;
671 
672 		++i;
673 	}
674 
675 	return true;
676 }
677 
/*
 * Evaluate the mapped value of @channel for one hardware point described by
 * @coeff, using the software ramp @rgb.
 *
 *   - MIDDLE: left + coeff * (right - left), taken from @rgb at the stored
 *     left/right indices;
 *   - LEFT:   0 (after BREAK_TO_DEBUGGER());
 *   - any other position: 1.
 *
 * If either stored index is negative or greater than @max_index the function
 * breaks to the debugger and returns 0.
 */
static struct fixed31_32 calculate_mapped_value(
	struct pwl_float_data *rgb,
	const struct pixel_gamma_point *coeff,
	enum channel_name channel,
	uint32_t max_index)
{
	const struct gamma_point *point;
	struct fixed31_32 lo;
	struct fixed31_32 hi;

	switch (channel) {
	case CHANNEL_NAME_RED:
		point = &coeff->r;
		break;
	case CHANNEL_NAME_GREEN:
		point = &coeff->g;
		break;
	default:
		point = &coeff->b;
		break;
	}

	if ((point->left_index < 0) || (point->left_index > max_index)) {
		BREAK_TO_DEBUGGER();
		return dc_fixpt_zero;
	}

	if ((point->right_index < 0) || (point->right_index > max_index)) {
		BREAK_TO_DEBUGGER();
		return dc_fixpt_zero;
	}

	if (point->pos == HW_POINT_POSITION_LEFT) {
		BREAK_TO_DEBUGGER();
		return dc_fixpt_zero;
	}

	if (point->pos != HW_POINT_POSITION_MIDDLE)
		return dc_fixpt_one;

	/* Linear interpolation between the two bracketing ramp entries. */
	switch (channel) {
	case CHANNEL_NAME_RED:
		lo = rgb[point->left_index].r;
		hi = rgb[point->right_index].r;
		break;
	case CHANNEL_NAME_GREEN:
		lo = rgb[point->left_index].g;
		hi = rgb[point->right_index].g;
		break;
	default:
		lo = rgb[point->left_index].b;
		hi = rgb[point->right_index].b;
		break;
	}

	return dc_fixpt_add(
		dc_fixpt_mul(point->coeff, dc_fixpt_sub(hi, lo)),
		lo);
}
739 
/*
 * Fill entries 32..hw_points_num of @rgb_regamma with PQ values clamped to
 * the range 0..1; r, g and b all receive the same value.
 *
 * For an SDR white level of 80 the values come from the pre-computed PQ
 * table (built on first use); otherwise each X coordinate is scaled by
 * sdr_white_level / 10000 and passed through compute_pq().
 *
 * The first 32 entries are not written by this function.
 */
static void build_pq(struct pwl_float_data_ex *rgb_regamma,
		uint32_t hw_points_num,
		const struct hw_x_point *coordinate_x,
		uint32_t sdr_white_level)
{
	/* TODO: start index is from segment 2^-24, skipping first segment
	 * due to x values too small for power calculations
	 */
	const uint32_t start_index = 32;
	const struct fixed31_32 scale =
			dc_fixpt_from_fraction(sdr_white_level, 10000);
	struct fixed31_32 *table = mod_color_get_table(type_pq_table);
	struct fixed31_32 value;
	uint32_t idx;

	if (!mod_color_is_table_init(type_pq_table) && sdr_white_level == 80) {
		precompute_pq();
		mod_color_set_table_init_state(type_pq_table, true);
	}

	for (idx = start_index; idx <= hw_points_num; idx++) {
		/* Multiply 0.008 as regamma is 0-1 and FP16 input is 0-125.
		 * FP 1.0 = 80nits
		 */
		if (sdr_white_level == 80)
			value = table[idx];
		else
			compute_pq(dc_fixpt_mul(coordinate_x[idx].x, scale),
					&value);

		/* Not expected to trigger; keep the output within 0..1. */
		if (dc_fixpt_lt(value, dc_fixpt_zero))
			value = dc_fixpt_zero;
		else if (dc_fixpt_lt(dc_fixpt_one, value))
			value = dc_fixpt_one;

		rgb_regamma[idx].r = value;
		rgb_regamma[idx].g = value;
		rgb_regamma[idx].b = value;
	}
}
792 
/*
 * Copy entries 0..hw_points_num of the pre-computed dePQ table (built on
 * first use) into @de_pq, clamped to the range 0..125; r, g and b all
 * receive the same value. @coordinate_x is not used.
 */
static void build_de_pq(struct pwl_float_data_ex *de_pq,
		uint32_t hw_points_num,
		const struct hw_x_point *coordinate_x)
{
	struct fixed31_32 *table = mod_color_get_table(type_de_pq_table);
	const struct fixed31_32 ceiling = dc_fixpt_from_int(125);
	struct pwl_float_data_ex *dst = de_pq;
	struct fixed31_32 value;
	uint32_t idx;

	if (!mod_color_is_table_init(type_de_pq_table)) {
		precompute_de_pq();
		mod_color_set_table_init_state(type_de_pq_table, true);
	}

	for (idx = 0; idx <= hw_points_num; idx++, dst++) {
		value = table[idx];

		/* Not expected to trigger; keep the output within 0..125. */
		if (dc_fixpt_lt(value, dc_fixpt_zero))
			value = dc_fixpt_zero;
		else if (dc_fixpt_lt(ceiling, value))
			value = ceiling;

		dst->r = value;
		dst->g = value;
		dst->b = value;
	}
}
820 
build_regamma(struct pwl_float_data_ex * rgb_regamma,uint32_t hw_points_num,const struct hw_x_point * coordinate_x,enum dc_transfer_func_predefined type,struct calculate_buffer * cal_buffer)821 static bool build_regamma(struct pwl_float_data_ex *rgb_regamma,
822 		uint32_t hw_points_num,
823 		const struct hw_x_point *coordinate_x,
824 		enum dc_transfer_func_predefined type,
825 		struct calculate_buffer *cal_buffer)
826 {
827 	uint32_t i;
828 	bool ret = false;
829 
830 	struct gamma_coefficients *coeff;
831 	struct pwl_float_data_ex *rgb = rgb_regamma;
832 	const struct hw_x_point *coord_x = coordinate_x;
833 
834 	coeff = kvzalloc(sizeof(*coeff), GFP_KERNEL);
835 	if (!coeff)
836 		goto release;
837 
838 	if (!build_coefficients(coeff, type))
839 		goto release;
840 
841 	memset(cal_buffer->buffer, 0, NUM_PTS_IN_REGION * sizeof(struct fixed31_32));
842 	cal_buffer->buffer_index = 0; // see variable definition for more info
843 
844 	i = 0;
845 	while (i <= hw_points_num) {
846 		/* TODO use y vs r,g,b */
847 		rgb->r = translate_from_linear_space_ex(
848 			coord_x->x, coeff, 0, cal_buffer);
849 		rgb->g = rgb->r;
850 		rgb->b = rgb->r;
851 		++coord_x;
852 		++rgb;
853 		++i;
854 	}
855 	cal_buffer->buffer_index = -1;
856 	ret = true;
857 release:
858 	kvfree(coeff);
859 	return ret;
860 }
861 
hermite_spline_eetf(struct fixed31_32 input_x,struct fixed31_32 max_display,struct fixed31_32 min_display,struct fixed31_32 max_content,struct fixed31_32 * out_x)862 static void hermite_spline_eetf(struct fixed31_32 input_x,
863 				struct fixed31_32 max_display,
864 				struct fixed31_32 min_display,
865 				struct fixed31_32 max_content,
866 				struct fixed31_32 *out_x)
867 {
868 	struct fixed31_32 min_lum_pq;
869 	struct fixed31_32 max_lum_pq;
870 	struct fixed31_32 max_content_pq;
871 	struct fixed31_32 ks;
872 	struct fixed31_32 E1;
873 	struct fixed31_32 E2;
874 	struct fixed31_32 E3;
875 	struct fixed31_32 t;
876 	struct fixed31_32 t2;
877 	struct fixed31_32 t3;
878 	struct fixed31_32 two;
879 	struct fixed31_32 three;
880 	struct fixed31_32 temp1;
881 	struct fixed31_32 temp2;
882 	struct fixed31_32 a = dc_fixpt_from_fraction(15, 10);
883 	struct fixed31_32 b = dc_fixpt_from_fraction(5, 10);
884 	struct fixed31_32 epsilon = dc_fixpt_from_fraction(1, 1000000); // dc_fixpt_epsilon is a bit too small
885 
886 	if (dc_fixpt_eq(max_content, dc_fixpt_zero)) {
887 		*out_x = dc_fixpt_zero;
888 		return;
889 	}
890 
891 	compute_pq(input_x, &E1);
892 	compute_pq(dc_fixpt_div(min_display, max_content), &min_lum_pq);
893 	compute_pq(dc_fixpt_div(max_display, max_content), &max_lum_pq);
894 	compute_pq(dc_fixpt_one, &max_content_pq); // always 1? DAL2 code is weird
895 	a = dc_fixpt_div(dc_fixpt_add(dc_fixpt_one, b), max_content_pq); // (1+b)/maxContent
896 	ks = dc_fixpt_sub(dc_fixpt_mul(a, max_lum_pq), b); // a * max_lum_pq - b
897 
898 	if (dc_fixpt_lt(E1, ks))
899 		E2 = E1;
900 	else if (dc_fixpt_le(ks, E1) && dc_fixpt_le(E1, dc_fixpt_one)) {
901 		if (dc_fixpt_lt(epsilon, dc_fixpt_sub(dc_fixpt_one, ks)))
902 			// t = (E1 - ks) / (1 - ks)
903 			t = dc_fixpt_div(dc_fixpt_sub(E1, ks),
904 					dc_fixpt_sub(dc_fixpt_one, ks));
905 		else
906 			t = dc_fixpt_zero;
907 
908 		two = dc_fixpt_from_int(2);
909 		three = dc_fixpt_from_int(3);
910 
911 		t2 = dc_fixpt_mul(t, t);
912 		t3 = dc_fixpt_mul(t2, t);
913 		temp1 = dc_fixpt_mul(two, t3);
914 		temp2 = dc_fixpt_mul(three, t2);
915 
916 		// (2t^3 - 3t^2 + 1) * ks
917 		E2 = dc_fixpt_mul(ks, dc_fixpt_add(dc_fixpt_one,
918 				dc_fixpt_sub(temp1, temp2)));
919 
920 		// (-2t^3 + 3t^2) * max_lum_pq
921 		E2 = dc_fixpt_add(E2, dc_fixpt_mul(max_lum_pq,
922 				dc_fixpt_sub(temp2, temp1)));
923 
924 		temp1 = dc_fixpt_mul(two, t2);
925 		temp2 = dc_fixpt_sub(dc_fixpt_one, ks);
926 
927 		// (t^3 - 2t^2 + t) * (1-ks)
928 		E2 = dc_fixpt_add(E2, dc_fixpt_mul(temp2,
929 				dc_fixpt_add(t, dc_fixpt_sub(t3, temp1))));
930 	} else
931 		E2 = dc_fixpt_one;
932 
933 	temp1 = dc_fixpt_sub(dc_fixpt_one, E2);
934 	temp2 = dc_fixpt_mul(temp1, temp1);
935 	temp2 = dc_fixpt_mul(temp2, temp2);
936 	// temp2 = (1-E2)^4
937 
938 	E3 =  dc_fixpt_add(E2, dc_fixpt_mul(min_lum_pq, temp2));
939 	compute_de_pq(E3, out_x);
940 
941 	*out_x = dc_fixpt_div(*out_x, dc_fixpt_div(max_display, max_content));
942 }
943 
build_freesync_hdr(struct pwl_float_data_ex * rgb_regamma,uint32_t hw_points_num,const struct hw_x_point * coordinate_x,const struct hdr_tm_params * fs_params,struct calculate_buffer * cal_buffer)944 static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
945 		uint32_t hw_points_num,
946 		const struct hw_x_point *coordinate_x,
947 		const struct hdr_tm_params *fs_params,
948 		struct calculate_buffer *cal_buffer)
949 {
950 	uint32_t i;
951 	struct pwl_float_data_ex *rgb = rgb_regamma;
952 	const struct hw_x_point *coord_x = coordinate_x;
953 	const struct hw_x_point *prv_coord_x = coord_x;
954 	struct fixed31_32 scaledX = dc_fixpt_zero;
955 	struct fixed31_32 scaledX1 = dc_fixpt_zero;
956 	struct fixed31_32 max_display;
957 	struct fixed31_32 min_display;
958 	struct fixed31_32 max_content;
959 	struct fixed31_32 clip = dc_fixpt_one;
960 	struct fixed31_32 output;
961 	bool use_eetf = false;
962 	bool is_clipped = false;
963 	struct fixed31_32 sdr_white_level;
964 	struct fixed31_32 coordX_diff;
965 	struct fixed31_32 out_dist_max;
966 	struct fixed31_32 bright_norm;
967 
968 	if (fs_params->max_content == 0 ||
969 			fs_params->max_display == 0)
970 		return false;
971 
972 	max_display = dc_fixpt_from_int(fs_params->max_display);
973 	min_display = dc_fixpt_from_fraction(fs_params->min_display, 10000);
974 	max_content = dc_fixpt_from_int(fs_params->max_content);
975 	sdr_white_level = dc_fixpt_from_int(fs_params->sdr_white_level);
976 
977 	if (fs_params->min_display > 1000) // cap at 0.1 at the bottom
978 		min_display = dc_fixpt_from_fraction(1, 10);
979 	if (fs_params->max_display < 100) // cap at 100 at the top
980 		max_display = dc_fixpt_from_int(100);
981 
982 	// only max used, we don't adjust min luminance
983 	if (fs_params->max_content > fs_params->max_display)
984 		use_eetf = true;
985 	else
986 		max_content = max_display;
987 
988 	if (!use_eetf)
989 		cal_buffer->buffer_index = 0; // see var definition for more info
990 	rgb += 32; // first 32 points have problems with fixed point, too small
991 	coord_x += 32;
992 
993 	for (i = 32; i <= hw_points_num; i++) {
994 		if (!is_clipped) {
995 			if (use_eetf) {
996 				/* max content is equal 1 */
997 				scaledX1 = dc_fixpt_div(coord_x->x,
998 						dc_fixpt_div(max_content, sdr_white_level));
999 				hermite_spline_eetf(scaledX1, max_display, min_display,
1000 						max_content, &scaledX);
1001 			} else
1002 				scaledX = dc_fixpt_div(coord_x->x,
1003 						dc_fixpt_div(max_display, sdr_white_level));
1004 
1005 			if (dc_fixpt_lt(scaledX, clip)) {
1006 				if (dc_fixpt_lt(scaledX, dc_fixpt_zero))
1007 					output = dc_fixpt_zero;
1008 				else
1009 					output = calculate_gamma22(scaledX, use_eetf, cal_buffer);
1010 
1011 				// Ensure output respects reasonable boundaries
1012 				output = dc_fixpt_clamp(output, dc_fixpt_zero, dc_fixpt_one);
1013 
1014 				rgb->r = output;
1015 				rgb->g = output;
1016 				rgb->b = output;
1017 			} else {
1018 				/* Here clipping happens for the first time */
1019 				is_clipped = true;
1020 
1021 				/* The next few lines implement the equation
1022 				 * output = prev_out +
1023 				 * (coord_x->x - prev_coord_x->x) *
1024 				 * (1.0 - prev_out) /
1025 				 * (maxDisp/sdr_white_level - prevCoordX)
1026 				 *
1027 				 * This equation interpolates the first point
1028 				 * after max_display/80 so that the slope from
1029 				 * hw_x_before_max and hw_x_after_max is such
1030 				 * that we hit Y=1.0 at max_display/80.
1031 				 */
1032 
1033 				coordX_diff = dc_fixpt_sub(coord_x->x, prv_coord_x->x);
1034 				out_dist_max = dc_fixpt_sub(dc_fixpt_one, output);
1035 				bright_norm = dc_fixpt_div(max_display, sdr_white_level);
1036 
1037 				output = dc_fixpt_add(
1038 					output, dc_fixpt_mul(
1039 						coordX_diff, dc_fixpt_div(
1040 							out_dist_max,
1041 							dc_fixpt_sub(bright_norm, prv_coord_x->x)
1042 						)
1043 					)
1044 				);
1045 
1046 				/* Relaxing the maximum boundary to 1.07 (instead of 1.0)
1047 				 * because the last point in the curve must be such that
1048 				 * the maximum display pixel brightness interpolates to
1049 				 * exactly 1.0. The worst case scenario was calculated
1050 				 * around 1.057, so the limit of 1.07 leaves some safety
1051 				 * margin.
1052 				 */
1053 				output = dc_fixpt_clamp(output, dc_fixpt_zero,
1054 					dc_fixpt_from_fraction(107, 100));
1055 
1056 				rgb->r = output;
1057 				rgb->g = output;
1058 				rgb->b = output;
1059 			}
1060 		} else {
1061 			/* Every other clipping after the first
1062 			 * one is dealt with here
1063 			 */
1064 			rgb->r = clip;
1065 			rgb->g = clip;
1066 			rgb->b = clip;
1067 		}
1068 
1069 		prv_coord_x = coord_x;
1070 		++coord_x;
1071 		++rgb;
1072 	}
1073 	cal_buffer->buffer_index = -1;
1074 
1075 	return true;
1076 }
1077 
build_degamma(struct pwl_float_data_ex * curve,uint32_t hw_points_num,const struct hw_x_point * coordinate_x,enum dc_transfer_func_predefined type)1078 static bool build_degamma(struct pwl_float_data_ex *curve,
1079 		uint32_t hw_points_num,
1080 		const struct hw_x_point *coordinate_x, enum dc_transfer_func_predefined type)
1081 {
1082 	uint32_t i;
1083 	struct gamma_coefficients coeff;
1084 	uint32_t begin_index, end_index;
1085 	bool ret = false;
1086 
1087 	if (!build_coefficients(&coeff, type))
1088 		goto release;
1089 
1090 	i = 0;
1091 
1092 	/* X points is 2^-25 to 2^7
1093 	 * De-gamma X is 2^-12 to 2^0 – we are skipping first -12-(-25) = 13 regions
1094 	 */
1095 	begin_index = 13 * NUM_PTS_IN_REGION;
1096 	end_index = begin_index + 12 * NUM_PTS_IN_REGION;
1097 
1098 	while (i != begin_index) {
1099 		curve[i].r = dc_fixpt_zero;
1100 		curve[i].g = dc_fixpt_zero;
1101 		curve[i].b = dc_fixpt_zero;
1102 		i++;
1103 	}
1104 
1105 	while (i != end_index) {
1106 		curve[i].r = translate_to_linear_space_ex(
1107 				coordinate_x[i].x, &coeff, 0);
1108 		curve[i].g = curve[i].r;
1109 		curve[i].b = curve[i].r;
1110 		i++;
1111 	}
1112 	while (i != hw_points_num + 1) {
1113 		curve[i].r = dc_fixpt_one;
1114 		curve[i].g = dc_fixpt_one;
1115 		curve[i].b = dc_fixpt_one;
1116 		i++;
1117 	}
1118 	ret = true;
1119 release:
1120 	return ret;
1121 }
1122 
1123 
1124 
1125 
1126 
/*
 * Fill degamma[0..hw_points_num] by evaluating compute_hlg_eotf() at each
 * hardware X coordinate. The result is stored in the red channel and copied
 * to green and blue.
 */
static void build_hlg_degamma(struct pwl_float_data_ex *degamma,
		uint32_t hw_points_num,
		const struct hw_x_point *coordinate_x,
		uint32_t sdr_white_level, uint32_t max_luminance_nits)
{
	uint32_t pt;

	for (pt = 0; pt < hw_points_num + 1; pt++) {
		struct pwl_float_data_ex *out = &degamma[pt];

		compute_hlg_eotf(coordinate_x[pt].x, &out->r,
				sdr_white_level, max_luminance_nits);
		out->g = out->r;
		out->b = out->r;
	}
}
1148 
1149 
/*
 * Fill regamma[0..hw_points_num] by evaluating compute_hlg_oetf() at each
 * hardware X coordinate. The result is stored in the red channel and copied
 * to green and blue.
 */
static void build_hlg_regamma(struct pwl_float_data_ex *regamma,
		uint32_t hw_points_num,
		const struct hw_x_point *coordinate_x,
		uint32_t sdr_white_level, uint32_t max_luminance_nits)
{
	uint32_t pt;

	for (pt = 0; pt < hw_points_num + 1; pt++) {
		struct pwl_float_data_ex *out = &regamma[pt];

		compute_hlg_oetf(coordinate_x[pt].x, &out->r,
				sdr_white_level, max_luminance_nits);
		out->g = out->r;
		out->b = out->r;
	}
}
1172 
scale_gamma(struct pwl_float_data * pwl_rgb,const struct dc_gamma * ramp,struct dividers dividers)1173 static void scale_gamma(struct pwl_float_data *pwl_rgb,
1174 		const struct dc_gamma *ramp,
1175 		struct dividers dividers)
1176 {
1177 	const struct fixed31_32 max_driver = dc_fixpt_from_int(0xFFFF);
1178 	const struct fixed31_32 max_os = dc_fixpt_from_int(0xFF00);
1179 	struct fixed31_32 scaler = max_os;
1180 	uint32_t i;
1181 	struct pwl_float_data *rgb = pwl_rgb;
1182 	struct pwl_float_data *rgb_last = rgb + ramp->num_entries - 1;
1183 
1184 	i = 0;
1185 
1186 	do {
1187 		if (dc_fixpt_lt(max_os, ramp->entries.red[i]) ||
1188 			dc_fixpt_lt(max_os, ramp->entries.green[i]) ||
1189 			dc_fixpt_lt(max_os, ramp->entries.blue[i])) {
1190 			scaler = max_driver;
1191 			break;
1192 		}
1193 		++i;
1194 	} while (i != ramp->num_entries);
1195 
1196 	i = 0;
1197 
1198 	do {
1199 		rgb->r = dc_fixpt_div(
1200 			ramp->entries.red[i], scaler);
1201 		rgb->g = dc_fixpt_div(
1202 			ramp->entries.green[i], scaler);
1203 		rgb->b = dc_fixpt_div(
1204 			ramp->entries.blue[i], scaler);
1205 
1206 		++rgb;
1207 		++i;
1208 	} while (i != ramp->num_entries);
1209 
1210 	rgb->r = dc_fixpt_mul(rgb_last->r,
1211 			dividers.divider1);
1212 	rgb->g = dc_fixpt_mul(rgb_last->g,
1213 			dividers.divider1);
1214 	rgb->b = dc_fixpt_mul(rgb_last->b,
1215 			dividers.divider1);
1216 
1217 	++rgb;
1218 
1219 	rgb->r = dc_fixpt_mul(rgb_last->r,
1220 			dividers.divider2);
1221 	rgb->g = dc_fixpt_mul(rgb_last->g,
1222 			dividers.divider2);
1223 	rgb->b = dc_fixpt_mul(rgb_last->b,
1224 			dividers.divider2);
1225 
1226 	++rgb;
1227 
1228 	rgb->r = dc_fixpt_mul(rgb_last->r,
1229 			dividers.divider3);
1230 	rgb->g = dc_fixpt_mul(rgb_last->g,
1231 			dividers.divider3);
1232 	rgb->b = dc_fixpt_mul(rgb_last->b,
1233 			dividers.divider3);
1234 }
1235 
scale_gamma_dx(struct pwl_float_data * pwl_rgb,const struct dc_gamma * ramp,struct dividers dividers)1236 static void scale_gamma_dx(struct pwl_float_data *pwl_rgb,
1237 		const struct dc_gamma *ramp,
1238 		struct dividers dividers)
1239 {
1240 	uint32_t i;
1241 	struct fixed31_32 min = dc_fixpt_zero;
1242 	struct fixed31_32 max = dc_fixpt_one;
1243 
1244 	struct fixed31_32 delta = dc_fixpt_zero;
1245 	struct fixed31_32 offset = dc_fixpt_zero;
1246 
1247 	for (i = 0 ; i < ramp->num_entries; i++) {
1248 		if (dc_fixpt_lt(ramp->entries.red[i], min))
1249 			min = ramp->entries.red[i];
1250 
1251 		if (dc_fixpt_lt(ramp->entries.green[i], min))
1252 			min = ramp->entries.green[i];
1253 
1254 		if (dc_fixpt_lt(ramp->entries.blue[i], min))
1255 			min = ramp->entries.blue[i];
1256 
1257 		if (dc_fixpt_lt(max, ramp->entries.red[i]))
1258 			max = ramp->entries.red[i];
1259 
1260 		if (dc_fixpt_lt(max, ramp->entries.green[i]))
1261 			max = ramp->entries.green[i];
1262 
1263 		if (dc_fixpt_lt(max, ramp->entries.blue[i]))
1264 			max = ramp->entries.blue[i];
1265 	}
1266 
1267 	if (dc_fixpt_lt(min, dc_fixpt_zero))
1268 		delta = dc_fixpt_neg(min);
1269 
1270 	offset = dc_fixpt_add(min, max);
1271 
1272 	for (i = 0 ; i < ramp->num_entries; i++) {
1273 		pwl_rgb[i].r = dc_fixpt_div(
1274 			dc_fixpt_add(
1275 				ramp->entries.red[i], delta), offset);
1276 		pwl_rgb[i].g = dc_fixpt_div(
1277 			dc_fixpt_add(
1278 				ramp->entries.green[i], delta), offset);
1279 		pwl_rgb[i].b = dc_fixpt_div(
1280 			dc_fixpt_add(
1281 				ramp->entries.blue[i], delta), offset);
1282 
1283 	}
1284 
1285 	pwl_rgb[i].r =  dc_fixpt_sub(dc_fixpt_mul_int(
1286 				pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
1287 	pwl_rgb[i].g =  dc_fixpt_sub(dc_fixpt_mul_int(
1288 				pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
1289 	pwl_rgb[i].b =  dc_fixpt_sub(dc_fixpt_mul_int(
1290 				pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
1291 	++i;
1292 	pwl_rgb[i].r =  dc_fixpt_sub(dc_fixpt_mul_int(
1293 				pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
1294 	pwl_rgb[i].g =  dc_fixpt_sub(dc_fixpt_mul_int(
1295 				pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
1296 	pwl_rgb[i].b =  dc_fixpt_sub(dc_fixpt_mul_int(
1297 				pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
1298 }
1299 
1300 /* todo: all these scale_gamma functions are inherently the same but
1301  *  take different structures as params or different format for ramp
1302  *  values. We could probably implement it in a more generic fashion
1303  */
scale_user_regamma_ramp(struct pwl_float_data * pwl_rgb,const struct regamma_ramp * ramp,struct dividers dividers)1304 static void scale_user_regamma_ramp(struct pwl_float_data *pwl_rgb,
1305 		const struct regamma_ramp *ramp,
1306 		struct dividers dividers)
1307 {
1308 	unsigned short max_driver = 0xFFFF;
1309 	unsigned short max_os = 0xFF00;
1310 	unsigned short scaler = max_os;
1311 	uint32_t i;
1312 	struct pwl_float_data *rgb = pwl_rgb;
1313 	struct pwl_float_data *rgb_last = rgb + GAMMA_RGB_256_ENTRIES - 1;
1314 
1315 	i = 0;
1316 	do {
1317 		if (ramp->gamma[i] > max_os ||
1318 				ramp->gamma[i + 256] > max_os ||
1319 				ramp->gamma[i + 512] > max_os) {
1320 			scaler = max_driver;
1321 			break;
1322 		}
1323 		i++;
1324 	} while (i != GAMMA_RGB_256_ENTRIES);
1325 
1326 	i = 0;
1327 	do {
1328 		rgb->r = dc_fixpt_from_fraction(
1329 				ramp->gamma[i], scaler);
1330 		rgb->g = dc_fixpt_from_fraction(
1331 				ramp->gamma[i + 256], scaler);
1332 		rgb->b = dc_fixpt_from_fraction(
1333 				ramp->gamma[i + 512], scaler);
1334 
1335 		++rgb;
1336 		++i;
1337 	} while (i != GAMMA_RGB_256_ENTRIES);
1338 
1339 	rgb->r = dc_fixpt_mul(rgb_last->r,
1340 			dividers.divider1);
1341 	rgb->g = dc_fixpt_mul(rgb_last->g,
1342 			dividers.divider1);
1343 	rgb->b = dc_fixpt_mul(rgb_last->b,
1344 			dividers.divider1);
1345 
1346 	++rgb;
1347 
1348 	rgb->r = dc_fixpt_mul(rgb_last->r,
1349 			dividers.divider2);
1350 	rgb->g = dc_fixpt_mul(rgb_last->g,
1351 			dividers.divider2);
1352 	rgb->b = dc_fixpt_mul(rgb_last->b,
1353 			dividers.divider2);
1354 
1355 	++rgb;
1356 
1357 	rgb->r = dc_fixpt_mul(rgb_last->r,
1358 			dividers.divider3);
1359 	rgb->g = dc_fixpt_mul(rgb_last->g,
1360 			dividers.divider3);
1361 	rgb->b = dc_fixpt_mul(rgb_last->b,
1362 			dividers.divider3);
1363 }
1364 
1365 /*
1366  * RS3+ color transform DDI - 1D LUT adjustment is composed with regamma here
1367  * Input is evenly distributed in the output color space as specified in
1368  * SetTimings
1369  *
1370  * Interpolation details:
1371  * 1D LUT has 4096 values which give curve correction in 0-1 float range
1372  * for evenly spaced points in 0-1 range. lut1D[index] gives correction
1373  * for index/4095.
1374  * First we find index for which:
1375  *	index/4095 < regamma_y < (index+1)/4095 =>
1376  *	index < 4095*regamma_y < index + 1
 * norm_y = 4095*regamma_y, and index is norm_y truncated (floored) to an integer
1378  * lut1 = lut1D[index], lut2 = lut1D[index+1]
1379  *
1380  * adjustedY is then linearly interpolating regamma Y between lut1 and lut2
1381  *
1382  * Custom degamma on Linux uses the same interpolation math, so is handled here
1383  */
static void apply_lut_1d(
		const struct dc_gamma *ramp,
		uint32_t num_hw_points,
		struct dc_transfer_func_distributed_points *tf_pts)
{
	const int last_lut_idx = 4095;
	const struct fixed31_32 last_lut_idx_f =
			dc_fixpt_from_int(last_lut_idx);
	uint32_t pt;
	int ch;

	if (ramp->type != GAMMA_CS_TFM_1D && ramp->type != GAMMA_CUSTOM)
		return; // this is not expected

	for (pt = 0; pt < num_hw_points; pt++) {
		for (ch = 0; ch < 3; ch++) {
			struct fixed31_32 *y;
			const struct fixed31_32 *lut;
			struct fixed31_32 scaled_y;
			struct fixed31_32 lo;
			struct fixed31_32 hi;
			struct fixed31_32 frac;
			int32_t lo_idx;
			int32_t hi_idx;

			/* Select the curve point and LUT of this channel. */
			switch (ch) {
			case 0:
				y = &tf_pts->red[pt];
				lut = ramp->entries.red;
				break;
			case 1:
				y = &tf_pts->green[pt];
				lut = ramp->entries.green;
				break;
			default:
				y = &tf_pts->blue[pt];
				lut = ramp->entries.blue;
				break;
			}

			scaled_y = dc_fixpt_mul(last_lut_idx_f, *y);
			lo_idx = dc_fixpt_floor(scaled_y);

			/* Negative values are left as they are. */
			if (lo_idx < 0)
				continue;

			if (lo_idx > last_lut_idx) {
				/* Here we are dealing with the last point in the curve,
				 * which in some cases might exceed the range given by
				 * the last LUT index. So we interpolate the value using
				 * the last two LUT entries.
				 */
				lo_idx = last_lut_idx - 1;
				hi_idx = last_lut_idx;
			} else if (lo_idx == last_lut_idx) {
				hi_idx = lo_idx;
			} else {
				hi_idx = lo_idx + 1;
			}

			lo = lut[lo_idx];
			hi = lut[hi_idx];

			/* Linear interpolation between the two LUT entries. */
			frac = dc_fixpt_sub(scaled_y, dc_fixpt_from_int(lo_idx));
			*y = dc_fixpt_add(lo,
				dc_fixpt_mul(frac, dc_fixpt_sub(hi, lo)));
		}
	}
}
1458 
/*
 * Fill points[0..numberof_points-1] with the evenly spaced values
 * i / (numberof_points - 1), identical for r, g and b, then append three
 * more entries equal to the last value divided by divider1, divider2 and
 * divider3. points must have room for numberof_points + 3 elements.
 *
 * numberof_points == 0 is a caller error: it asserts and writes nothing.
 * With a single point the only value written is 0 (the denominator is
 * forced to 1 instead of 0).
 */
static void build_evenly_distributed_points(
	struct gamma_pixel *points,
	uint32_t numberof_points,
	struct dividers dividers)
{
	struct gamma_pixel *p = points;
	const struct gamma_pixel *p_last;
	uint32_t denominator;
	uint32_t i;

	// This function should not be called with 0 as a parameter
	ASSERT(numberof_points > 0);
	if (numberof_points == 0)
		return;

	/* Avoid a zero denominator when there is only one point. */
	denominator = (numberof_points > 1) ? numberof_points - 1 : 1;
	p_last = points + numberof_points - 1;

	for (i = 0; i < numberof_points; i++, p++) {
		struct fixed31_32 value = dc_fixpt_from_fraction(i,
			denominator);

		p->r = value;
		p->g = value;
		p->b = value;
	}

	p->r = dc_fixpt_div(p_last->r, dividers.divider1);
	p->g = dc_fixpt_div(p_last->g, dividers.divider1);
	p->b = dc_fixpt_div(p_last->b, dividers.divider1);

	++p;

	p->r = dc_fixpt_div(p_last->r, dividers.divider2);
	p->g = dc_fixpt_div(p_last->g, dividers.divider2);
	p->b = dc_fixpt_div(p_last->b, dividers.divider2);

	++p;

	p->r = dc_fixpt_div(p_last->r, dividers.divider3);
	p->g = dc_fixpt_div(p_last->g, dividers.divider3);
	p->b = dc_fixpt_div(p_last->b, dividers.divider3);
}
1501 
/*
 * Copy the r/g/b values of rgb_ex[0..hw_points_num + 1] into the
 * regamma_y_red/green/blue fields of the matching coordinates_x entries.
 */
static inline void copy_rgb_regamma_to_coordinates_x(
		struct hw_x_point *coordinates_x,
		uint32_t hw_points_num,
		const struct pwl_float_data_ex *rgb_ex)
{
	uint32_t idx;

	for (idx = 0; idx <= hw_points_num + 1; idx++) {
		struct hw_x_point *dst = &coordinates_x[idx];
		const struct pwl_float_data_ex *src = &rgb_ex[idx];

		dst->regamma_y_red = src->r;
		dst->regamma_y_green = src->g;
		dst->regamma_y_blue = src->b;
	}
}
1521 
calculate_interpolated_hardware_curve(const struct dc_gamma * ramp,struct pixel_gamma_point * coeff128,struct pwl_float_data * rgb_user,const struct hw_x_point * coordinates_x,const struct gamma_pixel * axis_x,uint32_t number_of_points,struct dc_transfer_func_distributed_points * tf_pts)1522 static bool calculate_interpolated_hardware_curve(
1523 	const struct dc_gamma *ramp,
1524 	struct pixel_gamma_point *coeff128,
1525 	struct pwl_float_data *rgb_user,
1526 	const struct hw_x_point *coordinates_x,
1527 	const struct gamma_pixel *axis_x,
1528 	uint32_t number_of_points,
1529 	struct dc_transfer_func_distributed_points *tf_pts)
1530 {
1531 
1532 	const struct pixel_gamma_point *coeff = coeff128;
1533 	uint32_t max_entries = 3 - 1;
1534 
1535 	uint32_t i = 0;
1536 
1537 	for (i = 0; i < 3; i++) {
1538 		if (!build_custom_gamma_mapping_coefficients_worker(
1539 				ramp, coeff128, coordinates_x, axis_x, i,
1540 				number_of_points))
1541 			return false;
1542 	}
1543 
1544 	i = 0;
1545 	max_entries += ramp->num_entries;
1546 
1547 	/* TODO: float point case */
1548 
1549 	while (i <= number_of_points) {
1550 		tf_pts->red[i] = calculate_mapped_value(
1551 			rgb_user, coeff, CHANNEL_NAME_RED, max_entries);
1552 		tf_pts->green[i] = calculate_mapped_value(
1553 			rgb_user, coeff, CHANNEL_NAME_GREEN, max_entries);
1554 		tf_pts->blue[i] = calculate_mapped_value(
1555 			rgb_user, coeff, CHANNEL_NAME_BLUE, max_entries);
1556 
1557 		++coeff;
1558 		++i;
1559 	}
1560 
1561 	return true;
1562 }
1563 
1564 /* The "old" interpolation uses a complicated scheme to build an array of
1565  * coefficients while also using an array of 0-255 normalized to 0-1
1566  * Then there's another loop using both of the above + new scaled user ramp
1567  * and we concatenate them. It also searches for points of interpolation and
1568  * uses enums for positions.
1569  *
1570  * This function uses a different approach:
1571  * user ramp is always applied on X with 0/255, 1/255, 2/255, ..., 255/255
1572  * To find index for hwX , we notice the following:
1573  * i/255 <= hwX < (i+1)/255  <=> i <= 255*hwX < i+1
1574  * See apply_lut_1d which is the same principle, but on 4K entry 1D LUT
1575  *
1576  * Once the index is known, combined Y is simply:
 * user_ramp(index) + (hwX-index/255)*(user_ramp(index+1) - user_ramp(index))
1578  *
1579  * We should switch to this method in all cases, it's simpler and faster
1580  * ToDo one day - for now this only applies to ADL regamma to avoid regression
1581  * for regular use cases (sRGB and PQ)
1582  */
static void interpolate_user_regamma(uint32_t hw_points_num,
		struct pwl_float_data *rgb_user,
		bool apply_degamma,
		struct dc_transfer_func_distributed_points *tf_pts)
{
	const struct fixed31_32 one = dc_fixpt_from_int(1);
	const struct fixed31_32 scale_255 = dc_fixpt_from_int(255);
	uint32_t pt;
	uint32_t ch;

	/* fixed_pt library has problems handling too small values */
	for (pt = 0; pt < 32; pt++) {
		tf_pts->red[pt] = dc_fixpt_zero;
		tf_pts->green[pt] = dc_fixpt_zero;
		tf_pts->blue[pt] = dc_fixpt_zero;
	}

	for (; pt <= hw_points_num + 1; pt++) {
		for (ch = 0; ch < 3; ch++) {
			const struct pwl_float_data *lo_entry;
			const struct pwl_float_data *hi_entry;
			struct fixed31_32 *dst;
			struct fixed31_32 x;
			struct fixed31_32 x_255;
			struct fixed31_32 lo;
			struct fixed31_32 hi;
			struct fixed31_32 frac;
			int32_t lo_idx;
			int32_t hi_idx;

			/* Pick the output slot and the X input for this channel:
			 * the per-channel regamma Y when degamma is applied,
			 * the plain hardware X otherwise.
			 */
			switch (ch) {
			case 0:
				dst = &tf_pts->red[pt];
				x = apply_degamma ?
					coordinates_x[pt].regamma_y_red :
					coordinates_x[pt].x;
				break;
			case 1:
				dst = &tf_pts->green[pt];
				x = apply_degamma ?
					coordinates_x[pt].regamma_y_green :
					coordinates_x[pt].x;
				break;
			default:
				dst = &tf_pts->blue[pt];
				x = apply_degamma ?
					coordinates_x[pt].regamma_y_blue :
					coordinates_x[pt].x;
				break;
			}

			/* Inputs at or above 1.0 use the last ramp entry. */
			if (dc_fixpt_le(one, x))
				x = one;

			x_255 = dc_fixpt_mul(scale_255, x);
			lo_idx = dc_fixpt_floor(x_255);

			/* Out-of-range indices leave the point unchanged. */
			if (lo_idx < 0 || lo_idx > 255)
				continue;

			hi_idx = (lo_idx == 255) ? lo_idx : lo_idx + 1;
			lo_entry = &rgb_user[lo_idx];
			hi_entry = &rgb_user[hi_idx];

			switch (ch) {
			case 0:
				lo = lo_entry->r;
				hi = hi_entry->r;
				break;
			case 1:
				lo = lo_entry->g;
				hi = hi_entry->g;
				break;
			default:
				lo = lo_entry->b;
				hi = hi_entry->b;
				break;
			}

			/* Linear interpolation between the two ramp entries. */
			frac = dc_fixpt_sub(x_255, dc_fixpt_from_int(lo_idx));
			*dst = dc_fixpt_add(lo,
				dc_fixpt_mul(frac, dc_fixpt_sub(hi, lo)));
		}
	}
}
1663 
/*
 * Clamp tf_pts->red/green/blue[0..hw_points_num] to the range [0, 1].
 */
static void build_new_custom_resulted_curve(
	uint32_t hw_points_num,
	struct dc_transfer_func_distributed_points *tf_pts)
{
	uint32_t idx;

	for (idx = 0; idx < hw_points_num + 1; idx++) {
		struct fixed31_32 *r = &tf_pts->red[idx];
		struct fixed31_32 *g = &tf_pts->green[idx];
		struct fixed31_32 *b = &tf_pts->blue[idx];

		*r = dc_fixpt_clamp(*r, dc_fixpt_zero, dc_fixpt_one);
		*g = dc_fixpt_clamp(*g, dc_fixpt_zero, dc_fixpt_one);
		*b = dc_fixpt_clamp(*b, dc_fixpt_zero, dc_fixpt_one);
	}
}
1684 
/*
 * Fill rgb_regamma[0..hw_points_num] by passing each file-level hardware X
 * coordinate through translate_from_linear_space_ex() with sRGB
 * coefficients. The red result is copied to green and blue.
 *
 * If the sRGB coefficients cannot be built, rgb_regamma is left untouched
 * rather than being computed from uninitialised coefficients.
 */
static void apply_degamma_for_user_regamma(struct pwl_float_data_ex *rgb_regamma,
		uint32_t hw_points_num, struct calculate_buffer *cal_buffer)
{
	uint32_t i;

	struct gamma_coefficients coeff;
	struct pwl_float_data_ex *rgb = rgb_regamma;
	const struct hw_x_point *coord_x = coordinates_x;

	if (!build_coefficients(&coeff, TRANSFER_FUNCTION_SRGB))
		return;

	for (i = 0; i < hw_points_num + 1; i++) {
		rgb->r = translate_from_linear_space_ex(
				coord_x->x, &coeff, 0, cal_buffer);
		rgb->g = rgb->r;
		rgb->b = rgb->r;
		++coord_x;
		++rgb;
	}
}
1707 
/*
 * Produce the final transfer-function points in tf_pts.
 *
 * When a ramp is supplied and map_user_ramp is set, rgb_regamma is copied
 * into coords_x and the user ramp is mapped onto it through
 * calculate_interpolated_hardware_curve(). Otherwise
 * rgb_regamma[0..hw_points_num] is copied straight into tf_pts.
 * If do_clamping is set the result is then clamped to [0, 1].
 *
 * Return: false if the interpolated curve could not be calculated, true
 * otherwise. Clamping is still applied on failure, so callers that ignore
 * the return value see the same tf_pts contents as before.
 */
static bool map_regamma_hw_to_x_user(
	const struct dc_gamma *ramp,
	struct pixel_gamma_point *coeff128,
	struct pwl_float_data *rgb_user,
	struct hw_x_point *coords_x,
	const struct gamma_pixel *axis_x,
	const struct pwl_float_data_ex *rgb_regamma,
	uint32_t hw_points_num,
	struct dc_transfer_func_distributed_points *tf_pts,
	bool map_user_ramp,
	bool do_clamping)
{
	/* setup to spare calculated ideal regamma values */

	uint32_t i;
	bool ok = true;

	if (ramp && map_user_ramp) {
		copy_rgb_regamma_to_coordinates_x(coords_x,
				hw_points_num,
				rgb_regamma);

		/* Report a failed interpolation instead of hiding it. */
		ok = calculate_interpolated_hardware_curve(
			ramp, coeff128, rgb_user, coords_x, axis_x,
			hw_points_num, tf_pts);
	} else {
		/* just copy current rgb_regamma into  tf_pts */
		for (i = 0; i <= hw_points_num; i++) {
			tf_pts->red[i] = rgb_regamma[i].r;
			tf_pts->green[i] = rgb_regamma[i].g;
			tf_pts->blue[i] = rgb_regamma[i].b;
		}
	}

	if (do_clamping) {
		/* this should be named differently, all it does is clamp to 0-1 */
		build_new_custom_resulted_curve(hw_points_num, tf_pts);
	}

	return ok;
}
1753 
/* Number of spare entries added to the ramp and curve buffer allocations in
 * this file; the ramp scaling helpers write three extra points after the
 * last real entry.
 */
#define _EXTRA_POINTS 3
1755 
calculate_user_regamma_coeff(struct dc_transfer_func * output_tf,const struct regamma_lut * regamma,struct calculate_buffer * cal_buffer,const struct dc_gamma * ramp)1756 bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf,
1757 		const struct regamma_lut *regamma,
1758 		struct calculate_buffer *cal_buffer,
1759 		const struct dc_gamma *ramp)
1760 {
1761 	struct gamma_coefficients coeff;
1762 	const struct hw_x_point *coord_x = coordinates_x;
1763 	uint32_t i = 0;
1764 
1765 	do {
1766 		coeff.a0[i] = dc_fixpt_from_fraction(
1767 				regamma->coeff.A0[i], 10000000);
1768 		coeff.a1[i] = dc_fixpt_from_fraction(
1769 				regamma->coeff.A1[i], 1000);
1770 		coeff.a2[i] = dc_fixpt_from_fraction(
1771 				regamma->coeff.A2[i], 1000);
1772 		coeff.a3[i] = dc_fixpt_from_fraction(
1773 				regamma->coeff.A3[i], 1000);
1774 		coeff.user_gamma[i] = dc_fixpt_from_fraction(
1775 				regamma->coeff.gamma[i], 1000);
1776 
1777 		++i;
1778 	} while (i != 3);
1779 
1780 	i = 0;
1781 	/* fixed_pt library has problems handling too small values */
1782 	while (i != 32) {
1783 		output_tf->tf_pts.red[i] = dc_fixpt_zero;
1784 		output_tf->tf_pts.green[i] = dc_fixpt_zero;
1785 		output_tf->tf_pts.blue[i] = dc_fixpt_zero;
1786 		++coord_x;
1787 		++i;
1788 	}
1789 	while (i != MAX_HW_POINTS + 1) {
1790 		output_tf->tf_pts.red[i] = translate_from_linear_space_ex(
1791 				coord_x->x, &coeff, 0, cal_buffer);
1792 		output_tf->tf_pts.green[i] = translate_from_linear_space_ex(
1793 				coord_x->x, &coeff, 1, cal_buffer);
1794 		output_tf->tf_pts.blue[i] = translate_from_linear_space_ex(
1795 				coord_x->x, &coeff, 2, cal_buffer);
1796 		++coord_x;
1797 		++i;
1798 	}
1799 
1800 	if (ramp && ramp->type == GAMMA_CS_TFM_1D)
1801 		apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts);
1802 
1803 	// this function just clamps output to 0-1
1804 	build_new_custom_resulted_curve(MAX_HW_POINTS, &output_tf->tf_pts);
1805 	output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
1806 
1807 	return true;
1808 }
1809 
calculate_user_regamma_ramp(struct dc_transfer_func * output_tf,const struct regamma_lut * regamma,struct calculate_buffer * cal_buffer,const struct dc_gamma * ramp)1810 bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
1811 		const struct regamma_lut *regamma,
1812 		struct calculate_buffer *cal_buffer,
1813 		const struct dc_gamma *ramp)
1814 {
1815 	struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts;
1816 	struct dividers dividers;
1817 
1818 	struct pwl_float_data *rgb_user = NULL;
1819 	struct pwl_float_data_ex *rgb_regamma = NULL;
1820 	bool ret = false;
1821 
1822 	if (regamma == NULL)
1823 		return false;
1824 
1825 	output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
1826 
1827 	rgb_user = kcalloc(GAMMA_RGB_256_ENTRIES + _EXTRA_POINTS,
1828 			   sizeof(*rgb_user),
1829 			   GFP_KERNEL);
1830 	if (!rgb_user)
1831 		goto rgb_user_alloc_fail;
1832 
1833 	rgb_regamma = kcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
1834 			      sizeof(*rgb_regamma),
1835 			      GFP_KERNEL);
1836 	if (!rgb_regamma)
1837 		goto rgb_regamma_alloc_fail;
1838 
1839 	dividers.divider1 = dc_fixpt_from_fraction(3, 2);
1840 	dividers.divider2 = dc_fixpt_from_int(2);
1841 	dividers.divider3 = dc_fixpt_from_fraction(5, 2);
1842 
1843 	scale_user_regamma_ramp(rgb_user, &regamma->ramp, dividers);
1844 
1845 	if (regamma->flags.bits.applyDegamma == 1) {
1846 		apply_degamma_for_user_regamma(rgb_regamma, MAX_HW_POINTS, cal_buffer);
1847 		copy_rgb_regamma_to_coordinates_x(coordinates_x,
1848 				MAX_HW_POINTS, rgb_regamma);
1849 	}
1850 
1851 	interpolate_user_regamma(MAX_HW_POINTS, rgb_user,
1852 			regamma->flags.bits.applyDegamma, tf_pts);
1853 
1854 	// no custom HDR curves!
1855 	tf_pts->end_exponent = 0;
1856 	tf_pts->x_point_at_y1_red = 1;
1857 	tf_pts->x_point_at_y1_green = 1;
1858 	tf_pts->x_point_at_y1_blue = 1;
1859 
1860 	if (ramp && ramp->type == GAMMA_CS_TFM_1D)
1861 		apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts);
1862 
1863 	// this function just clamps output to 0-1
1864 	build_new_custom_resulted_curve(MAX_HW_POINTS, tf_pts);
1865 
1866 	ret = true;
1867 
1868 	kfree(rgb_regamma);
1869 rgb_regamma_alloc_fail:
1870 	kfree(rgb_user);
1871 rgb_user_alloc_fail:
1872 	return ret;
1873 }
1874 
mod_color_calculate_degamma_params(struct dc_color_caps * dc_caps,struct dc_transfer_func * input_tf,const struct dc_gamma * ramp,bool map_user_ramp)1875 bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps,
1876 		struct dc_transfer_func *input_tf,
1877 		const struct dc_gamma *ramp, bool map_user_ramp)
1878 {
1879 	struct dc_transfer_func_distributed_points *tf_pts = &input_tf->tf_pts;
1880 	struct dividers dividers;
1881 	struct pwl_float_data *rgb_user = NULL;
1882 	struct pwl_float_data_ex *curve = NULL;
1883 	struct gamma_pixel *axis_x = NULL;
1884 	struct pixel_gamma_point *coeff = NULL;
1885 	enum dc_transfer_func_predefined tf;
1886 	uint32_t i;
1887 	bool ret = false;
1888 
1889 	if (input_tf->type == TF_TYPE_BYPASS)
1890 		return false;
1891 
1892 	/* we can use hardcoded curve for plain SRGB TF
1893 	 * If linear, it's bypass if no user ramp
1894 	 */
1895 	if (input_tf->type == TF_TYPE_PREDEFINED) {
1896 		if ((input_tf->tf == TRANSFER_FUNCTION_SRGB ||
1897 				input_tf->tf == TRANSFER_FUNCTION_LINEAR) &&
1898 				!map_user_ramp)
1899 			return true;
1900 
1901 		if (dc_caps != NULL &&
1902 			dc_caps->dpp.dcn_arch == 1) {
1903 
1904 			if (input_tf->tf == TRANSFER_FUNCTION_PQ &&
1905 					dc_caps->dpp.dgam_rom_caps.pq == 1)
1906 				return true;
1907 
1908 			if (input_tf->tf == TRANSFER_FUNCTION_GAMMA22 &&
1909 					dc_caps->dpp.dgam_rom_caps.gamma2_2 == 1)
1910 				return true;
1911 
1912 			// HLG OOTF not accounted for
1913 			if (input_tf->tf == TRANSFER_FUNCTION_HLG &&
1914 					dc_caps->dpp.dgam_rom_caps.hlg == 1)
1915 				return true;
1916 		}
1917 	}
1918 
1919 	input_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
1920 
1921 	if (map_user_ramp && ramp && ramp->type == GAMMA_RGB_256) {
1922 		rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS,
1923 				sizeof(*rgb_user),
1924 				GFP_KERNEL);
1925 		if (!rgb_user)
1926 			goto rgb_user_alloc_fail;
1927 
1928 		axis_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axis_x),
1929 				GFP_KERNEL);
1930 		if (!axis_x)
1931 			goto axis_x_alloc_fail;
1932 
1933 		dividers.divider1 = dc_fixpt_from_fraction(3, 2);
1934 		dividers.divider2 = dc_fixpt_from_int(2);
1935 		dividers.divider3 = dc_fixpt_from_fraction(5, 2);
1936 
1937 		build_evenly_distributed_points(
1938 				axis_x,
1939 				ramp->num_entries,
1940 				dividers);
1941 
1942 		scale_gamma(rgb_user, ramp, dividers);
1943 	}
1944 
1945 	curve = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*curve),
1946 			GFP_KERNEL);
1947 	if (!curve)
1948 		goto curve_alloc_fail;
1949 
1950 	coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff),
1951 			GFP_KERNEL);
1952 	if (!coeff)
1953 		goto coeff_alloc_fail;
1954 
1955 	tf = input_tf->tf;
1956 
1957 	if (tf == TRANSFER_FUNCTION_PQ)
1958 		build_de_pq(curve,
1959 				MAX_HW_POINTS,
1960 				coordinates_x);
1961 	else if (tf == TRANSFER_FUNCTION_SRGB ||
1962 		tf == TRANSFER_FUNCTION_BT709 ||
1963 		tf == TRANSFER_FUNCTION_GAMMA22 ||
1964 		tf == TRANSFER_FUNCTION_GAMMA24 ||
1965 		tf == TRANSFER_FUNCTION_GAMMA26)
1966 		build_degamma(curve,
1967 				MAX_HW_POINTS,
1968 				coordinates_x,
1969 				tf);
1970 	else if (tf == TRANSFER_FUNCTION_HLG)
1971 		build_hlg_degamma(curve,
1972 				MAX_HW_POINTS,
1973 				coordinates_x,
1974 				80, 1000);
1975 	else if (tf == TRANSFER_FUNCTION_LINEAR) {
1976 		// just copy coordinates_x into curve
1977 		i = 0;
1978 		while (i != MAX_HW_POINTS + 1) {
1979 			curve[i].r = coordinates_x[i].x;
1980 			curve[i].g = curve[i].r;
1981 			curve[i].b = curve[i].r;
1982 			i++;
1983 		}
1984 	} else
1985 		goto invalid_tf_fail;
1986 
1987 	tf_pts->end_exponent = 0;
1988 	tf_pts->x_point_at_y1_red = 1;
1989 	tf_pts->x_point_at_y1_green = 1;
1990 	tf_pts->x_point_at_y1_blue = 1;
1991 
1992 	if (input_tf->tf == TRANSFER_FUNCTION_PQ) {
1993 		/* just copy current rgb_regamma into  tf_pts */
1994 		struct pwl_float_data_ex *curvePt = curve;
1995 		int i = 0;
1996 
1997 		while (i <= MAX_HW_POINTS) {
1998 			tf_pts->red[i]   = curvePt->r;
1999 			tf_pts->green[i] = curvePt->g;
2000 			tf_pts->blue[i]  = curvePt->b;
2001 			++curvePt;
2002 			++i;
2003 		}
2004 	} else {
2005 		// clamps to 0-1
2006 		map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
2007 				coordinates_x, axis_x, curve,
2008 				MAX_HW_POINTS, tf_pts,
2009 				map_user_ramp && ramp && ramp->type == GAMMA_RGB_256,
2010 				true);
2011 	}
2012 
2013 
2014 
2015 	if (ramp && ramp->type == GAMMA_CUSTOM)
2016 		apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts);
2017 
2018 	ret = true;
2019 
2020 invalid_tf_fail:
2021 	kvfree(coeff);
2022 coeff_alloc_fail:
2023 	kvfree(curve);
2024 curve_alloc_fail:
2025 	kvfree(axis_x);
2026 axis_x_alloc_fail:
2027 	kvfree(rgb_user);
2028 rgb_user_alloc_fail:
2029 
2030 	return ret;
2031 }
2032 
calculate_curve(enum dc_transfer_func_predefined trans,struct dc_transfer_func_distributed_points * points,struct pwl_float_data_ex * rgb_regamma,const struct hdr_tm_params * fs_params,uint32_t sdr_ref_white_level,struct calculate_buffer * cal_buffer)2033 static bool calculate_curve(enum dc_transfer_func_predefined trans,
2034 				struct dc_transfer_func_distributed_points *points,
2035 				struct pwl_float_data_ex *rgb_regamma,
2036 				const struct hdr_tm_params *fs_params,
2037 				uint32_t sdr_ref_white_level,
2038 				struct calculate_buffer *cal_buffer)
2039 {
2040 	uint32_t i;
2041 	bool ret = false;
2042 
2043 	if (trans == TRANSFER_FUNCTION_UNITY ||
2044 		trans == TRANSFER_FUNCTION_LINEAR) {
2045 		points->end_exponent = 0;
2046 		points->x_point_at_y1_red = 1;
2047 		points->x_point_at_y1_green = 1;
2048 		points->x_point_at_y1_blue = 1;
2049 
2050 		for (i = 0; i <= MAX_HW_POINTS ; i++) {
2051 			rgb_regamma[i].r = coordinates_x[i].x;
2052 			rgb_regamma[i].g = coordinates_x[i].x;
2053 			rgb_regamma[i].b = coordinates_x[i].x;
2054 		}
2055 
2056 		ret = true;
2057 	} else if (trans == TRANSFER_FUNCTION_PQ) {
2058 		points->end_exponent = 7;
2059 		points->x_point_at_y1_red = 125;
2060 		points->x_point_at_y1_green = 125;
2061 		points->x_point_at_y1_blue = 125;
2062 
2063 		build_pq(rgb_regamma,
2064 				MAX_HW_POINTS,
2065 				coordinates_x,
2066 				sdr_ref_white_level);
2067 
2068 		ret = true;
2069 	} else if (trans == TRANSFER_FUNCTION_GAMMA22 &&
2070 			fs_params != NULL && fs_params->skip_tm == 0) {
2071 		build_freesync_hdr(rgb_regamma,
2072 				MAX_HW_POINTS,
2073 				coordinates_x,
2074 				fs_params,
2075 				cal_buffer);
2076 
2077 		ret = true;
2078 	} else if (trans == TRANSFER_FUNCTION_HLG) {
2079 		points->end_exponent = 4;
2080 		points->x_point_at_y1_red = 12;
2081 		points->x_point_at_y1_green = 12;
2082 		points->x_point_at_y1_blue = 12;
2083 
2084 		build_hlg_regamma(rgb_regamma,
2085 				MAX_HW_POINTS,
2086 				coordinates_x,
2087 				80, 1000);
2088 
2089 		ret = true;
2090 	} else {
2091 		// trans == TRANSFER_FUNCTION_SRGB
2092 		// trans == TRANSFER_FUNCTION_BT709
2093 		// trans == TRANSFER_FUNCTION_GAMMA22
2094 		// trans == TRANSFER_FUNCTION_GAMMA24
2095 		// trans == TRANSFER_FUNCTION_GAMMA26
2096 		points->end_exponent = 0;
2097 		points->x_point_at_y1_red = 1;
2098 		points->x_point_at_y1_green = 1;
2099 		points->x_point_at_y1_blue = 1;
2100 
2101 		build_regamma(rgb_regamma,
2102 				MAX_HW_POINTS,
2103 				coordinates_x,
2104 				trans,
2105 				cal_buffer);
2106 
2107 		ret = true;
2108 	}
2109 
2110 	return ret;
2111 }
2112 
mod_color_calculate_regamma_params(struct dc_transfer_func * output_tf,const struct dc_gamma * ramp,bool map_user_ramp,bool can_rom_be_used,const struct hdr_tm_params * fs_params,struct calculate_buffer * cal_buffer)2113 bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
2114 					const struct dc_gamma *ramp,
2115 					bool map_user_ramp,
2116 					bool can_rom_be_used,
2117 					const struct hdr_tm_params *fs_params,
2118 					struct calculate_buffer *cal_buffer)
2119 {
2120 	struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts;
2121 	struct dividers dividers;
2122 
2123 	struct pwl_float_data *rgb_user = NULL;
2124 	struct pwl_float_data_ex *rgb_regamma = NULL;
2125 	struct gamma_pixel *axis_x = NULL;
2126 	struct pixel_gamma_point *coeff = NULL;
2127 	enum dc_transfer_func_predefined tf;
2128 	bool do_clamping = true;
2129 	bool ret = false;
2130 
2131 	if (output_tf->type == TF_TYPE_BYPASS)
2132 		return false;
2133 
2134 	/* we can use hardcoded curve for plain SRGB TF */
2135 	if (output_tf->type == TF_TYPE_PREDEFINED && can_rom_be_used == true &&
2136 			output_tf->tf == TRANSFER_FUNCTION_SRGB) {
2137 		if (ramp == NULL)
2138 			return true;
2139 		if ((ramp->is_identity && ramp->type != GAMMA_CS_TFM_1D) ||
2140 		    (!map_user_ramp && ramp->type == GAMMA_RGB_256))
2141 			return true;
2142 	}
2143 
2144 	output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
2145 
2146 	if (ramp && ramp->type != GAMMA_CS_TFM_1D &&
2147 	    (map_user_ramp || ramp->type != GAMMA_RGB_256)) {
2148 		rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS,
2149 			    sizeof(*rgb_user),
2150 			    GFP_KERNEL);
2151 		if (!rgb_user)
2152 			goto rgb_user_alloc_fail;
2153 
2154 		axis_x = kvcalloc(ramp->num_entries + 3, sizeof(*axis_x),
2155 				GFP_KERNEL);
2156 		if (!axis_x)
2157 			goto axis_x_alloc_fail;
2158 
2159 		dividers.divider1 = dc_fixpt_from_fraction(3, 2);
2160 		dividers.divider2 = dc_fixpt_from_int(2);
2161 		dividers.divider3 = dc_fixpt_from_fraction(5, 2);
2162 
2163 		build_evenly_distributed_points(
2164 				axis_x,
2165 				ramp->num_entries,
2166 				dividers);
2167 
2168 		if (ramp->type == GAMMA_RGB_256 && map_user_ramp)
2169 			scale_gamma(rgb_user, ramp, dividers);
2170 		else if (ramp->type == GAMMA_RGB_FLOAT_1024)
2171 			scale_gamma_dx(rgb_user, ramp, dividers);
2172 	}
2173 
2174 	rgb_regamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
2175 			       sizeof(*rgb_regamma),
2176 			       GFP_KERNEL);
2177 	if (!rgb_regamma)
2178 		goto rgb_regamma_alloc_fail;
2179 
2180 	coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff),
2181 			 GFP_KERNEL);
2182 	if (!coeff)
2183 		goto coeff_alloc_fail;
2184 
2185 	tf = output_tf->tf;
2186 
2187 	ret = calculate_curve(tf,
2188 			tf_pts,
2189 			rgb_regamma,
2190 			fs_params,
2191 			output_tf->sdr_ref_white_level,
2192 			cal_buffer);
2193 
2194 	if (ret) {
2195 		do_clamping = !(output_tf->tf == TRANSFER_FUNCTION_GAMMA22 &&
2196 				fs_params != NULL && fs_params->skip_tm == 0);
2197 
2198 		map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
2199 					 coordinates_x, axis_x, rgb_regamma,
2200 					 MAX_HW_POINTS, tf_pts,
2201 					 (map_user_ramp || (ramp && ramp->type != GAMMA_RGB_256)) &&
2202 					 (ramp && ramp->type != GAMMA_CS_TFM_1D),
2203 					 do_clamping);
2204 
2205 		if (ramp && ramp->type == GAMMA_CS_TFM_1D)
2206 			apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts);
2207 	}
2208 
2209 	kvfree(coeff);
2210 coeff_alloc_fail:
2211 	kvfree(rgb_regamma);
2212 rgb_regamma_alloc_fail:
2213 	kvfree(axis_x);
2214 axis_x_alloc_fail:
2215 	kvfree(rgb_user);
2216 rgb_user_alloc_fail:
2217 	return ret;
2218 }
2219